Import data in R
Post date: Oct 3, 2013 5:26:43 PM
# This code will import the data from the csv file
# and perform a preliminary clean-up of the data
# Remove every object visible in the current environment except functions
# (lsf.str() lists the functions, which setdiff() then excludes from ls()).
rm(list = setdiff(x = ls(), y = lsf.str()))

# ---------------- Input / output configuration ----------------
# Name of the raw export to read and the directory that contains it.
input.filename <- "seller_status_shipdate_01AUG2013.csv"
libDir <- "/home/local/ANT/kittipat/research/adhoc_ship_promised"

# Output directory and data-set name.
# NOTE(review): neither value appears to be used in the visible part of this
# script -- confirm whether later code relies on them.
outDir <- "/home/local/ANT/kittipat/research/adhoc_ship_promised"
data.set.name <- "dataframe_import.Rda"
# =================== Main import data ==========================
# Read the tab-delimited export into a data frame and time the read.
# Empty fields, "---" and "9999" are treated as missing values, and string
# columns are converted to factors.
# NOTE(review): the object name `data.matrix` is shared with the base R
# function data.matrix(); it is kept because the rest of the script uses it.
time.start <- proc.time()
data.matrix <- read.delim(
  file = file.path(libDir, input.filename),
  header = TRUE,
  sep = "\t",
  na.strings = c("", "---", "9999"),
  stringsAsFactors = TRUE
)
time.stop <- proc.time()

# Report the elapsed (wall-clock) time of the import.
cat("import data matrix: ", (time.stop - time.start)[["elapsed"]], " sec\n")
# =================== Preprocess data ==========================
# Lower-case every column name, then parse the five date columns from text
# such as "01-Aug-2013" into Date objects.
# NOTE: "%b" matches abbreviated month names in the current locale, so the
# parse depends on the session's locale settings.
data.matrix <- local({
  cleaned <- data.matrix
  names(cleaned) <- tolower(names(cleaned))

  for (date.col in c("order_day",
                     "promised_ship_day",
                     "ship_day",
                     "promised_arrival_day",
                     "enforcement_date")) {
    # Go through character first so factor columns are parsed by label,
    # not by their integer codes.
    cleaned[, date.col] <- as.Date(as.character(cleaned[, date.col]),
                                   format = "%d-%b-%Y")
  }

  cleaned
})
# ---- format the seller status ------
# Recode `status_final` into a factor with the levels
#   "Block", "FraudSeller", "ReinstateSeller", "NormalSeller"
# where a missing status means the seller is a normal seller.
#
# The recoding is done on the character labels and the factor is built with
# explicit levels, so the result does not depend on which statuses happen to
# be present in the file or on their alphabetical order.
unique(data.matrix[, "status_final"])

status_final <- local({
  known.levels <- c("Block", "FraudSeller", "ReinstateSeller", "NormalSeller")

  # Work on plain strings: assigning a new label into a factor column would
  # produce NA (with a warning) because the level does not exist yet.
  labels <- as.character(data.matrix[, "status_final"])
  labels[is.na(labels)] <- "NormalSeller"

  # Keep any status we did not expect as an extra level instead of silently
  # mislabelling or dropping it, and tell the user about it.
  unexpected <- sort(setdiff(unique(labels), known.levels))
  if (length(unexpected) > 0) {
    warning("unexpected status_final value(s) kept as extra levels: ",
            paste(unexpected, collapse = ", "),
            call. = FALSE)
  }

  factor(labels, levels = c(known.levels, unexpected))
})

data.matrix[, "status_final"] <- status_final
# Preview the first ten rows (only shown when the value is auto-printed,
# e.g. when run interactively).
data.matrix[seq(from = 1, to = 10), ]

# Number of days between the order date and the promised arrival date.
day_order_to_arrival <- with(
  data.matrix,
  as.numeric(promised_arrival_day - order_day)
)
unique(day_order_to_arrival)

# Persist the cleaned data frame.
# NOTE(review): the file is written relative to the current working
# directory, not to `outDir` -- confirm that this is intended.
save(list = "data.matrix", file = "data_01AUG2013.Rda")