Import data in R

Post date: Oct 3, 2013 5:26:43 PM

# This code will import the data from the csv file

# and "preliminarily clean" the data

# Start from a clean workspace: ls() lists the visible objects in the current
# environment, lsf.str() lists the functions among them, and everything in the
# difference (i.e. every non-function object) is removed.
# NOTE(review): this also deletes any variables that existed before the script
# was run -- confirm that wiping the caller's workspace is intended.
rm(list = setdiff(ls(), lsf.str()))

# ========= Script configuration ============

# --- input: directory and file name of the raw extract ---
libDir         <- "/home/local/ANT/kittipat/research/adhoc_ship_promised"
input.filename <- "seller_status_shipdate_01AUG2013.csv"

# --- output: directory and file name intended for the saved data set ---
outDir        <- "/home/local/ANT/kittipat/research/adhoc_ship_promised"
data.set.name <- "dataframe_import.Rda"

# =================== Main import data ==========================

# Read the raw extract into `data.matrix` (a data frame) and report how long
# the read took. The file is parsed as tab-separated text with a header row;
# empty fields, "---" and "9999" are read as NA, and string columns are loaded
# as factors.
time.start <- proc.time()
data.matrix <- read.delim(
  file = file.path(libDir, input.filename),
  header = TRUE,
  sep = "\t",
  na.strings = c("", "---", "9999"),
  stringsAsFactors = TRUE
)
time.stop <- proc.time()

# "elapsed" is the third entry of a proc.time() result.
cat("import data matrix: ", (time.stop - time.start)[["elapsed"]], " sec\n")

# =================== Preprocess data ==========================

# Lower-case every column name, then convert the five day columns from text in
# "%d-%b-%Y" form into Date values. The work happens on a copy inside local()
# so that no helper variables are left behind in the workspace.
# NOTE(review): "%b" matches month abbreviations of the current locale, so the
# parse presumably relies on an English locale -- confirm.
data.matrix <- local({
  cleaned <- data.matrix
  names(cleaned) <- tolower(names(cleaned))

  day_columns <- c(
    "order_day",
    "promised_ship_day",
    "ship_day",
    "promised_arrival_day",
    "enforcement_date"
  )
  for (day_col in day_columns) {
    # as.character() first: the import used stringsAsFactors = TRUE, so text
    # columns arrive as factors rather than plain strings.
    cleaned[, day_col] <- as.Date(as.character(cleaned[, day_col]),
                                  format = "%d-%b-%Y")
  }

  cleaned
})

# ---- format the seller status ------

# Show the raw status values before recoding (auto-printed when run
# interactively).
unique(data.matrix[, "status_final"])

# Recode `status_final` into a factor with a fixed level order:
#   "Block", "FraudSeller", "ReinstateSeller", "NormalSeller"
# A missing status is treated as "NormalSeller".
#
# The recode is done by value on the character representation, because:
#   * assigning a new string into a factor that lacks that level only
#     produces NA (with a warning), and
#   * relabelling integer codes by position silently mislabels rows whenever
#     one of the statuses does not occur in the extract.
# `status_final` is kept as a workspace variable, as before.
status_final <- local({
  known_levels <- c("Block", "FraudSeller", "ReinstateSeller", "NormalSeller")

  status_chr <- as.character(data.matrix[, "status_final"])
  status_chr[is.na(status_chr)] <- "NormalSeller"

  # Keep any status that is not in the known list as its own level instead of
  # silently turning it into NA, and tell the user about it.
  unexpected <- setdiff(unique(status_chr), known_levels)
  if (length(unexpected) > 0) {
    warning(
      "Unexpected status_final value(s) kept as extra levels: ",
      paste(unexpected, collapse = ", "),
      call. = FALSE
    )
  }

  factor(status_chr, levels = c(known_levels, sort(unexpected)))
})

data.matrix[, "status_final"] <- status_final

# Preview the first ten rows of the cleaned data.
data.matrix[seq_len(10), ]

# Number of days between the order day and the promised arrival day for each
# row, followed by the distinct values that occur.
day_order_to_arrival <- as.numeric(
  difftime(
    data.matrix[, "promised_arrival_day"],
    data.matrix[, "order_day"],
    units = "days"
  )
)
unique(day_order_to_arrival)

# Persist the cleaned data frame under the object name `data.matrix`.
# NOTE(review): the file is written to the current working directory under a
# hard-coded name; `outDir` and `data.set.name` defined at the top of the
# script are not used here -- confirm which location is intended.
save(list = "data.matrix", file = "data_01AUG2013.Rda")