R Sample 3
This script was created for a consulting project to help a graduate student in the Department of Environment & Geography at the University of Manitoba. Our goal was to reshape a large data set from a form similar to that shown in Figure 1 into the form shown in Figure 2. A small sample data set (similar to that used in the project, but not taken from it) can be used to test the script. The sample data contain daily precipitation records for the city of Winnipeg, Manitoba, from January 1993 to December 1997.
Figure 1
Figure 2
# Step 1 (manual, outside R): convert Winnipeg_Rain_Snow_Precip_1993-1997.xls
# to Winnipeg_Rain_Snow_Precip_1993-1997.csv.
workdir <- "V:/" # change this to the directory that holds the data file
setwd(workdir) # relative paths used later in the script resolve against this
# Build the input path from 'workdir' so that only the line above has to be
# edited when the data live elsewhere. Trailing separators are stripped first
# so the joined path has no doubled slash ("V:/" still gives "V:/<file>").
datafile <- file.path(sub("[/\\\\]+$", "", workdir),
                      "Winnipeg_Rain_Snow_Precip_1993-1997.csv")
# Import the comma-separated data with a header row; as.is = TRUE keeps text
# columns as character instead of converting them to factors.
originaldata <- read.table(datafile, sep = ",", header = TRUE, as.is = TRUE)
# Remove data rows whose first column (Year) is NA.
originaldata <- originaldata[complete.cases(originaldata[, 1]), ]
names(originaldata) # list the variables that are in the data set 'originaldata'
# Check whether a year is a leap year under the Gregorian calendar rules.
#
# Args:
#   data: a numeric year, or a numeric vector of years.
# Returns:
#   A logical vector of the same length as 'data': TRUE where the year is
#   divisible by 4 but not by 100, or is divisible by 400; FALSE otherwise.
#   NA years yield NA.
leapCheck <- function(data) {
  # One vectorised expression replaces the chained scalar if/else, so the
  # function gives the same answers for a single year and also accepts a
  # whole column of years.
  (data %% 4 == 0 & data %% 100 != 0) | data %% 400 == 0
}
# Example:
# leapCheck(2000) returns TRUE
# leapCheck(2005) returns FALSE
# Month labels exactly as they appear in the second column, in order of first
# appearance. The positional indexing below assumes the data start in January
# and cover all twelve months, so that months[1] is January, months[2] is
# February, and so on.
months <- unique(originaldata[, 2])
if (length(months) != 12) {
  warning("expected 12 distinct month labels in column 2 but found ",
          length(months), "; month lengths may be assigned incorrectly",
          call. = FALSE)
}
whitemonth <- months[c(1, 3, 5, 7, 8, 10, 12)] # these months have 31 days
# July (7) and August (8) have 31 days, so they belong only to 'whitemonth'.
blackmonth <- months[c(4, 6, 9, 11)] # these months have 30 days
# Number of days in the given month of the given year.
# Uses the lookup vectors 'whitemonth' (31 days), 'blackmonth' (30 days) and
# 'months' (months[2] is February) defined earlier in this script, and
# leapCheck() to pick 29 or 28 days for February.
# Returns NA if the month label matches none of them.
daysInMonth <- function(dataYear, dataMonth) {
  if (is.element(dataMonth, whitemonth)) {
    31L
  } else if (is.element(dataMonth, blackmonth)) {
    30L
  } else if (is.element(dataMonth, months[2])) {
    if (leapCheck(dataYear)) 29L else 28L
  } else {
    NA_integer_
  }
}

# Turn row i of 'originaldata' (one year-month, with the daily values stored
# in columns 3, 4, ...) into a matrix with one row per day and the columns
# year, month, day of month, value.
# Returns NULL for a row whose month is not recognised, so that the row is
# skipped when the pieces are stacked.
expandMonthRow <- function(i) {
  dataYear <- originaldata[i, 1]
  dataMonth <- originaldata[i, 2]
  nDays <- daysInMonth(dataYear, dataMonth)
  if (is.na(nDays)) {
    return(NULL)
  }
  cbind(rep(dataYear, nDays),
        rep(dataMonth, nDays),
        seq_len(nDays),
        # only the first nDays daily columns hold real days for this month
        as.vector(t(originaldata[i, 3:(2 + nDays)])))
}

# Expand every row and stack the pieces once at the end. seq_len() copes with
# a single-row (or empty) data set, where 2:nrow would count backwards, and
# a single rbind avoids re-copying the growing result on every iteration.
newdata <- do.call(rbind, lapply(seq_len(nrow(originaldata)), expandMonthRow))
if (is.null(newdata)) {
  stop("no rows of 'originaldata' could be expanded into daily records",
       call. = FALSE)
}
# Label the four output columns, then save the reorganised table as a
# comma-separated file with a header row, no row names and no quoting.
colnames(newdata) <- c(
  "Year",
  "Month",
  "Day of Month",
  "Daily Adjusted Precipitation (mm)"
)
write.table(
  newdata,
  file = "./data_organized_in_columns.csv",
  sep = ",",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)