R Sample 3

This script was created for a consulting project to help a graduate student in the Department of Environment & Geography at the University of Manitoba. Our goal was to convert a large data set laid out like the one shown in Figure 1 into the layout shown in Figure 2. A small sample data set (similar to the one used in the project, but not taken from it) can be used to test the script. The sample data contain daily precipitation records for the city of Winnipeg, Manitoba, from January 1993 to December 1997.

Figure 1

Figure 2

# Step 1 (done by hand before running this script): convert
# Winnipeg_Rain_Snow_Precip_1993-1997.xls to Winnipeg_Rain_Snow_Precip_1993-1997.csv

# Directory holding the data file; it is made the working directory
workdir <- "V:/"
setwd(workdir)

# Full path of the CSV file to import
datafile <- "V:/Winnipeg_Rain_Snow_Precip_1993-1997.csv"

# Import the comma-separated file (first line is the header) into 'originaldata',
# leaving character columns as character rather than converting them to factors
originaldata <- read.table(file = datafile, header = TRUE, sep = ",", as.is = TRUE)

# Keep only the rows whose first column (Year) is not "NA"
originaldata <- originaldata[complete.cases(originaldata[[1]]), ]

# List the variables that are in the data set 'originaldata'
names(originaldata)

# Check whether a year is a leap year in the Gregorian calendar.
#
# A year is a leap year when it is divisible by 4 but not by 100, or when it
# is divisible by 400.
#
# Args:
#   data: a numeric year, or a numeric vector of years.
#
# Returns:
#   A logical vector the same length as 'data': TRUE for leap years, FALSE
#   otherwise (NA where 'data' is NA).
leapCheck <- function(data) {
  # Elementwise operators are used on purpose so that a whole vector of years
  # can be checked in one call; for a single year the result is a single
  # TRUE/FALSE, exactly as before.
  (data %% 4 == 0 & data %% 100 != 0) | data %% 400 == 0
}

# Example:

# leapCheck(2000) returns TRUE

# leapCheck(2005) returns FALSE

# ---- Reshape from one row per month to one row per day ----
#
# Each row of 'originaldata' holds a year (column 1), a month (column 2) and
# the daily values for that month in columns 3 onward (day 1 in column 3).
# The result 'newdata' has one row per calendar day with four columns:
# year, month, day of month, and the daily value.

# NOTE(review): month lengths are looked up by the position of each month
# label in order of first appearance, so the input must start in January and
# list the months in calendar order -- confirm for any new input file.
months <- unique(originaldata[, 2]) # actual months in the second column

whitemonth <- months[c(1, 3, 5, 7, 8, 10, 12)] # these months have 31 days

blackmonth <- months[c(4, 6, 9, 11)] # these months have 30 days

# Number of days in 'month' of 'year'; NA when the month label is not one of
# the twelve labels identified above (or the leap-year status is unknown).
daysInMonth <- function(year, month) {
  if (is.element(month, whitemonth)) {
    31L
  } else if (is.element(month, blackmonth)) {
    30L
  } else if (is.element(month, months[2])) {
    # months[2] is February: 29 days in a leap year, 28 otherwise
    leapLogical <- leapCheck(year)
    if (is.na(leapLogical)) NA_integer_ else if (leapLogical) 29L else 28L
  } else {
    NA_integer_
  }
}

# Expand row 'i' of 'originaldata' into a matrix with one row per day of that
# month. Returns NULL (with a warning) when the month cannot be recognised,
# so that the row is left out of the result instead of vanishing silently.
expandRow <- function(i) {
  dataYear <- originaldata[i, 1]
  dataMonth <- originaldata[i, 2]
  nDays <- daysInMonth(dataYear, dataMonth)
  if (is.na(nDays)) {
    warning("Row ", i, " skipped: unrecognised month '", dataMonth, "'",
            call. = FALSE)
    return(NULL)
  }
  # The row is converted on its own (not the whole table at once) so that the
  # formatting of the daily values does not depend on the other rows.
  cbind(rep(dataYear, nDays),
        rep(dataMonth, nDays),
        seq_len(nDays),
        as.vector(t(originaldata[i, 3:(2 + nDays)])))
}

# Build every monthly piece first and bind them once at the end; seq_len()
# also copes with a table of zero or one rows.
newdata <- do.call(rbind, lapply(seq_len(nrow(originaldata)), expandRow))

# With no usable rows rbind() yields NULL; fall back to an empty 4-column
# matrix so that a header-only file is still written.
if (is.null(newdata)) {
  newdata <- matrix(nrow = 0, ncol = 4)
}

colnames(newdata) <- c("Year", "Month", "Day of Month", "Daily Adjusted Precipitation (mm)")

# Written relative to the current working directory
write.table(newdata, file="./data_organized_in_columns.csv", row.names=FALSE, col.names=TRUE, quote=FALSE, sep=",")