# Textbook:
# R for Marketing Research and Analytics (Use R!), 2015 Edition
# by Christopher N. Chapman (Author), Elea McDonnell Feit (Author)
# Load the cereal workbook into `x`: first sheet, rows 1 through 44,
# columns 1 through 10, with the first row read as the header.
# NOTE(review): read.xlsx is not a base R function (presumably from the xlsx
# package) and the path is an absolute location on one machine -- confirm both
# before running elsewhere.
x <- read.xlsx(
  file = "C:\\Users\\Scott\\Desktop\\cereal.xls",
  sheetIndex = 1,
  header = TRUE,
  startRow = 1,
  endRow = 44,
  colIndex = 1:10,
  encoding = "UTF-8"
)
#### Create the data
# Segment labels and how many simulated customers (N) each one gets;
# the two vectors are parallel (position k describes segment k)
segNames <- c("Suburb mix", "Urban hip", "Travelers", "Moving up")
segSize <- c(100, 50, 80, 70)

# Variables simulated for every segment, and the distribution each is drawn
# from ("norm" = normal, "binom" = binomial, "pois" = Poisson);
# again parallel vectors, one entry per variable
segVars <- c("age", "gender", "income", "kids", "ownHome", "subscribe")
segVarType <- c("norm", "binom", "norm", "pois", "binom", "binom")

# Per-segment means: one row per segment (same order as segNames),
# one column per variable (same order as segVars)
segMeans <- rbind(
  c(40, .5, 55000, 2, .5, .1),
  c(24, .7, 21000, 1, .2, .2),
  c(58, .5, 64000, 0, .7, .05),
  c(36, .3, 52000, 2, .3, .2)
)

# Per-segment standard deviations, laid out like segMeans.
# NA marks variables for which a standard deviation is not applicable.
segSDs <- rbind(
  c(5, NA, 12000, NA, NA, NA),
  c(2, NA, 5000, NA, NA, NA),
  c(8, NA, 21000, NA, NA, NA),
  c(4, NA, 10000, NA, NA, NA)
)
# Simulate one data frame of customers per segment and stack them into seg.df.
# Reads segNames, segSize, segVars, segVarType, segMeans and segSDs.

# make sure we're starting our dataset from a known state
seg.df <- NULL
set.seed(02554)
# one slot per segment; the pieces are bound together once after the loop
# instead of re-copying the growing data frame with rbind() on every pass
seg.list <- vector("list", length(segNames))
# iterate over all the segments and create data for each
for (i in seq_along(segNames)) {
  # progress output: segment index and name
  cat(i, segNames[i], "\n")
  # empty frame (all NA) with one row per customer and one column per variable
  this.seg <- data.frame(matrix(NA, nrow = segSize[i], ncol = length(segVars)))
  # fill the columns in order; each column draws from the distribution named
  # in segVarType, parameterised by this segment's row of segMeans / segSDs
  for (j in seq_along(segVars)) {
    this.seg[, j] <- switch(segVarType[j],
      norm = rnorm(segSize[i], mean = segMeans[i, j], sd = segSDs[i, j]),
      pois = rpois(segSize[i], lambda = segMeans[i, j]),
      binom = rbinom(segSize[i], size = 1, prob = segMeans[i, j]),
      stop("Bad segment data type: ", segVarType[j])
    )
  }
  # keep this segment for the final bind
  seg.list[[i]] <- this.seg
}
# stack all segments, in segNames order, into the total dataset
seg.df <- do.call(rbind, seg.list)
# Name the simulated columns, attach segment membership, and turn the 0/1
# draws into labeled factors. Reads seg.df, segVars, segNames and segSize.

# make the data frame names match what we defined
names(seg.df) <- segVars
# add segment membership for each row: each name repeated by its segment size
seg.df$Segment <- factor(rep(segNames, times = segSize))
# convert the binomial variables to nicely labeled factors.
# levels are given explicitly (0 first, 1 second) so each label stays tied to
# its value; without them, a column that happened to contain only 0s or only
# 1s would make factor() fail on the two-element labels vector
seg.df$ownHome <- factor(seg.df$ownHome,
  levels = c(0, 1),
  labels = c("ownNo", "ownYes")
)
seg.df$gender <- factor(seg.df$gender,
  levels = c(0, 1),
  labels = c("Female", "Male")
)
seg.df$subscribe <- factor(seg.df$subscribe,
  levels = c(0, 1),
  labels = c("subNo", "subYes")
)
# check the data and confirm it
summary(seg.df)