Read csv files:
library("readr") #much better reader of CSV format
d1 = read_csv(".csv")
Read SAS datasets
library("haven")
d1 = read_sas("iris.sas7bdat")
First record for each patientID
d1 = by(d1, d1$patientID, head, n=1)
d1 = do.call("rbind", as.list(d1))
Put a certain level of a character variable as the reference factor level
#yy is a factor
levels(yy) = c("Usual", "Nic", "Oxi")) #"Usual" is now the reference level
Prints out p-value for package lme4
library("lmerTest") #it replaces the lmer function in
model1 = lmer(y ~ week+rand_assignment+week:rand_assignment + (1|part_id), data=ddd)
print(summary(model1))
print(anova(model1))
Boxplot that automatically prints y and x label
my.boxplot = function(x.name, y.name, data) #x and y are the character variable names
{
x = data[, x.name]
y = data[, y.name]
boxplot(y~x, xlab=x.name, ylab=y.name)
}
The following code does not work with R markdown
library("ggplot2")
qplot(x, y, data=d.master, geom=c("boxplot"))
From continuous to categorical
cut(x, breaks=c(10,20,30))
Linear regression with partial R2
fit = ols(y~x+Ventilation, data=d1)
print(fit)
plt <- plot(anova(fit), what='partial R2')
cat("Partial R2", "\n")
print(plt)
Summary statistics separately for each level of a variable
my.by.describe = function(d0) #the first column is the index variable
{
name.index = names(d0)[1]
INDICES = as.character(d0[,1])
xx = unique(INDICES)
for(j in 1:length(xx))
{
cat("################################", "\n")
if(is.na(xx[j])) next
cat("\n", "Start of stratum ", name.index, "=", as.character(xx[j]), "\n")
sub.d = subset(d0[, -1], INDICES == xx[j])
print(prettyR::describe(sub.d, num.desc=c("mean","median", "sd","min", "max", "valid.n")))
cat("\n", "End of stratum ", as.character(xx[j]), "\n")
}
}
compute the mean separately for each level of a variable
library("plyr")
d5 = ddply(.data=d1, .(part_id, week, day.order, cigarette_number), summarize,
puff_number = max.rm(single_puff_number),
puff_volume = sum.rm(puff_volume))
Multiple comparisons of contrasts
library("multcomp")
model2 = glht(model1, linfct = mcp(cell.line = "Tukey"))
summary(model2)
Ordinal logistic regression with random effects
library("ordinal")
model1 = clmm(BP~steroid+(1|pt_ID), weights=days, data=d1.month1)
Dose-response curve
library("drc")
model1 = drm(DHA ~ dose, data = d1, fct = LL.4(names=c("Slope","Lower Limit","Upper Limit", "ED50")))
http://rstats4ag.org/dose-response/
Survival analysis
library("survival")
library("survminer")
model1 = survfit( Surv(time=Survival_in_Months, event=(Vital_Status=="0 Dead"))~1, data=d1)
#plot(model1, xlab="months", ylab="survival probability")
print(ggsurvplot(model1))
cat("survival probability for individual time points", "\n")
print(summary(model1))
####################################################
cat("\n", "testing of statistical difference between different decade of initial diagnosis", "\n")
print(survdiff( Surv(time=Survival_in_Months, event=(Vital_Status=="0 Dead"))~Decade_Initial_Diag, data=d1))
d1$Decade_Initial_Diag = as.numeric(as.character(d1$Decade_Initial_Diag))
cat("\n", "Cox proportional model on decade of initial diagnosis", "\n")
print(coxph( Surv(time=Survival_in_Months, event=(Vital_Status=="0 Dead"))~Decade_Initial_Diag, data=d1))
####################################################
Constructing a factor variable
factor(x, levels=c(0,1), labels=c("female", "male")
Heat map
library("gplots")
heatmap.2(dd, dendrogram="row", Colv=FALSE, xlab="samples", margins=c(7,10))
#dd is a numerical matrix with rownames and colnames
Tutorial for building a simple R package
https://hilaryparker.com/2014/04/29/writing-an-r-package-from-scratch/