Jason Liao's R page

Read csv files:

library("readr") #much better reader of CSV format

d1 = read_csv(".csv")

Read SAS datasets

library("haven")

d1 = read_sas("iris.sas7bdat")

First record for each patientID

d1 = by(d1, d1$patientID, head, n=1)

d1 = do.call("rbind", as.list(d1))

Put a certain level of a character variable as the reference factor level

#yy is a factor

levels(yy) = c("Usual", "Nic", "Oxi")) #"Usual" is now the reference level

Prints out p-value for package lme4

library("lmerTest") #it replaces the lmer function in

model1 = lmer(y ~ week+rand_assignment+week:rand_assignment + (1|part_id), data=ddd)

print(summary(model1))

print(anova(model1))

Boxplot that automatically prints y and x label

my.boxplot = function(x.name, y.name, data) #x and y are the character variable names

{

x = data[, x.name]

y = data[, y.name]

boxplot(y~x, xlab=x.name, ylab=y.name)

}

The following code does not work with R markdown

library("ggplot2")

qplot(x, y, data=d.master, geom=c("boxplot"))

From continuous to categorical

cut(x, breaks=c(10,20,30))

Linear regression with partial R2

fit = ols(y~x+Ventilation, data=d1)

print(fit)

plt <- plot(anova(fit), what='partial R2')

cat("Partial R2", "\n")

print(plt)

Summary statistics separately for each level of a variable

my.by.describe = function(d0) #the first column is the index variable

{

name.index = names(d0)[1]

INDICES = as.character(d0[,1])

xx = unique(INDICES)

for(j in 1:length(xx))

{

cat("################################", "\n")

if(is.na(xx[j])) next

cat("\n", "Start of stratum ", name.index, "=", as.character(xx[j]), "\n")

sub.d = subset(d0[, -1], INDICES == xx[j])

print(prettyR::describe(sub.d, num.desc=c("mean","median", "sd","min", "max", "valid.n")))

cat("\n", "End of stratum ", as.character(xx[j]), "\n")

}

compute the mean separately for each level of a variable

library("plyr")

d5 = ddply(.data=d1, .(part_id, week, day.order, cigarette_number), summarize,

puff_number = max.rm(single_puff_number),

puff_volume = sum.rm(puff_volume))

Multiple comparisons of contrasts

library("multcomp")

model2 = glht(model1, linfct = mcp(cell.line = "Tukey"))

summary(model2)

Ordinal logistic regression with random effects

library("ordinal")

model1 = clmm(BP~steroid+(1|pt_ID), weights=days, data=d1.month1)

Dose-response curve

library("drc")

model1 = drm(DHA ~ dose, data = d1, fct = LL.4(names=c("Slope","Lower Limit","Upper Limit", "ED50")))

http://rstats4ag.org/dose-response/

Survival analysis

library("survival")

library("survminer")

model1 = survfit( Surv(time=Survival_in_Months, event=(Vital_Status=="0 Dead"))~1, data=d1)

#plot(model1, xlab="months", ylab="survival probability")

print(ggsurvplot(model1))

cat("survival probability for individual time points", "\n")

print(summary(model1))

####################################################

cat("\n", "testing of statistical difference between different decade of initial diagnosis", "\n")

print(survdiff( Surv(time=Survival_in_Months, event=(Vital_Status=="0 Dead"))~Decade_Initial_Diag, data=d1))

d1$Decade_Initial_Diag = as.numeric(as.character(d1$Decade_Initial_Diag))

cat("\n", "Cox proportional model on decade of initial diagnosis", "\n")

print(coxph( Surv(time=Survival_in_Months, event=(Vital_Status=="0 Dead"))~Decade_Initial_Diag, data=d1))

####################################################

Constructing a factor variable

factor(x, levels=c(0,1), labels=c("female", "male")

Heat map

library("gplots")

heatmap.2(dd, dendrogram="row", Colv=FALSE, xlab="samples", margins=c(7,10))

#dd is a numerical matrix with rownames and colnames

Tutorial for building a simple R package

https://hilaryparker.com/2014/04/29/writing-an-r-package-from-scratch/

Google Sites

Report abuse