## R functions
## N.Mizumoto
## 19/02/20 updated
####################
###### System ######
####################
## Check R version
sessionInfo()
## Update R (installr runs on Windows)
library(installr)
updateR()
## List objects in the workspace
objects()
######################################
###### Random number generation ######
######################################
## Uniform distribution (0-1)
runif(1000,0,1)
## wrapped Cauchy distribution
library("CircStats")
N = 1000; Mu = 0; Rho = 0.8;
rwrpcauchy(N, Mu, Rho)
## Power law distribution
# r: uniform random numbers (0-1), a: exponent (1 < a <= 3)
PL <- function(r, a, xmin){
  return(xmin*(1-r)^(-1/(a-1)))
}
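# A minimal usage sketch (a = 2 and xmin = 1 are assumed for illustration):
x_pl <- PL(runif(1000), a=2, xmin=1)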
## Stretched exponential
# r: uniform random numbers (0-1)
SE <- function(r, lambda, beta, xmin){
  return( (xmin^beta - 1/lambda*log(1-r))^(1/beta) )
}
## Truncated Pareto
# r: uniform random numbers (0-1), a: exponent (1 < a <= 3)
TPareto <- function(r, a, xmin, xmax){
  return(xmin*((1-r)*(1-(xmax/xmin)^(1-a))+(xmax/xmin)^(1-a))^(1/(1-a)))
}
## Truncated power-law
# r: uniform random numbers (0-1), myu: exponent
TP <- function(r, myu, xmin, xmax){
  return( ( xmax^(1-myu) - (1-r)*(xmax^(1-myu)-xmin^(1-myu)) )^(1/(1-myu)) )
}
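# A minimal usage sketch (myu = 2, xmin = 1, xmax = 100 assumed); samples must lie in [xmin, xmax]:
x_tp <- TP(runif(1000), myu=2, xmin=1, xmax=100)
range(x_tp)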
############################
###### Color palette ######
############################
# Desertification-themed palette (sand to deep green)
cols = colorRampPalette(c("#dec67c","#b3b963","#99a256" ,"#789351","#133e27"))
# Blue to red through white (lighter)
cols = colorRampPalette(c("#2166AC", "#4393C3", "#92C5DE", "#FFFFFF", "#F4A582", "#D6604D", "#B2182B"))
# Blue to red through white (darker); used in: RTV, Homosexual (Anim Behav), Perce search
cols = colorRampPalette(c("#053061", "#2166AC", "#4393C3", "#92C5DE", "#FFFFFF", "#F4A582", "#D6604D", "#B2182B", "#67001F"))
# A grey that is nearly black
grey2 <- "#545454"
# Transparent color (touka: the alpha level)
rgb(0, 0, 0, alpha=touka) # touka on a 0-1 scale
rgb(0, 0, 0, alpha=touka, maxColorValue=255) # touka on a 0-255 scale
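# Usage sketch: semi-transparent points (an alpha of 0.3 is assumed for illustration)
plot(rnorm(100), rnorm(100), pch=16, col=rgb(0, 0, 0, alpha=0.3))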
############################
###### Reading and writing data ######
############################
## Set the working directory and list the files in it
setwd("E:\\Dropbox\\research\\")
Folder.path <- getwd()
f.namesplace <- list.files(Folder.path, pattern="\\.csv$", full.names=T) # escape the dot; a bare "." matches any character
## Fast reading (for large files)
library(data.table)
d <- data.frame(as.matrix(fread(data.path, header=T))) # converting to a matrix before data.frame makes the result easier to handle
## Output table
write.table(res2, "E:\\res2.txt", quote=F, sep=",", row.names=F)
## Output PDF
pdf(paste(data.path,".pdf",sep=""))
plot(x, y) # plotting code goes here
dev.off()
##########################
###### Vector operations ######
##########################
## Remove duplicated values from a vector (shows which values are present)
data <- c(0,0,0,0,1,1,1,1,2,2,3,3,4)
unique(data)
# [1] 0 1 2 3 4
## Compute while ignoring missing values
x <- c(1, 2, 3, 4, NA, NaN)
sum(x, na.rm = TRUE)
## Checking the contents of a logical vector
x <- c(TRUE, TRUE, FALSE)
any(x) # is at least one element TRUE?
# [1] TRUE
all(x) # are all elements TRUE?
# [1] FALSE
## Label each run of TRUE in a logical vector
L <- length(tandem) # tandem: a logical vector
tandem_label <- rep(0, L)
count <- 1
for(i in 1:L){
  if(tandem[i]){
    tandem_label[i] <- count # TRUE positions get the current run number
  } else if(i > 1 && tandem[i-1]) {
    count <- count + 1 # a run just ended; advance the counter
  }
}
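# An equivalent sketch using rle(): runs of TRUE get 1, 2, ..., FALSE positions get 0
runs <- rle(tandem)
tandem_label2 <- rep(ifelse(runs$values, cumsum(runs$values), 0), runs$lengths)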
## Generating a contingency table
xtabs(~accb0+accb1, data=d)
## Cumulative sum
cumsum(c(1, 2, 3, 4)) # 1 3 6 10
############################
###### Matrix operations ######
############################
## Reverse the rows (to fix the vertical flip in image.plot)
dhis <- t(apply(dhis,1,rev))
##########################
###### String operations ######
##########################
## Replace part of a string by pattern
gsub("\\.csv", "-ready.csv", f.namesplace[i]) # escape the dot; a bare "." matches any character
## Example: read only the files whose names contain a given string
setwd("E:\\Dropbox\\research\\")
Folder.path <- getwd()
f.namesplace <- list.files(Folder.path, pattern="\\.csv$", full.names=T)
data.path <- f.namesplace[regexpr("ramuda1000_",f.namesplace)>0]
## Extract part of a string
substr(f.names, 1, 8) # characters 1 to 8 (substr is 1-indexed)
#####################
###### plot #########
#####################
## Make the axes run exactly from 0 to 10
plot(x, y, xlim=c(0,10), ylim=c(0,10), xaxs = "i", yaxs = "i")
## abline
abline(a, b) # a: intercept, b: slope
abline(h = y) # horizontal line y = h
abline(v = x) # vertical line x = v
## Error bars on a barplot
Fig <- barplot(Means, beside=T)
arrows(Fig, Means, Fig, Means+SE, angle=90, length=0.1) # angle=90 draws flat caps instead of arrowheads
## barplot with two different factors
DataMean <- tapply(ds$PC1, ds[,2:3], mean)
DataSe <- tapply(ds$PC1, ds[,2:3], se)
fugo <- DataMean / abs(DataMean) # sign of each mean, so error bars extend away from zero
XonFig <- barplot(DataMean, beside=T, ylim=c(-8,4))
arrows(XonFig , DataMean, XonFig , DataMean+ fugo*DataSe, angle=90, length=0.1)
## Heatmap with dendrogram
library(gplots)
heatmap.2(as.matrix(scale(d)), Rowv = T, Colv = T, distfun = dist,
          hclustfun = function(d) hclust(d, method="ward.D"),
          dendrogram = "row", trace = "none", col = cols)
## 3D scatter plot
library(scatterplot3d)
scatterplot3d(x,y,z, type="o")
## Draw a circle
theta <- seq(-pi, pi, length=100)
points(cos(theta)*50+50, sin(theta)*50+50, type="l")
## 2D histogram (1)
library(gplots)
h2 <- hist2d(cbind(x,y), nbins=c(40,40), xlim=c(-20,20), ylim=c(-20,20), col=cols(10))
## 2D histogram (2)
library(hexbin)
h <- hexbin(d[,1:2])
plot(h)
#######################
###### ggplot #########
#######################
library(ggplot2)
library(gridExtra)
#### main
p <- ggplot(data=df, aes(x=x, y=y, colour=as.factor(ind)))
#### plot type
## scatter plot
+ geom_point()
geom_jitter() # to spread overlapping points
## barplot + errorbar
geom_bar(stat="identity", width = 1, col="black", fill="black") # fill, not bg, sets the bar color
geom_errorbar(aes(ymin=BackCountMean-BackCountMeanSE, ymax=BackCountMean+BackCountMeanSE), width=0.1)
## trajectories
+ geom_path()
## 2D histogram
p + stat_bin2d(bins=c(30,15)) + scale_fill_gradientn(colours=cols(10))
## smoothing
stat_smooth(se=T)
## heatmap
geom_tile(aes(fill = sleeptime), color="black") + scale_fill_gradientn(name="Feeding time (min)", colours = (cols(20)))
## multiple density plot
library(ggridges)
geom_density_ridges(fill=2, stat = "binline", binwidth=1, alpha=0.2, draw_baseline = T)
## abline
# vertical line
p + geom_vline(xintercept = 0, color = "black", size=1.5)
# horizontal line
p + geom_hline(yintercept = 0, color = "black", size=0.5)
## survival curve
library("survminer")
library(survival)
df <- survfit(Surv(fronttime,cens)~species, type = "kaplan-meier", data=d_front)
ggsurvplot(fit = df, data = d_front,
           pval = F, pval.method = TRUE,
           risk.table = F, conf.int = FALSE,
           ncensor.plot = FALSE, size = 1, linetype = 1:3,
           legend.title = "Species", legend.labs=c("Hetero","Paraneo","Reticuli"),
           title="Spending time at the front",
           xlab="Time (sec)", ggtheme = theme_bw() + theme(aspect.ratio = 0.75))
#### facet
## divide into multiple figures
p + facet_grid(hour~colony + rep)
# change the label
week_labs <- c("1-20", "21-40", "41-60", "61-80", "81-100")
names(week_labs) <- c(0:4)
p + facet_grid(weekday ~ week, labeller = labeller(week=week_labs))
# specify the rows/columns with facet_wrap
p + facet_wrap(~period, nrow=2, ncol=3) # nrow/ncol values are examples
# editing facet labels
facet_wrap(~hour, nrow=6, ncol=3, labeller = label_both) # combining the name of the grouping variable with group levels
+ theme(strip.background = element_rect(colour="#00000000", fill="#00000000")) # make the strips fully transparent
#### design
## axis tick positions
p + scale_y_continuous(breaks=c(0,60))
## title
p + ggtitle("Reticuli")
## black frame, white background and grey grid
p + theme_bw()
## lengthen tick marks
p + theme(axis.ticks = element_line(colour = "black"), axis.ticks.length = unit(0.5, "cm"))
## adjust aspect
p + coord_fixed()
## aspect ratio in graph panel
p + theme(aspect.ratio=1)
## remove all legends
p + theme(legend.position = 'none')
## legend position
theme(legend.position = "top", aspect.ratio=2)
#### axis
## limit
p + xlim(-220, 220) + ylim(-120, 40)
## swap the x and y axes
p + coord_flip()
## remove an axis title
p + theme(axis.title.x = element_blank())
## change the axis tick labels
scale_y_discrete(breaks=c("0","10","20","30","40"), labels=c("0-10", "10-20", "20-30", "30-40", "40-50"))
## remove the padding at the axis limits
scale_x_continuous(expand = c(0, 0))
## reverse the axis direction
scale_y_reverse(expand = c(0, 0))
#### save data
ggsave("181023_Density.png")
ggsave("BackDis.pdf", width = 8, height = 4)
#######################
##### Calculation #####
#######################
# Round up to the nearest power of 10
roundUp <- function(x) 10^ceiling(log10(x)) # e.g. roundUp(42) returns 100
# Turning-angle calculation along a trajectory (X, Y: coordinates, L: length)
angle_cal <- function(X, Y, L){
  Ax <- (X[(3:L)-1] - X[(3:L)-2]) # previous step
  Bx <- (X[3:L] - X[(3:L)-1])     # current step
  Ay <- (Y[(3:L)-1] - Y[(3:L)-2])
  By <- (Y[3:L] - Y[(3:L)-1])
  # hugo: sign of the cross product (turn direction); the epsilon avoids 0/0
  hugo <- (Ax * By - Ay * Bx + 0.000001)/abs(Ax * By - Ay * Bx + 0.000001)
  cos <- round((Ax * Bx + Ay * By) / ((Ax^2 + Ay^2)*(Bx^2 + By^2))^0.5, 14)
  return(acos(cos)*hugo) # signed turning angles
}
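# Usage sketch on a hypothetical random-walk trajectory:
x <- cumsum(rnorm(100)); y <- cumsum(rnorm(100))
turns <- angle_cal(x, y, length(x)) # returns L-2 signed angles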
# atan
atan(y/x) # range: -pi/2 to pi/2
atan2(y, x) # range: -pi to pi (note the argument order: y first, then x)
###################################
###### Statistical analysis #######
###################################
##### Index #####
## Standard Error
se <- function(x){
  y <- x[!is.na(x)] # remove the missing values
  sqrt(var(as.vector(y))/length(y))
}
## Compute Morishita's I-delta index
Idelta <- function(x){
  return(length(x)*sum(x*(x-1)) / ( sum(x) * (sum(x)-1) ))
}
## Gini coefficient
install.packages("ineq")
library(ineq)
ineq(x, type="Gini") # x: vector with values of each individual
plot(Lc(dd[1,])) # draw a Lorenz curve
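# Usage sketch (hypothetical counts per individual):
ineq(c(10, 5, 2, 1, 0, 0), type="Gini")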
##### Distribution test #####
## Test for normality
shapiro.test(x)
## Fit a gamma distribution
library(MASS) # for fitdistr
ge <- fitdistr(d[,i], "gamma", list(shape = 1, rate = 0.1), lower = 0.001)
ks.test(x, "pgamma", ge$estimate[1], ge$estimate[2], alternative = "two.sided")
##### Cumulative Distribution Function (CDF) #####
## Truncated power-law
TP_CDF <- function(myu, x, xmin, xmax){
  (xmin^(1-myu)-x^(1-myu)) / (xmin^(1-myu)-xmax^(1-myu))
}
## Stretched Exponential
SE_CDF <- function(lambda, beta, x, xmin){
  1-exp(-lambda*(x^beta-xmin^beta))
}
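# Sketch: overlay a fitted truncated power-law CDF on the empirical CDF
# (Move_sec and the exponent 2.0 are placeholders for your data and fit)
plot(ecdf(Move_sec))
curve(TP_CDF(2.0, x, min(Move_sec), max(Move_sec)), add=TRUE, col="red")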
##### Log-likelihood functions #####
## Truncated Power-Law
# param = myu
TP_LLF <- function(param, data, xmin, xmax){
  length(data)*(log(param-1)-log(xmin^(1-param)-xmax^(1-param))) - param*sum(log(data))
}
## Stretched exponential
# (param[1] = Beta, param[2]= lambda)
SE_LLF <- function(param, data, xmin){
  length(data)*log(param[1])+length(data)*log(param[2])+(param[1]-1)*sum(log(data))-param[2]*sum(data^param[1]-xmin^param[1])
}
## Truncated power-law (for binned datasets)
# param = myu; a bin width of 0.2 is assumed (hence round(data*5))
TP_bin_LLF <- function(param, data, xmin, xmax){
  j = 1:(max(data)/0.2)
  dj <- j
  for(i in j){
    dj[i] = sum(round(data*5) == i) # counts per bin
  }
  return(-length(data)*log(xmin^(1-param)-xmax^(1-param)) + sum(dj*log( (xmin+(j-1)*0.2)^(1-param) - (xmin+j*0.2)^(1-param) )))
}
## Stretched exponential (for binned datasets)
# (param[1] = beta, param[2] = lambda); a bin width of 0.2 is assumed
SE_bin_LLF <- function(param, data, xmin){
  j = 1:(max(data)/0.2)
  dj <- j
  for(i in 1:length(j)){
    dj[i] = sum(round(data*5) == i) # counts per bin
  }
  return( length(data)*param[2]*xmin^param[1] + sum( dj*log( exp(-param[2]*(xmin+(j-1)*0.2)^param[1]) - exp(-param[2]*(xmin+j*0.2)^param[1]) ) ) )
}
##### Maximum Likelihood Estimation (MLE) #####
## Exponential
Exp_MLE <- function(x, xmin){
  return(length(x)*(sum(x-xmin)^(-1)))
}
## Power-Law
PL_MLE <- function(x, xmin){
  return(1+length(x)*(sum(log(x/xmin)))^(-1))
}
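# Sketch: the estimator should recover the exponent of data simulated with the
# PL generator defined above (a = 2 and xmin = 1 are assumed for illustration)
x_sim <- PL(runif(10000), a=2, xmin=1)
PL_MLE(x_sim, xmin=1) # should be close to 2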
## Stretched exponential (using optim function)
optim(c(0.1,0.1), SE_LLF, data=Stop_sec, xmin=min_sec, control = list(fnscale = -1), method="Nelder-Mead") # xmin must be passed through to SE_LLF
## Truncated Power-Law (using optimize function)
optimize(TP_LLF, interval=c(0,6), data=Move_sec, xmin=min_sec, xmax=max_sec_M, maximum=T)
##### Non parametric #####
## Exact binomial test
binom.test(c(a,b), p=0.5, alternative="two.sided")
## Chi-squared test
chisq.test(c(a,b,c,d), p=c(4,3,2,1)/10)
chisq.test(matrix(c(20, 15, 16, 9), ncol=2, byrow=T))
## Fisher's Exact Test
fisher.test(xd) # default
fisher.test(xd, workspace=1000000) # enlarge the workspace (for large tables)
fisher.test(xd, simulate.p.value=TRUE, B=1e7) # Monte Carlo simulation (for very large tables)
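# Usage sketch with a hypothetical 2x2 contingency table:
xd <- matrix(c(8, 2, 1, 5), ncol=2)
fisher.test(xd)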
## Wilcoxon rank-sum test (two unpaired groups)
library(exactRankTests)
wilcox.exact(x = vx, y = vy, paired=F)
wilcox.exact(dis ~ sex, data=d, paired=F)
# Note: wilcox.test does not return exact p-values when there are ties
## Wilcoxon signed-rank test (two paired groups)
library(exactRankTests)
wilcox.exact(x = vx, y = vy, paired=T)
wilcox.exact(dis ~ sex, data=d, paired=T)
# Note: wilcox.test does not return exact p-values when there are ties
## Kendall's rank correlation coefficient
cor.test(x1, x2, method="kendall")
## Kruskal-Wallis test
d <- list(x, y, z)
kruskal.test(d)
# Post-hoc test (Nemenyi test, analogous to Tukey's test after ANOVA)
library(PMCMR)
posthoc.kruskal.nemenyi.test(d)
## Friedman test (paired counterpart of the Kruskal-Wallis test)
# data in table (matrix) form
friedman.test(search_ind_mean)
# Post-hoc test (Nemenyi test, analogous to Tukey's test after ANOVA)
library(PMCMR)
posthoc.friedman.nemenyi.test(search_ind_mean)
## Scheirer-Ray-Hare test (two-factor extension of the Kruskal-Wallis test)
## Ref: Sokal, R.R. and F.J. Rohlf. 1995. Biometry. 3rd ed. W.H. Freeman, New York. pp146-147
library(rcompanion)
ds <- data.frame(Rank_PC1, caste, colony)
scheirerRayHare(Rank_PC1~caste*colony, data=ds)
## Survival time analysis
library(survival)
ge.sf<-survfit(Surv(time,cens)~treat, type = "kaplan-meier", data=gehan)
# cens: censoring indicator; 0: censored, 1: complete data
summary(ge.sf)
plot(ge.sf, lty=1:2, mark.time=T, conf.int=F) # '+' marks the censored observations
survdiff(Surv(time,cens)~treat, rho=1, data=gehan)
# rho=0 gives the log-rank test; rho=1 gives the Gehan-Wilcoxon test.
# The default is rho=0.
## Cox Proportional-Hazards Model
library(car) # for Anova
library(multcomp) # for glht
ge.sf <- coxph(Surv(time,cens)~treat+sex, data=gehan) # coxph takes no 'type' argument
Anova(ge.sf)
multicomparison <- glht(ge.sf, linfct=mcp(treat="Tukey")) # compare the levels of the factor treat
summary(multicomparison)
##### Parametric #####
## One-sample t-test
t.test(x, mu=0.5, alternative="two.sided")
## GLM: change the baseline level of a factor in summary
sex <- rep(c("M","F"), each=10) # e.g. sex is a factor
sex <- factor(sex, levels=c("M", "F")) # M is the baseline
sex <- factor(sex, levels=c("F", "M")) # F is the baseline
## GLMM (id is nested within colony)
library(lme4)
r <- glmer(search~time+sex+(1|colony/id)+(1|spend), family=binomial(link="logit"), data=LDd[LDd$spend>24,])
## Multiple comparisons after a GLMM
library(multcomp)
multicomparison<-glht(r,linfct=mcp(time="Tukey"))
# d$treat <-relevel(d$treat, ref="Nega")
# multicomparison<-glht(r,linfct=mcp(time="Dunnett"))
summary(multicomparison)
## Draw a logistic curve from a binomial GLM
r <- glm(y ~ x, family=binomial) # fitted model (placeholder formula)
logistic <- function(x){ 1/(1+exp(-x)) } # the standard logistic function
Est <- summary(r)$coefficient[,1] # extract the estimated coefficients
xdata <- seq(min(x), max(x), length.out=120) # x range for the prediction
ydata <- logistic(Est[1]+Est[2]*xdata) # compute the predicted y values
lines(xdata, ydata) # overlay the curve
##### Multivariate analysis #####
## Principal Component Analysis
result <- prcomp(d[,5:13], scale=TRUE)
biplot(result)
summary(result)
# Kaiser-Meyer-Olkin measure of sampling adequacy (KMO, MSA)
kmo <- function(x){
  x <- subset(x, complete.cases(x)) # remove the cases with any missing value
  r <- cor(x) # correlation matrix
  r2 <- r^2 # squared correlation coefficients
  i <- solve(r) # inverse of the correlation matrix
  d <- diag(i) # diagonal elements of the inverse
  p2 <- (-i/sqrt(outer(d, d)))^2 # squared partial correlation coefficients
  diag(r2) <- diag(p2) <- 0 # delete the diagonal elements
  KMO <- sum(r2)/(sum(r2)+sum(p2))
  MSA <- colSums(r2)/(colSums(r2)+colSums(p2))
  return(list(KMO=KMO, MSA=MSA))
}
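# Usage sketch: check the sampling adequacy of the columns used in the PCA above
kmo(d[, 5:13])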
####################
###### Others ######
####################
## Generate robust color-band ID codes
library(rabi)
brute_IDs(3, redundancy=2, alphabet=5, num.tries = 10, available.colors = c("y","g","p","o","b"))