## R functions
## N.Mizumoto
## 19/02/20 updated
####################
###### System ######
####################
## Check R version
sessionInfo()
## Update R (installr runs on Windows)
library(installr)
updateR()
## List objects in the workspace
objects()
######################################
###### Random number generation ######
######################################
## Uniform distribution (0-1)
runif(1000,0,1)
## wrapped Cauchy distribution
library("CircStats")
N = 1000; Mu = 0; Rho = 0.8;
rwrpcauchy(N, Mu, Rho)
## Power law distribution
# r: uniform random numbers (0-1), a: exponent (1 < a <= 3)
PL <- function(r, a, xmin){
  return(xmin*(1-r)^(-1/(a-1)))
}
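# A minimal usage sketch (a = 2 and xmin = 1 are assumed for illustration):
x_pl <- PL(runif(1000), a=2, xmin=1)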
## Stretched exponential
# r: uniform random numbers (0-1)
SE <- function(r, lambda, beta, xmin){
  return( (xmin^beta - 1/lambda*log(1-r))^(1/beta) )
}
## Truncated Pareto
# r: uniform random numbers (0-1), a: exponent (1 < a <= 3)
TPareto <- function(r, a, xmin, xmax){
  return(xmin*((1-r)*(1-(xmax/xmin)^(1-a))+(xmax/xmin)^(1-a))^(1/(1-a)))
}
## Truncated power-law
# r: uniform random numbers (0-1), myu: exponent
TP <- function(r, myu, xmin, xmax){
  return( ( xmax^(1-myu) - (1-r)*(xmax^(1-myu)-xmin^(1-myu)) )^(1/(1-myu)) )
}
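# A minimal usage sketch (myu = 2, xmin = 1, xmax = 100 assumed); samples must lie in [xmin, xmax]:
x_tp <- TP(runif(1000), myu=2, xmin=1, xmax=100)
range(x_tp)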
############################
###### Color palette ######
############################
# Desertification-themed palette (sand to deep green)
cols = colorRampPalette(c("#dec67c","#b3b963","#99a256" ,"#789351","#133e27"))
# Blue to red through white (lighter)
cols = colorRampPalette(c("#2166AC", "#4393C3", "#92C5DE", "#FFFFFF", "#F4A582", "#D6604D", "#B2182B"))
# Blue to red through white (darker); used in: RTV, Homosexual (Anim Behav), Perce search
cols = colorRampPalette(c("#053061", "#2166AC", "#4393C3", "#92C5DE", "#FFFFFF", "#F4A582", "#D6604D", "#B2182B", "#67001F"))
# A grey that is nearly black
grey2 <- "#545454"
# Transparent color (touka: the alpha level)
rgb(0, 0, 0, alpha=touka) # touka on a 0-1 scale
rgb(0, 0, 0, alpha=touka, maxColorValue=255) # touka on a 0-255 scale
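# Usage sketch: semi-transparent points (an alpha of 0.3 is assumed for illustration)
plot(rnorm(100), rnorm(100), pch=16, col=rgb(0, 0, 0, alpha=0.3))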
############################
###### Reading and writing data ######
############################
## Set the working directory and list the files in it
setwd("E:\\Dropbox\\research\\")
Folder.path <- getwd()
f.namesplace <- list.files(Folder.path, pattern="\\.csv$", full.names=T) # escape the dot; a bare "." matches any character
## Fast reading (for large files)
library(data.table)
d <- data.frame(as.matrix(fread(data.path, header=T))) # converting to a matrix before data.frame makes the result easier to handle
## Output table
write.table(res2, "E:\\res2.txt", quote=F, sep=",", row.names=F)
## Output PDF
pdf(paste(data.path,".pdf",sep=""))
plot(x, y) # plotting code goes here
dev.off()
##########################
###### Vector operations ######
##########################
## Remove duplicated values from a vector (shows which values are present)
data <- c(0,0,0,0,1,1,1,1,2,2,3,3,4)
unique(data)
# [1] 0 1 2 3 4
## Compute while ignoring missing values
x <- c(1, 2, 3, 4, NA, NaN)
sum(x, na.rm = TRUE)
## Checking the contents of a logical vector
x <- c(TRUE, TRUE, FALSE)
any(x) # is at least one element TRUE?
# [1] TRUE
all(x) # are all elements TRUE?
# [1] FALSE
## Label each run of TRUE in a logical vector
L <- length(tandem) # tandem: a logical vector
tandem_label <- rep(0, L)
count <- 1
for(i in 1:L){
  if(tandem[i]){
    tandem_label[i] <- count # TRUE positions get the current run number
  } else if(i > 1 && tandem[i-1]) {
    count <- count + 1 # a run just ended; advance the counter
  }
}
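# An equivalent sketch using rle(): runs of TRUE get 1, 2, ..., FALSE positions get 0
runs <- rle(tandem)
tandem_label2 <- rep(ifelse(runs$values, cumsum(runs$values), 0), runs$lengths)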
## Generating a contingency table
xtabs(~accb0+accb1, data=d)
## Cumulative sum
cumsum(c(1, 2, 3, 4)) # 1 3 6 10
############################
###### Matrix operations ######
############################
## Reverse the rows (to fix the vertical flip in image.plot)
dhis <- t(apply(dhis,1,rev))
##########################
###### String operations ######
##########################
## Replace part of a string by pattern
gsub("\\.csv", "-ready.csv", f.namesplace[i]) # escape the dot; a bare "." matches any character
## Example: read only the files whose names contain a given string
setwd("E:\\Dropbox\\research\\")
Folder.path <- getwd()
f.namesplace <- list.files(Folder.path, pattern="\\.csv$", full.names=T)
data.path <- f.namesplace[regexpr("ramuda1000_",f.namesplace)>0]
## Extract part of a string
substr(f.names, 1, 8) # characters 1 to 8 (substr is 1-indexed)
#####################
###### plot #########
#####################
## Make the axes run exactly from 0 to 10
plot(x, y, xlim=c(0,10), ylim=c(0,10), xaxs = "i", yaxs = "i")
## abline
abline(a, b) # a: intercept, b: slope
abline(h = y) # horizontal line y = h
abline(v = x) # vertical line x = v
## Error bars on a barplot
Fig <- barplot(Means, beside=T)
arrows(Fig, Means, Fig, Means+SE, angle=90, length=0.1) # angle=90 draws flat caps instead of arrowheads
## barplot with two different factors
DataMean <- tapply(ds$PC1, ds[,2:3], mean)
DataSe <- tapply(ds$PC1, ds[,2:3], se)
fugo <- DataMean / abs(DataMean) # sign of each mean, so error bars extend away from zero
XonFig <- barplot(DataMean, beside=T, ylim=c(-8,4))
arrows(XonFig , DataMean, XonFig , DataMean+ fugo*DataSe, angle=90, length=0.1)
## Heatmap with dendrogram
library(gplots)
heatmap.2(as.matrix(scale(d)), Rowv = T, Colv = T, distfun = dist,
          hclustfun = function(d) hclust(d, method="ward.D"),
          dendrogram = "row", trace = "none", col = cols)
## 3D scatter plot
library(scatterplot3d)
scatterplot3d(x,y,z, type="o")
## Draw a circle
theta <- seq(-pi, pi, length=100)
points(cos(theta)*50+50, sin(theta)*50+50, type="l")
## 2D histogram (1)
library(gplots)
h2 <- hist2d(cbind(x,y), nbins=c(40,40), xlim=c(-20,20), ylim=c(-20,20), col=cols(10))
## 2D histogram (2)
library(hexbin)
h <- hexbin(d[,1:2])
plot(h)
#######################
###### ggplot #########
#######################
library(ggplot2)
library(gridExtra)
#### main
p <- ggplot(data=df, aes(x=x, y=y, colour=as.factor(ind)))
#### plot type
## scatter plot
+ geom_point()
geom_jitter() # to spread overlapping points
## barplot + errorbar
geom_bar(stat="identity", width = 1, col="black", fill="black") # fill, not bg, sets the bar color
geom_errorbar(aes(ymin=BackCountMean-BackCountMeanSE, ymax=BackCountMean+BackCountMeanSE), width=0.1)
## trajectories
+ geom_path()
## 2D histogram
p + stat_bin2d(bins=c(30,15)) + scale_fill_gradientn(colours=cols(10))
## smoothing
stat_smooth(se=T)
## heatmap
geom_tile(aes(fill = sleeptime), color="black") + scale_fill_gradientn(name="Feeding time (min)", colours = (cols(20)))
## multiple density plot
library(ggridges)
geom_density_ridges(fill=2, stat = "binline", binwidth=1, alpha=0.2, draw_baseline = T)
## abline
# vertical line
p + geom_vline(xintercept = 0, color = "black", size=1.5)
# horizontal line
p + geom_hline(yintercept = 0, color = "black", size=0.5)
## survival curve
library("survminer")
library(survival)
df <- survfit(Surv(fronttime,cens)~species, type = "kaplan-meier", data=d_front)
ggsurvplot(fit = df, data = d_front,
           pval = F, pval.method = TRUE,
           risk.table = F, conf.int = FALSE,
           ncensor.plot = FALSE, size = 1, linetype = 1:3,
           legend.title = "Species", legend.labs=c("Hetero","Paraneo","Reticuli"),
           title="Spending time at the front",
           xlab="Time (sec)", ggtheme = theme_bw() + theme(aspect.ratio = 0.75))
#### facet
## divide into multiple figures
p + facet_grid(hour~colony + rep)
# change the label
week_labs <- c("1-20", "21-40", "41-60", "61-80", "81-100")
names(week_labs) <- c(0:4)
p + facet_grid(weekday ~ week, labeller = labeller(week=week_labs))
# specify the rows/columns with facet_wrap
p + facet_wrap(~period, nrow=2, ncol=3) # nrow/ncol values are examples
# editing facet labels
facet_wrap(~hour, nrow=6, ncol=3, labeller = label_both) # combining the name of the grouping variable with group levels
+ theme(strip.background = element_rect(colour="#00000000", fill="#00000000")) # make the strips fully transparent
#### design
## axis tick positions
p + scale_y_continuous(breaks=c(0,60))
## title
p + ggtitle("Reticuli")
## black frame, white background and grey grid
p + theme_bw()
## lengthen tick marks
p + theme(axis.ticks = element_line(colour = "black"), axis.ticks.length = unit(0.5, "cm"))
## adjust aspect
p + coord_fixed()
## aspect ratio in graph panel
p + theme(aspect.ratio=1)
## remove all legends
p + theme(legend.position = 'none')
## legend position
theme(legend.position = "top", aspect.ratio=2)
#### axis
## limit
p + xlim(-220, 220) + ylim(-120, 40)
## swap the x and y axes
p + coord_flip()
## remove an axis title
p + theme(axis.title.x = element_blank())
## change the axis tick labels
scale_y_discrete(breaks=c("0","10","20","30","40"), labels=c("0-10", "10-20", "20-30", "30-40", "40-50"))
## remove the padding at the axis limits
scale_x_continuous(expand = c(0, 0))
## reverse the axis direction
scale_y_reverse(expand = c(0, 0))
#### save data
ggsave("181023_Density.png")
ggsave("BackDis.pdf", width = 8, height = 4)
#######################
##### Calculation #####
#######################
# Round up to the nearest power of 10
roundUp <- function(x) 10^ceiling(log10(x)) # e.g. roundUp(42) returns 100
# Turning-angle calculation along a trajectory (X, Y: coordinates, L: length)
angle_cal <- function(X, Y, L){
  Ax <- (X[(3:L)-1] - X[(3:L)-2]) # previous step
  Bx <- (X[3:L] - X[(3:L)-1])     # current step
  Ay <- (Y[(3:L)-1] - Y[(3:L)-2])
  By <- (Y[3:L] - Y[(3:L)-1])
  # hugo: sign of the cross product (turn direction); the epsilon avoids 0/0
  hugo <- (Ax * By - Ay * Bx + 0.000001)/abs(Ax * By - Ay * Bx + 0.000001)
  cos <- round((Ax * Bx + Ay * By) / ((Ax^2 + Ay^2)*(Bx^2 + By^2))^0.5, 14)
  return(acos(cos)*hugo) # signed turning angles
}
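# Usage sketch on a hypothetical random-walk trajectory:
x <- cumsum(rnorm(100)); y <- cumsum(rnorm(100))
turns <- angle_cal(x, y, length(x)) # returns L-2 signed angles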
# atan
atan(y/x) # range: -pi/2 to pi/2
atan2(y, x) # range: -pi to pi (note the argument order: y first, then x)
###################################
###### Statistical analysis #######
###################################
##### Index #####
## Standard Error
se <- function(x){
  y <- x[!is.na(x)] # remove the missing values
  sqrt(var(as.vector(y))/length(y))
}
## Compute Morishita's I-delta index
Idelta <- function(x){
  return(length(x)*sum(x*(x-1)) / ( sum(x) * (sum(x)-1) ))
}
## Gini coefficient
install.packages("ineq")
library(ineq)
ineq(x, type="Gini") # x: vector with values of each individual
plot(Lc(dd[1,])) # draw a Lorenz curve
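# Usage sketch (hypothetical counts per individual):
ineq(c(10, 5, 2, 1, 0, 0), type="Gini")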
##### Distribution test #####
## Test for normality
shapiro.test(x)
## Fit a gamma distribution
library(MASS) # for fitdistr
ge <- fitdistr(d[,i], "gamma", list(shape = 1, rate = 0.1), lower = 0.001)
ks.test(x, "pgamma", ge$estimate[1], ge$estimate[2], alternative = "two.sided")
##### Cumulative Distribution Function (CDF) #####
## Truncated power-law
TP_CDF <- function(myu, x, xmin, xmax){
  (xmin^(1-myu)-x^(1-myu)) / (xmin^(1-myu)-xmax^(1-myu))
}
## Stretched Exponential
SE_CDF <- function(lambda, beta, x, xmin){
  1-exp(-lambda*(x^beta-xmin^beta))
}
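# Sketch: overlay a fitted truncated power-law CDF on the empirical CDF
# (Move_sec and the exponent 2.0 are placeholders for your data and fit)
plot(ecdf(Move_sec))
curve(TP_CDF(2.0, x, min(Move_sec), max(Move_sec)), add=TRUE, col="red")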
##### Log-likelihood functions #####
## Truncated Power-Law
# param = myu
TP_LLF <- function(param, data, xmin, xmax){
  length(data)*(log(param-1)-log(xmin^(1-param)-xmax^(1-param))) - param*sum(log(data))
}
## Stretched exponential
# (param[1] = Beta, param[2]= lambda)
SE_LLF <- function(param, data, xmin){
  length(data)*log(param[1])+length(data)*log(param[2])+(param[1]-1)*sum(log(data))-param[2]*sum(data^param[1]-xmin^param[1])
}
## Truncated power-law (for binned datasets)
# param = myu; a bin width of 0.2 is assumed (hence round(data*5))
TP_bin_LLF <- function(param, data, xmin, xmax){
  j = 1:(max(data)/0.2)
  dj <- j
  for(i in j){
    dj[i] = sum(round(data*5) == i) # counts per bin
  }
  return(-length(data)*log(xmin^(1-param)-xmax^(1-param)) + sum(dj*log( (xmin+(j-1)*0.2)^(1-param) - (xmin+j*0.2)^(1-param) )))
}
## Stretched exponential (for binned datasets)
# (param[1] = beta, param[2] = lambda); a bin width of 0.2 is assumed
SE_bin_LLF <- function(param, data, xmin){
  j = 1:(max(data)/0.2)
  dj <- j
  for(i in 1:length(j)){
    dj[i] = sum(round(data*5) == i) # counts per bin
  }
  return( length(data)*param[2]*xmin^param[1] + sum( dj*log( exp(-param[2]*(xmin+(j-1)*0.2)^param[1]) - exp(-param[2]*(xmin+j*0.2)^param[1]) ) ) )
}
##### Maximum Likelihood Estimation (MLE) #####
## Exponential
Exp_MLE <- function(x, xmin){
  return(length(x)*(sum(x-xmin)^(-1)))
}
## Power-Law
PL_MLE <- function(x, xmin){
  return(1+length(x)*(sum(log(x/xmin)))^(-1))
}
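# Sketch: the estimator should recover the exponent of data simulated with the
# PL generator defined above (a = 2 and xmin = 1 are assumed for illustration)
x_sim <- PL(runif(10000), a=2, xmin=1)
PL_MLE(x_sim, xmin=1) # should be close to 2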
## Stretched exponential (using optim function)
optim(c(0.1,0.1), SE_LLF, data=Stop_sec, xmin=min_sec, control = list(fnscale = -1), method="Nelder-Mead") # xmin must be passed through to SE_LLF
## Truncated Power-Law (using optimize function)
optimize(TP_LLF, interval=c(0,6), data=Move_sec, xmin=min_sec, xmax=max_sec_M, maximum=T)
##### Non parametric #####
## Exact binomial test
binom.test(c(a,b), p=0.5, alternative="two.sided")
## Chi-squared test
chisq.test(c(a,b,c,d), p=c(4,3,2,1)/10)
chisq.test(matrix(c(20, 15, 16, 9), ncol=2, byrow=T))
## Fisher's Exact Test
fisher.test(xd) # default
fisher.test(xd, workspace=1000000) # enlarge the workspace (for large tables)
fisher.test(xd, simulate.p.value=TRUE, B=1e7) # Monte Carlo simulation (for very large tables)
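# Usage sketch with a hypothetical 2x2 contingency table:
xd <- matrix(c(8, 2, 1, 5), ncol=2)
fisher.test(xd)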
## Wilcoxon rank-sum test (two unpaired groups)
library(exactRankTests)
wilcox.exact(x = vx, y = vy, paired=F)
wilcox.exact(dis ~ sex, data=d, paired=F)
# Note: wilcox.test does not return exact p-values when there are ties
## Wilcoxon signed-rank test (two paired groups)
library(exactRankTests)
wilcox.exact(x = vx, y = vy, paired=T)
wilcox.exact(dis ~ sex, data=d, paired=T)
# Note: wilcox.test does not return exact p-values when there are ties
## Kendall's rank correlation coefficient
cor.test(x1, x2, method="kendall")
## Kruskal-Wallis test
d <- list(x, y, z)
kruskal.test(d)
# Post-hoc test (Nemenyi test, analogous to Tukey's test after ANOVA)
library(PMCMR)
posthoc.kruskal.nemenyi.test(d)
## Friedman test (paired counterpart of the Kruskal-Wallis test)
# data in table (matrix) form
friedman.test(search_ind_mean)
# Post-hoc test (Nemenyi test, analogous to Tukey's test after ANOVA)
library(PMCMR)
posthoc.friedman.nemenyi.test(search_ind_mean)
## Scheirer-Ray-Hare test (two-factor extension of the Kruskal-Wallis test)
## Ref: Sokal, R.R. and F.J. Rohlf. 1995. Biometry. 3rd ed. W.H. Freeman, New York. pp146-147
library(rcompanion)
ds <- data.frame(Rank_PC1, caste, colony)
scheirerRayHare(Rank_PC1~caste*colony, data=ds)
## Survival time analysis
library(survival)
ge.sf<-survfit(Surv(time,cens)~treat, type = "kaplan-meier", data=gehan)
# cens: censoring indicator; 0: censored, 1: complete data
summary(ge.sf)
plot(ge.sf, lty=1:2, mark.time=T, conf.int=F) # '+' marks the censored observations
survdiff(Surv(time,cens)~treat, rho=1, data=gehan)
# rho=0 gives the log-rank test; rho=1 gives the Gehan-Wilcoxon test.
# The default is rho=0.
## Cox Proportional-Hazards Model
library(car) # for Anova
library(multcomp) # for glht
ge.sf <- coxph(Surv(time,cens)~treat+sex, data=gehan) # coxph takes no 'type' argument
Anova(ge.sf)
multicomparison <- glht(ge.sf, linfct=mcp(treat="Tukey")) # compare the levels of the factor treat
summary(multicomparison)
##### Parametric #####
## One-sample t-test
t.test(x, mu=0.5, alternative="two.sided")
## GLM: change the baseline level of a factor in summary
sex <- rep(c("M","F"), each=10) # e.g. sex is a factor
sex <- factor(sex, levels=c("M", "F")) # M is the baseline
sex <- factor(sex, levels=c("F", "M")) # F is the baseline
## GLMM (id is nested within colony)
library(lme4)
r <- glmer(search~time+sex+(1|colony/id)+(1|spend), family=binomial(link="logit"), data=LDd[LDd$spend>24,])
## Multiple comparisons after a GLMM
library(multcomp)
multicomparison<-glht(r,linfct=mcp(time="Tukey"))
# d$treat <-relevel(d$treat, ref="Nega")
# multicomparison<-glht(r,linfct=mcp(time="Dunnett"))
summary(multicomparison)
## Draw a logistic curve from a binomial GLM
r <- glm(y ~ x, family=binomial) # fitted model (placeholder formula)
logistic <- function(x){ 1/(1+exp(-x)) } # the standard logistic function
Est <- summary(r)$coefficient[,1] # extract the estimated coefficients
xdata <- seq(min(x), max(x), length.out=120) # x range for the prediction
ydata <- logistic(Est[1]+Est[2]*xdata) # compute the predicted y values
lines(xdata, ydata) # overlay the curve
##### Multivariate analysis #####
## Principal Component Analysis
result <- prcomp(d[,5:13], scale=TRUE)
biplot(result)
summary(result)
# Kaiser-Meyer-Olkin measure of sampling adequacy (KMO, MSA)
kmo <- function(x){
  x <- subset(x, complete.cases(x)) # remove the cases with any missing value
  r <- cor(x) # correlation matrix
  r2 <- r^2 # squared correlation coefficients
  i <- solve(r) # inverse of the correlation matrix
  d <- diag(i) # diagonal elements of the inverse
  p2 <- (-i/sqrt(outer(d, d)))^2 # squared partial correlation coefficients
  diag(r2) <- diag(p2) <- 0 # delete the diagonal elements
  KMO <- sum(r2)/(sum(r2)+sum(p2))
  MSA <- colSums(r2)/(colSums(r2)+colSums(p2))
  return(list(KMO=KMO, MSA=MSA))
}
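# Usage sketch: check the sampling adequacy of the columns used in the PCA above
kmo(d[, 5:13])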
####################
###### Others ######
####################
## Generate robust color-band ID codes
library(rabi)
brute_IDs(3, redundancy=2, alphabet=5, num.tries = 10, available.colors = c("y","g","p","o","b"))