#!/bin/bash
#SBATCH --time=15:00:00
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --account=usubio-kp
#SBATCH --partition=usubio-kp
#SBATCH --job-name=bwa
#SBATCH --mail-type=FAIL
#SBATCH --mail-user=tara.saley@gmail.com
module load R
cd /scratch/general/lustre/u6015714/tcs_evogenass
R CMD BATCH source.R
explanatory<-matrix(scan("/uufs/chpc.utah.edu/common/home/gompert-group1/projects/mtrunc_mel/tara_class/data/pntest_mtrunc_pruned.txt",n=94*5648772,sep=""),nrow=5648772,ncol=94,byrow=TRUE)
response<-read.table("/uufs/chpc.utah.edu/common/home/gompert-group1/projects/mtrunc_mel/tara_class/data/hdr_sub_pheno_plant_cat.txt", header=TRUE)
library(lars)
L<- dim(explanatory)[1]
output<-vector("list", 3000)
subset<-vector("list", 3000)
for(i in 1:3000){
snpsample<-sample(1:L,size=20000,replace=FALSE)
subset[[i]]<-snpsample
sub_explan<-explanatory[snpsample,]
t_sub_explan<-t(sub_explan)
larsample<-lars(t_sub_explan, response[,2], type="lasso",use.Gram=FALSE)
signif.coefs<-function(larsample, threshold=0.001){
coefs<-coef(larsample)
signif<-which(abs(coefs[nrow(coefs),])>threshold)
return(setNames(coefs[nrow(coefs),signif],signif))
}
output[[i]]<-signif.coefs(larsample)
}
saveRDS(subset,"leaflengthsubset.RDS")
saveRDS(output,"leaflengthoutput.RDS")