Information About R
On this page, we provide some resources for using R.
There are a lot of decisions that go into how to process accelerometer data. The goal of this repository is not to address all of these decisions. However, we provide this imperfect example code to demonstrate how one may implement one of the models from the repository into a more comprehensive analysis.
#-------------------------------------------------------------------------#
#make sure that all data sets and models are in the same folder
#set working directory (tell R where to look for the files)
setwd("C:/Users/cleve/Dropbox/test/counts")
#-------------------------------------------------------------------------#
#-------------------------------------------------------------------------#
#install and load the following packages:
#(each tryCatch installs the package on the fly if loading it fails)
tryCatch(library(nnet),error=function(e){install.packages("nnet");library(nnet)})
tryCatch(library(dplyr),error=function(e){install.packages("dplyr");library(dplyr)})
tryCatch(library(AGread),error=function(e){install.packages("AGread");library(AGread)})
tryCatch(library(lubridate),error=function(e){install.packages("lubridate");library(lubridate)})
tryCatch(library(accelerometry),error=function(e){install.packages("accelerometry");library(accelerometry)})
tryCatch(library(data.table),error=function(e){install.packages("data.table");library(data.table)})
#-------------------------------------------------------------------------#
#-------------------------------------------------------------------------#
#load model of choice
#(creates the model object "nnet_hip_count" used by run() below)
load("hip_count.RData")
#-------------------------------------------------------------------------#
#-------------------------------------------------------------------------#
#make a list of all csv files (without their extensions)
#list.files() takes a REGEX, not a glob: anchor with \\.csv$ so only
#files ending in .csv match ("*.csv" would match ".csv" anywhere in a name)
filenames<-tools::file_path_sans_ext(list.files(pattern="\\.csv$"))
#-------------------------------------------------------------------------#
#-------------------------------------------------------------------------#
#define a function to read each file and predict EE using the loaded model
#read one ActiGraph count file, apply wear-time screening, aggregate to
#15-s epochs, and predict energy expenditure with the loaded model
#x: file name WITHOUT the .csv extension (as produced by "filenames" above)
#returns: a data frame of 15-s epoch features + predictions + file name
run=function(x)
{
filepath<-paste0(x,".csv")
data<-AGread::read_AG_counts(filepath,skip=11) #read in ActiGraph csv
data$VM<-sqrt(data$Axis1^2 + data$Axis2^2 + data$Axis3^2) #calculate vector magnitude
#exclude non-wear as 20 min of continuous 0's in the vertical axis
#(window = 1200 one-second epochs = 20 minutes; units_day = 86400 seconds)
wearvector<-accelerometry::weartime(counts=data$Axis1, window = 1200, nci = FALSE,days_distinct = FALSE, units_day = 86400)
#weartime() already returns a numeric vector (1 = wear, 0 = non-wear);
#no need to round-trip it through a list
data$wear<-as.numeric(wearvector)
#keep only wear time (compare numerically, not against the string "1")
data<-subset(data,wear==1)
#change "15 sec" to desired output epoch
data$Timestamp15 <- lubridate::floor_date(data$Timestamp, "15 sec")
#calculate features of interest of 15-s epoch-these are some examples
#(across() replaces the superseded summarise_at(); output columns are
#named Axis1_mean, Axis1_var, etc., exactly as before)
data <- data %>%
group_by(Timestamp15) %>%
summarise(across(c(VM,Axis1,Axis2,Axis3),
list(mean=mean,min=min,max=max,median=median,
var=var,sd=sd))) %>%
#renaming features used in chosen model to match model's required input
rename(MeanCounts_RH_y=Axis1_mean,
MeanCounts_RH_x=Axis2_mean,
MeanCounts_RH_z=Axis3_mean,
VarCounts_RH_y=Axis1_var,
VarCounts_RH_x=Axis2_var,
VarCounts_RH_z=Axis3_var)
#predict using chosen model (nnet_hip_count loaded from hip_count.RData)
datapred<-predict(nnet_hip_count,data)
datacombined<-cbind(data,datapred)
datacombined$file<-x #keep track of which file each row came from
return(datacombined)
}
#-------------------------------------------------------------------------#
#-------------------------------------------------------------------------#
#apply function to all files and merge into one dataframe
alldata<-lapply(filenames, run) %>% bind_rows()
#-------------------------------------------------------------------------#
#-------------------------------------------------------------------------#
#identify if sufficient wear time (8 hrs/day, 4 days, 1 weekend)
#first need a few descriptive variables
#NOTE: after run(), the epoch timestamp column is "Timestamp15"
#(the raw "Timestamp" column was dropped by the summarise step),
#so the date/time parts must be taken from Timestamp15
alldata$dayofweek<-lubridate::wday(alldata$Timestamp15)
alldata$hour<-lubridate::hour(alldata$Timestamp15)
alldata$dayofyear<-lubridate::yday(alldata$Timestamp15)
#lubridate::wday() codes days 1-7 with 1 = Sunday and 7 = Saturday by
#default, so the weekend is days 1 and 7 (a 0/7 check would miss Sunday)
alldata$weekend<-as.numeric(alldata$dayofweek == 1 | alldata$dayofweek == 7)
#group data by day and id number and count the number of epochs of wear per day
hourfilter<-alldata %>%
group_by(dayofyear,file)%>%
add_count(dayofyear,file)%>%
summarise_all(max)
#identify days where less than 8 hours of wear so can later remove
#(1920 = 8 hours x 240 15-s epochs per hour)
#one line per day per participant only for days with at least 8 wear hours
enoughhours<-subset(hourfilter,n>=1920)
insufficienthours<-subset(hourfilter,n<1920)
#add count of number of days per participant
#one line per participant
dayfilter<-enoughhours %>%
group_by(file)%>%
add_count(file)%>%
summarise_all(max)
#identify participants with less than 4 days so can later be removed
#one line per participant, only for those with at least 4 days
enoughdays<-subset(dayfilter,n>=4)
#see if anyone is missing a weekend day
noweekend<-subset(dayfilter,weekend==0)
#go back to epoch-level data#
#remove days with insufficient hours from participants who remain in sample
#(data.table keyed join flags every epoch falling on an insufficient day)
setDT(alldata)
setDT(insufficienthours)
keys<-c("file","dayofyear")
setkeyv(alldata,keys)
setkeyv(insufficienthours,keys)
alldata[, keep := 'Yes'][insufficienthours, keep := 'No']
datacuthours<-subset(alldata,keep=="Yes")
#remove participants with insufficient number of days
#so this is people with enough hrs/day and enough days
datacut<-datacuthours[datacuthours$file %in% enoughdays$file,]
#remove people who didn't have a weekend day
datacutfinal<-datacut[!datacut$file %in% noweekend$file,]
#-------------------------------------------------------------------------#
#-------------------------------------------------------------------------#
#want to get time spent in each activity intensity
#classify each epoch of predicted METs as sed/light (1), mod (2), or vig (3)
cutpoints<-c(-Inf,2.999,6,Inf)
cutpointlabels<-c("1","2","3")
datacutfinal$actclass<-cut(datacutfinal$datapred,breaks=cutpoints,labels=cutpointlabels)
#count number of epochs in each activity intensity for each participant
#(the second xtabs gives the total wear epochs per file)
actclass<-as.data.frame(cbind((xtabs(~file+actclass, data=datacutfinal)),xtabs(~file, data=datacutfinal)))
names(actclass)<-c("sedlight","mod","vig","wt")
#calculate percent of wear time in each intensity
actclass$sedlightperc<-((actclass$sedlight/actclass$wt)*100)
actclass$modperc<-((actclass$mod/actclass$wt)*100)
actclass$vigperc<-((actclass$vig/actclass$wt)*100)
#total wear time in hours (15-s epochs -> seconds -> hours)
actclass$wthr<-(actclass$wt*15)/3600
#-------------------------------------------------------------------------#
I'm new to R...
Often the best way to learn is to have a task you want to accomplish and learn how to do that first (such as applying a model from this repository). Google and Stack Overflow have answers to most questions but below are some example resources to get you started. Feel free to reach out if you are having trouble implementing a model in the repository.
What if I have many files or participants?
The code provided with the models in the repository often calculates energy expenditure or physical activity intensity at the epoch level for one participant or data file. You likely have multiple participants or data files for which you need to use these models. Here is example code for how to easily run code on multiple files:
#make sure that all data sets and models are in the same folder
#set working directory (tell R where to look for the files)
setwd("C:/Users/cleve/Dropbox/test")
#install and load the following packages:
tryCatch(library(nnet),error=function(e){install.packages("nnet");library(nnet)})
tryCatch(library(dplyr),error=function(e){install.packages("dplyr");library(dplyr)})
#load model of choice (creates the model object "nnet3_RH_005")
load("RH.RData")
#make a list of all csv files (anchored regex: only names ending in .csv)
filenames<-list.files(pattern="\\.csv$")
#define a function to read each file and predict using the loaded model
#x: csv file name; returns the original data plus predictions and file name
run=function(x)
{
dataorig<-read.csv(x)
#reuse dataorig rather than reading the same file from disk a second time
datapred<-predict(nnet3_RH_005,dataorig)
data<-cbind(dataorig,datapred)
data$file<-x
return(data)
}
#apply function to files and merge into one dataframe
alldata<-lapply(filenames, run) %>% bind_rows()
How do I calculate features?
Features can be extracted in some software programs (such as ActiLife). You can also calculate features in R using code like in this example:
#-------------------------------------------------------------------------#
#make sure that all data sets and models are in the same folder
#set working directory (tell R where to look for the files)
setwd("C:/Users/cleve/Dropbox/test/")
#-------------------------------------------------------------------------#
#-------------------------------------------------------------------------#
#install and load the following packages:
tryCatch(library(nnet),error=function(e){install.packages("nnet");library(nnet)})
tryCatch(library(dplyr),error=function(e){install.packages("dplyr");library(dplyr)})
tryCatch(library(tidyr),error=function(e){install.packages("tidyr");library(tidyr)})
tryCatch(library(purrr),error=function(e){install.packages("purrr");library(purrr)})
tryCatch(library(AGread),error=function(e){install.packages("AGread");library(AGread)})
#lubridate is called below via lubridate:: so it must be installed too
tryCatch(library(lubridate),error=function(e){install.packages("lubridate");library(lubridate)})
#-------------------------------------------------------------------------#
#----------------------------------------------------------------------#
#define a function to read each file and calculate epoch-level features
#this example is for raw ActiGraph data but a similar function could be made
#for other file types
#x: csv file name; returns one row of features per 5-s epoch
run=function(x)
{
data<-AGread::read_AG_raw(x,return_raw=TRUE, skip=10)
data$Timestamp<-lubridate::ymd_hms(data$Time)
#change "5 sec" to desired output epoch
data$Timestamp5 <- lubridate::floor_date(data$Timestamp, "5 sec")
#calculate features of interest-these are some examples
data$vm<-sqrt(data$Accelerometer_X^2+data$Accelerometer_Y^2+data$Accelerometer_Z^2)
data$AbsAccelerometer_X<-abs(data$Accelerometer_X)
data$AbsAccelerometer_Y<-abs(data$Accelerometer_Y)
data$AbsAccelerometer_Z<-abs(data$Accelerometer_Z)
data <- data %>%
group_by(Timestamp5) %>%
summarise_at(vars("vm","Accelerometer_X","Accelerometer_Y","Accelerometer_Z"),
list(mean=mean,min=min,max=max,median=median,
var=var,sd=sd,p10=~quantile(., probs = 0.10),
p25=~quantile(., probs = 0.25),p50=~quantile(., probs = 0.50),
p75=~quantile(., probs = 0.75),p90=~quantile(., probs = 0.90)))
#end the pipeline before returning; piping into return() is an error
#in magrittr ("no function to return from")
return(data)
}
data<-run("sample_data.csv")
What about non-wear time?
There are several existing R functions for determining non-wear time. Below are some examples; read the vignettes for more information.
PhysicalActivity::wearingMarking
wearingMarking(dataset,frame = 90,perMinuteCts = 60,TS = getOption("pa.timeStamp"),cts = getOption("pa.cts"),streamFrame = NULL,allowanceFrame = 2,newcolname = "wearing",getMinuteMarking = FALSE,dayStart = "00:00:00",tz = "UTC")
#accelerometry::weartime
weartime(counts, window = 60L, tol = 0L, tol_upper = 99L, nci = FALSE,days_distinct = FALSE, units_day = 1440L)
#GGIR::g.shell.GGIR (windowsizes is a regular named argument;
#the extra parentheses around it were a syntax error)
g.shell.GGIR(datadir="filepath/to/accelerometer/files/",
outputdir="filepath/to/save/location/of/output",windowsizes = c(5,900,3600))
What about minimum wear time requirements?
There are multiple ways to determine if participants have enough valid days of wear. Here is one example:
#an example if you have a file with only wear times and need to keep only participants who have sufficient wear time
#variables like dayofyear and weekend were created using the lubridate package
#group data by day and id number and count the number of hours of wear per day
hourfilter<-data %>%
group_by(dayofyear,id)%>%
add_count(dayofyear,id)%>%
summarise_all(max)
#identify days where less than 480 observations (8 hours) so can later remove
#one line per day per participant only for days with at least 8 wear hours
enoughhours<-subset(hourfilter,n>=480)
insufficienthours<-subset(hourfilter,n<480)
#add count of number of days per participant
#one line per participant
dayfilter<-enoughhours %>%
group_by(id)%>%
add_count(id)%>%
summarise_all(max)
#identify participants with less than 4 days so can later be removed#
#one line per participant, only for those with at least 4 days
enoughdays<-subset(dayfilter,n>=4)
#see if anyone is missing a weekend day
#(filter dayfilter; "dayfiltercut" was an undefined object)
noweekend<-subset(dayfilter,weekend==0)
#go back to epoch-level data#
#remove days with insufficient hours from participants who remain in sample
#(data.table keyed join flags every epoch falling on an insufficient day)
setDT(data)
setDT(insufficienthours)
keys<-c("id","dayofyear")
setkeyv(data,keys)
setkeyv(insufficienthours,keys)
data[, keep := 'Yes'][insufficienthours, keep := 'No']
datacuthours<-subset(data,keep=="Yes")
#remove participants with insufficient number of days
#so this is people with enough hrs/day and enough days
datacut<-datacuthours[datacuthours$id %in% enoughdays$id,]
#remove people who didn't have a weekend day
datacutfinal<-datacut[!datacut$id %in% noweekend$id,]
What R packages are available for analyzing accelerometer data?
Follow these links to read the vignettes:
Missing something? Email or fill out the form below