Information About R
On this page, we provide some resources for using R.
There are a lot of decisions that go into how to process accelerometer data. The goal of this repository is not to address all of these decisions. However, we provide this imperfect example code to demonstrate how one may implement one of the models from the repository into a more comprehensive analysis.
#-------------------------------------------------------------------------#
#make sure that all data sets and models are in the same folder
#set working directory (tell R where to look for the files)
setwd("C:/Users/cleve/Dropbox/test/counts")
#-------------------------------------------------------------------------#
#-------------------------------------------------------------------------#
#install and load the following packages:
#(each tryCatch installs the package on the fly if loading it fails)
tryCatch(library(nnet),error=function(e){install.packages("nnet");library(nnet)})
tryCatch(library(dplyr),error=function(e){install.packages("dplyr");library(dplyr)})
tryCatch(library(AGread),error=function(e){install.packages("AGread");library(AGread)})
tryCatch(library(lubridate),error=function(e){install.packages("lubridate");library(lubridate)})
tryCatch(library(accelerometry),error=function(e){install.packages("accelerometry");library(accelerometry)})
tryCatch(library(data.table),error=function(e){install.packages("data.table");library(data.table)})
#-------------------------------------------------------------------------#
#-------------------------------------------------------------------------#
#load model of choice
#(creates the model object "nnet_hip_count" used by run() below)
load("hip_count.RData")
#-------------------------------------------------------------------------#
#-------------------------------------------------------------------------#
#make a list of all csv files (without their extensions)
#list.files() takes a REGEX, not a glob: anchor with \\.csv$ so only
#files ending in .csv match ("*.csv" would match ".csv" anywhere in a name)
filenames<-tools::file_path_sans_ext(list.files(pattern="\\.csv$"))
#-------------------------------------------------------------------------#
#-------------------------------------------------------------------------#
#define a function to read each file and predict EE using the loaded model
#read one ActiGraph count file, apply wear-time screening, aggregate to
#15-s epochs, and predict energy expenditure with the loaded model
#x: file name WITHOUT the .csv extension (as produced by "filenames" above)
#returns: a data frame of 15-s epoch features + predictions + file name
run=function(x)
{
filepath<-paste0(x,".csv")
data<-AGread::read_AG_counts(filepath,skip=11) #read in ActiGraph csv
data$VM<-sqrt(data$Axis1^2 + data$Axis2^2 + data$Axis3^2) #calculate vector magnitude
#exclude non-wear as 20 min of continuous 0's in the vertical axis
#(window = 1200 one-second epochs = 20 minutes; units_day = 86400 seconds)
wearvector<-accelerometry::weartime(counts=data$Axis1, window = 1200, nci = FALSE,days_distinct = FALSE, units_day = 86400)
#weartime() already returns a numeric vector (1 = wear, 0 = non-wear);
#no need to round-trip it through a list
data$wear<-as.numeric(wearvector)
#keep only wear time (compare numerically, not against the string "1")
data<-subset(data,wear==1)
#change "15 sec" to desired output epoch
data$Timestamp15 <- lubridate::floor_date(data$Timestamp, "15 sec")
#calculate features of interest of 15-s epoch-these are some examples
#(across() replaces the superseded summarise_at(); output columns are
#named Axis1_mean, Axis1_var, etc., exactly as before)
data <- data %>%
group_by(Timestamp15) %>%
summarise(across(c(VM,Axis1,Axis2,Axis3),
list(mean=mean,min=min,max=max,median=median,
var=var,sd=sd))) %>%
#renaming features used in chosen model to match model's required input
rename(MeanCounts_RH_y=Axis1_mean,
MeanCounts_RH_x=Axis2_mean,
MeanCounts_RH_z=Axis3_mean,
VarCounts_RH_y=Axis1_var,
VarCounts_RH_x=Axis2_var,
VarCounts_RH_z=Axis3_var)
#predict using chosen model (nnet_hip_count loaded from hip_count.RData)
datapred<-predict(nnet_hip_count,data)
datacombined<-cbind(data,datapred)
datacombined$file<-x #keep track of which file each row came from
return(datacombined)
}
#-------------------------------------------------------------------------#
#-------------------------------------------------------------------------#
#apply function to all files and merge into one dataframe
alldata<-lapply(filenames, run) %>% bind_rows()
#-------------------------------------------------------------------------#
#-------------------------------------------------------------------------#
#identify if sufficient wear time (8 hrs/day, 4 days, 1 weekend)
#first need a few descriptive variables
#NOTE: after run(), the epoch timestamp column is "Timestamp15"
#(the raw "Timestamp" column was dropped by the summarise step),
#so the date/time parts must be taken from Timestamp15
alldata$dayofweek<-lubridate::wday(alldata$Timestamp15)
alldata$hour<-lubridate::hour(alldata$Timestamp15)
alldata$dayofyear<-lubridate::yday(alldata$Timestamp15)
#lubridate::wday() codes days 1-7 with 1 = Sunday and 7 = Saturday by
#default, so the weekend is days 1 and 7 (a 0/7 check would miss Sunday)
alldata$weekend<-as.numeric(alldata$dayofweek == 1 | alldata$dayofweek == 7)
#group data by day and id number and count the number of epochs of wear per day
hourfilter<-alldata %>%
group_by(dayofyear,file)%>%
add_count(dayofyear,file)%>%
summarise_all(max)
#identify days where less than 8 hours of wear so can later remove
#(1920 = 8 hours x 240 15-s epochs per hour)
#one line per day per participant only for days with at least 8 wear hours
enoughhours<-subset(hourfilter,n>=1920)
insufficienthours<-subset(hourfilter,n<1920)
#add count of number of days per participant
#one line per participant
dayfilter<-enoughhours %>%
group_by(file)%>%
add_count(file)%>%
summarise_all(max)
#identify participants with less than 4 days so can later be removed
#one line per participant, only for those with at least 4 days
enoughdays<-subset(dayfilter,n>=4)
#see if anyone is missing a weekend day
noweekend<-subset(dayfilter,weekend==0)
#go back to epoch-level data#
#remove days with insufficient hours from participants who remain in sample
#(data.table keyed join flags every epoch falling on an insufficient day)
setDT(alldata)
setDT(insufficienthours)
keys<-c("file","dayofyear")
setkeyv(alldata,keys)
setkeyv(insufficienthours,keys)
alldata[, keep := 'Yes'][insufficienthours, keep := 'No']
datacuthours<-subset(alldata,keep=="Yes")
#remove participants with insufficient number of days
#so this is people with enough hrs/day and enough days
datacut<-datacuthours[datacuthours$file %in% enoughdays$file,]
#remove people who didn't have a weekend day
datacutfinal<-datacut[!datacut$file %in% noweekend$file,]
#-------------------------------------------------------------------------#
#-------------------------------------------------------------------------#
#want to get time spent in each activity intensity
#classify each epoch of predicted METs as sed/light (1), mod (2), or vig (3)
cutpoints<-c(-Inf,2.999,6,Inf)
cutpointlabels<-c("1","2","3")
datacutfinal$actclass<-cut(datacutfinal$datapred,breaks=cutpoints,labels=cutpointlabels)
#count number of epochs in each activity intensity for each participant
#(the second xtabs gives the total wear epochs per file)
actclass<-as.data.frame(cbind((xtabs(~file+actclass, data=datacutfinal)),xtabs(~file, data=datacutfinal)))
names(actclass)<-c("sedlight","mod","vig","wt")
#calculate percent of wear time in each intensity
actclass$sedlightperc<-((actclass$sedlight/actclass$wt)*100)
actclass$modperc<-((actclass$mod/actclass$wt)*100)
actclass$vigperc<-((actclass$vig/actclass$wt)*100)
#total wear time in hours (15-s epochs -> seconds -> hours)
actclass$wthr<-(actclass$wt*15)/3600
#-------------------------------------------------------------------------#
I'm new to R...
Often the best way to learn is to have a task you want to accomplish and learn how to do that first (such as applying a model from this repository). Google and Stack Overflow have answers to most questions but below are some example resources to get you started. Feel free to reach out if you are having trouble implementing a model in the repository.
What if I have many files or participants?
The code provided with the models in the repository often calculates energy expenditure or physical activity intensity at the epoch level for one participant or data file. You likely have multiple participants or data files for which you need to use these models. Here is example code for how to easily run code on multiple files:
#make sure that all data sets and models are in the same folder
#set working directory (tell R where to look for the files)
setwd("C:/Users/cleve/Dropbox/test")
#install and load the following packages:
tryCatch(library(nnet),error=function(e){install.packages("nnet");library(nnet)})
tryCatch(library(dplyr),error=function(e){install.packages("dplyr");library(dplyr)})
#load model of choice (creates the model object "nnet3_RH_005")
load("RH.RData")
#make a list of all csv files (anchored regex: only names ending in .csv)
filenames<-list.files(pattern="\\.csv$")
#define a function to read each file and predict using the loaded model
#x: csv file name; returns the original data plus predictions and file name
run=function(x)
{
dataorig<-read.csv(x)
#reuse dataorig rather than reading the same file from disk a second time
datapred<-predict(nnet3_RH_005,dataorig)
data<-cbind(dataorig,datapred)
data$file<-x
return(data)
}
#apply function to files and merge into one dataframe
alldata<-lapply(filenames, run) %>% bind_rows()
How do I calculate features?
Features can be extracted in some software programs (such as ActiLife). You can also calculate features in R using code like in this example:
#-------------------------------------------------------------------------#
#make sure that all data sets and models are in the same folder
#set working directory (tell R where to look for the files)
setwd("C:/Users/cleve/Dropbox/test/")
#-------------------------------------------------------------------------#
#-------------------------------------------------------------------------#
#install and load the following packages:
tryCatch(library(nnet),error=function(e){install.packages("nnet");library(nnet)})
tryCatch(library(dplyr),error=function(e){install.packages("dplyr");library(dplyr)})
tryCatch(library(tidyr),error=function(e){install.packages("tidyr");library(tidyr)})
tryCatch(library(purrr),error=function(e){install.packages("purrr");library(purrr)})
tryCatch(library(AGread),error=function(e){install.packages("AGread");library(AGread)})
#lubridate is called below via lubridate:: so it must be installed too
tryCatch(library(lubridate),error=function(e){install.packages("lubridate");library(lubridate)})
#-------------------------------------------------------------------------#
#----------------------------------------------------------------------#
#define a function to read each file and calculate epoch-level features
#this example is for raw ActiGraph data but a similar function could be made
#for other file types
#x: csv file name; returns one row of features per 5-s epoch
run=function(x)
{
data<-AGread::read_AG_raw(x,return_raw=TRUE, skip=10)
data$Timestamp<-lubridate::ymd_hms(data$Time)
#change "5 sec" to desired output epoch
data$Timestamp5 <- lubridate::floor_date(data$Timestamp, "5 sec")
#calculate features of interest-these are some examples
data$vm<-sqrt(data$Accelerometer_X^2+data$Accelerometer_Y^2+data$Accelerometer_Z^2)
data$AbsAccelerometer_X<-abs(data$Accelerometer_X)
data$AbsAccelerometer_Y<-abs(data$Accelerometer_Y)
data$AbsAccelerometer_Z<-abs(data$Accelerometer_Z)
data <- data %>%
group_by(Timestamp5) %>%
summarise_at(vars("vm","Accelerometer_X","Accelerometer_Y","Accelerometer_Z"),
list(mean=mean,min=min,max=max,median=median,
var=var,sd=sd,p10=~quantile(., probs = 0.10),
p25=~quantile(., probs = 0.25),p50=~quantile(., probs = 0.50),
p75=~quantile(., probs = 0.75),p90=~quantile(., probs = 0.90)))
#end the pipeline before returning; piping into return() is an error
#in magrittr ("no function to return from")
return(data)
}
data<-run("sample_data.csv")
What about non-wear time?
There are several existing R functions for determining non-wear time. Below are some examples; read the vignettes for more information.
PhysicalActivity::wearingMarking
wearingMarking(dataset,frame = 90,perMinuteCts = 60,TS = getOption("pa.timeStamp"),cts = getOption("pa.cts"),streamFrame = NULL,allowanceFrame = 2,newcolname = "wearing",getMinuteMarking = FALSE,dayStart = "00:00:00",tz = "UTC")
#accelerometry::weartime
weartime(counts, window = 60L, tol = 0L, tol_upper = 99L, nci = FALSE,days_distinct = FALSE, units_day = 1440L)
#GGIR::g.shell.GGIR (windowsizes is a regular named argument;
#the extra parentheses around it were a syntax error)
g.shell.GGIR(datadir="filepath/to/accelerometer/files/",
outputdir="filepath/to/save/location/of/output",windowsizes = c(5,900,3600))
What about minimum wear time requirements?
There are multiple ways to determine if participants have enough valid days of wear. Here is one example:
#an example if you have a file with only wear times and need to keep only participants who have sufficient wear time
#variables like dayofyear and weekend were created using the lubridate package
#group data by day and id number and count the number of hours of wear per day
hourfilter<-data %>%
group_by(dayofyear,id)%>%
add_count(dayofyear,id)%>%
summarise_all(max)
#identify days where less than 480 observations (8 hours) so can later remove
#one line per day per participant only for days with at least 8 wear hours
enoughhours<-subset(hourfilter,n>=480)
insufficienthours<-subset(hourfilter,n<480)
#add count of number of days per participant
#one line per participant
dayfilter<-enoughhours %>%
group_by(id)%>%
add_count(id)%>%
summarise_all(max)
#identify participants with less than 4 days so can later be removed#
#one line per participant, only for those with at least 4 days
enoughdays<-subset(dayfilter,n>=4)
#see if anyone is missing a weekend day
#(filter dayfilter; "dayfiltercut" was an undefined object)
noweekend<-subset(dayfilter,weekend==0)
#go back to epoch-level data#
#remove days with insufficient hours from participants who remain in sample
#(data.table keyed join flags every epoch falling on an insufficient day)
setDT(data)
setDT(insufficienthours)
keys<-c("id","dayofyear")
setkeyv(data,keys)
setkeyv(insufficienthours,keys)
data[, keep := 'Yes'][insufficienthours, keep := 'No']
datacuthours<-subset(data,keep=="Yes")
#remove participants with insufficient number of days
#so this is people with enough hrs/day and enough days
datacut<-datacuthours[datacuthours$id %in% enoughdays$id,]
#remove people who didn't have a weekend day
datacutfinal<-datacut[!datacut$id %in% noweekend$id,]
What R packages are available for analyzing accelerometer data?
Follow these links to read the vignettes:
Missing something? Email or fill out the form below