# What are some trends in smart device usage?
# How could these trends apply to Bellabeat customers?
# How could these trends help influence Bellabeat marketing strategy?
#FitBit Fitness Tracker Data (Dataset made available through Mobius on Kaggle).
#Thirty eligible Fitbit users consented to the submission of personal tracker data.
# About this data: It contains a total of 18 datasets, with various records on
# participants' activity and fitness data.
#I will be using R for this project.
# Importing Dataset
# Import datasets to view and analyze the data.
# Load the tidyverse: this script relies on read_csv() (readr), glimpse() and
# n_distinct() (dplyr), the %>% pipe, and ggplot2.
library(tidyverse)

# Read each Fitbit export from the working directory into its own tibble.
# The daily activity table is stored as `daily_activity`, the name that every
# later step of this script refers to, and is read with read_csv() like the
# other tables so column types match the glimpse() output recorded below.
daily_activity <- read_csv("dailyActivity_merged.csv")
daily_calories <- read_csv("dailyCalories_merged.csv")
daily_Intensities <- read_csv("dailyIntensities_merged.csv")
daily_Steps <- read_csv("dailySteps_merged.csv")
heart_ratesec <- read_csv("heartrate_seconds_merged.csv")
minute_MET <- read_csv("minuteMETsNarrow_merged.csv")
daily_sleep <- read_csv("sleepDay_merged.csv")
weight_log <- read_csv("weightLogInfo_merged.csv")

# Preview the columns, their types, and the first values of daily activity.
glimpse(daily_activity)
# Id <dbl> 1503960366, 1503960366, 1503960366, 1503960366, 1503960366, 1503960366, 150396036…
# ActivityDate <chr> "4/12/2016", "4/13/2016", "4/14/2016", "4/15/2016", "4/16/2016", "4/17/2016", "4/…
# TotalSteps <dbl> 13162, 10735, 10460, 9762, 12669, 9705, 13019, 15506, 10544, 9819, 12764, 14371, …
# TotalDistance <dbl> 8.50, 6.97, 6.74, 6.28, 8.16, 6.48, 8.59, 9.88, 6.68, 6.34, 8.13, 9.04, 6.41, 9.8…
# TrackerDistance <dbl> 8.50, 6.97, 6.74, 6.28, 8.16, 6.48, 8.59, 9.88, 6.68, 6.34, 8.13, 9.04, 6.41, 9.8…
# LoggedActivitiesDistance <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
# VeryActiveDistance <dbl> 1.88, 1.57, 2.44, 2.14, 2.71, 3.19, 3.25, 3.53, 1.96, 1.34, 4.76, 2.81, 2.92, 5.2…
# ModeratelyActiveDistance <dbl> 0.55, 0.69, 0.40, 1.26, 0.41, 0.78, 0.64, 1.32, 0.48, 0.35, 1.12, 0.87, 0.21, 0.5…
# LightActiveDistance <dbl> 6.06, 4.71, 3.91, 2.83, 5.04, 2.51, 4.71, 5.03, 4.24, 4.65, 2.24, 5.36, 3.28, 3.9…
# SedentaryActiveDistance <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.0…
# VeryActiveMinutes <dbl> 25, 21, 30, 29, 36, 38, 42, 50, 28, 19, 66, 41, 39, 73, 31, 78, 48, 16, 52, 33, 4…
# FairlyActiveMinutes <dbl> 13, 19, 11, 34, 10, 20, 16, 31, 12, 8, 27, 21, 5, 14, 23, 11, 28, 12, 34, 35, 15,…
# LightlyActiveMinutes <dbl> 328, 217, 181, 209, 221, 164, 233, 264, 205, 211, 130, 262, 238, 216, 279, 243, 1…
# SedentaryMinutes <dbl> 728, 776, 1218, 726, 773, 539, 1149, 775, 818, 838, 1217, 732, 709, 814, 833, 110…
# Calories <dbl> 1985, 1797, 1776, 1745, 1863, 1728, 1921, 2035, 1786, 1775, 1827, 1949, 1788, 201…
# daily_activity contains most of the important tracker data, including total
# steps, calories, and distance information.
# Preview the per-second heart-rate table. The console output recorded from a
# previous run is kept below as comments so the script remains valid R.
glimpse(heart_ratesec)
# Rows: 2,483,658
# Columns: 3
# $ Id <dbl> 2022484408, 2022484408, 2022484408, 2022484408, 2022484408, 2022484408, 2022484408, 2022484408, 2022…
# $ Time <chr> "4/12/2016 7:21:00 AM", "4/12/2016 7:21:05 AM", "4/12/2016 7:21:10 AM", "4/12/2016 7:21:20 AM", "4/1…
# $ Value <dbl> 97, 102, 105, 103, 101, 95, 91, 93, 94, 93, 92, 89, 83, 61, 60, 61, 61, 57, 54, 55, 58, 60, 59, 57, …
# Preview the daily sleep table. The console output recorded from a previous
# run is kept below as comments so the script remains valid R.
glimpse(daily_sleep)
# Rows: 413
# Columns: 5
# $ Id <dbl> 1503960366, 1503960366, 1503960366, 1503960366, 1503960366, 1503960366, 1503960366, 150…
# $ SleepDay <chr> "4/12/2016 12:00:00 AM", "4/13/2016 12:00:00 AM", "4/15/2016 12:00:00 AM", "4/16/2016 1…
# $ TotalSleepRecords <dbl> 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
# $ TotalMinutesAsleep <dbl> 327, 384, 412, 340, 700, 304, 360, 325, 361, 430, 277, 245, 366, 341, 404, 369, 277, 27…
# $ TotalTimeInBed <dbl> 346, 407, 442, 367, 712, 320, 377, 364, 384, 449, 323, 274, 393, 354, 425, 396, 309, 29…
# Preview the weight log table. The console output recorded from a previous
# run is kept below as comments so the script remains valid R.
glimpse(weight_log)
# Rows: 67
# Columns: 8
# $ Id <dbl> 1503960366, 1503960366, 1927972279, 2873212765, 2873212765, 4319703577, 4319703577, 4558609…
# $ Date <chr> "5/2/2016 11:59:59 PM", "5/3/2016 11:59:59 PM", "4/13/2016 1:08:52 AM", "4/21/2016 11:59:59…
# $ WeightKg <dbl> 52.6, 52.6, 133.5, 56.7, 57.3, 72.4, 72.3, 69.7, 70.3, 69.9, 69.2, 69.1, 90.7, 62.5, 62.1, …
# $ WeightPounds <dbl> 115.9631, 115.9631, 294.3171, 125.0021, 126.3249, 159.6147, 159.3942, 153.6622, 154.9850, 1…
# $ Fat <dbl> 22, NA, NA, NA, NA, 25, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
# $ BMI <dbl> 22.65, 22.65, 47.54, 21.45, 21.69, 27.45, 27.38, 27.25, 27.46, 27.32, 27.04, 27.00, 28.00, …
# $ IsManualReport <lgl> TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE,…
# $ LogId <dbl> 1.462234e+12, 1.462320e+12, 1.460510e+12, 1.461283e+12, 1.463098e+12, 1.460938e+12, 1.46240…
# Check distinct Id
# For each table, count the distinct rows (n_distinct() on the whole table)
# and the distinct participants (n_distinct() on the Id column).
# The values recorded from a previous run are kept as trailing comments.
n_distinct(daily_activity)
# [1] 940
n_distinct(daily_activity$Id)
# [1] 33

n_distinct(heart_ratesec)
# [1] 2483658
n_distinct(heart_ratesec$Id)
# [1] 14

# 410 distinct rows versus the 413 rows reported by glimpse(daily_sleep):
# the sleep table contains 3 exact duplicate rows.
n_distinct(daily_sleep)
# [1] 410
n_distinct(daily_sleep$Id)
# [1] 24

n_distinct(weight_log)
# [1] 67
n_distinct(weight_log$Id)
# [1] 8
# Daily Activity data contains 940 records from 33 different participants on their daily activities.
# Heart Rate data contains 2,483,658 records from 14 different participants on their heart rate.
# Daily Sleep data contains 413 records (410 distinct rows, so 3 are exact duplicates) from 24 different participants on their daily sleep.
# Weight Log data contains 67 records from 8 different participants on their weight.
# A major limitation of this data is the small number of participants behind some
# tracker measurements: 14 (heart rate) and 8 (weight) participants are too few
# to support reliable recommendations.
# ACTIVITY
# Summary statistics (min, quartiles, median, mean, max) for steps, distance,
# sedentary minutes, and calories in the daily activity table.
daily_activity %>%
  select(TotalSteps, TotalDistance, SedentaryMinutes, Calories) %>%
  summary()
# Output recorded from a previous run (commented so the script stays valid R):
# TotalSteps TotalDistance SedentaryMinutes Calories
# Min. : 0 Min. : 0.000 Min. : 0.0 Min. : 0
# 1st Qu.: 3790 1st Qu.: 2.620 1st Qu.: 729.8 1st Qu.:1828
# Median : 7406 Median : 5.245 Median :1057.5 Median :2134
# Mean : 7638 Mean : 5.490 Mean : 991.2 Mean :2304
# 3rd Qu.:10727 3rd Qu.: 7.713 3rd Qu.:1229.5 3rd Qu.:2793
# Max. :36019 Max. :28.030 Max. :1440.0 Max. :4900
# INTENSITIES
# Summary statistics for the minutes spent at each activity intensity level
# in the daily intensities table.
daily_Intensities %>%
  select(
    VeryActiveMinutes,
    FairlyActiveMinutes,
    LightlyActiveMinutes,
    SedentaryMinutes
  ) %>%
  summary()
# Output recorded from a previous run (commented so the script stays valid R):
# VeryActiveMinutes FairlyActiveMinutes LightlyActiveMinutes SedentaryMinutes
# Min. : 0.00 Min. : 0.00 Min. : 0.0 Min. : 0.0
# 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.:127.0 1st Qu.: 729.8
# Median : 4.00 Median : 6.00 Median :199.0 Median :1057.5
# Mean : 21.16 Mean : 13.56 Mean :192.8 Mean : 991.2
# 3rd Qu.: 32.00 3rd Qu.: 19.00 3rd Qu.:264.0 3rd Qu.:1229.5
# Max. :210.00 Max. :143.00 Max. :518.0 Max. :1440.0
# CALORIES
# Summary statistics for the Calories column of the daily calories table.
daily_calories %>%
  select(Calories) %>%
  summary()
# Output recorded from a previous run (commented so the script stays valid R):
# Calories
# Min. : 0
# 1st Qu.:1828
# Median :2134
# Mean :2304
# 3rd Qu.:2793
# Max. :4900
# SLEEP
# Summary statistics for sleep records per day, minutes asleep, and minutes
# in bed from the daily sleep table.
daily_sleep %>%
  select(TotalSleepRecords, TotalMinutesAsleep, TotalTimeInBed) %>%
  summary()
# Output recorded from a previous run (commented so the script stays valid R):
# TotalSleepRecords TotalMinutesAsleep TotalTimeInBed
# Min. :1.000 Min. : 58.0 Min. : 61.0
# 1st Qu.:1.000 1st Qu.:361.0 1st Qu.:403.0
# Median :1.000 Median :433.0 Median :463.0
# Mean :1.119 Mean :419.5 Mean :458.6
# 3rd Qu.:1.000 3rd Qu.:490.0 3rd Qu.:526.0
# Max. :3.000 Max. :796.0 Max. :961.0
# WEIGHT
# Summary statistics for weight (kg) and body-fat values in the weight log.
# The recorded output shows 65 NA values in Fat, so its statistics rest on
# only the few non-missing entries.
weight_log %>%
  select(WeightKg, Fat) %>%
  summary()
# Output recorded from a previous run (commented so the script stays valid R):
# WeightKg Fat
# Min. : 52.60 Min. :22.00
# 1st Qu.: 61.40 1st Qu.:22.75
# Median : 62.50 Median :23.50
# Mean : 72.04 Mean :23.50
# 3rd Qu.: 85.05 3rd Qu.:24.25
# Max. :133.50 Max. :25.00
# NA's :65
# FINDINGS FROM THIS ANALYSIS:
# The participants' sedentary time is too high. A good marketing
# strategy could help reduce it.
# The average total steps per day is 7,638. According to the CDC, it is
# recommended to reach 10,000 steps per day. A 2020 study found that participants
# who took 8,000 steps per day had a 51% lower risk of dying from any cause.
#Average sleep time of participants is 7 hours.
# Visualizations
# Relationship between Steps and Sedentary Time.
# Scatter plot of daily total steps (x) against sedentary minutes (y), with
# geom_smooth()'s default trend line drawn on top of the points.
ggplot(
  data = daily_activity,
  mapping = aes(x = TotalSteps, y = SedentaryMinutes)
) +
  geom_point() +
  geom_smooth() +
  labs(title = "Total Steps vs Sedentary Minutes")
# There is a negative correlation between the two: the more sedentary time you have, the fewer steps you take during the day.
# This finding can inform the next marketing strategy, encouraging users to
# be more active and spend less time sedentary.
# Relationship between Minutes Asleep and Time in Bed.
# Scatter plot of minutes asleep (x) against total time in bed (y), with
# geom_smooth()'s default trend line drawn on top of the points.
ggplot(
  data = daily_sleep,
  mapping = aes(x = TotalMinutesAsleep, y = TotalTimeInBed)
) +
  geom_point() +
  geom_smooth() +
  labs(title = "Minutes Asleep vs Time In Bed")
# As the plot shows, the relationship is almost perfectly linear. To help users
# improve their sleep, Bellabeat should add a notification feature that lets users
# know it's time for bed.
# Relationship between Steps and Calories.
# Scatter plot of daily total steps (x) against calories (y), with
# geom_smooth()'s default trend line drawn on top of the points.
# The x and y aesthetics must be separated by a comma inside aes().
ggplot(
  data = daily_activity,
  mapping = aes(x = TotalSteps, y = Calories)
) +
  geom_point() +
  geom_smooth() +
  labs(title = "Total Steps vs Calories")
# The more steps we take and the more active we are, the more calories we burn.
# The data dates back to 2016. Technology has evolved and people's habits might have changed.
# The sample size is relatively small.
# Based on the limited data, participants were most likely to record activity tracker data, but not all participants recorded their progress.
# Since sedentary time is high, Bellabeat should add a vibrate function that
# triggers when the user has been sitting or idle for too long, signaling the
# user to get active.
#Obtain more data for an accurate analysis.
# Bellabeat should consider a points-based incentive system in which hitting
# daily fitness goals earns users points toward products or membership.