# Deliverable for Analysis Phase
# DESCRIPTIVE ANALYSIS
# Summary statistics for ride_length (every value is in seconds)
with(all_trips_V2, mean(ride_length))   # arithmetic mean: total ride length / number of rides
with(all_trips_V2, median(ride_length)) # middle value once ride lengths are sorted ascending
with(all_trips_V2, max(ride_length))    # longest ride
with(all_trips_V2, min(ride_length))    # shortest ride
# summary() covers the same four statistics in a single call
with(all_trips_V2, summary(ride_length))
# Compare ride_length between member and casual riders.
# Passing `data =` lets the formula use bare column names, so the result
# columns are labelled "member_casual" and "ride_length" rather than
# "all_trips_V2$member_casual" and "all_trips_V2$ride_length".
aggregate(ride_length ~ member_casual, data = all_trips_V2, FUN = mean)
aggregate(ride_length ~ member_casual, data = all_trips_V2, FUN = median)
aggregate(ride_length ~ member_casual, data = all_trips_V2, FUN = max)
aggregate(ride_length ~ member_casual, data = all_trips_V2, FUN = min)
# Average ride time by day of week for members vs casual users.
# The days of the week come out in the wrong order unless day_of_week is an
# ordered factor, so convert it first with levels running Sunday to Saturday.
# Any value that is not one of these seven names becomes NA.
all_trips_V2$day_of_week <- ordered(
  all_trips_V2$day_of_week,
  levels = c(
    "Sunday", "Monday", "Tuesday", "Wednesday",
    "Thursday", "Friday", "Saturday"
  )
)
# Mean ride_length for each member_casual / day_of_week combination.
# `data =` keeps the result column names free of the "all_trips_V2$" prefix.
aggregate(
  ride_length ~ member_casual + day_of_week,
  data = all_trips_V2,
  FUN = mean
)
# Analyze ridership data by rider type and weekday
all_trips_V2 %>%
  # create a labelled weekday field from started_at using wday()
  mutate(weekday = wday(started_at, label = TRUE)) %>%
  # one group per rider type / weekday combination
  group_by(member_casual, weekday) %>%
  summarise(
    number_of_rides = n(),                # number of rides in the group
    average_duration = mean(ride_length), # mean ride_length in the group
    # Drop the leftover member_casual grouping so the result is ungrouped and
    # summarise() does not emit its "grouped output by" message.
    .groups = "drop"
  ) %>%
  # sort by rider type, then by weekday
  arrange(member_casual, weekday)