# Stats#101

# Stats 101

# 191230

# JMCabezas - jcabezas@umd.edu



# Data Analysis 101

# Session setup
#
# The workspace is deliberately NOT cleared with rm(list = ls()): that call
# deletes every object the user already has loaded when the script is sourced,
# yet it does not detach packages or reset options, so it does not give a
# clean session anyway. Restart R to run this script from a fresh session.

#setwd("~/Users/Folder") # for macOS users

#setwd("c:/temp/stats101") # for Windows users


# Load the built-in Seatbelts time series (datasets package).
data("Seatbelts")

# Plain data-frame copy of the series, one column per variable.
seatbelts01 <- data.frame(Seatbelts)

# Same variables, preceded by the calendar year and the abbreviated month
# name of each observation.
seatbelts <- data.frame(
  Year = floor(time(Seatbelts)),
  Month = factor(cycle(Seatbelts), labels = month.abb),
  Seatbelts
)

# Inspect the column names and the first and last rows.
colnames(seatbelts)

head(seatbelts)

tail(seatbelts)



# Descriptives

# Base-R summaries of two of the variables.
summary(seatbelts$DriversKilled)

summary(seatbelts$PetrolPrice)


# stargazer is a third-party package: install it only when it is missing
# instead of re-downloading it on every run of the script (which also fails
# when offline or when no CRAN mirror is configured).
if (!requireNamespace("stargazer", quietly = TRUE)) {
  install.packages("stargazer")
}

# Summary-statistics table of the data frame, printed as plain text.
stargazer::stargazer(seatbelts, type = "text")

# Same table with the 25th/75th percentiles dropped and the median added.
stargazer::stargazer(seatbelts, type = "text",
                     omit.summary.stat = c("p25", "p75"), median = TRUE)

# Same statistics again, this time also written to descriptives.html.
stargazer::stargazer(seatbelts, omit.summary.stat = c("p25", "p75"),
                     median = TRUE, out = "descriptives.html")



# Measures of central tendency


# Mean

# Number of observations; N is used again further down in the script.
N <- nrow(seatbelts)

N

# Arithmetic mean by hand: total divided by the number of observations.
with(seatbelts, sum(DriversKilled) / N)

# Same value rounded to two decimals.
with(seatbelts, round(sum(DriversKilled) / N, digits = 2))


# Mode

# No mode is computed here; the histogram shows where the values concentrate.
hist(seatbelts$DriversKilled)


# Median

# Smallest value, largest value and the distance between them.
min(seatbelts$DriversKilled)

max(seatbelts$DriversKilled)

diff(range(seatbelts$DriversKilled))

median(seatbelts$DriversKilled)

# Quantiles just below, at and just above the median.
quantile(seatbelts$DriversKilled, probs = c(0.49, 0.50, 0.51))



# Measures of dispersion

# Standard deviation

mean(seatbelts$DriversKilled)

# Deviation of each observation from the mean, and its square.
seatbelts$meandistance <-
  seatbelts$DriversKilled - mean(seatbelts$DriversKilled)

seatbelts$meandistance_sq <- seatbelts$meandistance^2

# Sample standard deviation by hand. The divisor has to be parenthesised:
# `/` binds tighter than `-`, so `sum(x) / N - 1` would compute
# `(sum(x) / N) - 1` instead of dividing the sum of squares by N - 1.
sqrt(sum(seatbelts$meandistance_sq) / (N - 1))

# Built-in equivalent; it should match the manual result above.
sd(seatbelts$DriversKilled)





# Mean comparison

# NOTE(review): `pres17` is not created anywhere in the visible part of this
# script -- confirm it is loaded before this section runs.

# Kernel density estimates of the two pres17 columns being compared.
densp1v <- density(pres17$p171vppinera)

densp2v <- density(pres17$p172vppinera)

# First density as a solid curve on a 0-1 x-axis, without title or x label.
plot(densp1v, xlim = c(0, 1), main = "", xlab = "")

# Second density overlaid as a dashed curve.
lines(densp2v, lty = 2)

# Vertical gray lines at each column's mean (dashed for the second column).
abline(v = mean(pres17$p171vppinera), col = "gray")

abline(v = mean(pres17$p172vppinera), col = "gray", lty = 2)

legend("topleft", legend = c("1st round", "2nd round"), lty = c(1, 2))