# Load the H1B petition records from the working directory.
h1b <- read.csv("h1b_kaggle.csv")
# Remove the `X` column (a no-op when the column is absent).
h1b[["X"]] <- NULL
# Give every petition a weight of 1 so petitions can be counted with sum().
h1b[["count"]] <- 1
library(ggplot2)
library(dplyr)
# Top 20 Petitioners ----
# Count petitions per employer, keep the 20 largest, and chart each
# employer's share of those 20.
h1btoppetitioners <- aggregate(count ~ EMPLOYER_NAME, data = h1b, FUN = sum)
h1btoppetitioners <- h1btoppetitioners[
  order(h1btoppetitioners$count, decreasing = TRUE),
]
h1btoppetitioners <- head(h1btoppetitioners, 20)
# `percentage` is stored as a 0-1 fraction of the top-20 total; it is kept in
# that form so any other code reading the column sees the same values.
h1btoppetitioners$percentage <-
  h1btoppetitioners$count / sum(h1btoppetitioners$count)
ggplot(
  data = h1btoppetitioners,
  aes(
    x = reorder(EMPLOYER_NAME, percentage),
    # Scale to 0-100 so the plotted values agree with the "Percentage" label.
    y = 100 * percentage,
    fill = EMPLOYER_NAME
  )
) +
  geom_bar(stat = "identity") +
  labs(
    title = "Top 20 Petitioners for H1B Visas",
    x = "Company",
    y = "Petitions in Percentage of Top 20"
  ) +
  theme(legend.position = "none") +
  # Horizontal bars so the employer names stay readable.
  coord_flip()
# Top 3 Petitioners vs. San Francisco ----
# Compare the 2014 prevailing wages of Infosys, TCS and Wipro petitions with a
# San Francisco worksite against the TotalPay of rows marked "FT" in
# Salaries.csv.
#
# Every row filter is wrapped in which(): a bare logical mask turns NA
# comparisons (missing employer, worksite, year or status) into all-NA phantom
# rows, whereas which() keeps only the rows that actually match.
salaries <- read.csv("Salaries.csv")

infosys <- h1b[which(h1b$EMPLOYER_NAME == "INFOSYS LIMITED"), ]
infosyssf <- infosys[which(infosys$WORKSITE == "SAN FRANCISCO, CALIFORNIA"), ]
infosyssf1 <- infosyssf[which(infosyssf$YEAR == 2014), ]
Infosyssalary <- infosyssf1$PREVAILING_WAGE
mean(Infosyssalary, na.rm = TRUE)

tcs <- h1b[which(h1b$EMPLOYER_NAME == "TATA CONSULTANCY SERVICES LIMITED"), ]
tcssf <- tcs[which(tcs$WORKSITE == "SAN FRANCISCO, CALIFORNIA"), ]
tcssf1 <- tcssf[which(tcssf$YEAR == 2014), ]
TCSsalary <- tcssf1$PREVAILING_WAGE
mean(TCSsalary, na.rm = TRUE)

wipro <- h1b[which(h1b$EMPLOYER_NAME == "WIPRO LIMITED"), ]
wiprosf <- wipro[which(wipro$WORKSITE == "SAN FRANCISCO, CALIFORNIA"), ]
wiprosf1 <- wiprosf[which(wiprosf$YEAR == 2014), ]
Wiprosalary <- wiprosf1$PREVAILING_WAGE
mean(Wiprosalary, na.rm = TRUE)

salaries2 <- salaries[which(salaries$Status == "FT"), ]
SFsalary <- salaries2$TotalPay
mean(SFsalary, na.rm = TRUE)

# NOTE(review): log = "y" needs strictly positive values; confirm that none of
# the four groups contains zero or negative pay.
boxplot(
  Infosyssalary, Wiprosalary, TCSsalary, SFsalary,
  names = c("Infosys", "Wipro", "TCS", "SF"),
  log = "y",
  main = "Salary Distribution of H1B Companies vs. San Francisco City",
  xlab = "Groups",
  ylab = "Log of Salaries"
)
# Top 3 Petitioners vs. Stock ----
# Load the Google Trends export and keep rows 193-260 (68 rows), renaming the
# first two columns to `week` and `score`.
googleTrends <- read.csv("multiTimeline.csv", row.names = NULL)
googleTrends <- googleTrends[193:260, ]
names(googleTrends) <- c("week", "score")
# Go through character first so a factor column yields its printed values
# rather than its internal level codes.
googleTrends$score <- as.numeric(as.character(googleTrends$score))
googleTrends$score <- log(googleTrends$score)
# Relative change of the log score against the previous row. dplyr::lag is
# called explicitly: base stats::lag() does not shift the values, so relying
# on masking order would silently make every change 0.
# NOTE(review): this is the relative change of log(score), not of the raw
# score, and is undefined where the previous log score is 0 - confirm intended.
googleTrends$pchange <- with(
  googleTrends,
  (score - dplyr::lag(score, 1)) / dplyr::lag(score, 1)
)
# Sort a stock table by Date and attach the columns used for plotting.
#
# stock:         data frame with `Date` and `Adj.Close` columns.
# trend_pchange: Google Trends change series, one value per row of `stock`.
#
# Returns `stock` ordered by Date with `pchange` (relative change of Adj.Close
# against the previous row) and `gtrendpchange` added, and with `Date` replaced
# by a weekly sequence starting 2016-01-03.
prepare_weekly_stock <- function(stock, trend_pchange) {
  n_weeks <- length(trend_pchange)
  # A row count that is a multiple of n_weeks would otherwise recycle the
  # trend values and dates silently; fail loudly instead.
  if (nrow(stock) != n_weeks) {
    stop(
      "stock table has ", nrow(stock), " rows but the trend series has ",
      n_weeks,
      call. = FALSE
    )
  }
  # NOTE(review): ordering on the raw Date column is chronological only if the
  # dates are ISO formatted (YYYY-MM-DD) - confirm against the CSV files.
  stock <- stock[order(stock$Date, decreasing = FALSE), ]
  # dplyr::lag is explicit because base stats::lag() does not shift values.
  previous_close <- dplyr::lag(stock$Adj.Close, 1)
  stock$pchange <- (stock$Adj.Close - previous_close) / previous_close
  stock$gtrendpchange <- trend_pchange
  stock$Date <- seq(as.Date("2016-01-03"), by = "1 week", length.out = n_weeks)
  stock
}

# Build the line chart comparing stock and Google Trends changes.
#
# stock:   data frame returned by prepare_weekly_stock().
# company: display name inserted into the plot title.
#
# Returns the ggplot object, so a top-level call prints it as before.
plot_trend_vs_stock <- function(stock, company) {
  ggplot(stock, aes(Date)) +
    geom_line(aes(y = pchange, colour = "Stock Price")) +
    geom_line(aes(y = gtrendpchange, colour = "Google Trend")) +
    labs(
      y = "Percent Change",
      title = paste0(
        "Percent Change in Google Trends v. ", company, " Stock Prices"
      )
    )
}

# Infosys: keep as many leading rows as there are trend rows (68 for
# googleTrends[193:260, ]), then prepare and plot.
infosysstock <- read.csv("table.csv")
infosysstock <- infosysstock[seq_len(nrow(googleTrends)), ]
infosysstock <- prepare_weekly_stock(infosysstock, googleTrends$pchange)
plot_trend_vs_stock(infosysstock, "Infosys")

# TCS: this file is used whole, so it must already have one row per trend row.
TCSstock <- read.csv("TCS.NS.csv")
TCSstock <- prepare_weekly_stock(TCSstock, googleTrends$pchange)
plot_trend_vs_stock(TCSstock, "TCS")

# Wipro: this file is used whole, so it must already have one row per trend row.
wiprostock <- read.csv("WIT.csv")
wiprostock <- prepare_weekly_stock(wiprostock, googleTrends$pchange)
plot_trend_vs_stock(wiprostock, "Wipro")