# Load the NADAC price file, keep only the rows that carry a value in
# Corresponding_Generic_Drug_NADAC_Per_Unit, then keep the first row seen
# for each NDC.Description.
x <- read.csv(
  "NADAC__National_Average_Drug_Acquisition_Cost_.csv",
  as.is = TRUE
)
has_generic_price <- !is.na(x$Corresponding_Generic_Drug_NADAC_Per_Unit)
x1 <- x[has_generic_price, ]
x2 <- x1[!duplicated(x1$NDC.Description), ]
# Read the two product tables: one comma-separated, one "~"-delimited.
y <- read.csv("Drug_Products_-_2018_4Q.csv", as.is = TRUE)
z <- read.csv("products.txt", sep = "~", as.is = TRUE)

# Build a numeric join key on `y` from its application-number column so it
# can be merged with `z` on Appl_No.
appl_no <- as.numeric(y$FDA.Application.Number)
y$Appl_No <- appl_no
w <- merge(z, y, by = "Appl_No")

# Keep only the rows whose application type is "A".
# NOTE(review): presumably "A" marks abbreviated (generic) applications --
# confirm against the products.txt data dictionary.
is_type_a <- w$Appl_Type == "A"
w1 <- w[is_type_a, ]
# Count, for every ingredient in `w1`, how many distinct applicants appear
# with it, and attach that count to each row of `w1` as the `Comp` column.
#
# `comp` is kept as a two-column character matrix ("Ingredient", "Comp"),
# one row per ingredient in sorted order, so later code that converts
# `Comp` from text to numeric keeps working.
#
# split() groups the applicants in a single pass instead of re-sorting the
# ingredient list on every loop iteration. It also ignores rows whose
# Ingredient is NA: previously such rows leaked into every per-ingredient
# subset (NA comparisons select an all-NA row), inflating each count by one
# and adding a spurious NA row to `comp`. An empty `w1` now yields an empty
# `comp` instead of failing on a 1:0 loop.
applicants_by_ingredient <- split(w1$Applicant, w1$Ingredient)
n_applicants <- vapply(
  applicants_by_ingredient,
  function(applicants) length(unique(applicants)),
  integer(1)
)
comp <- matrix(
  c(names(applicants_by_ingredient), as.character(n_applicants)),
  ncol = 2,
  dimnames = list(NULL, c("Ingredient", "Comp"))
)
w2 <- merge(w1, comp, by = "Ingredient")
# Reduce the product table to its identifying columns plus the applicant
# count, drop repeated rows, and join the result onto the NADAC rows by NDC.
keep_cols <- c("NDC", "Trade_Name", "Ingredient", "Comp", "DF.Route")
w3 <- w2[, colnames(w2) %in% keep_cols]
is_repeat <- duplicated(w3)
w4 <- w3[!is_repeat, ]
w5 <- merge(x2, w4, by = "NDC")

# Price is the generic per-unit figure divided by the row's own per-unit
# figure.
generic_unit_cost <- w5$Corresponding_Generic_Drug_NADAC_Per_Unit
w5$Price <- generic_unit_cost / w5$NADAC_Per_Unit

# Comp comes out of the merge as text (or a factor on older R defaults);
# going through as.character() first converts the printed value rather than
# a factor's internal level codes.
comp_text <- as.character(w5$Comp)
w5$Comp <- as.numeric(comp_text)
# For each row of `w5`, total the reimbursed amount and the number of
# prescriptions recorded in `v` under the same NDC, append both totals as
# one-column matrices named Revenue and Scrips, and write the table to
# "drugs.csv".
#
# NOTE(review): `v` is not created anywhere in the visible script; it has to
# be loaded beforehand. The guard below turns a bare "object not found"
# into an actionable message.
if (!exists("v")) {
  stop(
    "`v` must be loaded before this step: a data frame with NDC, ",
    "Total.Amount.Reimbursed and Number.of.Prescriptions columns",
    call. = FALSE
  )
}

# Sum `values` over the entries of `v` whose NDC equals `ndc`. which() drops
# NA comparisons, so a missing NDC on either side contributes nothing and
# the total is 0, matching the previous na.rm = TRUE behaviour.
sum_for_ndc <- function(ndc, values) {
  sum(values[which(v$NDC == ndc)], na.rm = TRUE)
}

# Iterating over the NDC values themselves (rather than 1:nrow) means an
# empty `w5` produces 0-row results instead of a subscript error.
Revenue <- matrix(
  vapply(
    w5$NDC,
    sum_for_ndc,
    numeric(1),
    values = v$Total.Amount.Reimbursed,
    USE.NAMES = FALSE
  ),
  ncol = 1
)
Scrips <- matrix(
  vapply(
    w5$NDC,
    sum_for_ndc,
    numeric(1),
    values = v$Number.of.Prescriptions,
    USE.NAMES = FALSE
  ),
  ncol = 1
)

w5$Revenue <- Revenue
w5$Scrips <- Scrips
write.csv(w5, "drugs.csv")