#(Borrowed from Andy Leone and Ian Gow)
# Purpose: download the EDGAR company index for one quarter, keep the rows
# that mention " 10-K", extract CIK / filing date / filing URL from each row,
# and download the first matching filing to a temporary file.

# str_detect(), str_extract() and regex() used below come from stringr.
library(stringr)

# download index file
# download.file() returns only a status code (0 on success), not the path it
# wrote to, so the destination path is kept in `tf` for unz() to use later.
# NOTE(review): SEC EDGAR may reject requests that do not declare a
# User-Agent; if the download fails with HTTP 403, pass `headers =` here -
# confirm against the current SEC access guidelines.
tf <- tempfile(fileext = ".zip")
idx <- download.file(
  url = paste0("https://www.sec.gov/Archives/edgar/full-index/",
               2019, "/QTR", 1, "/company.zip"),
  destfile = tf,
  mode = "wb"  # binary mode: a zip archive is corrupted by text-mode transfer
)
if (idx != 0) {
  stop("Download of the EDGAR index failed with status ", idx, call. = FALSE)
}

# unzip and read the index file
zz <- unz(description = tf, filename = "company.idx")
idxfil <- readLines(con = zz)
close(zz)
# The archive is no longer needed once its contents are in memory.
unlink(tf)

# get all of the rows that meet the desired criteria
lst <- str_detect(idxfil, regex(" 10-K"))
# `lst == TRUE` is kept as written so the auto-generated column name of the
# resulting data frame stays the same as before.
fls <- data.frame(idxfil[lst == TRUE])
if (nrow(fls) == 0) {
  stop("No index rows matched the filter ' 10-K'", call. = FALSE)
}

# Clean up index file for use in getting individual files
# CIK: the run of digits that is followed by 1-20 spaces and a YYYY-MM-DD date.
fls$CIK <- str_extract(fls[, 1], regex("(?<= )\\d+(?=( ){1,20}\\d{4}-\\d{2}-\\d{2})"))
# Filing date: the first space-delimited YYYY-MM-DD token in the row.
fls$fildt <- str_extract(fls[, 1], regex("(?<= )\\d{4}-\\d{2}-\\d{2}(?= )"))
# Full URL: archive root plus the relative "edgar/data/...txt" path in the row.
fls$url <- paste0("https://www.sec.gov/Archives/",
                  str_extract(fls[, 1], regex("edgar/data/.*?txt")))
rm(idxfil, idx, lst, tf, zz)

# get an individual file
i <- 1
# Destination for the downloaded filing; `tmp` holds download.file()'s status
# code, while the filing text itself is written to the path in `tmphld`.
tmphld <- tempfile(fileext = ".txt")
tmp <- download.file(url = fls$url[[i]], destfile = tmphld)