# SEC filings ----

#(Borrowed from Andy Leone and Ian Gow)

# Download the EDGAR company index ----
#
# Fetches company.zip for 2019 Q1 from the EDGAR full-index directory and
# reads the company.idx member it contains into `idxfil`, one element per line.
#
# The temp-file path is kept in `tf` so the unzip step below can find the
# archive; `download.file()` itself only returns a status code (kept in `idx`).
#
# NOTE(review): SEC.gov is understood to reject automated requests that lack a
# descriptive User-Agent. If this download fails with HTTP 403, supply one via
# `download.file(headers = c("User-Agent" = "..."))` -- confirm against the
# current EDGAR access policy.

tf <- tempfile(fileext = ".zip")

idx <- download.file(
  url = paste0(
    "https://www.sec.gov/Archives/edgar/full-index/",
    2019, "/QTR", 1, "/company.zip"
  ),
  destfile = tf,
  mode = "wb" # binary transfer: the payload is a zip archive
)

if (idx != 0L) {
  stop("Download of the EDGAR company index failed (status ", idx, ")",
       call. = FALSE)
}

# Unzip and read the index file ----
#
# The connection is always closed, even if reading fails part-way through.

zz <- unz(description = tf, filename = "company.idx")

idxfil <- tryCatch(
  readLines(con = zz),
  finally = close(zz)
)

# Select the index rows of interest ----
#
# Keeps every line of `idxfil` that contains the substring " 10-K". Because
# this is a plain substring match, form variants that begin with "10-K"
# (for example "10-K/A") are kept as well.
#
# stringr functions are namespace-qualified so this section does not rely on
# the package having been attached earlier in the session.

lst <- stringr::str_detect(idxfil, " 10-K")

if (!any(lst)) {
  stop("No index rows matching ' 10-K' were found", call. = FALSE)
}

# One row per matching index line; the raw text stays in the first column.
fls <- data.frame(raw = idxfil[lst], stringsAsFactors = FALSE)

# Parse the fields needed to fetch individual filings ----

# CIK: the run of digits that is followed by 1-20 spaces and then a
# YYYY-MM-DD date.
fls$CIK <- stringr::str_extract(
  fls[[1]],
  "(?<= )\\d+(?=( ){1,20}\\d{4}-\\d{2}-\\d{2})"
)

# Filing date: the first space-delimited YYYY-MM-DD token on the line.
fls$fildt <- stringr::str_extract(
  fls[[1]],
  "(?<= )\\d{4}-\\d{2}-\\d{2}(?= )"
)

# Full URL of the filing text: the "edgar/data/...txt" path from the index
# line, appended to the Archives root.
fls$url <- paste0(
  "https://www.sec.gov/Archives/",
  stringr::str_extract(fls[[1]], "edgar/data/.*?txt")
)

# Drop the intermediates that are no longer needed.
rm(idxfil, idx, lst, tf, zz)

# Get an individual filing ----
#
# Downloads the filing in row `i` of `fls` to a temporary file. The path of
# the downloaded file is kept in `tmphld`; `tmp` holds the status code
# returned by `download.file()`.

i <- 1

# Destination for the download; must exist as a variable before the call.
tmphld <- tempfile(fileext = ".txt")

tmp <- download.file(url = fls$url[[i]], destfile = tmphld)

# Google Sites

# Report abuse