Agenda:
All about APIs at SBM today:
An example of a JS library: https://github.com/SBU-BMI/redcap.
Screenshot of postman:
Jonas' experiments: https://0857f9879749e82d493945f8a805968a7c031889-www.googledrive.com/host/0BwwZEXS3GesiTjlHSmlOcEJaeDA/redcap/
...
...
really dirty (although Excel will more or less accept it): www.googledrive.com/host/0B7um4d7u6gG0U0VTUmJNb0NyUUE/Dsrip/Data/all.txt
after some digestion via Excel into tab-delimited: www.googledrive.com/host/0BwwZEXS3GesiTjlHSmlOcEJaeDA/lala/all.txt
a possible solution: www.googledrive.com/host/0BwwZEXS3GesiTjlHSmlOcEJaeDA/lala/all.html
Challenge
Write an app that will parse the really dirty data into the same JSON array that was produced from the cleaner data.
It should look something like this:
[ { "ICD-9": "38.9", "ICD-9 description": "Unspecified septicemia", "Subgroups": "Infection", "Counts": "151" }, { "ICD-9": "V30.00 ", "ICD-9 description": "Single liveborn, born in hospital, delivered without mention of cesarean section", "Subgroups": "newborn", "Counts": "65" }, { "ICD-9": "599", "ICD-9 description": "Urinary tract infection, site not specified", "Subgroups": "Infection", "Counts": "56" }, { "ICD-9": "428.33", "ICD-9 description": "Acute on chronic diastolic heart failure", "Subgroups": "Heart disease", "Counts": "52" },
...
]
Responses to challenge
Jonas: www.googledrive.com/host/0BwwZEXS3GesiTjlHSmlOcEJaeDA/lala/dirty.html
--- dirty.html ---
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Dirty data parsing challenge</title>
<!-- jQuery has to load first: dirty.js calls $.get to download the data file -->
<script src="https://code.jquery.com/jquery-2.2.3.min.js"></script>
</head>
<body>
<!-- dirty.js replaces the content of <body> with the parsed result -->
<script src="dirty.js"></script>
</body>
</html>
--- dirty.js ---
console.log('dirty parsing, responding to challange at https://sites.google.com/a/mathbiol.org/i2/workshop/2016-05-18-may')

// Location of the raw export to be parsed (dirty file from Maggie).
var url = 'https://www.googledrive.com/host/0B7um4d7u6gG0U0VTUmJNb0NyUUE/Dsrip/Data/all.txt'

/**
 * Break one raw data row into an array of cell strings.
 *
 * Every `,"` and `",` pair is treated as a cell boundary, the last two
 * characters of the row are discarded, and each resulting cell loses one
 * leading comma and one trailing blank if present.
 *
 * @param {string} row - a single line of the dirty file (not the header)
 * @returns {string[]} the cells found in that line
 */
function splitDirtyRow(row) {
    return row
        // NOTE(review): this strips EVERY space, including the ones inside
        // descriptions, and makes the trailing-blank trim below a no-op for
        // plain spaces. Confirm against the data whether a narrower
        // whitespace pattern was intended.
        .replace(/ /g, '')
        .replace(/(,")|(",)/g, '##') // mark the quote/comma boundaries
        .slice(0, -2) // drop the last two characters of the row
        .split('##')
        .map(function (cell) {
            if (cell.charAt(0) === ',') { // if string starts with "," remove it
                cell = cell.slice(1)
            }
            if (cell.slice(-1) === ' ') { // also remove trailing blanks
                cell = cell.slice(0, -1)
            }
            return cell
        })
}

/**
 * Turn the text of the dirty file into an array of documents.
 *
 * The first non-empty line supplies the field names (split on ","); every
 * following line becomes one object keyed by those names. Cells missing
 * from a short row are left as undefined.
 *
 * @param {string} text - full content of the dirty file
 * @returns {Object[]} one object per data row, in file order
 */
function parseDirtyRows(text) {
    // The split collapses runs of line breaks, so empty pieces can only show
    // up at the very start or end of the file (leading/trailing newline).
    // Dropping them avoids an empty header and a bogus last record.
    var rows = text.split(/[\n\r]+/g).filter(function (row) {
        return row !== ''
    })
    if (rows.length === 0) {
        return []
    }
    var fields = rows[0].split(',') // header
    return rows.slice(1).map(function (row) {
        var cells = splitDirtyRow(row)
        var doc = {}
        fields.forEach(function (field, j) {
            doc[field] = cells[j]
        })
        return doc
    })
}

// Only start the download where jQuery and a DOM are available, so the
// parsing helpers above can also be loaded outside a browser page.
if (typeof $ === 'function' && typeof document !== 'undefined') {
    $.get(url, function (x) {
        console.log('loaded string with length ' + x.length)
        var y = parseDirtyRows(x) // results go here
        // your code processing y to, say generate interactive graphics, can go here.
        // As a place holder we'll display the first 100 entries
        var heading = document.createElement('h3')
        heading.style.color = 'navy'
        heading.textContent = 'First 100 documents in the ' + y.length + ' doc long array:'
        var listing = document.createElement('pre')
        listing.style.color = 'blue'
        // textContent keeps any "<" or "&" in the downloaded data from being
        // interpreted as markup.
        listing.textContent = JSON.stringify(y.slice(0, 100), null, 3)
        document.body.innerHTML = ''
        document.body.appendChild(heading)
        document.body.appendChild(listing)
    }).fail(function (jqXHR, textStatus) {
        console.error('could not load ' + url + ': ' + textStatus)
    })
}
API Summit, NY June 16 (thank you Wade !!!)
Web4Bio Stony Brook June 8-10 (BYO data hackathon)