On this page, we share the code used to develop the app and make the information accessible to everyone.
FIGMA Workspace Link:
# Obtain the Monday of the week before the given date (formatted as dd-mm-YYYY)
def getMonday(dateNow):
    """Return the Monday of the week before ``dateNow`` as a ``dd-mm-YYYY`` string.

    The date is moved back to the Monday of its own week and then one
    further week into the past before being formatted.
    """
    days_since_monday = dateNow.weekday()
    previous_monday = dateNow - datetime.timedelta(days=days_since_monday, weeks=1)
    return previous_monday.strftime("%d-%m-%Y")
# Time offset: whole weeks from today to the reference date 2021-09-29 (negative once that date has passed)
def getOffSetTime():
    """Return the number of whole weeks from today to 2021-09-29.

    The day difference is divided by seven and truncated toward zero, so the
    result is negative when today is later than the reference date.
    """
    reference_day = datetime.date(2021, 9, 29)
    remaining = reference_day - datetime.date.today()
    weeks = remaining.days / 7
    return int(weeks)
# Convert a timestamp string into its date part (the text before the "T" separator)
def convertUTCtoString(stringDate):
    """Return the date part of a timestamp string, i.e. the text before the first "T".

    Args:
        stringDate: Timestamp text such as ``"2021-01-25T05:00:00-05:00"``.

    Returns:
        Everything before the first ``"T"``. When the string contains no
        ``"T"`` it is returned unchanged (slicing with the ``-1`` that
        ``str.find`` reports for "not found" used to drop the last character).
    """
    datePart, _, _ = stringDate.partition("T")
    return datePart
# Parse a "YYYY-MM-DD" date string into a datetime object
def convertUTCtoDatetime(stringDate):
    """Parse a ``YYYY-MM-DD`` string into a ``datetime.datetime`` object."""
    isoDayFormat = '%Y-%m-%d'
    parsed = datetime.datetime.strptime(stringDate, isoDayFormat)
    return parsed
def CasesPer100K(inputNumber):
    """Map confirmed COVID-19 cases per 100 thousand people to a risk level.

    Levels: 1 very low (<= 4), 2 low (<= 9), 3 medium (<= 50), 4 high (<= 100),
    5 very high (<= 199), 6 for anything above.
    """
    upper_bounds = (4, 9, 50, 100, 199)
    for level, bound in enumerate(upper_bounds, start=1):
        if inputNumber <= bound:
            return level
    # No bound matched (this is also where NaN ends up).
    return 6
def ConfirmedDeaths100K(inputNumber):
    """Map confirmed COVID-19 deaths per 100 thousand people to a risk level.

    Levels: 2 (< 0.1), 3 (<= 1), 4 (<= 2), 5 (<= 5), 6 for anything above.
    NOTE(review): unlike the other scales in this file, this one starts at 2
    rather than 1 -- confirm that this is intended.
    """
    # The first boundary is strict, the remaining ones are inclusive.
    if inputNumber < 0.1:
        return 2
    for level, bound in enumerate((1, 2, 5), start=3):
        if inputNumber <= bound:
            return level
    return 6
def PositivityRate(inputNumber):
    """Map the proportion of positive COVID-19 tests to a risk level.

    Levels: 1 very low (<= 2.9), 2 low (<= 4.9), 3 medium (<= 7.9),
    4 high (<= 10), 5 very high (<= 15), 6 for anything above.
    """
    upper_bounds = (2.9, 4.9, 7.9, 10, 15)
    for level, bound in enumerate(upper_bounds, start=1):
        if inputNumber <= bound:
            return level
    return 6
def Tests100K(inputNumber):
    """Map the number of COVID-19 tests per 100 thousand people to a risk level.

    More testing means lower risk: 1 very low (>= 5000), 2 low (>= 3000),
    3 medium (>= 2000), 4 high (>= 1000), 5 very high (>= 500), 6 below that.
    """
    lower_bounds = (5000, 3000, 2000, 1000, 500)
    for level, bound in enumerate(lower_bounds, start=1):
        if inputNumber >= bound:
            return level
    return 6
def BedsOccupied(inputNumber):
    """Map the percentage of occupied beds in a region to a risk level.

    Levels: 1 very low (<= 3), 2 low (<= 7), 3 medium (<= 12), 4 high (<= 15),
    5 very high (<= 20), 6 for anything above.
    """
    upper_bounds = (3, 7, 12, 15, 20)
    for level, bound in enumerate(upper_bounds, start=1):
        if inputNumber <= bound:
            return level
    return 6
def BedsUCIOccupied(inputNumber):
    """Map the percentage of occupied ICU beds in a region to a risk level.

    Levels: 1 very low (<= 3), 2 low (<= 7), 3 medium (<= 12), 4 high (<= 15),
    5 very high (<= 20), 6 for anything above.
    """
    upper_bounds = (3, 7, 12, 15, 20)
    for level, bound in enumerate(upper_bounds, start=1):
        if inputNumber <= bound:
            return level
    return 6
def ConvertToScale(inputNumber, target):
    """Convert an epidemiological value to its risk level on the model's scale.

    ``target`` names the variable and selects the scale function that is
    applied to ``inputNumber``. An unrecognised ``target`` yields ``None``.
    """
    scale_by_target = (
        ('confirmedCases100k', CasesPer100K),
        ('appliedTests100k', Tests100K),
        ('positivityRate', PositivityRate),
        ('bedsOccupiedPercentage', BedsOccupied),
        ('bedsUCIOccupiedPercentage', BedsUCIOccupied),
    )
    for name, scale in scale_by_target:
        if target == name:
            return scale(inputNumber)
    return None
# Constants for the WAQI air-quality API: station identifiers, access token
# and the JSON field names used when reading a feed response.
# NOTE(review): the access token is committed in clear text; consider loading
# it from configuration or the environment instead.
constantsWaqiData = {
    'SAN_MARTIN_PORRES': '@7580',
    # Was '@@7577'; the station list queried by getWaqiData uses '@7577'.
    'SAN_JUAN_LURIGANCHO': '@7577',
    'HUACHIPA': '@7578',
    'US_EMBASSY': '@8780',
    'SAN_BORJA': '@379',
    'TOKEN': 'd91a4fcc7a848a548eaaeba48ae3a30f08e5d402',
    'PM10': 'pm10',
    'PM2.5': 'pm25',
    'O3': 'o3',
    'DATA': 'data',
    'IAQI': 'iaqi',
    'FORECAST': 'forecast',
    'DAILY': 'daily',
}
# Build an environmental model based on past data from Open QA
# Static PM10 value per region returned by getCorrelationEnvironmental.
_PM10_VALUE_BY_REGION = {
    "AMAZONAS": 0.5627566916528904,
    "ANCASH": 0.8173273010971982,
    "APURIMAC": 0.6474805023725039,
    "AREQUIPA": 0.00781619824387759,
    "AYACUCHO": 0.784757253846917,
    "CAJAMARCA": 0.74164612953903,
    "CALLAO": 0.8970919031887599,
    "CUSCO": 0.6718167999215203,
    "HUANCAVELICA": 0.8545300238837067,
    "HUANUCO": 0.9363674742541704,
    "ICA": 0.8883159356278151,
    "JUNIN": 0.7652728444671624,
    "LA LIBERTAD": 0.6901790433435225,
    "LAMBAYEQUE": 0.8001240125250154,
    "LIMA METROPOLITANA": 0.8955054530573233,
    "LIMA REGION": 0.7958334419289006,
    "LORETO": 0.7671243231065161,
    "MADRE DE DIOS": 0.043260562569202896,
    "MOQUEGUA": 0.8344427809997235,
    "PASCO": 0.7625866761983898,
    "PIURA": 0.7023301190768831,
    "PUNO": 0.5806501532836558,
    "SAN MARTIN": 0.27776696450063176,
    "TACNA": 0.9045187208137486,
    "TUMBES": 0.7165344039171333,
    "UCAYALI": 0.8326786517830622,
}

# Static PM2.5 value per region returned by getCorrelationEnvironmental.
_PM25_VALUE_BY_REGION = {
    "AMAZONAS": 0.40523407228920016,
    "ANCASH": 0.7353143421541393,
    "APURIMAC": 0.510662452478387,
    "AREQUIPA": 0.16232971893474246,
    "AYACUCHO": 0.7521570003812963,
    "CAJAMARCA": 0.7064733203187411,
    "CALLAO": 0.8339499318238698,
    "CUSCO": 0.6994352173522465,
    "HUANCAVELICA": 0.8874905177687646,
    "HUANUCO": 0.7973598139768272,
    "ICA": 0.8389716317612844,
    "JUNIN": 0.7590791983766971,
    "LA LIBERTAD": 0.6778450625588925,
    "LAMBAYEQUE": 0.8293440823355036,
    "LIMA METROPOLITANA": 0.8184922730359367,
    "LIMA REGION": 0.7705467316337936,
    "LORETO": 0.6375692707379668,
    "MADRE DE DIOS": 0.23364576244812807,
    "MOQUEGUA": 0.6475992758574727,
    "PASCO": 0.7006550351215404,
    "PIURA": 0.7091754212637819,
    "PUNO": 0.6328545758367168,
    "SAN MARTIN": 0.047983266778415384,
    "TACNA": 0.8473840577109811,
    "TUMBES": 0.751373239144744,
    "UCAYALI": 0.8211500120056971,
}


def getCorrelationEnvironmental(element, region):
    """Return the static environmental value stored for a pollutant and region.

    ``element`` is compared against the ``'PM10'`` and ``'PM2.5'`` entries of
    ``constantsWaqiData``. A ``region`` that is not listed in
    ``constantsRegions`` yields ``0``; any other pollutant yields ``None``.
    """
    if region not in constantsRegions:
        return 0
    if element == constantsWaqiData['PM10']:
        value_by_region = _PM10_VALUE_BY_REGION
    elif element == constantsWaqiData['PM2.5']:
        value_by_region = _PM25_VALUE_BY_REGION
    else:
        return None
    return value_by_region.get(region)
# With the previous historical environmental dataset, we can assign a level (from 1 to 6) to a measured quantity
def getFactorV2(element, difference):
    """Return the level (1 to 6) of ``difference`` for the given pollutant.

    ``element`` must be ``'pm2.5'`` or ``'pm10'``; each has its own list of
    inclusive upper bounds for levels 1 to 5, and anything above the last
    bound is level 6. Any other ``element`` yields ``None``.
    """
    if element == 'pm2.5':
        upper_bounds = (21.683, 42.367, 63.05, 83.733, 104.417)
    elif element == 'pm10':
        upper_bounds = (50.313, 72.775, 95.236, 117.698, 140.16)
    else:
        return None
    for level, bound in enumerate(upper_bounds, start=1):
        if difference <= bound:
            return level
    return 6
# The results of the function above were calculated with the functions below.
# Because the Open Covid data is updated each Monday, we need a function that makes both sampling time ranges compatible.
def weeksList(DF, weeks=34):
    """Split daily readings into consecutive seven-day frames.

    Args:
        DF: DataFrame with a ``'local'`` column of ``YYYY-MM-DD`` strings and
            a ``'value'`` column. The first week starts on ``DF['local'][0]``.
        weeks: Number of weekly frames to build (34 by default, the value
            that used to be hard-coded).

    Returns:
        A list with one DataFrame per week. Each frame holds the rows of
        ``DF`` for every day of that week; a day without data is represented
        by a row whose ``'value'`` is NaN.

    Note:
        The week now covers all seven days. The earlier loop stopped before
        the last day, so one day per week never reached any frame.
        NOTE(review): values hard-coded elsewhere in this file may have been
        derived with the earlier six-day behaviour -- confirm before
        comparing results.
    """
    one_day = datetime.timedelta(days=1)
    week_start = convertUTCtoDatetime(DF['local'][0])
    # Build the membership set once instead of a list per inspected day.
    known_days = set(DF['local'])
    weekly_frames = []
    for _ in range(weeks):
        day_frames = []
        for offset in range(7):
            day_label = (week_start + offset * one_day).strftime('%Y-%m-%d')
            if day_label in known_days:
                day_frames.append(DF[DF['local'] == day_label])
            else:
                day_frames.append(pd.DataFrame({'local': [day_label], 'value': [np.nan]}))
        weekly_frames.append(pd.concat(day_frames, axis=0).reset_index(drop=True))
        week_start = week_start + 7 * one_day
    return weekly_frames
# Table with the correlations and weights of the epidemiological variables, depending on the input city
def getWeightTable(city):
    """Collect the weekly epidemiological rows of ``city`` into one DataFrame.

    One table is requested from ``getProcessedTable`` for every seventh day,
    going from ``238 + getOffSetTime()`` days ago up to today, oldest first.
    Only the rows whose ``'regions'`` equals ``city`` are kept, and that
    column is dropped.

    Args:
        city: Region name as it appears in the ``'regions'`` column.

    Returns:
        The concatenated rows with a fresh 0..n-1 index, or an empty
        DataFrame when no week could be fetched.

    Note:
        A week whose fetch fails is skipped with a logged warning. Frames
        are accumulated in a list, so a failure on the first week no longer
        discards every later week.
    """
    import logging  # local import: the file's import block is not visible here

    endDate = 238 + getOffSetTime()
    today = datetime.date.today()
    weeklyFrames = []
    for daysBack in range(endDate, 0, -7):
        try:
            weekTable = getProcessedTable(today - datetime.timedelta(days=daysBack))
        except Exception as error:
            # Best effort: a week that cannot be fetched is left out.
            logging.getLogger(__name__).warning(
                "Skipping week %s days back: %s", daysBack, error)
            continue
        weeklyFrames.append(weekTable[weekTable['regions'] == city].drop(columns='regions'))
    if not weeklyFrames:
        return pd.DataFrame()
    return pd.concat(weeklyFrames, ignore_index=True)
# Weekly average of environmental variables from Open QA
def getWeekAvg(df):
    """Average the ``'value'`` column of each weekly frame.

    Args:
        df: Iterable of DataFrames, each with ``'local'`` and ``'value'``
            columns (the shape produced by ``weeksList``).

    Returns:
        A DataFrame with one row per input frame: ``'local'`` is the first
        ``'local'`` entry of the frame and ``'value'`` is the mean of its
        available values. Missing values (NaN or None) are ignored; a frame
        with no available value gets NaN. An empty input gives an empty
        DataFrame with the same two columns.

    Note:
        The earlier ``is not None`` filter let NaN through, so a single
        missing day turned the whole weekly average into NaN.
    """
    rows = []
    for frame in df:
        # to_numeric turns None into NaN; Series.mean then skips NaN.
        weekly_mean = pd.to_numeric(frame['value']).mean()
        rows.append({'local': frame['local'].iloc[0], 'value': weekly_mean})
    return pd.DataFrame(rows, columns=['local', 'value'])
# Get the static correlation of the environmental layer from past OpenQA data (from 2021-01-25 until 2021-08-24)
# (at the 1st, 25th, 50th, 75th, and 99th percentiles of the distribution), which we categorised into five levels of willingness to take risks: very low, low, neutral, high, and very high, respectively.
# Note that only the environmental dataset of Carabayllo is used, because it shows an average environmental behavior and no other source has been found.
def getStaticCorrelation(city):
    """Correlate weekly PM10 / PM2.5 averages with the weekly deaths of ``city``.

    The pollutant readings come from ``data_CARABAYLLO.csv`` (rows dated
    2021-01-25 or later), are averaged per day and then per week, and are
    joined on the date with the table returned by ``getWeightTable(city)``.

    Args:
        city: Region name forwarded to ``getWeightTable``.

    Returns:
        ``{'pm10': value, 'pm2.5': value}`` where each value is the absolute
        correlation between ``'confirmedDeaths100k'`` and the weekly
        pollutant average.

    Note:
        The date columns are converted with whole-column operations instead
        of per-row ``.at`` writes into a column-subset frame, and the two
        pollutants share one pipeline.
    """
    def weeklyAverage(readings, parameter):
        # Daily mean of one pollutant, regrouped into weekly averages.
        rows = readings[readings['parameter'] == parameter]
        daily = rows.groupby('local')['value'].mean().reset_index()
        return getWeekAvg(weeksList(daily))

    def deathsCorrelation(weekly, pollutantWeekly):
        # Absolute correlation between weekly deaths and the pollutant average.
        merged = pd.merge(left=weekly, right=pollutantWeekly, on='local')
        pair = merged[['confirmedDeaths100k', 'value']].dropna()
        return abs(pair.corr()['value']['confirmedDeaths100k'])

    def toIsoDay(stamp):
        # The weight table stores dates as 'dd-mm-YYYY HH:MM:SS'.
        parsed = datetime.datetime.strptime(stamp, '%d-%m-%Y %H:%M:%S')
        return parsed.strftime('%Y-%m-%d')

    readings = pd.read_csv('data_CARABAYLLO.csv')[['local', 'parameter', 'value', 'unit']].copy()
    readings['local'] = readings['local'].map(lambda stamp: convertUTCtoString(stamp).strip())
    readings = readings[readings['local'] >= '2021-01-25']

    # Concentrations are in µg/m³.
    pm25Weekly = weeklyAverage(readings, 'pm2.5')
    pm10Weekly = weeklyAverage(readings, 'pm10')

    weekly = getWeightTable(city)
    weekly['date'] = weekly['date'].map(toIsoDay)
    weekly = weekly.rename(columns={'date': 'local'})

    pm25Correlation = deathsCorrelation(weekly, pm25Weekly)
    pm10Correlation = deathsCorrelation(weekly, pm10Weekly)
    return {'pm10': pm10Correlation, 'pm2.5': pm25Correlation}
# Region names accepted by the model; getCorrelationEnvironmental returns 0
# for any region that is not in this list.
constantsRegions = [
    'AMAZONAS',
    'ANCASH',
    'APURIMAC',
    'AREQUIPA',
    'AYACUCHO',
    'CAJAMARCA',
    'CALLAO',
    'CUSCO',
    'HUANCAVELICA',
    'HUANUCO',
    'ICA',
    'JUNIN',
    'LA LIBERTAD',
    'LAMBAYEQUE',
    'LIMA METROPOLITANA',
    'LIMA REGION',
    'LORETO',
    'MADRE DE DIOS',
    'MOQUEGUA',
    'PASCO',
    'PIURA',
    'PUNO',
    'SAN MARTIN',
    'TACNA',
    'TUMBES',
    'UCAYALI',
]
# We consider in the model the vaccine brand (Pfizer, AstraZeneca and Sinopharm) and the number of doses applied.
def getVaccineEffect(dose, brand, risk_2):
    """Scale ``risk_2`` by the residual risk left after vaccination.

    With no dose the risk is returned unchanged. With one or two doses the
    risk is multiplied by ``1 - effectiveness`` of the brand ('PFIZER',
    'ASTRAZENECA' or 'SINOPHARM'); any other brand uses the average residual
    of the three. A ``dose`` other than 0, 1 or 2 yields ``None``.
    """
    if dose == 0:
        return risk_2
    if dose == 1:
        # One dose: Pfizer 78%, AstraZeneca 64.01%, Sinopharm 68.397%.
        pfizer, astrazeneca, sinopharm = 0.78, 0.6401, 0.68397
    elif dose == 2:
        # Two doses: Pfizer 95%, AstraZeneca 70.4%, Sinopharm 79%.
        pfizer, astrazeneca, sinopharm = 0.95, 0.704, 0.79
    else:
        return None
    known_brands = (
        ('PFIZER', pfizer),
        ('ASTRAZENECA', astrazeneca),
        ('SINOPHARM', sinopharm),
    )
    for name, effectiveness in known_brands:
        if brand == name:
            return risk_2 * (1 - effectiveness)
    # Unknown brand: average of the three residual risks.
    return risk_2 * ((1 - sinopharm) + (1 - astrazeneca) + (1 - pfizer)) / 3
# If a person has been infected with COVID-19 before, their risk of reinfection is reduced by 84%
def getReinfectionRate(covidBefore, risk):
    """Return ``risk`` scaled to 16% when ``covidBefore`` is truthy, else unchanged."""
    return risk * 0.16 if covidBefore else risk
# Depending on the date, our function calls the OPENCOVID API
def getOpenCovidPeruData(date, timeout=30):
    """Fetch the OpenCovid-Peru "semaforo" data for the week of ``date``.

    The request is made for the Monday returned by ``getMonday(date)``.

    Args:
        date: Date used to select the week.
        timeout: Socket timeout in seconds for the HTTPS connection.

    Returns:
        A DataFrame with one row per entry of the response's ``'regions'``
        list and the columns ``date``, ``region``, ``appliedTests``,
        ``testPositivityPercentage``, ``population``,
        ``bedsOccupiedPercentage``, ``bedsUCIOccupiedPercentage``,
        ``confirmedCasesWeekly`` and ``deathsCasesWeekly``. The last two are
        rebuilt from the per-100k figures of the response and the population.

    Raises:
        http.client.HTTPException: When the API does not answer with status 200.
    """
    mondayDate = getMonday(date)
    conn = http.client.HTTPSConnection("open-covid-api-vwgk4ckqbq-uk.a.run.app", timeout=timeout)
    try:
        conn.request("GET", f"/api/semaforo?fecha={mondayDate}", '', {})
        response = conn.getresponse()
        if response.status != 200:
            raise http.client.HTTPException(
                f"OpenCovid-Peru API answered {response.status} {response.reason}")
        body = response.read()
    finally:
        # Always release the socket, including when the request fails.
        conn.close()
    jsonData = json.loads(body.decode('utf-8'))
    records = [
        {
            'date': value['fecha'],
            'region': value['region'],
            'appliedTests': value['avgTest'],
            'testPositivityPercentage': value['positividad'],
            'population': value['poblacion'],
            'bedsOccupiedPercentage': value['camasCovid'],
            'bedsUCIOccupiedPercentage': value['uci'],
            'confirmedCasesWeekly': (value['poblacion'] * value['incid_100']) / 100000,
            'deathsCasesWeekly': (value['poblacion'] * value['fall_100']) / 100000,
        }
        for value in jsonData[0]['regions']
    ]
    return pd.DataFrame(records)
## Get data from OpenCovidPeru and compute the relative variables
def getProcessedTable(date):
    """Build the table of relative epidemiological variables for ``date``.

    Takes the frame returned by ``getOpenCovidPeruData(date)``, relates
    deaths, cases and applied tests to the population, keeps the percentage
    columns as they are and rounds the result to two decimals.

    NOTE(review): the ratios are multiplied by 10**6 although the columns
    are named "...100k" -- confirm which scale is intended.
    """
    raw = getOpenCovidPeruData(date)
    scale = 10**6

    regionNames = raw['region']
    deathsRate = (raw['deathsCasesWeekly'] / raw['population']) * scale
    casesRate = (raw['confirmedCasesWeekly'] / raw['population']) * scale
    testsRate = (raw['appliedTests'] / raw['population']) * scale
    positivity = raw['testPositivityPercentage']
    bedsPercentage = raw['bedsOccupiedPercentage']
    icuBedsPercentage = raw['bedsUCIOccupiedPercentage']
    weekDate = raw['date']

    # Output column name -> series, in output order.
    columns = {
        'date': weekDate,
        'regions': regionNames,
        'confirmedDeaths100k': deathsRate,
        'confirmedCases100k': casesRate,
        'appliedTests100k': testsRate,
        'positivityRate': positivity,
        'bedsOccupiedPercentage': bedsPercentage,
        'bedsUCIOccupiedPercentage': icuBedsPercentage,
    }
    table = pd.concat(list(columns.values()), axis=1, join='inner', keys=list(columns.keys()))
    return table.round(2)
## Get real-time environmental data to be evaluated with the static correlation model
def getWaqiData():
    """Fetch current and forecast air-quality figures from the WAQI API.

    Five station feeds are queried. For each of ``pm10``, ``pm25`` and ``o3``
    the current ``iaqi`` value and the mean of the daily forecast ``avg``
    values are collected per station and then averaged over the stations.

    Returns:
        ``{'currentData': {...}, 'avgData': {...}}`` where both inner dicts
        have the keys ``'pm10'``, ``'pm2.5'`` and ``'o3'``. A pollutant that
        no station reports is NaN.

    Raises:
        RuntimeError: When a feed response has a ``'status'`` other than
            ``'ok'``.

    Note:
        The connection is now always closed, and a station without an
        ``iaqi`` or ``forecast`` section is tolerated.
    """
    pollutants = ('pm10', 'pm25', 'o3')
    currentValues = {unit: [] for unit in pollutants}
    forecastMeans = {unit: [] for unit in pollutants}
    token = constantsWaqiData['TOKEN']

    def meanOrNan(values):
        # An empty list has no mean; report it as missing instead.
        return mean(values) if values else float('nan')

    for station in ['@7580', '@7577', '@7578', '@8780', '@379']:
        conn = http.client.HTTPSConnection("api.waqi.info")
        try:
            conn.request("GET", f"/feed/{station}/?token={token}", '', {})
            body = conn.getresponse().read()
        finally:
            conn.close()
        jsonData = json.loads(body.decode('utf-8'))
        if jsonData.get('status', 'ok') != 'ok':
            raise RuntimeError(f"WAQI feed {station} failed: {jsonData.get('data')}")
        stationData = jsonData['data']
        currentByUnit = stationData.get('iaqi', {})
        dailyByUnit = stationData.get('forecast', {}).get('daily', {})
        for unit in pollutants:
            if unit in currentByUnit:
                currentValues[unit].append(currentByUnit[unit]['v'])
            if unit in dailyByUnit:
                dayAverages = [day['avg'] for day in dailyByUnit[unit]]
                if dayAverages:
                    forecastMeans[unit].append(mean(dayAverages))

    currentData = {
        'pm10': meanOrNan(currentValues['pm10']),
        'pm2.5': meanOrNan(currentValues['pm25']),
        'o3': meanOrNan(currentValues['o3']),
    }
    avgData = {
        'pm10': meanOrNan(forecastMeans['pm10']),
        'pm2.5': meanOrNan(forecastMeans['pm25']),
        'o3': meanOrNan(forecastMeans['o3']),
    }
    return {'currentData': currentData, 'avgData': avgData}
# First layer: Epidemiological and environmental layers
def getIndex(city):
    """Compute the weighted risk index of ``city`` (first layer of the model).

    Weekly tables are fetched through ``getProcessedTable`` for every seventh
    day from ``238 + getOffSetTime()`` days ago up to today. The correlation
    of each epidemiological variable with ``'confirmedDeaths100k'`` over that
    history, together with the static PM10 / PM2.5 values from
    ``getCorrelationEnvironmental``, is normalised into weights. Each
    variable's most recent value is converted to a risk level and the
    weighted levels are summed.

    Args:
        city: Region name as it appears in the ``'regions'`` column.

    Returns:
        The weighted risk as a float.

    Raises:
        ValueError: When no weekly data could be obtained for ``city``.

    Note:
        A week whose fetch fails is skipped with a logged warning and no
        longer discards the weeks after it. The most recent week that was
        fetched supplies the current values. Correlations are computed on
        the numeric columns only, and weights are selected by label rather
        than by integer position.
        NOTE(review): the window ``238 + getOffSetTime()`` adds days to
        weeks and shrinks as time passes; once it reaches zero no week is
        requested. Confirm the intended window.
        NOTE(review): the epidemiological correlations keep their sign while
        the environmental values are absolute; confirm that normalising by
        their plain sum is intended.
    """
    import logging  # local import: the file's import block is not visible here

    epidemiologicalRates = [
        'confirmedCases100k',
        'appliedTests100k',
        'positivityRate',
        'bedsOccupiedPercentage',
        'bedsUCIOccupiedPercentage',
    ]
    environmentalRates = ['pm10', 'pm2.5']

    endDate = 238 + getOffSetTime()
    today = datetime.date.today()
    weeklyFrames = []
    for daysBack in range(endDate, 0, -7):
        try:
            weekTable = getProcessedTable(today - datetime.timedelta(days=daysBack))
        except Exception as error:
            # Best effort: a week that cannot be fetched is left out.
            logging.getLogger(__name__).warning(
                "Skipping week %s days back: %s", daysBack, error)
            continue
        cityRows = weekTable[weekTable['regions'] == city].drop(columns='regions')
        if not cityRows.empty:
            weeklyFrames.append(cityRows)
    if not weeklyFrames:
        raise ValueError(f"No OpenCovid-Peru data available for region {city!r}")

    history = pd.concat(weeklyFrames, ignore_index=True)
    # Frames are collected oldest first, so the last one is the most recent.
    latestWeek = weeklyFrames[-1]

    # The 'date' column holds strings; restrict the correlation to numbers.
    deathsCorrelation = history.select_dtypes(include='number').corr()['confirmedDeaths100k']
    weights = {rate: deathsCorrelation[rate] for rate in epidemiologicalRates}
    weights['pm10'] = getCorrelationEnvironmental('pm10', city)
    weights['pm2.5'] = getCorrelationEnvironmental('pm25', city)
    totalWeight = sum(weights.values())

    environmentalData = getWaqiData()
    totalRisk = 0
    for rate in epidemiologicalRates + environmentalRates:
        if rate in environmentalRates:
            dataRate = float(environmentalData['currentData'][rate])
            riskNoWeight = getFactorV2(rate, dataRate)
        else:
            dataRate = float(latestWeek[rate].iloc[0])
            riskNoWeight = ConvertToScale(dataRate, rate)
        factorWeight = float(weights[rate] / totalWeight)
        totalRisk = totalRisk + factorWeight * riskNoWeight
    return totalRisk
def getTotalRisk(city, dose, brand, covidBefore):
    """Return the personal risk for ``city`` after all model layers.

    The regional index from ``getIndex`` is adjusted by ``getVaccineEffect``
    (doses and brand) and then by ``getReinfectionRate`` (previous infection).
    """
    regionalRisk = getIndex(city)
    vaccinatedRisk = getVaccineEffect(dose, brand, regionalRisk)
    return getReinfectionRate(covidBefore, vaccinatedRisk)
# Example
# Print the total risk of three regions for a person with no vaccine dose
# and no previous infection.
for city in ('MOQUEGUA', 'LIMA METROPOLITANA', 'ICA'):
    print(f'{city}: ', getTotalRisk(city=city, dose=0, brand='NINGUNO', covidBefore=0))