COVID-19 Data of Germany

Data

REAL Data Set for this analysis:

 Cumulative COVID-19 cases in Germany: the data can be downloaded from the following link. Link: COVID-19 Data

Description of the Data

We consider the data of cumulative COVID-19 cases in Germany. The data is collected from (https://ourworldindata.org/coronavirus). The data contains cumulative confirmed cases of COVID-19 from 31st December 2019 to 8th July 2020. Until 27th January 2020, there were no cases reported in Germany. Before fitting the models, we took 5-day moving averages of the data. Until day 62 (29th February 2020), no significant changes in the data were observed. So, we consider the data from 1st March 2020 to 8th July 2020, which is 129 days in total. The measurement schedules are rescaled as t = 0, 1, 2, ..., 128.

Results

At first, we plot the size profile of the data and it clearly indicates that the logistic growth model seems to be an appropriate choice to start our analysis. But from the size profile, it is hard to conclude whether or not any variation is present in r. So, we plotted the ISRP profile of r. We obtained the empirical estimate of ISRP of r for logistic growth. To check whether there is any significant variation in r (with time), we fit different continuous functions given in Table 2 (Karim et al., 2022), and select the best model based on AIC, and RMSE values. As there is no sign of the presence of periodic variation in the data, we have not considered any kind of periodic variation in r. Based on the summary of AIC and RMSE values, we observe that r is subjected to hyperbolic variation. Hence, we infer that the actual data generation process is not logistic, but rather logistic growth with hyperbolically varying r. To support our claim, we fit several derived growth equations from the logistic model taken from Table 2 (Karim et al., 2022) to the cumulative number of cases. We observe that the logistic model with a hyperbolically varying growth coefficient is the best choice among all the models (both RMSE and AIC values support the conclusion). The analysis is carried out using the non-linear least-squares method using the nls2 function available in R.

R Code

# Select the working directory where the data file is stored.
# NOTE(review): hard-coded Windows path; anyone reproducing this analysis
# must edit this path (or replace it with a relative path / file.path()).
if (getwd() != "D:/Covid_19_data") {

  setwd("D:/Covid_19_data")

}

# Daily cumulative COVID-19 case counts for Germany
# (exported from ourworldindata.org; column total_cases is used below).
Covid_data=read.csv(file="Covid_Germany_data.csv")

# Required packages
# pracma provides movavg(), used below for the 5-day moving average.
# require() returns FALSE when the package is absent, which triggers a
# one-time install followed by library() to attach it.
if(!require(pracma)){

  install.packages("pracma")

  library(pracma)

}


# Smooth the raw cumulative counts with a simple 5-day moving average
# (type = "s" selects the simple/unweighted average in pracma::movavg).
data_1 <- movavg(Covid_data$total_cases, 5, type = "s")

# Raw series (grey) overlaid with its 5-day moving average (red).
plot(Covid_data$total_cases, col = "grey", type = "l", lwd = 2, xlab = "day",
     ylab = "cumulative cases", main = "Cumulative cases in Germany")
lines(data_1, col = "red", lwd = 2)
# FIX: the legend previously used col = "black" for the raw series, but the
# series is plotted in "grey"; the legend colour now matches the plot.
legend("topleft", legend = c("total cases", "five days moving average"),
       lwd = c(2, 2), col = c("grey", "red"))


# Until day 63 there are only minor changes in the total number of infected
# people, so the early flat stretch is dropped: keep the moving-average
# series from day 63 onwards.
data <- data_1[63:length(data_1)]
n <- length(data)
# Rescaled measurement times t = 0, 1, ..., n - 1.
t <- 0:(n - 1)
# Step sizes between consecutive measurement times (all equal to 1 here).
# FIX: the original element-wise loop (with a partially-matched `length=`
# argument) is replaced by the vectorized diff(); as.numeric() keeps h a
# double vector, exactly as the loop produced.
h <- as.numeric(diff(t))


#plot for the final data for Germany by taking five days moving average
# Size profile of the trimmed, smoothed series: cumulative cases vs day.
plot(t,data,xlab = "Time (Day)", ylab = " Cumulative cases", col="red",lwd=1,cex=1.5)


# Root mean squared error between two equal-length numeric vectors.
# Returns a single non-negative number; 0 iff predicted == observed.
rmse <- function(predicted, observed) {
  squared_errors <- (predicted - observed)^2
  sqrt(sum(squared_errors) / length(predicted))
}


# Classical logistic growth curve with constant growth rate r:
#   x(t) = K / (1 + (K/x0 - 1) * exp(-r * t))
# x0 = initial size (value at x = 0), K = carrying capacity.
fun_logistic <- function(x0, K, r, x) {
  shape <- K / x0 - 1
  K / (1 + shape * exp(-r * x))
}

# Required Packages
# nls2 provides the brute-force grid search used for all model fits below.
if(!require(nls2)){

  install.packages("nls2")

  library(nls2)

}


# Coarse grid of starting values; nls2 with algorithm = "brute-force"
# evaluates every grid point and keeps the best one.
# NOTE(review): st1 is reused (redefined) later for the linearly-growing-r
# model; the reuse is harmless because each fit consumes its grid immediately.
st1= expand.grid(x0=seq(0,100,len=10),K=seq(130000,200000,len=10),r=seq(0,3,len=10))

mod1 <- nls2(data~fun_logistic(x0,K,r,t), start = st1, algorithm = "brute-force")

# use nls object mod1 just calculated as starting value for nls optimization
logistic_fit=nls2(data~fun_logistic(x0,K,r,t), start = mod1)

summary(logistic_fit)


#plotting ISRP of parameter r of the logistic model fitted to data
# One ISRP estimate per interior time point: each estimate consumes three
# consecutive observations, hence length n - 2.
r_hat_vals=numeric(length = n-2)


# Ratio used by the interval-specific rate parameter (ISRP) estimator of r
# for the logistic model, built from three consecutive sizes x, y, z:
#   (1/x - 1/y) / (1/y - 1/z)
fun_r <- function(x, y, z) {
  numerator <- 1 / x - 1 / y
  denominator <- 1 / y - 1 / z
  numerator / denominator
}

# Empirical ISRP estimate of r at each interior time point:
#   r_hat[i] = log(fun_r(x_i, x_{i+1}, x_{i+2})) / h_i
# Vectorized over all i = 1, ..., n - 2 (same values as the original loop,
# since fun_r, log and / are all elementwise).
idx <- seq_len(n - 2)
r_hat_vals <- log(fun_r(data[idx], data[idx + 1], data[idx + 2])) / h[idx]


# Scatter plot of the empirical ISRP estimates of r over (truncated) time;
# par() resets the layout and widens the margins for the bold math labels.
par(mfrow=c(1,1), oma = c(1, 1, 2, 2), mar=c(4, 5, 2, 1))

plot(t[1:(length(t)-2)], r_hat_vals, col = "red", type = "p",

     ylab = expression(bold(widehat(r[j](Delta~t)))), xlab=expression(bold(Time~(Day))))


#fitting different variations of r on ISRP of r

# Linearly growing growth rate: r(t) = r0 * (1 + c * t).
fun_1 <- function(r0, c, t) {
  growth_term <- 1 + c * t
  r0 * growth_term
}

# Brute-force grid fit of the linearly-growing-r form to the ISRP estimates
# (t is truncated by 2 because each ISRP value uses three consecutive points).
param1=expand.grid(r0=seq(0,1,len=10),c=seq(0,2,len=10))

fit_1=nls2(r_hat_vals~fun_1(r0,c,t[1:(length(t)-2)]),start = param1, algorithm = "brute-force")

summary(fit_1)


# Linearly decaying growth rate: r(t) = r0 * (1 - c * t).
fun_2 <- function(r0, c, t) {
  decay_term <- 1 - c * t
  r0 * decay_term
}

# Brute-force grid fit of the linearly-decaying-r form to the ISRP estimates.
param2=expand.grid(r0=seq(0,1,len=10),c=seq(0,2,len=10))

fit_2=nls2(r_hat_vals~fun_2(r0,c,t[1:(length(t)-2)]),start = param2, algorithm = "brute-force")

summary(fit_2)


# Extended-logistic growth rate: r(t) = r0 * t^(c - 1).
fun_3 <- function(r0, c, t) {
  power_term <- t^(c - 1)
  r0 * power_term
}

# Brute-force grid fit of the extended-logistic r form to the ISRP estimates
# (c starts at 1 so that t^(c-1) stays finite at t = 0).
param3=expand.grid(r0=seq(0,1,len=10),c=seq(1,2,len=10))

fit_3=nls2(r_hat_vals~fun_3(r0,c,t[1:(length(t)-2)]),start = param3, algorithm = "brute-force")

summary(fit_3)


# Exponentially decaying growth rate: r(t) = r0 * exp(-c * t).
fun_4 <- function(r0, c, t) {
  damping <- exp(-c * t)
  r0 * damping
}

# Brute-force grid fit of the exponentially-decaying-r form to the ISRP
# estimates (c grid includes negative values, so growth is also covered).
param4=expand.grid(r0=seq(0,1,len=10),c=seq(-1,1,len=10))

fit_4=nls2(r_hat_vals~fun_4(r0,c,t[1:(length(t)-2)]),start = param4, algorithm = "brute-force")

summary(fit_4)


# Exponentially growing growth rate: r(t) = r0 * exp(c * t).
fun_5 <- function(r0, c, t) {
  amplification <- exp(c * t)
  r0 * amplification
}

# Brute-force grid fit of the exponentially-growing-r form to the ISRP
# estimates.
param5=expand.grid(r0=seq(0,1,len=10),c=seq(-1,1,len=10))

fit_5=nls2(r_hat_vals~fun_5(r0,c,t[1:(length(t)-2)]),start = param5, algorithm = "brute-force")

summary(fit_5)


# Hyperbolically varying growth rate: r(t) = r0 / (1 + c * t).
fun_6 <- function(r0, c, t) {
  denom <- 1 + c * t
  r0 / denom
}

# Brute-force grid fit of the hyperbolically-varying-r form to the ISRP
# estimates (the winning variation per the analysis summary above).
param6=expand.grid(r0=seq(0,1,len=10),c=seq(0,2,len=10))

fit_6=nls2(r_hat_vals~fun_6(r0,c,t[1:(length(t)-2)]),start = param6, algorithm = "brute-force")

summary(fit_6)


#########################

#fitting respective variations of the logistic model on data

# Logistic growth with linearly growing r: integrating r0*(1 + c*s) over
# [0, t] yields the exponent r0 * t * (1 + c*t/2).
fun_logistic_1 <- function(x0, K, r0, c, t) {
  shape <- K / x0 - 1
  integrated_rate <- r0 * t * (1 + c * t / 2)
  K / (1 + shape * exp(-integrated_rate))
}

# Brute-force grid fit of the linearly-growing-r logistic model to the data.
# NOTE(review): this reassigns st1 (first defined for the plain logistic fit);
# harmless here since each grid is consumed immediately by its nls2 call.
st1= expand.grid(x0=seq(10,100,len=10),K=seq(100000,200000,len=20),

                 r0=seq(0,3,len=10),c=seq(0,3,len=10))

final1 <- nls2(data~fun_logistic_1(x0,K,r0,c,t), start = st1, algorithm = "brute-force")

summary(final1)


# Logistic growth with linearly decaying r: integrating r0*(1 - c*s) over
# [0, t] yields the exponent r0 * t * (1 - c*t/2).
fun_logistic_2 <- function(x0, K, r0, c, t) {
  shape <- K / x0 - 1
  integrated_rate <- r0 * t * (1 - c * t / 2)
  K / (1 + shape * exp(-integrated_rate))
}

# Brute-force grid fit of the linearly-decaying-r logistic model to the data.
st2= expand.grid(x0=seq(10,100,len=10),K=seq(100000,200000,len=10),

                 r0=seq(0,3,len=10),c=seq(0,3,len=10))

final2 <- nls2(data~fun_logistic_2(x0,K,r0,c,t), start = st2, algorithm = "brute-force")

summary(final2)


# Extended logistic model: integrating r0*s^(c-1) over [0, t] yields the
# exponent (r0/c) * t^c (requires c > 0).
fun_logistic_3 <- function(x0, K, r0, c, t) {
  shape <- K / x0 - 1
  integrated_rate <- (r0 / c) * t^c
  K / (1 + shape * exp(-integrated_rate))
}

# Brute-force grid fit of the extended logistic model to the data
# (c grid stays strictly positive because the exponent divides by c).
st3= expand.grid(x0=seq(10,100,len=10),K=seq(100000,200000,len=20),

                 r0=seq(0,3,len=10),c=seq(0.01,0.5,len=20))

final3 <- nls2(data~fun_logistic_3(x0,K,r0,c,t), start = st3, algorithm = "brute-force")

summary(final3)


# Logistic growth with exponentially decaying r: integrating r0*exp(-c*s)
# over [0, t] yields the exponent (r0/c) * (1 - exp(-c*t)).
fun_logistic_4 <- function(x0, K, r0, c, t) {
  shape <- K / x0 - 1
  integrated_rate <- (r0 / c) * (1 - exp(-c * t))
  K / (1 + shape * exp(-integrated_rate))
}

# Brute-force grid fit of the exponentially-decaying-r logistic model.
st4 <- expand.grid(x0 = seq(10, 100, len = 10), K = seq(100000, 200000, len = 10),
                   r0 = seq(0, 3, len = 10), c = seq(-2, 2, len = 10))
final4 <- nls2(data ~ fun_logistic_4(x0, K, r0, c, t), start = st4, algorithm = "brute-force")
# FIX: summary() was missing for final4 while every sibling fit prints one.
summary(final4)


# Logistic growth with exponentially growing r: integrating r0*exp(c*s)
# over [0, t] yields the exponent (r0/c) * (exp(c*t) - 1).
fun_logistic_5 <- function(x0, K, r0, c, t) {
  shape <- K / x0 - 1
  integrated_rate <- (r0 / c) * (exp(c * t) - 1)
  K / (1 + shape * exp(-integrated_rate))
}

# Brute-force grid fit of the exponentially-growing-r logistic model.
# FIX: the grid was named st6, colliding with the hyperbolic model's grid
# defined further down; renamed st5 to keep one grid name per model.
st5 <- expand.grid(x0 = seq(10, 100, len = 10), K = seq(100000, 200000, len = 20),
                   r0 = seq(0, 3, len = 10), c = seq(-3, 3, len = 10))
final5 <- nls2(data ~ fun_logistic_5(x0, K, r0, c, t), start = st5, algorithm = "brute-force")
summary(final5)


# Logistic growth with hyperbolically varying r: integrating r0/(1 + c*s)
# over [0, t] gives (r0/c)*log(1 + c*t), so exp(-integral) = (1+c*t)^(-r0/c).
fun_logistic_6 <- function(x0, K, r0, c, t) {
  shape <- K / x0 - 1
  damping <- (1 + c * t)^(-r0 / c)
  K / (1 + shape * damping)
}

# Brute-force grid fit of the hyperbolically-varying-r logistic model
# (the best-performing model per the analysis summary above).
st6= expand.grid(x0=seq(10,100,len=10),K=seq(100000,200000,len=20),

                 r0=seq(0,2,len=10),c=seq(0,2,len=10))

final6 <- nls2(data~fun_logistic_6(x0,K,r0,c,t), start = st6, algorithm = "brute-force")

summary(final6)


############################

#final fittings on real data
# Overlay every fitted model on the observed size profile.
plot(t,data,xlab = "Time (Day)", ylab = " Cumulative cases", col="red",lwd=1,cex=1.5)
# NOTE(review): curve() evaluates its expression over its own grid of x
# values; this global `x=t` assignment does not feed curve() and appears
# unused — confirm before removing.
x=t
curve(fun_logistic(coef(logistic_fit)[1],coef(logistic_fit)[2],coef(logistic_fit)[3],x)
      ,col="magenta",lwd=3,add=TRUE)
curve(fun_logistic_1(coef(final1)[1],coef(final1)[2],coef(final1)[3],coef(final1)[4],x)
      ,col="green",lwd=3,add=TRUE)
curve(fun_logistic_2(coef(final2)[1],coef(final2)[2],coef(final2)[3],coef(final2)[4],x)
      ,col="yellow",lwd=3,add=TRUE)
curve(fun_logistic_3(coef(final3)[1],coef(final3)[2],coef(final3)[3],coef(final3)[4],x)
      ,col="chocolate",lwd=3,add=TRUE)
curve(fun_logistic_4(coef(final4)[1],coef(final4)[2],coef(final4)[3],coef(final4)[4],x)
      ,col="blue",lwd=3,add=TRUE)
curve(fun_logistic_5(coef(final5)[1],coef(final5)[2],coef(final5)[3],coef(final5)[4],x)
      ,col="cyan",lwd=3,add=TRUE)
curve(fun_logistic_6(coef(final6)[1],coef(final6)[2],coef(final6)[3],coef(final6)[4],x)
      ,col="black",lwd=3,add=TRUE)
# Legend entries/colours follow the order the curves are drawn above.
legend("bottomright", legend = c("Logistic", "Linearly growing r",
        "Linearly decaying r","Extended logistic","Exponentially decaying r",
        "Exponentially growing r","Hyperbolically varying r")
       ,col=c("magenta","green","yellow","chocolate","blue","cyan","black"),lwd=3)


# Overlay the fitted r(t) functions on the empirical ISRP estimates of r.
plot(t[1:(length(t)-2)], r_hat_vals, col = "red", lwd = 2, type = "p",
     xlim = c(-1, (length(t)-2)), ylab = expression(bold(widehat(r[j](Delta~t)))),
     xlab = "Time (Day)")

x <- t[1:(length(t)-2)]
curve(fun_1(coef(fit_1)[1], coef(fit_1)[2], x), col = "green", lwd = 3, add = TRUE)
curve(fun_3(coef(fit_3)[1], coef(fit_3)[2], x), col = "chocolate", lwd = 3, add = TRUE)
curve(fun_4(coef(fit_4)[1], coef(fit_4)[2], x), col = "blue", lwd = 3, add = TRUE)
curve(fun_6(coef(fit_6)[1], coef(fit_6)[2], x), col = "black", lwd = 3, add = TRUE)
# FIX: the legend previously listed "yellow" and "cyan", but the curves are
# drawn in "chocolate" and "blue"; legend colours (and lwd) now match the
# plotted curves.
legend("topright", legend = c("Linearly varying r",
       "Extended logistic", "Exponentially varying r",
       "Hyperbolically varying r"),
       col = c("green", "chocolate", "blue", "black"), lwd = 3)


####################################

#AIC and RMSE values for fitted model onto data
# Goodness-of-fit table for the seven models fitted to the cumulative cases:
# column 1 = RMSE of fitted values vs data, column 2 = AIC of each nls2 fit.
final_result = matrix(data = NA, nrow = 7, ncol = 2)
colnames(final_result) = c("RMSE", "AIC")
rownames(final_result) = c("logistic", "Linearly growing r", "linearly decaying r", 
                            "Extended logistic","Exponentially decaying r",
                            "Exponentially growing r","Hyperbolically varying r")
# RMSE of each model, recomputed from its fitted coefficients over t.
final_result[,1] = c(rmse(data,fun_logistic(coef(logistic_fit)[1],coef(logistic_fit)[2],
                                            coef(logistic_fit)[3],t)),
                     rmse(data,fun_logistic_1(coef(final1)[1],coef(final1)[2],
                                              coef(final1)[3],coef(final1)[4],t)),
                     rmse(data,fun_logistic_2(coef(final2)[1],coef(final2)[2]
                                              ,coef(final2)[3],coef(final2)[4],t)),
                     rmse(data,fun_logistic_3(coef(final3)[1],coef(final3)[2],
                                              coef(final3)[3],coef(final3)[4],t)),
                     rmse(data,fun_logistic_4(coef(final4)[1],coef(final4)[2],
                                              coef(final4)[3],coef(final4)[4],t)),
                     rmse(data,fun_logistic_5(coef(final5)[1],coef(final5)[2],
                                              coef(final5)[3],coef(final5)[4],t)),
                     rmse(data,fun_logistic_6(coef(final6)[1],coef(final6)[2],
                                              coef(final6)[3],coef(final6)[4],t)))
# AIC straight from the fitted nls objects, in the same row order.
final_result[,2] = c(AIC(logistic_fit),AIC(final1),AIC(final2),AIC(final3),
                     AIC(final4),AIC(final5),AIC(final6))
print("AIC and RMSE values for fitted model onto data")
print(final_result)


# Goodness-of-fit table for the four r(t) variations fitted to the ISRP
# estimates (fits 2 and 5 are omitted, mirroring the overlay plot above).
ISRP_fit_result= matrix(data = NA, nrow = 4, ncol = 2)
colnames(ISRP_fit_result) = c("RMSE", "AIC")
rownames(ISRP_fit_result) = c( "Linearly varying r", 
                               "Extended logistic","Exponentially varying r",
                               "Hyperbolically varying r")
# RMSE of each r(t) form against the empirical ISRP values over truncated t.
ISRP_fit_result[,1] = c(rmse(r_hat_vals,fun_1(coef(fit_1)[1],coef(fit_1)[2],
                                              t[1:(length(t)-2)])),
                        rmse(r_hat_vals,fun_3(coef(fit_3)[1],coef(fit_3)[2],
                                              t[1:(length(t)-2)])),
                        rmse(r_hat_vals,fun_4(coef(fit_4)[1],coef(fit_4)[2],
                                              t[1:(length(t)-2)])),
                        rmse(r_hat_vals,fun_6(coef(fit_6)[1],coef(fit_6)[2],
                                              t[1:(length(t)-2)])))
# AIC straight from the fitted nls objects, in the same row order.
ISRP_fit_result[,2] = c(AIC(fit_1),AIC(fit_3),AIC(fit_4),AIC(fit_6))
print("AIC and RMSE values for fitted model onto Interval specific estimates of r are")
print(ISRP_fit_result)