Derivation & Distribution of ISRP Estimates

Derivation of ISRP following Karim and Bhowmick (2023+) using R

# Initial parameters setups

K = 100  # Carring capacity    

x0 = 10  # Initial population size

r = 0.8  # Intrinsic growth rate

t = seq(from = 1, to = 15, by = 1) # Time points

q = length(t)  # length of time points


# Required Packages

library(mvtnorm)


h = numeric(length = length(t)-1)

for(i in 1:(length(t)-1)){

  h[i] = t[i+1] - t[i]

}

# Von Bertalanffy mean function

mu = K*((1 + ((x0/K)^(1/3) - 1)*exp(-r*(t/3)))^3)


#Visualization of Size profile over time

par(mfrow = c(1,1))

plot(t, mu, col = "red", lwd=2, type = "b", pch = 19, 

     ylab = "size, X(t)", xlab = "time (t)")


# Formation of the variance Co-variance matrix

sigma2 = 0.5   # variance of X_t

rho = 0.5   # corr(X_t, X_{t+1})

cov_mat = matrix(data = NA, nrow = length(t), ncol = length(t))

for (i in 1:length(t)) {

  for (j in 1:length(t)) {

    cov_mat[i,j] = sigma2*rho^(abs(i-j))

  }

}


# The ISRP estimators of the parameter derived by localized MLE method

n = 100    # number of independent trajectories

data = mvtnorm::rmvnorm(n, mean = mu, sigma = cov_mat)

matplot(t, t(data), type = "l")


# Likelihood function for computing overall estimates that will give a 

# single estimate of the parameters using the data at # all time points 1 # to q.

fun_likelihood = function(param){

  r = param[1]

  K = param[2]

  sigma2  = param[3]

  rho = param[4]

  mu = K*((1 + ((x0/K)^(1/3) - 1)*exp(-r*(t/3)))^3)

  

  cov_mat = matrix(data = NA, nrow = length(t), ncol = length(t))

  for (i in 1:q) {

    for (j in 1:q) {

      cov_mat[i,j] = sigma2*rho^(abs(i-j))

    }

  }

  exponent = 0

  for(i in 1:n){

    exponent = exponent + t(data[i,]-mu)%*%(solve(cov_mat))%*%(data[i,]-mu)

  }

  log_lik = - (n*q/2)*log(2*pi) - (n/2)*log(det(cov_mat)) - (1/2)*exponent

  return(-log_lik)

}

param_0 = c(0.8, 100, 0.5, 0.5)

out = optim(param_0, fun_likelihood, method = "L-BFGS-B", 

            hessian = TRUE, lower = c(0.001, 50, 0.0001, -0.001),

            upper = c(3, 150, 10, 0.999))

out$par

solve(out$hessian)

# The Main programme starts here where we estimates the ISRP by maximizing localized    # likelihood. The following program will estimate the ISRP of r, K, sigma2, and rho by  # maximizing the localized likelihood function that will use only the data for three    #consecutive time points. Therefore, for q = 15 time points, we will obtain 13 estimates # of r and K. Note that we did not provide the ISRP estimate of X_0. We estimated X_0   # by taking the average of all the observations at time point t=1.


# No of time points taken at a time for the estimation of ISRP of parameters

P = 3

est_local = matrix(data = NA, nrow = q-P+1, ncol = 4) # to store the parameter estimates

cov_local=list() # to store the local variance-covariance matrix.

fun_local_likelihood = function(param){

  r = param[1]

  K = param[2]

  sigma2  = param[3]

  rho = param[4]

  mu = K*((1 + ((x0/K)^(1/3) - 1)*exp(-r*(t/3)))^3)

  cov_mat = matrix(data = NA, nrow = length(t), ncol = length(t))

  for (i in 1:q) {

    for (j in 1:q) {

      cov_mat[i,j] = sigma2*rho^(abs(i-j))

    }

  }

  

  # localized estimates

  mu_local = mu[ind:(ind + P-1)]

  data_local = data[,ind:(ind + P - 1)]

  cov_mat_local = cov_mat[ind:(ind+P-1), ind:(ind+P-1)]

  exponent = 0

  for(i in 1:n){

    exponent = exponent + t(data_local[i,] - mu_local)%*%(solve(cov_mat_local))%*%(data_local[i,] - mu_local)

  }

  

  log_lik = - (n*length(ind:(ind+P-1))/2)*log(2*pi) - (n/2)*log(det(cov_mat_local)) - (1/2)*exponent

  return(-log_lik)

}


# The following is for verification purposes for whether we are able to get the ISRP 

# estimates by maximizing the localized likelihood function.

for(j in 1:(q-P+1)){

  ind = j

  param_0 = c(0.8, 100, 0.5, 0.5)

  out = optim(param_0, fun_local_likelihood, method = "L-BFGS-B", 

              hessian = TRUE, lower = c(0.001, 50, 0.0001, -0.001), 

              upper = c(3, 150, 10, 0.999))

  est_local[j, ] =  out$par

  var_cov_local=solve(out$hessian)

  cov_local[[j]]=var_cov_local

}

colnames(est_local) = c("r", "K", "sigma2", "rho")


# Cov_local[[1]], Cov_local[[2]],..., Cov_local[[q-P+1]] will give the local 

# estimates at 1,2,..., q-P+1 time points respectively.

View(cov_local[[q-P+1]])


par(mfrow = c(2,2))

plot(1:(q-P+1), est_local[,1], type = "b", col = "red", lwd = 2,

     main = expression(widehat(r(Delta~t))[MLE]), 

     ylab = expression(hat(r)))

plot(1:(q-P+1), est_local[,2], type = "b", col = "red", lwd = 2,

     main = expression(widehat(K(Delta~t))[MLE]), 

     ylab = expression(hat(K)))

plot(1:(q-P+1), est_local[,3], type = "b", col = "red", lwd = 2,

     main = expression(widehat(sigma^2)), 

     ylab = expression(hat(sigma^2)))

plot(1:(q-P+1), est_local[,4], type = "b", col = "red", lwd = 2,

     main = expression(widehat(rho)), 

     ylab = expression(hat(rho)))


# In this section we validate the asymptotic distribution of the estimates of the 

# parameter r and K and the test statistic T at(T_1, T_2,...,T_{q-4}) when the null 

# hypothesis is true. The following experiment will be carried out for n trajectories. 

# Now we replicate this process 500 times to get the distribution of the test 

# statistics under the null hypothesis.


setwd("D:/Projrct R ISRP") # Set the directory to load the data

n = 100

rep = 500

est_local_list = list()

x0_mean_vals = numeric(length = rep)

for(l in 1:rep){

  set.seed(l)

  print(l)

  data = mvtnorm::rmvnorm(n, mean = mu, sigma = cov_mat)

  x0_mean_vals[l] = mean(data[,1])

  est_local = matrix(data = NA, nrow = q-P+1, ncol = 4)

  

  for(j in 1:(q-P+1)){

    ind = j

    param_0 = c(0.8, 100, 0.5, 0.5)

    out = optim(param_0, fun_local_likelihood, method = "L-BFGS-B", 

                hessian = TRUE, lower = c(0.001, 50, 0.0001, -0.001), 

                upper = c(3, 150, 10, 0.999))

    est_local[j, ] =  out$par

  }

  colnames(est_local) = c("r", "K", "sigma2", "rho")

  est_local_list[[l]] = est_local

}


# Plot the estimates based on replication

# Distribution of ISRP estimates of r using localized maximum likelihood

mat_est_r_MLE = matrix(data = NA, nrow = q-P+1, ncol = rep)

for (j in 1:rep) {

  mat_est_r_MLE[,j] = est_local_list[[j]][,1]

}

write.table(mat_est_r_MLE, file = "r_MLE_ISRP.txt", row.names = FALSE,

            col.names = FALSE)

matplot(1:(q-P+1), mat_est_r_MLE, type = "l", 

        main = expression(widehat(r(Delta~t))[MLE]), 

        ylab = expression(hat(r)))

legend("topright", legend = paste("n = ", n), bty = "n", cex = 1.5)

boxplot(mat_est_r_MLE, col = "grey",  xlab=expression(bold(Time(t))), lwd=2, 

        ylab = expression(bold(widehat(r[j](Delta~t)))))

axis(1,font = 2); axis(2,font = 2)


# Distribution of ISRP estimates of K using localized maximum likelihood

mat_est_K_MLE = matrix(data = NA, nrow = q-P+1, ncol = rep)

for (j in 1:rep) {

  mat_est_K_MLE[,j] = est_local_list[[j]][,2]

}

write.table(mat_est_K_MLE, file = "K_MLE_ISRP.txt", row.names = FALSE,

            col.names = FALSE)

matplot(1:(q-P+1), mat_est_K_MLE, type = "l", 

        main = expression(widehat(K(Delta~t))[MLE]), 

        ylab = expression(hat(K)))

legend("topright", legend = paste("n = ", n), bty = "n", cex = 1.5)

boxplot(mat_est_K_MLE, col = "grey",  xlab=expression(bold(Time(t))), lwd=2, 

        ylab = expression(bold(widehat(K[j](Delta~t)))))

axis(1,font = 2); axis(2,font = 2)


# Estimation of modified RGR and its distribution

modified_RGR = matrix(data = NA, nrow = q-P+1, ncol = rep)

for (i in 1:(q-P+1)) {

  numerator = mat_est_r_MLE[i,]*exp(-mat_est_r_MLE[i,]*(t[i]/3))*

    ((mat_est_K_MLE[i,])^(1/3) - (x0_mean_vals)^(1/3))

  denominator = (mat_est_K_MLE[i,])^(1/3) + exp(-mat_est_r_MLE[i,]*(t[i]/3))*

    ((x0_mean_vals)^(1/3) - (mat_est_K_MLE[i,])^(1/3))

  modified_RGR[i,] = numerator/denominator

}

write.table(modified_RGR, file = "modified_RGR.txt", row.names = FALSE,

            col.names = FALSE)


# Histogram of the modified RGR

par(mfrow = c(2,3))

for(i in 1:(q-P+1)){

  hist(modified_RGR[i,], probability = TRUE, main = paste("t = ", i),

       xlab = expression(hat(R[t])))

}


# Computation of the Test Statistics and its distribution

V = matrix(data = NA, nrow = q-P+1, ncol = rep)

for (i in 1:(q-P+1)) {

  V[i, ] = log(1/modified_RGR[i,] + 1/ mat_est_r_MLE[i,])

}

print(head(V))

sum(is.na(V))


test_stat = matrix(data = NA, nrow = q-P-1, ncol = rep)

for (i in 1:(q-P-1)) {

  test_stat[i,] = V[i+2,] - 2*V[i+1,] + V[i,]

  print(shapiro.test(test_stat[i,]))

}

write.table(test_stat, file = "test_stat.txt", row.names = FALSE,

            col.names = FALSE)


# Histogram of the test statistics under the null distribution (required)

par(mfrow = c(2,3))  

for(i in 1:(q-P-1)){

  

  hist( test_stat[i,]/sd(test_stat[i,]), probability = TRUE, main = '',

        xlab = expression(widehat(T[t])), breaks = 10, col="lightgreen")

  curve(dnorm(x), add = TRUE, col = "red", lwd = 2, lty = 2)

}  


# Asymptotic normality check of the test statistic by Q-Q plot and P-value 

par(mfrow = c(3,3))  

for(i in 1:(q-P-1)){

  

  qqnorm(test_stat[i,]/sd(test_stat[i,]), col = "lightgreen", lwd = 2, lty = 1, 

         main = '')

  qqline(test_stat[i,]/sd(test_stat[i,]), col = "red", lwd = 2, lty=2)

  print (shapiro.test(test_stat[i,]/sd(test_stat[i,])))

}