Kaplan-Meier Estimator in R
Post date: Sep 10, 2012 3:58:57 AM
## By Haoying Wang@2012
rm(list = ls(all = TRUE)) # clean workspace
##Define My Kaplan-Meier Estimator Function
kmeier = function(input1,input2)
{
## check the completeness of input data
n = length(input1)
n0 = length(input2)
if (n!=n0) { return("Error: The length of failure time indicators vector DOES NOT match with length of observed times vector!")
break }
n_n = sum(input1<0) + sum(input2<0)
if (n_n>0) { return("Error: negative value is detected in data!")
break }
n_1 = sum(input2==0) + sum(input2==1)
if (n_1!=n) { return("Error: The 2nd input/argument of the function should be failure time indicator (0 or 1) vector! Consider change!")
break }
## order the data, in increasing order
temp_data = data.frame(input1,input2)
attach(temp_data)
data <- temp_data[order(input1),]
detach(temp_data)
## construct and compute the estimator
sub_data <- data[which(data$input2=='1'), ] # select data with failure time indicator as 1
data <- as.matrix(data)
sub_data <- as.matrix(sub_data)
f_time = unique(sub_data[,1])
m=length(f_time) # get the number of distinct failure time
atrisk = rep(NA,m)
die = rep(NA,m)
s_hat = rep(NA,m+1) # initialize the K-M estimates
s_hat[1] = 1
for (i in 1:m)
{
atrisk[i] = sum(data[,1] >= f_time[i])
die[i] = sum(data[,1] == f_time[i] & data[,2]==1)
s_hat[i+1] = s_hat[i] * (atrisk[i] - die[i])/atrisk[i]
}
## return the estimates along with observed event/failure times
s_est = s_hat[2:(m+1)]
s = data.frame(f_time,s_est)
return (s)
}
## end of the code