Kaplan-Meier Estimator in R

Post date: Sep 10, 2012 3:58:57 AM

## By Haoying Wang@2012

rm(list = ls(all = TRUE)) # clean workspace

##Define My Kaplan-Meier Estimator Function

kmeier = function(input1,input2)

{

## check the completeness of input data

n = length(input1)

n0 = length(input2)

if (n!=n0) { return("Error: The length of failure time indicators vector DOES NOT match with length of observed times vector!")

break }

n_n = sum(input1<0) + sum(input2<0)

if (n_n>0) { return("Error: negative value is detected in data!")

break }

n_1 = sum(input2==0) + sum(input2==1)

if (n_1!=n) { return("Error: The 2nd input/argument of the function should be failure time indicator (0 or 1) vector! Consider change!")

break }

## order the data, in increasing order

temp_data = data.frame(input1,input2)

attach(temp_data)

data <- temp_data[order(input1),]

detach(temp_data)

## construct and compute the estimator

sub_data <- data[which(data$input2=='1'), ] # select data with failure time indicator as 1

data <- as.matrix(data)

sub_data <- as.matrix(sub_data)

f_time = unique(sub_data[,1])

m=length(f_time) # get the number of distinct failure time

atrisk = rep(NA,m)

die = rep(NA,m)

s_hat = rep(NA,m+1) # initialize the K-M estimates

s_hat[1] = 1

for (i in 1:m)

{

atrisk[i] = sum(data[,1] >= f_time[i])

die[i] = sum(data[,1] == f_time[i] & data[,2]==1)

s_hat[i+1] = s_hat[i] * (atrisk[i] - die[i])/atrisk[i]

}

## return the estimates along with observed event/failure times

s_est = s_hat[2:(m+1)]

s = data.frame(f_time,s_est)

return (s)

}

## end of the code