R Sample 1

mydata <- read.table("./myR_folder/pop_subrhalvl1.txt", sep=",", header=TRUE, as.is=TRUE) eddata <- read.table("./myR_folder/hospitaldata.txt", sep=",", header=TRUE, as.is=TRUE) distdata <- read.table("./myR_folder/2005distancedata.csv", sep=",", header=TRUE, as.is=TRUE) subRHA <- unique(mydata[, 1]) ## create list of subRHAs fiscYear <- unique(mydata[, 2]) ## create list of fiscal years subRHAPoplSize <- matrix(nrow = length(fiscYear), ncol = (length(subRHA) - 1)) for (i in 2:length(subRHA)) { for (j in 1:length(fiscYear)){ subRHAPoplSize[j, i-1] <- sum(mydata[which(mydata[, 1] == subRHA[i] & mydata[, 2] == fiscYear[j]), 6]) } } I <- length(subRHA[2:length(subRHA)]) mainAmbCD <- sort(unique(eddata[, 12])) d <- matrix(0, nrow = length(fiscYear), ncol = (length(subRHA) - 1)) u <- matrix(0, nrow = length(fiscYear), ncol = (length(subRHA) - 1)) for (j in 1:length(fiscYear)) { for (i in 2:length(subRHA)) { u[j, i-1] <- length(eddata[which(eddata[, 24] == subRHA[i] & eddata[, 12] == mainAmbCD[1] & eddata[, 1] == fiscYear[j]), 2]) d[j, i-1] <- length(unique(eddata[which(eddata[, 24] == subRHA[i] & eddata[, 12] == mainAmbCD[1] & eddata[, 1] == fiscYear[j]), 2])) } } colnames(d) <- subRHA[2:length(subRHA)] rownames(d) <- fiscYear colnames(u) <- subRHA[2:length(subRHA)] rownames(u) <- fiscYear distMtx <- distdata[4:nrow(distdata), 4:ncol(distdata)] rownames(distMtx) <- subRHA[2:length(subRHA)] distMtx <- as.matrix(distMtx) k <- 6 ## choose the kth fiscal year poplVec <- subRHAPoplSize[k, ] percentChosen <- 15/100 ## Users choose this a priori poplUpperBound <- sum(poplVec)*percentChosen radiusVec <- sort(unique(as.vector(distMtx))) threshold <- 0 for (i in 1:length(radiusVec)){ indic2 <- 1 for (j in 1:I){ if (sum(poplVec[which(distMtx[j, ] <= radiusVec[i])]) < poplUpperBound) {indic2 <- indic2*1} else { indic2 <- indic2*0 break } } if (indic2 == 1) {threshold <- threshold+1} else if (indic2 == 0) { break } } rm(indic2) radiusLim <- radiusVec[threshold] ## 'radiusLim' reports rm(poplUpperBound) zoneList <- c() zonePrimeList <- c() for (i in 1:I) { if (u[k, i] > 0 & sum(u[k, setdiff(1:I, i)] > 0) > 0) { zoneList <- c(zoneList, list(i)) zonePrimeList <- c(zonePrimeList, list(setdiff(1:I, i))) } } if (threshold > 1) { for (J in 2:(threshold + 1)){ for (i in 1:I){ temp.list1 <- c(i) temp.list2 <- c() for (j in 1:I){ if (j != i){ if (distMtx[i, j] 0) > 0 & sum(u[k, temp.list2] > 0) > 0){ zoneList <- c(zoneList, list(sort((temp.list1)))) zonePrimeList <- c(zonePrimeList, list(sort((temp.list2)))) } } } } rm(indic1) rm(L) rm(L1) rm(temp.list1) rm(temp.list2) compndDistrb <- function(trueMu, event){ truncPoissPMF <- log(trueMu) for (i in 1:max(event)){ truncPoissPMF <- c(truncPoissPMF, truncPoissPMF[i - 1] + log(trueMu) - log(i)) } truncPoissPMF <- exp(truncPoissPMF)*(exp(trueMu) - 1)^(-1) truncPoissPMF[event] } ## this version works best for 'event' as a scalar or a vector and avoids factorial computation cpoisspdf <- function(parameter, sumRegionEvent, subpopsize){ if(sumRegionEvent == 0){P <- exp(-subpopsize*parameter[1])} else if(sumRegionEvent > 0){ QVec <- compndDistrb(parameter[2], seq(1:sumRegionEvent)) P <- vector(mode = "numeric", length = (sumRegionEvent + 1)) P[1] <- exp(-subpopsize*parameter[1]) for (i in 2:(sumRegionEvent + 1)){ P[i] <- (parameter[1]*subpopsize/(i - 1))*sum(seq(1:(i - 1))*QVec[1:(i - 1)]*rev(P[1:(i - 1)])) } } P[sumRegionEvent + 1] } loglikelihood <- function(params){ minus.sum <- 0 for (i in 1:I){ minus.sum <- minus.sum-log(cpoisspdf(params, u[k, i], subRHAPoplSize[k, i])) ## for 1st fiscYear } minus.sum } loglikelihood1 <- function(params){ minus.sum <- 0 for (i in 1:length(zoneList[[L1]])){ minus.sum <- minus.sum-log(cpoisspdf(params, u[k, zoneList[[L1]][i]], subRHAPoplSize[k, zoneList[[L1]][i]])) } minus.sum } loglikelihood2 <- function(params){ minus.sum <- 0 for (i in 1:length(zonePrimeList[[L1]])){ minus.sum <- minus.sum - log(cpoisspdf(params, u[k, zonePrimeList[[L1]][i]], subRHAPoplSize[k, zonePrimeList[[L1]][i]])) } minus.sum } k <- 6 yearkUniqID <- unique(eddata[which(eddata[, 1] == fiscYear[k] & eddata[, 12] == mainAmbCD[1]), 2]) numVisitEachID <- vector(mode = 'numeric', length = length(yearkUniqID)) for (i in 1:length(yearkUniqID)) { numVisitEachID[i] <- length(eddata[which(eddata[, 1] == fiscYear[k] & eddata[, 2] == yearkUniqID[i] & eddata[, 12] == mainAmbCD[1]), 2]) } totalTrials <- 3 simLRStat <- vector(mode = 'numeric', length = totalTrials) simMRStat <- vector(mode = 'numeric', length = totalTrials) # #run once ends for (N in 1:totalTrials){ sim_sRHA_Vector <- sample(subRHA[2:length(subRHA)], length(yearkUniqID), replace=TRUE, prob = poplVec/sum(poplVec)) for (i in 2:length(subRHA)) { u[k, i-1] <- sum(numVisitEachID[sim_sRHA_Vector == subRHA[i]]) d[k, i-1] <- length(yearkUniqID[sim_sRHA_Vector == subRHA[i]]) } m5 <- nlminb(c(sum(d[k, ])/sum(subRHAPoplSize[k, ]), sum(u[k, ])/sum(d[k, ])), loglikelihood, lower=c(10^{-10}, 10^{-10})) ratioStat <- vector(mode = "numeric", length = length(zoneList)) meanRatio <- vector(mode = "numeric", length = length(zoneList)) ## ratio of E[events in zone]/E[events outside zone] for (L1 in 1:length(zoneList)) { parameter <- vector(mode="numeric", length = 2) ## pre-allocate a 2-element parameter vector m11 <- nlminb(c(sum(d[k, zoneList[[L1]]])/sum(subRHAPoplSize[k, zoneList[[L1]]]), sum(u[k, zoneList[[L1]]])/sum(d[k, zoneList[[L1]]])), loglikelihood1, lower = c(10^{-10}, 10^{-10})) parameter <- vector(mode="numeric", length=2) ## pre-allocate a 2-element parameter vector m12 <- nlminb(c(sum(d[k, zonePrimeList[[L1]]])/sum(subRHAPoplSize[k, zonePrimeList[[L1]]]), sum(u[k, zonePrimeList[[L1]]])/sum(d[k, zonePrimeList[[L1]]])), loglikelihood2, lower=c(10^{-10}, 10^{-10})) ratioStat[L1] <- exp(-m11[[2]] - m12[[2]] + m5[[2]]) meanRatio[L1] <- exp(log(m11[[1]][1]) + log(m11[[1]][2]) + m11[[1]][2] - log(exp(m11[[1]][2]) - 1) - log(m12[[1]][1]) - log(m12[[1]][2]) - m12[[1]][2] + log(exp(m12[[1]][2]) - 1)) } m5[1:2] rm(L1) rm(m11) rm(m12) rm(m5) simLRStat[N] <- max(ratioStat[meanRatio > 1]) simMRStat[N] <- max(meanRatio[meanRatio > 1]) } # # # new parallel version begins (same as above but using 'snowfall' package) #run once begin totalTrials <- 10 simLRStat <- vector(mode = 'numeric', length = totalTrials*100) simMRStat <- vector(mode = 'numeric', length = totalTrials*100) yearkUniqID <- unique(eddata[which(eddata[, 1] == fiscYear[k] & eddata[, 12] == mainAmbCD[1]), 2]) numVisitEachID <- vector(mode = 'numeric', length = length(yearkUniqID)) for (i in 1:length(yearkUniqID)) { numVisitEachID[i] <- length(eddata[which(eddata[, 1] == fiscYear[k] & eddata[, 2] == yearkUniqID[i] & eddata[, 12] == mainAmbCD[1]), 2]) } #run once end library(foreach) library(snowfall) sfInit(parallel = TRUE, cpus=4, type = "SOCK") Sys.time() ## this line can be removed ptm <- proc.time() for (M in 1:1){ ## vary the iterator by totalTrials for (N in 1:totalTrials){ sim_sRHA_Vector <- sample(subRHA[2:length(subRHA)], length(yearkUniqID), replace=TRUE, prob = poplVec/sum(poplVec)) #sim_sRHA_Vector <- sample(subRHA[2:length(subRHA)], length(yearkUniqID), replace=TRUE, samplingWGT) #sim_sRHA_Vector <- sample(subRHA[2:length(subRHA)], length(yearkUniqID), replace=TRUE) ## choosing diff sampling weight varies the m5 results greatly for (i in 2:length(subRHA)) { u[k, i-1] <- sum(numVisitEachID[sim_sRHA_Vector == subRHA[i]]) d[k, i-1] <- length(yearkUniqID[sim_sRHA_Vector == subRHA[i]]) } m5 <- nlminb(c(sum(d[k, ])/sum(subRHAPoplSize[k, ]), sum(u[k, ])/sum(d[k, ])), loglikelihood, lower = c(10^{-10}, 10^{-9})) zoneOptVec <- foreach(L1 = 1:length(zoneList), .combine='rbind') %dopar% { unlist(nlminb(c(sum(d[k, zoneList[[L1]]])/sum(subRHAPoplSize[k, zoneList[[L1]]]), sum(u[k, zoneList[[L1]]]) / sum(d[k, zoneList[[L1]]])), loglikelihood1, lower = c(10^{-10}, 10^{-10}))[1:2], use.names=FALSE) } ## help(unlist) zonePrimeOptVec <- foreach(L1 = 1:length(zoneList), .combine = 'rbind') %dopar% { unlist(nlminb(c(sum(d[k, zonePrimeList[[L1]]])/sum(subRHAPoplSize[k, zonePrimeList[[L1]]]), sum(u[k, zonePrimeList[[L1]]]) / sum(d[k, zonePrimeList[[L1]]])), loglikelihood2, lower = c(10^{-10}, 10^{-10}))[1:2], use.names=FALSE) } ## help(unlist) ratioStat <- foreach(i = 1:length(zoneList), .combine = 'c') %dopar% { exp(-zoneOptVec[i, 3] - zonePrimeOptVec[i, 3] + m5[[2]]) } meanRatio <- foreach(i = 1:length(zoneList), .combine = 'c') %dopar% { exp(log(zoneOptVec[i, 1]) + log(zoneOptVec[i, 2]) + zoneOptVec[i, 2] - log(exp(zoneOptVec[i, 2]) - 1) - log(zonePrimeOptVec[i, 1]) - log(zonePrimeOptVec[i, 2]) - zonePrimeOptVec[i, 2] + log(exp(zonePrimeOptVec[i, 2]) - 1)) } simLRStat[(M - 1)*totalTrials + N] <- max(ratioStat[meanRatio > 1]) simMRStat[(M - 1)*totalTrials + N] <- max(meanRatio[meanRatio > 1]) } #write.table(simLRStat, file = "F:/myR_folder/simLRStat.csv", row.names=FALSE, col.names=FALSE, quote=FALSE, sep = ",") #write.table(simMRStat, file = "F:/myR_folder/simMRStat.csv", row.names=FALSE, col.names=FALSE, quote=FALSE, sep = ",") } proc.time() - ptm sfStop() rm(N) rm(M) rm(m5) rm(zoneOptVec) rm(zonePrimeOptVec) rm(ratioStat) rm(meanRatio) # # # new parallel version ends