Matlab Sample 1

% % % this is a sample Matlab program to run by PBS on UNIX % % % k = csvread('./2004_2005/k.csv'); u = csvread('./u.csv'); I = csvread('./I.csv'); distMtx = csvread('./distMtx.csv'); subrhapopmatrix = csvread('./subrhapopmatrix.csv'); subrhapopsize = subrhapopmatrix(k, :)'; percentChosen = 7/100; popUpperBound = sum(subrhapopsize)*percentChosen; umale = csvread('./umale.csv'); ufemale = csvread('./ufemale.csv'); malesubrhapopmatrix = csvread('./malesubrhapopmatrix.csv'); femalesubrhapopmatrix = csvread('./femalesubrhapopmatrix.csv'); malepopvec = malesubrhapopmatrix(k, :)'; femalepopvec = femalesubrhapopmatrix(k, :)'; sumLogLikeQ = @(par, data) - sum(log(logarithmicQ(par, data))); sumLogLike1 = @(par, data) - sum(log(nbinDistribution(par, data))); % % % this codes read in multiple csv files % % % % % ie. fnumVisitEachID_subRHA_1_.csv and fnumVisitEachID_subRHA_2_.csv ... fnumVisitEachID = cell(I); mnumVisitEachID = cell(I); for i = 1:I myfilename = sprintf('./2004_2005/fnumVisitEachID_subRHA_%d_.csv', i); fnumVisitEachID{i} = (csvread(myfilename))'; myfilename = sprintf('./2004_2005/mnumVisitEachID_subRHA_%d_.csv', i); mnumVisitEachID{i} = (csvread(myfilename))'; end % % % % % % % % % % this section creates the list of all the test zones that have at % least 1 visit in the zone and outside the zone, instead of check to see % if a zone has events > 0, we check for events > 0.1 always count = 0; sRHAlist = 1:I; zoneList = cell(1); zonePrimeList = cell(1); for i = 1:I for j = 2:I tempZone = sRHAlist(tiedrank(distMtx(i, :)) < j); if sum(subrhapopsize(tempZone)) < popUpperBound && sum(u(k, tempZone) > 0.1) > 0 && sum(u(k, setdiff(sRHAlist, tempZone)) > 0.1) > 0 tempZonelist = sort(sRHAlist(tempZone)); indic1 = 1; for l = 1:length(zoneList) if length(tempZonelist) == length(zoneList{l}) && sum(ne(tempZonelist, zoneList{l})) == 0 indic1 = 0; break; end end if indic1 == 1 count = count + 1; zoneList{count} = tempZonelist; zonePrimeList{count} = setdiff(sRHAlist, tempZonelist); end end end end numberoftestzone = length(zoneList); clear count sRHAlist tempZone tempZonelist indic1 i j l; % % % % end % % % % % % % % % % Analysis under the Null begin % % % % % % options = optimset('Display', 'off', 'Algorithm', 'interior- point', 'FinDiffType', 'central'); % controls options of numerical optimization 'fmincon' % % % denominator of the ratio statistic % % % % male sectoin only data = cat(2, mnumVisitEachID{1:I}); data = data(data > 0.1); mthetahat = fmincon(@(par) sumLogLikeQ(par, data), 0.4, [], [], [], [], 10^(-9), 1, [], options); mnumofvisit = length(data); % number of unique visits made by male mdata = cell(1, 3); mdata{1} = umale(k, :); mdata{2} = malepopvec'; %% fixed sRHAs population size vector mdata{3} = mthetahat; % female sectoin only data = cat(2, fnumVisitEachID{1:I}); data = data(data > 0.1); fthetahat = fmincon(@(par) sumLogLikeQ(par, data), 0.4, [], [], [], [], 10^(-9), 1, [], options); fnumofvisit = length(data); % number of unique visits made by female fdata = cell(1, 3); fdata{1} = ufemale(k, :); fdata{2} = femalepopvec'; %% fixed sRHAs population size vector fdata{3} = fthetahat; % % separate optimizations initialtemp = (mnumofvisit + fnumofvisit)/sum(malepopvec + femalepopvec); % originally initialtemp = 0.001; [~, mfval] = fmincon(@(par) sumLogLike1(par, mdata), initialtemp, [], [], [], [], 10^(- 9), [], [], options); [~, ffval] = fmincon(@(par) sumLogLike1(par, fdata), initialtemp, [], [], [], [], 10^(- 9), [], [], options); % % results are the same as the above optimization % clear mdata fdata; % null analysis done here % % % % % data analysis beings % % % % % % % % analyze observed data % % % u_mzonedata1 = cell(1, numberoftestzone); u_mzonedata2 = cell(1, numberoftestzone); pop_mzonedata1 = cell(1, numberoftestzone); pop_mzonedata2 = cell(1, numberoftestzone); u_fzonedata1 = cell(1, numberoftestzone); u_fzonedata2 = cell(1, numberoftestzone); pop_fzonedata1 = cell(1, numberoftestzone); pop_fzonedata2 = cell(1, numberoftestzone); phat_male = zeros(numberoftestzone, 1); phat_female = zeros(numberoftestzone, 1); qhat_male = zeros(numberoftestzone, 1); qhat_female = zeros(numberoftestzone, 1); mnumVisitEachIDZone_cell1 = cell(1, numberoftestzone); mnumVisitEachIDZone_cell2 = cell(1, numberoftestzone); fnumVisitEachIDZone_cell1 = cell(1, numberoftestzone); fnumVisitEachIDZone_cell2 = cell(1, numberoftestzone); for j = 1:numberoftestzone u_mzonedata1{j} = umale(k, zoneList{j}); u_mzonedata2{j} = umale(k, zonePrimeList{j}); pop_mzonedata1{j} = malepopvec(zoneList{j}); pop_mzonedata2{j} = malepopvec(zonePrimeList{j}); u_fzonedata1{j} = ufemale(k, zoneList{j}); u_fzonedata2{j} = ufemale(k, zonePrimeList{j}); pop_fzonedata1{j} = femalepopvec(zoneList{j}); pop_fzonedata2{j} = femalepopvec(zonePrimeList{j}); mnumVisitEachIDZone_cell1{j} = cat(2, mnumVisitEachID{zoneList{j}}); mnumVisitEachIDZone_cell2{j} = cat(2, mnumVisitEachID{zonePrimeList{j}}); fnumVisitEachIDZone_cell1{j} = cat(2, fnumVisitEachID{zoneList{j}}); fnumVisitEachIDZone_cell2{j} = cat(2, fnumVisitEachID{zonePrimeList{j}}); end matlabpool local 6; parfor j = 1:numberoftestzone mnumVisitEachIDZone1 = mnumVisitEachIDZone_cell1{j}; mnumVisitEachIDZone2 = mnumVisitEachIDZone_cell2{j}; fnumVisitEachIDZone1 = fnumVisitEachIDZone_cell1{j}; fnumVisitEachIDZone2 = fnumVisitEachIDZone_cell2{j}; if sum(mnumVisitEachIDZone1) > 0.1 mnumVisitEachIDZone1 = mnumVisitEachIDZone1(mnumVisitEachIDZone1 > 0.1); phat_male(j) = fmincon(@(par) sumLogLikeQ(par, mnumVisitEachIDZone1), 0.4, [], [], [], [], 10^(- 9), 1, [], options); else phat_male(j) = mthetahat; % in case there is no data to estimate for p- hat_j_male under the alternative end mnumVisitEachIDZone2 = mnumVisitEachIDZone2(mnumVisitEachIDZone2 > 0.1); qhat_male(j) = fmincon(@(par) sumLogLikeQ(par, mnumVisitEachIDZone2), 0.4, [], [], [], [], 10^(- 9), 1, [], options); if sum(fnumVisitEachIDZone1) > 0.1 fnumVisitEachIDZone1 = fnumVisitEachIDZone1(fnumVisitEachIDZone1 > 0.1); phat_female(j) = fmincon(@(par) sumLogLikeQ(par, fnumVisitEachIDZone1), 0.4, [], [], [], [], 10^(- 9), 1, [], options); else phat_female(j) = fthetahat; % in case there is no data to estimate for p- hat_j_female under the alternative end fnumVisitEachIDZone2 = fnumVisitEachIDZone2(fnumVisitEachIDZone2 > 0.1); qhat_female(j) = fmincon(@(par) sumLogLikeQ(par, fnumVisitEachIDZone2), 0.4, [], [], [], [], 10^(- 9), 1, [], options); end clear mnumVisitEachIDZone1 mnumVisitEachIDZone2 fnumVisitEachIDZone1 fnumVisitEachIDZone2; temp4 = zeros(numberoftestzone, 2); temp5 = zeros(numberoftestzone, 2); temp6 = zeros(numberoftestzone, 2); temp7 = zeros(numberoftestzone, 2); parfor j = 1:numberoftestzone % male section begin data1 = cell(1, 3); data1{1} = u_mzonedata1{j}; data1{2} = pop_mzonedata1{j}'; %% sRHAs population size vector data1{3} = phat_male(j); data2 = cell(1, 3); data2{1} = u_mzonedata2{j}; data2{2} = pop_mzonedata2{j}'; %% sRHAs population size vector data2{3} = qhat_male(j); [mparhat4, fval4] = fmincon(@(par) sumLogLike1(par, data1), initialtemp, [], [], [], [], 10^(- 9), [], [], options); temp4(j, :) = [mparhat4, fval4]; [mparhat5, fval5] = fmincon(@(par) sumLogLike1(par, data2), initialtemp, [], [], [], [], 10^(- 9), [], [], options); temp5(j, :) = [mparhat5, fval5]; % male section end % female section % data1 = cell(1, 3); % reuse data1 and data2 to reduce redundancy data1{1} = u_fzonedata1{j}; data1{2} = pop_fzonedata1{j}'; %% sRHAs population size vector data1{3} = phat_female(j); % data2 = cell(1, 3); data2{1} = u_fzonedata2{j}; data2{2} = pop_fzonedata2{j}'; %% sRHAs population size vector data2{3} = qhat_female(j); [fparhat6, fval6] = fmincon(@(par) sumLogLike1(par, data1), initialtemp, [], [], [], [], 10^(- 9), [], [], options); temp6(j, :) = [fparhat6, fval6]; [fparhat7, fval7] = fmincon(@(par) sumLogLike1(par, data2), initialtemp, [], [], [], [], 10^(- 9), [], [], options); temp7(j, :) = [fparhat7, fval7]; % female section only end end % % % end % % % ratioStat = - temp4(:, 2) - temp5(:, 2) - temp6(:, 2) - temp7(:, 2) + mfval + ffval; meanRatio = (temp4(:, 1) .* phat_male ./ (1 - phat_male) + temp6(:, 1) .* phat_female ./ (1 - phat_female)) ... ./ (temp5(:, 1) .* qhat_male ./ (1 - qhat_male) + temp7(:, 1) .* qhat_female ./ (1 - qhat_female)); meanratiological = meanRatio > 1; % this yields which test zoen satisfy the inequality condition % % % this section sorts the observed likelihood ratios, with their mean % ratio statistics, and their associated zone ID % this section may be used to find secondary clusters zoneID = 1:numberoftestzone; % zone label from 1 to total number of test zones tempLRMR = [ratioStat(meanratiological), meanRatio(meanratiological)]; tempzoneID = zoneID(meanratiological); [sortedtempLRMR, index] = sortrows(tempLRMR, 1); sortedtempLRMR = sortedtempLRMR(fliplr(1:size(tempLRMR, 1)), :); zoneIDReport = tempzoneID(index(fliplr(1:size(tempLRMR, 1)), :)); teststatistic = sortedtempLRMR(1, 1); theMR = sortedtempLRMR(1, 2); % % % read in subregion name file, column 1 is integer 1 to 68, column 2 % has actual names R101, R102, ... fid = fopen('./regionname.csv', 'r'); regions = textscan(fid, ['%d8', '%s'], 'delimiter', ',', 'CollectOutput', true); fclose(fid); % % % end % % % % % % % % simulation for assessing significance begin % % % % % msubpopproption = malepopvec/sum(malepopvec); fsubpopproption = femalepopvec/sum(femalepopvec); % msubpopproption and fsubpopproption are resampling weight vectors % this section creates the list of all the test zones % count = 0; sRHAlist = 1:I; simZones = cell(1); simPrimeZones = cell(1); for i = 1:I for j = 2:I tempZone = sRHAlist(tiedrank(distMtx(i, :)) < j); if sum(subrhapopsize(tempZone)) < popUpperBound tempZonelist = sort(sRHAlist(tempZone)); indic1 = 1; for l = 1:length(simZones) if length(tempZonelist) == length(simZones{l}) && sum(ne(tempZonelist, simZones{l})) == 0 indic1 = 0; break; end end if indic1 == 1 count = count+ 1; simZones{count} = tempZonelist; simPrimeZones{count} = setdiff(sRHAlist, tempZonelist); end end end end numberoftestzones = length(simZones); clear count sRHAlist tempZone tempZonelist indic1 i j l; % end % options = optimset('Display', 'off', 'Algorithm', 'interior-point', 'FinDiffType', 'forward'); Trial = 999; s = RandStream('mcg16807', 'Seed', 6); % RandStream.setDefaultStream(s); % for server Matlab 2009 version RandStream.setGlobalStream(s); % for Matlab 2012+ % % male data prep section begin u_mzonedata1 = cell(1, numberoftestzone); u_mzonedata2 = cell(1, numberoftestzone); data12_mat = cell(1, numberoftestzones); data22_mat = cell(1, numberoftestzones); for j = 1:numberoftestzones data12_mat{j} = malepopvec(simZones{j}); data22_mat{j} = malepopvec(simPrimeZones{j}); end % % male section end % % female data prep section begin u_fzonedata1 = cell(1, numberoftestzone); u_fzonedata2 = cell(1, numberoftestzone); data14_mat = cell(1, numberoftestzones); data24_mat = cell(1, numberoftestzones); for j = 1:numberoftestzones data14_mat{j} = femalepopvec(simZones{j}); data24_mat{j} = femalepopvec(simPrimeZones{j}); end % % female section end % % overall section begin simallLRMR06v4 = zeros(1, Trial); malevisiteachid = cat(2, mnumVisitEachID{1:I}); femalevisiteachid = cat(2, fnumVisitEachID{1:I}); mnumVisitEachIDZone_cell1 = cell(1, numberoftestzone); mnumVisitEachIDZone_cell2 = cell(1, numberoftestzone); fnumVisitEachIDZone_cell1 = cell(1, numberoftestzone); fnumVisitEachIDZone_cell2 = cell(1, numberoftestzone); tic for L = 1:Trial % male and female data setup msimSRHAvector = randsample(1:I, mnumofvisit, true, msubpopproption); fsimSRHAvector = randsample(1:I, fnumofvisit, true, fsubpopproption); for i = 1:I mdata{1}(i) = sum(malevisiteachid(msimSRHAvector == i)); % mdata{1} is the new umale(k, :) under MC Simulation fdata{1}(i) = sum(femalevisiteachid(fsimSRHAvector == i)); % fdata{1} is the new ufemale(k, :) under MC Simulation end [~, mfval] = fmincon(@(par) sumLogLike1(par, mdata), initialtemp, [], [], [], [], 10^(- 9), [], [], options); [~, ffval] = fmincon(@(par) sumLogLike1(par, fdata), initialtemp, [], [], [], [], 10^(- 9), [], [], options); % mthetahat remains a constant under Ho that needs not be estimated in MC Simulations % fthetahat remains a constant under Ho that needs not be estimated in MC Simulations % overall analysis accounting for gender stratification under Ha for j = 1:numberoftestzones u_mzonedata1{j} = mdata{1}(simZones{j}); u_mzonedata2{j} = mdata{1}(simPrimeZones{j}); u_fzonedata1{j} = fdata{1}(simZones{j}); u_fzonedata2{j} = fdata{1}(simPrimeZones{j}); mnumVisitEachIDZone_cell1{j} = cat(2, mnumVisitEachID{simZones{j}}); mnumVisitEachIDZone_cell2{j} = cat(2, mnumVisitEachID{simPrimeZones{j}}); fnumVisitEachIDZone_cell1{j} = cat(2, fnumVisitEachID{simZones{j}}); fnumVisitEachIDZone_cell2{j} = cat(2, fnumVisitEachID{simPrimeZones{j}}); end parfor j = 1:numberoftestzone mnumVisitEachIDZone1 = mnumVisitEachIDZone_cell1{j}; mnumVisitEachIDZone2 = mnumVisitEachIDZone_cell2{j}; fnumVisitEachIDZone1 = fnumVisitEachIDZone_cell1{j}; fnumVisitEachIDZone2 = fnumVisitEachIDZone_cell2{j}; if sum(mnumVisitEachIDZone1) > 0.1 mnumVisitEachIDZone1 = mnumVisitEachIDZone1(mnumVisitEachIDZone1 > 0.1); phat_male(j) = fmincon(@(par) sumLogLikeQ(par, mnumVisitEachIDZone1), 0.4, [], [], [], [], 10^(- 9), 1, [], options); else phat_male(j) = mthetahat; % in case there is no data to estimate for p- hat_j_male under the alternative end mnumVisitEachIDZone2 = mnumVisitEachIDZone2(mnumVisitEachIDZone2 > 0.1); qhat_male(j) = fmincon(@(par) sumLogLikeQ(par, mnumVisitEachIDZone2), 0.4, [], [], [], [], 10^(- 9), 1, [], options); if sum(fnumVisitEachIDZone1) > 0.1 fnumVisitEachIDZone1 = fnumVisitEachIDZone1(fnumVisitEachIDZone1 > 0.1); phat_female(j) = fmincon(@(par) sumLogLikeQ(par, fnumVisitEachIDZone1), 0.4, [], [], [], [], 10^(- 9), 1, [], options); else phat_female(j) = fthetahat; % in case there is no data to estimate for p- hat_j_female under the alternative end fnumVisitEachIDZone2 = fnumVisitEachIDZone2(fnumVisitEachIDZone2 > 0.1); qhat_female(j) = fmincon(@(par) sumLogLikeQ(par, fnumVisitEachIDZone2), 0.4, [], [], [], [], 10^(- 9), 1, [], options); end temp4 = NaN(numberoftestzone, 2); temp5 = NaN(numberoftestzone, 2); temp6 = NaN(numberoftestzone, 2); temp7 = NaN(numberoftestzone, 2); parfor j = 1:numberoftestzone data1 = cell(1, 3); data1{1} = u_mzonedata1{j}; data1{2} = data12_mat{j}'; %% sRHAs population size vector data1{3} = phat_male(j); data2 = cell(1, 3); data2{1} = u_mzonedata2{j}; data2{2} = data22_mat{j}'; %% sRHAs population size vector data2{3} = qhat_male(j); [mparhat4, fval4] = fmincon(@(par) sumLogLike1(par, data1), initialtemp, [], [], [], [], 10^(- 9), [], [], options); temp4(j, :) = [mparhat4, fval4]; [mparhat5, fval5] = fmincon(@(par) sumLogLike1(par, data2), initialtemp, [], [], [], [], 10^(- 9), [], [], options); temp5(j, :) = [mparhat5, fval5]; % data1 = cell(1, 3); data1{1} = u_fzonedata1{j}; data1{2} = data14_mat{j}'; %% sRHAs population size vector data1{3} = phat_female(j); % data2 = cell(1, 3); data2{1} = u_fzonedata2{j}; data2{2} = data24_mat{j}'; %% sRHAs population size vector data2{3} = qhat_female(j); [fparhat6, fval6] = fmincon(@(par) sumLogLike1(par, data1), initialtemp, [], [], [], [], 10^(- 9), [], [], options); temp6(j, :) = [fparhat6, fval6]; [fparhat7, fval7] = fmincon(@(par) sumLogLike1(par, data2), initialtemp, [], [], [], [], 10^(- 9), [], [], options); temp7(j, :) = [fparhat7, fval7]; end ratioStat = - temp4(:, 2) - temp5(:, 2) - temp6(:, 2) - temp7(:, 2) + mfval + ffval; meanratiological = (temp4(:, 1) .* phat_male ./ (1 - phat_male) + temp6(:, 1) .* phat_female ./ (1 - phat_female)) ... > (temp5(:, 1) .* qhat_male ./ (1 - qhat_male) + temp7(:, 1) .* qhat_female ./ (1 - qhat_female)); simallLRMR06v4(L) = max(ratioStat(meanratiological)); end timing = toc; matlabpool close; save('./2004_2005/simallLRMR06v4[NB]_999Trials.mat', 'simallLRMR06v4', '-v7.3'); % % saves the timing in the file tictocfile_simallLRMR06v4[NB]_999.txt fid = fopen('./2004_2005/tictocfile_simallLRMR06v4[NB]_999.txt', 'w'); fprintf(fid, ['Elapsed time is ', num2str(timing), ' seconds for running 999 simulation trials.\n']); fclose(fid); % % % % % % % % % % % % % % % this section outputs in text the most likely cluster with its p- value % and checks for possible secondary likely clusters of events, but be % careful in describing how these zones are considered significant and % report these zones manually cluster = transpose(cellstr(char(regions{2}{zoneList{zoneIDReport(1)}}))); rankvector = tiedrank([simallLRMR06v4, teststatistic]); fid = fopen('./2004_2005/resultofstratificationanalysis[NB].txt', 'wt'); % 'wt' is a very important argument, 'w' alone does not output properly fprintf(fid, ['* The most likely event cluster includes the subregions: ', num2str(zoneList{zoneIDReport(1)}), '\n']); fprintf(fid, ['* The most likely event cluster includes the subregions: ', sprintf('%s ', cluster{:}), '\n']); fprintf(fid, ['* The observed statistic is: ', num2str(teststatistic), '\n']); fprintf(fid, ['* The observed mean ratio statistic associated to the observed statistic is: ', num2str(theMR), '\n']); fprintf(fid, ['* The p- value is: ', num2str((2 + Trial - rankvector(end)) / (Trial+ 1)), '\n\n']); % adds p- value to the analysis report for i = 2:25 cluster = transpose(cellstr(char(regions{2}{zoneList{zoneIDReport(i)}}))); rankvector = tiedrank([simallLRMR06v4, sortedtempLRMR(i, 1)]); fprintf(fid, ['Possible secondary event cluster includes the subregions: ', num2str(zoneList{zoneIDReport(i)}), '\n']); fprintf(fid, ['The likely secondary event cluster includes the subregions: ', sprintf('%s ', cluster{:}), '\n']); fprintf(fid, ['The observed statistic of this cluster is: ', num2str(sortedtempLRMR(i, 1)), '\n']); fprintf(fid, ['The observed mean ratio statistic associated to the cluster is: ', num2str(sortedtempLRMR(i, 2)), '\n']); fprintf(fid, ['The p- value is: ', num2str((2 + Trial - rankvector(end)) / (Trial + 1)), '\n\n']); % adds p- value to the analysis report end % investigate the top 25 most likely secondary event clusters for their significance fclose(fid); clear all; exit % % % % % % % % % % % % % program end % % % % % % % % % % % % % % updated on April 13, 2013