統計推論(Statistical Inference)

GNU R:

# Statistical formulas and procedures
#
# Conventions used throughout this file:
#   * myData / xData / yData are numeric vectors.
#   * isSample is a single logical: TRUE selects the sample estimator
#     (n - 1 denominator), FALSE the population one (n denominator).
#   * Parameters named myConfidenceInterval are passed as a proportion
#     (e.g. 0.95) and converted to the two-sided quantile level
#     c + (1 - c) / 2 before the lookup.

############ Basics ############

# Arithmetic mean of myData, delegating to base::mean().
Mean <- function(myData) {
  mean(myData)
}

# Arithmetic mean of myData computed by hand as sum / count.
Mean2 <- function(myData) {
  sum(myData) / length(myData)
}

# Difference of two means (myMean1 - myMean2).
MeanOf2 <- function(myMean1, myMean2) {
  myMean1 - myMean2
}

# Median of myData, delegating to stats::median().
Median <- function(myData) {
  median(myData)
}

# Median of myData computed by hand.
# The data are sorted first: picking the middle position of unsorted input
# does not give the median. Empty input and input containing NA yield NA,
# mirroring stats::median().
Median2 <- function(myData) {
  myCount <- length(myData)
  if (myCount == 0 || anyNA(myData)) {
    return(NA_real_)
  }
  mySorted <- sort(myData)
  myHalf <- myCount %/% 2
  if (myCount %% 2 == 1) {
    # Odd count: the single middle element.
    mySorted[myHalf + 1]
  } else {
    # Even count: average of the two middle elements.
    (mySorted[myHalf] + mySorted[myHalf + 1]) / 2
  }
}

# Variance of myData.
# Sample variance comes from stats::var(); population variance is the mean
# of the squared deviations from the mean.
Variance <- function(myData, isSample) {
  if (isSample) {
    var(myData)
  } else {
    Mean((myData - Mean(myData))^2)
  }
}

# Variance of myData computed entirely by hand.
# Returns sum of squared deviations divided by (n - 1) for a sample, or the
# mean of the squared deviations for a population.
Variance2 <- function(myData, isSample) {
  mySquaredDeviation <- (myData - Mean(myData))^2
  if (isSample) {
    sum(mySquaredDeviation) / (length(myData) - 1)
  } else {
    Mean(mySquaredDeviation)
  }
}

# Pooled variance of two samples:
# ((n1 - 1) * s1^2 + (n2 - 1) * s2^2) / (n1 + n2 - 2).
PooledVarianceOf2 <- function(mySampleVariance1, myNumOfSample1,
                              mySampleVariance2, myNumOfSample2) {
  myDegreeOfFreedom <- myNumOfSample1 + myNumOfSample2 - 2
  ((myNumOfSample1 - 1) * mySampleVariance1 +
     (myNumOfSample2 - 1) * mySampleVariance2) / myDegreeOfFreedom
}

# Standard deviation of myData.
# Sample: stats::sd(); population: square root of Variance(myData, FALSE).
StdDeviation <- function(myData, isSample) {
  if (isSample) {
    sd(myData)
  } else {
    sqrt(Variance(myData, isSample))
  }
}

# Standard deviation of myData as the square root of Variance2().
StdDeviation2 <- function(myData, isSample) {
  sqrt(Variance2(myData, isSample))
}

# Interquartile range of myData, delegating to stats::IQR().
InterQuartileRange <- function(myData) {
  IQR(myData)
}

# Coefficient of variation, as a percentage: 100 * (std deviation / mean).
CoefficientOfVariation <- function(myStdDevation, myMean) {
  100 * (myStdDevation / myMean)
}

# Coefficient of variation (percentage) computed directly from a vector.
# Uses StdDeviation(); the previous spelling "StdDevation" referred to a
# function that does not exist and failed at call time.
CoefficientOfVariationFromVector <- function(myData, isSample) {
  myMean <- Mean(myData)
  myStdDevation <- StdDeviation(myData, isSample)
  100 * (myStdDevation / myMean)
}

# z-score of myValue: (myValue - myMean) / myStdDevation.
# This was originally also named Z, but the table-lookup Z(myZ) defined
# further down overwrote it when the file was sourced, leaving the z-score
# unreachable. It is exposed under its own name so both remain usable.
ZScore <- function(myStdDevation, myMean, myValue) {
  (myValue - myMean) / myStdDevation
}

# Covariance of xData and yData.
# Sum of cross-deviations divided by (n - 1) for a sample, or their mean
# for a population. The two vectors must have the same length; otherwise R
# would silently recycle the shorter one.
Covariance <- function(xData, yData, isSample) {
  if (length(xData) != length(yData)) {
    stop("xData and yData must have the same length", call. = FALSE)
  }
  myCrossDeviation <- (xData - Mean(xData)) * (yData - Mean(yData))
  if (isSample) {
    sum(myCrossDeviation) / (length(xData) - 1)
  } else {
    Mean(myCrossDeviation)
  }
}

# Pearson product-moment correlation coefficient of two samples:
# sample covariance divided by the product of the sample standard
# deviations. The sample covariance comes from Covariance(..., TRUE); the
# previously referenced SampleCovariance() is not defined anywhere here.
PearsonProductMomentCorrelationCoefficient <- function(xData, yData) {
  xStdDeviation <- StdDeviation(xData, TRUE)
  yStdDeviation <- StdDeviation(yData, TRUE)
  Covariance(xData, yData, TRUE) / (xStdDeviation * yStdDeviation)
}

# Weighted mean: sum(myData * myWeight) / sum(myWeight).
WeightedMean <- function(myData, myWeight) {
  sum(myData * myWeight) / sum(myWeight)
}

# Mean of grouped data: sum(f * M) / sum(f), where myFreqence holds the
# class frequencies f and myMidPointClass the class midpoints M.
# The divisor is the total number of observations, not the class count.
MeanForGroupedData <- function(myFreqence, myMidPointClass) {
  sum(myFreqence * myMidPointClass) / sum(myFreqence)
}

# Variance of grouped data: sum(f * (M - mean)^2) divided by (n - 1) for a
# sample or n for a population, where n = sum(f) is the total number of
# observations.
VarianceForGroupedData <- function(myFreqence, myMidPointClass, isSample) {
  myMean <- MeanForGroupedData(myFreqence, myMidPointClass)
  myTotal <- sum(myFreqence)
  mySumOfSquares <- sum(myFreqence * (myMidPointClass - myMean)^2)
  if (isSample) {
    mySumOfSquares / (myTotal - 1)
  } else {
    mySumOfSquares / myTotal
  }
}

# Frequency count of each distinct value in myData, obtained from
# summary(factor(myData)).
Frequency <- function(myData) {
  summary(factor(myData))
}

############ Table lookup ############

# Probability from a z value: pnorm(myZ).
Z <- function(myZ) {
  pnorm(myZ)
}

# z value from a probability: qnorm(myProbability).
GetZ <- function(myProbability) {
  qnorm(myProbability)
}

# Probability from a t value: pt(myT, myDegreeOfFreedom).
# NOTE(review): defining a global `T` masks R's `T` shorthand for TRUE in
# any code that runs after this file is sourced; the name is kept so
# existing callers keep working.
T <- function(myT, myDegreeOfFreedom) {
  pt(myT, myDegreeOfFreedom)
}

# t value from a probability and degrees of freedom.
GetT <- function(myProbability, myDegreeOfFreedom) {
  qt(myProbability, myDegreeOfFreedom)
}

# t value from an upper-tail probability and degrees of freedom.
GetTU <- function(myUpperTailProbability, myDegreeOfFreedom) {
  qt(1 - myUpperTailProbability, myDegreeOfFreedom)
}

# t value from a lower-tail probability and degrees of freedom.
GetTL <- function(myLowerTailProbability, myDegreeOfFreedom) {
  qt(myLowerTailProbability, myDegreeOfFreedom)
}

############ Margin of error with known population std deviation ############

# Margin of error: z * (population std deviation / sqrt(sample size)),
# with z taken at the two-sided level for myConfidenceInterval.
MarginErrorOfKnownStdDeviation <- function(myPopulationStdDeviation,
                                           mySampleSize,
                                           myConfidenceInterval) {
  myZ <- GetZ(myConfidenceInterval + (1 - myConfidenceInterval) / 2)
  myZ * (myPopulationStdDeviation / sqrt(mySampleSize))
}

############ Statistical inference ############

# Sample size > 30

# Standard error of the difference of two means:
# sqrt(s1^2 / n1 + s2^2 / n2).
StdDeviationOf2 <- function(myStdDeviation1, myNumOfSample1,
                            myStdDeviation2, myNumOfSample2) {
  sqrt((myStdDeviation1^2) / myNumOfSample1 +
         (myStdDeviation2^2) / myNumOfSample2)
}

# Margin of error for the difference of two means: z * myStdDeviationOf2,
# with z taken at the two-sided level for myConfidenceInterval.
MarginErrorOfKnownStdDeviationOf2 <- function(myStdDeviationOf2,
                                              myConfidenceInterval) {
  myZ <- GetZ(myConfidenceInterval + (1 - myConfidenceInterval) / 2)
  myZ * myStdDeviationOf2
}

# Sample size < 30

# Standard error of the difference of two means based on the pooled
# variance: sqrt(pooled variance * (1 / n1 + 1 / n2)).
PooledStdDeviationOf2 <- function(mySampleVariance1, myNumOfSample1,
                                  mySampleVariance2, myNumOfSample2) {
  myPooledVarianceOf2 <- PooledVarianceOf2(mySampleVariance1, myNumOfSample1,
                                           mySampleVariance2, myNumOfSample2)
  myCoefficient <- 1 / myNumOfSample1 + 1 / myNumOfSample2
  sqrt(myPooledVarianceOf2 * myCoefficient)
}

# Margin of error for the difference of two means using the pooled
# standard error and a t value with n1 + n2 - 2 degrees of freedom.
# isSampleSizeSmall is accepted for compatibility with existing callers
# but is not used by the computation.
PooledMarginErrorOf2 <- function(mySampleVariance1, myNumOfSample1,
                                 mySampleVariance2, myNumOfSample2,
                                 myConfidenceInterval, isSampleSizeSmall) {
  myT <- GetT(myConfidenceInterval + (1 - myConfidenceInterval) / 2,
              myNumOfSample1 + myNumOfSample2 - 2)
  myStdDeviationOf2 <- PooledStdDeviationOf2(mySampleVariance1, myNumOfSample1,
                                             mySampleVariance2, myNumOfSample2)
  myT * myStdDeviationOf2
}

應用: