樣本比例機率計算

場景

    • 已知母體某項特質比例(p),如:台灣網民比例為 30%,即 p = 0.3。
    • 已知樣本數(Sample Size),如:抽樣調查 100~1000 人 。
    • 已知樣本區間(Sample Proportion Range,即樣本比例與母體比例之間容許的差距),如:±4%。
    • 求樣本比例落在母體比例 ± 樣本區間(如:±1%~±10%)內的機率。

公式

    • E(p̂) = p(p̂ 為樣本比例,p 為母體比例)
    • σ(p̂) = SQRT( (p * (1 - p)) / (n) )(n 為樣本數)
    • z-score = ± R / σ(p̂)(R 為樣本區間,例如 ±4% 時 R = 0.04)
    • Probability = Probability(z+) - Probability(z-)

觀察

    • 已知樣本區間,觀察樣本數與機率變化:
    • 已知樣本數,觀察樣本區間與機率變化:

GNU R

# Standard deviation of a sample proportion when sampling from an
# infinite population: sqrt(p * (1 - p) / n).
#
# Args:
#   samplePortion: proportion p (the parameter name is kept as-is so that
#                  existing named-argument callers keep working).
#   sampleSize:    number of observations n in the sample.
# Returns:
#   The standard deviation; vectorised over both arguments.
SampleProportionStdDeviation <- function(samplePortion, sampleSize) {
  proportionVariance <- samplePortion * (1 - samplePortion)
  sqrt(proportionVariance / sampleSize)
}

# Standard deviation of a sample proportion when sampling from a finite
# population: the infinite-population variance scaled by
# (N - n) / (N - 1).
#
# Args:
#   samplePortion:  proportion p.
#   sampleSize:     number of observations n in the sample.
#   populationSize: population size N.
# Returns:
#   The standard deviation; vectorised over its arguments.
SampleFiniteProportionStdDeviation <- function(samplePortion, sampleSize,
                                               populationSize) {
  proportionVariance <- samplePortion * (1 - samplePortion)
  finiteCorrection <- (populationSize - sampleSize) / (populationSize - 1)
  sqrt(finiteCorrection * proportionVariance / sampleSize)
}

# Probability that a zero-mean normal variable with the given standard
# deviation falls between the two offsets m and n.
#
# The endpoints may be passed in either order; the larger one is always
# used as the upper bound. (The misspelled function name is kept on
# purpose so existing callers do not break.)
#
# Args:
#   stdDeviation: standard deviation used to turn offsets into z-scores.
#   m, n:         interval endpoints, expressed as offsets from the mean.
# Returns:
#   pnorm(upper / stdDeviation) - pnorm(lower / stdDeviation).
ProbaibilityRange <- function(stdDeviation, m, n) {
  # pmax/pmin order the endpoints element-wise, so vector inputs work too.
  upperOffset <- pmax(m, n)
  lowerOffset <- pmin(m, n)
  pnorm(upperOffset / stdDeviation) - pnorm(lowerOffset / stdDeviation)
}

#########################################################################

sampleProportion <- 0.75

# task == 0: fixed interval, varying sample size.
# anything else: fixed sample size, varying interval.
task <- 1

if (task == 0) {
  # Interval is fixed; observe how the probability changes with sample size.
  sampleProportionRange <- 0.04
  seedSet <- seq(from = 100, to = 1000, by = 100)
  # Size the result vector from the seeds instead of hard-coding 10.
  resultSet <- vector(mode = "double", length = length(seedSet))

  for (pos in seq_along(seedSet)) {
    sampleSize <- seedSet[pos]
    # Use the top-level sampleProportion; `samplePortion` is only a
    # function parameter name and is not defined at this level.
    myStdDeviation <- SampleProportionStdDeviation(sampleProportion, sampleSize)
    resultSet[pos] <- ProbaibilityRange(
      myStdDeviation, sampleProportionRange, -1 * sampleProportionRange
    )
    print(sprintf("σ=%.4f p=%.4f", myStdDeviation, resultSet[pos]))
  }

  # %.0f instead of %d: the percentages are doubles, and %d raises an error
  # when the product is not exactly a whole number (e.g. 0.29 * 100).
  plot(
    seedSet, resultSet,
    main = sprintf("樣本比例=%.0f%% 樣本區間=%.0f%%",
                   sampleProportion * 100, sampleProportionRange * 100),
    xlab = "樣本數", ylab = "機率"
  )
} else {
  # Sample size is fixed; observe how the probability changes with the
  # interval half-width.
  seedSet <- seq(from = 0.01, to = 0.1, by = 0.01)
  sampleSize <- 100
  resultSet <- vector(mode = "double", length = length(seedSet))
  myStdDeviation <- SampleProportionStdDeviation(sampleProportion, sampleSize)

  # Index by position rather than by `range / 0.01`: that quotient can come
  # out just below a whole number, and R truncates fractional indices, so a
  # result could overwrite the previous slot and leave its own slot at 0.
  for (pos in seq_along(seedSet)) {
    sampleProportionRange <- seedSet[pos]
    resultSet[pos] <- ProbaibilityRange(
      myStdDeviation, sampleProportionRange, -1 * sampleProportionRange
    )
    print(sprintf("σ=%.4f p=%.4f", myStdDeviation, resultSet[pos]))
  }

  plot(
    seedSet, resultSet,
    main = sprintf("樣本比例=%.0f%% 樣本數=%d",
                   sampleProportion * 100, sampleSize),
    xlab = "樣本區間", ylab = "機率"
  )
}