網頁廣告假點擊機率計算

場景

    • 假定網際網路中,有平均 40% 點擊是假點擊(由特定程式發起)。

    • 隨機抽樣 380 家網站。

問題

    • 求算樣本比例之標準誤差,即 sqrt(p(1-p)/n) = sqrt(0.4 × 0.6 / 380)。(0.0251)

    • 求算樣本比例落在母體比例 ± 0.04 範圍內(0.36 至 0.44)之機率。(0.8885)

    • 求算樣本比例 >= 0.45 之機率。(0.0233)

GNU R

# Sampling-distribution helpers and a worked example for a sample proportion.
# Every probability below relies on the normal approximation.

# Mean of the sampling distribution.
#   populationMean: mean of the population.
# Returns the argument unchanged.
SampleDistributionMean <- function(populationMean) {
  populationMean
}

# Standard deviation of the sampling distribution.
#   populationStdDeviation: standard deviation of the population.
#   sampleSize:             number of observations per sample.
# Returns populationStdDeviation / sqrt(sampleSize).
SampleDistributionStdDeviation <- function(populationStdDeviation,
                                           sampleSize) {
  populationStdDeviation / sqrt(sampleSize)
}

# Standard error of the sampling distribution.
#   sampleDistributionStdDeviation: standard deviation of the sampling
#                                   distribution.
# Returns the argument unchanged.
SampleDistributionStdError <- function(sampleDistributionStdDeviation) {
  sampleDistributionStdDeviation
}

# Probability that the sample statistic falls within a symmetric band around
# the population mean, with the band given as a fraction of that mean.
#   populationMean:        mean of the population.
#   sampleStdDeviation:    standard deviation of the sampling distribution.
#   samplePercentageRange: half-width of the band as a fraction of
#                          populationMean (0.10 means +/- 10% of the mean).
# Returns P(|statistic - mean| <= populationMean * samplePercentageRange).
SampleDistributionPercentageRangeProbability <- function(populationMean,
                                                         sampleStdDeviation,
                                                         samplePercentageRange) {
  # Convert the relative half-width to an absolute one and reuse the
  # absolute-band computation.
  SampleDistributionValueRangeProbability(
    sampleStdDeviation,
    populationMean * samplePercentageRange
  )
}

# Probability that the sample statistic falls within a symmetric band around
# the population mean, with the band given as an absolute half-width.
#   sampleStdDeviation: standard deviation of the sampling distribution.
#   sampleValueRange:   half-width of the band, in the statistic's units.
# Returns P(|statistic - mean| <= sampleValueRange).
SampleDistributionValueRangeProbability <- function(sampleStdDeviation,
                                                    sampleValueRange) {
  # The band is centred on the mean, so the deviation is evaluated against
  # a zero-mean normal with the sampling standard deviation.
  upper <- pnorm(sampleValueRange, sd = sampleStdDeviation)
  lower <- pnorm(-1 * sampleValueRange, sd = sampleStdDeviation)
  upper - lower
}

# Lower-tail probability of the sampling distribution.
#   sampleMean:         mean of the sampling distribution.
#   sampleStdDeviation: standard deviation of the sampling distribution.
#   sampleValue:        threshold value.
# Returns P(statistic <= sampleValue); use 1 - result for the upper tail.
SampleDistributionProbability <- function(sampleMean, sampleStdDeviation,
                                          sampleValue) {
  z <- (sampleValue - sampleMean) / sampleStdDeviation
  pnorm(z)
}

# Draw the normal density over mean +/- 3 standard deviations.
#   meanValue:    mean of the normal distribution.
#   stdDeviation: standard deviation of the normal distribution.
# Returns the chart title (visibly, so a top-level call also prints it).
PlotNormalDistributionChart <- function(meanValue, stdDeviation) {
  myChartTitle <- sprintf("平均值=%.2f 標準差=%.4f", meanValue, stdDeviation)
  curve(
    dnorm(x, mean = meanValue, sd = stdDeviation),
    from = meanValue - 3 * stdDeviation,
    to = meanValue + 3 * stdDeviation,
    n = 100,
    main = myChartTitle,
    xlab = "值",
    ylab = "機率密度"
  )
  myChartTitle
}

# Worked example ----

populationMean <- 0.40
sampleSize <- 380

sampleMean <- SampleDistributionMean(populationMean)

# The quantity studied is a proportion p, so the population standard
# deviation is sqrt(p * (1 - p)). Passing the mean itself here would
# understate the standard error.
populationStdDeviation <- sqrt(populationMean * (1 - populationMean))
sampleStdDeviation <- SampleDistributionStdDeviation(
  populationStdDeviation,
  sampleSize
)
print(sprintf(
  "樣本數=%d 平均值=%.2f 標準差=%.4f",
  round(sampleSize), sampleMean, sampleStdDeviation
))

# Probability that the sample proportion lies within +/- 0.04 of the mean.
sampleValueRange <- 0.04
myProbability <- SampleDistributionValueRangeProbability(
  sampleStdDeviation,
  sampleValueRange
)
print(sprintf("區間介於 %.2f 機率=%.4f", sampleValueRange, myProbability))

# Upper-tail probability that the sample proportion is at least 0.45.
sampleValue <- 0.45
myProbability <- 1 - SampleDistributionProbability(
  sampleMean,
  sampleStdDeviation,
  sampleValue
)
print(sprintf("區間>=%.2f 機率=%.4f", sampleValue, myProbability))

PlotNormalDistributionChart(sampleMean, sampleStdDeviation)