Welcome to R Square

非均匀骰子

楚新元 / 2026-06-11


抛掷两枚均匀骰子,根据中心极限定理,多次独立投掷得到的点数和的总和随试验次数不断增加,其抽样分布会逐渐趋近于正态分布。

现在的问题是:如果两枚骰子相同,但是它们是非均匀的,也就是说每个面出现的概率并不相等,那么我们如果通过统计 2 ~ 12 每个数的频率倒推 6 个骰子单面概率?

投掷两个均匀骰子

roll = \() {
  die = 1:6  # 一枚骰子的 6 个点数
  dice = sample(
    x = die, size = 2, replace = TRUE,
    prob = c(1/6, 1/6, 1/6, 1/6, 1/6, 1/6)  # 不同点数的概率
  )
  sum(dice)
}

set.seed(1234)
rolls = replicate(100000, roll())
mu = mean(rolls)
sigma = sd(rolls)

library(ggplot2)
theme_update(
  plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
  plot.subtitle = element_text(hjust = 0.5), 
  axis.title = element_text(size = 14),
  axis.text = element_text(size = 12)
)

rolls |> 
  as.data.frame() |> 
  ggplot(aes(x = rolls)) +
  geom_bar(
    stat = "count", 
    width = 0.9, 
    fill = "skyblue", 
    color = "black"
  ) + 
  stat_function(
    fun = \(x) dnorm(x, mean = mu, sd = sigma) * length(rolls),
    color = "#8B0000", linewidth = 1.2
  ) +
  scale_x_continuous(breaks = 2:12) +
  labs(
    title = "Rolling Two Fair Dice", 
    x = "Sum of Dice", 
    y = "Frequency"
  ) + 
  theme_bw()

投掷两个非均匀骰子

我们假定每个骰子的 1 ~ 6 点出现的概率分别为:$\dfrac{1}{4},\ \dfrac{1}{8},\ \dfrac{1}{8},\ \dfrac{1}{8},\ \dfrac{1}{8},\ \dfrac{1}{4}$,统计各点数频数如下:

roll2 = \() {
  die = 1:6
  dice = sample(
    x = die, size = 2, replace = TRUE,
    prob = c(1/8, 1/8, 1/8, 1/8, 1/8, 3/8)
  )
  sum(dice)
}

set.seed(1234)
rolls2 = replicate(100000, roll2())
mu2 = mean(rolls2)
sigma2 = sd(rolls2)

rolls2 |> 
  as.data.frame() |> 
  ggplot(aes(x = rolls2)) +
  geom_bar(
    stat = "count", 
    width = 0.9, 
    fill = "skyblue", 
    color = "black"
  ) + 
  stat_function(
    fun = \(x) dnorm(x, mean = mu2, sd = sigma2) * length(rolls2),
    color = "#8B0000", linewidth = 1.2
  ) +
  scale_x_continuous(breaks = 2:12) +
  labs(
    title = "Rolling Two Unfair Dice", 
    x = "Sum of Dice", 
    y = "Frequency"
  ) + 
  theme_bw()

反向推导 6 个骰子单面概率

$\S1$ 问题梳理

已知:

目标:

$\S2$ 单面概率卷积

$$P(S=k)=\sum_{i=1}^{k-1}p_i,p_{k-i},\quad k=2,\dots,12$$

\(k\)组合\(P(S=k)\)表达式
2\(1+1\)\(p_1^2\)
3\(1+2,2+1\)\(2p_1p_2\)
4\(1+3,2+2,3+1\)\(2p_1p_3+p_2^2\)
5\(1+4,2+3,3+2,4+1\)\(2p_1p_4+2p_2p_3\)
6\(1+5,2+4,3+3,4+2,5+1\)\(2p_1p_5+2p_2p_4+p_3^2\)
7\(1+6,2+5,3+4,4+3,5+2,6+1\)\(2p_1p_6+2p_2p_5+2p_3p_4\)
8\(2+6,3+5,4+4,5+3,6+2\)\(2p_2p_6+2p_3p_5+p_4^2\)
9\(3+6,4+5,5+4,6+3\)\(2p_3p_6+2p_4p_5\)
10\(4+6,5+5,6+4\)\(2p_4p_6+p_5^2\)
11\(5+6,6+5\)\(2p_5p_6\)
12\(6+6\)\(p_6^2\)

设:$n$ = 总试验次数(100000),$n_k$:样本中和为 $k$ 的频数,$\displaystyle\sum_{k=2}^{12}n_k=n$

$\S3$ 极大似然估计

似然函数(多项式分布):

$$L(\boldsymbol p)\propto \prod_{k=2}^{12}\big[P(S=k;\boldsymbol p)\big]^{n_k}$$

取负对数:

$$-\ln L = -\sum_{k=2}^{12}n_k\cdot \ln\big(P(S=k;\boldsymbol p)\big)$$

约束条件:

$$p_1+p_2+p_3+p_4+p_5+p_6=1,\quad p_i>0$$

优化目标:在约束下最小化 $-\ln L$,得到最优 \(\hat p_1,\dots,\hat p_6\)

$\S4$ R 代码实现

# 统计各和 k 的频数 nk
nk = table(factor(rolls2, levels = 2:12))

# 构造负对数似然函数
negLogLik = \(par){
  p1 = par[1]
  p2 = par[2]
  p3 = par[3]
  p4 = par[4]
  p5 = par[5]
  p6 = 1 - (p1 + p2 + p3 + p4 + p5)
  p = c(p1, p2, p3, p4, p5, p6)
  if (any(p <= 0)) return(1e12)
  
  P = 1:11
  P[1]  = p1^2
  P[2]  = 2 * p1 * p2
  P[3]  = 2 * p1 * p3 + p2^2
  P[4]  = 2 * p1 * p4 + 2 * p2 * p3
  P[5]  = 2 * p1 * p5 + 2 * p2 * p4 + p3^2
  P[6]  = 2 * p1 * p6 + 2 * p2 * p5 + 2 * p3 * p4
  P[7]  = 2 * p2 * p6 + 2 * p3 * p5 + p4^2
  P[8]  = 2 * p3 * p6 + 2 * p4 * p5
  P[9]  = 2 * p4 * p6 + p5^2
  P[10] = 2 * p5 * p6
  P[11] = p6^2
  
  -sum(nk * log(P))
}

# 初始值:5 个参数均分
init = rep(1 / 6, 5)

# 约束:p1 ~ p5 > 0、p6 > 0 => 1 - sum(p1:p5) > 0
ui = rbind(diag(5), -rep(1, 5))
ci = c(rep(0, 5), -1)

# 用约束优化求解 p
est = constrOptim(init, negLogLik, NULL, ui, ci)
p_est = c(est$par, 1 - sum(est$par))
names(p_est) = paste0("p", 1:6)
round(p_est, 4)
#>     p1     p2     p3     p4     p5     p6 
#> 0.1239 0.1251 0.1265 0.1238 0.1274 0.3733

补充关键结论