34  分层正态模型

介绍分层正态模型的定义、结构、估计,分层正态模型与线性生长模型的关系,分层正态模型与潜变量模型的关系,分层正态模型与神经网络模型的关系,以 rstan 包和 nlme 包实现简单分层正态模型,说明 rstan 包的一些用法,比较贝叶斯参数估计和频率参数估计的结果,给出结果的解释。

library(StanHeaders)
library(ggplot2)
library(rstan)
rstan_options(auto_write = TRUE)
# 如果CPU和内存足够,设置成与马尔科夫链一样多
options(mc.cores = max(c(floor(parallel::detectCores() / 2), 1L)))

custom_colors <- c(
  "#4285f4", # GoogleBlue
  "#34A853", # GoogleGreen
  "#FBBC05", # GoogleYellow
  "#EA4335"  # GoogleRed
)
rstan_ggtheme_options(
  panel.background = element_rect(fill = "white"),
  legend.position = "top"
)
rstan_gg_options(
  fill = "#4285f4", color = "white",
  pt_color = "#EA4335", chain_colors = custom_colors
)

34.1 8schools 数据

8schools 数据最早来自 Rubin (1981)

分层正态模型

\[ \begin{aligned} y_j &\sim \mathcal{N}(\theta_j,\sigma_j) \quad \theta_j = \mu + \tau * \eta_j \\ \theta_j &\sim \mathcal{N}(\mu, \tau) \quad \eta_j \sim \mathcal{N}(0,1) \end{aligned} \]

34.1.1 拟合模型

用 rstan 包来拟合模型

# 编译模型
eight_schools_fit <- stan(
  model_name = "eight_schools",
  # file = "code/8schools.stan",
  model_code = "
  // saved as 8schools.stan
  data {
    int<lower=0> J;         // number of schools
    real y[J];              // estimated treatment effects
    real<lower=0> sigma[J]; // standard error of effect estimates
  }
  parameters {
    real mu;                // population treatment effect
    real<lower=0> tau;      // standard deviation in treatment effects
    vector[J] eta;          // unscaled deviation from mu by school
  }
  transformed parameters {
    vector[J] theta = mu + tau * eta;        // school treatment effects
  }
  model {
    target += normal_lpdf(eta | 0, 1);       // prior log-density
    target += normal_lpdf(y | theta, sigma); // log-likelihood
  }
  ",
  data = list( # 观测数据
    J = 8,
    y = c(28, 8, -3, 7, -1, 1, 18, 12),
    sigma = c(15, 10, 16, 11, 9, 11, 10, 18)
  ),
  warmup = 1000, # 每条链预处理迭代次数
  iter = 2000,   # 每条链总迭代次数
  chains = 2,    # 马尔科夫链的数目
  cores = 2,     # 指定 CPU 核心数,可以给每条链分配一个
  verbose = FALSE, # 不显示迭代的中间过程
  refresh = 0,     # 不显示采样的进度
  seed = 20190425  # 设置随机数种子,不要使用 set.seed() 函数
)
#> Warning: There were 1 divergent transitions after warmup. See
#> https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
#> to find out why this is a problem and how to eliminate them.
#> Warning: Examine the pairs() plot to diagnose sampling problems

34.1.2 模型输出

print(eight_schools_fit, digits = 1)
#> Inference for Stan model: eight_schools.
#> 2 chains, each with iter=2000; warmup=1000; thin=1; 
#> post-warmup draws per chain=1000, total post-warmup draws=2000.
#> 
#>           mean se_mean  sd  2.5%   25%   50%   75% 97.5% n_eff Rhat
#> mu         8.2     0.2 5.1  -1.2   4.8   8.0  11.3  19.1   975    1
#> tau        6.6     0.2 5.8   0.3   2.5   5.3   9.2  20.7   784    1
#> eta[1]     0.4     0.0 0.9  -1.5  -0.2   0.4   1.0   2.2  1696    1
#> eta[2]    -0.1     0.0 0.8  -1.7  -0.6   0.0   0.5   1.7  1748    1
#> eta[3]    -0.2     0.0 0.9  -2.0  -0.8  -0.2   0.3   1.7  1921    1
#> eta[4]     0.0     0.0 0.9  -1.7  -0.7  -0.1   0.6   1.7  2035    1
#> eta[5]    -0.4     0.0 0.9  -2.0  -1.0  -0.4   0.2   1.4  1859    1
#> eta[6]    -0.2     0.0 0.9  -1.9  -0.8  -0.3   0.4   1.6  1885    1
#> eta[7]     0.3     0.0 0.8  -1.4  -0.2   0.3   0.9   2.0  1743    1
#> eta[8]     0.0     0.0 0.9  -1.7  -0.6   0.0   0.6   1.9  2118    1
#> theta[1]  11.4     0.2 8.2  -2.0   6.1  10.3  15.3  31.1  1194    1
#> theta[2]   7.8     0.1 6.2  -4.3   4.1   7.7  11.4  20.3  2314    1
#> theta[3]   6.0     0.2 7.5 -10.6   2.0   6.5  10.7  19.7  1698    1
#> theta[4]   7.7     0.1 6.6  -6.2   3.8   7.7  11.6  21.4  2052    1
#> theta[5]   5.2     0.1 6.4  -9.3   1.3   5.8   9.4  16.2  2025    1
#> theta[6]   6.2     0.1 6.6  -8.0   2.6   6.6  10.5  18.9  2106    1
#> theta[7]  10.5     0.2 6.5  -1.3   6.3  10.0  14.2  25.0  1566    1
#> theta[8]   8.2     0.2 7.7  -8.0   3.9   8.1  12.5  24.2  1764    1
#> lp__     -39.4     0.1 2.6 -44.8 -41.0 -39.2 -37.6 -34.8   752    1
#> 
#> Samples were drawn using NUTS(diag_e) at Mon Oct 16 10:50:27 2023.
#> For each parameter, n_eff is a crude measure of effective sample size,
#> and Rhat is the potential scale reduction factor on split chains (at 
#> convergence, Rhat=1).

提取任意一个参数的结果,如查看参数 \(\tau\) 的 95% 置信区间。

print(eight_schools_fit, pars = "tau", probs = c(0.025, 0.975))
#> Inference for Stan model: eight_schools.
#> 2 chains, each with iter=2000; warmup=1000; thin=1; 
#> post-warmup draws per chain=1000, total post-warmup draws=2000.
#> 
#>     mean se_mean   sd 2.5% 97.5% n_eff Rhat
#> tau 6.65    0.21 5.76 0.27 20.73   784    1
#> 
#> Samples were drawn using NUTS(diag_e) at Mon Oct 16 10:50:27 2023.
#> For each parameter, n_eff is a crude measure of effective sample size,
#> and Rhat is the potential scale reduction factor on split chains (at 
#> convergence, Rhat=1).

从迭代抽样数据获得与 print(fit) 一样的结果。以便后续对原始采样数据做任意的进一步分析。rstan 包扩展泛型函数 summary() 以支持对 stanfit 数据对象汇总,输出各个参数分链条和合并链条的后验分布结果。

34.1.3 操作数据

合并四条马氏链的结果

eight_schools_sim <- extract(eight_schools_fit, permuted = TRUE)

返回的结果是一个列表

str(eight_schools_sim)
#> List of 5
#>  $ mu   : num [1:2000(1d)] -0.349 13.214 4.653 8.459 4.314 ...
#>   ..- attr(*, "dimnames")=List of 1
#>   .. ..$ iterations: NULL
#>  $ tau  : num [1:2000(1d)] 5.171 0.281 3.416 4.26 6.84 ...
#>   ..- attr(*, "dimnames")=List of 1
#>   .. ..$ iterations: NULL
#>  $ eta  : num [1:2000, 1:8] 0.8443 0.1824 -0.5421 0.3176 0.0592 ...
#>   ..- attr(*, "dimnames")=List of 2
#>   .. ..$ iterations: NULL
#>   .. ..$           : NULL
#>  $ theta: num [1:2000, 1:8] 4.02 13.27 2.8 9.81 4.72 ...
#>   ..- attr(*, "dimnames")=List of 2
#>   .. ..$ iterations: NULL
#>   .. ..$           : NULL
#>  $ lp__ : num [1:2000(1d)] -37.8 -42.5 -41.8 -36.5 -36 ...
#>   ..- attr(*, "dimnames")=List of 1
#>   .. ..$ iterations: NULL
class(eight_schools_sim)
#> [1] "list"

计算参数 \(\eta,\theta\) 的均值

apply(eight_schools_sim$eta, 2, mean)
#> [1]  0.37737234 -0.05115180 -0.22331030 -0.04388245 -0.35811384 -0.22323565
#> [7]  0.30601563  0.01891633
apply(eight_schools_sim$theta, 2, mean)
#> [1] 11.436791  7.793837  6.010571  7.703675  5.161396  6.237395 10.528707
#> [8]  8.185503

计算参数 \(\eta,\theta\) 的分位点

t(apply(eight_schools_sim$eta, 2, quantile, probs = c(2.5, 25, 50, 75, 97.5) / 100))
#>       
#>             2.5%        25%         50%       75%    97.5%
#>   [1,] -1.507198 -0.1968247  0.37869013 0.9789101 2.174623
#>   [2,] -1.726116 -0.5851447 -0.04740783 0.4768830 1.660240
#>   [3,] -1.994936 -0.8202685 -0.23339301 0.3494418 1.662723
#>   [4,] -1.745175 -0.6545073 -0.05512642 0.5521324 1.741359
#>   [5,] -2.004906 -0.9575384 -0.35962354 0.2032693 1.407583
#>   [6,] -1.922606 -0.8169963 -0.25524392 0.3566269 1.574648
#>   [7,] -1.368122 -0.2396888  0.31536228 0.8581667 1.970735
#>   [8,] -1.736231 -0.5953672 -0.03530469 0.6294982 1.886066
t(apply(eight_schools_sim$theta, 2, quantile, probs = c(2.5, 25, 50, 75, 97.5) / 100))
#>       
#>              2.5%      25%       50%       75%    97.5%
#>   [1,]  -2.033648 6.100229 10.271452 15.343849 31.06742
#>   [2,]  -4.273520 4.050181  7.722681 11.445376 20.34221
#>   [3,] -10.641025 1.990698  6.517992 10.713881 19.70925
#>   [4,]  -6.152792 3.824841  7.651704 11.638808 21.40971
#>   [5,]  -9.289988 1.294042  5.751342  9.403381 16.21893
#>   [6,]  -8.003669 2.607037  6.582900 10.452106 18.90089
#>   [7,]  -1.262546 6.306058  9.995780 14.187053 25.04851
#>   [8,]  -7.978283 3.895587  8.129171 12.493753 24.21741

计算参数 \(\mu,\tau\) 的均值

lapply(eight_schools_sim["mu"], mean)
#> $mu
#> [1] 8.180393
lapply(eight_schools_sim["tau"], mean)
#> $tau
#> [1] 6.647675
lapply(eight_schools_sim["lp__"], mean)
#> $lp__
#> [1] -39.38927

计算参数 \(\mu,\tau\) 的分位点

lapply(eight_schools_sim["mu"], quantile, probs = c(2.5, 25, 50, 75, 97.5) / 100)
#> $mu
#>      2.5%       25%       50%       75%     97.5% 
#> -1.218135  4.824075  7.972065 11.343782 19.089152
lapply(eight_schools_sim["tau"], quantile, probs = c(2.5, 25, 50, 75, 97.5) / 100)
#> $tau
#>       2.5%        25%        50%        75%      97.5% 
#>  0.2686822  2.4555350  5.3460596  9.1916895 20.7314381
lapply(eight_schools_sim["lp__"], quantile, probs = c(2.5, 25, 50, 75, 97.5) / 100)
#> $lp__
#>      2.5%       25%       50%       75%     97.5% 
#> -44.77153 -41.00192 -39.23531 -37.59843 -34.80907

34.1.4 采样诊断

获取马尔科夫链迭代点列数据

eight_schools_sim <- extract(eight_schools_fit, permuted = FALSE)

eight_schools_sim 是一个三维数组,1000(次迭代)* 2 (条链)* 19(个参数)。如果 permuted = TRUE 则会合并四条马氏链的迭代结果,变成一个列表。

# 数据类型
class(eight_schools_sim)
#> [1] "array"
# 1000(次迭代)* 2 (条链)* 19(个参数)
str(eight_schools_sim)
#>  num [1:1000, 1:2, 1:19] 14.07 13.02 9.03 8.03 6.85 ...
#>  - attr(*, "dimnames")=List of 3
#>   ..$ iterations: NULL
#>   ..$ chains    : chr [1:2] "chain:1" "chain:2"
#>   ..$ parameters: chr [1:19] "mu" "tau" "eta[1]" "eta[2]" ...

提取参数 \(\mu\) 的四条迭代点列

eight_schools_mu_sim <- eight_schools_sim[, , "mu"]
matplot(eight_schools_mu_sim,
        xlab = "Iteration", ylab = expression(mu),
        type = "l", lty = "solid", col = custom_colors
)
abline(h = apply(eight_schools_mu_sim, 2, mean), col = custom_colors)
legend("bottomleft",
       legend = paste0("chain:", 1:2), box.col = "white", inset = 0.01,
       lty = "solid", horiz = TRUE, col = custom_colors
)
图 34.1: 参数 \(\mu\) 的迭代轨迹

或者使用 rstan 提供的 traceplot 函数或者 stan_trace 函数,rstan 大量依赖 ggplot2 绘图,所以如果你熟悉 GGplot2 可以很方便地定制属于自己的风格,除了 rstan 提供的 rstan_ggtheme_optionsrstan_gg_options 两个函数外,还可以使用 ggplot2 自带的大量配置选项和主题,如 theme_minimal 主题,因为 stan_trace等作图函数返回的是一个 ggplot 对象。

stan_trace(eight_schools_fit, pars = "mu") +
  theme_minimal() +
  labs(x = "Iteration", y = expression(mu))
图 34.2: 马氏链的迭代序列

序列的自相关图,类似地,我们这里也使用 stan_ac 函数绘制自相关图

stan_ac(eight_schools_fit, pars = "mu", separate_chains = TRUE, color = "white") +
  theme_minimal()
图 34.3: 马氏链的自相关图

34.1.5 后验分布

可以用 stan_hist 函数绘制参数 \(\mu\) 的后验分布图,它没有 separate_chains 参数,所以不能分链条绘制

stan_hist(eight_schools_fit, pars = "mu", bins = 30) + theme_minimal()
图 34.4: 参数 \(\mu\) 的后验分布

参数 \(\mu\)\(\tau\) 的散点图

stan_scat(eight_schools_fit, pars = c("mu","tau")) + theme_minimal()
图 34.5: 参数 \(\mu\)\(\tau\) 的联合后验分布

参数 \(\mu\) 的后验概率密度分布图

stan_dens(eight_schools_fit, pars = "mu", separate_chains = TRUE) +
  theme_minimal() +
  labs(x = expression(mu), y = "Density")
图 34.6: 参数 \(\mu\) 的后验概率密度分布图

bayesplot 包的函数 mcmc_pairs() 以矩阵图展示多个参数的分布。

bayesplot::mcmc_pairs(eight_schools_fit, pars = c("mu", "tau", "lp__"))
图 34.7: 参数 \(\mu\)\(\tau\)\(\mathrm{lp\_\_}\) 的后验分布图

34.1.6 其它 R 包

接下来,分别用 nlmelme4 包拟合模型。

# 成绩
y <- c(28, 8, -3, 7, -1, 1, 18, 12)
# 标准差
sigma <- c(15, 10, 16, 11, 9, 11, 10, 18)
# 学校编号
g <- 1:8

首先,调用 nlme 包的函数 lme() 拟合模型。

library(nlme)
fit_lme <- lme(y ~ 1, random = ~ 1 | g, weights = varFixed(~ sigma^2), method = "REML")
summary(fit_lme)
#> Linear mixed-effects model fit by REML
#>   Data: NULL 
#>        AIC      BIC    logLik
#>   60.21091 60.04864 -27.10546
#> 
#> Random effects:
#>  Formula: ~1 | g
#>         (Intercept) Residual
#> StdDev:    2.917988 0.780826
#> 
#> Variance function:
#>  Structure: fixed weights
#>  Formula: ~sigma^2 
#> Fixed effects:  y ~ 1 
#>                Value Std.Error DF  t-value p-value
#> (Intercept) 7.785729  3.368082  8 2.311621  0.0496
#> 
#> Standardized Within-Group Residuals:
#>         Min          Q1         Med          Q3         Max 
#> -1.06635035 -0.73588511 -0.02896764  0.50254917  1.62502386 
#> 
#> Number of Observations: 8
#> Number of Groups: 8

随机效应的标准差 2.917988 ,随机效应部分的估计

ranef(fit_lme)
#>   (Intercept)
#> 1  1.18135690
#> 2  0.02625714
#> 3 -0.55795544
#> 4 -0.08130333
#> 5 -1.29202241
#> 6 -0.70215328
#> 7  1.25167648
#> 8  0.17414393

类比 Stan 输出结果中的 \(\theta\) 向量,每个学校的成绩估计

7.785729 + 2.917988 * ranef(fit_lme) 
#>   (Intercept)
#> 1   11.232914
#> 2    7.862347
#> 3    6.157622
#> 4    7.548487
#> 5    4.015623
#> 6    5.736854
#> 7   11.438106
#> 8    8.293879

接着,采用 lme4 包拟合模型,发现 lme4 包获得与 nlme 包一样的结果。

control <- lme4::lmerControl(
  check.conv.singular = "ignore",
  check.nobs.vs.nRE = "ignore",
  check.nobs.vs.nlev = "ignore"
)
fit_lme4 <- lme4::lmer(y ~ 1 + (1 | g), weights = 1 / sigma^2, control = control, REML = TRUE)
summary(fit_lme4)
#> Linear mixed model fit by REML ['lmerMod']
#> Formula: y ~ 1 + (1 | g)
#> Weights: 1/sigma^2
#> Control: control
#> 
#> REML criterion at convergence: 54.2
#> 
#> Scaled residuals: 
#>      Min       1Q   Median       3Q      Max 
#> -1.06635 -0.73589 -0.02897  0.50255  1.62502 
#> 
#> Random effects:
#>  Groups   Name        Variance Std.Dev.
#>  g        (Intercept) 8.5145   2.9180  
#>  Residual             0.6097   0.7808  
#> Number of obs: 8, groups:  g, 8
#> 
#> Fixed effects:
#>             Estimate Std. Error t value
#> (Intercept)    7.786      3.368   2.312

最后,使用 blme 包 (Chung 等 2013) ,blme 包基于 lme4 包,截距项结果比较一致,随机效应(组间方差)和残差项(组内方差)完全不同。

### Example using a residual variance prior ###
# This is the "eight schools" data set; 
# the mode should be at the boundary of the space.

fit_blme <- blme::blmer(
  y ~ 1 + (1 | g),
  control = control, REML = TRUE,
  resid.prior = point, cov.prior = NULL,
  weights = 1 / sigma^2
)
summary(fit_blme)
#> Resid prior: point(value = 1)
#> Prior dev  : 0
#> 
#> Linear mixed model fit by REML ['blmerMod']
#> Formula: y ~ 1 + (1 | g)
#> Weights: 1/sigma^2
#> Control: control
#> 
#> REML criterion at convergence: 54.7
#> 
#> Scaled residuals: 
#>      Min       1Q   Median       3Q      Max 
#> -0.96507 -0.62280 -0.01545  0.43763  1.35429 
#> 
#> Random effects:
#>  Groups   Name        Variance  Std.Dev. 
#>  g        (Intercept) 8.583e-19 9.264e-10
#>  Residual             1.000e+00 1.000e+00
#> Number of obs: 8, groups:  g, 8
#> 
#> Fixed effects:
#>             Estimate Std. Error t value
#> (Intercept)    7.686      4.072   1.887

34.2 rats 数据

rats 数据最早来自 Gelfand 等 (1990) ,记录 30 只小鼠每隔一周的重量,一共进行了 5 周。第一次记录是小鼠第 8 天的时候,第二次测量记录是第 15 天的时候,一直持续到第 36 天。下面在 R 环境中准备数据。

# 总共 30 只老鼠
N <- 30
# 总共进行 5 周
T <- 5
# 小鼠重量
y <- structure(c(
  151, 145, 147, 155, 135, 159, 141, 159, 177, 134,
  160, 143, 154, 171, 163, 160, 142, 156, 157, 152, 154, 139, 146,
  157, 132, 160, 169, 157, 137, 153, 199, 199, 214, 200, 188, 210,
  189, 201, 236, 182, 208, 188, 200, 221, 216, 207, 187, 203, 212,
  203, 205, 190, 191, 211, 185, 207, 216, 205, 180, 200, 246, 249,
  263, 237, 230, 252, 231, 248, 285, 220, 261, 220, 244, 270, 242,
  248, 234, 243, 259, 246, 253, 225, 229, 250, 237, 257, 261, 248,
  219, 244, 283, 293, 312, 272, 280, 298, 275, 297, 350, 260, 313,
  273, 289, 326, 281, 288, 280, 283, 307, 286, 298, 267, 272, 285,
  286, 303, 295, 289, 258, 286, 320, 354, 328, 297, 323, 331, 305,
  338, 376, 296, 352, 314, 325, 358, 312, 324, 316, 317, 336, 321,
  334, 302, 302, 323, 331, 345, 333, 316, 291, 324
), .Dim = c(30, 5))
# 第几天
x <- c(8.0, 15.0, 22.0, 29.0, 36.0)
xbar <- 22.0

重复测量的小鼠重量数据 rats 如下 表格 34.1 所示。

表格 34.1: 小鼠重量数据(部分)
第 8 天 第 15 天 第 22 天 第 29 天 第 36 天
1 151 199 246 283 320
2 145 199 249 293 354
3 147 214 263 312 328
4 155 200 237 272 297
5 135 188 230 280 323
6 159 210 252 298 331

小鼠重量数据的分布情况见下 图 34.8 ,由图可以假定 30 只小鼠的重量服从正态分布。

图 34.8: 30 只小鼠 5 次测量的数据

图 34.9 可见, 30 只小鼠的重量增长趋势呈现一种线性趋势。

图 34.9: 30 只小鼠 5 次测量的数据

34.2.1 rstan

初始化模型参数,设置采样算法的参数。

# 迭代链
chains <- 4
# 迭代次数
iter <- 1000
# 初始值
init <- rep(list(list(
  alpha = rep(250, 30), beta = rep(6, 30),
  alpha_c = 150, beta_c = 10,
  tausq_c = 1, tausq_alpha = 1,
  tausq_beta = 1
)), chains)

接下来,基于重复测量数据,建立线性生长曲线模型:

\[ \begin{aligned} \alpha_c &\sim \mathcal{N}(0,100) \quad \beta_c \sim \mathcal{N}(0,100) \\ \tau^2_{\alpha} &\sim \mathrm{inv\_gamma}(0.001, 0.001) \\ \tau^2_{\beta} &\sim \mathrm{inv\_gamma}(0.001, 0.001) \\ \tau^2_c &\sim \mathrm{inv\_gamma}(0.001, 0.001) \\ \alpha_n &\sim \mathcal{N}(\alpha_c, \tau_{\alpha}) \quad \beta_n \sim \mathcal{N}(\beta_c, \tau_{\beta}) \\ y_{nt} &\sim \mathcal{N}(\alpha_n + \beta_n * (x_t - \bar{x}), \tau_c) \\ & n = 1,2,\ldots,N \quad t = 1,2,\ldots,T \end{aligned} \]

其中, \(\alpha_c,\beta_c,\tau_c,\tau_{\alpha},\tau_{\beta}\) 为无信息先验,\(\bar{x} = 22\) 表示第 22 天,\(N = 30\)\(T = 5\) 分别表示实验中的小鼠数量和测量次数,下面采用 Stan 编码、编译、采样和拟合模型。

rats_fit <- stan(
  model_name = "rats",
  model_code = "
  data {
    int<lower=0> N;
    int<lower=0> T;
    vector[T] x;
    matrix[N,T] y;
    real xbar;
  }
  parameters {
    vector[N] alpha;
    vector[N] beta;

    real alpha_c;
    real beta_c;          // beta.c in original bugs model

    real<lower=0> tausq_c;
    real<lower=0> tausq_alpha;
    real<lower=0> tausq_beta;
  }
  transformed parameters {
    real<lower=0> tau_c;       // sigma in original bugs model
    real<lower=0> tau_alpha;
    real<lower=0> tau_beta;

    tau_c = sqrt(tausq_c);
    tau_alpha = sqrt(tausq_alpha);
    tau_beta = sqrt(tausq_beta);
  }
  model {
    alpha_c ~ normal(0, 100);
    beta_c ~ normal(0, 100);
    tausq_c ~ inv_gamma(0.001, 0.001);
    tausq_alpha ~ inv_gamma(0.001, 0.001);
    tausq_beta ~ inv_gamma(0.001, 0.001);
    alpha ~ normal(alpha_c, tau_alpha); // vectorized
    beta ~ normal(beta_c, tau_beta);  // vectorized
    for (n in 1:N)
      for (t in 1:T)
        y[n,t] ~ normal(alpha[n] + beta[n] * (x[t] - xbar), tau_c);
  }
  generated quantities {
    real alpha0;
    alpha0 = alpha_c - xbar * beta_c;
  }
  ",
  data = list(N = N, T = T, y = y, x = x, xbar = xbar),
  chains = chains, init = init, iter = iter,   
  verbose = FALSE, refresh = 0, seed = 20190425
)

模型输出结果如下:

print(rats_fit, pars = c("alpha", "beta"), include = FALSE, digits = 1)
#> Inference for Stan model: rats.
#> 4 chains, each with iter=1000; warmup=500; thin=1; 
#> post-warmup draws per chain=500, total post-warmup draws=2000.
#> 
#>               mean se_mean   sd   2.5%    25%    50%    75%  97.5% n_eff Rhat
#> alpha_c      242.5     0.1  2.6  237.3  240.8  242.5  244.2  247.6  2200    1
#> beta_c         6.2     0.0  0.1    6.0    6.1    6.2    6.3    6.4  2134    1
#> tausq_c       37.3     0.2  5.4   28.3   33.4   36.8   40.5   49.0  1090    1
#> tausq_alpha  218.5     1.5 62.1  127.3  174.5  207.2  252.1  369.2  1787    1
#> tausq_beta     0.3     0.0  0.1    0.1    0.2    0.3    0.3    0.5  1340    1
#> tau_c          6.1     0.0  0.4    5.3    5.8    6.1    6.4    7.0  1096    1
#> tau_alpha     14.6     0.0  2.0   11.3   13.2   14.4   15.9   19.2  1871    1
#> tau_beta       0.5     0.0  0.1    0.4    0.5    0.5    0.6    0.7  1304    1
#> alpha0       106.4     0.1  3.5   99.4  104.0  106.4  108.7  113.4  2300    1
#> lp__        -438.6     0.3  6.6 -452.7 -442.9 -438.1 -434.1 -426.3   703    1
#> 
#> Samples were drawn using NUTS(diag_e) at Mon Oct 16 10:51:58 2023.
#> For each parameter, n_eff is a crude measure of effective sample size,
#> and Rhat is the potential scale reduction factor on split chains (at 
#> convergence, Rhat=1).

alpha_c 表示小鼠 5 次测量的平均重量,beta_c 表示小鼠体重的增长率,\(\alpha_i,\beta_i\) 分别表示第 \(i\) 只小鼠在第 22 天(第 3 次测量或 \(x_t = \bar{x}\) )的重量和增长率(每日增加的重量)。

对于分量众多的参数向量,比较适合用岭线图展示后验分布,下面调用 bayesplot 包绘制参数向量 \(\boldsymbol{\alpha},\boldsymbol{\beta}\) 的后验分布。

# plot(rats_fit, pars = "alpha", show_density = TRUE, ci_level = 0.8, outer_level = 0.95)
bayesplot::mcmc_areas_ridges(rats_fit, pars = paste0("alpha", "[", 1:30, "]")) +
  scale_y_discrete(labels = scales::parse_format()) 
图 34.10: 参数 \(\boldsymbol{\alpha}\) 的后验分布

参数向量 \(\boldsymbol{\alpha}\) 的后验估计可以看作 \(x_t = \bar{x}\) 时小鼠的重量,上图即为各个小鼠重量的后验分布。

# plot(rats_fit, pars = "beta", ci_level = 0.8, outer_level = 0.95)
bayesplot::mcmc_areas_ridges(rats_fit, pars = paste0("beta", "[", 1:30, "]")) +
  scale_y_discrete(labels = scales::parse_format()) 
图 34.11: 参数 \(\boldsymbol{\beta}\) 的后验分布

参数向量 \(\boldsymbol{\beta}\) 的后验估计可以看作是小鼠的重量的增长率,上图即为各个小鼠重量的增长率的后验分布。

34.2.2 lavaan

lavaan 包 (Rosseel 2012) 的函数 growth() 可以直接拟合曲线生长模型

library(lavaan)
# 设置矩阵 y 的列名
colnames(y) <- c("t1","t2","t3","t4","t5")
rats_growt_model <- ' i =~ 1*t1 + 1*t2 + 1*t3 + 1*t4 + 1*t5
                      s =~ 0*t1 + 1*t2 + 2*t3 + 3*t4 + 4*t5 '

其中,i 表示截距, s 表示斜率。下面拟合模型

rats_growth_fit <- growth(rats_growt_model, data = y)

模型输出

summary(rats_growth_fit)
#> lavaan 0.6.16 ended normally after 173 iterations
#> 
#>   Estimator                                         ML
#>   Optimization method                           NLMINB
#>   Number of model parameters                        10
#> 
#>   Number of observations                            30
#> 
#> Model Test User Model:
#>                                                       
#>   Test statistic                                68.986
#>   Degrees of freedom                                10
#>   P-value (Chi-square)                           0.000
#> 
#> Parameter Estimates:
#> 
#>   Standard errors                             Standard
#>   Information                                 Expected
#>   Information saturated (h1) model          Structured
#> 
#> Latent Variables:
#>                    Estimate  Std.Err  z-value  P(>|z|)
#>   i =~                                                
#>     t1                1.000                           
#>     t2                1.000                           
#>     t3                1.000                           
#>     t4                1.000                           
#>     t5                1.000                           
#>   s =~                                                
#>     t1                0.000                           
#>     t2                1.000                           
#>     t3                2.000                           
#>     t4                3.000                           
#>     t5                4.000                           
#> 
#> Covariances:
#>                    Estimate  Std.Err  z-value  P(>|z|)
#>   i ~~                                                
#>     s                10.625    9.304    1.142    0.253
#> 
#> Intercepts:
#>                    Estimate  Std.Err  z-value  P(>|z|)
#>    .t1                0.000                           
#>    .t2                0.000                           
#>    .t3                0.000                           
#>    .t4                0.000                           
#>    .t5                0.000                           
#>     i               155.759    2.065   75.437    0.000
#>     s                44.525    0.808   55.080    0.000
#> 
#> Variances:
#>                    Estimate  Std.Err  z-value  P(>|z|)
#>    .t1               32.762   12.914    2.537    0.011
#>    .t2               13.005    5.219    2.492    0.013
#>    .t3                5.181    3.498    1.481    0.139
#>    .t4               17.105    8.426    2.030    0.042
#>    .t5              157.548   45.272    3.480    0.001
#>     i               112.957   33.179    3.404    0.001
#>     s                15.909    5.124    3.105    0.002

34.2.3 nlme

nlme 包适合长格式的数据,因此,先将小鼠数据整理成长格式。

weight <- c(
  151, 145, 147, 155, 135, 159, 141, 159, 177, 134,
  160, 143, 154, 171, 163, 160, 142, 156, 157, 152, 154, 139, 146,
  157, 132, 160, 169, 157, 137, 153, 199, 199, 214, 200, 188, 210,
  189, 201, 236, 182, 208, 188, 200, 221, 216, 207, 187, 203, 212,
  203, 205, 190, 191, 211, 185, 207, 216, 205, 180, 200, 246, 249,
  263, 237, 230, 252, 231, 248, 285, 220, 261, 220, 244, 270, 242,
  248, 234, 243, 259, 246, 253, 225, 229, 250, 237, 257, 261, 248,
  219, 244, 283, 293, 312, 272, 280, 298, 275, 297, 350, 260, 313,
  273, 289, 326, 281, 288, 280, 283, 307, 286, 298, 267, 272, 285,
  286, 303, 295, 289, 258, 286, 320, 354, 328, 297, 323, 331, 305,
  338, 376, 296, 352, 314, 325, 358, 312, 324, 316, 317, 336, 321,
  334, 302, 302, 323, 331, 345, 333, 316, 291, 324
)
rats <- rep(1:30, times = 5)
days <- rep(c(8, 15, 22, 29, 36), each = 30)
rats_data <- data.frame(weight = weight, rats = rats, days = days)

将 30 只小鼠的重量变化及回归曲线画出来,发现各只小鼠的回归线的斜率几乎一样,截距略有不同。不同小鼠的出生重量是不同,前面 Stan 采用变截距变斜率的混合效应模型拟合数据。

ggplot(data = rats_data, aes(x = days, y = weight)) +
  geom_point() +
  geom_smooth(formula = "y ~ x", method = "lm", se = FALSE) +
  theme_bw() +
  facet_wrap(facets = ~rats, labeller = "label_both", ncol = 6) +
  labs(x = "第几天", y = "重量")
图 34.12: 小鼠重量变化曲线

小鼠的重量随时间增长,不同小鼠的情况又会有所不同。可用随机效应模型表示生长曲线模型,下面加载 nlme 包调用函数 lme() 拟合该模型。

library(nlme)
rats_lme <- lme(data = rats_data, fixed = weight ~ days, random = ~ days | rats)
summary(rats_lme)
#> Linear mixed-effects model fit by REML
#>   Data: rats_data 
#>        AIC      BIC    logLik
#>   1107.373 1125.357 -547.6867
#> 
#> Random effects:
#>  Formula: ~days | rats
#>  Structure: General positive-definite, Log-Cholesky parametrization
#>             StdDev     Corr  
#> (Intercept) 10.7429538 (Intr)
#> days         0.5105284 -0.159
#> Residual     6.0146581       
#> 
#> Fixed effects:  weight ~ days 
#>                 Value Std.Error  DF  t-value p-value
#> (Intercept) 106.56762 2.2976758 119 46.38062       0
#> days          6.18571 0.1055885 119 58.58321       0
#>  Correlation: 
#>      (Intr)
#> days -0.343
#> 
#> Standardized Within-Group Residuals:
#>        Min         Q1        Med         Q3        Max 
#> -2.6370711 -0.5394662  0.1187649  0.4927166  2.6090856 
#> 
#> Number of Observations: 150
#> Number of Groups: 30

模型输出结果中,固定效应中的截距项 (Intercept) 对应 106.56762,斜率 days 对应 6.18571。Stan 模型中截距参数 alpha0 的后验估计是 106.332,斜率参数 beta_c 的后验估计是 6.188。对比 Stan 和 nlme 包的拟合结果,可以发现贝叶斯和频率方法的结果是非常接近的。截距参数 alpha0 可以看作小鼠的初始(出生)重量,斜率参数 beta_c 可以看作小鼠的生长率 growth rate。

函数 lme() 的输出结果中,随机效应的随机截距标准差 10.7425835,对应 tau_alpha,表示每个小鼠的截距偏移量的波动。而随机斜率的标准差为 0.5105447,对应 tau_beta,相对随机截距标准差来说很小。残差标准差为 6.0146608,对应 tau_c,表示与小鼠无关的剩余量的波动,比如测量误差。总之,和 Stan 的结果有所不同,但相去不远。主要是前面的 Stan 模型没有考虑随机截距和随机斜率之间的相关性,这可以进一步调整 (Sorensen, Hohenstein, 和 Vasishth 2016)

Stan 输出中,截距项 alpha、斜率项 beta 参数的标准差分别是 tau_alphatau_beta ,残差标准差参数 tau_c 的估计为 6.1。简单起见,没有考虑截距项和斜率项的相关性,即不考虑小鼠出生时的重量和生长率的相关性,一般来说,应该是有关系的。函数 lme() 的输出结果中给出了截距项和斜率项的相关性为 -0.343,随机截距和随机斜率的相关性为 -0.159。

计算与 Stan 输出中的截距项 alpha_c 对应的量,结合函数 lme() 的输出,截距、斜率加和之后,如下

106.56762 + 6.18571 * 22
#> [1] 242.6532

值得注意,Stan 代码中对时间 days 做了中心化处理,即 \(x_t - \bar{x}\),目的是降低采样时参数 \(\alpha_i\)\(\beta_i\) 之间的相关性,而在拟合函数 lme() 中没有做处理,因此,结果无需转化,而且更容易解释。

fit_lm <- lm(weight ~ days, data = rats_data)
summary(fit_lm)
#> 
#> Call:
#> lm(formula = weight ~ days, data = rats_data)
#> 
#> Residuals:
#>     Min      1Q  Median      3Q     Max 
#> -38.253 -11.278   0.197   7.647  64.047 
#> 
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)    
#> (Intercept) 106.5676     3.2099   33.20   <2e-16 ***
#> days          6.1857     0.1331   46.49   <2e-16 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 16.13 on 148 degrees of freedom
#> Multiple R-squared:  0.9359, Adjusted R-squared:  0.9355 
#> F-statistic:  2161 on 1 and 148 DF,  p-value: < 2.2e-16

34.2.4 lme4

当采用 lme4 包拟合数据的时候,发现输出结果与 nlme 包几乎相同。

rats_lme4 <- lme4::lmer(weight ~ days + (days | rats), data = rats_data)
summary(rats_lme4)
#> Linear mixed model fit by REML ['lmerMod']
#> Formula: weight ~ days + (days | rats)
#>    Data: rats_data
#> 
#> REML criterion at convergence: 1095.4
#> 
#> Scaled residuals: 
#>     Min      1Q  Median      3Q     Max 
#> -2.6371 -0.5395  0.1188  0.4927  2.6091 
#> 
#> Random effects:
#>  Groups   Name        Variance Std.Dev. Corr 
#>  rats     (Intercept) 115.4239 10.7435       
#>           days          0.2607  0.5106  -0.16
#>  Residual              36.1753  6.0146       
#> Number of obs: 150, groups:  rats, 30
#> 
#> Fixed effects:
#>             Estimate Std. Error t value
#> (Intercept) 106.5676     2.2978   46.38
#> days          6.1857     0.1056   58.58
#> 
#> Correlation of Fixed Effects:
#>      (Intr)
#> days -0.343

34.2.5 blme

blme 包基于 lme4 包拟合贝叶斯线性混合效应模型。

rats_blme <- blme::blmer(
  weight ~ days + (days | rats), data = rats_data,
  resid.prior = point, cov.prior = NULL
)
summary(rats_blme)
#> Resid prior: point(value = 1)
#> Prior dev  : 0
#> 
#> Linear mixed model fit by REML ['blmerMod']
#> Formula: weight ~ days + (days | rats)
#>    Data: rats_data
#> 
#> REML criterion at convergence: 3938.2
#> 
#> Scaled residuals: 
#>      Min       1Q   Median       3Q      Max 
#> -16.7740  -3.3490   0.9875   2.7505  14.0468 
#> 
#> Random effects:
#>  Groups   Name        Variance Std.Dev. Corr 
#>  rats     (Intercept) 157.2001 12.5379       
#>           days          0.3325  0.5766  -0.34
#>  Residual               1.0000  1.0000       
#> Number of obs: 150, groups:  rats, 30
#> 
#> Fixed effects:
#>             Estimate Std. Error t value
#> (Intercept) 106.5676     2.2977   46.38
#> days          6.1857     0.1056   58.58
#> 
#> Correlation of Fixed Effects:
#>      (Intr)
#> days -0.343

34.2.6 INLA

library(INLA)
rats_inla <- inla(weight ~ 1 + days + f(rats, days, model = "iid"),
  data = rats_data, control.predictor = list(compute = TRUE))
summary(rats_inla)
#> 
#> Call:
#>    c("inla.core(formula = formula, family = family, contrasts = contrasts, 
#>    ", " data = data, quantiles = quantiles, E = E, offset = offset, ", " 
#>    scale = scale, weights = weights, Ntrials = Ntrials, strata = strata, 
#>    ", " lp.scale = lp.scale, link.covariates = link.covariates, verbose = 
#>    verbose, ", " lincomb = lincomb, selection = selection, control.compute 
#>    = control.compute, ", " control.predictor = control.predictor, 
#>    control.family = control.family, ", " control.inla = control.inla, 
#>    control.fixed = control.fixed, ", " control.mode = control.mode, 
#>    control.expert = control.expert, ", " control.hazard = control.hazard, 
#>    control.lincomb = control.lincomb, ", " control.update = 
#>    control.update, control.lp.scale = control.lp.scale, ", " 
#>    control.pardiso = control.pardiso, only.hyperparam = only.hyperparam, 
#>    ", " inla.call = inla.call, inla.arg = inla.arg, num.threads = 
#>    num.threads, ", " keep = keep, working.directory = working.directory, 
#>    silent = silent, ", " inla.mode = inla.mode, safe = FALSE, debug = 
#>    debug, .parent.frame = .parent.frame)" ) 
#> Time used:
#>     Pre = 0.581, Running = 0.308, Post = 0.026, Total = 0.915 
#> Fixed effects:
#>                mean    sd 0.025quant 0.5quant 0.975quant    mode kld
#> (Intercept) 106.568 1.546    103.532  106.568    109.604 106.568   0
#> days          6.186 0.125      5.940    6.186      6.431   6.186   0
#> 
#> Random effects:
#>   Name     Model
#>     rats IID model
#> 
#> Model hyperparameters:
#>                                          mean    sd 0.025quant 0.5quant
#> Precision for the Gaussian observations 0.017 0.002      0.013    0.017
#> Precision for rats                      3.274 1.551      1.210    2.962
#>                                         0.975quant  mode
#> Precision for the Gaussian observations      0.023 0.017
#> Precision for rats                           7.167 2.420
#> 
#> Marginal log-Likelihood:  -590.38 
#>  is computed 
#> Posterior summaries for the linear predictor and the fitted values are computed
#> (Posterior marginals needs also 'control.compute=list(return.marginals.predictor=TRUE)')

34.3 习题

  1. 四个组的重复测量数据,如下表所示,建立贝叶斯线性混合效应模型/分层正态模型分析数据,与 nlme 包拟合的结果对比。

    表格 34.2: 实验数据
    编号 第1组 第2组 第3组 第4组
    1 62 63 68 56
    2 60 67 66 62
    3 63 71 71 60
    4 59 64 67 61
    5 65 68 63
    6 66 68 64
    7 63
    8 59

    \[ \begin{aligned} y_{ij} \sim \mathcal{N}(\theta_i, \sigma^2) &\quad \theta_i \sim \mathcal{N}(\mu, \tau^2) \\ (\mu,\log \sigma, \tau) &\sim \mathrm{uniform\ prior} \\ i = 1,2,3,4 &\quad j = 1,2, \ldots, n_i \end{aligned} \]

    \(y_{ij}\) 表示第 \(i\) 组的第 \(j\) 个测量值,\(\theta_i\) 表示第 \(i\) 组的均值,\(\mu\) 表示整体的均值,\(\sigma^2\) 表示组内的方差,\(\tau^2\) 表示组内的方差。

    library(nlme)
    fit_lme <- lme(data = dat, fixed = y ~ 1, random = ~ 1 | group)
    summary(fit_lme)
    #> Linear mixed-effects model fit by REML
    #>   Data: dat 
    #>        AIC      BIC    logLik
    #>   121.7804 125.1869 -57.89019
    #> 
    #> Random effects:
    #>  Formula: ~1 | group
    #>         (Intercept) Residual
    #> StdDev:    3.419288 2.366309
    #> 
    #> Fixed effects:  y ~ 1 
    #>                Value Std.Error DF  t-value p-value
    #> (Intercept) 64.01266  1.780313 20 35.95584       0
    #> 
    #> Standardized Within-Group Residuals:
    #>         Min          Q1         Med          Q3         Max 
    #> -2.18490896 -0.59921167  0.09332131  0.54077636  2.17507789 
    #> 
    #> Number of Observations: 24
    #> Number of Groups: 4

    随机效应(组间标准差)\(\tau^2\) 3.419288 、残差效应(组内标准差)\(\sigma^2\) 2.366309。截距 \(\mu\) 64.01266 代表整体的均值。各组的均值如下:

    64.01266 + ranef(fit_lme)
    #>   (Intercept)
    #> 1    61.32214
    #> 2    65.85309
    #> 3    67.70525
    #> 4    61.17016
  2. 基于 lme4 包中学生对老师的评价数据 InstEval 建立(广义)线性混合效应模型分析数据。将响应变量(学生评价)视为有序的离散型变量,比较观察两个模型拟合效果(lme4、GLMMadaptive、spaMM 都不支持有序的响应变量,brms 则支持各类有序回归,使用语法与 lme4 完全一样。但是,由于数据规模比较大,计算时间数以天计,可考虑用 Stan 直接编码)。再者,从 Stan 实现的贝叶斯模型来看,感受 Stan 建模的灵活性和扩展性。(nlme 包不支持此等交叉随机效应的表达。)

    data(InstEval, package = "lme4")
    str(InstEval)
    #> 'data.frame':    73421 obs. of  7 variables:
    #>  $ s      : Factor w/ 2972 levels "1","2","3","4",..: 1 1 1 1 2 2 3 3 3 3 ...
    #>  $ d      : Factor w/ 1128 levels "1","6","7","8",..: 525 560 832 1068 62 406 3 6 19 75 ...
    #>  $ studage: Ord.factor w/ 4 levels "2"<"4"<"6"<"8": 1 1 1 1 1 1 1 1 1 1 ...
    #>  $ lectage: Ord.factor w/ 6 levels "1"<"2"<"3"<"4"<..: 2 1 2 2 1 1 1 1 1 1 ...
    #>  $ service: Factor w/ 2 levels "0","1": 1 2 1 2 1 1 2 1 1 1 ...
    #>  $ dept   : Factor w/ 14 levels "15","5","10",..: 14 5 14 12 2 2 13 3 3 3 ...
    #>  $ y      : int  5 2 5 3 2 4 4 5 5 4 ...
    • 因子型变量 s 表示 1-2972 位参与评分的学生。
    • 因子型变量 d 表示 1-2160 位上课的讲师。
    • 因子型变量 dept 表示课程相关的 1-15 院系。
    • 因子型变量 service 表示讲师除了授课外,是否承担其它服务。
    • 数值型变量 y 表示学生给课程的评分,1-5 分对应从坏到很好。
    # 数值型的响应变量
    fit_lme4 <- lme4::lmer(y ~ 1 + service + (1 | s) + (1 | d) + (1 | dept), data = InstEval)
    summary(fit_lme4)
    #> Linear mixed model fit by REML ['lmerMod']
    #> Formula: y ~ 1 + service + (1 | s) + (1 | d) + (1 | dept)
    #>    Data: InstEval
    #> 
    #> REML criterion at convergence: 237733.8
    #> 
    #> Scaled residuals: 
    #>     Min      1Q  Median      3Q     Max 
    #> -3.0597 -0.7478  0.0404  0.7723  3.1988 
    #> 
    #> Random effects:
    #>  Groups   Name        Variance Std.Dev.
    #>  s        (Intercept) 0.105998 0.32557 
    #>  d        (Intercept) 0.265221 0.51500 
    #>  dept     (Intercept) 0.006912 0.08314 
    #>  Residual             1.386500 1.17750 
    #> Number of obs: 73421, groups:  s, 2972; d, 1128; dept, 14
    #> 
    #> Fixed effects:
    #>             Estimate Std. Error t value
    #> (Intercept)  3.28259    0.02935 111.858
    #> service1    -0.09264    0.01339  -6.919
    #> 
    #> Correlation of Fixed Effects:
    #>          (Intr)
    #> service1 -0.152
    # 需要 5-10 分钟时间
    # 有序因子型的响应变量
    InstEval$y <- factor(InstEval$y, ordered = TRUE)
    library(ordinal)
    fit_ordinal <- clmm(
      y ~ 1 + service + (1 | s) + (1 | d) + (1 | dept),
      data = InstEval, link = "probit", threshold = "equidistant"
    )
    summary(fit_ordinal)