Welcome to R Square

用 R 进行多元回归估计

楚新元 / 2021-08-23


  案例来源于古扎拉蒂《经济计量学精要》(第四版)第二章例 2-5 古董钟与拍卖价格。之所以选择此案例是因为此案例只有 3 个变量,32 个样本,便于手工验证。

读取数据

library(readxl)
library(dplyr)
library(kableExtra)
data = read_excel("data/Table2_14.xls", skip = 5)
data %>% 
  kable(
    caption = "古董钟与拍卖价格",
    format = "html"
  ) %>% 
   kable_styling(
     bootstrap_options = "striped",
     fixed_thead = TRUE,
     font_size = 14,
  ) %>% 
  column_spec(1, width = "80px") %>%
  column_spec(2:4, width = "120px")
古董钟与拍卖价格
ObservationPriceAgeNumber of Bidders
1123512713
2108011512
38451277
415521509
510471566
6197918211
7182215612
8125313210
912971379
109461139
11171313715
12102411711
13213117014
1415501828
15188416211
16204118410
178541436
1814831599
19105510814
2015451758
217291086
2217921799
23117511115
2415931878
2511471378
2610921536
27115211713
28133612610
297851117
307441157
3113561945
3212621687

变量含义

利用 R 自带函数进行参数估计

fit = lm(Price ~ Age + `Number of Bidders`, data = data)
summary(fit)
#> 
#> Call:
#> lm(formula = Price ~ Age + `Number of Bidders`, data = data)
#> 
#> Residuals:
#>     Min      1Q  Median      3Q     Max 
#> -208.60 -119.04   15.73  101.84  212.54 
#> 
#> Coefficients:
#>                       Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)         -1336.0493   175.2725  -7.623 2.10e-08 ***
#> Age                    12.7414     0.9124  13.965 2.09e-14 ***
#> `Number of Bidders`    85.7641     8.8020   9.744 1.19e-10 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 134.6 on 29 degrees of freedom
#> Multiple R-squared:  0.8906,	Adjusted R-squared:  0.8831 
#> F-statistic: 118.1 on 2 and 29 DF,  p-value: 1.161e-14

手动计算参数值和各个统计量(含截距项)

生成矩阵 \(\mathbf{X}\)

data %>%
  mutate(X1 = 1) %>%
  select(X1, Age, `Number of Bidders`) %>%
  as.matrix() -> X
print(X)
#>       X1 Age Number of Bidders
#>  [1,]  1 127                13
#>  [2,]  1 115                12
#>  [3,]  1 127                 7
#>  [4,]  1 150                 9
#>  [5,]  1 156                 6
#>  [6,]  1 182                11
#>  [7,]  1 156                12
#>  [8,]  1 132                10
#>  [9,]  1 137                 9
#> [10,]  1 113                 9
#> [11,]  1 137                15
#> [12,]  1 117                11
#> [13,]  1 170                14
#> [14,]  1 182                 8
#> [15,]  1 162                11
#> [16,]  1 184                10
#> [17,]  1 143                 6
#> [18,]  1 159                 9
#> [19,]  1 108                14
#> [20,]  1 175                 8
#> [21,]  1 108                 6
#> [22,]  1 179                 9
#> [23,]  1 111                15
#> [24,]  1 187                 8
#> [25,]  1 137                 8
#> [26,]  1 153                 6
#> [27,]  1 117                13
#> [28,]  1 126                10
#> [29,]  1 111                 7
#> [30,]  1 115                 7
#> [31,]  1 194                 5
#> [32,]  1 168                 7

生成矩阵 \(\mathbf{Y}\)

Y = as.matrix(data$Price)
print(Y)
#>       [,1]
#>  [1,] 1235
#>  [2,] 1080
#>  [3,]  845
#>  [4,] 1552
#>  [5,] 1047
#>  [6,] 1979
#>  [7,] 1822
#>  [8,] 1253
#>  [9,] 1297
#> [10,]  946
#> [11,] 1713
#> [12,] 1024
#> [13,] 2131
#> [14,] 1550
#> [15,] 1884
#> [16,] 2041
#> [17,]  854
#> [18,] 1483
#> [19,] 1055
#> [20,] 1545
#> [21,]  729
#> [22,] 1792
#> [23,] 1175
#> [24,] 1593
#> [25,] 1147
#> [26,] 1092
#> [27,] 1152
#> [28,] 1336
#> [29,]  785
#> [30,]  744
#> [31,] 1356
#> [32,] 1262

计算样本容量 \(n\) 和自由度 \(df\)

n = nrow(data)
k = ncol(X)
df = n - k
print(c(n = n, k = k, df = df))
#>  n  k df 
#> 32  3 29

估计参数 \(\beta\)

Beta_hat = solve(t(X) %*% X) %*% t(X) %*% Y
colnames(Beta_hat) = "Beta_hat"
print(Beta_hat)
#>                      Beta_hat
#> X1                -1336.04929
#> Age                  12.74138
#> Number of Bidders    85.76407

计算回归残差 \(e\)

residuals = Y - (X %*% Beta_hat)
colnames(residuals) = "residuals"
print(residuals)
#>        residuals
#>  [1,] -162.03929
#>  [2,]  -78.37862
#>  [3,]  -37.45489
#>  [4,]  204.96516
#>  [5,] -119.19094
#>  [6,]   52.71275
#>  [7,]  141.22465
#>  [8,]   49.54599
#>  [9,]  115.60314
#> [10,]   70.39635
#> [11,]   17.01874
#> [12,]  -74.09732
#> [13,]  100.31715
#> [14,] -118.99505
#> [15,]  212.54042
#> [16,]  174.99405
#> [17,] -146.55296
#> [18,]   21.29270
#> [19,] -185.71707
#> [20,]  -34.80536
#> [21,]  174.39547
#> [22,]   75.46503
#> [23,] -189.70529
#> [24,] -139.70197
#> [25,]   51.36721
#> [26,]  -35.96679
#> [27,] -117.62546
#> [28,]  208.99429
#> [29,]  106.40725
#> [30,]   14.44171
#> [31,] -208.59945
#> [32,] -142.85161

计算回归标准误 \(\hat{\sigma}\)

Sigma_hat = sqrt(sum(residuals^2) / df)
c(Sigma_hat = Sigma_hat)
#> Sigma_hat 
#>  134.6083

计算 \(\beta\) 估计量的方差与标准误

Beta_hat_var = solve(t(X) %*% X) * (Sigma_hat)^2
Beta1_hat_se = sqrt(Beta_hat_var[1, 1])
Beta2_hat_se = sqrt(Beta_hat_var[2, 2])
Beta3_hat_se = sqrt(Beta_hat_var[3, 3])
c(
  Beta1_hat_se = Beta1_hat_se,
  Beta2_hat_se  = Beta2_hat_se,
  Beta3_hat_se  = Beta3_hat_se
)
#> Beta1_hat_se Beta2_hat_se Beta3_hat_se 
#>  175.2724965    0.9123559    8.8019949

计算 \(t\) 统计量

t_Beta1_hat = Beta_hat[1, 1] / Beta1_hat_se
t_Beta2_hat = Beta_hat[2, 1] / Beta2_hat_se
t_Beta3_hat = Beta_hat[3, 1] / Beta3_hat_se

c(
  t_Beta1_hat = t_Beta1_hat,
  t_Beta2_hat = t_Beta2_hat,
  t_Beta3_hat = t_Beta3_hat
)
#> t_Beta1_hat t_Beta2_hat t_Beta3_hat 
#>   -7.622698   13.965366    9.743708

计算拟合优度 \(R^2\)

TSS = sum((Y - mean(Y))^2)
RSS = sum(residuals^2)
ESS = TSS - RSS
R_squared = 1 - RSS/TSS
c(R_squared = R_squared)
#> R_squared 
#> 0.8906143

计算修正的拟合优度 \(Adjusted\ R^2\)

Adjusted_R_squared  = 1 - (1 - R_squared) * (n - 1) / (n - k)
c(Adjusted_R_squared  = Adjusted_R_squared)
#> Adjusted_R_squared 
#>          0.8830705

计算 \(F\) 统计量

F_statistic = (R_squared / (k - 1)) /
  ((1 - R_squared) / (n - k))
c(F_statistic = F_statistic)
#> F_statistic 
#>    118.0585