用 R 进行多元回归估计
楚新元 / 2021-08-23
案例来源于古扎拉蒂《经济计量学精要》(第四版)第二章例 2-5 古董钟与拍卖价格。之所以选择此案例是因为此案例只有 3 个变量,32 个样本,便于手工验证。
读取数据
library(readxl)
library(dplyr)
library(kableExtra)
data = read_excel("data/Table2_14.xls", skip = 5)
data %>%
kable(
caption = "古董钟与拍卖价格",
format = "html"
) %>%
kable_styling(
bootstrap_options = "striped",
fixed_thead = TRUE,
font_size = 14,
) %>%
column_spec(1, width = "80px") %>%
column_spec(2:4, width = "120px")
Observation | Price | Age | Number of Bidders |
---|---|---|---|
1 | 1235 | 127 | 13 |
2 | 1080 | 115 | 12 |
3 | 845 | 127 | 7 |
4 | 1552 | 150 | 9 |
5 | 1047 | 156 | 6 |
6 | 1979 | 182 | 11 |
7 | 1822 | 156 | 12 |
8 | 1253 | 132 | 10 |
9 | 1297 | 137 | 9 |
10 | 946 | 113 | 9 |
11 | 1713 | 137 | 15 |
12 | 1024 | 117 | 11 |
13 | 2131 | 170 | 14 |
14 | 1550 | 182 | 8 |
15 | 1884 | 162 | 11 |
16 | 2041 | 184 | 10 |
17 | 854 | 143 | 6 |
18 | 1483 | 159 | 9 |
19 | 1055 | 108 | 14 |
20 | 1545 | 175 | 8 |
21 | 729 | 108 | 6 |
22 | 1792 | 179 | 9 |
23 | 1175 | 111 | 15 |
24 | 1593 | 187 | 8 |
25 | 1147 | 137 | 8 |
26 | 1092 | 153 | 6 |
27 | 1152 | 117 | 13 |
28 | 1336 | 126 | 10 |
29 | 785 | 111 | 7 |
30 | 744 | 115 | 7 |
31 | 1356 | 194 | 5 |
32 | 1262 | 168 | 7 |
变量含义
- Price: 古董钟中标的价格
- Age: 钟表的年代
- Number of Bidders: 投标人的个数
利用 R 自带函数进行参数估计
fit = lm(Price ~ Age + `Number of Bidders`, data = data)
summary(fit)
#>
#> Call:
#> lm(formula = Price ~ Age + `Number of Bidders`, data = data)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -208.60 -119.04 15.73 101.84 212.54
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) -1336.0493 175.2725 -7.623 2.10e-08 ***
#> Age 12.7414 0.9124 13.965 2.09e-14 ***
#> `Number of Bidders` 85.7641 8.8020 9.744 1.19e-10 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 134.6 on 29 degrees of freedom
#> Multiple R-squared: 0.8906, Adjusted R-squared: 0.8831
#> F-statistic: 118.1 on 2 and 29 DF, p-value: 1.161e-14
手动计算参数值和各个统计量(含截距项)
生成矩阵 \(\mathbf{X}\)
data %>%
mutate(X1 = 1) %>%
select(X1, Age, `Number of Bidders`) %>%
as.matrix() -> X
print(X)
#> X1 Age Number of Bidders
#> [1,] 1 127 13
#> [2,] 1 115 12
#> [3,] 1 127 7
#> [4,] 1 150 9
#> [5,] 1 156 6
#> [6,] 1 182 11
#> [7,] 1 156 12
#> [8,] 1 132 10
#> [9,] 1 137 9
#> [10,] 1 113 9
#> [11,] 1 137 15
#> [12,] 1 117 11
#> [13,] 1 170 14
#> [14,] 1 182 8
#> [15,] 1 162 11
#> [16,] 1 184 10
#> [17,] 1 143 6
#> [18,] 1 159 9
#> [19,] 1 108 14
#> [20,] 1 175 8
#> [21,] 1 108 6
#> [22,] 1 179 9
#> [23,] 1 111 15
#> [24,] 1 187 8
#> [25,] 1 137 8
#> [26,] 1 153 6
#> [27,] 1 117 13
#> [28,] 1 126 10
#> [29,] 1 111 7
#> [30,] 1 115 7
#> [31,] 1 194 5
#> [32,] 1 168 7
生成矩阵 \(\mathbf{Y}\)
Y = as.matrix(data$Price)
print(Y)
#> [,1]
#> [1,] 1235
#> [2,] 1080
#> [3,] 845
#> [4,] 1552
#> [5,] 1047
#> [6,] 1979
#> [7,] 1822
#> [8,] 1253
#> [9,] 1297
#> [10,] 946
#> [11,] 1713
#> [12,] 1024
#> [13,] 2131
#> [14,] 1550
#> [15,] 1884
#> [16,] 2041
#> [17,] 854
#> [18,] 1483
#> [19,] 1055
#> [20,] 1545
#> [21,] 729
#> [22,] 1792
#> [23,] 1175
#> [24,] 1593
#> [25,] 1147
#> [26,] 1092
#> [27,] 1152
#> [28,] 1336
#> [29,] 785
#> [30,] 744
#> [31,] 1356
#> [32,] 1262
计算样本容量 \(n\)
和自由度 \(df\)
n = nrow(data)
k = ncol(X)
df = n - k
print(c(n = n, k = k, df = df))
#> n k df
#> 32 3 29
估计参数 \(\beta\)
Beta_hat = solve(t(X) %*% X) %*% t(X) %*% Y
colnames(Beta_hat) = "Beta_hat"
print(Beta_hat)
#> Beta_hat
#> X1 -1336.04929
#> Age 12.74138
#> Number of Bidders 85.76407
计算回归残差 \(e\)
residuals = Y - (X %*% Beta_hat)
colnames(residuals) = "residuals"
print(residuals)
#> residuals
#> [1,] -162.03929
#> [2,] -78.37862
#> [3,] -37.45489
#> [4,] 204.96516
#> [5,] -119.19094
#> [6,] 52.71275
#> [7,] 141.22465
#> [8,] 49.54599
#> [9,] 115.60314
#> [10,] 70.39635
#> [11,] 17.01874
#> [12,] -74.09732
#> [13,] 100.31715
#> [14,] -118.99505
#> [15,] 212.54042
#> [16,] 174.99405
#> [17,] -146.55296
#> [18,] 21.29270
#> [19,] -185.71707
#> [20,] -34.80536
#> [21,] 174.39547
#> [22,] 75.46503
#> [23,] -189.70529
#> [24,] -139.70197
#> [25,] 51.36721
#> [26,] -35.96679
#> [27,] -117.62546
#> [28,] 208.99429
#> [29,] 106.40725
#> [30,] 14.44171
#> [31,] -208.59945
#> [32,] -142.85161
计算回归标准误 \(\hat{\sigma}\)
Sigma_hat = sqrt(sum(residuals^2) / df)
c(Sigma_hat = Sigma_hat)
#> Sigma_hat
#> 134.6083
计算 \(\beta\)
估计量的方差与标准误
Beta_hat_var = solve(t(X) %*% X) * (Sigma_hat)^2
Beta1_hat_se = sqrt(Beta_hat_var[1, 1])
Beta2_hat_se = sqrt(Beta_hat_var[2, 2])
Beta3_hat_se = sqrt(Beta_hat_var[3, 3])
c(
Beta1_hat_se = Beta1_hat_se,
Beta2_hat_se = Beta2_hat_se,
Beta3_hat_se = Beta3_hat_se
)
#> Beta1_hat_se Beta2_hat_se Beta3_hat_se
#> 175.2724965 0.9123559 8.8019949
计算 \(t\)
统计量
t_Beta1_hat = Beta_hat[1, 1] / Beta1_hat_se
t_Beta2_hat = Beta_hat[2, 1] / Beta2_hat_se
t_Beta3_hat = Beta_hat[3, 1] / Beta3_hat_se
c(
t_Beta1_hat = t_Beta1_hat,
t_Beta2_hat = t_Beta2_hat,
t_Beta3_hat = t_Beta3_hat
)
#> t_Beta1_hat t_Beta2_hat t_Beta3_hat
#> -7.622698 13.965366 9.743708
计算拟合优度 \(R^2\)
TSS = sum((Y - mean(Y))^2)
RSS = sum(residuals^2)
ESS = TSS - RSS
R_squared = 1 - RSS/TSS
c(R_squared = R_squared)
#> R_squared
#> 0.8906143
计算修正的拟合优度 \(Adjusted\ R^2\)
Adjusted_R_squared = 1 - (1 - R_squared) * (n - 1) / (n - k)
c(Adjusted_R_squared = Adjusted_R_squared)
#> Adjusted_R_squared
#> 0.8830705
计算 \(F\)
统计量
F_statistic = (R_squared / (k - 1)) /
((1 - R_squared) / (n - k))
c(F_statistic = F_statistic)
#> F_statistic
#> 118.0585