用 R 获取中国国债到期收益率曲线数据
楚新元 / 2021-08-18
下面直接给出代码,读者可自行运行体验。
加载相关 R 包
library(dplyr)
library(purrr)
library(openxlsx)
library(ggplot2)
library(ggthemes)
编写批量获取数据的函数
# Download the ChinaBond government-bond yield-curve workbook for every
# calendar day from `from` to `to` (inclusive) and combine them into one table.
#
# Args:
#   from, to: start and end date, in any form lubridate::ymd() can parse
#     (e.g. "20210816" or "2021-08-16"). `from` must not be earlier than
#     2006-03-01 and `to` must not be later than today.
#
# Returns:
#   A data frame with one row per (day, term) and the columns
#     Date  - character "YYYY-MM-DD", taken from the downloaded file's name
#     Term  - term as read from the workbook (mapped to labels below)
#     Rate  - numeric yield
#     Term2 - label such as "3M" or "10Y"; NA for terms not in the table
#   Days whose download is not larger than 3400 bytes are treated as having
#   no data and are skipped. If no day has data, a zero-row data frame with
#   the same columns is returned.
#
# Side effects:
#   Creates ./data if needed and writes one <date>.xlsx file per day there.
#
# Errors:
#   Stops with a message when there is no internet connection or when the
#   date arguments are unparseable, reversed, in the future, or too early.
cnbond_yield = \(from, to) {
  # Normalise both bounds to Date; unparseable input becomes NA.
  from = lubridate::ymd(from)
  to = lubridate::ymd(to)

  # Validate the environment and the arguments, failing with a clear message.
  if (!curl::has_internet()) {
    stop("没有发现网络连接...")
  }
  # `if` needs a single TRUE/FALSE, so use the scalar, short-circuit `||`.
  if (is.na(from) || is.na(to)) {
    stop("输入的起止日期参数必须是包含年月日的字符。")
  }
  if (to < from) {
    stop("发现期初日期 > 期末日期,你把两者弄混了吗?")
  }
  if (to > Sys.Date()) {
    stop("输入的期末日期不能大于当前日期。")
  }
  if (from < as.Date("2006-03-01")) {
    stop("数据库不包含2006年3月1日之前的数据。")
  }

  # Every calendar day in the requested range.
  dates = seq.Date(from = from, to = to, by = "day")

  # One download URL per day.
  url = paste0(
    "http://yield.chinabond.com.cn/cbweb-mn/yc/downBzqxDetail?ycDefIds=2c9081e50a2f9606010a3068cae70001&&zblx=txy&&workTime=",
    dates,
    "&&dxbj=0&&qxlx=0,&&yqqxN=N&&yqqxK=K&&wrjxCBFlag=0&&locale=zh_CN"
  )

  # Download each day's workbook into the data folder.
  if (!dir.exists("data")) dir.create("data")
  destfile = paste0("./data/", dates, ".xlsx")
  list(url = url, destfile = destfile, mode = "wb", quiet = TRUE) |>
    pwalk(download.file)

  # Read only the files requested by this call, so that workbooks left in
  # ./data by earlier calls do not leak into the result. Files that are
  # missing or not larger than 3400 bytes are treated as empty and dropped.
  sheets = destfile |>
    set_names() |>
    map(\(x) if (isTRUE(file.size(x) > 3400)) read.xlsx(x)) |>
    compact()

  # No day in the range had data: return an empty table with the usual
  # columns instead of failing further down the pipeline.
  if (length(sheets) == 0) {
    return(
      data.frame(
        Date = character(),
        Term = numeric(),
        Rate = numeric(),
        Term2 = character()
      )
    )
  }

  sheets |>
    # Keep the file path of each row in a leading `src` column.
    list_rbind(names_to = "src") |>
    # Drop the workbook's own first column; the date comes from the file name.
    select(-2) |>
    set_names(c("Date", "Term", "Rate")) |>
    mutate(
      # "./data/2021-08-17.xlsx" -> "2021-08-17"
      Date = gsub(".*/(.*?)\\.xlsx", "\\1", Date),
      Rate = as.numeric(Rate),
      # Human-readable label for each standard term.
      # NOTE(review): relies on the workbook storing terms as exactly these
      # two-decimal values - confirm against a downloaded file.
      Term2 = case_when(
        Term == 0 ~ "1D",
        Term == 0.08 ~ "1M",
        Term == 0.17 ~ "2M",
        Term == 0.25 ~ "3M",
        Term == 0.5 ~ "6M",
        Term == 0.75 ~ "9M",
        Term == 1 ~ "1Y",
        Term == 2 ~ "2Y",
        Term == 3 ~ "3Y",
        Term == 5 ~ "5Y",
        Term == 7 ~ "7Y",
        Term == 10 ~ "10Y",
        Term == 15 ~ "15Y",
        Term == 20 ~ "20Y",
        Term == 30 ~ "30Y",
        Term == 40 ~ "40Y",
        Term == 50 ~ "50Y"
      )
    )
}
国债到期收益率曲线可视化
# Fetch the yield data for the chosen date range.
data = cnbond_yield(from = "20210816", to = "20210817")

# Draw the yield curve for a single day; the post uses the previous day's
# data (2021-08-17) as the example.
ggplot(
  data = filter(data, Date == "2021-08-17"),
  mapping = aes(x = Term, y = Rate, group = 1)
) +
  geom_point(color = "#96363D") +
  geom_line(color = "#96363D", linewidth = 1) +
  # Label the x axis with the standard terms instead of raw numbers.
  scale_x_continuous(
    breaks = c(0.5, 1, 3, 5, 7, 10, 15, 20, 30, 40, 50),
    labels = c(
      "6M", "1Y", "3Y", "5Y", "7Y", "10Y",
      "15Y", "20Y", "30Y", "40Y", "50Y"
    )
  ) +
  theme_economist() +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 12)
  ) +
  # Blank axis titles; the report date goes in the plot title.
  labs(x = "", y = "", title = "报告期:2021年8月17日")