楚新元 | All in R

Welcome to R Square

用 R 获取中国国债到期收益率曲线数据

楚新元 / 2021-08-18


这里直接给出代码如下,读者自行体验。

加载相关 R 包

library(dplyr)
library(purrr)
library(openxlsx)
library(ggplot2)
library(ggthemes)

编写批量获取数据的函数

#' Download China government bond yield-curve data for a date range
#'
#' For every calendar day between `from` and `to` (inclusive) one workbook is
#' downloaded from yield.chinabond.com.cn into `./data/<YYYY-MM-DD>.xlsx`
#' (the directory is created if missing). The workbooks that look non-empty
#' are then read and stacked into a single data frame.
#'
#' @param from Start date; anything `lubridate::ymd()` can parse
#'   (e.g. `"20210816"` or `"2021-08-16"`). Must be a single value.
#' @param to End date, same format as `from`. Must not be before `from`
#'   nor after today.
#' @return A data frame with columns `Date` (character, taken from the
#'   workbook file name), `Term` (as stored in the workbook), `Rate`
#'   (numeric) and `Term2` (a tenor label such as `"6M"` or `"10Y"`, `NA`
#'   for terms without a label). When no workbook in the range holds data,
#'   an empty data frame with these columns is returned with a warning.
cnbond_yield = \(from, to) {

  # Normalise both bounds to Date; input that cannot be parsed becomes NA.
  from = lubridate::ymd(from)
  to = lubridate::ymd(to)

  # Argument checks: fail early with an explicit message.
  if (!curl::has_internet()) {
    stop('没有发现网络链接...')
  }

  # Length guard first, so the scalar `||` never sees a vector and
  # zero-length input gets the same readable message as unparseable input.
  if (length(from) != 1L || length(to) != 1L || is.na(from) || is.na(to)) {
    stop('输入的起止日期参数必须是包含年月日的字符。')
  }

  if (to < from) {
    stop('发现期初日期 > 期末日期,你把两者弄混了吗?')
  }

  if (to > Sys.Date()) {
    stop('输入的期末日期不能大于当前日期。')
  }

  if (from < as.Date("2006-03-01")) {
    stop('数据库不包含2006年3月1日之前的数据。')
  }

  # Every calendar day in the requested window.
  dates = seq.Date(from = from, to = to, by = "day")

  # One download URL per day.
  url = paste0(
    "http://yield.chinabond.com.cn/cbweb-mn/yc/downBzqxDetail?ycDefIds=2c9081e50a2f9606010a3068cae70001&&zblx=txy&&workTime=",
    dates,
    "&&dxbj=0&&qxlx=0,&&yqqxN=N&&yqqxK=K&&wrjxCBFlag=0&&locale=zh_CN"
  )

  # Download each day's workbook into the data folder.
  if (!dir.exists("data")) dir.create("data")
  files = paste0("./data/", dates, ".xlsx")
  list(url = url, destfile = files, mode = "wb", quiet = TRUE) |>
    pwalk(download.file)

  # # Name of the final combined data file (currently unused)
  # result_file = paste0(
  #   "cnbond", "-",
  #   format(from, "%Y%m%d"), "-",
  #   format(to, "%Y%m%d"), ".xlsx"
  # )

  # Read only the workbooks requested in this call. Listing the whole folder
  # would also pick up files left behind by earlier calls with other dates.
  # Files at or below the size threshold are treated as empty and skipped.
  min_bytes = 3400
  sheets = files |>
    set_names() |>
    map(
      \(x) if (file.exists(x) && file.size(x) > min_bytes) read.xlsx(x)
    ) |>
    compact()

  # Nothing usable: return an empty, correctly shaped result instead of
  # failing later on a column that does not exist.
  if (length(sheets) == 0L) {
    warning('所选日期区间内没有可用的收益率数据。', call. = FALSE)
    return(
      data.frame(
        Date = character(),
        Term = numeric(),
        Rate = numeric(),
        Term2 = character()
      )
    )
  }

  sheets |>
    list_rbind(names_to = "src") |>
    # Column 1 is the source path; column 2 (the sheet's first column,
    # presumably its own date field) is dropped in favour of the file name.
    select(-2) |>
    set_names(c("Date", "Term", "Rate")) |>
    mutate(
      # Keep only the file's base name, i.e. the YYYY-MM-DD date.
      Date = gsub(".*/(.*?)\\.xlsx", "\\1", Date),
      Rate = as.numeric(Rate),
      # Tenor label for the standard terms (the mapping implies years).
      Term2 = case_when(
        Term == 0 ~ "1D",
        Term == 0.08 ~ "1M",
        Term == 0.17 ~ "2M",
        Term == 0.25 ~ "3M",
        Term == 0.5 ~ "6M",
        Term == 0.75 ~ "9M",
        Term == 1 ~ "1Y",
        Term == 2 ~ "2Y",
        Term == 3 ~ "3Y",
        Term == 5 ~ "5Y",
        Term == 7 ~ "7Y",
        Term == 10 ~ "10Y",
        Term == 15 ~ "15Y",
        Term == 20 ~ "20Y",
        Term == 30 ~ "30Y",
        Term == 40 ~ "40Y",
        Term == 50 ~ "50Y"
      )
    )

}

国债到期收益率曲线可视化

# Fetch the yield data for the chosen date window.
data = cnbond_yield(from = "20210816", to = "20210817")

# Plot the government bond yield curve for a single day
# (the day before the post was written serves as the example).
data |>
  filter(Date == "2021-08-17") |>
  ggplot(aes(x = Term, y = Rate, group = 1)) +
  geom_point(color = "#96363D") +
  geom_line(color = "#96363D", linewidth = 1) +
  theme_economist() +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 12)
  ) +
  # Label the x axis with tenor names at the matching term values.
  scale_x_continuous(
    breaks = c(0.5, 1, 3, 5, 7, 10, 15, 20, 30, 40, 50),
    labels = c(
      "6M", "1Y", "3Y", "5Y", "7Y", "10Y",
      "15Y", "20Y", "30Y", "40Y", "50Y"
    )
  ) +
  # Blank axis titles; the title carries the reporting date.
  labs(x = "", y = "", title = "报告期:2021年8月17日")