楚新元 | All in R

Welcome to R Square

用 openxlsx 定制报表后生成 PDF 文件

楚新元 / 2021-08-20


用 R 清洗加工数据非常方便,对于 R 处理后输出的结果,一般情况下,我们看到的都是比较规整的数据框。现实当中每天要报的报表还有一些“乱七八糟”的要素,比如标题、报告日期、数据单位、制表、复核、保密等级等。当然有了最重要的数据部分,导出到 Excel 文件里手动添加剩余的哪些“乱七八糟”的要素也是可以的,如果这个报表只制作一次,我也更倾向于这样做,发挥 R 和 Excel 各自的优势。但是如果这个报表是每天都要报送的日报呢?我是绝对不能容忍每天手动做重复性的工作的,用 R 全自动化处理绝对是最佳选项。下面我们看一个例子:

加载相关 R 包

library(cnum)
library(stringr)
library(dplyr)
library(purrr)
library(janitor)
library(openxlsx)
library(openxlsx2)

报表数据准备

R 里面清洗加工数据这块的教程太多了,这里不是我要讲的重点,因此,这里直接生成一个垃圾数字组成的报表。

140 %>% 
  rnorm(100, 10) %>% 
  matrix(ncol = 7) %>% 
  as.data.frame() %>% 
  set_names(paste0("x", 2:8)) %>% 
  mutate(
    x1 = str_glue('科目{num2c(1:20, lang = "sc")}'),
    .before = "x2"
  ) %>% 
  mutate(
    across(
      where(is.numeric), 
      \(x) round(x + 1e-10, 2))
  ) %>% 
  adorn_totals(
    where = "row",
    name = "合计"
  ) -> daily

自定义报表要素

# 定义报告期
date = "2020-04-08"

# 计算报告期年、月、日
date = as.Date(date)
year  = as.integer(format(date, '%Y'))
month = as.integer(format(date, '%m'))
day = as.integer(format(date, '%d'))

# 保密提示信息
secret = "★内部资料、严格保密"

# 日报标题
title = paste0(
  year, "年", 
  month, "月", 
  day, "日", 
  "经营数据日报"
)

# 制表日期
tabu_date = paste0(
  "制表日期:", 
  Sys.Date()
)

# 数据单位
unit = "单位:亿元、%"

# 输出文件路径和文件名
dest_file = paste0(
  "daily", "-", format(date, "%Y%m%d"), ".xlsx"
)

定制报表并导出到 Excel

基于 openxlsx 包的实现

#---------------------------------------------------------------------------
# 新建工作簿并添加工作表
#---------------------------------------------------------------------------

wb = createWorkbook()
addWorksheet(wb, "daily", gridLines = FALSE)

#---------------------------------------------------------------------------
# 设置报表区域行高、列宽
#---------------------------------------------------------------------------

# 设置报表区域行高
setRowHeights(
  wb, "daily", 
  rows = 1:(nrow(daily) + 4),
  heights = c(20, 35, rep(20, nrow(daily) + 2))
)

# 设置报表区域列宽
setColWidths(
  wb, "daily", 
  cols = 1:ncol(daily),
  widths = c(26, rep(15.5, 5), 12, 18)
)

#---------------------------------------------------------------------------
# 冻结窗口,设置打印页面
#---------------------------------------------------------------------------

# 冻结窗口
freezePane(
  wb, "daily", 
  firstActiveRow = 5, 
  firstActiveCol = "B"
)

# 打印页面设置
pageSetup(
  wb, "daily",
  orientation = "landscape",
  scale = 97,
  left = 0.7,
  right = 0.7, 
  top = 0.75,
  bottom = 0.75,
  header = 0.3, 
  footer = 0.3,
  fitToWidth = TRUE,
  fitToHeight = TRUE,
  paperSize = 9,
  printTitleRows = NULL, 
  printTitleCols = NULL
)

#---------------------------------------------------------------------------
# 报表第 1 行右上角保密提示
#---------------------------------------------------------------------------

# 合并第 1 行单元格
mergeCells(
  wb, "daily", 
  rows = 1:1, 
  cols = 1:ncol(daily)
)

# 创建保密提示风格
style_secret = createStyle(
  halign = "right",
  valign = "center",
  wrapText = TRUE,
  fontColour = "red",
  fontSize = 11, 
  fontName = "Arial"
)

# 保密风格样式运用到第 1 行
addStyle(
  wb, "daily", 
  style = style_secret,
  rows = 1:1, 
  cols = 1:ncol(daily)
)

# 保密内容写入到第 1 行
writeData(
  wb, "daily", 
  secret, 
  startRow = 1
)

#---------------------------------------------------------------------------
# 日报标题设置
#---------------------------------------------------------------------------

mergeCells(
  wb, "daily", 
  rows = 2:2, 
  cols = 1:ncol(daily)
)

style_title = createStyle(
  halign = "center",
  valign = "center",
  wrapText = TRUE,
  textDecoration = c("bold"),
  fontColour = "black",
  fontSize = 20, 
  fontName = "Arial"
)

addStyle(
  wb, "daily", 
  style = style_title,
  rows = 2:2, 
  cols = 1:ncol(daily)
)

writeData(
  wb, "daily", 
  title,  
  startRow = 2
)

#---------------------------------------------------------------------------
# 制表日期设置
#---------------------------------------------------------------------------

mergeCells(
  wb, "daily", 
  rows = 3:3, 
  cols = 6:7
)

style_tabu_date = createStyle(
  halign = "right",
  valign = "center",
  wrapText = TRUE, 
  fontColour = "black",
  fontSize = 11, 
  fontName = "Arial"
)

addStyle(
  wb, "daily", 
  style = style_tabu_date,
  rows = 3:3, 
  cols = 6:7
)

writeData(
  wb, "daily", 
  tabu_date,  
  startRow = 3, 
  startCol = 6
)

#---------------------------------------------------------------------------
# 数据单位设置
#---------------------------------------------------------------------------

mergeCells(
  wb, "daily", 
  rows = 3:3, 
  cols = ncol(daily)
)

style_unit = createStyle(
  halign = "center",
  valign = "center",
  wrapText = TRUE, 
  fontColour = "black",
  fontSize = 11, 
  fontName = "Arial"
)

addStyle(
  wb, "daily", 
  style = style_unit,
  rows = 3:3, 
  cols = ncol(daily)
)

writeData(
  wb, "daily", 
  unit, 
  startRow = 3, 
  startCol = 8
)

#---------------------------------------------------------------------------
# 表头设置
#---------------------------------------------------------------------------

style_header = createStyle(
  textDecoration = "Bold",
  halign = "center",
  valign = "center",
  wrapText = TRUE,
  border = "TopBottomLeftRight",
  borderColour = "black",
  fontColour = "white",
  fgFill = "#4F81BD",
  fontSize = 11, 
  fontName = "Arial"
)

addStyle(
  wb, "daily", 
  rows = 4:4,
  cols = 1:ncol(daily), 
  style = style_header,
  gridExpand = T
)

#---------------------------------------------------------------------------
# 数据部分设置
#---------------------------------------------------------------------------

# 第 1 列水平居中,垂直居中
style_data1 = createStyle(
  valign = "center",
  halign = "center",
  border = "TopBottomLeftRight",
  borderColour = "black",
  fontSize = 11, 
  fontName = "Arial"
)

addStyle(
  wb, "daily", 
  style = style_data1,
  rows = 5:(nrow(daily) + 4), 
  cols = 1,
  gridExpand = T
)

# 第 2-8 列水平右对齐,垂直居中
style_data2 = createStyle(
  valign = "center",
  halign = "right",
  border = "TopBottomLeftRight",
  borderColour = "black",
  fontSize = 11, 
  fontName = "Arial",
  numFmt = "0.00"  # 只显示两位小数
)

addStyle(
  wb, "daily", 
  style = style_data2,
  rows = 5:(nrow(daily) + 4), 
  cols = 2:ncol(daily),
  gridExpand = T
)

# 数据部分的「合计」行加粗
style_data3 = createStyle(
  textDecoration = "Bold"
)

addStyle(
  wb, "daily", 
  rows = nrow(daily) + 4,
  cols = 1:ncol(daily), 
  style = style_data3,
  gridExpand = T,
  stack = TRUE  # 新格式和原格式合并,而不是替代
)

# 数据部分添加斑马线
style_data4 = createStyle(
  fgFill = "#F0FFFF"
)

addStyle(
  wb, "daily",
  rows = c(1:(nrow(daily) / 2)) * 2 + 4, 
  cols = 1:ncol(daily), 
  style = style_data4,
  gridExpand = TRUE,
  stack = TRUE
)

# 数据部分写入到工作表
writeData(
  wb, "daily", 
  daily,
  startRow = 4,
  startCol = 1
)

#---------------------------------------------------------------------------
# 保存工作簿
#---------------------------------------------------------------------------

saveWorkbook(wb, dest_file, overwrite = TRUE)

#---------------------------------------------------------------------------
# 额外的打印设置
#---------------------------------------------------------------------------

wb = openxlsx2::wb_load(dest_file)

wb$worksheets[[1]]$set_print_options(
  horizontalCentered = "1",  # 水平居中
  verticalCentered = "0"  # 垂直不居中
)

openxlsx2::wb_save(wb, dest_file, overwrite = TRUE)

#---------------------------------------------------------------------------

由于 openxlsx 包不支持打印页面页边距水平和垂直居中,还好 openxlsx2 包增强了这个功能。

基于 openxlsx2 包的实现(推荐)

openxlsx2 的函数和参数均采用下划线而非驼峰风格,这点和 Tidyverse 系列包 风格一致,另外参数设置方面对细节的把控更加细腻,最重要的是支持管道符。

wb_workbook() %>% 

  # 增加工作表
  wb_add_worksheet(
    sheet = "daily", 
    grid_lines = FALSE
  ) %>% 

  # 设置报表区域行高
  wb_set_row_heights(
    rows = 1:(nrow(daily) + 4),
    heights = c(20, 35, rep(20, nrow(daily) + 2))
  ) %>% 

  # 设置报表区域列宽
  wb_set_col_widths(
    cols = 1:ncol(daily),
    widths = c(26, rep(15.5, 5), 12, 18)
  ) %>% 

  # 冻结窗口
  wb_freeze_pane(
    first_active_row = 5, 
    first_active_col = "B"
  ) %>% 

  # 打印页面设置
  wb_page_setup(
    orientation = "landscape",
    scale = 97,
    left = 0.7,
    right = 0.7, 
    top = 0.75,
    bottom = 0.75,
    header = 0.3, 
    footer = 0.3,
    fit_to_width = TRUE,
    fit_to_height = TRUE,
    paper_size = 9,
    print_title_rows = NULL,
    print_title_cols = NULL
  ) %>% 
  
  # 加入保密提示内容
  wb_add_data(
    x = secret,
    dims = wb_dims(
      from_row = 1, 
      from_col = 1
    )
  ) %>% 
  wb_merge_cells(
    dims = wb_dims(
      rows = 1:1, 
      cols = 1:ncol(daily)
    )
  ) %>% 
  wb_add_font(
    dims = "A1",
    size = "11",
    name = "Arial",
    color = wb_color(name = "red"),
  ) %>% 
  wb_add_cell_style(
    dims = wb_dims(
      rows = 1:1, 
      cols = 1:ncol(daily)
    ),    
    horizontal = "right",
    vertical = "center",
    wrap_text = TRUE
  ) %>% 
  
  # 日报标题设置
  wb_add_data(
    x = title,
    dims = wb_dims(
      from_row = 2,
      from_col = 1
    )
  ) %>%
  wb_merge_cells(
    dims = wb_dims(
      rows = 2:2,
      cols = 1:ncol(daily)
    )
  ) %>%
  wb_add_font(
    dims = "A2",
    size = "20",
    name = "Arial",
    color = wb_color(name = "black"),
    bold = "double"
  ) %>%
  wb_add_cell_style(
    dims = wb_dims(
      rows = 2:2,
      cols = 1:ncol(daily)
    ),
    horizontal = "center",
    vertical = "center",
    wrap_text = TRUE
  ) %>% 
  
  # 制表日期设置
  wb_add_data(
    x = tabu_date,
    dims = wb_dims(
      from_row = 3,
      from_col = 6
    )
  ) %>%
  wb_merge_cells(
    dims = wb_dims(
      rows = 3:3,
      cols = 6:7
    )
  ) %>%
  wb_add_font(
    # dims = "F3",
    dims = wb_dims(
      rows = 3:3,
      cols = 6:7
    ),
    size = "11",
    name = "Arial",
    color = wb_color(name = "black")
  ) %>%
  wb_add_cell_style(
    dims = wb_dims(
      rows = 3:3,
      cols = 6:7
    ),
    horizontal = "right",
    vertical = "center",
    wrap_text = TRUE
  ) %>% 
  
  # 数据单位设置
  wb_add_data(
    x = unit,
    dims = wb_dims(
      from_row = 3,
      from_col = ncol(daily)
    )
  ) %>%
  wb_add_font(
    dims = wb_dims(
      rows = 3:3,
      cols = ncol(daily)
    ),
    size = "11",
    name = "Arial",
    color = wb_color(name = "black")
  ) %>%
  wb_add_cell_style(
    dims = wb_dims(
      rows = 3:3,
      cols = 6:7
    ),
    horizontal = "center",
    vertical = "center",
    wrap_text = TRUE
  ) %>%
  
  # 表头设置
  wb_add_font(
    dims = wb_dims(
      rows = 4:4,
      cols = 1:ncol(daily)
    ),
    size = "11",
    name = "Arial",
    color = wb_color(name = "white"),
    bold = "double"
  ) %>%
  wb_add_fill(
    dims = wb_dims(
      rows = 4:4,
      cols = 1:ncol(daily)
    ), 
    color = wb_color(hex = "#4F81BD"),    
  ) %>% 
  wb_add_cell_style(
    dims = wb_dims(
      rows = 4:4,
      cols = 1:ncol(daily)
    ),
    horizontal = "center",
    vertical = "center",
    wrap_text = TRUE
  ) %>%

  # 数据部分设置
  wb_add_data(
    x = daily,
    dims = wb_dims(
      from_row = 4,
      from_col = 1
    )
  ) %>%  
  wb_add_font(
    dims = wb_dims(
      rows = (1:nrow(daily)) + 4,
      cols = 1:ncol(daily)
    ),
    size = "11",
    name = "Arial",
    color = wb_color(name = "black")
  ) %>%
  wb_add_cell_style(  # 第 1 列居中对齐
    dims = wb_dims(
      rows = (1:nrow(daily)) + 4,
      cols = 1
    ),
    horizontal = "center",
    vertical = "center",
    wrap_text = TRUE
  ) %>%
  wb_add_cell_style(  # 第 2-8 列右对齐
    dims = wb_dims(
      rows = (1:nrow(daily)) + 4,
      cols = 2:ncol(daily)
    ),
    horizontal = "right",
    vertical = "center",
    wrap_text = TRUE
  ) %>% 
  wb_add_numfmt(  # 第 2-8 保留两位小数
    dims = wb_dims(
      rows = (1:nrow(daily)) + 4,
      cols = 2:ncol(daily)
    ),
    numfmt = "#.00"
  ) %>% 
  
  # 数据部分的「合计」行加粗
  wb_add_font(
    dims = wb_dims(
      rows = nrow(daily) + 4,
      cols = 1:ncol(daily)
    ),
    bold = "double"
  ) %>% 
  
  # 表头和数据部分增加边框
  wb_add_border(
    dims = wb_dims(
      rows = 1:(nrow(daily) + 1) + 3,
      cols = 1:ncol(daily)
    ),
    inner_hgrid = "thin",
    inner_vgrid = "thin"
  ) -> wb
  
# 数据部分添加斑马线
# 由于涉及非连续行,openxlsx2 包暂不支持
rows = c(1:(nrow(daily) / 2)) * 2 + 4
for (i in rows) {
  wb %>% wb_add_fill(
    dims = wb_dims(
      rows = i,
      cols = 1:ncol(daily)
    ),
    color = wb_color(hex = "#F0FFFF")
  )
}

# 额外的页面水平居中打印设置
wb$worksheets[[1]]$set_print_options(
  horizontalCentered = "1", 
  verticalCentered = "0"
)

wb_save(wb, dest_file)

导出为 PDF 格式

其实导出到 Excel 基本已经大功告成了,可是有的单位要求发布的报告必须是 PDF 格式的。如果单纯的需要 PDF 格式文件,我们可以考虑用 R Markdown 或者 Quarto 直接生成 PDF 格式文件,而不需要先生成 Excel 文件再转为 PDF 格式,但是定制模板相对比较困难,暂时笔者只是了解一点 LaTeX 方面的知识,还不能熟练驾驭。通过已经定制好的 Excel 文件再转为 PDF 虽然有点绕远路了,但是能满足报表要求,所以这里仍然选择数据–> Excel 文件 –> PDF 文件。

从 Excel 文件 –> PDF 文件,手动实现很容易,打开 Excel 后利用虚拟打印机打印为 PDF 格式即可。但是,对于一个完美主义极客而言这是不够的,因为将来有可能一次生成的不是一张报表,所以,为了将来不时之需,这里仍难考虑通过程序解决。R 里面直接调用虚拟打印机将 Excel 工作表打印成 PDF 实现起来估计有点困难,利用 JAVA 实现的网上倒是有教程 JAVA 调用打印机输出 PDF 文件。因为 Excel 从 2007 开始可以直接将 Excel 工作表转为 PDF,这里我们考虑用 R 调用 VBA 实现。

# 调用 R 包
library(RDCOMClient)  # 仅支持 Windows 系统

# 指定 Excel 文件路径
xlFile = here::here(dest_file)  # 这里必须是绝对路径

# 转换后的 PDF 文件与原 Excel 文件路径和名称相同
pdfFile = gsub(".xlsx", ".pdf", xlFile)  # 这里必须是绝对路径

# 指定路径下存在同名 PDF 文件则删除
if (file.exists(pdfFile)) file.remove(pdfFile)

# 创建对象,打开工作簿并选中第 1 个工作表
xlApp = COMCreate("Excel.Application")
xlApp[["Visible"]] = TRUE 
wb = xlApp[["Workbooks"]]$Open(Filename = xlFile)
sht = wb[["Worksheets"]]$Item(1)
sht$Select()

# 活动工作表转换为 PDF 文件
xlApp[["ActiveSheet"]]$ExportAsFixedFormat(
  IgnorePrintAreas = FALSE,
  Type = 0,  # 输出为 PDF
  Filename = pdfFile
) 

# 关闭 Excel  
xlApp$Quit()

特别需要注意的是: