用 openxlsx 定制报表后生成 PDF 文件
楚新元 / 2021-08-20
用 R 清洗加工数据非常方便,对于 R 处理后输出的结果,一般情况下,我们看到的都是比较规整的数据框。现实当中每天要报的报表还有一些“乱七八糟”的要素,比如标题、报告日期、数据单位、制表、复核、保密等级等。当然有了最重要的数据部分,导出到 Excel 文件里手动添加剩余的哪些“乱七八糟”的要素也是可以的,如果这个报表只制作一次,我也更倾向于这样做,发挥 R 和 Excel 各自的优势。但是如果这个报表是每天都要报送的日报呢?我是绝对不能容忍每天手动做重复性的工作的,用 R 全自动化处理绝对是最佳选项。下面我们看一个例子:
加载相关 R 包
library(cnum)
library(stringr)
library(dplyr)
library(purrr)
library(janitor)
library(openxlsx)
library(openxlsx2)
报表数据准备
R 里面清洗加工数据这块的教程太多了,这里不是我要讲的重点,因此,这里直接生成一个垃圾数字组成的报表。
140 %>%
rnorm(100, 10) %>%
matrix(ncol = 7) %>%
as.data.frame() %>%
set_names(paste0("x", 2:8)) %>%
mutate(
x1 = str_glue('科目{num2c(1:20, lang = "sc")}'),
.before = "x2"
) %>%
mutate(
across(
where(is.numeric),
\(x) round(x + 1e-10, 2))
) %>%
adorn_totals(
where = "row",
name = "合计"
) -> daily
自定义报表要素
# 定义报告期
date = "2020-04-08"
# 计算报告期年、月、日
date = as.Date(date)
year = as.integer(format(date, '%Y'))
month = as.integer(format(date, '%m'))
day = as.integer(format(date, '%d'))
# 保密提示信息
secret = "★内部资料、严格保密"
# 日报标题
title = paste0(
year, "年",
month, "月",
day, "日",
"经营数据日报"
)
# 制表日期
tabu_date = paste0(
"制表日期:",
Sys.Date()
)
# 数据单位
unit = "单位:亿元、%"
# 输出文件路径和文件名
dest_file = paste0(
"daily", "-", format(date, "%Y%m%d"), ".xlsx"
)
定制报表并导出到 Excel
基于 openxlsx 包的实现
#---------------------------------------------------------------------------
# 新建工作簿并添加工作表
#---------------------------------------------------------------------------
wb = createWorkbook()
addWorksheet(wb, "daily", gridLines = FALSE)
#---------------------------------------------------------------------------
# 设置报表区域行高、列宽
#---------------------------------------------------------------------------
# 设置报表区域行高
setRowHeights(
wb, "daily",
rows = 1:(nrow(daily) + 4),
heights = c(20, 35, rep(20, nrow(daily) + 2))
)
# 设置报表区域列宽
setColWidths(
wb, "daily",
cols = 1:ncol(daily),
widths = c(26, rep(15.5, 5), 12, 18)
)
#---------------------------------------------------------------------------
# 冻结窗口,设置打印页面
#---------------------------------------------------------------------------
# 冻结窗口
freezePane(
wb, "daily",
firstActiveRow = 5,
firstActiveCol = "B"
)
# 打印页面设置
pageSetup(
wb, "daily",
orientation = "landscape",
scale = 97,
left = 0.7,
right = 0.7,
top = 0.75,
bottom = 0.75,
header = 0.3,
footer = 0.3,
fitToWidth = TRUE,
fitToHeight = TRUE,
paperSize = 9,
printTitleRows = NULL,
printTitleCols = NULL
)
#---------------------------------------------------------------------------
# 报表第 1 行右上角保密提示
#---------------------------------------------------------------------------
# 合并第 1 行单元格
mergeCells(
wb, "daily",
rows = 1:1,
cols = 1:ncol(daily)
)
# 创建保密提示风格
style_secret = createStyle(
halign = "right",
valign = "center",
wrapText = TRUE,
fontColour = "red",
fontSize = 11,
fontName = "Arial"
)
# 保密风格样式运用到第 1 行
addStyle(
wb, "daily",
style = style_secret,
rows = 1:1,
cols = 1:ncol(daily)
)
# 保密内容写入到第 1 行
writeData(
wb, "daily",
secret,
startRow = 1
)
#---------------------------------------------------------------------------
# 日报标题设置
#---------------------------------------------------------------------------
mergeCells(
wb, "daily",
rows = 2:2,
cols = 1:ncol(daily)
)
style_title = createStyle(
halign = "center",
valign = "center",
wrapText = TRUE,
textDecoration = c("bold"),
fontColour = "black",
fontSize = 20,
fontName = "Arial"
)
addStyle(
wb, "daily",
style = style_title,
rows = 2:2,
cols = 1:ncol(daily)
)
writeData(
wb, "daily",
title,
startRow = 2
)
#---------------------------------------------------------------------------
# 制表日期设置
#---------------------------------------------------------------------------
mergeCells(
wb, "daily",
rows = 3:3,
cols = 6:7
)
style_tabu_date = createStyle(
halign = "right",
valign = "center",
wrapText = TRUE,
fontColour = "black",
fontSize = 11,
fontName = "Arial"
)
addStyle(
wb, "daily",
style = style_tabu_date,
rows = 3:3,
cols = 6:7
)
writeData(
wb, "daily",
tabu_date,
startRow = 3,
startCol = 6
)
#---------------------------------------------------------------------------
# 数据单位设置
#---------------------------------------------------------------------------
mergeCells(
wb, "daily",
rows = 3:3,
cols = ncol(daily)
)
style_unit = createStyle(
halign = "center",
valign = "center",
wrapText = TRUE,
fontColour = "black",
fontSize = 11,
fontName = "Arial"
)
addStyle(
wb, "daily",
style = style_unit,
rows = 3:3,
cols = ncol(daily)
)
writeData(
wb, "daily",
unit,
startRow = 3,
startCol = 8
)
#---------------------------------------------------------------------------
# 表头设置
#---------------------------------------------------------------------------
style_header = createStyle(
textDecoration = "Bold",
halign = "center",
valign = "center",
wrapText = TRUE,
border = "TopBottomLeftRight",
borderColour = "black",
fontColour = "white",
fgFill = "#4F81BD",
fontSize = 11,
fontName = "Arial"
)
addStyle(
wb, "daily",
rows = 4:4,
cols = 1:ncol(daily),
style = style_header,
gridExpand = T
)
#---------------------------------------------------------------------------
# 数据部分设置
#---------------------------------------------------------------------------
# 第 1 列水平居中,垂直居中
style_data1 = createStyle(
valign = "center",
halign = "center",
border = "TopBottomLeftRight",
borderColour = "black",
fontSize = 11,
fontName = "Arial"
)
addStyle(
wb, "daily",
style = style_data1,
rows = 5:(nrow(daily) + 4),
cols = 1,
gridExpand = T
)
# 第 2-8 列水平右对齐,垂直居中
style_data2 = createStyle(
valign = "center",
halign = "right",
border = "TopBottomLeftRight",
borderColour = "black",
fontSize = 11,
fontName = "Arial",
numFmt = "0.00" # 只显示两位小数
)
addStyle(
wb, "daily",
style = style_data2,
rows = 5:(nrow(daily) + 4),
cols = 2:ncol(daily),
gridExpand = T
)
# 数据部分的「合计」行加粗
style_data3 = createStyle(
textDecoration = "Bold"
)
addStyle(
wb, "daily",
rows = nrow(daily) + 4,
cols = 1:ncol(daily),
style = style_data3,
gridExpand = T,
stack = TRUE # 新格式和原格式合并,而不是替代
)
# 数据部分添加斑马线
style_data4 = createStyle(
fgFill = "#F0FFFF"
)
addStyle(
wb, "daily",
rows = c(1:(nrow(daily) / 2)) * 2 + 4,
cols = 1:ncol(daily),
style = style_data4,
gridExpand = TRUE,
stack = TRUE
)
# 数据部分写入到工作表
writeData(
wb, "daily",
daily,
startRow = 4,
startCol = 1
)
#---------------------------------------------------------------------------
# 保存工作簿
#---------------------------------------------------------------------------
saveWorkbook(wb, dest_file, overwrite = TRUE)
#---------------------------------------------------------------------------
# 额外的打印设置
#---------------------------------------------------------------------------
wb = openxlsx2::wb_load(dest_file)
wb$worksheets[[1]]$set_print_options(
horizontalCentered = "1", # 水平居中
verticalCentered = "0" # 垂直不居中
)
openxlsx2::wb_save(wb, dest_file, overwrite = TRUE)
#---------------------------------------------------------------------------
由于 openxlsx 包不支持打印页面页边距水平和垂直居中,还好 openxlsx2 包增强了这个功能。
基于 openxlsx2 包的实现(推荐)
openxlsx2 的函数和参数均采用下划线而非驼峰风格,这点和 Tidyverse 系列包 风格一致,另外参数设置方面对细节的把控更加细腻,最重要的是支持管道符。
wb_workbook() %>%
# 增加工作表
wb_add_worksheet(
sheet = "daily",
grid_lines = FALSE
) %>%
# 设置报表区域行高
wb_set_row_heights(
rows = 1:(nrow(daily) + 4),
heights = c(20, 35, rep(20, nrow(daily) + 2))
) %>%
# 设置报表区域列宽
wb_set_col_widths(
cols = 1:ncol(daily),
widths = c(26, rep(15.5, 5), 12, 18)
) %>%
# 冻结窗口
wb_freeze_pane(
first_active_row = 5,
first_active_col = "B"
) %>%
# 打印页面设置
wb_page_setup(
orientation = "landscape",
scale = 97,
left = 0.7,
right = 0.7,
top = 0.75,
bottom = 0.75,
header = 0.3,
footer = 0.3,
fit_to_width = TRUE,
fit_to_height = TRUE,
paper_size = 9,
print_title_rows = NULL,
print_title_cols = NULL
) %>%
# 加入保密提示内容
wb_add_data(
x = secret,
dims = wb_dims(
from_row = 1,
from_col = 1
)
) %>%
wb_merge_cells(
dims = wb_dims(
rows = 1:1,
cols = 1:ncol(daily)
)
) %>%
wb_add_font(
dims = "A1",
size = "11",
name = "Arial",
color = wb_color(name = "red"),
) %>%
wb_add_cell_style(
dims = wb_dims(
rows = 1:1,
cols = 1:ncol(daily)
),
horizontal = "right",
vertical = "center",
wrap_text = TRUE
) %>%
# 日报标题设置
wb_add_data(
x = title,
dims = wb_dims(
from_row = 2,
from_col = 1
)
) %>%
wb_merge_cells(
dims = wb_dims(
rows = 2:2,
cols = 1:ncol(daily)
)
) %>%
wb_add_font(
dims = "A2",
size = "20",
name = "Arial",
color = wb_color(name = "black"),
bold = "double"
) %>%
wb_add_cell_style(
dims = wb_dims(
rows = 2:2,
cols = 1:ncol(daily)
),
horizontal = "center",
vertical = "center",
wrap_text = TRUE
) %>%
# 制表日期设置
wb_add_data(
x = tabu_date,
dims = wb_dims(
from_row = 3,
from_col = 6
)
) %>%
wb_merge_cells(
dims = wb_dims(
rows = 3:3,
cols = 6:7
)
) %>%
wb_add_font(
# dims = "F3",
dims = wb_dims(
rows = 3:3,
cols = 6:7
),
size = "11",
name = "Arial",
color = wb_color(name = "black")
) %>%
wb_add_cell_style(
dims = wb_dims(
rows = 3:3,
cols = 6:7
),
horizontal = "right",
vertical = "center",
wrap_text = TRUE
) %>%
# 数据单位设置
wb_add_data(
x = unit,
dims = wb_dims(
from_row = 3,
from_col = ncol(daily)
)
) %>%
wb_add_font(
dims = wb_dims(
rows = 3:3,
cols = ncol(daily)
),
size = "11",
name = "Arial",
color = wb_color(name = "black")
) %>%
wb_add_cell_style(
dims = wb_dims(
rows = 3:3,
cols = 6:7
),
horizontal = "center",
vertical = "center",
wrap_text = TRUE
) %>%
# 表头设置
wb_add_font(
dims = wb_dims(
rows = 4:4,
cols = 1:ncol(daily)
),
size = "11",
name = "Arial",
color = wb_color(name = "white"),
bold = "double"
) %>%
wb_add_fill(
dims = wb_dims(
rows = 4:4,
cols = 1:ncol(daily)
),
color = wb_color(hex = "#4F81BD"),
) %>%
wb_add_cell_style(
dims = wb_dims(
rows = 4:4,
cols = 1:ncol(daily)
),
horizontal = "center",
vertical = "center",
wrap_text = TRUE
) %>%
# 数据部分设置
wb_add_data(
x = daily,
dims = wb_dims(
from_row = 4,
from_col = 1
)
) %>%
wb_add_font(
dims = wb_dims(
rows = (1:nrow(daily)) + 4,
cols = 1:ncol(daily)
),
size = "11",
name = "Arial",
color = wb_color(name = "black")
) %>%
wb_add_cell_style( # 第 1 列居中对齐
dims = wb_dims(
rows = (1:nrow(daily)) + 4,
cols = 1
),
horizontal = "center",
vertical = "center",
wrap_text = TRUE
) %>%
wb_add_cell_style( # 第 2-8 列右对齐
dims = wb_dims(
rows = (1:nrow(daily)) + 4,
cols = 2:ncol(daily)
),
horizontal = "right",
vertical = "center",
wrap_text = TRUE
) %>%
wb_add_numfmt( # 第 2-8 保留两位小数
dims = wb_dims(
rows = (1:nrow(daily)) + 4,
cols = 2:ncol(daily)
),
numfmt = "#.00"
) %>%
# 数据部分的「合计」行加粗
wb_add_font(
dims = wb_dims(
rows = nrow(daily) + 4,
cols = 1:ncol(daily)
),
bold = "double"
) %>%
# 表头和数据部分增加边框
wb_add_border(
dims = wb_dims(
rows = 1:(nrow(daily) + 1) + 3,
cols = 1:ncol(daily)
),
inner_hgrid = "thin",
inner_vgrid = "thin"
) -> wb
# 数据部分添加斑马线
# 由于涉及非连续行,openxlsx2 包暂不支持
rows = c(1:(nrow(daily) / 2)) * 2 + 4
for (i in rows) {
wb %>% wb_add_fill(
dims = wb_dims(
rows = i,
cols = 1:ncol(daily)
),
color = wb_color(hex = "#F0FFFF")
)
}
# 额外的页面水平居中打印设置
wb$worksheets[[1]]$set_print_options(
horizontalCentered = "1",
verticalCentered = "0"
)
wb_save(wb, dest_file)
导出为 PDF 格式
其实导出到 Excel 基本已经大功告成了,可是有的单位要求发布的报告必须是 PDF 格式的。如果单纯的需要 PDF 格式文件,我们可以考虑用 R Markdown 或者 Quarto 直接生成 PDF 格式文件,而不需要先生成 Excel 文件再转为 PDF 格式,但是定制模板相对比较困难,暂时笔者只是了解一点 LaTeX 方面的知识,还不能熟练驾驭。通过已经定制好的 Excel 文件再转为 PDF 虽然有点绕远路了,但是能满足报表要求,所以这里仍然选择数据–> Excel 文件 –> PDF 文件。
从 Excel 文件 –> PDF 文件,手动实现很容易,打开 Excel 后利用虚拟打印机打印为 PDF 格式即可。但是,对于一个完美主义极客而言这是不够的,因为将来有可能一次生成的不是一张报表,所以,为了将来不时之需,这里仍难考虑通过程序解决。R 里面直接调用虚拟打印机将 Excel 工作表打印成 PDF 实现起来估计有点困难,利用 JAVA 实现的网上倒是有教程 JAVA 调用打印机输出 PDF 文件。因为 Excel 从 2007 开始可以直接将 Excel 工作表转为 PDF,这里我们考虑用 R 调用 VBA 实现。
# 调用 R 包
library(RDCOMClient) # 仅支持 Windows 系统
# 指定 Excel 文件路径
xlFile = here::here(dest_file) # 这里必须是绝对路径
# 转换后的 PDF 文件与原 Excel 文件路径和名称相同
pdfFile = gsub(".xlsx", ".pdf", xlFile) # 这里必须是绝对路径
# 指定路径下存在同名 PDF 文件则删除
if (file.exists(pdfFile)) file.remove(pdfFile)
# 创建对象,打开工作簿并选中第 1 个工作表
xlApp = COMCreate("Excel.Application")
xlApp[["Visible"]] = TRUE
wb = xlApp[["Workbooks"]]$Open(Filename = xlFile)
sht = wb[["Worksheets"]]$Item(1)
sht$Select()
# 活动工作表转换为 PDF 文件
xlApp[["ActiveSheet"]]$ExportAsFixedFormat(
IgnorePrintAreas = FALSE,
Type = 0, # 输出为 PDF
Filename = pdfFile
)
# 关闭 Excel
xlApp$Quit()
特别需要注意的是:
-
RDCOMClient 包目前只支持 Windows 系统。
-
RDCOMClient 包不支持相对路径。
-
如果已经生成了 PDF 文件,如果再次运行一遍程序,原先生成的 PDF 文件不会被覆盖,执行
ExportAsFixedFormat
函数会报错,所以执行ExportAsFixedFormat
函数前,必须先删除之前已经生成的 PDF 文件。