楚新元 | All in R

Welcome to R Square

用 R 批量拆分数据后写入 Excel

楚新元 / 2024-04-07


工作中经常会遇到数据拆分和合并问题。例如,需要将多个部门的考核表汇总后,按照数据提供部门分组拆分到多个文件中,方便数据提供部门报送数据。其中将多个文件批量汇总比较简单,可以参考我之前发的博文用 R 批量汇总 Excel 文件里的数据,汇总后,按照某个关键字段拆分后写入到多个文件以前也不难,可能许多读者工作中也会遇到,将来的我也可能再次遇到,所以有必要写一篇博文记录下代码实现的整个过程。

这里我们以著名的鸢尾花数据为例。其中 Species 字段包含三种类型的鸢尾花名称,分别是 setosa、versicolor、virginica,现在我们需要将 iris 数据集按照 Species 字段拆分成三个数据框,并分别用 group_nest 和 split 两种方式将数据写入到 Excel 文件,包括:

加载相关 R 包

# 加载相关 R 包
library(openxlsx)
library(tidyverse)

dplyr::group_nest() 函数实现

写入 3 个 Excel 文件

iris %>% 
  group_nest(Species, keep = TRUE) %>% 
  mutate(dest_file = paste0(Species, ".xlsx")) %>% 
  # pwalk(\(...) write.xlsx(..2, ..3))
  pwalk(\(data, dest_file, ...) write.xlsx(data, dest_file))

写入 1 个 Excel 文件的 3 个 Sheet

iris %>% 
  group_nest(Species, keep = TRUE) -> grouped_data

wb = createWorkbook()
walk2(
  grouped_data$Species,
  grouped_data$data,
  \(sht, obj) {
    addWorksheet(wb, as.character(sht))
    writeData(wb, sheet = sht, x = obj)
  }
)
saveWorkbook(wb, file = "iris.xlsx")

写入 1 个 Excel 文件的 1 个 Sheet 的 3 个部分

# 设置基本参数
blank_row = 3  # 模块之间的空行
blank_col = 1  # 模块之间的空列
col_layout = 2  # 多栏显示

# 计算每一个模块写入工作表时的行、列位置
start_row = rep(
  seq(
    from = 1, 
    by = nrow(iris) / 3 + 1 + blank_row,  # 模块之间空行
    length.out = ceiling(3 / col_layout)
  ),
  each = col_layout,  # 多栏显示
  length.out = 3
)

start_col = rep(
  seq(
    from = 1,
    by = ncol(iris) + blank_col,  # 模块之间空列
    length.out = col_layout  # 多栏显示
  ),
  times = ceiling(3 / col_layout),
  length.out = 3
)

# 对鸢尾花数据进行处理
iris %>% 
  group_nest(Species, keep = TRUE) %>% 
  mutate(
    start_row = start_row,
    start_col = start_col
  ) -> nested_iris

# 定义一个数据写入工作簿的函数
f = \(data, start_row, start_col, ...) {
  writeData(
    wb = wb, 
    sheet = "iris",
    x = data,
    startRow = start_row,
    startCol = start_col,
    colNames = TRUE,
    borders = "all",
    borderColour = "blue",
    headerStyle = createStyle(
      fontColour = "#ffffff", 
      fgFill = "#4F80BD",
      halign = "center", 
      valign = "center", 
      textDecoration = "bold",
      border = "TopBottomLeftRight"
    )
  )
}

# 批量写入并保存
wb = createWorkbook()
addWorksheet(wb, sheetName = "iris")
pwalk(nested_iris, f)
saveWorkbook(wb, "nested_iris.xlsx")

split() 函数实现

写入 3 个 Excel 文件

iris %>% 
  split(.$Species) %>% 
  walk2(., paste0(levels(iris$Species), ".xlsx"), write.xlsx)

写入 1 个 Excel 文件的 3 个 Sheet

iris %>% 
  split(.$Species) %>% 
  write.xlsx("iris.xlsx")

写入 1 个 Excel 文件的 1 个 Sheet 的 3 个部分

# 设置基本参数
blank_row = 3  # 模块之间的空行
blank_col = 1  # 模块之间的空列
col_layout = 2  # 多栏显示

# 计算每一个模块写入工作表时的行、列位置
start_row = rep(
  seq(
    from = 1, 
    by = nrow(iris) / 3 + 1 + blank_row,  # 模块之间空行
    length.out = ceiling(3 / col_layout)
  ),
  each = col_layout,  # 多栏显示
  length.out = 3
)

start_col = rep(
  seq(
    from = 1,
    by = ncol(iris) + blank_col,  # 模块之间空列
    length.out = col_layout  # 多栏显示
  ),
  times = ceiling(3 / col_layout),
  length.out = 3
)

# 对鸢尾花数据进行处理
iris %>% 
  split(.$Species) %>% 
  list(
    data = .,
    start_row = start_row,
    start_col = start_col
  ) -> splited_iris

# 定义一个数据写入工作簿的函数
f = \(data, start_row, start_col) {
  writeData(
    wb = wb, 
    sheet = "iris",
    x = data,
    startRow = start_row,
    startCol = start_col,
    colNames = TRUE,
    borders = "all",
    borderColour = "blue",
    headerStyle = createStyle(
      fontColour = "#ffffff", 
      fgFill = "#4F80BD",
      halign = "center", 
      valign = "center", 
      textDecoration = "bold",
      border = "TopBottomLeftRight"
    )
  )
}

# 批量写入并保存
wb = createWorkbook()
addWorksheet(wb, sheetName = "iris")
pwalk(splited_iris, f)
saveWorkbook(wb, "splited_iris.xlsx")