这里我们以著名的鸢尾花数据为例。其中 Species 字段包含三种类型的鸢尾花名称,分别是 setosa、versicolor、virginica,现在我们需要将 iris 数据集按照 Species 字段拆分成三个数据框,并分别用 group_nest 和 split 两种方式将数据写入到 Excel 文件,包括:
- 写入到 3 个 Excel
- 写入到 1 个 Excel 的 3个 Sheet
- 写入到 1 个 Excel 的 1 个 Sheet 的 3 个部分
# 加载相关 R 包
# 加载相关R包
library(openxlsx)
library(tidyverse)
# dplyr::group_nest() 函数实现
# 写入 3 个 Excel 文件
iris %>%
group_nest(Species, keep = TRUE) %>%
mutate(dest_file = paste0(Species, ".xlsx")) %>%
# pwalk(\(...) write.xlsx(..2, ..3))
pwalk(\(data, dest_file, ...) write.xlsx(data, dest_file))
# 写入 1 个 Excel 的 3 个 Sheet
iris %>%
group_nest(Species, keep = TRUE) -> grouped_data
wb = createWorkbook()
walk2(
grouped_data$Species,
grouped_data$data,
\(sht, obj) {
addWorksheet(wb, as.character(sht))
writeData(wb, sheet = sht, x = obj)
}
)
saveWorkbook(wb, file = "iris.xlsx")
# 写入 1 个 Excel 的 1 个 Sheet 的 3 部分
# 设置基本参数
blank_row = 3 # 模块之间的空行
blank_col = 1 # 模块之间的空列
col_layout = 2 # 多栏显示
# 计算每一个模块写入工作表时的行、列位置
start_row = rep(
seq(
from = 1,
by = nrow(iris) / 3 + 1 + blank_row, # 模块之间空行
length.out = ceiling(3 / col_layout)
),
each = col_layout, # 多栏显示
length.out = 3
)
start_col = rep(
seq(
from = 1,
by = ncol(iris) + blank_col, # 模块之间空列
length.out = col_layout # 多栏显示
),
times = ceiling(3 / col_layout),
length.out = 3
)
# 对鸢尾花数据进行处理
iris %>%
group_nest(Species, keep = TRUE) %>%
mutate(
start_row = start_row,
start_col = start_col
) -> nested_iris
# 定义一个数据写入工作簿的函数
f = \(data, start_row, start_col, ...) {
writeData(
wb = wb,
sheet = "iris",
x = data,
startRow = start_row,
startCol = start_col,
colNames = TRUE,
borders = "all",
borderColour = "blue",
headerStyle = createStyle(
fontColour = "#ffffff",
fgFill = "#4F80BD",
halign = "center",
valign = "center",
textDecoration = "bold",
border = "TopBottomLeftRight"
)
)
}
# 批量写入并保存
wb = createWorkbook()
addWorksheet(wb, sheetName = "iris")
pwalk(nested_iris, f)
saveWorkbook(wb, "nested_iris.xlsx")
# split() 函数实现
# 写入 3 个 Excel 文件
iris %>%
split(.$Species) %>%
walk2(., paste0(levels(iris$Species), ".xlsx"), write.xlsx)
# 写入 1 个 Excel 的 3 个 Sheet
iris %>%
split(.$Species) %>%
write.xlsx("iris.xlsx")
# 写入 1 个 Excel 的 1 个 Sheet 的 3 部分
# 设置基本参数
blank_row = 3 # 模块之间的空行
blank_col = 1 # 模块之间的空列
col_layout = 2 # 多栏显示
# 计算每一个模块写入工作表时的行、列位置
start_row = rep(
seq(
from = 1,
by = nrow(iris) / 3 + 1 + blank_row, # 模块之间空行
length.out = ceiling(3 / col_layout)
),
each = col_layout, # 多栏显示
length.out = 3
)
start_col = rep(
seq(
from = 1,
by = ncol(iris) + blank_col, # 模块之间空列
length.out = col_layout # 多栏显示
),
times = ceiling(3 / col_layout),
length.out = 3
)
# 对鸢尾花数据进行处理
iris %>%
split(.$Species) %>%
list(
data = .,
start_row = start_row,
start_col = start_col
) -> splited_iris
# 定义一个数据写入工作簿的函数
f = \(data, start_row, start_col) {
writeData(
wb = wb,
sheet = "iris",
x = data,
startRow = start_row,
startCol = start_col,
colNames = TRUE,
borders = "all",
borderColour = "blue",
headerStyle = createStyle(
fontColour = "#ffffff",
fgFill = "#4F80BD",
halign = "center",
valign = "center",
textDecoration = "bold",
border = "TopBottomLeftRight"
)
)
}
# 批量写入并保存
wb = createWorkbook()
addWorksheet(wb, sheetName = "iris")
pwalk(splited_iris, f)
saveWorkbook(wb, "splited_iris.xlsx")