当前位置：首页 > article >正文

扩增子分析|零模型2——基于βNTI的微生物随机性和确定性装配过程（箱线图和柱状图R中实现）

article 2025/2/6 17:06:55

一、引言

我们之前发布过周集中老师团队零模型的R实战案例《扩增子分析|基于零模型的群落确定性和随机性构建过程——R实战》（CSDN博客）。该文在文末只输出了一个 .csv 表格，并没有提供绘图的方法。有小伙伴问：如何在R中一键成图呢？还真可以！

小伙伴建议绘制带显著性检验的箱线图以及柱状图，本文提供了后续相关代码。如下图所示，绘制出漂亮的箱线图和堆叠柱状图将为论文增色不少。

二、绘图代码

2.1 绘制箱线图

代码如下：

# 安装所需的包（如果尚未安装）
if (!requireNamespace("dplyr", quietly = TRUE)) install.packages("dplyr")
if (!requireNamespace("ggplot2", quietly = TRUE)) install.packages("ggplot2")

# 载入必要的包
library(dplyr)
library(ggplot2)

# Load the result table written by the null-model step. The code below uses
# its columns name1, name2 (the two samples of each pair) and bNTI.
data <- read.csv("Ecological_process_bNTI_RC.csv")

# TRUE for every pair in which both sample names begin with `prefix`.
both_start_with <- function(first, second, prefix) {
  pattern <- paste0("^", prefix)
  grepl(pattern, first) & grepl(pattern, second)
}

# Label each pair with its group when both samples belong to the same group.
# A/B/C/D are the sample-name prefixes of this example; replace them with your
# own. For an additional group (e.g. E) add a matching line here AND add the
# group to the filter() call below.
data_combined <- mutate(
  data,
  Group = case_when(
    both_start_with(name1, name2, "A") ~ "A",
    both_start_with(name1, name2, "B") ~ "B",
    both_start_with(name1, name2, "C") ~ "C",
    both_start_with(name1, name2, "D") ~ "D",
    TRUE ~ "Other"  # between-group pairs and anything unmatched
  )
)

# Keep only the within-group pairs of groups A, B, C and D.
data_filtered <- filter(data_combined, Group %in% c("A", "B", "C", "D"))

# Shapiro-Wilk normality test of bNTI within each group.
# Reading the result: p > 0.05 means no evidence against normality,
# p <= 0.05 means the data depart from normality.
#
# shapiro.test() stops with an error when it cannot be computed (it needs
# between 3 and 5000 non-missing values, and they must not all be identical).
# Inside summarise() a single such group would abort the whole table, so the
# helper turns the error into a warning and reports NA for that group.
shapiro_p_value <- function(x) {
  tryCatch(
    shapiro.test(x)$p.value,
    error = function(e) {
      warning(
        "Shapiro-Wilk test skipped for one group: ", conditionMessage(e),
        call. = FALSE
      )
      NA_real_
    }
  )
}

shapiro_test_results <- data_filtered %>%
  group_by(Group) %>%
  summarise(
    p_value = shapiro_p_value(bNTI)
  )

print(shapiro_test_results)

# Compare groups B, C and D against group A with a t-test on bNTI and attach
# a significance label to each comparison.
# t.test() is called with its defaults, i.e. the Welch (unequal variance) test.

# Translate one p-value into the usual star notation.
significance_label <- function(p) {
  if (p < 0.001) {
    return("***")
  }
  if (p < 0.01) {
    return("**")
  }
  if (p < 0.05) {
    return("*")
  }
  "ns"
}

# One-row data frame with the A-vs-`group` p-value and its label.
compare_with_a <- function(group) {
  pair_data <- filter(data_filtered, Group %in% c("A", group))
  p <- t.test(bNTI ~ Group, data = pair_data)$p.value
  data.frame(
    Group = group,
    p_value = p,
    Significance = significance_label(p)
  )
}

t_test_results <- lapply(c("B", "C", "D"), compare_with_a)

# Stack the per-group rows into a single data frame.
t_test_results_df <- bind_rows(t_test_results)

# Box plot of bNTI per group with jittered points, the significance labels
# from t_test_results_df, and dashed reference lines at bNTI = -2 and 2.

# Height of the significance labels: just below the largest observed bNTI.
# na.rm = TRUE keeps missing bNTI values from turning the position into NA.
label_y <- max(data_filtered$bNTI, na.rm = TRUE) - 0.01

ggplot(data_filtered, aes(x = Group, y = bNTI, color = Group)) +
  geom_boxplot(outlier.shape = NA) +  # points are drawn by geom_jitter below
  geom_jitter(width = 0.2, alpha = 0.7) +
  geom_text(
    data = t_test_results_df,
    aes(x = Group, y = label_y, label = Significance),
    vjust = 0,
    show.legend = FALSE  # keep the text glyph out of the colour legend
  ) +
  geom_hline(yintercept = c(-2, 2), linetype = "dashed", color = "black") +
  labs(y = "bNTI") +
  theme_minimal() +
  theme(
    axis.text.x = element_text(face = "plain", angle = 0, hjust = 0.5),
    # `linewidth` replaces the deprecated `size` argument of element_rect()
    # (ggplot2 >= 3.4.0; use `size` on older versions).
    panel.border = element_rect(color = "black", fill = NA, linewidth = 1),
    # ggplot2 has no `plot.border` element; the frame around the whole figure
    # is drawn through `plot.background`.
    plot.background = element_rect(color = "black", fill = NA, linewidth = 1)
  )

输出结果：

注意：显著性标记为 B、C、D 各组分别与 A 组进行 t 检验的结果：ns 代表不显著（p ≥ 0.05），* 代表 p < 0.05，** 代表 p < 0.01，*** 代表 p < 0.001。

2.2 绘制堆叠柱状图

代码如下：

注意：以下代码必须接在 2.1 节的代码之后运行（需要用到其中生成的 data_filtered）。

# Stacked bar chart of ecological processes.
# Step 1: count the pairs per group and process, then convert the counts to
# percentages within each group.
data_percent <- data_filtered %>%
  count(Group, EcologicalProcess, name = "Count") %>%
  group_by(Group) %>%
  mutate(Percentage = Count / sum(Count) * 100) %>%
  ungroup()

# Step 2: one bar per group, segments stacked by process share.
# geom_col() plots the values as given and stacks them by default.
ggplot(
  data_percent,
  aes(x = Group, y = Percentage, fill = EcologicalProcess)
) +
  geom_col() +
  labs(y = "Ecological Processes (%)") +
  theme()  # empty call: placeholder for custom theme settings

输出结果：