当前位置：首页 > article >正文

相关性分析和作图

article 2025/2/22 5:41:50

相关性热图

corrplot包corrplot函数

数据处理：计算相关性系数和P值

# Start from an empty workspace (removes every object in the global environment)
rm(list = ls())
library(corrplot)

# Pairwise correlation matrix of the mtcars columns
corData <- cor(
  mtcars,
  method = "pearson",            # one of "pearson", "spearman", "kendall"
  use = "pairwise.complete.obs"  # how missing values are handled
)

# Significance test for every pair of columns: p-values and confidence bounds
testRes <- cor.mtest(
  mtcars,
  conf.level = 0.95,             # confidence level of the interval
  method = "pearson"             # one of "pearson", "spearman", "kendall"
)
corrp <- testRes$p

绘图：

# Draw the full correlation heatmap with significance stars.
# Expects `corData` (correlation matrix) and `testRes` (result of cor.mtest())
# to exist already.
#pdf("plot.pdf",width = 4,height = 4)  # uncomment to save the figure to a file
# For the complete argument reference run `?corrplot` in an interactive session;
# it is kept out of the script so sourcing the file does not open the help page.
corrplot(corData,
         method = "square",           # glyph: "square", "circle", "ellipse", "number", "shade", "color", "pie"
         type = "full",               # region drawn: "full", "lower" (lower half), "upper" (upper half)
         col = colorRampPalette(c('#0000ff', '#ffffff', '#ff0000'))(100), # main colour ramp
         bg = "white",                # background colour
         # col.lim = c(-1, 1),        # colour range of the data; unnecessary when is.corr = TRUE
         title = "ABCD",              # plot title
         is.corr = TRUE,              # input is a correlation matrix with values in [-1, 1]
         add = FALSE,                 # draw on top of an existing plot?
         diag = TRUE,                 # show the main diagonal?
         outline = "green",           # glyph outline: TRUE, FALSE or a colour
         mar = c(5, 4, 4, 2),         # bottom, left, top, right margins in lines (needed for the title to show)
         addgrid.col = NA,            # grid line colour; NA draws none, NULL uses the default grey
         addCoefasPercent = FALSE,    # print coefficients as percentages?
         order = "original",          # ordering: "original", "AOE", "FPC", "hclust", "alphabet"
         # Agglomeration method; choices are "complete", "ward", "ward.D",
         # "ward.D2", "single", "average", "mcquitty", "median", "centroid".
         # A single value is passed instead of the whole choices vector.
         hclust.method = "complete",

         # Text labels (there seems to be no argument for shortening the legend)
         tl.pos = "lt",               # position: 'lt', 'ld', 'td', 'd' or 'n'
         tl.cex = 1,                  # label font size
         tl.col = "black",            # label colour
         tl.offset = 0.4,             # distance between labels and glyphs
         tl.srt = 60,                 # label rotation angle
         # Colour legend
         cl.pos = "r",                # legend position: "r" right, "b" bottom, "n" hidden
         cl.length = NULL,            # larger values give a denser legend scale
         cl.cex = 0.8,                # legend font size
         cl.ratio = 0.2,              # legend width
         cl.align.text = "c",         # legend text alignment: "l" left, "c" centre, "r" right
         cl.offset = 1,               # gap between legend text and colour bar; ignored when centred
         # Coefficient text
         number.cex = 1,              # font size of the coefficient labels
         number.font = 2,             # font of the coefficient labels
         number.digits = 2,           # decimal places shown for coefficients
         na.label = "",               # text shown for NA cells
         # p-value matrix
         p.mat = testRes$p,
         sig.level = 0.05,            # significance threshold applied to p.mat
         insig = "label_sig",         # "label_sig" marks cells with stars

         # Confidence intervals
         plotCI = "n",                # "n", "square", "circle" or "rect"
         lowCI.mat = testRes$lowCI,   # lower confidence bounds returned by cor.mtest()
         uppCI.mat = testRes$uppCI    # upper confidence bounds returned by cor.mtest()
)

添加下三角：上下三角不一致

# Arguments that differ from the first call when overlaying the lower triangle
add = T, diag = F,

# Overlay the lower triangle as numbers on top of the plot drawn before.
# Expects `corData` and `testRes` to exist and a corrplot to be on the device.
corrplot(corData,
         method = "number",
         type = "lower",               # lower triangle only
         col = colorRampPalette(c('#0000ff', '#ffffff', '#ff0000'))(100), # main colour ramp
         add = TRUE,                   # draw on top of the existing plot
         diag = FALSE,                 # skip the main diagonal
         order = "original",
         na.label = "",                # text shown for NA cells
         p.mat = testRes$p,            # p-value matrix
         sig.level = 0.05              # significance threshold applied to p.mat
)
# Close the device only when it is a PDF file device (the pdf() call enabled,
# or Rscript's default device). An unconditional dev.off() would shut the
# on-screen device and discard the figure that was just drawn.
if (identical(names(dev.cur()), "pdf")) {
  dev.off()
}

ggcorrplot包ggcorrplot函数

数据

# Start from an empty workspace (removes every object in the global environment)
rm(list = ls())
library(ggcorrplot)
library(ggtext)

data(mtcars)

# Correlation coefficients, rounded to two decimal places
corr <- round(cor(mtcars), digits = 2)

# Matrix of p-values for the same variable pairs
p.mat <- cor_pmat(mtcars)

作图

(这个图例要小很多)

# Draw the correlation heatmap with ggcorrplot.
# Expects `corr` (rounded correlation matrix) and `p.mat` (p-value matrix).
# For the complete argument reference run `?ggcorrplot` in an interactive
# session; it is kept out of the script so sourcing does not open the help page.
ggcorrplot(corr,
           method = "square",                  # "square" or "circle"
           type = "full",                      # "full" (default), "lower" or "upper" triangle
           ggtheme = ggplot2::theme_minimal,
           title = "ABCD",
           show.legend = TRUE,                 # show the legend?
           legend.title = "CorrA",             # legend title
           show.diag = TRUE,                   # show the diagonal?
           colors = c("blue", "white", "red"), # colour scale
           outline.color = "white",            # border colour of the squares or circles
           hc.order = FALSE,                   # order by hierarchical clustering (hclust)?
           # Correlation coefficient labels
           lab = FALSE,                        # draw the coefficients?
           lab_col = "black",                  # coefficient colour; only used when lab = TRUE
           lab_size = 4,                       # coefficient size; only used when lab = TRUE

           # p-value display
           p.mat = p.mat,
           sig.level = 0.05,                   # significance threshold
           insig = "pch",                      # "pch" draws a symbol, "blank" leaves the cell empty
           pch = 8,                            # symbol 8 is a star
           pch.cex = 4,                        # symbol size
           # Axis labels
           tl.cex = 10,                        # size of the variable labels
           tl.col = "black",                   # colour of the variable labels
           tl.srt = 45,                        # rotation angle of the variable labels
           digits = 2                          # decimal places for coefficients (default 2)
)
# Close the device only when it is a PDF file device. An unconditional
# dev.off() would shut the on-screen device and discard the figure just drawn.
if (identical(names(dev.cur()), "pdf")) {
  dev.off()
}

如果需要显示相关性系数：展示

## Settings for displaying the correlation coefficients
lab =T , # whether to draw the correlation coefficients
lab_col = "black", # colour of the coefficients; only used when lab = TRUE
lab_size = 4, # size of the coefficients; only used when lab = TRUE

计算矩阵后使用pheatmap作图

使用pheatmap作图可以更好地展示P值

数据准备

# Start from an empty workspace (removes every object in the global environment)
rm(list = ls())
library(ggcorrplot)
library(ggtext)
library(psych)
library(pheatmap)
library(reshape2)

data(mtcars)

# Correlation coefficients rounded to two decimals, plus the matching p-values
corr <- round(cor(mtcars), digits = 2)
p.mat <- cor_pmat(mtcars)

# Count how many p-values fall below 0.05
table(p.mat < 0.05)

## Turn every p-value into a significance label:
## p < 0.01 -> "**", 0.01 <= p < 0.05 -> "*", everything else -> ""
if (!is.null(p.mat)) {
  # Build both masks from the numeric p-values before any label is written.
  # Writing "**" first would coerce the whole matrix to character, turning the
  # second threshold test into a locale-dependent string comparison.
  ssmt <- p.mat < 0.01
  smt <- p.mat >= 0.01 & p.mat < 0.05
  # Blank every cell while keeping the dimensions and dimnames, then fill in
  # the stars; which() skips NA p-values, which therefore stay blank.
  p.mat[] <- ""
  p.mat[which(ssmt)] <- "**"
  p.mat[which(smt)] <- "*"
} else {
  p.mat <- FALSE
}

作图

# Custom colour ramp for the heatmap
mycol <- colorRampPalette(c("blue", "white", "tomato"))(100)
# Draw the heatmap; adjust the arguments as needed.
# Expects `corr` (correlation matrix) and `p.mat` (matrix of star labels).
# For the complete argument reference run `?pheatmap` in an interactive session;
# it is kept out of the script so sourcing does not open the help page.
pheatmap(corr,
         scale = "none",            # no row or column scaling
         cluster_rows = TRUE,       # cluster the rows (full argument name, no partial matching)
         cluster_cols = TRUE,       # cluster the columns
         treeheight_col = 0,        # 0 hides the column dendrogram
         treeheight_row = 20,       # height of the row dendrogram
         border_color = NA,         # cell border colour; NA draws no border
         display_numbers = p.mat,   # significance labels shown in the cells
         fontsize_number = 12,
         number_color = "white",
         cellwidth = 20,            # cell width
         cellheight = 20,           # cell height
         color = mycol,             # colour ramp
         legend = TRUE,             # show the legend?
         main = "ABCD")             # title
# Close the device only when it is a PDF file device. An unconditional
# dev.off() would shut the on-screen device and discard the figure just drawn.
if (identical(names(dev.cur()), "pdf")) {
  dev.off()
}