当前位置：首页 > article >正文

【单细胞-第三节多样本数据分析】

article 2025/1/30 23:59:18

文件在单细胞\5_GC_py\1_single_cell\1.GSE183904.Rmd
GSE183904
数据原文

1.获取临床信息

筛选样本可以参考临床信息

rm(list = ls())
library(tinyarray)
a = geo_download("GSE183904")$pd
head(a)
table(a$Characteristics_ch1) #统计各样本有多少

2.批量读取

学会如何读取特定的样本

if(!file.exists("f.Rdata")){
  #untar("GSE183904_RAW.tar",exdir = "GSE183904_RAW")
  fs = dir("GSE183904_RAW/")[c(2,7)] #dir("GSE183904_RAW/"),列出所有文件
  #为了省点内存只做2个样本，去掉[c(2,7)]即做全部样本
  f = lapply(paste0("GSE183904_RAW/",fs),read.csv,row.names = 1)
  #row.names = 1写在lapply的括号里，但是它是read.csv的参数
  fs = stringr::str_split_i(fs,"_",1)
  names(f) = fs
  save(f,file = "f.Rdata")
}
load("f.Rdata")
library(Seurat)
scelist = list()
for(i in 1:length(f)){
  scelist[[i]] <- CreateSeuratObject(counts = f[[i]], 
                                     project = names(f)[[i]])
  print(dim(scelist[[i]]))
}
sce.all = merge(scelist[[1]],scelist[-1])
sce.all = JoinLayers(sce.all)  #连接数据

head(sce.all@meta.data)
table(sce.all$orig.ident)

3.质控指标

sce.all[["percent.mt"]] <- PercentageFeatureSet(sce.all, pattern = "^MT-")
sce.all[["percent.rp"]] <- PercentageFeatureSet(sce.all, pattern = "^RP[SL]")
sce.all[["percent.hb"]] <- PercentageFeatureSet(sce.all, pattern = "^HB[^(P)]")

head(sce.all@meta.data, 3)

VlnPlot(sce.all, 
        features = c("nFeature_RNA",
                     "nCount_RNA", 
                     "percent.mt",
                     "percent.rp",
                     "percent.hb"),
        ncol = 3,pt.size = 0, group.by = "orig.ident")

4.整合降维聚类分群

f = "obj.Rdata"
library(harmony)
if(!file.exists(f)){
  sce.all = sce.all %>% 
    NormalizeData() %>%  
    FindVariableFeatures() %>%  
    ScaleData(features = rownames(.)) %>%  
    RunPCA(pc.genes = VariableFeatures(.))  %>%
    RunHarmony("orig.ident") %>% #RunHarmony 包，整合多个样本，处理多样本的必备步骤
    FindNeighbors(dims = 1:15, reduction = "harmony") %>% 
    FindClusters(resolution = 0.5) %>% 
    RunUMAP(dims = 1:15,reduction = "harmony") %>% 
    #reduction = "harmony"必须写上
    RunTSNE(dims = 1:15,reduction = "harmony")
  save(sce.all,file = f)
}
load(f)
ElbowPlot(sce.all)
UMAPPlot(sce.all,label = T)
TSNEPlot(sce.all,label = T)

5.手动注释

markers = read.delim("GCmarker.txt",header = F,sep = ";")
library(tidyr)
markers = separate_rows(markers,V2,sep = ",") #拆分marker
markers = split(markers$V2,markers$V1)
DotPlot(sce.all,features = markers,cols = "RdYlBu")+
  RotatedAxis()
ggplot2::ggsave("dotplot.png",height = 10,width = 25)
writeLines(paste0(as.character(0:13),","))
names(markers)

celltype = read.csv("celltype.csv",header = F) #自己照着DotPlot图填的
celltype


new.cluster.ids <- celltype$V2
names(new.cluster.ids) <- levels(sce.all)
seu.obj <- RenameIdents(sce.all, new.cluster.ids)
save(seu.obj,file = "seu.obj.Rdata")
p1 <- DimPlot(seu.obj, 
              reduction = "tsne", 
              label = TRUE, 
              pt.size = 0.5) + NoLegend()
p1

6.自动注释

SingleR完成自主注释，不同的是scRNA = sce.all

library(celldex)
library(SingleR)
ls("package:celldex")
f = "ref_BlueprintEncode.RData"
if(!file.exists(f)){
  ref <- celldex::BlueprintEncodeData()
  save(ref,file = f)
}
ref <- get(load(f))
library(BiocParallel)
scRNA = sce.all
test = scRNA@assays$RNA@layers$data
rownames(test) = Features(scRNA)
colnames(test) = Cells(scRNA)
pred.scRNA <- SingleR(test = test, 
                      ref = ref,
                      labels = ref$label.main, 
                      clusters = scRNA@active.ident)
pred.scRNA$pruned.labels
#查看注释准确性 
plotScoreHeatmap(pred.scRNA, clusters=pred.scRNA@rownames, fontsize.row = 9,show_colnames = T)
new.cluster.ids <- pred.scRNA$pruned.labels
names(new.cluster.ids) <- levels(scRNA)
levels(scRNA)
scRNA <- RenameIdents(scRNA,new.cluster.ids)
levels(scRNA)
p2 <- DimPlot(scRNA, reduction = "tsne",label = T,pt.size = 0.5) + NoLegend()
p1+p2

7.marker基因

找不同细胞类型间的差异基因

f = "markers.Rdata"
if(!file.exists(f)){
  allmarkers <- FindAllMarkers(seu.obj, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
  save(allmarkers,file = f)
}
load(f)
head(allmarkers)

如果想自行修改orig.ident:使用下边的代码：

sce.all@meta.data$orig.ident=rep(c("a","b"),times= c(ncol(scelist[[1]]),
ncol(scelistl[[2]])))

查看全文

http://www.kler.cn/a/525271.html

全程Kali linux---CTFshow misc入门（14-24）

Node.js 的底层原理

Linux pkill 命令使用详解

(done) MIT6.S081 2023 学习笔记 (Day6: LAB5 COW Fork)

多级缓存（亿级并发解决方案）

第十六届蓝桥杯大赛软件赛（编程类）知识点大纲

大模型（LLM）工程师实战之路（含学习路线图、书籍、课程等免费资料推荐）

为AI聊天工具添加一个知识系统之78 详细设计之19 正则表达式之6

租赁系统为企业资产管理提供高效解决方案促进业务增长与创新

premierePro 2022创建序列方式

为AI聊天工具添加一个知识系统之77 详细设计之18 正则表达式之5

高级同步工具解析

认识小程序页面，小程序的宿主环境

Python 类型注解

新手项目管理的实用工具推荐

《探秘人工智能：从基础到未来变革》

U盘打开提示格式化：深度解析与数据恢复全攻略

如何在 PowerPoint 中新建幻灯片？

2025 春节联欢晚会魔术揭秘

C语言初阶牛客网刷题—— HJ97 记负均正【难度：简单】

飞桨PaddleNLP套件中使用DeepSeek r1大模型

Thinkphp+Uniapp开发的多端商城系统源码H5小程序APP支持DIY模板直播分销（亲测）

Lustre Core 语法 - 数组操作表达式

使用八爪鱼爬虫和Web Scraper抓取数据实战案例，附详细教程

《翻之》

DeepSeek R1：高效经济的RAG系统构建利器