单细胞组间比较分析
单细胞组间比较分析
这篇文章介绍的是有分组的单细胞数据怎样分析,数据来自GEO的GSE,有个treat,个control样本,代码完整,可以自行下载数据跑一跑,但请注意细胞数量是6w,对计算资源要求较高,自己的电脑跑不动,需要在服务器上跑。1.整理数据因为数据组织的不是每个样本一个文件夹的形式,所以需要自行整理,参考代码如下,注意这段改名的代码不要反复运行:代码语言:javascr
单细胞组间比较分析
这篇文章介绍的是有分组的单细胞数据怎样分析,数据来自GEO的GSE21920,有个treat,个control样本,代码完整,可以自行下载数据跑一跑,但请注意细胞数量是6w,对计算资源要求较高,自己的电脑跑不动,需要在服务器上跑。
1.整理数据
因为数据组织的不是每个样本一个文件夹的形式,所以需要自行整理,参考代码如下,注意这段改名的代码不要反复运行:
代码语言:javascript代码运行次数:0运行复制#untar("GSE21920_",exdir = "GSE21920_RAW")
#unlink("GSE21920_")
library(stringr)
fs = paste0("GSE21920_RAW/",dir("GSE21920_RAW/"))
fs
samples = dir("GSE21920_RAW/") %>% str_split_i("_",2) %>% unique();samples
#为每个样本创建单独的文件夹
lapply(samples, function(s){
ns = paste0("01_data/",s)
if(!(ns))(ns,recursive = T)
})
#每个样本的三个文件复制到单独的文件夹
lapply(fs, function(s){
#s = fs[1]
for(i in 1:length(samples)){
#i = 1
if(str_detect(s,samples[[i]])){
(s,paste0("01_data/",samples[[i]]))
}
}
})
#文件名字修改
on = paste0("01_data/",dir("01_data/",recursive = T));on
nn = str_remove(on,"GSM\\d+_sample\\d_");nn
file.rename(on,nn)
代码主要参考:
代码语言:javascript代码运行次数:0运行复制rm(list = ls())
2.批量读取
代码语言:javascript代码运行次数:0运行复制rm(list = ls())
library(Seurat)
f = dir("01_data/")
scelist = list()
for(i in 1:length(f)){
pda <- Read10X(paste0("01_data/",f[[i]]))
scelist[[i]] <- CreateSeuratObject(counts = pda,
project = f[[i]])
print(dim(scelist[[i]]))
}
## [1] 58 10218
## [1] 58 891
## [1] 58 8607
## [1] 58 127
## [1] 58 1108
## [1] 58 10821
sce.all = merge(scelist[[1]],scelist[-1])
sce.all = JoinLayers(sce.all)
head(sce.all@meta.data)
## orig.ident nCount_RA nFeature_RA
## AAACCCACACAAATAG-1_1 sample1 5624 159
## AAACCCACAGGCTCTG-1_1 sample1 6854 1798
## AAACCCACAGGTCCCA-1_1 sample1 5659 146
## AAACCCACAGTCGGTC-1_1 sample1 424 1256
## AAACCCAGTTTGGCTA-1_1 sample1 5105 156
## AAACCCATCCCATAAG-1_1 sample1 817 1495
table(sce.all$orig.ident)
##
## sample1 sample2 sample sample4 sample5 sample6
## 10218 891 8607 127 1108 10821
sum(table(Idents(sce.all)))
## [1] 6248
.质控指标
代码语言:javascript代码运行次数:0运行复制sce.all[[""]] <- PercentageFeatureSet(sce.all, pattern = "^MT-")
sce.all[["percent.rp"]] <- PercentageFeatureSet(sce.all, pattern = "^RP[SL]")
sce.all[["percent.hb"]] <- PercentageFeatureSet(sce.all, pattern = "^HB[^(P)]")
head(sce.all@meta.data, )
## orig.ident nCount_RA nFeature_RA percent.rp
## AAACCCACACAAATAG-1_1 sample1 5624 159 6.4544808 4.726174
## AAACCCACAGGCTCTG-1_1 sample1 6854 1798 0.148107 7.640962
## AAACCCACAGGTCCCA-1_1 sample1 5659 146 .4811804 0.04064
## percent.hb
## AAACCCACACAAATAG-1_1 0.00000000
## AAACCCACAGGCTCTG-1_1 0.00000000
## AAACCCACAGGTCCCA-1_1 0.01767097
VlnPlot(sce.all,
features = c("nFeature_RA",
"nCount_RA",
"",
"percent.rp",
"percent.hb"),
ncol = ,pt.size = 0, group.by = "orig.ident")
sce.all = subset(sce.all,<25)
4.整合降维聚类分
代码语言:javascript代码运行次数:0运行复制f = "obj.Rdata"
library(harmony)
if(!(f)){
sce.all = sce.all %>%
ormalizeData() %>%
FindVariableFeatures() %>%
ScaleData(features = rownames(.)) %>%
RunPCA(pc.genes = VariableFeatures(.)) %>%
RunHarmony("orig.ident") %>%
Findeighbors(dims = 1:15, reduction = "harmony") %>%
FindClusters(resolution = 0.5) %>%
RunUMAP(dims = 1:15,reduction = "harmony") %>%
RunTSE(dims = 1:15,reduction = "harmony")
save(sce.all,file = f)
}
load(f)
ElbowPlot(sce.all)
UMAPPlot(sce.all,label = T)
TSEPlot(sce.all,label = T)
5.注释
代码语言:javascript代码运行次数:0运行复制library(celldex)
library(SingleR)
ls("package:celldex")
## [1] "BlueprintEncodeData" "DatabaseImmuneCellExpressionData"
## [] "HumanPrimaryCellAtlasData" "ImmGenData"
## [5] "MonacoImmuneData" "MouseRAseqData"
## [7] "overshternHematopoieticData"
f = "ref_HumanPrimaryCellAtlasData.RData"
if(!(f)){
ref <- celldex::HumanPrimaryCellAtlasData()
save(ref,file = f)
}
ref <- list(get(load("ref_BlueprintEncode.RData")),
get(load("ref_HumanPrimaryCellAtlasData.RData")))
library(BiocParallel)
scRA = sce.all
test = scRA@assays$RA$data
pred.scRA <- SingleR(test = test,
ref = ref,
labels = list(ref[[1]]$,ref[[2]]$),
clusters = scRA@active.ident)
pred.scRA$pruned.labels
## [1] "CD8+ T-cells" "B-cells" "Fibroblasts"
## [4] "CD4+ T-cells" "CD8+ T-cells" "eutrophils"
## [7] "Endothelial_cells" "Fibroblasts" "Monocytes"
## [10] "Macrophages" "Fibroblasts" "Adipocytes"
## [1] "B-cells" "K cells" "Monocytes"
## [16] A "Endothelial cells" "Monocytes"
## [19] "eutrophils" "Fibroblasts" "Adipocytes"
#查看注释准确性
#plotScoreHeatmap(pred.scRA, clusters=pred.scRA@rownames, fontsize.row = 9,show_colnames = T)
ids <- pred.scRA$pruned.labels
ids[(ids)] = "unknown"
names(ids) <- levels(scRA)
levels(scRA)
## [1] "0" "1" "2" "" "4" "5" "6" "7" "8" "9" "10" "11" "12" "1" "14"
## [16] "15" "16" "17" "18" "19" "20"
scRA <- RenameIdents(scRA,ids)
levels(scRA)
## [1] "CD8+ T-cells" "B-cells" "Fibroblasts"
## [4] "CD4+ T-cells" "eutrophils" "Endothelial_cells"
## [7] "Monocytes" "Macrophages" "Adipocytes"
## [10] "K cells" "unknown" "Endothelial cells"
p2 <- DimPlot(scRA, reduction = "umap",label = T,pt.size = 0.5) + oLegend()
p2
6.分组可视化及组件细胞比例比较
代码语言:javascript代码运行次数:0运行复制scRA$seurat_annotati = Idents(scRA)
table(scRA$orig.ident)
##
## sample1 sample2 sample sample4 sample5 sample6
## 10109 80 8510 1259 10874 1062
library(tinyarray)
pd = geo_download("GSE21920")$pd
pd$title
## [1] "IgG4-RD1" "IgG4-RD2" "IgG4-RD" "Control1" "Control2" "Control"
scRA$group = ifelse(scRA$orig.ident %in% c("sample1","sample2","sample"), "treat","control")
DimPlot(scRA, reduction = "umap", group.by = "group")
可以计算每个亚的细胞数量和占全部细胞的比例
代码语言:javascript代码运行次数:0运行复制# 每种细胞的数量和比例
cell_counts <- table(Idents(scRA))
cell.all <- cbind(cell_counts = cell_counts,
cell_Freq = round((cell_counts)*100,2))
#各组中每种细胞的数量和比例
group <- table(Idents(scRA), scRA$group)
cell.freq.group <- round((group, margin = 2) *100,2)
cell.all = cbind(cell.all,group,cell.freq.group)
cell.all = cell.all[,c(1,,4,2,5,6)]
colnames(cell.all) = paste(rep(c("all","control","treat"),times = 2),
rep(c("count","freq"),each = ),sep = "_")
cell.all
## all_count control_count treat_count all_freq control_freq
## CD8+ T-cells 15077 5518 9559 24.71 16.19
## B-cells 9089 2870 6219 14.90 8.42
## Fibroblasts 1256 1077 158 20.25 1.60
## CD4+ T-cells 707 20 4770 11.59 6.76
## eutrophils 551 4114 199 9.04 12.07
## Endothelial_cells 26 260 876 5.0 6.92
## Monocytes 2948 194 1014 4.8 5.67
## Macrophages 209 1661 42 .4 4.87
## Adipocytes 1468 122 146 2.41 .88
## K cells 1200 695 505 1.97 2.04
## unknown 51 186 45 0.87 0.55
## Endothelial cells 428 54 74 0.70 1.04
## treat_freq
## CD8+ T-cells 5.51
## B-cells 2.10
## Fibroblasts 5.88
## CD4+ T-cells 17.72
## eutrophils 5.20
## Endothelial_cells .25
## Monocytes .77
## Macrophages 1.60
## Adipocytes 0.54
## K cells 1.88
## unknown 1.28
## Endothelial cells 0.27
7.差异分析
某种细胞在不同组间的差异基因
代码语言:javascript代码运行次数:0运行复制if(!require("multtest"))BiocManager::install('multtest')
if(!require("metap"))install.packages('metap')
table(scRA$seurat_annotati)
##
## CD8+ T-cells B-cells Fibroblasts CD4+ T-cells
## 15077 9089 1256 707
## eutrophils Endothelial_cells Monocytes Macrophages
## 551 26 2948 209
## Adipocytes K cells unknown Endothelial cells
## 1468 1200 51 428
<- FindCervedMarkers(scRA, ident.1 = "K cells", grouping.var = "group", min.pct = 0.25, = 0.25,verbose = F)
head()
## treat_p_val treat_avg_log2FC treat_pct.1 treat_pct.2 treat_p_val_adj
## GLY 0 6.695047 0.949 0.02 0
## KLRD1 0 5.818575 0.950 0.07 0
## GZMB 0 5.41818 0.909 0.09 0
## KG7 0 4.85098 0.998 0.148 0
## PRF1 0 5.655445 0.897 0.054 0
## CTSW 0 4.97424 0.865 0.046 0
## control_p_val control_avg_log2FC control_pct.1 control_pct.2
## GLY 0 6.275650 0.944 0.06
## KLRD1 0 5.566265 0.961 0.041
## GZMB 0 5.895107 0.97 0.0
## KG7 0 5.118054 1.000 0.10
## PRF1 0 5.515456 0.902 0.046
## CTSW 0 4.268425 0.919 0.094
## control_p_val_adj max_pval minimump_p_val
## GLY 0 0 0
## KLRD1 0 0 0
## GZMB 0 0 0
## KG7 0 0 0
## PRF1 0 0 0
## CTSW 0 0 0
组间比较的气泡图
代码语言:javascript代码运行次数:0运行复制plot = c("CDD", "CREM", "HSPH1", "SELL", "GIMAP5", "CACYBP", "GLY", "KG7", "CCL5",
"CD8A", "MS4A1", "CD79A", "MIR155HG", "ME1", "FCGRA", "VMO1", "CCL2", "S100A9", "HLA-DQA1",
"GPR18", "PPBP", "GG11", "HBA2", "HBB", "TSPA1", "ILRA", "PRSS57") #一组感兴趣的基因
#如果idents有A会报错
#scRA <- subset(scRA, seurat_annotati %in% (scRA$seurat_annotati))
DotPlot(scRA, features = plot, cols = c("blue", "red"),
dot.scale = 8, split.by = "group") +
RotatedAxis()
FeaturePlot(scRA, features = c("CDD", "GLY", "IFI6"), split.by = "group", = , cols = c("grey",
"red"), reduction = "umap")
plots <- VlnPlot(scRA, features = c("LYZ", "ISG15", "CXCL10"), split.by = "group", group.by = "seurat_annotati",
pt.size = 0, combine = FALSE)
library(patchwork)
wrap_plots(plots = plots, ncol = 1)
8.伪bulk 转录组差异分析
每个组要有多个样本才能做
代码语言:javascript代码运行次数:0运行复制bulk <- AggregateExpression(scRA, return.seurat = T, slot = "counts", assays = "RA", group.by = c("seurat_annotati","orig.ident", "group"))
sub <- subset(bulk, seurat_annotati == "CD8+ T-cells")
Idents(sub) <- "group"
de_markers <- FindMarkers(sub, ident.1 = "treat", ident.2 = "control", slot = "counts", test.use = "DESeq2",
verbose = F)
de_markers$gene <- rownames(de_markers)
k1 = de_markers$avg_log2FC< -1 & de_markers$p_val <0.01
k2 = de_markers$avg_log2FC> 1 & de_markers$p_val <0.01
de_markers$change <- ifelse(k1,"down",ifelse(k2,"up","not"))
library(ggplot2)
library(ggrepel)
ggplot(de_markers, aes(avg_log2FC, -log10(p_val),color = change)) +
geom_point(size = 0.5, alpha = 0.5) +
geom_vline(xintercept = c(1,-1),linetype = 4)+
geom_hline(yintercept = -log10(0.01),linetype = 4)+
scale_color_manual(values = c("blue","grey","red"))+
theme_bw() +
ylab("-log10(unadjusted p-value)")
#感谢您对电脑配置推荐网 - 最新i3 i5 i7组装电脑配置单推荐报价格的认可,转载请说明来源于"电脑配置推荐网 - 最新i3 i5 i7组装电脑配置单推荐报价格
上传时间: 2025-07-26 17:36:23
上一篇:R数据框一个有趣的小问题
下一篇:CellChat细胞通讯
推荐阅读
留言与评论(共有 15 条评论) |
本站网友 qq1234 | 7分钟前 发表 |
verbose = F) de_markers$gene <- rownames(de_markers) k1 = de_markers$avg_log2FC< -1 & de_markers$p_val <0.01 k2 = de_markers$avg_log2FC> 1 & de_markers$p_val <0.01 de_markers$change <- ifelse(k1 | |
本站网友 北京市住房公积金网 | 13分钟前 发表 |
get(load("ref_HumanPrimaryCellAtlasData.RData"))) library(BiocParallel) scRA = sce.all test = scRA@assays$RA$data pred.scRA <- SingleR(test = test | |
本站网友 北京高端住宅 | 15分钟前 发表 |
-log10(p_val) | |
本站网友 win7瘦身 | 26分钟前 发表 |
exdir = "GSE21920_RAW") #unlink("GSE21920_") library(stringr) fs = paste0("GSE21920_RAW/" | |
本站网友 同花顺中信金通 | 17分钟前 发表 |
"percent.hb") | |
本站网友 门面装修效果图 | 25分钟前 发表 |
label = T | |
本站网友 贵阳公寓出租 | 27分钟前 发表 |
pt.size = 0 | |
本站网友 光笔顺 | 26分钟前 发表 |
cols = c("blue" | |
本站网友 董衍 | 13分钟前 发表 |
cols = c("blue" | |
本站网友 飞鹰活络油 | 28分钟前 发表 |
本站网友 气功小周天 | 10分钟前 发表 |
"IFI6") | |
本站网友 女性生殖器官图 | 25分钟前 发表 |
"VMO1" | |
本站网友 智能灌溉 | 30分钟前 发表 |
exdir = "GSE21920_RAW") #unlink("GSE21920_") library(stringr) fs = paste0("GSE21920_RAW/" | |
本站网友 长葛新闻 | 17分钟前 发表 |
features = c("CDD" |