您现在的位置是:首页 > 编程 > 

单细胞组间比较分析

2025-07-29 15:22:44
单细胞组间比较分析 这篇文章介绍的是有分组的单细胞数据怎样分析,数据来自GEO的GSE,有个treat,个control样本,代码完整,可以自行下载数据跑一跑,但请注意细胞数量是6w,对计算资源要求较高,自己的电脑跑不动,需要在服务器上跑。1.整理数据因为数据组织的不是每个样本一个文件夹的形式,所以需要自行整理,参考代码如下,注意这段改名的代码不要反复运行:代码语言:javascr

单细胞组间比较分析

这篇文章介绍的是有分组的单细胞数据怎样分析,数据来自GEO的GSE21920,有个treat,个control样本,代码完整,可以自行下载数据跑一跑,但请注意细胞数量是6w,对计算资源要求较高,自己的电脑跑不动,需要在服务器上跑。

1.整理数据

因为数据组织的不是每个样本一个文件夹的形式,所以需要自行整理,参考代码如下,注意这段改名的代码不要反复运行:

代码语言:javascript代码运行次数:0运行复制
#untar("GSE21920_",exdir = "GSE21920_RAW")
#unlink("GSE21920_")
library(stringr)
fs = paste0("GSE21920_RAW/",dir("GSE21920_RAW/"))
fs
samples = dir("GSE21920_RAW/") %>% str_split_i("_",2) %>% unique();samples

#为每个样本创建单独的文件夹
lapply(samples, function(s){
  ns = paste0("01_data/",s)
  if(!(ns))(ns,recursive = T)
})

#每个样本的三个文件复制到单独的文件夹
lapply(fs, function(s){
  #s = fs[1]
  for(i in 1:length(samples)){
    #i = 1
    if(str_detect(s,samples[[i]])){
      (s,paste0("01_data/",samples[[i]]))
    }
  }
})

#文件名字修改
on = paste0("01_data/",dir("01_data/",recursive = T));on
nn = str_remove(on,"GSM\\d+_sample\\d_");nn
file.rename(on,nn)

代码主要参考:

代码语言:javascript代码运行次数:0运行复制
rm(list = ls())

2.批量读取

代码语言:javascript代码运行次数:0运行复制
rm(list = ls())
library(Seurat)
f = dir("01_data/")
scelist = list()
for(i in 1:length(f)){
  pda <- Read10X(paste0("01_data/",f[[i]]))
  scelist[[i]] <- CreateSeuratObject(counts = pda, 
                                     project = f[[i]])
  print(dim(scelist[[i]]))
}

## [1] 58 10218
## [1] 58  891
## [1] 58  8607
## [1] 58 127
## [1] 58 1108
## [1] 58 10821

sce.all = merge(scelist[[1]],scelist[-1])
sce.all = JoinLayers(sce.all)
head(sce.all@meta.data)

##                      orig.ident nCount_RA nFeature_RA
## AAACCCACACAAATAG-1_1    sample1       5624         159
## AAACCCACAGGCTCTG-1_1    sample1      6854         1798
## AAACCCACAGGTCCCA-1_1    sample1       5659         146
## AAACCCACAGTCGGTC-1_1    sample1       424         1256
## AAACCCAGTTTGGCTA-1_1    sample1       5105         156
## AAACCCATCCCATAAG-1_1    sample1       817         1495

table(sce.all$orig.ident)

## 
## sample1 sample2 sample sample4 sample5 sample6 
##   10218    891    8607   127   1108   10821

sum(table(Idents(sce.all)))

## [1] 6248

.质控指标

代码语言:javascript代码运行次数:0运行复制
sce.all[[""]] <- PercentageFeatureSet(sce.all, pattern = "^MT-")
sce.all[["percent.rp"]] <- PercentageFeatureSet(sce.all, pattern = "^RP[SL]")
sce.all[["percent.hb"]] <- PercentageFeatureSet(sce.all, pattern = "^HB[^(P)]")

head(sce.all@meta.data, )

##                      orig.ident nCount_RA nFeature_RA  percent.rp
## AAACCCACACAAATAG-1_1    sample1       5624         159  6.4544808  4.726174
## AAACCCACAGGCTCTG-1_1    sample1      6854         1798  0.148107   7.640962
## AAACCCACAGGTCCCA-1_1    sample1       5659         146  .4811804  0.04064
##                      percent.hb
## AAACCCACACAAATAG-1_1 0.00000000
## AAACCCACAGGCTCTG-1_1 0.00000000
## AAACCCACAGGTCCCA-1_1 0.01767097

VlnPlot(sce.all, 
        features = c("nFeature_RA",
                     "nCount_RA", 
                     "",
                     "percent.rp",
                     "percent.hb"),
        ncol = ,pt.size = 0, group.by = "orig.ident")
代码语言:javascript代码运行次数:0运行复制
sce.all = subset(sce.all,<25)

4.整合降维聚类分

代码语言:javascript代码运行次数:0运行复制
f = "obj.Rdata"
library(harmony)
if(!(f)){
  sce.all = sce.all %>% 
    ormalizeData() %>%  
    FindVariableFeatures() %>%  
    ScaleData(features = rownames(.)) %>%  
    RunPCA(pc.genes = VariableFeatures(.))  %>%
    RunHarmony("orig.ident") %>%
    Findeighbors(dims = 1:15, reduction = "harmony") %>% 
    FindClusters(resolution = 0.5) %>% 
    RunUMAP(dims = 1:15,reduction = "harmony") %>% 
    RunTSE(dims = 1:15,reduction = "harmony")
  save(sce.all,file = f)
}
load(f)
ElbowPlot(sce.all)
代码语言:javascript代码运行次数:0运行复制
UMAPPlot(sce.all,label = T)
代码语言:javascript代码运行次数:0运行复制
TSEPlot(sce.all,label = T)

5.注释

代码语言:javascript代码运行次数:0运行复制
library(celldex)
library(SingleR)
ls("package:celldex")

## [1] "BlueprintEncodeData"              "DatabaseImmuneCellExpressionData"
## [] "HumanPrimaryCellAtlasData"        "ImmGenData"                      
## [5] "MonacoImmuneData"                 "MouseRAseqData"                 
## [7] "overshternHematopoieticData"

f = "ref_HumanPrimaryCellAtlasData.RData"
if(!(f)){
  ref <- celldex::HumanPrimaryCellAtlasData()
  save(ref,file = f)
}
ref <- list(get(load("ref_BlueprintEncode.RData")),
            get(load("ref_HumanPrimaryCellAtlasData.RData")))
library(BiocParallel)
scRA = sce.all
test = scRA@assays$RA$data
pred.scRA <- SingleR(test = test, 
                      ref = ref,
                      labels = list(ref[[1]]$,ref[[2]]$), 
                      clusters = scRA@active.ident)
pred.scRA$pruned.labels

##  [1] "CD8+ T-cells"      "B-cells"           "Fibroblasts"      
##  [4] "CD4+ T-cells"      "CD8+ T-cells"      "eutrophils"      
##  [7] "Endothelial_cells" "Fibroblasts"       "Monocytes"        
## [10] "Macrophages"       "Fibroblasts"       "Adipocytes"       
## [1] "B-cells"           "K cells"          "Monocytes"        
## [16] A                  "Endothelial cells" "Monocytes"        
## [19] "eutrophils"       "Fibroblasts"       "Adipocytes"

#查看注释准确性 
#plotScoreHeatmap(pred.scRA, clusters=pred.scRA@rownames, fontsize.row = 9,show_colnames = T)
ids <- pred.scRA$pruned.labels
ids[(ids)] = "unknown"
names(ids) <- levels(scRA)
levels(scRA)

##  [1] "0"  "1"  "2"  ""  "4"  "5"  "6"  "7"  "8"  "9"  "10" "11" "12" "1" "14"
## [16] "15" "16" "17" "18" "19" "20"

scRA <- RenameIdents(scRA,ids)
levels(scRA)

##  [1] "CD8+ T-cells"      "B-cells"           "Fibroblasts"      
##  [4] "CD4+ T-cells"      "eutrophils"       "Endothelial_cells"
##  [7] "Monocytes"         "Macrophages"       "Adipocytes"       
## [10] "K cells"          "unknown"           "Endothelial cells"

p2 <- DimPlot(scRA, reduction = "umap",label = T,pt.size = 0.5) + oLegend()
p2

6.分组可视化及组件细胞比例比较

代码语言:javascript代码运行次数:0运行复制
scRA$seurat_annotati = Idents(scRA)
table(scRA$orig.ident)

## 
## sample1 sample2 sample sample4 sample5 sample6 
##   10109    80    8510   1259   10874   1062

library(tinyarray)
pd = geo_download("GSE21920")$pd
pd$title

## [1] "IgG4-RD1" "IgG4-RD2" "IgG4-RD" "Control1" "Control2" "Control"

scRA$group = ifelse(scRA$orig.ident %in% c("sample1","sample2","sample"), "treat","control")
DimPlot(scRA, reduction = "umap", group.by = "group")

可以计算每个亚的细胞数量和占全部细胞的比例

代码语言:javascript代码运行次数:0运行复制
# 每种细胞的数量和比例
cell_counts <- table(Idents(scRA))
cell.all <- cbind(cell_counts = cell_counts, 
                  cell_Freq = round((cell_counts)*100,2))
#各组中每种细胞的数量和比例
group <- table(Idents(scRA), scRA$group) 
cell.freq.group <- round((group, margin = 2) *100,2)
cell.all = cbind(cell.all,group,cell.freq.group)
cell.all = cell.all[,c(1,,4,2,5,6)]
colnames(cell.all) = paste(rep(c("all","control","treat"),times = 2),
      rep(c("count","freq"),each = ),sep = "_")
cell.all

##                   all_count control_count treat_count all_freq control_freq
## CD8+ T-cells          15077          5518        9559    24.71        16.19
## B-cells                9089          2870        6219    14.90         8.42
## Fibroblasts           1256         1077        158    20.25        1.60
## CD4+ T-cells           707          20        4770    11.59         6.76
## eutrophils            551          4114        199     9.04        12.07
## Endothelial_cells      26          260         876     5.0         6.92
## Monocytes              2948          194        1014     4.8         5.67
## Macrophages            209          1661         42     .4         4.87
## Adipocytes             1468          122         146     2.41         .88
## K cells               1200           695         505     1.97         2.04
## unknown                 51           186         45     0.87         0.55
## Endothelial cells       428           54          74     0.70         1.04
##                   treat_freq
## CD8+ T-cells           5.51
## B-cells                2.10
## Fibroblasts             5.88
## CD4+ T-cells           17.72
## eutrophils             5.20
## Endothelial_cells       .25
## Monocytes               .77
## Macrophages             1.60
## Adipocytes              0.54
## K cells                1.88
## unknown                 1.28
## Endothelial cells       0.27

7.差异分析

某种细胞在不同组间的差异基因

代码语言:javascript代码运行次数:0运行复制
if(!require("multtest"))BiocManager::install('multtest')
if(!require("metap"))install.packages('metap')
table(scRA$seurat_annotati)

## 
##      CD8+ T-cells           B-cells       Fibroblasts      CD4+ T-cells 
##             15077              9089             1256              707 
##       eutrophils Endothelial_cells         Monocytes       Macrophages 
##              551              26              2948              209 
##        Adipocytes          K cells           unknown Endothelial cells 
##              1468              1200               51               428

 <- FindCervedMarkers(scRA, ident.1 = "K cells", grouping.var = "group", min.pct = 0.25,  = 0.25,verbose = F)
head()

##       treat_p_val treat_avg_log2FC treat_pct.1 treat_pct.2 treat_p_val_adj
## GLY            0         6.695047       0.949       0.02               0
## KLRD1           0         5.818575       0.950       0.07               0
## GZMB            0         5.41818       0.909       0.09               0
## KG7            0         4.85098       0.998       0.148               0
## PRF1            0         5.655445       0.897       0.054               0
## CTSW            0         4.97424       0.865       0.046               0
##       control_p_val control_avg_log2FC control_pct.1 control_pct.2
## GLY              0           6.275650         0.944         0.06
## KLRD1             0           5.566265         0.961         0.041
## GZMB              0           5.895107         0.97         0.0
## KG7              0           5.118054         1.000         0.10
## PRF1              0           5.515456         0.902         0.046
## CTSW              0           4.268425         0.919         0.094
##       control_p_val_adj max_pval minimump_p_val
## GLY                  0        0              0
## KLRD1                 0        0              0
## GZMB                  0        0              0
## KG7                  0        0              0
## PRF1                  0        0              0
## CTSW                  0        0              0

组间比较的气泡图

代码语言:javascript代码运行次数:0运行复制
plot = c("CDD", "CREM", "HSPH1", "SELL", "GIMAP5", "CACYBP", "GLY", "KG7", "CCL5",
    "CD8A", "MS4A1", "CD79A", "MIR155HG", "ME1", "FCGRA", "VMO1", "CCL2", "S100A9", "HLA-DQA1",
    "GPR18", "PPBP", "GG11", "HBA2", "HBB", "TSPA1", "ILRA", "PRSS57") #一组感兴趣的基因
#如果idents有A会报错
#scRA <- subset(scRA, seurat_annotati %in% (scRA$seurat_annotati))
DotPlot(scRA, features = plot, cols = c("blue", "red"), 
        dot.scale = 8, split.by = "group") +
    RotatedAxis()
代码语言:javascript代码运行次数:0运行复制
FeaturePlot(scRA, features = c("CDD", "GLY", "IFI6"), split.by = "group",  = , cols = c("grey",
    "red"), reduction = "umap")
代码语言:javascript代码运行次数:0运行复制
plots <- VlnPlot(scRA, features = c("LYZ", "ISG15", "CXCL10"), split.by = "group", group.by = "seurat_annotati",
    pt.size = 0, combine = FALSE)
library(patchwork)
wrap_plots(plots = plots, ncol = 1)

8.伪bulk 转录组差异分析

每个组要有多个样本才能做

代码语言:javascript代码运行次数:0运行复制
bulk <- AggregateExpression(scRA, return.seurat = T, slot = "counts", assays = "RA", group.by = c("seurat_annotati","orig.ident", "group"))

sub <- subset(bulk, seurat_annotati == "CD8+ T-cells")
Idents(sub) <- "group"
de_markers <- FindMarkers(sub, ident.1 = "treat", ident.2 = "control", slot = "counts", test.use = "DESeq2",
    verbose = F)
de_markers$gene <- rownames(de_markers)
k1 = de_markers$avg_log2FC< -1 & de_markers$p_val <0.01
k2 = de_markers$avg_log2FC> 1 & de_markers$p_val <0.01
de_markers$change <- ifelse(k1,"down",ifelse(k2,"up","not"))
library(ggplot2)
library(ggrepel)
ggplot(de_markers, aes(avg_log2FC, -log10(p_val),color = change)) + 
  geom_point(size = 0.5, alpha = 0.5) + 
  geom_vline(xintercept = c(1,-1),linetype = 4)+
  geom_hline(yintercept = -log10(0.01),linetype = 4)+
  scale_color_manual(values = c("blue","grey","red"))+
  theme_bw() +
  ylab("-log10(unadjusted p-value)") 
本文参与 腾讯云自媒体同步曝光计划,分享自。原始发表:2024-06-14,如有侵权请联系 cloudcommunity@tencent 删除可视化数据celldataref

#感谢您对电脑配置推荐网 - 最新i3 i5 i7组装电脑配置单推荐报价格的认可,转载请说明来源于"电脑配置推荐网 - 最新i3 i5 i7组装电脑配置单推荐报价格

本文地址:http://www.dnpztj.cn/biancheng/1237099.html

相关标签:无
上传时间: 2025-07-26 17:36:23
留言与评论(共有 15 条评论)
本站网友 qq1234
7分钟前 发表
verbose = F) de_markers$gene <- rownames(de_markers) k1 = de_markers$avg_log2FC< -1 & de_markers$p_val <0.01 k2 = de_markers$avg_log2FC> 1 & de_markers$p_val <0.01 de_markers$change <- ifelse(k1
本站网友 北京市住房公积金网
13分钟前 发表
get(load("ref_HumanPrimaryCellAtlasData.RData"))) library(BiocParallel) scRA = sce.all test = scRA@assays$RA$data pred.scRA <- SingleR(test = test
本站网友 北京高端住宅
15分钟前 发表
-log10(p_val)
本站网友 win7瘦身
26分钟前 发表
exdir = "GSE21920_RAW") #unlink("GSE21920_") library(stringr) fs = paste0("GSE21920_RAW/"
本站网友 同花顺中信金通
17分钟前 发表
"percent.hb")
本站网友 门面装修效果图
25分钟前 发表
label = T
本站网友 贵阳公寓出租
27分钟前 发表
pt.size = 0
本站网友 光笔顺
26分钟前 发表
cols = c("blue"
本站网友 董衍
13分钟前 发表
cols = c("blue"
本站网友 飞鹰活络油
28分钟前 发表
本站网友 气功小周天
10分钟前 发表
"IFI6")
本站网友 女性生殖器官图
25分钟前 发表
"VMO1"
本站网友 智能灌溉
30分钟前 发表
exdir = "GSE21920_RAW") #unlink("GSE21920_") library(stringr) fs = paste0("GSE21920_RAW/"
本站网友 长葛新闻
17分钟前 发表
features = c("CDD"