（五）单细胞数据分析——细胞亚群的表型特征刻画（补充工作）

本文主要是介绍（五）单细胞数据分析——细胞亚群的表型特征刻画（补充工作），希望对大家解决编程问题提供一定的参考价值，需要的开发者们随着小编来一起学习吧！

由于细胞表型特征这里，我们可以进行一些信息的补充，一个是从全局重新观察每个细胞亚群的基因表达，另一个是细胞亚群特征基因的小提琴图。

第一步：导入R包

library(scibet)
library(Seurat)
library(scater)
library(scran)
library(dplyr)
library(Matrix)
library(cowplot)
library(ggplot2)
library(harmony)

第二步：全局观察每个细胞亚群的基因表达

导入所有细胞亚群的数据

BRCA_SingleCell.Bcell <- readRDS("BRCA_SingleCell.B_Pcell.RDS")
BRCA_SingleCell.Tcell <- readRDS("BRCA_SingleCell.Tcell.RDS")
BRCA_SingleCell.Mycell <- readRDS("BRCA_SingleCell.Mycell.RDS")
BRCA_SingleCell.Fcell <- readRDS("BRCA_SingleCell.Fcell.RDS")

设置颜色种类

cb_palette <- c("antiquewhite", "yellowgreen", "thistle", "violet", "azure3", "azure4", "black","blue", "blue4","blueviolet", #B"brown4","brown1", "burlywood1", "burlywood4", "cadetblue1", "cadetblue4","chartreuse", "chartreuse3", #T"chartreuse4","chocolate1", "chocolate3", "chocolate4", "cyan3", "darkgoldenrod","darkgoldenrod1","darkgoldenrod4", "darkolivegreen", "khaki", "yellow" ,"hotpink" ,"hotpink4" , #My"slateblue1" ,"slateblue4", "yellow3","sienna4", "tomato1", "sienna1", "dodgerblue4", "darkmagenta","cornsilk4") #F

绘制每个细胞亚群的UMAP图

DimPlot(object = BRCA_SingleCell.Bcell,reduction = 'umap',group.by = c('cell_Subtype'),cols = cb_palette[1:10], label = TRUE)
DimPlot(object = BRCA_SingleCell.Tcell,reduction = 'umap',group.by = c('cell_Subtype'),cols = cb_palette[11:18], label = TRUE)
DimPlot(object = BRCA_SingleCell.Mycell,reduction = 'umap',group.by = c('cell_Subtype'),cols = cb_palette[19:31], label = TRUE)
DimPlot(object = BRCA_SingleCell.Fcell,reduction = 'umap',group.by = c('cell_Subtype'),cols = cb_palette[32:40], label = TRUE)

其中一个结果如下：

合并所有数据绘制总的UMAP图

BRCA_SingleCell <- merge(BRCA_SingleCell.Fcell,BRCA_SingleCell.Tcell)
BRCA_SingleCell <- merge(BRCA_SingleCell,BRCA_SingleCell.Bcell)
BRCA_SingleCell <- merge(BRCA_SingleCell,BRCA_SingleCell.Mycell)
DimPlot(object = BRCA_SingleCell,reduction = 'umap',group.by = c('cell_Subtype'), cols = cb_palette[1:40], label = FALSE)

结果如下：

分析不同细胞亚群之间的差异基因，并绘制热图

cellType_markerGene <- findMarker(BRCA_SingleCell)
top <- cellType_markerGene %>% group_by(cluster) %>% top_n(n =10, wt = myAUC)
top$cluster %>% levels(.)
clusters <- unique(BRCA_SingleCell$cell_Subtype)
genes <- c()
for(i in clusters){gene <- as.vector(as.matrix(subset(top, cluster == i, gene)))gene <- gene[1:(ifelse(length(gene)>5,5,length(gene)))]names(gene) <- rep(i, times = length(gene))genes <- c(genes, gene)
}
table(names(genes))
table(unique(names(genes)))
B_P_gene <- c(genes[which(names(genes) == "naive B")],genes[which(names(genes) == "Memory B")],genes[which(names(genes) == "Germinal center B cell")],genes[which(names(genes) == "IgA Plasma")],genes[which(names(genes) == "IGHGP+ Plasma")],genes[which(names(genes) == "IGLC7+ Plasma")],genes[which(names(genes) == "IL7R+ Plasma")],genes[which(names(genes) == "NKG7+ Plasma")],genes[which(names(genes) == "IgG Plasma")])
T_gene <- c(genes[which(names(genes) == "CD8+ Tem")],genes[which(names(genes) == "CD4+ Tfh")],genes[which(names(genes) == "NK")],genes[which(names(genes) == "Th0")],genes[which(names(genes) == "GZMK+ Tc")],genes[which(names(genes) == "CD8+ Tex")],genes[which(names(genes) == "CD4+ Treg")],genes[which(names(genes) == "Tem")])
Stroma_gene <- c(genes[which(names(genes) == "MYH11+MCAM+ Myofibro")],genes[which(names(genes) == "RGS5+MCAM+ Myofibro")],genes[which(names(genes) == "VEGF+ Fibro")],genes[which(names(genes) == "CFD+ Fibro")],genes[which(names(genes) == "PLA2G2A+CFD+ Fibro")],genes[which(names(genes) == "PDGFC+ Fibro")],genes[which(names(genes) == "CXCL11+ Fibro")],genes[which(names(genes) == "MMP11+ Fibro")],genes[which(names(genes) == "MMP1+ Fibro")],genes[which(names(genes) == "MMP11+ Fibro")])
Myeloid_gene <- c(genes[which(names(genes) == "MM9+ Macro")],genes[which(names(genes) == "C1QC+MRC1-  Macro")],genes[which(names(genes) == "MRC1+ Macro")],genes[which(names(genes) == "cDC2")],genes[which(names(genes) == "VCAN+SPP1+ Macro")],genes[which(names(genes) == "MARCO+SPP1+ Macro")],genes[which(names(genes) == "cDC1")],genes[which(names(genes) == "Activated DCs")],genes[which(names(genes) == "Mast cell")],genes[which(names(genes) == "TACSTD2+ Macro")],genes[which(names(genes) == "CXCL10+ Macro")])
cellMarker <- c(B_P_gene,T_gene,Myeloid_gene,Stroma_gene)
length(cellMarker)
length(unique(cellMarker))
sum(table(cellMarker) == 1)
cellMarker.unique <- unique(cellMarker)
options(repr.plot.height = 14, repr.plot.width = 18)
test <- DotPlot(object = BRCA_SingleCell,features = cellMarker.unique,cols = c('blue','red'),group.by = 'cell_Subtype')+
theme_bw()+ theme(axis.text.x = element_text(angle = -90,hjust = 0,vjust = 0)) +
coord_flip()
test
test$data$id %>% unique(.) %>% setdiff(.,c('naive B','IL7R+ Plasma','memory B','Germinal center B','IGHGP+ Plasma','IGLC7+ Plasma','NKG7+ Plasma','STMN1+ Plasma','IgA Plasma','IgG Plasma','Th0','CD4+ Tfh','CD8+ Tem','CD4+ Treg','GZMK+ Tc','CD8+ Tex','NK','Tem','TACSTD2+ Macro','MARCO+SPP1+ Macro','VCAN+SPP1+ Macro','CXCL10+ Macro','cDC2','CXCL9+ Macro','MRC1+ Macro','cDC1','Activated DCs','C1QC+MRC1- Macro','MM9+ Macro','CPB1+ Macro','Mast cell','CXCL11+ Fibro','CFD+ Fibro','RGS5+MCAM+ Myofibro','MMP11+ Fibro','VEGF+ Fibro','PLA2G2A+CFD+ Fibro','MYH11+MCAM+ Myofibro','PDGFC+ CAFs','Endo'))
Mmarker_data  <- test$data
Mmarker_data$Zscore <- Mmarker_data$avg.exp.scaled
Mmarker_data$cluster <- factor(x = Mmarker_data$id,levels = c('naive B','IL7R+ Plasma','memory B','Germinal center B','IGHGP+ Plasma','IGLC7+ Plasma','NKG7+ Plasma','STMN1+ Plasma','IgA Plasma','IgG Plasma','Th0','CD4+ Tfh','CD8+ Tem','CD4+ Treg','GZMK+ Tc','CD8+ Tex','NK','Tem','TACSTD2+ Macro','MARCO+SPP1+ Macro','VCAN+SPP1+ Macro','CXCL10+ Macro','cDC2','CXCL9+ Macro','MRC1+ Macro','cDC1','Activated DCs','C1QC+MRC1- Macro','MM9+ Macro','CPB1+ Macro','Mast cell','CXCL11+ Fibro','CFD+ Fibroblast','RGS5+MCAM+ Myofibro','MMP11+ Fibro','VEGF+ Fibro','PLA2G2A+CFD+ Fibro','MYH11+MCAM+ Myofibro','PDGFC+ Fibro','Endo'),labels = c('naive B','IL7R+ Plasma','memory B','Germinal center B','IGHGP+ Plasma','IGLC7+ Plasma','NKG7+ Plasma','STMN1+ Plasma','IgA Plasma','IgG Plasma','Th0','CD4+ Tfh','CD8+ Tem','CD4+ Treg','GZMK+ Tc','CD8+ Tex','NK','Tem','TACSTD2+ Macro','MARCO+SPP1+ Macro','VCAN+SPP1+ Macro','CXCL10+ Macro','cDC2','CXCL9+ Macro','MRC1+ Macro','cDC1','Activated DCs','C1QC+MRC1- Macro','MM9+ Macro','CPB1+ Macro','Mast cell','CXCL11+ Fibro','CFD+ Fibro','RGS5+MCAM+ Myofibro','MMP11+ Fibro','VEGF+ Fibro','PLA2G2A+CFD+ Fibro','MYH11+MCAM+ Myofibro','PDGFC+ Fibro','Endo'))
unique(Mmarker_data$id)
Mmarker_data$geneClass <- NA
Mmarker_data$geneClass <- ifelse(Mmarker_data$features.plot %in% B_P_gene,'B',Mmarker_data$geneClass)
Mmarker_data$geneClass <- ifelse(Mmarker_data$features.plot %in% T_gene,'T',Mmarker_data$geneClass)
Mmarker_data$geneClass <- ifelse(Mmarker_data$features.plot %in% Myeloid_gene,'M',Mmarker_data$geneClass)
Mmarker_data$geneClass <- ifelse(Mmarker_data$features.plot %in% Stroma_gene,'S',Mmarker_data$geneClass)
library(aplot)
options(repr.plot.height = 14, repr.plot.width = 13)
group <- factor(c('naive B','IL7R+ Plasma','memory B','Germinal center B','IGHGP+ Plasma','IGLC7+ Plasma','NKG7+ Plasma','STMN1+ Plasma','IgA Plasma','IgG Plasma','Th0','CD4+ Tfh','CD8+ Tem','CD4+ Treg','GZMK+ Tc','CD8+ Tex','NK','Tem','TACSTD2+ Macro','MARCO+SPP1+ Macro','VCAN+SPP1+ Macro','CXCL10+ Macro','cDC2','CXCL9+ Macro','MRC1+ Macro','cDC1','Activated DCs','C1QC+MRC1- Macro','MM9+ Macro','CPB1+ Macro','Mast cell','CXCL11+ Fibro','CFD+ Fibro','RGS5+MCAM+ Myofibro','MMP11+ Fibro','VEGF+ Fibro','PLA2G2A+CFD+ Fibro','MYH11+MCAM+ Myofibro','PDGFC+ Fibro','Endo'),levels = c('naive B','IL7R+ Plasma','memory B','Germinal center B','IGHGP+ Plasma','IGLC7+ Plasma','NKG7+ Plasma','STMN1+ Plasma','IgA Plasma','IgG Plasma','Th0','CD4+ Tfh','CD8+ Tem','CD4+ Treg','GZMK+ Tc','CD8+ Tex','NK','Tem','TACSTD2+ Macro','MARCO+SPP1+ Macro','VCAN+SPP1+ Macro','CXCL10+ Macro','cDC2','CXCL9+ Macro','MRC1+ Macro','cDC1','Activated DCs','C1QC+MRC1- Macro','MM9+ Macro','CPB1+ Macro','Mast cell','CXCL11+ Fibro','CFD+ Fibro','RGS5+MCAM+ Myofibro','MMP11+ Fibro','VEGF+ Fibro','PLA2G2A+CFD+ Fibro','MYH11+MCAM+ Myofibro','PDGFC+ Fibro','Endo')) %>% as.data.frame() %>% mutate(group=c(rep("B cell",10),rep("T cell",8),rep("Myeloid",13),rep("Strom cell",9))) %>%mutate(p="group") %>%ggplot(aes(.,y=p,fill=group))+geom_tile() + scale_y_discrete(position="right") +theme_minimal()+xlab(NULL) + ylab(NULL) +theme(axis.text.x = element_blank())+labs(fill = "Group")
heatmap <- ggplot(Mmarker_data, aes(y=features.plot, x=cluster, fill=Zscore))+ 
geom_raster()+scale_fill_gradient2(low="#003366", high="#990033", mid="white",midpoint = 0)+
theme(axis.text.x = element_text(angle = -90,hjust = 0,vjust = 0.5, size = 15),axis.ticks.y = element_blank())+
xlab(NULL) + ylab(NULL)+theme(panel.border = element_rect(fill=NA,color="black", size=1, linetype="solid"))+geom_vline(xintercept=c(-0.5,10.5,18.5,31.5),size=.8)+geom_hline(yintercept=c(-0.5,18.5,48.5,76.5), size=.8)
heatmap <- heatmap %>% insert_top(group ,height = 0.05)
heatmap

结果如下：

第三步：绘制特征基因的小提琴图

构造绘制小提琴图的函数与特征基因筛选函数

modify_vlnplot<- function(obj, feature, pt.size = 0, plot.margin = unit(c(-0.75, 0, -0.75, 0), "cm"),...) {p<- VlnPlot(obj, features = feature, pt.size = pt.size,group.by = 'cell_Subtype', ... )  + xlab("") + ylab(feature) + ggtitle("") + theme(legend.position = "none", axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.title.y = element_text(size = rel(1), angle = 0), axis.text.y = element_text(size = rel(1)), plot.margin = plot.margin ) #+ rotate_x_text() return(p)
}
extract_max<- function(p){ymax<- max(ggplot_build(p)$layout$panel_scales_y[[1]]$range$range)return(ceiling(ymax))
}## main function
StackedVlnPlot<- function(obj, features,pt.size = 0, plot.margin = unit(c(-0.75, 0, -0.75, 0), "cm"),...) {plot_list<- purrr::map(features, function(x) modify_vlnplot(obj = obj,feature = x, ...))# Add back x-axis title to bottom plot. patchwork is going to support this?# rotate_x_text() 转置横坐标的字体，ggpubr 包，在这里添加才会只出现一次横坐标，不会出现多个横坐标plot_list[[length(plot_list)]]<- plot_list[[length(plot_list)]] +theme(axis.text.x=element_text(), axis.ticks.x = element_line()) + rotate_x_text(angle = 90)# change the y-axis tick to only max value ymaxs<- purrr::map_dbl(plot_list, extract_max)plot_list<- purrr::map2(plot_list, ymaxs, function(x,y) x + scale_y_continuous(breaks = c(y)) + expand_limits(y = y))p<- patchwork::wrap_plots(plotlist = plot_list, ncol = 1)return(p)
}
findMarker <- function(x){Idents(x) <- 'cell_Subtype'cellType_markerGene <- FindAllMarkers(x,logfc.threshold = 0.5,only.pos = T,test.use = 'roc',min.pct = 0.2)cellType_markerGene <- subset(cellType_markerGene,!grepl(pattern = 'RP[LS]',gene))cellType_markerGene <- subset(cellType_markerGene,!grepl(pattern = 'MT-',gene))top5 <- cellType_markerGene %>% group_by(cluster) %>% top_n(n = 10, wt = myAUC)print(top5)return(cellType_markerGene)
}

注：查找差异基因是因为，如果没有去了解特征基因，可以通过差异基因进行一个“预见”

绘制小提琴图（以髓系细胞为例）

cellMarker.Mycell <- findMarker(BRCA_SingleCell.Mycell)
features<- c("HLA-DOB","HLA-DQA2","HLA-DPB1","LGALS2","CLEC9A","WDFY4","IDO1","CCND1","IRF8","RGCC","SHTN1","BATF3","DNASE1L3","CD1C","XCR1","CADM1","LIMD2","CPNE3")
vlnplt <- StackedVlnPlot(obj = BRCA_SingleCell.Mycell, features = features)
vlnplt

结果如下：