[1]:
library(Seurat)
library(gridExtra)
library(Matrix)
library(ggplot2)
T annotation#
[2]:
source('/home/public/ghx/software/my_seurat.r')
[3]:
RDSPath <- '/home/public/ghx/wuLab/Bladder/1217_all/seu'
[4]:
ct <- 'T'
[5]:
seu <- readRDS(file = file.path('/home/public/ghx/wuLab/Bladder/1217_all/seu',
'V1_Celltype_T_seu.rds'))
[6]:
old.clusters <- readRDS(file = '/home/public/ghx/wuLab/Bladder/1119_T/sampleCluster.rds')
new.clusters <- readRDS(file = '/home/public/ghx/wuLab/Bladder/1217_all/CellTypes/T/sample_cluster.rds')
[7]:
sample.id <- sapply(old.clusters, USE.NAMES = T, simplify = T,
FUN = function(x){
return(strsplit(x, split = '-', fixed = T)[[1]][1])
})
sample.id <- paste0('MIBC',sample.id)
names(old.clusters) <- paste0(sample.id, '.',names(old.clusters))
3rd annotation#
[20]:
annostr <- '11T-0 CD4-?
6P-1 CD4-?
7T-4 CD4-?
11T-4 CD4-?
5P-4 CD4-?
8P-3 CD4-?
11P-1 CD4-?
7T-6 CD4-UK1
9T-3 CD4-UK1
6T-5 CD4-UK1
6P-6 CD4-UK1
5T-3 CD4-UK1
8T-14 CD4-UK1
10T-5 CD4-naïve
9T-10 CD4-naïve
10P-10 CD4-naïve-RPL
7T-2 CD4-naïve-RPL
5T-6 CD4-naïve-RPL
9P-2 CD4-naïve-RPL
10T-4 CD4-naïve-RPL
9T-4 CD4-naïve-RPL
5P-3 CD4-naïve-RPL
8P-5 CD4-naïve-RPL
11T-5 CD4-naïve
10T-1 CD4-naïve
10T-2 CD4-naïve
10P-2 CD4-naïve
6T-1 CD4-RPL
11T-3 CD4-RPL
6P-2 CD4-RPL
4P-5 CD4-Th17
8T-5 CD4-Th17
11P-9 CD4-Th17
11P-0 CD4-Th17
10P-1 CD4-Th17
5T-2 CD4-Th17
7P-10 CD4-Th17
10P-5 CD4-Th17
8P-8 CD4-Th17
5P-1 CD4-Th17
7P-8 CD4-Th17
5T-8 CD4-Th17
7P-2 CD4-Th17
9P-0 CD4-Th17
5P-8 CD4-Tr1
4P-2 CD4-Tr1
6T-10 CD4-Treg
9T-9 CD4-Treg
8P-7 CD4-Treg
8T-4 CD4-Treg
10T-6 CD4-Treg
7T-1 CD4-Treg
7P-6 CD4-Treg
11T-6 CD4-Treg
5T-11 CD4-Treg
11P-10 CD4-Treg(naïve)
6P-4 CD4-Treg(naïve)
5T-5 CD4-Treg(naïve)
6T-2 CD4-Treg(naïve)
9P-7 CD4-Treg(naïve)
4P-4 CD4-Treg(naïve)
11T-1 CD4-Treg(naïve)
5P-10 CD4-Treg(naïve)
10P-12 CD4-Treg(Th17)
9T-1 CD4-Treg(Th17)
6T-6 CD4-Treg(Th17)
5T-0 CD8-TEM
4T-0 CD8-?
4T-1 CD8-?
5T-4 CD8-CT
4T-3 CD8-CT
9T-5 CD8-CT
4T-5 CD8-CT
6T-4 CD8-CT
6T-12 CD8-Exhau
7T-11 CD8-Exhau
4T-7 CD8-Exhau
4T-2 CD8-Exhau
6P-8 CD8-HSP
11P-6 CD8-HSP
9T-12 CD8-HSP
10P-7 CD8-HSP
5T-9 CD8-HSP
4T-4 CD8-HSP
9T-7 CD8-IFNsignaling
4T-8 CD8-IFNsignaling
7P-13 CD8-IG
11P-13 CD8-IG
7T-9 CD8-IG
11T-11 CD8-IG
10T-11 CD8-IG
10T-10 CD8-MAIT
6T-9 CD8-MAIT
11T-7 CD8-MAIT
6P-12 CD8-MAIT
6P-9 CD8-RPL
7P-9 CD8-RPL
9P-6 CD8-TEM
8P-9 CD8-TEM
6P-10 CD8-TEM
7T-5 CD8-TEM
11T-2 CD8-TEM
8P-4 CD8-TEM
9P-3 CD8-TEM
10T-9 CD8-TRM
5P-0 CD8-TRM
9P-1 CD8-TTE
6T-3 CD8-virus-responding
6T-0 CD8-virus-responding
4P-3 Deadcell
8T-7 Deadcell
6T-7 Deadcell
6P-7 Deadcell
8T-8 Deadcell
5T-1 Deadcell
10T-8 Deadcell
7P-11 Deadcell
11T-9 Deadcell
9T-11 Deadcell
7T-12 Deadcell
10P-8 Deadcell
5P-9 Deadcell
8P-1 Deadcell
11P-4 Deadcell
11P-12 Dump
10T-12 Dump
4T-11 Dump
11T-10 Dump
8P-12 Dump
10P-11 Dump
9P-8 Dump
9P-5 Dump
10P-14 Dump
8T-2 gdT-antitumor
9T-8 gdT-antitumor
8P-13 gdT-antitumor
5P-11 gdT-antitumor
6T-11 gdT-antitumor
11P-8 gdT-antitumor
10T-13 gdT-antitumor
9P-4 gdT-antitumor
7P-12 gdT-antitumor
10P-9 gdT-antitumor
7T-7 gdT-antitumor
11T-8 gdT-antitumor
8T-6 gdT-antitumor
8T-9 gdT-antitumor
8T-13 gdT-antitumor
5T-12 gdT-antitumor
8P-10 gdT-antitumor
4T-10 gdT-antitumor
4P-6 gdT-antitumor
4T-9 Tunconv
5T-10 Tunconv
8P-11 CD8-TEM
7T-0 CD8-CT
7T-3 CD8-CT
8T-3 CD8-Exhau
9T-0 CD8-TEM
9T-6 CD8-TEM
8T-1 CD8-RPL
11P-7 CD8-TEM
6P-0 CD8-CT
8P-2 CD8-CT
6P-3 CD8-TEM
4P-0 CD8-CT
10P-3 CD8-CT
5P-2 CD8-TEM
11P-5 CD8-TEM
11P-2 CD8-TRM
8T-11 CD8-HSP
4P-1 CD8-TRM
11P-11 CD8-TEM
5P-5 CD8-TRM
8P-0 CD8-TRM
8P-6 CD8-Exhau
7P-7 CD8-TRM
9T-2 CD8-?
7P-5 CD8-TEM
8T-0 CD8-TEM
8T-10 CD8-TEM
7P-3 CD8-TEM
10P-13 CD8-TRM
8T-12 CD8-IFNsignaling
4T-6 gdT-antitumor
10T-7 CD8-CT
10T-0 CD8-TEM
10P-4 CD8-TEM
7P-4 CD8-TEM
7P-0 CD8-TEM
11P-3 CD8-TTE
10P-6 CD8-?
6T-8 CD8-CT
7T-10 CD8-?
5T-7 CD8-TRM'
annolines <- strsplit(annostr, split = '\n')[[1]]
[22]:
anno.ct <- rep(NA, length(old.clusters))
names(anno.ct) <- names(old.clusters)
for(anno in strsplit(annolines, split = '\t')){
anno.ct[old.clusters %in% anno[1]] <- anno[2]
}
[23]:
intersect.cells <- intersect(names(anno.ct), Cells(seu))
anno.ct.seu <- rep(NA, length(Cells(seu)))
names(anno.ct.seu) <- Cells(seu)
anno.ct.seu[intersect.cells] <- anno.ct[intersect.cells]
seu@meta.data$anno.ct <- anno.ct.seu
[ ]:
options(repr.plot.width=20, repr.plot.height=20, repr.plot.res = 200)
p1 <- DimPlot(seu, label = T, pt.size = 0.1,
reduction = 'umap_harmony') +
labs(title = 'cluster') #+ theme(legend.position = 'none')
p2 <- DimPlot(seu, label = T, pt.size = 0.1, group.by = 'orig.ident',
reduction = 'umap_harmony') +
labs(title = 'sample.origin') #+ theme(legend.position = 'none')
p3 <- DimPlot(seu, label = T, pt.size = 0.1, group.by = 'status',
reduction = 'umap_harmony') +
labs(title = 'status') #+ theme(legend.position = 'none')
p4 <- DimPlot(seu, label = T, pt.size = 0.1, group.by = 'patient',
reduction = 'umap_harmony') +
labs(title = 'patient') #+ theme(legend.position = 'none')
p <- arrangeGrob(grobs = list(p1,p2,p3,p4), ncol = 2,
top = paste0('MIBC5-16: raw merge'))
grid.arrange(p)
[ ]:
options(repr.plot.width=10, repr.plot.height=10, repr.plot.res = 200)
cells.1 <- Cells(seu)[ is.na(anno.ct.seu) ]
cells.2 <- Cells(seu)[ !is.na(anno.ct.seu) ]
DimPlot(seu, label = T, pt.size = 0.1, group.by = 'anno.ct', cells = c(cells.1,cells.2),
reduction = 'umap_harmony') +
labs(title = 'annotated cell types')+ theme(legend.position = 'none')
[26]:
plots <- list()
for(temp.ct in unique(anno.ct.seu)){
if(is.na(temp.ct)) next
p <- DimPlot(seu, label = T, pt.size = 0.1, sizes.highlight = 0.2,
cells.highlight = Cells(seu)[seu@meta.data$anno.ct == temp.ct],
reduction = 'umap_harmony') +
labs(title = temp.ct)+ theme(legend.position = 'none')
plots[[temp.ct]] <- p
}
[27]:
p <- arrangeGrob(grobs = plots, ncol = 2,
top = paste0('annotated cell type'))
[28]:
length(plots)
26
[ ]:
options(repr.plot.width=10, repr.plot.height=65, repr.plot.res = 200)
grid.arrange(p)
[32]:
anno.by.cluster <- as.character(seu@active.ident)
[33]:
anno.by.cluster[ anno.by.cluster %in% c(5,15,19,21,22)] <- 'Deadcell'
anno.by.cluster[ anno.by.cluster %in% c(11)] <- 'CD4-Treg(Th17)'
anno.by.cluster[ anno.by.cluster %in% c(4)] <- 'CD4-Treg(naïve)'
anno.by.cluster[ anno.by.cluster %in% c(13)] <- 'CD4-UK1'
anno.by.cluster[ anno.by.cluster %in% c(10,12,18)] <- 'CD4-Th17'
anno.by.cluster[ anno.by.cluster %in% c(2,3)] <- 'CD4-naïve-RPL'
anno.by.cluster[ anno.by.cluster %in% c(7)] <- 'gdT-antitumor'
anno.by.cluster[ anno.by.cluster %in% c(17)] <- 'CD8-TTE'
anno.by.cluster[ anno.by.cluster %in% c(16)] <- 'CD8-IFNsignaling'
anno.by.cluster[ anno.by.cluster %in% c(0,14)] <- 'CD8-TRM'
anno.by.cluster[ anno.by.cluster %in% c(1,6,8,9,20)] <- 'CD8-CT'
anno.by.cluster[ anno.by.cluster %in% c(14)] <- '14'
[34]:
seu@meta.data$anno.by.cluster <- anno.by.cluster
[35]:
seu.sub <- SubsetData(seu,
cells = Cells(seu)[seu@meta.data$anno.by.cluster != 'Deadcell'])
Warning message:
“'SubsetData' is deprecated.
Use 'subset' instead.
See help("Deprecated")”
Warning message:
“'OldWhichCells' is deprecated.
Use 'WhichCells' instead.
See help("Deprecated")”
Warning message:
“All keys should be one or more alphanumeric characters followed by an underscore '_', setting key to umap_”
[36]:
rm(seu)
gc()
| used | (Mb) | gc trigger | (Mb) | max used | (Mb) | |
|---|---|---|---|---|---|---|
| Ncells | 2491200 | 133.1 | 4237052 | 226.3 | 4237052 | 226.3 |
| Vcells | 7294076914 | 55649.4 | 10463667213 | 79831.5 | 7299759682 | 55692.8 |
[37]:
seu.sub <- RunUMAP(object = seu.sub, reduction.name = 'umap_harmony',
umap.method = 'umap-learn', metric = 'correlation',
reduction = 'harmony', dims = 1:20)
[ ]:
options(repr.plot.width=20, repr.plot.height=20, repr.plot.res = 200)
p1 <- DimPlot(seu.sub, label = T, pt.size = 0.1, group.by = 'anno.by.cluster',
reduction = 'umap_harmony') +
labs(title = 'annotation') #+ theme(legend.position = 'none')
p2 <- DimPlot(seu.sub, label = T, pt.size = 0.1, group.by = 'orig.ident',
reduction = 'umap_harmony') +
labs(title = 'sample.origin') #+ theme(legend.position = 'none')
p3 <- DimPlot(seu.sub, label = T, pt.size = 0.1, group.by = 'status',
reduction = 'umap_harmony') +
labs(title = 'status') #+ theme(legend.position = 'none')
p4 <- DimPlot(seu.sub, label = T, pt.size = 0.1, group.by = 'patient',
reduction = 'umap_harmony') +
labs(title = 'patient') #+ theme(legend.position = 'none')
p <- arrangeGrob(grobs = list(p1,p2,p3,p4), ncol = 2,
top = paste0('MIBC5-16: raw merge'))
grid.arrange(p)
[39]:
# X=U*Sigma*V, U and V are Unitary Matrix, e.g. V^-1=t(V)
# U=X*t(V)*(1/Sigma)
# X(nGene*nCell)=U(nGene*nPC)*Sigma(nPC*nPC)*V(nPC*nCell)
#X <- seu@assays$RNA@scale.data
Sigma <- seu.sub@reductions$pca@stdev
V <- seu.sub@reductions$pca@cell.embeddings[, 1:100]
V <- apply(V,MARGIN = 2,FUN = function(x){
return(x/sqrt(sum(x^2)))
})
# here V(nCell*nPC) is actually t(V)
U <- seu.sub@assays$RNA@scale.data %*% V %*% diag(1/Sigma)
# after batch effect removal, X.new=U*Sigma*V.new
V.new <- seu.sub@reductions$harmony@cell.embeddings[, 1:100]
V.new <- apply(V.new,MARGIN = 2,FUN = function(x){
return(x/sqrt(sum(x^2)))
})
#clean.data.full <- U %*% diag(Sigma) %*% t(V.new)
seu.sub <- SetAssayData(seu.sub, slot = 'scale.data',
new.data = U %*% diag(Sigma) %*% t(V.new))
[40]:
cell.subset <- c()
for(ct in unique(seu.sub@meta.data$anno.by.cluster)){
temp.cells <- Cells(seu.sub)[seu.sub@meta.data$anno.by.cluster == ct]
temp.sub <- sample(temp.cells,
size = min(2000, length(temp.cells)),
replace = F)
cell.subset <- c(cell.subset,temp.sub)
}
[41]:
seu.forMarkers <- CreateSeuratObject(counts = seu.sub@assays$RNA@scale.data[, cell.subset],
project = 'sub')
[42]:
seu.forMarkers@meta.data$anno.by.cluster <- seu.sub@meta.data[cell.subset,]$anno.by.cluster
[45]:
FindMyMarkers.temp <- function (seu, group.by = NULL, slot = 'data',
order = NULL, features = NULL)
{
if (is.null(group.by)) {
cell.label <- seu@active.ident
}
else {
cell.label <- seu[[group.by]]
}
if (is.null(order)) {
order <- unique(cell.label)[, 1]
}
cell.names <- Cells(seu)
markers <- data.frame()
for (cluster in order) {
temp.markers <- FindMarkers(object = seu, slot = slot,
group.by = group.by,
ident.1 = cluster, verbose = F, features = features)
temp.markers$cluster <- cluster
temp.markers$gene <- row.names(temp.markers)
row.names(temp.markers) <- paste0(temp.markers$gene,
".", as.character(cluster))
markers <- rbind(markers, temp.markers)
}
return(markers)
}
[46]:
plan("sequential")
[47]:
library(future)
plan("multiprocess", workers = 12)
options(future.globals.maxSize = 10000 * 1024^2)
markers <- FindMyMarkers.temp(seu.forMarkers, slot = 'counts',
group.by = 'anno.by.cluster')
plan("sequential")
Warning message in log(x = mean(x = x) + pseudocount.use):
“NaNs produced”
Warning message in log(x = mean(x = x) + pseudocount.use):
“NaNs produced”
[ ]:
options(repr.plot.width=20, repr.plot.height=20, repr.plot.res = 200)
tops <- extractTopNMarkers(df = markers, top_n = 15,
group.order = unique(anno.by.cluster)[order(unique(anno.by.cluster))])
p <- DoHeatmap(object = seu.sub, cells = cell.subset,
group.by = 'anno.by.cluster',
features = tops$gene, draw.lines = FALSE,
size = 5, angle = 45, disp.min = -2, disp.max = 2) +
theme(text = element_text(size = 6)) +
labs(title = 'T cells')
ggsave(plot = p, limitsize = FALSE, width = 20, height = 20,
filename = file.path('/home/public/ghx/wuLab/Bladder/0101_T_annotation',
'T_heatmap_3rdAnnotation.jpg'))
p
[49]:
write.csv(markers, file.path('/home/public/ghx/wuLab/Bladder/0101_T_annotation',
'T_markers_3rdAnnotation.csv'))
[58]:
saveRDS(seu.sub,
file.path('/home/public/ghx/wuLab/Bladder/0101_T_annotation',
'T_clean_3rdAnnotation.rds'))
4th annotation#
[18]:
annostr <- 'Dead cell 7T-12
Dead cell 8T-7
Dead cell 6T-7
Dead cell 9T-11
Dead cell 4P-3
Dead cell 6P-7
Dead cell 5T-1
Dead cell 5P-9
Dead cell 10T-8
Dead cell 7P-11
Dead cell 11P-4
Dead cell 10P-8
Dead cell 11T-9
Dead cell 8P-1
Dead cell 8T-8
Dump 9P-8
Dump 11P-12
Dump 10P-14
Dump 9P-9
Dump 7P-12
Dump 5T-11
Dump 6T-10
Dump 7T-10
Dump 8T-11
Dump 5T-2
Dump 6T-12
Dump 11P-13
Dump 7P-13
Dump 10P-15
Dump 11T-1
Dump 9T-12
Dump 10P-11
Dump 10T-12
Dump 6P-11
Dump 11T-11
Dump 10T-11
Dump 9P-5
Dump 6P-8'
annolines <- strsplit(annostr, split = '\n')[[1]]
[19]:
anno.ct <- rep(NA, length(old.clusters))
names(anno.ct) <- names(old.clusters)
for(anno in strsplit(annolines, split = '\t')){
anno.ct[old.clusters %in% anno[2]] <- anno[1]
}
[22]:
cells.remove <- union(names(anno.ct)[!is.na(anno.ct)],
Cells(seu)[ seu@active.ident %in% c(5,15,19,21,22)])
[ ]:
DimPlot(seu, reduction = 'umap_harmony',
cells.highlight = cells.remove, pt.size = 0.1, sizes.highlight = 0.1)
[26]:
orig.ident <- seu@active.ident
[28]:
cells.keep <- setdiff(Cells(seu),cells.remove)
seu.sub <- subset(seu, cells = cells.keep)
Warning message:
“All object keys must be alphanumeric characters, followed by an underscore ('_'), setting key to 'umapharmony_'”
[29]:
seu.sub@meta.data$orig.clusters <- orig.ident[Cells(seu.sub)]
[30]:
seu.sub <- RunUMAP(object = seu.sub, reduction.name = 'umap_harmony',
umap.method = 'umap-learn', metric = 'correlation',
reduction = 'harmony', dims = 1:20)
[31]:
seu.sub <- FindNeighbors(seu.sub, reduction = 'harmony', dims = 1:20, verbose = F)
[57]:
anno.by.cluster <- as.character(seu.sub@meta.data$orig.clusters)
anno.by.cluster[ anno.by.cluster %in% c(11)] <- 'CD4-Treg(Th17)'
anno.by.cluster[ anno.by.cluster %in% c(4)] <- 'CD4-Treg(naïve)'
anno.by.cluster[ anno.by.cluster %in% c(13)] <- 'CD4-Th1-like'
anno.by.cluster[ anno.by.cluster %in% c(10,12)] <- 'CD4-Th17-1'
anno.by.cluster[ anno.by.cluster %in% c(18)] <- 'CD4-Th17-2'
anno.by.cluster[ anno.by.cluster %in% c(2)] <- 'naïve'
anno.by.cluster[ anno.by.cluster %in% c(3)] <- 'naïve-RPL'
anno.by.cluster[ anno.by.cluster %in% c(7)] <- 'gdT-antitumor'
anno.by.cluster[ anno.by.cluster %in% c(17)] <- 'CD8-TEM-CX3CR1'
anno.by.cluster[ anno.by.cluster %in% c(16)] <- 'CD8-IFNsignaling'
anno.by.cluster[ anno.by.cluster %in% c(0)] <- 'CD8-TRM-1'
anno.by.cluster[ anno.by.cluster %in% c(14)] <- 'CD8-TRM-2'
anno.by.cluster[ anno.by.cluster %in% c(1)] <- 'CD8-TEM'
anno.by.cluster[ anno.by.cluster %in% c(6)] <- 'CD8-CT-1'
anno.by.cluster[ anno.by.cluster %in% c(9)] <- 'CD8-CT-2'
anno.by.cluster[ anno.by.cluster %in% c(8)] <- 'CD8-Exhausted'
anno.by.cluster[ anno.by.cluster %in% c(20)] <- 'CD8-UK'
ct.order <- c('naïve','naïve-RPL',
'CD4-Th1-like','CD4-Th17-1','CD4-Th17-2',
'CD4-Treg(naïve)','CD4-Treg(Th17)',
'CD8-TRM-1','CD8-TRM-2','CD8-TEM',
'CD8-TEM-CX3CR1','CD8-CT-1','CD8-CT-2','CD8-IFNsignaling',
'CD8-Exhausted','CD8-UK',
'gdT-antitumor')
seu.sub@meta.data$anno.by.cluster <- factor(anno.by.cluster, levels = ct.order)
[ ]:
options(repr.plot.width=20, repr.plot.height=20, repr.plot.res = 200)
p1 <- DimPlot(seu.sub, label = T, pt.size = 0.1, group.by = 'orig.clusters',
reduction = 'umap_harmony') +
labs(title = 'cluster') #+ theme(legend.position = 'none')
p2 <- DimPlot(seu.sub, label = T, pt.size = 0.1, group.by = 'anno.by.cluster',
reduction = 'umap_harmony') +
labs(title = 'annotation') #+ theme(legend.position = 'none')
p3 <- DimPlot(seu.sub, label = T, pt.size = 0.1, group.by = 'status',
reduction = 'umap_harmony') +
labs(title = 'status') #+ theme(legend.position = 'none')
p4 <- DimPlot(seu.sub, label = T, pt.size = 0.1, group.by = 'orig.ident',
reduction = 'umap_harmony') +
labs(title = 'orig.clusters') #+ theme(legend.position = 'none')
p <- arrangeGrob(grobs = list(p1,p2,p3,p4), ncol = 2,
top = paste0('MIBC5-16: harmony T'))
ggsave(plot = p, limitsize = FALSE, width = 20, height = 20,
filename = file.path('/home/public/ghx/wuLab/Bladder/0101_T_annotation/results',
'T_overview_4thAnnotation.jpg'))
grid.arrange(p)
[ ]:
[ ]:
library(future)
plan("multiprocess", workers = 12)
options(future.globals.maxSize = 10000 * 1024^2)
markers <- FindMyMarkers(seu.sub, group.by = 'anno.by.cluster')
plan("sequential")
[ ]:
top10 <- extractTopNMarkers(df = markers, top_n = 20, group.order = ct.order)
sub.cells <- c()
for(c in levels(seu.sub@meta.data$anno.by.cluster)){
sub.cells <- c(sub.cells, sample(Cells(seu.sub)[ seu.sub@meta.data$anno.by.cluster == c ], replace = F,
size = min(1000,sum(seu.sub@meta.data$anno.by.cluster == c))))
}
length(sub.cells)
[ ]:
options(repr.plot.width=10, repr.plot.height=10, repr.plot.res = 200)
p <- DoHeatmap(object = seu.sub, cells = sub.cells,draw.lines = FALSE, group.by = 'anno.by.cluster',
features = top10$gene, size = 5, angle = 30, disp.min = -2, disp.max = 2) +
theme(text = element_text(size = 4))
ggsave(plot = p, limitsize = FALSE, width = 20, height = 20,
filename = file.path('/home/public/ghx/wuLab/Bladder/0101_T_annotation/results',
'T_heatmap_4thAnnotation.jpg'))
p
[ ]:
saveRDS(seu.sub, file = file.path('/home/public/ghx/wuLab/Bladder/0101_T_annotation',
'T_clean_4thAnnotation.rds'))
[1]:
seu.sub <- readRDS(file = file.path('/home/public/ghx/wuLab/Bladder/0101_T_annotation',
'T_clean_4thAnnotation.rds'))
[3]:
saveRDS(seu.sub@meta.data, file = file.path('/home/public/ghx/wuLab/Bladder/0101_T_annotation',
'V1_Celltype_T_annotation_withAnno.rds'))
[ ]:
write.csv(markers, file.path('/home/public/ghx/wuLab/Bladder/0101_T_annotation/results',
'T_markers_4thAnnotation.csv'))
[68]:
# X=U*Sigma*V, U and V are Unitary Matrix, e.g. V^-1=t(V)
# U=X*t(V)*(1/Sigma)
# X(nGene*nCell)=U(nGene*nPC)*Sigma(nPC*nPC)*V(nPC*nCell)
#X <- seu@assays$RNA@scale.data
Sigma <- seu.sub@reductions$pca@stdev
V <- seu.sub@reductions$pca@cell.embeddings[, 1:100]
V <- apply(V,MARGIN = 2,FUN = function(x){
return(x/sqrt(sum(x^2)))
})
# here V(nCell*nPC) is actually t(V)
U <- seu.sub@assays$RNA@scale.data %*% V %*% diag(1/Sigma)
# after batch effect removal, X.new=U*Sigma*V.new
V.new <- seu.sub@reductions$harmony@cell.embeddings[, 1:100]
V.new <- apply(V.new,MARGIN = 2,FUN = function(x){
return(x/sqrt(sum(x^2)))
})
#clean.data.full <- U %*% diag(Sigma) %*% t(V.new)
seu.sub <- SetAssayData(seu.sub, slot = 'scale.data',
new.data = U %*% diag(Sigma) %*% t(V.new))
[ ]:
options(repr.plot.width=20, repr.plot.height=20, repr.plot.res = 200)
tops <- extractTopNMarkers(df = markers, top_n = 15, group.order = ct.order)
p <- DoHeatmap(object = seu.sub, cells = sub.cells, group.by = 'anno.by.cluster',
features = tops$gene, draw.lines = FALSE,
size = 5, angle = 30, disp.min = -2, disp.max = 2) +
theme(text = element_text(size = 6)) +
labs(title = 'T cells')
ggsave(plot = p, limitsize = FALSE, width = 20, height = 20,
filename = file.path('/home/public/ghx/wuLab/Bladder/0101_T_annotation/results',
'T_heatmap_4thAnnotation_recons.jpg'))
p
[ ]:
compare cell type composition of each sample, P&T#
[71]:
stats <- table(seu.sub@meta.data$anno.by.cluster, seu.sub@meta.data$orig.ident)
stats <- as.data.frame(stats)
names(stats) <- c('cluster', 'origin', 'cell.number')
color <- rep(c(rep('white',length(unique(seu.sub@meta.data$anno.by.cluster))),
rep('black',length(unique(seu.sub@meta.data$anno.by.cluster)))), 12)
p5 <- ggplot(stats, aes(fill=cluster, y=cell.number, x=origin)) +
geom_bar( stat="identity", position="fill", color = color) +
theme(axis.text.x = element_text(size=10, angle = 90))
options(repr.plot.width=10, repr.plot.height=10, repr.plot.res = 200)
p5
[72]:
stats <- table(seu.sub@meta.data$anno.by.cluster, seu.sub@meta.data$orig.ident)
stats <- apply(stats, MARGIN = 2,
FUN = function(x) {return(x/sum(x))})
stats <- t(apply(stats, MARGIN = 1,
FUN = function(x) {return(x/max(x))}))
stats <- as.data.frame(as.table(stats))
colnames(stats) <- c('celltype','sample','Freq')
stats$patient <- substr(stats$sample, start = 5,
stop = sapply(as.character(stats$sample),FUN=nchar)-1)
stats$patient <- factor(stats$patient, levels=5:16)
stats$status <- substr(stats$sample,
start = sapply(as.character(stats$sample),FUN=nchar),
stop = sapply(as.character(stats$sample),FUN=nchar))
stats$status <- factor(stats$status, levels=c('P','T'))
[73]:
p <- ggplot(data = stats,
aes(x=status, y=Freq, color=patient, group=patient)) +
geom_line() + geom_point(size=4, shape=20)
[74]:
p + facet_wrap(. ~ celltype)
[75]:
library(pheatmap)
[80]:
stats <- table(seu.sub@meta.data$anno.by.cluster, seu.sub@meta.data$orig.ident)
stats <- apply(stats, MARGIN = 2,
FUN = function(x) {return(x/sum(x))})
stats <- t(apply(stats, MARGIN = 1,
FUN = function(x) {return(x/max(x))}))
samples <- colnames(stats)
patient <- substr(samples, start = 5,
stop = sapply(samples,FUN=nchar)-1)
patient <- factor(patient, levels=5:16)
status <- substr(samples, start = sapply(samples,FUN=nchar),
stop = sapply(samples,FUN=nchar))
status <- factor(status, levels=c('P','T'))
row.anno <- data.frame(row.names = colnames(stats),
patient = patient, status = status)
col.anno <- data.frame(row.names = row.names(stats),
CT = substr(row.names(stats),
start = 1, stop=3))
[81]:
pheatmap(t(stats), cutree_rows = 6, cutree_cols = 5,
annotation_row = row.anno, annotation_col = col.anno)
[ ]:
plan("multiprocess", workers = 12)
options(future.globals.maxSize = 10000 * 1024^2)
markers <- FindMyMarkers.temp(seu.forMarkers, slot = 'counts',
group.by = 'anno.by.cluster')
plan("sequential")
[178]:
cells.P <- Cells(seu.sub)[seu.sub@meta.data$anno.by.cluster == ct &
seu.sub@meta.data$status == 'P']
cells.T <- Cells(seu.sub)[seu.sub@meta.data$anno.by.cluster == ct &
seu.sub@meta.data$status == 'T']
temp.cluster <- rep(0, length(Cells(seu.sub)))
names(temp.cluster) <- Cells(seu.sub)
temp.cluster[cells.P] <- 'P'
temp.cluster[cells.T] <- 'T'
seu.sub@meta.data$temp.cluster <- temp.cluster
[197]:
plan("sequential")
[198]:
markers.within.ct <- data.frame()
plan("multiprocess", workers = 12)
options(future.globals.maxSize = 10000 * 1024^2)
for(ct in row.names(stats)){
cells.P <- Cells(seu.sub)[seu.sub@meta.data$anno.by.cluster == ct &
seu.sub@meta.data$status == 'P']
cells.T <- Cells(seu.sub)[seu.sub@meta.data$anno.by.cluster == ct &
seu.sub@meta.data$status == 'T']
temp.cluster <- rep(0, length(Cells(seu.sub)))
names(temp.cluster) <- Cells(seu.sub)
temp.cluster[cells.P] <- 'P'
temp.cluster[cells.T] <- 'T'
seu.sub@meta.data$temp.cluster <- temp.cluster
temp.markers <- FindMarkers(object = seu.sub, group.by = 'temp.cluster',
ident.1 = 'P',ident.2 = 'T')
temp.markers$cluster <- ct
temp.markers$gene <- as.character(row.names(temp.markers))
markers.within.ct <- rbind(markers.within.ct,temp.markers)
}
plan("sequential")
[199]:
write.csv(markers.within.ct,
file.path('/home/public/ghx/wuLab/Bladder/0101_T_annotation',
'markers_compare_PT.csv'))
[185]:
genes <- row.names(seu.sub)
numbers of cells from each sample cluster to each merge cluster#
[189]:
cell.nb.table <- as.matrix(table(old.clusters[ Cells(seu.sub)],
as.character(seu.sub@active.ident)))
cell.nb.table <- cbind(cell.nb.table, rowSums(cell.nb.table))
[193]:
cell.nb.table.pct <- t(apply(cell.nb.table, MARGIN = 1,
FUN = function(x){ return(x/x[19])}))
[194]:
write.csv(cell.nb.table,
file.path('/home/public/ghx/wuLab/Bladder/0101_T_annotation/results',
'Table_sampleC-mergeC_number.csv'))
write.csv(cell.nb.table.pct,
file.path('/home/public/ghx/wuLab/Bladder/0101_T_annotation/results',
'Table_sampleC-mergeC_percentage.csv'))