T annotation

Contents

[1]:

library(Seurat)
library(gridExtra)
library(Matrix)
library(ggplot2)

T annotation#

[2]:

source('/home/public/ghx/software/my_seurat.r')

[3]:

RDSPath <- '/home/public/ghx/wuLab/Bladder/1217_all/seu'

[4]:

ct <- 'T'

[5]:

seu <- readRDS(file = file.path('/home/public/ghx/wuLab/Bladder/1217_all/seu',
                                'V1_Celltype_T_seu.rds'))

[6]:

old.clusters <- readRDS(file = '/home/public/ghx/wuLab/Bladder/1119_T/sampleCluster.rds')
new.clusters <- readRDS(file = '/home/public/ghx/wuLab/Bladder/1217_all/CellTypes/T/sample_cluster.rds')

[7]:

sample.id <- sapply(old.clusters, USE.NAMES = T, simplify = T,
       FUN = function(x){
           return(strsplit(x, split = '-', fixed = T)[[1]][1])
       })
sample.id <- paste0('MIBC',sample.id)
names(old.clusters) <- paste0(sample.id, '.',names(old.clusters))

3rd annotation#

[20]:

annostr <- '11T-0   CD4-?
6P-1        CD4-?
7T-4        CD4-?
11T-4       CD4-?
5P-4        CD4-?
8P-3        CD4-?
11P-1       CD4-?
7T-6        CD4-UK1
9T-3        CD4-UK1
6T-5        CD4-UK1
6P-6        CD4-UK1
5T-3        CD4-UK1
8T-14       CD4-UK1
10T-5       CD4-naïve
9T-10       CD4-naïve
10P-10      CD4-naïve-RPL
7T-2        CD4-naïve-RPL
5T-6        CD4-naïve-RPL
9P-2        CD4-naïve-RPL
10T-4       CD4-naïve-RPL
9T-4        CD4-naïve-RPL
5P-3        CD4-naïve-RPL
8P-5        CD4-naïve-RPL
11T-5       CD4-naïve
10T-1       CD4-naïve
10T-2       CD4-naïve
10P-2       CD4-naïve
6T-1        CD4-RPL
11T-3       CD4-RPL
6P-2        CD4-RPL
4P-5        CD4-Th17
8T-5        CD4-Th17
11P-9       CD4-Th17
11P-0       CD4-Th17
10P-1       CD4-Th17
5T-2        CD4-Th17
7P-10       CD4-Th17
10P-5       CD4-Th17
8P-8        CD4-Th17
5P-1        CD4-Th17
7P-8        CD4-Th17
5T-8        CD4-Th17
7P-2        CD4-Th17
9P-0        CD4-Th17
5P-8        CD4-Tr1
4P-2        CD4-Tr1
6T-10       CD4-Treg
9T-9        CD4-Treg
8P-7        CD4-Treg
8T-4        CD4-Treg
10T-6       CD4-Treg
7T-1        CD4-Treg
7P-6        CD4-Treg
11T-6       CD4-Treg
5T-11       CD4-Treg
11P-10      CD4-Treg(naïve)
6P-4        CD4-Treg(naïve)
5T-5        CD4-Treg(naïve)
6T-2        CD4-Treg(naïve)
9P-7        CD4-Treg(naïve)
4P-4        CD4-Treg(naïve)
11T-1       CD4-Treg(naïve)
5P-10       CD4-Treg(naïve)
10P-12      CD4-Treg(Th17)
9T-1        CD4-Treg(Th17)
6T-6        CD4-Treg(Th17)
5T-0        CD8-TEM
4T-0        CD8-?
4T-1        CD8-?
5T-4        CD8-CT
4T-3        CD8-CT
9T-5        CD8-CT
4T-5        CD8-CT
6T-4        CD8-CT
6T-12       CD8-Exhau
7T-11       CD8-Exhau
4T-7        CD8-Exhau
4T-2        CD8-Exhau
6P-8        CD8-HSP
11P-6       CD8-HSP
9T-12       CD8-HSP
10P-7       CD8-HSP
5T-9        CD8-HSP
4T-4        CD8-HSP
9T-7        CD8-IFNsignaling
4T-8        CD8-IFNsignaling
7P-13       CD8-IG
11P-13      CD8-IG
7T-9        CD8-IG
11T-11      CD8-IG
10T-11      CD8-IG
10T-10      CD8-MAIT
6T-9        CD8-MAIT
11T-7       CD8-MAIT
6P-12       CD8-MAIT
6P-9        CD8-RPL
7P-9        CD8-RPL
9P-6        CD8-TEM
8P-9        CD8-TEM
6P-10       CD8-TEM
7T-5        CD8-TEM
11T-2       CD8-TEM
8P-4        CD8-TEM
9P-3        CD8-TEM
10T-9       CD8-TRM
5P-0        CD8-TRM
9P-1        CD8-TTE
6T-3        CD8-virus-responding
6T-0        CD8-virus-responding
4P-3        Deadcell
8T-7        Deadcell
6T-7        Deadcell
6P-7        Deadcell
8T-8        Deadcell
5T-1        Deadcell
10T-8       Deadcell
7P-11       Deadcell
11T-9       Deadcell
9T-11       Deadcell
7T-12       Deadcell
10P-8       Deadcell
5P-9        Deadcell
8P-1        Deadcell
11P-4       Deadcell
11P-12      Dump
10T-12      Dump
4T-11       Dump
11T-10      Dump
8P-12       Dump
10P-11      Dump
9P-8        Dump
9P-5        Dump
10P-14      Dump
8T-2        gdT-antitumor
9T-8        gdT-antitumor
8P-13       gdT-antitumor
5P-11       gdT-antitumor
6T-11       gdT-antitumor
11P-8       gdT-antitumor
10T-13      gdT-antitumor
9P-4        gdT-antitumor
7P-12       gdT-antitumor
10P-9       gdT-antitumor
7T-7        gdT-antitumor
11T-8       gdT-antitumor
8T-6        gdT-antitumor
8T-9        gdT-antitumor
8T-13       gdT-antitumor
5T-12       gdT-antitumor
8P-10       gdT-antitumor
4T-10       gdT-antitumor
4P-6        gdT-antitumor
4T-9        Tunconv
5T-10       Tunconv
8P-11       CD8-TEM
7T-0        CD8-CT
7T-3        CD8-CT
8T-3        CD8-Exhau
9T-0        CD8-TEM
9T-6        CD8-TEM
8T-1        CD8-RPL
11P-7       CD8-TEM
6P-0        CD8-CT
8P-2        CD8-CT
6P-3        CD8-TEM
4P-0        CD8-CT
10P-3       CD8-CT
5P-2        CD8-TEM
11P-5       CD8-TEM
11P-2       CD8-TRM
8T-11       CD8-HSP
4P-1        CD8-TRM
11P-11      CD8-TEM
5P-5        CD8-TRM
8P-0        CD8-TRM
8P-6        CD8-Exhau
7P-7        CD8-TRM
9T-2        CD8-?
7P-5        CD8-TEM
8T-0        CD8-TEM
8T-10       CD8-TEM
7P-3        CD8-TEM
10P-13      CD8-TRM
8T-12       CD8-IFNsignaling
4T-6        gdT-antitumor
10T-7       CD8-CT
10T-0       CD8-TEM
10P-4       CD8-TEM
7P-4        CD8-TEM
7P-0        CD8-TEM
11P-3       CD8-TTE
10P-6       CD8-?
6T-8        CD8-CT
7T-10       CD8-?
5T-7        CD8-TRM'
annolines <- strsplit(annostr, split = '\n')[[1]]

[22]:

anno.ct <- rep(NA, length(old.clusters))
names(anno.ct) <- names(old.clusters)
for(anno in strsplit(annolines, split = '\t')){
    anno.ct[old.clusters %in% anno[1]] <- anno[2]
}

[23]:

intersect.cells <- intersect(names(anno.ct), Cells(seu))
anno.ct.seu <- rep(NA, length(Cells(seu)))
names(anno.ct.seu) <- Cells(seu)
anno.ct.seu[intersect.cells] <- anno.ct[intersect.cells]
seu@meta.data$anno.ct <- anno.ct.seu

[ ]:

options(repr.plot.width=20, repr.plot.height=20, repr.plot.res = 200)
p1 <- DimPlot(seu, label = T, pt.size = 0.1,
              reduction = 'umap_harmony') +
     labs(title = 'cluster') #+ theme(legend.position = 'none')
p2 <- DimPlot(seu, label = T, pt.size = 0.1, group.by = 'orig.ident',
              reduction = 'umap_harmony') +
     labs(title = 'sample.origin') #+ theme(legend.position = 'none')
p3 <- DimPlot(seu, label = T, pt.size = 0.1, group.by = 'status',
              reduction = 'umap_harmony') +
     labs(title = 'status') #+ theme(legend.position = 'none')
p4 <- DimPlot(seu, label = T, pt.size = 0.1, group.by = 'patient',
              reduction = 'umap_harmony') +
     labs(title = 'patient') #+ theme(legend.position = 'none')
p <- arrangeGrob(grobs = list(p1,p2,p3,p4), ncol = 2,
                top = paste0('MIBC5-16: raw merge'))
grid.arrange(p)

[ ]:

options(repr.plot.width=10, repr.plot.height=10, repr.plot.res = 200)

cells.1 <- Cells(seu)[ is.na(anno.ct.seu) ]
cells.2 <- Cells(seu)[ !is.na(anno.ct.seu) ]
DimPlot(seu, label = T, pt.size = 0.1, group.by = 'anno.ct', cells = c(cells.1,cells.2),
        reduction = 'umap_harmony') +
     labs(title = 'annotated cell types')+ theme(legend.position = 'none')

[26]:

plots <- list()
for(temp.ct in unique(anno.ct.seu)){
    if(is.na(temp.ct)) next
    p <- DimPlot(seu, label = T, pt.size = 0.1, sizes.highlight = 0.2,
                 cells.highlight = Cells(seu)[seu@meta.data$anno.ct == temp.ct],
                reduction = 'umap_harmony') +
             labs(title = temp.ct)+ theme(legend.position = 'none')
    plots[[temp.ct]] <- p
}

[27]:

p <- arrangeGrob(grobs = plots, ncol = 2,
                top = paste0('annotated cell type'))

[28]:

length(plots)

26

[ ]:

options(repr.plot.width=10, repr.plot.height=65, repr.plot.res = 200)
grid.arrange(p)

[32]:

anno.by.cluster <- as.character(seu@active.ident)

[33]:

anno.by.cluster[ anno.by.cluster %in% c(5,15,19,21,22)] <- 'Deadcell'
anno.by.cluster[ anno.by.cluster %in% c(11)] <- 'CD4-Treg(Th17)'
anno.by.cluster[ anno.by.cluster %in% c(4)] <- 'CD4-Treg(naïve)'
anno.by.cluster[ anno.by.cluster %in% c(13)] <- 'CD4-UK1'
anno.by.cluster[ anno.by.cluster %in% c(10,12,18)] <- 'CD4-Th17'
anno.by.cluster[ anno.by.cluster %in% c(2,3)] <- 'CD4-naïve-RPL'
anno.by.cluster[ anno.by.cluster %in% c(7)] <- 'gdT-antitumor'
anno.by.cluster[ anno.by.cluster %in% c(17)] <- 'CD8-TTE'
anno.by.cluster[ anno.by.cluster %in% c(16)] <- 'CD8-IFNsignaling'
anno.by.cluster[ anno.by.cluster %in% c(0,14)] <- 'CD8-TRM'
anno.by.cluster[ anno.by.cluster %in% c(1,6,8,9,20)] <- 'CD8-CT'
anno.by.cluster[ anno.by.cluster %in% c(14)] <- '14'

[34]:

seu@meta.data$anno.by.cluster <- anno.by.cluster

[35]:

seu.sub <- SubsetData(seu,
                      cells = Cells(seu)[seu@meta.data$anno.by.cluster != 'Deadcell'])

Warning message:
“'SubsetData' is deprecated.
Use 'subset' instead.
See help("Deprecated")”
Warning message:
“'OldWhichCells' is deprecated.
Use 'WhichCells' instead.
See help("Deprecated")”
Warning message:
“All keys should be one or more alphanumeric characters followed by an underscore '_', setting key to umap_”

[36]:

rm(seu)
gc()

A matrix: 2 × 6 of type dbl
	used	(Mb)	gc trigger	(Mb)	max used	(Mb)
Ncells	2491200	133.1	4237052	226.3	4237052	226.3
Vcells	7294076914	55649.4	10463667213	79831.5	7299759682	55692.8

[37]:

seu.sub <- RunUMAP(object = seu.sub, reduction.name = 'umap_harmony',
               umap.method = 'umap-learn', metric = 'correlation',
               reduction = 'harmony', dims = 1:20)

[ ]:

options(repr.plot.width=20, repr.plot.height=20, repr.plot.res = 200)
p1 <- DimPlot(seu.sub, label = T, pt.size = 0.1, group.by = 'anno.by.cluster',
              reduction = 'umap_harmony') +
     labs(title = 'annotation') #+ theme(legend.position = 'none')
p2 <- DimPlot(seu.sub, label = T, pt.size = 0.1, group.by = 'orig.ident',
              reduction = 'umap_harmony') +
     labs(title = 'sample.origin') #+ theme(legend.position = 'none')
p3 <- DimPlot(seu.sub, label = T, pt.size = 0.1, group.by = 'status',
              reduction = 'umap_harmony') +
     labs(title = 'status') #+ theme(legend.position = 'none')
p4 <- DimPlot(seu.sub, label = T, pt.size = 0.1, group.by = 'patient',
              reduction = 'umap_harmony') +
     labs(title = 'patient') #+ theme(legend.position = 'none')
p <- arrangeGrob(grobs = list(p1,p2,p3,p4), ncol = 2,
                top = paste0('MIBC5-16: raw merge'))
grid.arrange(p)

[39]:

# X=U*Sigma*V, U and V are Unitary Matrix, e.g. V^-1=t(V)
# U=X*t(V)*(1/Sigma)
# X(nGene*nCell)=U(nGene*nPC)*Sigma(nPC*nPC)*V(nPC*nCell)
#X <- seu@assays$RNA@scale.data
Sigma <- seu.sub@reductions$pca@stdev
V <- seu.sub@reductions$pca@cell.embeddings[, 1:100]
V <- apply(V,MARGIN = 2,FUN = function(x){
    return(x/sqrt(sum(x^2)))
})
# here V(nCell*nPC) is actually t(V)
U <- seu.sub@assays$RNA@scale.data %*% V %*% diag(1/Sigma)
# after batch effect removal, X.new=U*Sigma*V.new
V.new <- seu.sub@reductions$harmony@cell.embeddings[, 1:100]
V.new <- apply(V.new,MARGIN = 2,FUN = function(x){
    return(x/sqrt(sum(x^2)))
})
#clean.data.full <- U %*% diag(Sigma) %*% t(V.new)
seu.sub <- SetAssayData(seu.sub, slot = 'scale.data',
                    new.data = U %*% diag(Sigma) %*% t(V.new))

[40]:

cell.subset <- c()
for(ct in unique(seu.sub@meta.data$anno.by.cluster)){
    temp.cells <- Cells(seu.sub)[seu.sub@meta.data$anno.by.cluster == ct]
    temp.sub <- sample(temp.cells,
                       size = min(2000, length(temp.cells)),
                       replace = F)
    cell.subset <- c(cell.subset,temp.sub)
}

[41]:

seu.forMarkers <- CreateSeuratObject(counts = seu.sub@assays$RNA@scale.data[, cell.subset],
                              project = 'sub')

[42]:

seu.forMarkers@meta.data$anno.by.cluster <- seu.sub@meta.data[cell.subset,]$anno.by.cluster

[45]:

FindMyMarkers.temp <- function (seu, group.by = NULL, slot = 'data',
                                order = NULL, features = NULL)
{
    if (is.null(group.by)) {
        cell.label <- seu@active.ident
    }
    else {
        cell.label <- seu[[group.by]]
    }
    if (is.null(order)) {
        order <- unique(cell.label)[, 1]
    }
    cell.names <- Cells(seu)
    markers <- data.frame()
    for (cluster in order) {
        temp.markers <- FindMarkers(object = seu, slot = slot,
                                    group.by = group.by,
            ident.1 = cluster, verbose = F, features = features)
        temp.markers$cluster <- cluster
        temp.markers$gene <- row.names(temp.markers)
        row.names(temp.markers) <- paste0(temp.markers$gene,
            ".", as.character(cluster))
        markers <- rbind(markers, temp.markers)
    }
    return(markers)
}

[46]:

plan("sequential")

[47]:

library(future)
plan("multiprocess", workers = 12)
options(future.globals.maxSize = 10000 * 1024^2)
markers <- FindMyMarkers.temp(seu.forMarkers, slot = 'counts',
                              group.by = 'anno.by.cluster')
plan("sequential")

Warning message in log(x = mean(x = x) + pseudocount.use):
“NaNs produced”
Warning message in log(x = mean(x = x) + pseudocount.use):
“NaNs produced”

[ ]:

options(repr.plot.width=20, repr.plot.height=20, repr.plot.res = 200)
tops <- extractTopNMarkers(df = markers, top_n = 15,
                           group.order = unique(anno.by.cluster)[order(unique(anno.by.cluster))])
p <- DoHeatmap(object = seu.sub, cells = cell.subset,
               group.by = 'anno.by.cluster',
               features = tops$gene, draw.lines = FALSE,
               size = 5, angle = 45, disp.min = -2, disp.max = 2) +
        theme(text = element_text(size = 6)) +
        labs(title = 'T cells')
ggsave(plot = p, limitsize = FALSE, width = 20, height = 20,
       filename = file.path('/home/public/ghx/wuLab/Bladder/0101_T_annotation',
                            'T_heatmap_3rdAnnotation.jpg'))
p

[49]:

write.csv(markers, file.path('/home/public/ghx/wuLab/Bladder/0101_T_annotation',
                            'T_markers_3rdAnnotation.csv'))

[58]:

saveRDS(seu.sub,
        file.path('/home/public/ghx/wuLab/Bladder/0101_T_annotation',
                  'T_clean_3rdAnnotation.rds'))

4th annotation#

[18]:

annostr <- 'Dead cell       7T-12
Dead cell   8T-7
Dead cell   6T-7
Dead cell   9T-11
Dead cell   4P-3
Dead cell   6P-7
Dead cell   5T-1
Dead cell   5P-9
Dead cell   10T-8
Dead cell   7P-11
Dead cell   11P-4
Dead cell   10P-8
Dead cell   11T-9
Dead cell   8P-1
Dead cell   8T-8
Dump        9P-8
Dump        11P-12
Dump        10P-14
Dump        9P-9
Dump        7P-12
Dump        5T-11
Dump        6T-10
Dump        7T-10
Dump        8T-11
Dump        5T-2
Dump        6T-12
Dump        11P-13
Dump        7P-13
Dump        10P-15
Dump        11T-1
Dump        9T-12
Dump        10P-11
Dump        10T-12
Dump        6P-11
Dump        11T-11
Dump        10T-11
Dump        9P-5
Dump        6P-8'
annolines <- strsplit(annostr, split = '\n')[[1]]

[19]:

anno.ct <- rep(NA, length(old.clusters))
names(anno.ct) <- names(old.clusters)
for(anno in strsplit(annolines, split = '\t')){
    anno.ct[old.clusters %in% anno[2]] <- anno[1]
}

[22]:

cells.remove <- union(names(anno.ct)[!is.na(anno.ct)],
                      Cells(seu)[ seu@active.ident  %in% c(5,15,19,21,22)])

[ ]:

DimPlot(seu, reduction = 'umap_harmony',
        cells.highlight = cells.remove, pt.size = 0.1, sizes.highlight = 0.1)

[26]:

orig.ident <- seu@active.ident

[28]:

cells.keep <- setdiff(Cells(seu),cells.remove)
seu.sub <- subset(seu, cells = cells.keep)

Warning message:
“All object keys must be alphanumeric characters, followed by an underscore ('_'), setting key to 'umapharmony_'”

[29]:

seu.sub@meta.data$orig.clusters <- orig.ident[Cells(seu.sub)]

[30]:

seu.sub <- RunUMAP(object = seu.sub, reduction.name = 'umap_harmony',
               umap.method = 'umap-learn', metric = 'correlation',
               reduction = 'harmony', dims = 1:20)

[31]:

seu.sub <- FindNeighbors(seu.sub, reduction = 'harmony', dims = 1:20, verbose = F)

[57]:

anno.by.cluster <- as.character(seu.sub@meta.data$orig.clusters)
anno.by.cluster[ anno.by.cluster %in% c(11)] <- 'CD4-Treg(Th17)'
anno.by.cluster[ anno.by.cluster %in% c(4)] <- 'CD4-Treg(naïve)'
anno.by.cluster[ anno.by.cluster %in% c(13)] <- 'CD4-Th1-like'
anno.by.cluster[ anno.by.cluster %in% c(10,12)] <- 'CD4-Th17-1'
anno.by.cluster[ anno.by.cluster %in% c(18)] <- 'CD4-Th17-2'
anno.by.cluster[ anno.by.cluster %in% c(2)] <- 'naïve'
anno.by.cluster[ anno.by.cluster %in% c(3)] <- 'naïve-RPL'
anno.by.cluster[ anno.by.cluster %in% c(7)] <- 'gdT-antitumor'
anno.by.cluster[ anno.by.cluster %in% c(17)] <- 'CD8-TEM-CX3CR1'
anno.by.cluster[ anno.by.cluster %in% c(16)] <- 'CD8-IFNsignaling'
anno.by.cluster[ anno.by.cluster %in% c(0)] <- 'CD8-TRM-1'
anno.by.cluster[ anno.by.cluster %in% c(14)] <- 'CD8-TRM-2'
anno.by.cluster[ anno.by.cluster %in% c(1)] <- 'CD8-TEM'
anno.by.cluster[ anno.by.cluster %in% c(6)] <- 'CD8-CT-1'
anno.by.cluster[ anno.by.cluster %in% c(9)] <- 'CD8-CT-2'
anno.by.cluster[ anno.by.cluster %in% c(8)] <- 'CD8-Exhausted'
anno.by.cluster[ anno.by.cluster %in% c(20)] <- 'CD8-UK'
ct.order <- c('naïve','naïve-RPL',
              'CD4-Th1-like','CD4-Th17-1','CD4-Th17-2',
              'CD4-Treg(naïve)','CD4-Treg(Th17)',
              'CD8-TRM-1','CD8-TRM-2','CD8-TEM',
              'CD8-TEM-CX3CR1','CD8-CT-1','CD8-CT-2','CD8-IFNsignaling',
              'CD8-Exhausted','CD8-UK',
              'gdT-antitumor')
seu.sub@meta.data$anno.by.cluster <- factor(anno.by.cluster, levels = ct.order)

[ ]:

options(repr.plot.width=20, repr.plot.height=20, repr.plot.res = 200)
p1 <- DimPlot(seu.sub, label = T, pt.size = 0.1, group.by = 'orig.clusters',
              reduction = 'umap_harmony') +
     labs(title = 'cluster') #+ theme(legend.position = 'none')
p2 <- DimPlot(seu.sub, label = T, pt.size = 0.1, group.by = 'anno.by.cluster',
              reduction = 'umap_harmony') +
     labs(title = 'annotation') #+ theme(legend.position = 'none')
p3 <- DimPlot(seu.sub, label = T, pt.size = 0.1, group.by = 'status',
              reduction = 'umap_harmony') +
     labs(title = 'status') #+ theme(legend.position = 'none')
p4 <- DimPlot(seu.sub, label = T, pt.size = 0.1, group.by = 'orig.ident',
              reduction = 'umap_harmony') +
     labs(title = 'orig.clusters') #+ theme(legend.position = 'none')
p <- arrangeGrob(grobs = list(p1,p2,p3,p4), ncol = 2,
                top = paste0('MIBC5-16: harmony T'))
ggsave(plot = p, limitsize = FALSE, width = 20, height = 20,
       filename = file.path('/home/public/ghx/wuLab/Bladder/0101_T_annotation/results',
                            'T_overview_4thAnnotation.jpg'))
grid.arrange(p)

[ ]:

[ ]:

library(future)
plan("multiprocess", workers = 12)
options(future.globals.maxSize = 10000 * 1024^2)
markers <- FindMyMarkers(seu.sub, group.by = 'anno.by.cluster')
plan("sequential")

[ ]:

top10 <- extractTopNMarkers(df = markers, top_n = 20, group.order = ct.order)
sub.cells <- c()
for(c in levels(seu.sub@meta.data$anno.by.cluster)){
    sub.cells <- c(sub.cells, sample(Cells(seu.sub)[ seu.sub@meta.data$anno.by.cluster == c ], replace = F,
                                     size = min(1000,sum(seu.sub@meta.data$anno.by.cluster == c))))
}
length(sub.cells)

[ ]:

options(repr.plot.width=10, repr.plot.height=10, repr.plot.res = 200)
p <- DoHeatmap(object = seu.sub, cells = sub.cells,draw.lines = FALSE, group.by = 'anno.by.cluster',
          features = top10$gene, size = 5, angle = 30, disp.min = -2, disp.max = 2) +
theme(text = element_text(size = 4))
ggsave(plot = p, limitsize = FALSE, width = 20, height = 20,
       filename = file.path('/home/public/ghx/wuLab/Bladder/0101_T_annotation/results',
                            'T_heatmap_4thAnnotation.jpg'))
p

[ ]:

saveRDS(seu.sub, file = file.path('/home/public/ghx/wuLab/Bladder/0101_T_annotation',
                                  'T_clean_4thAnnotation.rds'))

[1]:

seu.sub <- readRDS(file = file.path('/home/public/ghx/wuLab/Bladder/0101_T_annotation',
                                  'T_clean_4thAnnotation.rds'))

[3]:

saveRDS(seu.sub@meta.data, file = file.path('/home/public/ghx/wuLab/Bladder/0101_T_annotation',
                                  'V1_Celltype_T_annotation_withAnno.rds'))

[ ]:

write.csv(markers, file.path('/home/public/ghx/wuLab/Bladder/0101_T_annotation/results',
                            'T_markers_4thAnnotation.csv'))

[68]:

# X=U*Sigma*V, U and V are Unitary Matrix, e.g. V^-1=t(V)
# U=X*t(V)*(1/Sigma)
# X(nGene*nCell)=U(nGene*nPC)*Sigma(nPC*nPC)*V(nPC*nCell)
#X <- seu@assays$RNA@scale.data
Sigma <- seu.sub@reductions$pca@stdev
V <- seu.sub@reductions$pca@cell.embeddings[, 1:100]
V <- apply(V,MARGIN = 2,FUN = function(x){
    return(x/sqrt(sum(x^2)))
})
# here V(nCell*nPC) is actually t(V)
U <- seu.sub@assays$RNA@scale.data %*% V %*% diag(1/Sigma)
# after batch effect removal, X.new=U*Sigma*V.new
V.new <- seu.sub@reductions$harmony@cell.embeddings[, 1:100]
V.new <- apply(V.new,MARGIN = 2,FUN = function(x){
    return(x/sqrt(sum(x^2)))
})
#clean.data.full <- U %*% diag(Sigma) %*% t(V.new)
seu.sub <- SetAssayData(seu.sub, slot = 'scale.data',
                    new.data = U %*% diag(Sigma) %*% t(V.new))

[ ]:

options(repr.plot.width=20, repr.plot.height=20, repr.plot.res = 200)
tops <- extractTopNMarkers(df = markers, top_n = 15, group.order = ct.order)
p <- DoHeatmap(object = seu.sub, cells = sub.cells, group.by = 'anno.by.cluster',
               features = tops$gene, draw.lines = FALSE,
               size = 5, angle = 30, disp.min = -2, disp.max = 2) +
        theme(text = element_text(size = 6)) +
        labs(title = 'T cells')
ggsave(plot = p, limitsize = FALSE, width = 20, height = 20,
       filename = file.path('/home/public/ghx/wuLab/Bladder/0101_T_annotation/results',
                            'T_heatmap_4thAnnotation_recons.jpg'))
p

[ ]:

compare cell type composition of each sample, P&T#

[71]:

stats <- table(seu.sub@meta.data$anno.by.cluster, seu.sub@meta.data$orig.ident)
stats <- as.data.frame(stats)
names(stats) <- c('cluster', 'origin', 'cell.number')
color <- rep(c(rep('white',length(unique(seu.sub@meta.data$anno.by.cluster))),
               rep('black',length(unique(seu.sub@meta.data$anno.by.cluster)))), 12)
p5 <- ggplot(stats, aes(fill=cluster, y=cell.number, x=origin)) +
        geom_bar( stat="identity", position="fill", color = color) +
        theme(axis.text.x = element_text(size=10, angle = 90))
options(repr.plot.width=10, repr.plot.height=10, repr.plot.res = 200)
p5

../../_images/notebooks_Cell_type_analysis_T_annotation_68_0.png

[72]:

stats <- table(seu.sub@meta.data$anno.by.cluster, seu.sub@meta.data$orig.ident)
stats <- apply(stats, MARGIN = 2,
               FUN = function(x) {return(x/sum(x))})
stats <- t(apply(stats, MARGIN = 1,
               FUN = function(x) {return(x/max(x))}))
stats <- as.data.frame(as.table(stats))
colnames(stats) <- c('celltype','sample','Freq')
stats$patient <- substr(stats$sample, start = 5,
                        stop = sapply(as.character(stats$sample),FUN=nchar)-1)
stats$patient <- factor(stats$patient, levels=5:16)
stats$status <- substr(stats$sample,
                       start = sapply(as.character(stats$sample),FUN=nchar),
                       stop = sapply(as.character(stats$sample),FUN=nchar))
stats$status <- factor(stats$status, levels=c('P','T'))

[73]:

p <- ggplot(data = stats,
            aes(x=status, y=Freq, color=patient, group=patient)) +
      geom_line() + geom_point(size=4, shape=20)

[74]:

p + facet_wrap(. ~ celltype)

../../_images/notebooks_Cell_type_analysis_T_annotation_71_0.png

[75]:

library(pheatmap)

[80]:

stats <- table(seu.sub@meta.data$anno.by.cluster, seu.sub@meta.data$orig.ident)
stats <- apply(stats, MARGIN = 2,
               FUN = function(x) {return(x/sum(x))})
stats <- t(apply(stats, MARGIN = 1,
               FUN = function(x) {return(x/max(x))}))
samples <- colnames(stats)
patient <- substr(samples, start = 5,
                  stop = sapply(samples,FUN=nchar)-1)
patient <- factor(patient, levels=5:16)
status <- substr(samples, start = sapply(samples,FUN=nchar),
                  stop = sapply(samples,FUN=nchar))
status <- factor(status, levels=c('P','T'))
row.anno <- data.frame(row.names = colnames(stats),
                       patient = patient, status = status)
col.anno <- data.frame(row.names = row.names(stats),
                        CT = substr(row.names(stats),
                                   start = 1, stop=3))

[81]:

pheatmap(t(stats), cutree_rows = 6, cutree_cols = 5,
         annotation_row = row.anno, annotation_col = col.anno)

../../_images/notebooks_Cell_type_analysis_T_annotation_74_0.png

[ ]:

plan("multiprocess", workers = 12)
options(future.globals.maxSize = 10000 * 1024^2)
markers <- FindMyMarkers.temp(seu.forMarkers, slot = 'counts',
                              group.by = 'anno.by.cluster')
plan("sequential")

[178]:

cells.P <- Cells(seu.sub)[seu.sub@meta.data$anno.by.cluster == ct &
                          seu.sub@meta.data$status == 'P']
cells.T <- Cells(seu.sub)[seu.sub@meta.data$anno.by.cluster == ct &
                          seu.sub@meta.data$status == 'T']
temp.cluster <- rep(0, length(Cells(seu.sub)))
names(temp.cluster) <- Cells(seu.sub)
temp.cluster[cells.P] <- 'P'
temp.cluster[cells.T] <- 'T'
seu.sub@meta.data$temp.cluster <- temp.cluster

[197]:

plan("sequential")

[198]:

markers.within.ct <- data.frame()
plan("multiprocess", workers = 12)
options(future.globals.maxSize = 10000 * 1024^2)
for(ct in row.names(stats)){
    cells.P <- Cells(seu.sub)[seu.sub@meta.data$anno.by.cluster == ct &
                              seu.sub@meta.data$status == 'P']
    cells.T <- Cells(seu.sub)[seu.sub@meta.data$anno.by.cluster == ct &
                              seu.sub@meta.data$status == 'T']
    temp.cluster <- rep(0, length(Cells(seu.sub)))
    names(temp.cluster) <- Cells(seu.sub)
    temp.cluster[cells.P] <- 'P'
    temp.cluster[cells.T] <- 'T'
    seu.sub@meta.data$temp.cluster <- temp.cluster
    temp.markers <- FindMarkers(object = seu.sub, group.by = 'temp.cluster',
                                ident.1 = 'P',ident.2 = 'T')
    temp.markers$cluster <- ct
    temp.markers$gene <- as.character(row.names(temp.markers))
    markers.within.ct <- rbind(markers.within.ct,temp.markers)

}
plan("sequential")

[199]:

write.csv(markers.within.ct,
          file.path('/home/public/ghx/wuLab/Bladder/0101_T_annotation',
                    'markers_compare_PT.csv'))

[185]:

genes <- row.names(seu.sub)

numbers of cells from each sample cluster to each merge cluster#

[189]:

cell.nb.table <- as.matrix(table(old.clusters[ Cells(seu.sub)],
                                 as.character(seu.sub@active.ident)))
cell.nb.table <- cbind(cell.nb.table, rowSums(cell.nb.table))

[193]:

cell.nb.table.pct <- t(apply(cell.nb.table, MARGIN = 1,
                      FUN = function(x){ return(x/x[19])}))

[194]:

write.csv(cell.nb.table,
          file.path('/home/public/ghx/wuLab/Bladder/0101_T_annotation/results',
                    'Table_sampleC-mergeC_number.csv'))
write.csv(cell.nb.table.pct,
          file.path('/home/public/ghx/wuLab/Bladder/0101_T_annotation/results',
                    'Table_sampleC-mergeC_percentage.csv'))