In [None]:
# ..... annotate cell clusters ..... #

In [2]:
library(Seurat)
library(SingleCellExperiment)
library(MetaMarkers)
library(data.table)
library(dplyr)

In [3]:
# read the tab-separated human/chicken ortholog table and give its
# columns short, fixed names
om <- read.delim('human_chicken_orthologs_biomart.txt', sep = '\t')
names(om) <- c('human', 'chicken', 'LCA', 'homology_type')

# keep only rows where both gene names are non-empty and the homology
# type is one-to-one
om <- om[with(om, human != '' & chicken != '' & homology_type == 'ortholog_one2one'), ]

# quick look: table dimensions and the first retained row
dim(om)
om[1, ]

Unnamed: 0_level_0,human,chicken,LCA,homology_type
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>
6,MT-ND1,ND1,Amniota,ortholog_one2one


In [4]:
# Add a within-cell-type rank to a marker table.
#
# For every cell type listed in `ctypes`, the rows of `markers` whose
# `cell_type` equals that cell type are numbered 1, 2, ... in the order in
# which they appear in the table. Rows whose cell type is not in `ctypes`
# keep rank NA. No sorting or filtering is done here; selecting the "top N"
# markers is left to the caller (e.g. `markers$rank <= 100`).
# NOTE(review): this presumably relies on the marker file already being
# sorted from strongest to weakest marker within each cell type - confirm.
#
# markers: data.frame / data.table with a `cell_type` column
# ctypes:  vector of cell type names to rank
# returns: `markers` with an additional integer column `rank`
get_top_markers <- function(markers, ctypes){
    cell_types <- markers$cell_type

    # build the ranks in a plain vector and attach it once; this also works
    # for a zero-row table and for an empty `ctypes`
    ranks <- rep(NA_integer_, nrow(markers))
    for(ct in ctypes){
        rows <- which(cell_types == ct)
        ranks[rows] <- seq_along(rows)
    }

    markers$rank <- ranks
    markers
}

In [5]:
# ---- class-level markers ----
# read the marker table, replace each human gene symbol by the matching
# chicken gene from the ortholog table `om`, and drop markers that have
# no match
markers <- fread('~/septation/markers/Knight-Schrijver/Knight-Schrijver_celltype_markers_F1_F2_res2.csv.gz')
markers$gene <- om$chicken[match(markers$gene, om$human)]
markers <- markers[!is.na(markers$gene), ]
ctypes <- unique(markers$cell_type)

# ---- subclass-level markers (stratified by class via the `group` column) ----
# same translation and filtering as for the class-level table
markers2 <- fread('~/septation/markers/Knight-Schrijver/Knight-Schrijver_celltype_markers_F1_F2_res3.csv.gz')
markers2$gene <- om$chicken[match(markers2$gene, om$human)]
markers2 <- markers2[!is.na(markers2$gene), ]
ctypes2 <- unique(markers2$cell_type)

# number the remaining markers within each cell type (ranks are assigned
# after the ortholog filtering above)
markers <- get_top_markers(markers, ctypes)
markers2 <- get_top_markers(markers2, ctypes2)

# keep markers ranked 1-100 per cell type as tibbles with the columns
# group, cell_type and gene (the rank column itself is not carried over)
top_markers <- as_tibble(markers[which(markers$rank <= 100), c('group', 'cell_type', 'gene')])
top_markers2 <- as_tibble(markers2[which(markers2$rank <= 100), c('group', 'cell_type', 'gene')])

# preview the first two rows of each table
top_markers[1:2, ]
top_markers2[1:2, ]

group,cell_type,gene
<chr>,<chr>,<chr>
all,Cardiomyocytes,TTN
all,Cardiomyocytes,TNNT2


group,cell_type,gene
<chr>,<chr>,<chr>
Fibroblasts,Fibroblast-like,SFRP1
Fibroblasts,Fibroblast-like,CLU


In [6]:
# samples to annotate, as two parallel vectors: stages[i] is the stage of
# samples[i] (2 x stage23, 3 x stage28, 3 x stage32)
stages <- rep(c('stage23', 'stage28', 'stage32'), times = c(2, 3, 3))
samples <- c('sample3', 'sample10',               # stage23
             'sample5', 'sample10', 'sample11',   # stage28
             'sample5', 'sample10', 'sample12')   # stage32

In [7]:
# Annotate every sample: load its Seurat object, score cells against the
# class-level and subclass-level marker sets with MetaMarkers, and write one
# CSV of per-cell predictions to annotations/.

# stages and samples are parallel vectors; a length mismatch would silently
# pair samples with NA stages
stopifnot(length(stages) == length(samples))

# write.table() does not create missing directories, so make sure the output
# directory exists before any expensive work is done
dir.create('annotations', showWarnings = FALSE, recursive = TRUE)

pb <- txtProgressBar(min = 0, max = length(samples), initial = 0)

for(id1 in seq_along(samples)){
    currstage1 <- stages[id1]
    currsmp1 <- samples[id1]
    sc1 <- readRDS(paste0(currstage1, '_', currsmp1, '_data.rds'))

    # get SCE object: raw counts from the Seurat RNA assay, the Seurat cell
    # metadata as colData, plus a "cpm" assay derived from the counts
    sc3 <- SingleCellExperiment(list(counts = LayerData(sc1, assay = 'RNA', layer = 'counts')))
    colData(sc3) <- DataFrame(sc1@meta.data)
    assay(sc3, "cpm") <- convert_to_cpm(assay(sc3))

    # log-transformed CPM is the input of both scoring steps; compute it once
    log_cpm <- log1p(cpm(sc3))

    # predict cell type class
    ct_scores <- score_cells(log_cpm, top_markers)
    # enrichment table is kept for interactive inspection; it is not used below
    ct_enrichment <- compute_marker_enrichment(ct_scores)
    ct_pred <- assign_cells(ct_scores)

    # ct_pred[1:3,]
    # table(ct_pred$predicted)

    # get cell subclass labels, restricted to the predicted class of each cell.
    # assign_cells() warns when a predicted class has no matching group in the
    # subclass score matrix; those cells get an NA subclass and fall back to
    # their class label further down.
    sub_scores <- score_cells(log_cpm, top_markers2)
    # enrichment table is kept for interactive inspection; it is not used below
    sub_enrichment <- compute_marker_enrichment(sub_scores, by_group = TRUE)
    sub_pred <- assign_cells(sub_scores, group_assignment = ct_pred$predicted)

    # sub_pred[1:3,]
    # table(sub_pred$predicted)

    # <20 cells of a type - make assignment NA
    rm_ids <- names(which(table(sub_pred$predicted) < 20))
    sub_pred$predicted[which(sub_pred$predicted %in% rm_ids)] <- NA

    # make combined prediction df
    # NOTE(review): assumes the rows of sub_pred are in the same cell order as
    # the rows of ct_pred - confirm against assign_cells()
    newdf <- data.frame(barcode = rownames(ct_pred), class = ct_pred$predicted, class_score = ct_pred$score,
                        class_enrichment = ct_pred$enrichment, celltype = sub_pred$predicted,
                        celltype_score = sub_pred$score, celltype_enrichment = sub_pred$enrichment)

    # final label: the subclass where one was assigned, otherwise the class
    newdf$final_celltype <- newdf$celltype
    no_subtype <- is.na(newdf$celltype)
    newdf$final_celltype[no_subtype] <- newdf$class[no_subtype]
    # newdf[1,]

    # save
    write.table(newdf, file = paste0('annotations/', currstage1, '_', currsmp1, '_Knight-Schrijver_celltypes.csv'),
                sep = ',', row.names = FALSE, col.names = TRUE, quote = FALSE)

    setTxtProgressBar(pb, id1)

}

# release the progress bar and terminate its output line
close(pb)

“Some group assignments (Smooth_Muscle_Cells, Endothelial_Other, Pericytes, Endothelial_Venous, Endothelial_Capillaries, Endocardial, Epicardium_Meso, Immature_Cardiomyocytes, Endothelial_Arterial, Epicardium_FB-like) do not match groups in the score matrix (Cardiomyocytes, Fibroblasts, Immature_other, Lymphoid_Immune_Cells, Myeloid_Immune_Cells, Neuronal_Cells) and will result in NA predictions.”




“Some group assignments (Endothelial_Other, Smooth_Muscle_Cells, Endocardial, Endothelial_Capillaries, Immature_Cardiomyocytes, Pericytes, Endothelial_Venous, Endothelial_Arterial, Epicardium_Meso) do not match groups in the score matrix (Cardiomyocytes, Fibroblasts, Immature_other, Lymphoid_Immune_Cells, Myeloid_Immune_Cells, Neuronal_Cells) and will result in NA predictions.”




“Some group assignments (Smooth_Muscle_Cells, Pericytes, Endothelial_Other, Endothelial_Arterial, Endocardial, Endothelial_Capillaries, Endothelial_Venous, Immature_Cardiomyocytes, Epicardium_FB-like, Epicardium_Meso) do not match groups in the score matrix (Cardiomyocytes, Fibroblasts, Immature_other, Lymphoid_Immune_Cells, Myeloid_Immune_Cells, Neuronal_Cells) and will result in NA predictions.”


