In [None]:
# ..... annotate cell clusters ..... #

In [2]:
library(Seurat)
library(SingleCellExperiment)
library(MetaMarkers)
library(data.table)
library(dplyr)

In [3]:
# Read the eggNOG lizard/human ortholog table and keep only the rows that have
# both a lizard gene symbol and a human ortholog name.
om <- read.delim('lizard_human_orthologs_eggNOG.txt', sep = '\t')
missing_name <- is.na(om$lizard_gene) | is.na(om$ortholog_name)
om <- om[!missing_name, ]
# quick look: table dimensions and the first record
dim(om)
om[1, ]

Unnamed: 0_level_0,query,orth_type,species,orthologs,lizard_gene,ortholog_gene,ortholog_name
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
1,XP_060608688.1,one2one,Homo sapiens(9606),ENSP00000340297,ARHGEF10,ENSG00000104728,ARHGEF10


In [4]:
# Rank markers within each cell type by their order of appearance in `markers`.
#
# Despite the name, no rows are dropped here: the function only adds a `rank`
# column, which callers then threshold themselves (e.g. `rank <= 100`).
#
# Args:
#   markers: data.frame-like table with a `cell_type` column. Rows are ranked
#            in the order they are given -- this assumes the table is already
#            sorted best-first within each cell type (TODO confirm against the
#            marker files).
#   ctypes:  vector of cell-type names to rank. Rows whose cell type is not
#            listed keep rank NA.
#
# Returns:
#   `markers` with an added integer `rank` column (1 = first row of that
#   cell type).
get_top_markers <- function(markers, ctypes){
    # One NA per row (not a bare NA) so a zero-row table gets a zero-length
    # column instead of an error, and the column is integer even when no
    # cell type matches.
    markers$rank <- rep(NA_integer_, nrow(markers))
    for(ii in seq_along(ctypes)){
        id <- which(markers$cell_type == ctypes[ii])
        markers$rank[id] <- seq_along(id)
    }
    markers
}

In [5]:
# ---- class-level markers ----
# Human marker symbols are translated to lizard gene names through the
# ortholog table `om`; markers with no lizard ortholog are dropped.
markers <- fread('~/septation/markers/Knight-Schrijver/Knight-Schrijver_celltype_markers_F1_F2_res2.csv.gz')
class_ortholog_idx <- match(markers$gene, om$ortholog_name)
markers$gene <- om$lizard_gene[class_ortholog_idx]
markers <- markers[!is.na(markers$gene),]
ctypes <- unique(markers$cell_type)
# ranks are assigned after the ortholog filter, so they count retained genes only
markers <- get_top_markers(markers, ctypes)

# ---- subclass markers, stratified by class ----
markers2 <- fread('~/septation/markers/Knight-Schrijver/Knight-Schrijver_celltype_markers_F1_F2_res3.csv.gz')
subclass_ortholog_idx <- match(markers2$gene, om$ortholog_name)
markers2$gene <- om$lizard_gene[subclass_ortholog_idx]
markers2 <- markers2[!is.na(markers2$gene),]
ctypes2 <- unique(markers2$cell_type)
markers2 <- get_top_markers(markers2, ctypes2)

# ---- top 100 markers per cell type, as (group, cell_type, gene) tibbles ----
# which() also discards rows whose rank is NA
top_markers <- as_tibble(markers[which(markers$rank <= 100), c('group', 'cell_type', 'gene')])
top_markers2 <- as_tibble(markers2[which(markers2$rank <= 100), c('group', 'cell_type', 'gene')])

# these two cell types are left out of the class-level marker set only
excluded_classes <- c('Pericytes_Stromal', 'Adipocytes')
top_markers <- dplyr::filter(top_markers, !(cell_type %in% excluded_classes))

top_markers[1:2,]
top_markers2[1:2,]

group,cell_type,gene
<chr>,<chr>,<chr>
all,Cardiomyocytes,LOC132767889
all,Cardiomyocytes,TNNT2


group,cell_type,gene
<chr>,<chr>,<chr>
Fibroblasts,Fibroblast-like,SFRP1
Fibroblasts,Fibroblast-like,CLU


In [6]:
# Parallel vectors, one entry per dataset: three stages, each with
# sample1..sample3 (nine stage/sample pairs in total).
stages <- rep(c('stage9', 'stage13', 'stage16'), each = 3)
samples <- rep(paste0('sample', 1:3), times = 3)

In [7]:
# Annotate every sample with MetaMarkers in two passes (cell class first, then
# subclass within the predicted class) and write one CSV of per-cell labels
# per sample.
#
# Uses objects defined in earlier cells: `stages`, `samples`, `top_markers`
# (class markers) and `top_markers2` (subclass markers).

# `stages` and `samples` are indexed in parallel below
stopifnot(length(stages) == length(samples))

# A subclass label must cover at least this many cells within a sample;
# rarer labels are reset to NA and the cell falls back to its class label.
min_cells_per_subclass <- 20

# Create the output folder up front so a missing directory cannot abort the
# run at write time, after the scoring has already been done.
out_dir <- 'annotations'
dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)

pb <- txtProgressBar(min = 0, max = length(samples), initial = 0)

for(id in seq_along(samples)){
    file0 <- paste0(stages[id], '_', samples[id], '_data.rds')
    # accessed below through LayerData() and @meta.data, i.e. handled as a
    # Seurat object despite the variable name
    sce <- readRDS(file0)

    # get SCE object: raw counts + cell metadata, plus a CPM assay
    sc3 <- SingleCellExperiment(list(counts = LayerData(sce, assay = 'RNA', layer = 'counts')))
    colData(sc3) <- DataFrame(sce@meta.data)
    assay(sc3, "cpm") <- convert_to_cpm(assay(sc3))

    # predict cell type class
    ct_scores <- score_cells(log1p(cpm(sc3)), top_markers)
    # not used in the CSV written below; retained so it stays available for
    # inspection after the loop
    ct_enrichment <- compute_marker_enrichment(ct_scores)
    ct_pred <- assign_cells(ct_scores)

    # ct_pred[1:3,]
    # table(ct_pred$predicted)

    # get cell subclass labels, restricted to each cell's predicted class.
    # Classes that have no subclass markers yield NA predictions here (that is
    # what the emitted warning reports); those cells fall back to the class
    # label further down.
    sub_scores <- score_cells(log1p(cpm(sc3)), top_markers2)
    # not used in the CSV written below; see ct_enrichment
    sub_enrichment <- compute_marker_enrichment(sub_scores, by_group = TRUE)
    sub_pred <- assign_cells(sub_scores, group_assignment = ct_pred$predicted)

    # sub_pred[1:3,]
    # table(sub_pred$predicted)

    # subclass labels with too few cells - make assignment NA
    rm_ids <- names(which(table(sub_pred$predicted) < min_cells_per_subclass))
    sub_pred$predicted[sub_pred$predicted %in% rm_ids] <- NA

    # make combined prediction df
    # NOTE(review): columns are combined positionally -- assumes ct_pred and
    # sub_pred list the cells in the same order; confirm against assign_cells().
    newdf <- data.frame(barcode = rownames(ct_pred), class = ct_pred$predicted, class_score = ct_pred$score,
                        class_enrichment = ct_pred$enrichment, celltype = sub_pred$predicted,
                        celltype_score = sub_pred$score, celltype_enrichment = sub_pred$enrichment)

    # final label = subclass where available, otherwise the class.
    # Vectorized, and done on character values so the fallback also works if
    # the predictions are factors with different level sets.
    no_subclass <- is.na(newdf$celltype)
    newdf$final_celltype <- as.character(newdf$celltype)
    newdf$final_celltype[no_subclass] <- as.character(newdf$class[no_subclass])
    # newdf[1,]

    # save
    out_file <- file.path(out_dir, paste0(stages[id], '_', samples[id], '_Knight-Schrijver_celltypes.csv'))
    write.table(newdf, file = out_file,
                sep = ',', row.names = FALSE, col.names = TRUE, quote = FALSE)

    setTxtProgressBar(pb, id)
}

# finish the progress bar line
close(pb)

“Some group assignments (Endothelial_Other, Immature_Cardiomyocytes, Endothelial_Venous, Endocardial, Smooth_Muscle_Cells, Epicardium_Meso, unassigned, Endothelial_Capillaries, Endothelial_Arterial, Epicardium_FB-like) do not match groups in the score matrix (Cardiomyocytes, Fibroblasts, Immature_other, Lymphoid_Immune_Cells, Myeloid_Immune_Cells, Neuronal_Cells) and will result in NA predictions.”




“Some group assignments (Endothelial_Other, Smooth_Muscle_Cells, Immature_Cardiomyocytes, Epicardium_FB-like, Endocardial, unassigned, Epicardium_Meso, Endothelial_Venous, Endothelial_Capillaries, Endothelial_Arterial, Pericytes) do not match groups in the score matrix (Cardiomyocytes, Fibroblasts, Immature_other, Lymphoid_Immune_Cells, Myeloid_Immune_Cells, Neuronal_Cells) and will result in NA predictions.”




“Some group assignments (Endothelial_Other, Immature_Cardiomyocytes, Smooth_Muscle_Cells, Endothelial_Venous, Endocardial, Endothelial_Capillaries, Epicardium_Meso, Pericytes, Epicardium_FB-like, Endothelial_Arterial, unassigned) do not match groups in the score matrix (Cardiomyocytes, Fibroblasts, Immature_other, Lymphoid_Immune_Cells, Myeloid_Immune_Cells, Neuronal_Cells) and will result in NA predictions.”




“Some group assignments (Endothelial_Other, Endocardial, Immature_Cardiomyocytes, unassigned, Smooth_Muscle_Cells, Pericytes, Epicardium_FB-like, Epicardium_Proliferating, Endothelial_Arterial, Endothelial_Venous, Endothelial_Capillaries) do not match groups in the score matrix (Cardiomyocytes, Fibroblasts, Immature_other, Lymphoid_Immune_Cells, Myeloid_Immune_Cells, Neuronal_Cells) and will result in NA predictions.”




“Some group assignments (Endothelial_Other, Immature_Cardiomyocytes, Endocardial, Smooth_Muscle_Cells, Endothelial_Venous, Pericytes, Endothelial_Arterial, unassigned, Endothelial_Capillaries) do not match groups in the score matrix (Cardiomyocytes, Fibroblasts, Immature_other, Lymphoid_Immune_Cells, Myeloid_Immune_Cells, Neuronal_Cells) and will result in NA predictions.”




“Some group assignments (Endocardial, Endothelial_Other, Smooth_Muscle_Cells, Pericytes, Immature_Cardiomyocytes, Endothelial_Venous, Endothelial_Arterial, Endothelial_Capillaries, Epicardium_Meso, Epicardium_FB-like, unassigned) do not match groups in the score matrix (Cardiomyocytes, Fibroblasts, Immature_other, Lymphoid_Immune_Cells, Myeloid_Immune_Cells, Neuronal_Cells) and will result in NA predictions.”




“Some group assignments (Endothelial_Other, Endothelial_Capillaries, Endocardial, Smooth_Muscle_Cells, Immature_Cardiomyocytes, Epicardium_Meso, Pericytes, Endothelial_Venous, unassigned, Epicardium_FB-like, Endothelial_Arterial, Epicardium_Proliferating) do not match groups in the score matrix (Cardiomyocytes, Fibroblasts, Immature_other, Lymphoid_Immune_Cells, Myeloid_Immune_Cells, Neuronal_Cells) and will result in NA predictions.”




“Some group assignments (Endothelial_Other, Immature_Cardiomyocytes, Endocardial, Endothelial_Venous, Smooth_Muscle_Cells, Epicardium_Meso, Endothelial_Capillaries, Pericytes, Endothelial_Arterial, Epicardium_FB-like, unassigned) do not match groups in the score matrix (Cardiomyocytes, Fibroblasts, Immature_other, Lymphoid_Immune_Cells, Myeloid_Immune_Cells, Neuronal_Cells) and will result in NA predictions.”




“Some group assignments (Endothelial_Other, Immature_Cardiomyocytes, Endocardial, Smooth_Muscle_Cells, Endothelial_Capillaries, Endothelial_Venous, Pericytes, Endothelial_Arterial, unassigned, Epicardium_FB-like, Epicardium_Meso) do not match groups in the score matrix (Cardiomyocytes, Fibroblasts, Immature_other, Lymphoid_Immune_Cells, Myeloid_Immune_Cells, Neuronal_Cells) and will result in NA predictions.”


