In [None]:
# label transfer with Symphony?

In [2]:
library(symphony)
library(Seurat)
library(dplyr)
library(ggplot2)
library(SingleCellExperiment)

In [3]:
# Parallel vectors describing the samples: element i of `stages` and
# element i of `samples` together identify one sample.
stages <- rep(c('stage23', 'stage28', 'stage32'), times = c(2, 3, 3))
samples <- c(
  'sample3', 'sample10',             # stage23
  'sample5', 'sample10', 'sample11', # stage28
  'sample5', 'sample10', 'sample12'  # stage32
)

In [4]:
# Read the tab-delimited mouse/chicken ortholog table and give its columns
# short names.
om <- read.delim('mouse_chicken_orthologs_biomart.txt', sep = '\t')
colnames(om) <- c('mouse', 'chicken', 'LCA', 'homology_type')

# Keep only rows that have a non-empty entry in both of the first two
# columns and whose homology type is one-to-one.
has_both_names <- om[, 1] != '' & om[, 2] != ''
is_one2one <- om$homology_type == 'ortholog_one2one'
om <- om[has_both_names & is_one2one, ]

# Show the size of the filtered table and its first row.
dim(om)
om[1, ]

Unnamed: 0_level_0,mouse,chicken,LCA,homology_type
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>
6,mt-Nd1,ND1,Amniota,ortholog_one2one


In [5]:
# Read the serialized reference object; it is later handed to mapQuery()
# and knnPredict() as the reference.
ref_path <- '~/septation/markers/Qiu_E11.5_harmonyObj.rds'
heart.ref <- readRDS(ref_path)

In [11]:
# Pick one sample by its position in `stages` / `samples` and read its
# object from '<stage>_<sample>_data.rds' in the working directory.
sampleid <- 8
stage_id <- stages[sampleid]
sample_name <- samples[sampleid]
file0 <- paste0(stage_id, '_', sample_name, '_data.rds')
sce <- readRDS(file0)

In [12]:
# Translate the query's row names (looked up in om$chicken) to the matching
# om$mouse names; rows without a match in the ortholog table become NA.
genelist <- om$mouse[match(rownames(sce), om$chicken)]
genelist[seq_len(10)]

# Pull the cell metadata and the RNA assay's `data` matrix out of the object.
mtd2 <- sce@meta.data
exp_mat2 <- sce@assays$RNA@data

# Relabel the rows with the translated names, then drop every row whose
# translated name is NA.
rownames(exp_mat2) <- genelist
has_ortholog <- !is.na(rownames(exp_mat2))
exp_mat2 <- exp_mat2[has_ortholog, ]

In [13]:
# Map the ortholog-renamed query matrix onto the reference with Symphony.
# Author's note: getting 1.5k - 3k HVGs for cross-species mapping.
# NOTE(review): exp_mat2 is taken from the Seurat `data` slot while
# do_normalize = TRUE asks Symphony to normalize again - if that slot already
# holds log-normalized values this normalizes twice; confirm what `data` holds.
dim(exp_mat2)
heart.query <- mapQuery(
  exp_mat2,
  mtd2,
  heart.ref,
  vars = NULL,
  # TRUE/FALSE spelled out: T and F are ordinary variables that can be
  # reassigned, the full constants cannot.
  do_normalize = TRUE,
  do_umap = FALSE
)

Normalizing

Scaling and synchronizing query gene expression

Found 3079 out of 10000 reference variable genes in query dataset

Project query cells using reference gene loadings

Clustering query cells to reference centroids

Correcting query batch effects

All done!



In [14]:
# First pass: predict labels from the reference's `major_trajectory` column
# using 5 nearest neighbours.
heart.query <- knnPredict(
  heart.query,
  heart.ref,
  heart.ref$meta_data$major_trajectory,
  k = 5
)

# Start the output table from this pass. The prediction columns are copied
# now because the second knnPredict() call below is made with the same
# arguments apart from the labels, and the same column names are read after it.
coarse_meta <- heart.query$meta_data
newdf <- data.frame(
  barcode = rownames(coarse_meta),
  class = coarse_meta$cell_type_pred_knn,
  class_prob = coarse_meta$cell_type_pred_knn_prob
)

# Second pass: predict labels from the reference's `celltype_update` column
# and append them to the table.
heart.query <- knnPredict(
  heart.query,
  heart.ref,
  heart.ref$meta_data$celltype_update,
  k = 5
)
fine_meta <- heart.query$meta_data
newdf$celltype <- fine_meta$cell_type_pred_knn
newdf$celltype_prob <- fine_meta$cell_type_pred_knn_prob

# Preview the first three rows.
newdf[1:3, ]

Unnamed: 0_level_0,barcode,class,class_prob,celltype,celltype_prob
Unnamed: 0_level_1,<chr>,<fct>,<dbl>,<fct>,<dbl>
1,stage32_sample12_AAACCCAAGATCGGTG-1,Primitive_erythroid,0.6,Primitive erythroid cells,0.6
2,stage32_sample12_AAACCCAAGTCATACC-1,Muscle_cells,1.0,Muscle progenitor cells (Prdm1+),0.6
3,stage32_sample12_AAACCCAAGTTCACTG-1,Mesoderm,1.0,Lateral plate and intermediate mesoderm,0.6


In [15]:
# Save the prediction table as
# '<stage>_<sample>_Qiu_Symphony_label_transfer.csv' in the working directory.
out_file <- paste0(stages[sampleid], '_', samples[sampleid],
                   '_Qiu_Symphony_label_transfer.csv')

# The file is written comma-separated WITHOUT quoting, so a value that itself
# contains a comma would split into extra fields. Warn instead of silently
# producing a malformed row; the written bytes are unchanged.
cols_with_comma <- vapply(
  newdf,
  function(column) any(grepl(',', as.character(column), fixed = TRUE)),
  logical(1)
)
if (any(cols_with_comma)) {
  warning(
    'Unquoted CSV output: values containing commas found in column(s): ',
    paste(names(newdf)[cols_with_comma], collapse = ', '),
    call. = FALSE
  )
}

# TRUE/FALSE spelled out rather than the reassignable shorthands T/F.
write.table(newdf, file = out_file, sep = ',',
            row.names = FALSE, col.names = TRUE, quote = FALSE)