In [None]:
# Label transfer with Symphony: map turtle query cells onto a mouse reference

In [2]:
library(symphony)
library(Seurat)
library(dplyr)
library(ggplot2)
library(SingleCellExperiment)

In [3]:
# Index-aligned lookup vectors: entry i of `stages` is the stage of entry i of
# `samples` (three samples per stage).
stages <- rep(c("stage14", "stage17", "stage21"), each = 3)
samples <- paste0(
  "sample",
  c("4", "7", "8A", "3_Triangle", "5B", "5Y", "1A", "1B", "2")
)

In [4]:
# Read the mouse-turtle ortholog table and keep only one-to-one orthologs for
# which both the mouse and the turtle gene name are non-empty.
om <- read.delim("mouse_turtle_orthologs_biomart.txt", sep = "\t")
colnames(om) <- c(
  "mouse", "turtle", "LCA", "homology_type", "orthology_confidence"
)
has_both_names <- om$mouse != "" & om$turtle != ""
is_one2one <- om$homology_type == "ortholog_one2one"
om <- om[has_both_names & is_one2one, ]
dim(om)
om[1, ]

Unnamed: 0_level_0,mouse,turtle,LCA,homology_type,orthology_confidence
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<int>
11,mt-Nd2,ND2,Vertebrata,ortholog_one2one,1


In [21]:
# Load the saved reference object that the query is mapped onto below
# (presumably a Symphony reference built from the Qiu E11.5 data, going by the
# file name -- confirm).
heart.ref <- readRDS("~/septation/markers/Qiu_E11.5_harmonyObj.rds")

In [32]:
# Pick one sample by its position in `stages` / `samples` and load its saved
# object from "<stage>_<sample>_data.rds".
# NOTE(review): despite the name `sce`, later cells access it through Seurat
# slots (@assays, @meta.data).
sampleid <- 9
file0 <- sprintf("%s_%s_data.rds", stages[sampleid], samples[sampleid])
sce <- readRDS(file0)

In [33]:
# Rename query genes (turtle symbols) to their mouse one-to-one orthologs and
# drop genes without an ortholog, so the query shares gene names with the
# mouse reference.
exp_mat2 <- sce@assays$RNA@counts
mtd2 <- sce@meta.data

# Take the gene names from the count matrix itself so they are guaranteed to
# line up with its rows, whatever rownames(sce) would report.
genelist <- om$mouse[match(rownames(exp_mat2), om$turtle)]
head(genelist, 10)

# Decide which rows to keep before renaming so NA never ends up in dimnames.
has_ortholog <- !is.na(genelist)
if (!any(has_ortholog)) {
  stop(
    "No query gene matched a turtle symbol in the ortholog table",
    call. = FALSE
  )
}
if (anyDuplicated(genelist[has_ortholog]) > 0) {
  # Duplicated row names make name-based gene lookups ambiguous downstream.
  warning(
    "Some mouse ortholog names occur more than once in the query matrix",
    call. = FALSE
  )
}

# drop = FALSE keeps a matrix even if only a single gene survives the filter.
exp_mat2 <- exp_mat2[has_ortholog, , drop = FALSE]
rownames(exp_mat2) <- genelist[has_ortholog]

In [34]:
# Map the query onto the reference. Only reference variable genes that are
# also present in the ortholog-renamed query can be used; for cross-species
# mapping expect roughly 1.5k-3k of them (mapQuery() reports the exact count).
dim(exp_mat2)
heart.query <- mapQuery(
  exp_mat2, mtd2, heart.ref,
  vars = NULL,           # no query batch variables supplied
  do_normalize = TRUE,   # exp_mat2 holds raw counts
  do_umap = FALSE
)

Normalizing

Scaling and synchronizing query gene expression

Found 2859 out of 10000 reference variable genes in query dataset

Project query cells using reference gene loadings

Clustering query cells to reference centroids

Correcting query batch effects

All done!



In [35]:
# First pass: transfer the reference `major_trajectory` labels with 5-NN.
heart.query <- knnPredict(
  heart.query, heart.ref,
  heart.ref$meta_data$major_trajectory,
  k = 5
)

# Copy these predictions out now: the second knnPredict() call below is read
# back from the same meta_data columns, so it replaces them.
newdf <- data.frame(
  barcode = rownames(heart.query$meta_data),
  class = heart.query$meta_data[["cell_type_pred_knn"]],
  class_prob = heart.query$meta_data[["cell_type_pred_knn_prob"]]
)

# Second pass: transfer the reference `celltype_update` labels.
heart.query <- knnPredict(
  heart.query, heart.ref,
  heart.ref$meta_data$celltype_update,
  k = 5
)
newdf$celltype <- heart.query$meta_data[["cell_type_pred_knn"]]
newdf$celltype_prob <- heart.query$meta_data[["cell_type_pred_knn_prob"]]
newdf[1:3, ]

Unnamed: 0_level_0,barcode,class,class_prob,celltype,celltype_prob
Unnamed: 0_level_1,<chr>,<fct>,<dbl>,<fct>,<dbl>
1,stage21_sample2_AAACCCAAGCAAATGT-1,Endothelium,1,Endothelium,1.0
2,stage21_sample2_AAACCCAAGCCGTCGT-1,Definitive_erythroid,1,Definitive early erythroblasts (CD36-),1.0
3,stage21_sample2_AAACCCAAGTCACAGG-1,Mesoderm,1,Facial mesenchyme,0.6


In [36]:
# Save the per-cell predictions as "<stage>_<sample>_Qiu_Symphony_label_transfer.csv".
# NOTE(review): with quote = FALSE, a label containing a comma would shift
# columns in the CSV -- confirm no reference label does, or enable quoting.
write.table(
  newdf,
  file = paste0(
    stages[sampleid], "_", samples[sampleid],
    "_Qiu_Symphony_label_transfer.csv"
  ),
  sep = ",", row.names = FALSE, col.names = TRUE, quote = FALSE
)