In [None]:
# Label transfer with Symphony: map lizard cells onto the Qiu E11.5 mouse reference and predict cell-type labels

In [2]:
library(symphony)
library(Seurat)
library(dplyr)
library(ggplot2)
library(SingleCellExperiment)

In [3]:
# Parallel lookup vectors: element i holds the stage / sample label of the
# i-th dataset (three samples for each of three stages, nine entries total).
stages <- rep(c('stage9', 'stage13', 'stage16'), each = 3)
samples <- rep(paste0('sample', 1:3), times = 3)

In [4]:
# Read the tab-separated lizard/mouse ortholog table and drop every row that
# lacks either the lizard gene symbol or the mouse ortholog name.
# NOTE(review): rows are not filtered on `orth_type` here -- confirm the file
# only contains one2one pairs if strict 1:1 orthology is required downstream.
om <- read.delim('lizard_mouse_orthologs_eggNOG.txt', sep = '\t')
missing_symbol <- is.na(om$lizard_gene) | is.na(om$ortholog_name)
om <- om[!missing_symbol, ]

# Quick sanity check: table dimensions and the first remaining row.
dim(om)
om[1, ]

Unnamed: 0_level_0,query,orth_type,species,orthologs,lizard_gene,ortholog_gene,ortholog_name
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
1,XP_060608688.1,one2one,Mus musculus(10090),ENSMUSP00000081225,ARHGEF10,ENSMUSG00000071176,Arhgef10


In [27]:
# Load the saved reference object; it is later passed as the reference to
# mapQuery() and knnPredict().
# NOTE(review): presumably a Symphony reference built from the Qiu E11.5
# mouse data (going by the file name) -- confirm.
ref_path <- '~/septation/markers/Qiu_E11.5_harmonyObj.rds'
heart.ref <- readRDS(ref_path)

In [38]:
# Pick one dataset by its index into `stages` / `samples` and read the
# matching "<stage>_<sample>_data.rds" file from the working directory.
sampleid <- 9
file0 <- sprintf('%s_%s_data.rds', stages[sampleid], samples[sampleid])
sce <- readRDS(file0)

In [39]:
# Rename query genes to their mouse ortholog names and keep only genes that
# can be mapped unambiguously.
# match() returns the first hit per lizard gene, so a gene listed more than
# once in `om` silently takes its first ortholog.
# NOTE(review): nothing here restricts `om` to one2one rows -- verify that the
# ortholog table is already 1:1 if that is what is intended.
ortholog_row <- match(rownames(sce), om$lizard_gene)
genelist <- om$ortholog_name[ortholog_row]
genelist[1:10]

# Cell metadata and raw counts, read directly from the object's slots.
mtd2 <- sce@meta.data
exp_mat2 <- sce@assays$RNA@counts
rownames(exp_mat2) <- genelist

# Keep a row when it has a non-missing, non-empty ortholog name and is the
# first row carrying that name (later duplicates are discarded).
has_ortholog <- !(is.na(genelist) | genelist == '')
first_occurrence <- !duplicated(genelist)
exp_mat2 <- exp_mat2[has_ortholog & first_occurrence, ]

In [40]:
# Map the query cells onto the reference with Symphony.
# Only reference variable genes that are present among the renamed query genes
# contribute to the mapping; the function reports how many were found.
# NOTE(review): the original note "1k-3.5k HVGs for cross-species mapping"
# presumably gives the expected range of shared variable genes -- confirm.
dim(exp_mat2)

# vars = NULL: no query metadata column is supplied as a batch variable.
# do_normalize = TRUE: the raw counts are normalized inside mapQuery().
# do_umap = FALSE: no UMAP projection of the query is computed.
# Spelled-out TRUE/FALSE are used because T/F are ordinary, reassignable
# variables in R.
heart.query <- mapQuery(
  exp_mat2,
  mtd2,
  heart.ref,
  vars = NULL,
  do_normalize = TRUE,
  do_umap = FALSE
)

Normalizing

Scaling and synchronizing query gene expression

Found 3225 out of 10000 reference variable genes in query dataset

Project query cells using reference gene loadings

Clustering query cells to reference centroids

Correcting query batch effects

All done!



In [41]:
# Transfer two levels of reference annotation to the query cells with a
# 5-nearest-neighbour vote. Each knnPredict() call writes its result into the
# same metadata columns (`cell_type_pred_knn`, `cell_type_pred_knn_prob`), so
# the first prediction is copied out before the second call overwrites it.

# Coarse labels: reference `major_trajectory`.
heart.query <- knnPredict(heart.query, heart.ref, heart.ref$meta_data$major_trajectory, k = 5)
barcodes <- rownames(heart.query$meta_data)
class_pred <- heart.query$meta_data$cell_type_pred_knn
class_pred_prob <- heart.query$meta_data$cell_type_pred_knn_prob

# Fine labels: reference `celltype_update`.
heart.query <- knnPredict(heart.query, heart.ref, heart.ref$meta_data$celltype_update, k = 5)
celltype_pred <- heart.query$meta_data$cell_type_pred_knn
celltype_pred_prob <- heart.query$meta_data$cell_type_pred_knn_prob

# One row per query cell, ready to be saved.
newdf <- data.frame(
  barcode = barcodes,
  class = class_pred,
  class_prob = class_pred_prob,
  celltype = celltype_pred,
  celltype_prob = celltype_pred_prob
)
newdf[1:3, ]

Unnamed: 0_level_0,barcode,class,class_prob,celltype,celltype_prob
Unnamed: 0_level_1,<chr>,<fct>,<dbl>,<fct>,<dbl>
1,stage16_sample3_AAACCCAAGCAAATCA-1,Definitive_erythroid,0.8,Definitive early erythroblasts (CD36-),0.8
2,stage16_sample3_AAACCCAAGCGTGAAC-1,Mesoderm,1.0,Facial mesenchyme,0.6
3,stage16_sample3_AAACCCAAGGAGACCT-1,Definitive_erythroid,1.0,Definitive early erythroblasts (CD36-),1.0


In [42]:
# save
write.table(newdf, file = paste0(stages[sampleid], '_', samples[sampleid], '_Qiu_Symphony_label_transfer.csv'), 
            sep = ',', row.names = F, col.names = T, quote = F)