In [None]:
# Goal: compute cell-type markers for the GSE157329 data with MetaMarkers.

In [4]:
library(SingleCellExperiment)
library(dplyr)
library(MetaMarkers)

In [5]:
# Read the GSE157329 cell annotation table into `mtd`.
mtd <- data.table::fread("GSE157329_cell_annotate.txt.gz")
# Show the table dimensions, then preview its first two rows.
dim(mtd)
mtd[seq_len(2), ]

cell_id,cluster_id,developmental system,annotation,final_annotation,embryo,sample,stage,dissection_part,total_UMIs,barcode
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<int>,<chr>
h0_1,neural progenitor-23,neural progenitor,pB3,pB3,emb2,h0,CS13-14,head,5927,AAACCTGAGACAATAC-1
h0_2,head mesoderm-6,head mesoderm,undefined (CYP26C1),undefined (CYP26C1),emb2,h0,CS13-14,head,8572,AAACCTGAGATCGGGT-1


In [18]:
# Distinct values of the "developmental system" column, in sorted order.
sort(unique(mtd[["developmental system"]]))

In [37]:
# Labels (matched against `final_annotation`) of the cells to keep.
ctype_list <- c(
  "arterial endothelium", "vascular endothelium-1", "vascular endothelium-2",
  "endocardium-1", "endocardium-2", "endocardial derived cell",
  "Second heart field (SHF)", "cardiomyocyte-1", "epicardium",
  "pericyte (myocardium)", "epicardial derived cell-1",
  "atrioventricular canal", "ventricle cardiomyocyte-1",
  "sinoatrial node (SAN)", "atria cardiomyocyte-1", "atria cardiomyocyte-2",
  "ventricle cardiomyocyte-2", "epicardial derived cell-2",
  "Schwann progenitor-2", "Schwann progenitor-1", "Schwann progenitor-3",
  "Schwann progenitor like", "endothelium like", "splanchnic LPM like",
  "cardiomyocyte like", "fibroblast-1", "fibroblast-9", "fibroblast-2",
  "fibroblast-5", "fibroblast-4", "fibroblast-8", "fibroblast-7",
  "fibroblast-6", "fibroblast-3", "fibroblast-10", "fibroblast-11",
  "epithelium-1", "epithelium-4", "epithelium-2", "epithelium-5",
  "epithelium-3", "macrophage-1", "neutrophil", "macrophage-2",
  "dendritic cell", "macrophage-3", "kupffer cell-1", "megakaryocyte",
  "lymphocyte", "eosino/basophil/mast cell progenitor",
  "primary neutrophil granules", "hematopoietic stem and progenitor cell",
  "kupffer cell-2", "erythroid"
)

# Row positions in `mtd` whose final annotation is one of the labels above.
ids <- which(mtd[["final_annotation"]] %in% ctype_list)

In [11]:
# Load the raw count matrix together with its gene and cell annotation files.
# ReadMtx() comes from Seurat, which this notebook never attaches with
# library(); the call is namespace-qualified so it resolves without relying on
# a package loaded elsewhere in the session.
expression_matrix <- Seurat::ReadMtx(
  mtx = "GSE157329_raw_counts.mtx.gz",
  features = "GSE157329_gene_annotate.txt.gz",
  cells = "GSE157329_cell_annotate.txt.gz",
  # Skip the first line of each annotation file before reading names.
  skip.cell = 1,
  skip.feature = 1
)
# Preview the first three rows of the matrix.
expression_matrix[1:3,]

  [[ suppressing 34 column names ‘h0_1’, ‘h0_2’, ‘h0_3’ ... ]]



3 x 185140 sparse Matrix of class "dgCMatrix"
                                                                               
MIR1302-2HG . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
FAM138A     . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
OR4F5       . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
                  
MIR1302-2HG ......
FAM138A     ......
OR4F5       ......

 .....suppressing 185106 columns in show(); maybe adjust options(max.print=, width=)
 ..............................

In [38]:
# Build a SingleCellExperiment restricted to the selected cells: the count
# columns and the metadata rows are both subset with `ids`.
sce2 <- SingleCellExperiment(
  assays = list(counts = expression_matrix[, ids]),
  colData = DataFrame(mtd[ids, ])
)
# Store a "cpm" assay computed by MetaMarkers' convert_to_cpm() from the counts.
assay(sce2, "cpm") <- convert_to_cpm(assay(sce2, "counts"))

In [39]:
# Compute markers for every cell type in the selected data, using the "cpm"
# assay as expression and the final annotation as cell type labels.
markers <- compute_markers(
  assay(sce2, "cpm"),
  cell_type_labels = colData(sce2)$final_annotation
)
# Preview the top of the marker table.
head(markers)

group,cell_type,gene,fold_change,auroc,log_fdr,population_size,population_fraction,average_expression,se_expression,detection_rate,fold_change_detection,precision,recall
<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
all,Schwann progenitor like,HMGB1,1.674851,0.7777704,-476.4499,1080,0.03960541,3841.4416,46.71196,1.0,1.007455,0.0398892,1.0
all,Schwann progenitor like,H3F3A,1.48793,0.7672153,-440.7454,1080,0.03960541,4059.9521,37.36129,1.0,1.003061,0.03972195,1.0
all,Schwann progenitor like,TMSB15A,2.275658,0.7641205,-430.5543,1080,0.03960541,622.735,12.49178,0.9046296,1.448913,0.05640878,0.9046296
all,Schwann progenitor like,RPL6,1.416226,0.7602451,-417.9588,1080,0.03960541,4950.2245,41.22501,1.0,1.001605,0.03966651,1.0
all,Schwann progenitor like,NASP,2.047293,0.7377637,-348.5159,1080,0.03960541,826.7276,17.77355,0.9101852,1.164323,0.04582323,0.9101852
all,Schwann progenitor like,PEG10,3.829771,0.7359961,-343.3254,1080,0.03960541,579.0394,21.47235,0.7435185,1.799857,0.06916451,0.7435185


In [40]:
# Preview the first marker rows whose cell type is "cardiomyocyte-1".
head(markers[markers[["cell_type"]] == "cardiomyocyte-1", ])

group,cell_type,gene,fold_change,auroc,log_fdr,population_size,population_fraction,average_expression,se_expression,detection_rate,fold_change_detection,precision,recall
<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
all,cardiomyocyte-1,NKX2-5,5.462101,0.7446793,-126.32236,363,0.01331182,207.4378,12.7829,0.5757576,6.694939,0.0836,0.5757576
all,cardiomyocyte-1,MDK,1.643578,0.7351701,-116.56715,363,0.01331182,3218.8731,76.21605,0.9972452,1.061406,0.01411856,0.9972452
all,cardiomyocyte-1,TPM1,1.472913,0.7076667,-90.54889,363,0.01331182,2031.6147,116.30607,0.9146006,1.142796,0.01518617,0.9146006
all,cardiomyocyte-1,H3F3A,1.353956,0.6931772,-78.14347,363,0.01331182,3748.0476,74.33465,0.9972452,1.000182,0.01331421,0.9972452
all,cardiomyocyte-1,RPS27,1.222505,0.6754472,-64.19969,363,0.01331182,8415.8697,129.59121,1.0,1.0,0.01331182,1.0
all,cardiomyocyte-1,BEX3,1.431141,0.6753624,-64.13647,363,0.01331182,1166.477,31.93729,0.9586777,1.033633,0.01375385,0.9586777


In [41]:
# save marker list
# `markers` is the single table returned by compute_markers(), not a list of
# per-dataset tables, so names(markers) would yield its column names ("group",
# "cell_type", "gene", ...) rather than dataset names. Write it with the
# single-dataset exporter instead of export_meta_markers().
# NOTE(review): any downstream code that read this file with
# read_meta_markers() should presumably switch to read_markers(); also confirm
# the exact output file name, since MetaMarkers may compress the file.
export_markers(markers, "Xu_celltype_markers.csv")