In [None]:
# ..... get pseudobulk matrices for each stage ..... #

In [2]:
library(Seurat)
library(SingleCellExperiment)
library(MetaMarkers)

In [3]:
# Load the integrated object for one stage and attach cell-type annotations.
#
# Reads '<currstage1>_integrated_cca.rds' and
# '<currstage1>_Knight-Schrijver_celltypes.csv' (columns used: barcode, class,
# final_celltype); both paths are relative to the working directory.
#
# Args:
#   currstage1: file-name prefix identifying the stage (e.g. 'stage21').
#   min_cells:  cell types (by celltype2) with fewer cells than this, counted
#               after unannotated/'unassigned' cells are removed, are dropped.
#               Defaults to 50.
#
# Returns the object read from the .rds file (expected to be a Seurat object)
# with `class`, `celltype` and `celltype2` metadata added, subset to annotated
# cells belonging to sufficiently large cell types.
get_seurat_obj <- function(currstage1, min_cells = 50) {

    sc1 <- readRDS(paste0(currstage1, '_integrated_cca.rds'))

    # keep annotations as character so subclass labels can be written into
    # celltype2 regardless of the R version's stringsAsFactors default
    df1 <- read.delim(paste0(currstage1, '_Knight-Schrijver_celltypes.csv'),
                      sep = ',', stringsAsFactors = FALSE)

    # cells whose barcode is missing from the annotation table get NA labels
    ids <- match(rownames(sc1@meta.data), df1$barcode)
    sc1$class <- df1$class[ids]
    sc1$celltype <- df1$final_celltype[ids]

    # convert only immature_other into subclass labels
    # which() discards NA comparisons: a logical mask containing NA would make
    # the subscripted assignment below fail when any cell is unannotated
    immature <- which(sc1$class == 'Immature_other')
    sc1$celltype2 <- sc1$class
    sc1$celltype2[immature] <- sc1$celltype[immature]

    # drop cells that are unannotated or explicitly unassigned
    annotated <- !is.na(sc1$celltype) & sc1$celltype != 'unassigned'
    sc1 <- sc1[, annotated]

    # remove cell types with < min_cells cells across all samples combined
    type_sizes <- table(sc1$celltype2)
    rare_types <- names(type_sizes)[type_sizes < min_cells]
    sc1 <- sc1[, !(sc1$celltype2 %in% rare_types)]

    return(sc1)
}

# Build a pseudo-bulk matrix: the mean CPM of every gene within each cell type.
#
# Args:
#   scdata: object exposing `$celltype2` (one label per cell), `dim()`,
#           `rownames()` and a `cpm()` accessor returning a genes x cells
#           matrix (e.g. a SingleCellExperiment with a "cpm" assay).
#
# Returns a matrix with one row per row of `scdata` and one column per distinct
# label in `celltype2`, excluding NA and 'unassigned'. Columns follow the order
# in which labels first appear. With no usable labels the result has 0 columns.
get_cpm_pseudo_bulk <- function(scdata){

    new_labels <- scdata$celltype2
    nbds <- unique(new_labels)
    nbds <- nbds[!is.na(nbds) & nbds != 'unassigned']

    # fetch the CPM matrix once instead of on every loop iteration
    cpm_mat <- cpm(scdata)

    sc_sub2 <- matrix(NA_real_, nrow = dim(scdata)[1], ncol = length(nbds))

    # seq_along() yields an empty loop when no labels remain (1:0 would not)
    for (i in seq_along(nbds)) {
        # which() ignores cells whose label is NA
        cols <- which(new_labels == nbds[i])
        # drop = FALSE keeps a single-cell group two-dimensional, so one
        # expression covers both the one-cell and many-cell cases
        sc_sub2[, i] <- rowSums(cpm_mat[, cols, drop = FALSE]) / length(cols)
    }

    rownames(sc_sub2) <- rownames(scdata)
    colnames(sc_sub2) <- nbds
    return(sc_sub2)
}

In [16]:
# read the annotated, filtered object for the stage being processed
stage1 <- 'stage21'
sc1 <- get_seurat_obj(currstage1 = stage1)

In [17]:
# wrap the raw RNA counts and cell metadata in a SingleCellExperiment,
# then store the converted counts as a second assay named "cpm"
sce2 <- SingleCellExperiment(
    assays = list(counts = sc1@assays$RNA@counts),
    colData = DataFrame(sc1@meta.data)
)
assay(sce2, "cpm") <- convert_to_cpm(assay(sce2, "counts"))
sce2

class: SingleCellExperiment 
dim: 26230 25470 
metadata(0):
assays(2): counts cpm
rownames(26230): OTX2 LOC101934893 ... LOC101937213 LOC122172870
rowData names(0):
colnames(25470): Stage_21_sample1A_AAACCCAAGATAACAC-1
  Stage_21_sample1A_AAACCCAAGGTCCCGT-1 ...
  Stage_21_sample2_TTTGTTGTCATTGCGA-1
  Stage_21_sample2_TTTGTTGTCATTGTGG-1
colData names(31): orig.ident nCount_RNA ... celltype celltype2
reducedDimNames(0):
mainExpName: NULL
altExpNames(0):

In [18]:
# average CPM per cell type for all genes; print two genes as a quick check
scbulk1 <- get_cpm_pseudo_bulk(scdata = sce2)
scbulk1[c('TBX5', 'COL3A1'), ]

Unnamed: 0,Smooth_Muscle_Cells,Immature_Endothelium,Immature_FB-like,Fibroblasts,Pericytes_Stromal,Cardiomyocytes,Immature_Neuralcrest,Endothelial_Capillaries,Epicardium_FB-like,Endocardial,Endothelial_Venous,Epicardium_Proliferating,Pericytes,Epicardium_Meso,Endothelial_Arterial,Immature_Cardiomyocytes,Lymphoid_Immune_Cells,Myeloid_Immune_Cells
TBX5,11.97161,4.780456,12.04751,59.31122,10.34886,337.3324,10.5031,10.27571,16.47388,7.179521,9.649388,8.592616,27.33339,44.02892,13.46522,28.20252,13.62577,0.0
COL3A1,1323.47473,347.049293,351.24124,2467.88629,1028.38635,202.9499,436.5065,1250.47412,1320.76189,1277.536034,618.656567,336.058129,1912.19433,461.56198,692.76905,400.26682,291.04398,241.5338


In [19]:
# write the pseudo-bulk matrix to '<stage1>_pseudobulk_expression_matrices.Rdata'
save(list = 'scbulk1',
     file = paste0(stage1, '_pseudobulk_expression_matrices.Rdata'))