In [None]:
# ..... snRNA-seq data covering multiple CHD conditions ..... #
# pseudobulk = mean CPM per cell type, computed for one condition at a time

In [17]:
library(Seurat)
library(SingleCellExperiment)
library(MetaMarkers)

In [19]:
#' Build a pseudo-bulk matrix of mean CPM per cell type.
#'
#' For every distinct value of `scdata$MainCellType` (ignoring `NA` and the
#' label 'unassigned'), averages the CPM values over all columns carrying
#' that label.
#'
#' @param scdata Object supporting `scdata$MainCellType`, `cpm(scdata)`,
#'   `dim(scdata)` and `rownames(scdata)`; in this notebook it is a
#'   SingleCellExperiment that has a "cpm" assay.
#' @return Numeric matrix with one row per row of `scdata` and one column per
#'   retained cell-type label (in order of first appearance). Row names are
#'   `rownames(scdata)`, column names are the labels. When no usable label
#'   exists the matrix has zero columns.
get_cpm_pseudo_bulk <- function(scdata){

    new_labels <- as.character(scdata$MainCellType)
    nbds <- unique(new_labels)
    nbds <- nbds[!is.na(nbds) & nbds != 'unassigned']

    # Pull the CPM assay out once; the original re-extracted it for every
    # cell type inside the loop.
    cpm_mat <- cpm(scdata)
    sc_sub2 <- matrix(NA_real_, nrow = dim(scdata)[1], ncol = length(nbds))

    # seq_along() instead of 1:length(): with zero usable labels, 1:0 would
    # iterate over c(1, 0) and fail on an out-of-bounds column assignment.
    for (i in seq_along(nbds)) {
        cols <- which(new_labels == nbds[i])
        # drop = FALSE keeps a one-column selection as a matrix, so a cell
        # type with a single cell needs no special case. Every label comes
        # from unique(new_labels), so `cols` is never empty here.
        sc_sub2[, i] <- rowSums(cpm_mat[, cols, drop = FALSE]) / length(cols)
    }

    rownames(sc_sub2) <- rownames(scdata)
    colnames(sc_sub2) <- nbds
    sc_sub2
}

In [13]:
# Load the GSE203274 expression matrix from its MTX / genes / barcodes files.
# feature.column = 1 takes feature names from the first column of the genes file.
expression_matrix <- ReadMtx(
    mtx = "GSE203274_expression_matrix.mtx.gz",
    cells = "GSE203274_barcodes.tsv.gz",
    features = "GSE203274_genes.tsv.gz",
    feature.column = 1
)
# Preview the first two rows.
expression_matrix[1:2, ]

  [[ suppressing 34 column names ‘P8_1_AAGCCATGTCGGCTAC-1’, ‘P40_2_GTTGTGAAGCGCCATC-1’, ‘P26_1_AGTGCCGAGATAACGT-1’ ... ]]



2 x 157293 sparse Matrix of class "dgCMatrix"
                                                                              
RP11-34P13.7 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
AL627309.1   1 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
                     
RP11-34P13.7 . ......
AL627309.1   . ......

 .....suppressing 157259 columns in show(); maybe adjust 'options(max.print= *, width = *)'
 ..............................

In [1]:
# Per-barcode annotation published with the study (comma-separated file).
mtd <- read.delim(
    file = "GSE203274_AllNuclei_snRNA_metadata.csv",
    sep = ","
)
# Show the first record and the table dimensions.
mtd[1, ]
dim(mtd)

Unnamed: 0_level_0,orig.ident,nCount_RNA,nFeature_RNA,labID,procedure,age,gender,echoEF,vers10X,diagnosis,⋯,region,batch_indices,percent.mt,ClinicalRank,DEid,MainCellType,Cluster,labID2,Diagnosis,patientID
Unnamed: 0_level_1,<chr>,<int>,<int>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,⋯,<chr>,<int>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
P8_1_AAGCCATGTCGGCTAC-1,P8_1,24524,7957,P8,Redo_Sternomoty_Norwood,0y_2m_3d,M,P8_1,v3,HLHS,⋯,RV,0,0.07339749,Neo_HLHS,CHD,CM,CM,P8,Neo_HLHS,P8


In [8]:
# Number of metadata rows per Cluster label.
table(mtd[["Cluster"]])


  Adipo      CF      CM    Endo   ENDOC    EpiC    EpiL     LEC     Mac    Mast 
    454   21034   73296   35673    1436      41      51     658    7010      92 
Neurons   PeriC     SMC  Tcells 
   1985   12037    2405    1121 

In [5]:
# Number of metadata rows per ClinicalRank group.
table(mtd[["ClinicalRank"]])


     DCM    Donor      HCM  HF_HLHS Neo_HLHS      TOF 
   27976    54260    21850    30376     6995    15836 

In [18]:
# Combine the count matrix and the study metadata into a SingleCellExperiment.
sce2 <- SingleCellExperiment(
    assays = list(counts = expression_matrix),
    colData = DataFrame(mtd)
)
# Store a "cpm" assay computed from the object's first assay (the counts).
assay(sce2, "cpm") <- convert_to_cpm(assay(sce2))
sce2

class: SingleCellExperiment 
dim: 29266 157293 
metadata(0):
assays(2): counts cpm
rownames(29266): RP11-34P13.7 AL627309.1 ... RP11-352D3.2 BABAM1
rowData names(0):
colnames(157293): P8_1_AAGCCATGTCGGCTAC-1 P40_2_GTTGTGAAGCGCCATC-1 ...
  RV_198_1_GTCGTAACACAAGCCC-1 RV_198_1_ATCGTGAAGGTCATAA-1
colData names(21): orig.ident nCount_RNA ... Diagnosis patientID
reducedDimNames(0):
mainExpName: NULL
altExpNames(0):

In [35]:
# Pseudo-bulk matrix (all genes) for a single ClinicalRank group.
cond <- "Neo_HLHS"
# Keep only the columns whose ClinicalRank equals the chosen condition.
sce3 <- sce2[, sce2$ClinicalRank == cond]
scbulk <- get_cpm_pseudo_bulk(sce3)
# Spot-check two genes across the cell-type columns.
scbulk[c("TBX5", "COL3A1"), ]

Unnamed: 0,CM,Neurons,Mac,Tcells,Mast,Endo,ENDOC,LEC,PeriC,SMC,EpiL,CF
TBX5,68.362433,8.721437,0.0,0,0,0.8042804,0.0,0.0,7.18254,2.655619,0.0,7.415878
COL3A1,1.548932,5.203455,7.585327,0,0,6.1575446,128.2879,51.92216,138.9104,33.921281,137.855,291.457927


In [36]:
# Write the pseudo-bulk matrix to an .Rdata file named after the condition.
save(
    scbulk,
    file = paste0("CHD_", cond, "_pseudobulk_expression_matrices.Rdata")
)