--- title: "Meta-analysis Testing" output: html_document --- ```{r} # Load required libraries library(SingleCellExperiment) library(Seurat) library(tidyverse) library(MetaNeighbor) # Load the Seurat objects hSeurat <- readRDS("hSeurat_annotated.rds") vtSeurat <- readRDS("vtSeurat_annotated.rds") # Convert to SCE objects hSCE <- as.SingleCellExperiment(hSeurat) vtSCE <- as.SingleCellExperiment(vtSeurat) # Create my_data mydata <- list(hSCE = hSCE, vtSCE = vtSCE) # Check whether the gene information align between the two datasets lapply(mydata, function(x) head(rownames(x), 3)) # cell-type information is labeled identically in all datasets lapply(mydata, function(x) colnames(colData(x))) # add the cell type information onto a new column name called "cell type" mydata$hSCE$"cell type" <- mydata$hSCE$ident mydata$vtSCE$"cell type" <- mydata$vtSCE$ident # Check that count matrices stored in the assays slot, have identical names lapply(mydata, function(x) names(assays(x))) # Create a merged dataset by using mergeSCE, which takes a list of SCE objects # as an input and outputs a single SCE object fused_data <- mergeSCE(mydata) dim(fused_data) dim(hSCE) dim(vtSCE) head(colData(fused_data)) # To obtain a cursory overview of cell-type composition by study, we cross-tabulate # cell-type annotations by study IDs table(fused_data$"cell type", fused_data$study_id) # It is okay to have identical cell type names (e.g., ductal/Ductal) at this stage. # To avoid having to recreate the merged object, save the R object to a file # by using R's RDS format. saveRDS(fused_data, "merged_2_datasets.rds") ``` : Hierarchical cell-type replicability analysis ```{r, fig.height = 8, fig.width = 10} # Load required libraries two_datasets <- readRDS("merged_2_datasets.rds") # Picking the highly variable genes global_hvgs <- variableGenes(dat = two_datasets, exp_labels = two_datasets$study_id) length(global_hvgs) # MetaNeighborUS returns a cell-type-by-cell-type matrix containing cell-type similarities. # This fast version should always be used on large datasets but can also be run on smaller datasets. aurocs <- MetaNeighborUS(var_genes = global_hvgs, dat = two_datasets, study_id = two_datasets$study_id, cell_type = two_datasets$"cell type", fast_version = TRUE) # Apparently, we can use this line of code as well. MetaNeighbor::ggPlotHeatmap(aurocs, label_size = 10) # To identify pairs of replicable cell types, run the following function top_hits <- topHits(aurocs, dat = two_datasets, study_id = two_datasets$study_id, cell_type = two_datasets$"cell type", threshold = 0.9) top_hits # When there is a clear structure in the data, we can refine AUROCs by # splitting the data. level1_split <- splitClusters(aurocs, k = 2) level1_split first_split <- level1_split[[1]] first_split # Repeat the MetaNeighbor analysis on immune cells only full_labels <- makeClusterName(two_datasets$study_id, two_datasets$`cell type`) subdata <- two_datasets[, full_labels %in% first_split] dim(subdata) # To focus on variability specific to endocrine cells, re-pick highly variable genes var_genes <- variableGenes(dat = subdata, exp_labels = subdata$study_id) # recompute cell-type similarities and visualize AUROCs aurocs <- MetaNeighborUS(var_genes = var_genes, dat = subdata, fast_version = TRUE, study_id = subdata$study_id, cell_type = subdata$"cell type") plotHeatmap(aurocs, cex = 0.7) # Continue to zoom in as long as there are at least two cell types per dataset. # Repeat steps 13-16 level2_split <- splitClusters(aurocs, k = 3) level2_split my_split <- level2_split[[1]] my_split subdata <- two_datasets[, full_labels %in% my_split] var_genes <- variableGenes(dat = subdata, exp_labels = subdata$study_id) length(var_genes) aurocs <- MetaNeighborUS(var_genes = var_genes, dat = subdata, fast_version = TRUE, study_id = subdata$study_id, cell_type = subdata$"cell type") plotHeatmap(aurocs, cex = 1) ```