--- title: "Suryawanshi et al. (2018)" author: "Young-June Kim" date: "2022-11-09" output: html_document --- ```{r} # Load required libraries devtools::install_github("cellgeni/sceasy") library(sceasy) library(SingleCellExperiment) library(Seurat) # remotes::install_github("mojaveazure/seurat-disk") library(SeuratDisk) library(tidyverse) library(RColorBrewer) library(scDblFinder) library(cowplot) ``` First, we download the loom file which was documented in the human cell atlas. Since loading a loom file requires extreme amount of RAM, we will not start with the loom file locally. ```{r eval = FALSE} # loom_obj <- Connect(filename = "first-trimester-decidua-human-placenta-10XV2.loom", mode = 'r') # # seurat_loom <- as.Seurat(loom_obj) # saveRDS(seurat_loom, "seuratObj.rds") ``` After getting the .rds file we can try loading the file into R locally.. Didn't crash:) ```{r} # Load the RDS file sSeurat <- readRDS("seuratObj.rds","rb") dim(sSeurat) # Proceed to the QC step. First, we check the columns in the metadata of the Seurat object. sSeurat@meta.data %>% colnames() # There are so many column names, but we should check the percentage of mitochondrial genes for each cell. sSeurat$percent.mt <- PercentageFeatureSet(sSeurat, pattern = "^MT-") # nCount_RNA is the number of UMI counts in a cell hist(sSeurat$nCount_RNA) summary(sSeurat$nCount_RNA) # nFeature_RNA is the number of different genes that had any reads hist(sSeurat$nFeature_RNA) summary(sSeurat$nFeature_RNA) # percent.mt is the percent mitochondrial reads hist(sSeurat$percent.mt) summary(sSeurat$percent.mt) # Make a violin plot of the QC columns plt <- VlnPlot(sSeurat, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, raster = FALSE) plt ``` Since most of the literature follows a 5% threshold for percent.mt, we also apply this to our study. ```{r} plot1 <- FeatureScatter(sSeurat, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") plot2 <- FeatureScatter(sSeurat, feature1 = "nCount_RNA", feature2 = "percent.mt") plot3 <- FeatureScatter(sSeurat, feature1 = "nFeature_RNA", feature2 = "percent.mt") plot1 plot2 plot3 sSeurat <- subset(sSeurat, subset = percent.mt < 25) # Check density plot for a more stringent threshold sSeurat@meta.data %>% ggplot(aes(x = percent.mt)) + geom_density() # More stringent threshold sSeurat <- subset(sSeurat, subset = percent.mt < 10 & nFeature_RNA < 5000) dim(sSeurat) ``` ```{r} # Find for doublets sSeurat <- as.SingleCellExperiment(sSeurat) sSeurat <- scDblFinder(sSeurat) # Convert back to Seurat object. sSeurat <- as.Seurat(sSeurat) # Notice that our identifiers have changed. levels(Idents(object = sSeurat)) # We want to specify the identifier whether the cell is a singlet or doublet. table(sSeurat@meta.data$scDblFinder.class) Idents(object = sSeurat) <- "scDblFinder.class" sSeurat@meta.data$scDblFinder.class <- as.character(sSeurat@meta.data$scDblFinder.class) # Subset the Seurat object to only include singlets. sSeurat <- subset(sSeurat, idents = "singlet") levels(as.factor(sSeurat@meta.data$scDblFinder.class)) # check dimensions of sSeurat dim(sSeurat) ``` ```{r} # Next, we normalize the data sSeurat <- NormalizeData(sSeurat) # Explore the most variable features, let's use the default version first. sSeurat <- FindVariableFeatures(sSeurat) # sSeurat <- FindVariableFeatures(sSeurat, selection.method = "vst", nfeatures = 2000) # Identify the 10 most highly variable genes top10 <- head(VariableFeatures(sSeurat), 10) top10 # plot variable features with and without labels plot2 <- VariableFeaturePlot(sSeurat) plot2 <- LabelPoints(plot = plot2, points = top10, repel = TRUE) plot2 # Convert normalized gene expression to Z-score (scale the data) all.genes <- rownames(sSeurat) sSeurat <- ScaleData(sSeurat, features = all.genes) # Run PCA seurat_loom <- RunPCA(seurat_loom) # Plot the PCA DimPlot(seurat_loom, reduction = "pca") # Plot the loadings VizDimLoadings(seurat_loom, dims = 1:2, reduction = "pca") # Choose the number of principle components to keep ElbowPlot(seurat_loom) ```