---
title: "Suryawanshi et al. (2018)"
author: "Young-June Kim"
date: "2022-11-09"
output: html_document
---

```{r}
# Load required libraries
devtools::install_github("cellgeni/sceasy")
library(sceasy)
library(SingleCellExperiment)
library(Seurat)
# remotes::install_github("mojaveazure/seurat-disk")
library(SeuratDisk)
library(tidyverse)
library(RColorBrewer)
library(scDblFinder)
library(cowplot)
```

First, we download the loom file which was documented in the human cell atlas.
Since loading a loom file requires extreme amount of RAM, we will not start with the loom file locally.

```{r eval = FALSE}
# loom_obj <- Connect(filename = "first-trimester-decidua-human-placenta-10XV2.loom", mode = 'r')
# 
# seurat_loom <- as.Seurat(loom_obj)
# saveRDS(seurat_loom, "seuratObj.rds")
```

After getting the .rds file we can try loading the file into R locally..
Didn't crash:)
```{r}
# Load the RDS file
sSeurat <- readRDS("seuratObj.rds","rb")
dim(sSeurat)

# Proceed to the QC step. First, we check the columns in the metadata of the Seurat object.
sSeurat@meta.data %>% 
  colnames()

# There are so many column names, but we should check the percentage of mitochondrial genes for each cell. 
sSeurat$percent.mt <- PercentageFeatureSet(sSeurat, pattern = "^MT-")

# nCount_RNA is the number of UMI counts in a cell
hist(sSeurat$nCount_RNA)
summary(sSeurat$nCount_RNA)

# nFeature_RNA is the number of different genes that had any reads
hist(sSeurat$nFeature_RNA)
summary(sSeurat$nFeature_RNA)

# percent.mt is the percent mitochondrial reads
hist(sSeurat$percent.mt)
summary(sSeurat$percent.mt)

# Make a violin plot of the QC columns
plt <- VlnPlot(sSeurat, 
               features = c("nFeature_RNA", "nCount_RNA", "percent.mt"),
               ncol = 3,
               raster = FALSE) 
plt
```

Since most of the literature follows a 5% threshold for percent.mt, we also apply this to our study.

```{r}
plot1 <- FeatureScatter(sSeurat, 
                        feature1 = "nCount_RNA",
                        feature2 = "nFeature_RNA")
plot2 <- FeatureScatter(sSeurat, 
                        feature1 = "nCount_RNA",
                        feature2 = "percent.mt")
plot3 <- FeatureScatter(sSeurat, 
                        feature1 = "nFeature_RNA",
                        feature2 = "percent.mt")

plot1
plot2
plot3

sSeurat <- subset(sSeurat, subset = percent.mt < 25)

# Check density plot for a more stringent threshold
sSeurat@meta.data %>% 
  ggplot(aes(x = percent.mt)) +
  geom_density()

# More stringent threshold
sSeurat <- subset(sSeurat, subset = percent.mt < 10 & nFeature_RNA < 5000)
dim(sSeurat)
```


```{r}
# Find for doublets
sSeurat <- as.SingleCellExperiment(sSeurat)
sSeurat <- scDblFinder(sSeurat)
# Convert back to Seurat object.
sSeurat <- as.Seurat(sSeurat)

# Notice that our identifiers have changed.
levels(Idents(object = sSeurat))

# We want to specify the identifier whether the cell is a singlet or doublet.
table(sSeurat@meta.data$scDblFinder.class)

Idents(object = sSeurat) <- "scDblFinder.class"
sSeurat@meta.data$scDblFinder.class <- as.character(sSeurat@meta.data$scDblFinder.class)

# Subset the Seurat object to only include singlets.
sSeurat <- subset(sSeurat, idents = "singlet")
levels(as.factor(sSeurat@meta.data$scDblFinder.class))

# check dimensions of sSeurat
dim(sSeurat)
```


```{r}
# Next, we normalize the data
sSeurat <- NormalizeData(sSeurat)

# Explore the most variable features, let's use the default version first.
sSeurat <- FindVariableFeatures(sSeurat)
# sSeurat <- FindVariableFeatures(sSeurat, selection.method = "vst", nfeatures = 2000)

# Identify the 10 most highly variable genes
top10 <- head(VariableFeatures(sSeurat), 10) 
top10

# plot variable features with and without labels
plot2 <- VariableFeaturePlot(sSeurat)
plot2 <- LabelPoints(plot = plot2, points = top10, repel = TRUE)
plot2

# Convert normalized gene expression to Z-score (scale the data)
all.genes <- rownames(sSeurat)
sSeurat <- ScaleData(sSeurat, features = all.genes)

# Run PCA
seurat_loom <- RunPCA(seurat_loom)

# Plot the PCA
DimPlot(seurat_loom, reduction = "pca")

# Plot the loadings
VizDimLoadings(seurat_loom, dims = 1:2, reduction = "pca")

# Choose the number of principle components to keep
ElbowPlot(seurat_loom)
```