# Load libraries
library(SingleCellExperiment)
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::collapse()   masks IRanges::collapse()
## ✖ dplyr::combine()    masks Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::count()      masks matrixStats::count()
## ✖ dplyr::desc()       masks IRanges::desc()
## ✖ tidyr::expand()     masks S4Vectors::expand()
## ✖ dplyr::filter()     masks stats::filter()
## ✖ dplyr::first()      masks S4Vectors::first()
## ✖ dplyr::lag()        masks stats::lag()
## ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce()     masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename()     masks S4Vectors::rename()
## ✖ dplyr::slice()      masks IRanges::slice()
library(Seurat)
## Attaching SeuratObject
## Attaching sp
## 
## Attaching package: 'Seurat'
## 
## The following object is masked from 'package:SummarizedExperiment':
## 
##     Assays
library(RColorBrewer)
library(scDblFinder)
library(magrittr)
## 
## Attaching package: 'magrittr'
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## The following object is masked from 'package:GenomicRanges':
## 
##     subtract
library(pheatmap)

# Define color palettes
WtOrRd_pal <- colorRampPalette(c("#FFFFFF", brewer.pal(9, "OrRd")))(100)
  1. Creating the Seurat Object
# Data Wrangling Processes
Hcounts <- read.delim("GSM4008722_Placenta1_dge.txt")
Hcounts[1:10, 1:10]

# set the row names
Hcounts %<>% 
  column_to_rownames("GENE")

head(Hcounts)

# We have our prepared annotations ready as well
# Load the cell annotation data
annotations <- read_csv(file = "Placenta1_rmbatchAnno.csv")

# Notice that the number of samples (cells) in hSeurat is not identical to the number of rows of this annotation file.
dim(annotations)[1] == dim(hSeurat)[2]

sweep <- (colnames(hSeurat) %in% (annotations$Cell_barcode))

# Filter out the count data
Hcounts <- Hcounts[, sweep]

# Create the Seurat object
hSeurat <- CreateSeuratObject(counts = Hcounts)

Let’s continue adding the annotations to the metadata.

# make the annotations object compatible for data analysis.
anno_revised <- annotations %>% 
  select(Cell_barcode, ident, Celltype)

# Extract the metadata
meta_data.copy <- hSeurat@meta.data

# make new column that copies the rownames
meta_data.copy %<>% 
  mutate(Cell_barcode = rownames(meta_data.copy))

# add the annotations!!!
meta_data.copy <- 
  inner_join(meta_data.copy, anno_revised, by = 'Cell_barcode')

# Reassign the metadata
hSeurat@meta.data$Cluster <- meta_data.copy$ident
hSeurat@meta.data$Annotation <- meta_data.copy$Celltype

# The Seurat object became smaller.
dim(hSeurat)

# Use head() to inspect the metadata
head(hSeurat@meta.data)

# Let's save this object to overwrite RDS file
saveRDS(hSeurat, "hSeurat.rds")

For convenience, we can start running from this chunk of code, since we have already saved the Seurat object as a file in our directory.

# Start running from this line of code
hSeurat <- readRDS("hSeurat.rds")

# Add in the Mitochondrial PCT% information
hSeurat$percent.mt <- PercentageFeatureSet(hSeurat, pattern = "^MT-")

# nCount_RNA is the number of UMI counts in a cell
hist(hSeurat$nCount_RNA)

summary(hSeurat@meta.data$nCount_RNA)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     504     913    1091    1280    1437    8040
# nFeature_RNA is the number of different genes that had any reads
hist(hSeurat$nFeature_RNA)

summary(hSeurat@meta.data$nFeature_RNA)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   301.0   514.5   596.0   650.4   731.0  2227.0
# percent.mt is the percent mitochondrial reads
summary(hSeurat$percent.mt)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.4028  3.5161  5.0484  5.6198  6.8923 19.9390
# Later, we will subset the Seurat object based on a percent.mt cutoff.

# Check for the percentage of ribosomal genes
hSeurat[["percent.rb"]] <- PercentageFeatureSet(hSeurat, pattern = "^RP[SL]")
summary(hSeurat$percent.rb)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2.32   13.60   17.14   17.01   20.26   51.70

Another important part for QC is removing doublets.

# Find for doublets
hSeurat <- as.SingleCellExperiment(hSeurat)
hSeurat <- scDblFinder(hSeurat)
## Creating ~7676 artificial doublets...
## Dimensional reduction
## Evaluating kNN...
## Training model...
## iter=0, 1251 cells excluded from training.
## iter=1, 984 cells excluded from training.
## iter=2, 842 cells excluded from training.
## Threshold found:0.526
## 670 (7%) doublets called
# Convert back to Seurat object.
hSeurat <- as.Seurat(hSeurat)

# Notice that our identifiers have changed.
levels(Idents(object = hSeurat))
## [1] "SingleCellExperiment"
# We want to specify the identifier whether the cell is a singlet or doublet.
table(hSeurat@meta.data$scDblFinder.class)
## 
## singlet doublet 
##    8925     670
Idents(object = hSeurat) <- "scDblFinder.class"
hSeurat@meta.data$scDblFinder.class <- as.character(hSeurat@meta.data$scDblFinder.class)

# Subset the Seurat object to only include singlets.
hSeurat <- subset(hSeurat, idents = "singlet")
levels(as.factor(hSeurat@meta.data$scDblFinder.class))
## [1] "singlet"
# convert column names to make the protocol coherent
colnames(hSeurat@meta.data)[2] <- "nCount_RNA"
colnames(hSeurat@meta.data)[3] <- "nFeature_RNA"

# We draw a violin plot to explore the values in meta data
VlnPlot(hSeurat, 
        features = c("nFeature_RNA","nCount_RNA","percent.mt","percent.rb"),
        ncol = 4,
        pt.size = 0.1) & 
  theme(plot.title = element_text(size=10))

# The following four lines of code are for scatterplots addressing the correlations of every 2 pairs
FeatureScatter(hSeurat, feature1 = "nCount_RNA", feature2 = "percent.mt")

FeatureScatter(hSeurat, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")

FeatureScatter(hSeurat, feature1 = "nCount_RNA", feature2 = "percent.rb")

FeatureScatter(hSeurat, feature1 = "percent.rb", feature2 = "percent.mt")

# We also check if there is correlation between number of features and the doublet score
FeatureScatter(hSeurat, feature1 = "nFeature_RNA", feature2 = "scDblFinder.score")

# Since the typical cutoff for percent.mt is 5%, we will try out < 5. 
# Last week we used 10%, and couldn't see much difference.
# Also, let't filter out cells that have less than 5% ribosomal content.
hSeurat <- subset(hSeurat, subset = percent.mt < 5 & percent.rb > 5)

dim(hSeurat)
## [1] 23929  4383

I tried to subset the dataset based on nCount_RNA > 500 as written in the publication, but the dataset seems to be already filtered. Now, we have a reduced version of the Seurat object, where the cells contain less than 5% mitochondrial genes.

# Next, we normalize the data
hSeurat <- NormalizeData(hSeurat)

# Explore the most variable features, let's use the default version first.
hSeurat <- FindVariableFeatures(hSeurat)
# hSeurat <- FindVariableFeatures(hSeurat, selection.method = "vst", nfeatures = 2000)

# Identify the 10 most highly variable genes
top10 <- head(VariableFeatures(hSeurat), 10) 
top10
##  [1] "HBZ"    "CGA"    "TFPI2"  "CSH1"   "CSH2"   "MMP12"  "LAIR2"  "HBB"   
##  [9] "CHI3L1" "PSG3"
# plot variable features with and without labels
plot2 <- VariableFeaturePlot(hSeurat)
plot2 <- LabelPoints(plot = plot2, points = top10, repel = TRUE)
## When using repel, set xnudge and ynudge to 0 for optimal results
plot2
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 2747 rows containing missing values (geom_point).

# Convert normalized gene expression to Z-score
all.genes <- rownames(hSeurat)
hSeurat <- ScaleData(hSeurat, features = all.genes)
## Centering and scaling data matrix
# Run PCA analysis
hSeurat <- RunPCA(hSeurat, features = VariableFeatures(object = hSeurat))
## PC_ 1 
## Positive:  FTL, TMSB4X, TYROBP, CD74, HLA-DRA, FCER1G, CTSS, HLA-DRB1, LAPTM5, HLA-DPA1 
##     HLA-DPB1, APOC1, LYZ, HLA-DRB6, TREM2, CST3, HLA-B, AIF1, SRGN, GPNMB 
##     SPP1, APOE, ITGB2, RGS1, CXCL2, BCL2A1, SOD2, CCL3, HLA-A, GPR183 
## Negative:  EBI3, KRT7, MFAP5, ISM2, HPGD, FLT1, HMGB3, S100P, KRT19, TNFSF10 
##     PRG2, PRSS8, HLA-G, TFPI, FSTL3, DIO2, NOTUM, PLAC8, KRT8, XAGE2 
##     QSOX1, FN1, LVRN, GDF15, PHLDA2, KRT18, TIMP3, ADM, TPM1, AOC1 
## PC_ 2 
## Positive:  B2M, TYROBP, PSAP, SAT1, CTSB, CD74, FTH1, NPC2, CTSD, HLA-DRB1 
##     CTSS, HLA-DPA1, HLA-DRA, HLA-DRB6, LAPTM5, FCER1G, TREM2, IFI6, HLA-DPB1, CSTB 
##     LGMN, HLA-B, ACP5, APOC1, CAPG, CTSL, SH3BGRL3, RGS1, LYZ, VAMP8 
## Negative:  COL3A1, COL1A1, COL1A2, SPARC, MGP, LUM, COL5A1, COL5A2, COL6A2, FMOD 
##     CRABP2, BDKRB1, MYL9, TGFBI, FKBP10, SFRP2, TPM2, LOX, TAGLN, OGN 
##     ACTA2, PLOD2, RGS4, CRYAB, IGFL2, NDUFA4L2, LIF, THY1, RCN3, CTGF 
## PC_ 3 
## Positive:  SPARC, LOX, COL5A1, TGFBI, PLOD2, COL5A2, COL1A2, COL1A1, LIF, MGP 
##     PDPN, FMOD, COL6A2, NNMT, LUM, CKAP4, NDUFA4L2, COL3A1, HAS1, TNFRSF12A 
##     ANXA1, SERPINE1, RCN3, P4HA2, FKBP10, IL11, GFPT2, THY1, BDKRB1, P4HB 
## Negative:  HBA2, HBA1, HBG2, HBE1, ALAS2, HBZ, HBZP1, AHSP, SLC25A37, GYPA 
##     HBM, GYPB, MT1G, HBG1, HEMGN, HMBS, SLC4A1, F13A1, HBB, FTH1 
##     RNASE1, LYVE1, MT1H, GYPC, HBQ1, NFE2, SLC25A39, FOLR2, RHAG, KLF1 
## PC_ 4 
## Positive:  F13A1, LYVE1, RNASE1, FOLR2, DAB2, CD14, VSIG4, C1QA, CD163, PLTP 
##     SEPP1, MRC1, CSF1R, CD36, STAB1, HSPA1A, HPGDS, C1QC, C1QB, CCL4 
##     CCL4L2, AGR2, MAF, CYBB, GATM, FCGBP, HSPA1B, FCGR2A, LILRB5, SPP1 
## Negative:  HLA-DRA, HLA-DRB1, HLA-DPA1, HLA-DRB6, HLA-DPB1, CD74, APOC1, FTH1, LYZ, OLR1 
##     GCHFR, CD52, LGALS3, SDS, HLA-DMA, G0S2, HLA-DMB, CD9, AHSP, HBZP1 
##     GYPB, HBZ, HBG1, ALAS2, CAPG, HLA-DQB1, ACP5, HBA2, HBA1, FBP1 
## PC_ 5 
## Positive:  CXCL8, SOD2, IL1B, FCN1, BCL2A1, CCL20, EREG, SRGN, TNFAIP6, CXCL3 
##     CXCL2, MT2A, S100A8, IL1RN, SAMSN1, HBA1, HBA2, SERPINE1, PLAUR, AQP9 
##     HBG2, TNF, APOBEC3A, PTGS2, SERPINA1, S100A12, HBE1, SAT1, C15orf48, CFLAR 
## Negative:  SFRP2, TAGLN, TPM2, GCHFR, CHI3L1, ID3, MYL9, ACTA2, STMN1, CTSC 
##     TREM2, ACP5, LIPA, FABP5, APOE, MGST1, HMGB2, ACTG2, HNMT, DBI 
##     LTA4H, CD36, CYTL1, GPNMB, NUPR1, KIAA0101, LPL, MMP9, LGMN, TYMS
# Plot the PCA
DimPlot(hSeurat, reduction = "pca")

# Plot the loadings
VizDimLoadings(hSeurat, dims = 1:2, reduction = "pca") & 
  theme(axis.text=element_text(size=8), axis.title=element_text(size = 8,face="bold"))

# Check the elbow plot to see how much variation is covered for each PC
ElbowPlot(hSeurat)

# FeaturePlot of PC1 top features
FeaturePlot(hSeurat, features = c("EBI3", 
                                   "KRT7"))

# FeaturePlot of PC2 top features
FeaturePlot(hSeurat, features = c("B2M", 
                                   "TYROBP"))

# Now perform clustering
hSeurat <- FindNeighbors(hSeurat, dims = 1:10)
## Computing nearest neighbor graph
## Computing SNN
hSeurat <- FindClusters(hSeurat, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 4383
## Number of edges: 148212
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8904
## Number of communities: 9
## Elapsed time: 0 seconds
# For visualization purposes, we need to generate UMAP reduced dimensionality representation
hSeurat <- RunUMAP(hSeurat, dims = 1:10, verbose = F)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
# Plot the UMAP with clustering
DimPlot(hSeurat, reduction = "umap")

# Get the TSNE embedding
hSeurat <- RunTSNE(hSeurat, dims = 1:10)

# Plot the t-SNE with clustering
DimPlot(hSeurat, reduction = "tsne")

# Let's look at cluster sizes
table(hSeurat@meta.data$seurat_clusters)
## 
##    0    1    2    3    4    5    6    7    8 
## 1414  689  586  391  343  290  266  228  176
# Let's plot those clusters!
DimPlot(hSeurat,label.size = 4,repel = T,label = T)

FeaturePlot(hSeurat, features = c("MMP12",
                                   "SERPINE2",
                                   "CSH1",
                                   "CSH2"))

Let’s calculate the cell cycle scores at this point.

hSeurat <- SetIdent(hSeurat, value = "Annotation")

# Calculate the cell cycle scores
s.genes <- cc.genes.updated.2019$s.genes
g2m.genes <- cc.genes.updated.2019$g2m.genes

# Somehow this code does not work.
hSeurat <- CellCycleScoring(hSeurat,
                            search = TRUE,
                            s.features = s.genes, 
                            g2m.features = g2m.genes)
## Warning: The following features are not present in the object: PIMREG, JPT1,
## attempting to find updated synonyms
## Warning: No updated symbols found
## Warning: The following features are still not present in the object: PIMREG,
## JPT1
table(hSeurat[[]]$Phase)
## 
##   G1  G2M    S 
## 2239  826 1318

Around 1/3 of the cells are in the S phase.

# Check if mitochondrial genes show dependency on cell type
VlnPlot(hSeurat,
        features = "percent.mt") & 
  theme(plot.title = element_text(size = 10),
        legend.position = 'none')

# Check if ribosomal genes show dependency on cell type
VlnPlot(hSeurat,
        features = "percent.rb") & 
  theme(plot.title = element_text(size=10),
        legend.position = 'none')

# Check # of genes and # of transcripts per cluster
VlnPlot(hSeurat,features = 
          c("nCount_RNA","nFeature_RNA")) & 
  theme(plot.title = element_text(size=10))

# Cell cycle score does not seem to depend on the cell type much
FeaturePlot(hSeurat,
            features = c("S.Score","G2M.Score"),
            label.size = 4,
            repel = T,
            label = T) & 
  theme(plot.title = element_text(size = 10))

VlnPlot(hSeurat,features = c("S.Score","G2M.Score")) & 
  theme(plot.title = element_text(size = 10),
        legend.position = 'none')

In this case, it looks like most of the cell types show low cell cycle scores, except for the proliferating cells.

# Plot in UMAP
DimPlot(hSeurat, 
        group.by = "Phase")

# Most of the clusters contain all three cell cycle states.

# Plot with clusters
compTable.int <- table(hSeurat$Annotation, hSeurat$Phase)
compTable.int <- (compTable.int / rowSums(compTable.int)) * 100
pheatmap(compTable.int, color = WtOrRd_pal, breaks = 1:100)

# FeaturePlot for showing mod score
FeaturePlot(hSeurat, 
            features = c("S.Score", "G2M.Score"))

This heatmap is interesting, as most syncytiotrophoblasts are in the G1 phase, and almost all endothelial cells are in the S phase.

# This part is replaced when using the SCTransform
DefaultAssay(hSeurat) <- "RNA"

# Perform log-normalization
hSeurat <- NormalizeData(hSeurat)

# Find variable features
hSeurat <- FindVariableFeatures(hSeurat, 
                                 selection.method = "vst", 
                                 nfeatures = 2000)
all.genes <- rownames(hSeurat)
hSeurat <- ScaleData(hSeurat, features = all.genes)
## Centering and scaling data matrix
# Designate the markers
all.markers <- FindAllMarkers(hSeurat,
                              only.pos = T, 
                              min.pct = 0.5, 
                              logfc.threshold = 0.5)
## Calculating cluster Dendritic cell
## Calculating cluster Extravillous trophoblast_HPGD high
## Calculating cluster Stromal cell_LOX high
## Calculating cluster Extravillous trophoblast_LAIR2 high
## Calculating cluster Extravillous trophoblast_TIMP3 high
## Calculating cluster Stromal cell_SFRP2 high
## Calculating cluster Smooth muscle cell
## Calculating cluster Erythroid cell_AHSP high
## Calculating cluster Hofbauer cell
## Calculating cluster Proliferating cell
## Calculating cluster Syncytiotrophoblast
## Calculating cluster Epithelial cell
## Calculating cluster Cytotrophoblast
## Calculating cluster Macrophage
## Calculating cluster Endothelial cell
## Calculating cluster Neutrophil
## Calculating cluster Erythroid cell_HBE1 high
dim(all.markers)
## [1] 1106    7
table(all.markers$cluster)
## 
##                      Dendritic cell  Extravillous trophoblast_HPGD high 
##                                 102                                 167 
##               Stromal cell_LOX high Extravillous trophoblast_LAIR2 high 
##                                  59                                 150 
## Extravillous trophoblast_TIMP3 high             Stromal cell_SFRP2 high 
##                                 105                                  57 
##                  Smooth muscle cell            Erythroid cell_AHSP high 
##                                  43                                  14 
##                       Hofbauer cell                  Proliferating cell 
##                                  61                                  29 
##                 Syncytiotrophoblast                     Epithelial cell 
##                                  63                                  61 
##                     Cytotrophoblast                          Macrophage 
##                                  28                                  62 
##                    Endothelial cell                          Neutrophil 
##                                  64                                  31 
##            Erythroid cell_HBE1 high 
##                                  10
top3_markers <- as.data.frame(all.markers %>%
                              group_by(cluster) %>% 
                              top_n(n = 3, wt = avg_log2FC))
# We can check the dataframe briefly 
head(top3_markers)
##           p_val avg_log2FC pct.1 pct.2     p_val_adj
## 1  0.000000e+00   4.334272 0.956 0.170  0.000000e+00
## 2  0.000000e+00   4.332729 0.975 0.192  0.000000e+00
## 3 1.440853e-230   4.191981 0.829 0.310 3.447818e-226
## 4  0.000000e+00   3.747841 0.934 0.154  0.000000e+00
## 5 4.970391e-265   3.748173 0.997 0.273 1.189365e-260
## 6 7.773249e-252   3.555975 0.900 0.187 1.860061e-247
##                              cluster    gene
## 1                     Dendritic cell HLA-DRA
## 2                     Dendritic cell    CD74
## 3                     Dendritic cell   APOC1
## 4 Extravillous trophoblast_HPGD high TNFSF10
## 5 Extravillous trophoblast_HPGD high    HPGD
## 6 Extravillous trophoblast_HPGD high    AOC1

Clustering based on Cell Type annotation

DimPlot(hSeurat, label = T , repel = T, label.size = 3) + NoLegend()

DimPlot(hSeurat, reduction = "pca")

DimPlot(hSeurat, reduction = "tsne")

DimPlot(hSeurat, reduction = "umap")

# ggsave(filename = "Check.png", plot = p1, width = 30, height = 10)
saveRDS(hSeurat, "hSeurat_annotated.rds")

Luckily, it seems that the cell types are preserved even after changing the mitochondrial gene percentage to 5%. Another difference is that the plots show more clustering compared to the 10% threshold, which could be promising in terms of clarity.

Still Work in Progress

# Pseudotime
# Convert to monocle3 cell_data_set
hSeurat.cds <- as.cell_data_set(hSeurat)
class(hSeurat.cds)

# Re-cluster (required for trajectories in monocle3)
hSeurat.cds <- cluster_cells(hSeurat.cds)

# Learn the trajectory
hSeurat.cds <- learn_graph(hSeurat.cds)

# Plot the trajectory
plot_cells(hSeurat.cds, color_cells_by = "seurat_clusters", show_trajectory_graph = TRUE)

# Find the pseudotime order
DimPlot(hSeurat, group.by = "Phase")
hSeurat.cds <- order_cells(hSeurat.cds)

# Plot pseudotime
plot_cells(hSeurat.cds, color_cells_by = "pseudotime")

# Add pseudotime score back into the dataset
hSeurat$pseudotime_score <- hSeurat.cds@principal_graph_aux@listData$UMAP$pseudotime
hSeurat$pseudotime_score[hSeurat$pseudotime_score == Inf] <- NA

# FeaturePlot with Pseudotime
FeaturePlot(hSeurat, features = c("pseudotime_score")) 
DimPlot(hSeurat, group.by = "Annotation")
FeaturePlot(hSeurat, features = c("pseudotime_score"))