In [None]:
# ..... look at turtle heart data ..... #

In [2]:
library(Seurat)

In [7]:
# Read the turtle GTF annotation as a headerless, tab-separated table;
# lines beginning with '#' are skipped as comments.
ginfo <- read.delim(
  "~/cellranger_10x/chrysemys_picta_v4.gtf",
  sep = "\t",
  comment.char = "#",
  header = FALSE
)
dim(ginfo)

# MT genes: gene-level records (column 3) located on contig NC_023890.1 (column 1)
ginfo[ginfo$V1 == "NC_023890.1" & ginfo$V3 == "gene", ]

Unnamed: 0_level_0,V1,V2,V3,V4,V5,V6,V7,V8,V9
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<int>,<int>,<chr>,<chr>,<chr>,<chr>
1754022,NC_023890.1,RefSeq,gene,2800,3770,.,+,.,gene_id ND1; transcript_id ; db_xref GeneID:18983007; gbkey Gene; gene ND1; gene_biotype protein_coding;
1754028,NC_023890.1,RefSeq,gene,3981,5019,.,+,.,gene_id ND2; transcript_id ; db_xref GeneID:18982995; gbkey Gene; gene ND2; gene_biotype protein_coding;
1754034,NC_023890.1,RefSeq,gene,5402,6949,.,+,.,gene_id COX1; transcript_id ; db_xref GeneID:18982996; gbkey Gene; gene COX1; gene_biotype protein_coding;
1754040,NC_023890.1,RefSeq,gene,7084,7770,.,+,.,gene_id COX2; transcript_id ; db_xref GeneID:18982997; gbkey Gene; gene COX2; gene_biotype protein_coding;
1754046,NC_023890.1,RefSeq,gene,7844,8032,.,+,.,gene_id ATP8; transcript_id ; db_xref GeneID:18982998; gbkey Gene; gene ATP8; gene_biotype protein_coding;
1754052,NC_023890.1,RefSeq,gene,8002,8685,.,+,.,gene_id ATP6; transcript_id ; db_xref GeneID:18982999; gbkey Gene; gene ATP6; gene_biotype protein_coding;
1754058,NC_023890.1,RefSeq,gene,8685,9468,.,+,.,gene_id COX3; transcript_id ; db_xref GeneID:18983000; gbkey Gene; gene COX3; gene_biotype protein_coding;
1754064,NC_023890.1,RefSeq,gene,9538,9887,.,+,.,gene_id ND3; transcript_id ; db_xref GeneID:18983001; gbkey Gene; gene ND3; gene_biotype protein_coding;
1754072,NC_023890.1,RefSeq,gene,9958,10254,.,+,.,gene_id ND4L; transcript_id ; db_xref GeneID:18983002; gbkey Gene; gene ND4L; gene_biotype protein_coding;
1754078,NC_023890.1,RefSeq,gene,10248,11628,.,+,.,gene_id ND4; transcript_id ; db_xref GeneID:18983003; gbkey Gene; gene ND4; gene_biotype protein_coding;


In [3]:
# Load the gene symbol / Ensembl ID lookup table and show its first row
minfo <- read.delim("turtle_gene_symbol_EnsemblID.txt", sep = "\t")
minfo[1, ]

# Collect the gene names (column 2) of entries whose chromosome/scaffold
# (column 3) is 'MT', then discard entries that have an empty name
mt_genes <- minfo[[2]][which(minfo[[3]] == "MT")]
mt_genes <- mt_genes[mt_genes != ""]
length(mt_genes)
mt_genes[seq_len(3)]

Unnamed: 0_level_0,Gene.stable.ID,Gene.name,Chromosome.scaffold.name
Unnamed: 0_level_1,<chr>,<chr>,<chr>
1,ENSCPBG00000000002,,MT


In [4]:
# Root directory that holds one cellranger output folder per sample
path0 <- "/data/suresh/heart/turtle/"

# Parallel vectors: stages[i] is the stage label for samples[i]
# (three samples for each of the stages 14, 17 and 21)
stages <- rep(c("14", "17", "21"), each = 3L)
samples <- c(
  "4", "7", "8A",           # stage 14
  "3_Triangle", "5B", "5Y", # stage 17
  "1A", "1B", "2"           # stage 21
)

In [5]:
# Load the filtered 10x count matrix for the sample selected by `id`
# (an index into the parallel `stages` / `samples` vectors).
id <- 1
mat <- Read10X_h5(paste0(path0, "stage", stages[id], "_", samples[id],
                         "/outs/filtered_feature_bc_matrix.h5"))

# Rename MT genes with an 'MT-' prefix so they can be selected later with the
# "^MT-" pattern used when computing the mitochondrial ratio.
genes <- rownames(mat)
ids <- match(mt_genes, genes)
# match() yields NA for MT genes that are absent from this matrix; NA indices
# are not allowed in a subscripted assignment, so drop them first.
ids <- ids[!is.na(ids)]
genes[ids] <- paste0("MT-", genes[ids])
# Write the renamed features back to the matrix; without this step the prefix
# only exists in the local `genes` vector and never reaches the Seurat object.
rownames(mat) <- genes

In [53]:
# Prefix every cell barcode with '<stage>_sample<sample>_' so barcodes remain
# distinguishable by stage and sample.
currstage <- stages[id]
currsmp <- paste0("sample", samples[id])
smp <- paste(currstage, currsmp, "", sep = "_")
colnames(mat) <- paste0(smp, colnames(mat))

# Wrap the count matrix in a Seurat object and print its summary.
# NOTE(review): with CreateSeuratObject's default name parsing, orig.ident is
# presumably taken from the text before the first '_' of each cell name, i.e.
# the stage label here -- confirm that is intended.
sce <- CreateSeuratObject(counts = mat)
sce

“Feature names cannot have underscores ('_'), replacing with dashes ('-')”


An object of class Seurat 
26985 features across 9682 samples within 1 assay 
Active assay: RNA (26985 features, 0 variable features)

In [54]:
# Mitochondrial fraction per cell: percentage of counts from features whose
# name starts with 'MT-', rescaled from percent (0-100) to a ratio (0-1)
sce[["mitoRatio"]] <- PercentageFeatureSet(object = sce, pattern = "^MT-")
sce[["mitoRatio"]] <- sce@meta.data[["mitoRatio"]] / 100

# Genes detected per UMI on a log10 scale, stored per cell in the metadata
sce$log10GenesPerUMI <- log10(sce$nFeature_RNA) / log10(sce$nCount_RNA)

# Record which stage and sample these cells come from
sce$stage <- currstage
sce$sample <- currsmp

# Preview the first two metadata rows
sce@meta.data[seq_len(2), ]

Unnamed: 0_level_0,orig.ident,nCount_RNA,nFeature_RNA,mitoRatio,log10GenesPerUMI,stage,sample
Unnamed: 0_level_1,<fct>,<dbl>,<int>,<dbl>,<dbl>,<chr>,<chr>
stage21_sample2_AAACCCAAGCAAATGT-1,stage21,1458,1053,0.04938272,0.9553287,stage21,sample2
stage21_sample2_AAACCCAAGCCGTCGT-1,stage21,630,518,0.05079365,0.9696319,stage21,sample2


In [55]:
# Persist the Seurat object built from the 10x filtered counts as-is
# (no doublet removal or bad-cell filtering has been applied yet).
# The file is written to the working directory as '<smp>raw_data.rds'.
saveRDS(object = sce, file = paste0(smp, "raw_data.rds"))