################
# Matlab files #
################

# Expression data:
## Reference genome counts matrix
counts.mat
- counts: 33471 by 60
## Personal genomes counts matrix
personal_counts.mat
- counts_per: 33374 by 60
#### Note: no ERCCs in this matrix, hence the different dimensions

# Phenotype data:
pData.mat
- pData: 60 by 10
- describes the columns in the counts matrices
- age, sex, lane/batch, individual, and all versions of sample IDs that were used (based on library preps and armadillo labels)

# Gene data:
attr.mat
- attr: 33471 by 11
- gene IDs and their locations on the scaffolds
- ordered the same way as in the counts matrix
- human homologs are in the name column
- genes: 33374
- ERCC spike-ins: 97

# GO:
GO.arm.mat
# GO terms and genes as text files
GO.arm.genes
GO.arm.terms

# Homologs:
homologs.mat
- homologs: 11012 by 5
- mouse gene name and Entrez ID, human gene name and Entrez ID, and armadillo Ensembl ID

##################
# R binary files #
##################

# Expression and gene data:
## Reference genome counts
counts.Rdata
- counts_exp: 33471 by 60
- pData: 60 by 10
- attr: 33471 by 11
## Personal genomes counts
personal_counts.Rdata
- counts_per: 33374 by 60

# GO:
GO.arm.Rdata
- GO.arm: 2014800 by 4
- GO.arm.nonIEA: 12828 by 21421
- genes.id: 12828 by 2

# Homologs:
homologs.Rdata
- genes.id2ids: 11012 by 5
- mouse gene name and Entrez ID, human gene name and Entrez ID, and armadillo Ensembl ID