{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# check if species has enough data for CoCoCoNet\n", "# if it does, add the accessions with enough data to a file/folder\n", "# Date: 16 Feb 2025" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "library(dplyr)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\t\n", "\n", "\n", "\t\n", "\t\n", "\n", "
A data.frame: 2 × 14
SRAclaimedSpeciestotalRunsactualSpeciesN_specieslibStratsN_libStratstumorupdatedN_RunsflagkeepInAggprop_missingranked_corr_in_expranked_corr2global
<chr><chr><dbl><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><dbl>
1ERP118309aedes_albopictus29Aedes albopictus1RNA-Seq1no29010.1627850850730060.8992067307202840.8809047
2ERP121447aedes_albopictus31Aedes albopictus1RNA-Seq1no31010.2271429146052050.5793391953061740.8023621
\n" ], "text/latex": [ "A data.frame: 2 × 14\n", "\\begin{tabular}{r|llllllllllllll}\n", " & SRA & claimedSpecies & totalRuns & actualSpecies & N\\_species & libStrats & N\\_libStrats & tumor & updatedN\\_Runs & flag & keepInAgg & prop\\_missing & ranked\\_corr\\_in\\_exp & ranked\\_corr2global\\\\\n", " & & & & & & & & & & & & & & \\\\\n", "\\hline\n", "\t1 & ERP118309 & aedes\\_albopictus & 29 & Aedes albopictus & 1 & RNA-Seq & 1 & no & 29 & 0 & 1 & 0.162785085073006 & 0.899206730720284 & 0.8809047\\\\\n", "\t2 & ERP121447 & aedes\\_albopictus & 31 & Aedes albopictus & 1 & RNA-Seq & 1 & no & 31 & 0 & 1 & 0.227142914605205 & 0.579339195306174 & 0.8023621\\\\\n", "\\end{tabular}\n" ], "text/markdown": [ "\n", "A data.frame: 2 × 14\n", "\n", "| | SRA <chr> | claimedSpecies <chr> | totalRuns <dbl> | actualSpecies <chr> | N_species <chr> | libStrats <chr> | N_libStrats <chr> | tumor <chr> | updatedN_Runs <chr> | flag <chr> | keepInAgg <chr> | prop_missing <chr> | ranked_corr_in_exp <chr> | ranked_corr2global <dbl> |\n", "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n", "| 1 | ERP118309 | aedes_albopictus | 29 | Aedes albopictus | 1 | RNA-Seq | 1 | no | 29 | 0 | 1 | 0.162785085073006 | 0.899206730720284 | 0.8809047 |\n", "| 2 | ERP121447 | aedes_albopictus | 31 | Aedes albopictus | 1 | RNA-Seq | 1 | no | 31 | 0 | 1 | 0.227142914605205 | 0.579339195306174 | 0.8023621 |\n", "\n" ], "text/plain": [ " SRA claimedSpecies totalRuns actualSpecies N_species libStrats\n", "1 ERP118309 aedes_albopictus 29 Aedes albopictus 1 RNA-Seq \n", "2 ERP121447 aedes_albopictus 31 Aedes albopictus 1 RNA-Seq \n", " N_libStrats tumor updatedN_Runs flag keepInAgg prop_missing \n", "1 1 no 29 0 1 0.162785085073006\n", "2 1 no 31 0 1 0.227142914605205\n", " ranked_corr_in_exp ranked_corr2global\n", "1 0.899206730720284 0.8809047 \n", "2 0.579339195306174 0.8023621 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "cocodf = read.delim('current_SRA_list.csv', 
sep = ',')\n", "cocodf[1:2,]" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Optional loader: pool the run tables of several varieties/strains of one species\n", "# (e.g. wild + domesticated) and drop exact duplicate rows.\n", "# NOTE(review): the single-file loader cell further down also assigns mat1 -- run only one of the two.\n", "t1 <- read.csv('maize.txt')\n", "t2 <- read.csv('domesticated_maize.txt')\n", "mat1 <- do.call(rbind, list(t1, t2))\n", "mat1 <- mat1[!duplicated(mat1), ]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# NOTE(review): 'my.large.file.csv' looks like a placeholder path and df is not used in the cells shown here -- confirm this cell is still needed.\n", "df <- read.csv(\"my.large.file.csv\", nrows = 2000)\n" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
  1. 8257
  2. 47
\n" ], "text/latex": [ "\\begin{enumerate*}\n", "\\item 8257\n", "\\item 47\n", "\\end{enumerate*}\n" ], "text/markdown": [ "1. 8257\n", "2. 47\n", "\n", "\n" ], "text/plain": [ "[1] 8257 47" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
  1. 'Apis mellifera ligustica'
  2. 'Apis mellifera'
  3. 'Varroa destructor'
  4. 'metagenome'
  5. 'Lotmaria passim'
  6. 'insect metagenome'
  7. 'insect gut metagenome'
  8. 'bee metagenome'
  9. 'Apis mellifera carnica'
  10. 'pollen metagenome'
  11. 'mixed sample'
  12. 'Apis mellifera mellifera'
  13. 'Geotrigona acapulconis'
  14. 'Varroa jacobsoni'
  15. 'Deformed wing virus'
  16. 'Vairimorpha ceranae'
  17. 'Snodgrassella alvi'
  18. 'Apis'
  19. 'Apis cerana'
  20. 'Sinaivirus'
  21. 'honeybee metagenome'
  22. 'Apoidea'
  23. 'Bifidobacterium asteroides'
  24. 'invertebrate metagenome'
  25. 'Ascosphaera apis'
  26. 'Black queen cell virus'
  27. 'Tropilaelaps mercedesae'
  28. 'Andrena camellia'
  29. 'Galleria mellonella'
  30. 'Frieseomelitta varia'
  31. 'Centris analis'
  32. 'Apis mellifera capensis'
  33. 'Apis mellifera scutellata'
  34. 'Apis mellifera syriaca'
  35. 'Apis mellifera intermissa'
  36. 'gut metagenome'
  37. 'Sindbis virus'
  38. 'Snodgrassella alvi wkB2'
  39. 'Varroa destructor virus'
\n" ], "text/latex": [ "\\begin{enumerate*}\n", "\\item 'Apis mellifera ligustica'\n", "\\item 'Apis mellifera'\n", "\\item 'Varroa destructor'\n", "\\item 'metagenome'\n", "\\item 'Lotmaria passim'\n", "\\item 'insect metagenome'\n", "\\item 'insect gut metagenome'\n", "\\item 'bee metagenome'\n", "\\item 'Apis mellifera carnica'\n", "\\item 'pollen metagenome'\n", "\\item 'mixed sample'\n", "\\item 'Apis mellifera mellifera'\n", "\\item 'Geotrigona acapulconis'\n", "\\item 'Varroa jacobsoni'\n", "\\item 'Deformed wing virus'\n", "\\item 'Vairimorpha ceranae'\n", "\\item 'Snodgrassella alvi'\n", "\\item 'Apis'\n", "\\item 'Apis cerana'\n", "\\item 'Sinaivirus'\n", "\\item 'honeybee metagenome'\n", "\\item 'Apoidea'\n", "\\item 'Bifidobacterium asteroides'\n", "\\item 'invertebrate metagenome'\n", "\\item 'Ascosphaera apis'\n", "\\item 'Black queen cell virus'\n", "\\item 'Tropilaelaps mercedesae'\n", "\\item 'Andrena camellia'\n", "\\item 'Galleria mellonella'\n", "\\item 'Frieseomelitta varia'\n", "\\item 'Centris analis'\n", "\\item 'Apis mellifera capensis'\n", "\\item 'Apis mellifera scutellata'\n", "\\item 'Apis mellifera syriaca'\n", "\\item 'Apis mellifera intermissa'\n", "\\item 'gut metagenome'\n", "\\item 'Sindbis virus'\n", "\\item 'Snodgrassella alvi wkB2'\n", "\\item 'Varroa destructor virus'\n", "\\end{enumerate*}\n" ], "text/markdown": [ "1. 'Apis mellifera ligustica'\n", "2. 'Apis mellifera'\n", "3. 'Varroa destructor'\n", "4. 'metagenome'\n", "5. 'Lotmaria passim'\n", "6. 'insect metagenome'\n", "7. 'insect gut metagenome'\n", "8. 'bee metagenome'\n", "9. 'Apis mellifera carnica'\n", "10. 'pollen metagenome'\n", "11. 'mixed sample'\n", "12. 'Apis mellifera mellifera'\n", "13. 'Geotrigona acapulconis'\n", "14. 'Varroa jacobsoni'\n", "15. 'Deformed wing virus'\n", "16. 'Vairimorpha ceranae'\n", "17. 'Snodgrassella alvi'\n", "18. 'Apis'\n", "19. 'Apis cerana'\n", "20. 'Sinaivirus'\n", "21. 'honeybee metagenome'\n", "22. 'Apoidea'\n", "23. 
'Bifidobacterium asteroides'\n", "24. 'invertebrate metagenome'\n", "25. 'Ascosphaera apis'\n", "26. 'Black queen cell virus'\n", "27. 'Tropilaelaps mercedesae'\n", "28. 'Andrena camellia'\n", "29. 'Galleria mellonella'\n", "30. 'Frieseomelitta varia'\n", "31. 'Centris analis'\n", "32. 'Apis mellifera capensis'\n", "33. 'Apis mellifera scutellata'\n", "34. 'Apis mellifera syriaca'\n", "35. 'Apis mellifera intermissa'\n", "36. 'gut metagenome'\n", "37. 'Sindbis virus'\n", "38. 'Snodgrassella alvi wkB2'\n", "39. 'Varroa destructor virus'\n", "\n", "\n" ], "text/plain": [ " [1] \"Apis mellifera ligustica\" \"Apis mellifera\" \n", " [3] \"Varroa destructor\" \"metagenome\" \n", " [5] \"Lotmaria passim\" \"insect metagenome\" \n", " [7] \"insect gut metagenome\" \"bee metagenome\" \n", " [9] \"Apis mellifera carnica\" \"pollen metagenome\" \n", "[11] \"mixed sample\" \"Apis mellifera mellifera\" \n", "[13] \"Geotrigona acapulconis\" \"Varroa jacobsoni\" \n", "[15] \"Deformed wing virus\" \"Vairimorpha ceranae\" \n", "[17] \"Snodgrassella alvi\" \"Apis\" \n", "[19] \"Apis cerana\" \"Sinaivirus\" \n", "[21] \"honeybee metagenome\" \"Apoidea\" \n", "[23] \"Bifidobacterium asteroides\" \"invertebrate metagenome\" \n", "[25] \"Ascosphaera apis\" \"Black queen cell virus\" \n", "[27] \"Tropilaelaps mercedesae\" \"Andrena camellia\" \n", "[29] \"Galleria mellonella\" \"Frieseomelitta varia\" \n", "[31] \"Centris analis\" \"Apis mellifera capensis\" \n", "[33] \"Apis mellifera scutellata\" \"Apis mellifera syriaca\" \n", "[35] \"Apis mellifera intermissa\" \"gut metagenome\" \n", "[37] \"Sindbis virus\" \"Snodgrassella alvi wkB2\" \n", "[39] \"Varroa destructor virus\" " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\t\n", "\n", "\n", "\t\n", "\n", "
A data.frame: 1 × 47
RunReleaseDateLoadDatespotsbasesspots_with_matesavgLengthsize_MBAssemblyNamedownload_pathAffection_StatusAnalyte_TypeHistological_TypeBody_SiteCenterNameSubmissiondbgap_study_accessionConsentRunHashReadHash
<chr><chr><chr><int><dbl><int><int><int><chr><chr><lgl><lgl><lgl><chr><chr><chr><lgl><chr><chr><chr>
1SRR325241002025-02-28 08:06:492025-02-28 07:56:40236448097093442700236448093002023https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos7/sra-pub-zq-41/SRR032/32524/SRR32524100/SRR32524100.lite.1NANANAYANGZHOU UNIVERSITYSRA2085895NApublicE218074D522158458D7BEA72E4B1D8FE56C7986E6724785571494052CE63CB80
\n" ], "text/latex": [ "A data.frame: 1 × 47\n", "\\begin{tabular}{r|lllllllllllllllllllll}\n", " & Run & ReleaseDate & LoadDate & spots & bases & spots\\_with\\_mates & avgLength & size\\_MB & AssemblyName & download\\_path & ⋯ & Affection\\_Status & Analyte\\_Type & Histological\\_Type & Body\\_Site & CenterName & Submission & dbgap\\_study\\_accession & Consent & RunHash & ReadHash\\\\\n", " & & & & & & & & & & & ⋯ & & & & & & & & & & \\\\\n", "\\hline\n", "\t1 & SRR32524100 & 2025-02-28 08:06:49 & 2025-02-28 07:56:40 & 23644809 & 7093442700 & 23644809 & 300 & 2023 & & https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos7/sra-pub-zq-41/SRR032/32524/SRR32524100/SRR32524100.lite.1 & ⋯ & NA & NA & NA & & YANGZHOU UNIVERSITY & SRA2085895 & NA & public & E218074D522158458D7BEA72E4B1D8FE & 56C7986E6724785571494052CE63CB80\\\\\n", "\\end{tabular}\n" ], "text/markdown": [ "\n", "A data.frame: 1 × 47\n", "\n", "| | Run <chr> | ReleaseDate <chr> | LoadDate <chr> | spots <int> | bases <dbl> | spots_with_mates <int> | avgLength <int> | size_MB <int> | AssemblyName <chr> | download_path <chr> | ⋯ ⋯ | Affection_Status <lgl> | Analyte_Type <lgl> | Histological_Type <lgl> | Body_Site <chr> | CenterName <chr> | Submission <chr> | dbgap_study_accession <lgl> | Consent <chr> | RunHash <chr> | ReadHash <chr> |\n", "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n", "| 1 | SRR32524100 | 2025-02-28 08:06:49 | 2025-02-28 07:56:40 | 23644809 | 7093442700 | 23644809 | 300 | 2023 | | https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos7/sra-pub-zq-41/SRR032/32524/SRR32524100/SRR32524100.lite.1 | ⋯ | NA | NA | NA | | YANGZHOU UNIVERSITY | SRA2085895 | NA | public | E218074D522158458D7BEA72E4B1D8FE | 56C7986E6724785571494052CE63CB80 |\n", "\n" ], "text/plain": [ " Run ReleaseDate LoadDate spots bases \n", "1 SRR32524100 2025-02-28 08:06:49 2025-02-28 07:56:40 23644809 7093442700\n", " spots_with_mates avgLength size_MB AssemblyName\n", "1 23644809 300 
2023 \n", " download_path \n", "1 https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos7/sra-pub-zq-41/SRR032/32524/SRR32524100/SRR32524100.lite.1\n", " ⋯ Affection_Status Analyte_Type Histological_Type Body_Site\n", "1 ⋯ NA NA NA \n", " CenterName Submission dbgap_study_accession Consent\n", "1 YANGZHOU UNIVERSITY SRA2085895 NA public \n", " RunHash ReadHash \n", "1 E218074D522158458D7BEA72E4B1D8FE 56C7986E6724785571494052CE63CB80" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Load the comma-separated run table for the species being checked.\n", "# For another species swap the file name (e.g. 'mouse.txt').\n", "mat1 <- read.csv('bee.txt')\n", "# Show the table size, the organisms present, and the first record.\n", "dim(mat1)\n", "unique(mat1[['ScientificName']])\n", "mat1[1, ]" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "data": { "text/html": [ "'Apis mellifera'" ], "text/latex": [ "'Apis mellifera'" ], "text/markdown": [ "'Apis mellifera'" ], "text/plain": [ "[1] \"Apis mellifera\"" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
  1. 5871
  2. 47
\n" ], "text/latex": [ "\\begin{enumerate*}\n", "\\item 5871\n", "\\item 47\n", "\\end{enumerate*}\n" ], "text/markdown": [ "1. 5871\n", "2. 47\n", "\n", "\n" ], "text/plain": [ "[1] 5871 47" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Keep runs of the target species that are transcriptomic RNA-Seq, have Tumor == 'no',\n", "# and carry no Disease value.\n", "# which() converts the mask to row numbers, so a comparison that evaluates to NA drops\n", "# the row; indexing with the raw logical vector would return an all-NA row instead.\n", "sra_rna <- mat1[which(\n", "    mat1$ScientificName %in% c('Apis mellifera') &\n", "    mat1$LibrarySource == 'TRANSCRIPTOMIC' &\n", "    mat1$LibraryStrategy == 'RNA-Seq' &\n", "    mat1$Tumor == 'no' &\n", "    is.na(mat1$Disease)\n", "), ]\n", "\n", "# Confirm only the target species is left and report how many runs remain.\n", "unique(sra_rna$ScientificName)\n", "dim(sra_rna)" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "data": { "text/html": [ "'TRANSCRIPTOMIC'" ], "text/latex": [ "'TRANSCRIPTOMIC'" ], "text/markdown": [ "'TRANSCRIPTOMIC'" ], "text/plain": [ "[1] \"TRANSCRIPTOMIC\"" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "238" ], "text/latex": [ "238" ], "text/markdown": [ "238" ], "text/plain": [ "[1] 238" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
  1. 'DRP004203'
  2. 'DRP004516'
  3. 'DRP007970'
  4. 'DRP010751'
  5. 'DRP012354'
  6. 'DRP012463'
  7. 'ERP002625'
  8. 'ERP004639'
  9. 'ERP006052'
  10. 'ERP118192'
  11. 'ERP122879'
  12. 'ERP130053'
  13. 'ERP133598'
  14. 'SRP002417'
  15. 'SRP004440'
  16. 'SRP011484'
  17. 'SRP013261'
  18. 'SRP014715'
  19. 'SRP015803'
  20. 'SRP015870'
  21. 'SRP019238'
  22. 'SRP019244'
  23. 'SRP020361'
  24. 'SRP022058'
  25. 'SRP022191'
  26. 'SRP022586'
  27. 'SRP022939'
  28. 'SRP024289'
  29. 'SRP027395'
  30. 'SRP030016'
  31. 'SRP032943'
  32. 'SRP033246'
  33. 'SRP039391'
  34. 'SRP041189'
  35. 'SRP041288'
  36. 'SRP043101'
  37. 'SRP045310'
  38. 'SRP046740'
  39. 'SRP047076'
  40. 'SRP048941'
  41. 'SRP050250'
  42. 'SRP053236'
  43. 'SRP053932'
  44. 'SRP056006'
  45. 'SRP062064'
  46. 'SRP062201'
  47. 'SRP067582'
  48. 'SRP068248'
  49. 'SRP068487'
  50. 'SRP069272'
  51. 'SRP070830'
  52. 'SRP075447'
  53. 'SRP075526'
  54. 'SRP076673'
  55. 'SRP080981'
  56. 'SRP081226'
  57. 'SRP082457'
  58. 'SRP082626'
  59. 'SRP083114'
  60. 'SRP089994'
  61. 'SRP090842'
  62. 'SRP092516'
  63. 'SRP095071'
  64. 'SRP095247'
  65. 'SRP095846'
  66. 'SRP098716'
  67. 'SRP099565'
  68. 'SRP102676'
  69. 'SRP106949'
  70. 'SRP107945'
  71. 'SRP113298'
  72. 'SRP114989'
  73. 'SRP117554'
  74. 'SRP117804'
  75. 'SRP130760'
  76. 'SRP132898'
  77. 'SRP133690'
  78. 'SRP133700'
  79. 'SRP136261'
  80. 'SRP136506'
  81. 'SRP139941'
  82. 'SRP140405'
  83. 'SRP151761'
  84. 'SRP152109'
  85. 'SRP153124'
  86. 'SRP159176'
  87. 'SRP161743'
  88. 'SRP162734'
  89. 'SRP166960'
  90. 'SRP167074'
  91. 'SRP173018'
  92. 'SRP173746'
  93. 'SRP173769'
  94. 'SRP185712'
  95. 'SRP186879'
  96. 'SRP188544'
  97. 'SRP188881'
  98. 'SRP188882'
  99. 'SRP190001'
  100. 'SRP191454'
  101. 'SRP193058'
  102. 'SRP194957'
  103. 'SRP194959'
  104. 'SRP194960'
  105. 'SRP197606'
  106. 'SRP201650'
  107. 'SRP219501'
  108. 'SRP220650'
  109. 'SRP224376'
  110. 'SRP226889'
  111. 'SRP228312'
  112. 'SRP228482'
  113. 'SRP234934'
  114. 'SRP235124'
  115. 'SRP239636'
  116. 'SRP242105'
  117. 'SRP242676'
  118. 'SRP250993'
  119. 'SRP253306'
  120. 'SRP254532'
  121. 'SRP258959'
  122. 'SRP259631'
  123. 'SRP261514'
  124. 'SRP266225'
  125. 'SRP267617'
  126. 'SRP268601'
  127. 'SRP271763'
  128. 'SRP273773'
  129. 'SRP276926'
  130. 'SRP278975'
  131. 'SRP280012'
  132. 'SRP282851'
  133. 'SRP286523'
  134. 'SRP287115'
  135. 'SRP287323'
  136. 'SRP287470'
  137. 'SRP289190'
  138. 'SRP293596'
  139. 'SRP295605'
  140. 'SRP296909'
  141. 'SRP297180'
  142. 'SRP297182'
  143. 'SRP300500'
  144. 'SRP301120'
  145. 'SRP301902'
  146. 'SRP303074'
  147. 'SRP309600'
  148. 'SRP309738'
  149. 'SRP310279'
  150. 'SRP316175'
  151. 'SRP318094'
  152. 'SRP321347'
  153. 'SRP321937'
  154. 'SRP323153'
  155. 'SRP323393'
  156. 'SRP325318'
  157. 'SRP326785'
  158. 'SRP327880'
  159. 'SRP329186'
  160. 'SRP329700'
  161. 'SRP330157'
  162. 'SRP331646'
  163. 'SRP332785'
  164. 'SRP333595'
  165. 'SRP335517'
  166. 'SRP335728'
  167. 'SRP336271'
  168. 'SRP336520'
  169. 'SRP338028'
  170. 'SRP338321'
  171. 'SRP341126'
  172. 'SRP345699'
  173. 'SRP347143'
  174. 'SRP347150'
  175. 'SRP347151'
  176. 'SRP347159'
  177. 'SRP347162'
  178. 'SRP347301'
  179. 'SRP347305'
  180. 'SRP347526'
  181. 'SRP348408'
  182. 'SRP349292'
  183. 'SRP349675'
  184. 'SRP353286'
  185. 'SRP353848'
  186. 'SRP360517'
  187. 'SRP362460'
  188. 'SRP366221'
  189. 'SRP366895'
  190. 'SRP367312'
  191. 'SRP369521'
  192. 'SRP373323'
  193. 'SRP373402'
  194. 'SRP383573'
  195. 'SRP385632'
  196. 'SRP390051'
  197. 'SRP395037'
  198. 'SRP396943'
  199. 'SRP399486'
  200. 'SRP403478'
  201. 'SRP405969'
  202. 'SRP409344'
  203. 'SRP412451'
  204. 'SRP413636'
  205. 'SRP418585'
  206. 'SRP420177'
  207. 'SRP425972'
  208. 'SRP427089'
  209. 'SRP427803'
  210. 'SRP429415'
  211. 'SRP429660'
  212. 'SRP434040'
  213. 'SRP441076'
  214. 'SRP450409'
  215. 'SRP450883'
  216. 'SRP456927'
  217. 'SRP459553'
  218. 'SRP460271'
  219. 'SRP460577'
  220. 'SRP462725'
  221. 'SRP467219'
  222. 'SRP467389'
  223. 'SRP471943'
  224. 'SRP475554'
  225. 'SRP475910'
  226. 'SRP492541'
  227. 'SRP495213'
  228. 'SRP502519'
  229. 'SRP505210'
  230. 'SRP514379'
  231. 'SRP514725'
  232. 'SRP519950'
  233. 'SRP522157'
  234. 'SRP534898'
  235. 'SRP538739'
  236. 'SRP545341'
  237. 'SRP549999'
  238. 'SRP562763'
\n" ], "text/latex": [ "\\begin{enumerate*}\n", "\\item 'DRP004203'\n", "\\item 'DRP004516'\n", "\\item 'DRP007970'\n", "\\item 'DRP010751'\n", "\\item 'DRP012354'\n", "\\item 'DRP012463'\n", "\\item 'ERP002625'\n", "\\item 'ERP004639'\n", "\\item 'ERP006052'\n", "\\item 'ERP118192'\n", "\\item 'ERP122879'\n", "\\item 'ERP130053'\n", "\\item 'ERP133598'\n", "\\item 'SRP002417'\n", "\\item 'SRP004440'\n", "\\item 'SRP011484'\n", "\\item 'SRP013261'\n", "\\item 'SRP014715'\n", "\\item 'SRP015803'\n", "\\item 'SRP015870'\n", "\\item 'SRP019238'\n", "\\item 'SRP019244'\n", "\\item 'SRP020361'\n", "\\item 'SRP022058'\n", "\\item 'SRP022191'\n", "\\item 'SRP022586'\n", "\\item 'SRP022939'\n", "\\item 'SRP024289'\n", "\\item 'SRP027395'\n", "\\item 'SRP030016'\n", "\\item 'SRP032943'\n", "\\item 'SRP033246'\n", "\\item 'SRP039391'\n", "\\item 'SRP041189'\n", "\\item 'SRP041288'\n", "\\item 'SRP043101'\n", "\\item 'SRP045310'\n", "\\item 'SRP046740'\n", "\\item 'SRP047076'\n", "\\item 'SRP048941'\n", "\\item 'SRP050250'\n", "\\item 'SRP053236'\n", "\\item 'SRP053932'\n", "\\item 'SRP056006'\n", "\\item 'SRP062064'\n", "\\item 'SRP062201'\n", "\\item 'SRP067582'\n", "\\item 'SRP068248'\n", "\\item 'SRP068487'\n", "\\item 'SRP069272'\n", "\\item 'SRP070830'\n", "\\item 'SRP075447'\n", "\\item 'SRP075526'\n", "\\item 'SRP076673'\n", "\\item 'SRP080981'\n", "\\item 'SRP081226'\n", "\\item 'SRP082457'\n", "\\item 'SRP082626'\n", "\\item 'SRP083114'\n", "\\item 'SRP089994'\n", "\\item 'SRP090842'\n", "\\item 'SRP092516'\n", "\\item 'SRP095071'\n", "\\item 'SRP095247'\n", "\\item 'SRP095846'\n", "\\item 'SRP098716'\n", "\\item 'SRP099565'\n", "\\item 'SRP102676'\n", "\\item 'SRP106949'\n", "\\item 'SRP107945'\n", "\\item 'SRP113298'\n", "\\item 'SRP114989'\n", "\\item 'SRP117554'\n", "\\item 'SRP117804'\n", "\\item 'SRP130760'\n", "\\item 'SRP132898'\n", "\\item 'SRP133690'\n", "\\item 'SRP133700'\n", "\\item 'SRP136261'\n", "\\item 'SRP136506'\n", "\\item 'SRP139941'\n", "\\item 
'SRP140405'\n", "\\item 'SRP151761'\n", "\\item 'SRP152109'\n", "\\item 'SRP153124'\n", "\\item 'SRP159176'\n", "\\item 'SRP161743'\n", "\\item 'SRP162734'\n", "\\item 'SRP166960'\n", "\\item 'SRP167074'\n", "\\item 'SRP173018'\n", "\\item 'SRP173746'\n", "\\item 'SRP173769'\n", "\\item 'SRP185712'\n", "\\item 'SRP186879'\n", "\\item 'SRP188544'\n", "\\item 'SRP188881'\n", "\\item 'SRP188882'\n", "\\item 'SRP190001'\n", "\\item 'SRP191454'\n", "\\item 'SRP193058'\n", "\\item 'SRP194957'\n", "\\item 'SRP194959'\n", "\\item 'SRP194960'\n", "\\item 'SRP197606'\n", "\\item 'SRP201650'\n", "\\item 'SRP219501'\n", "\\item 'SRP220650'\n", "\\item 'SRP224376'\n", "\\item 'SRP226889'\n", "\\item 'SRP228312'\n", "\\item 'SRP228482'\n", "\\item 'SRP234934'\n", "\\item 'SRP235124'\n", "\\item 'SRP239636'\n", "\\item 'SRP242105'\n", "\\item 'SRP242676'\n", "\\item 'SRP250993'\n", "\\item 'SRP253306'\n", "\\item 'SRP254532'\n", "\\item 'SRP258959'\n", "\\item 'SRP259631'\n", "\\item 'SRP261514'\n", "\\item 'SRP266225'\n", "\\item 'SRP267617'\n", "\\item 'SRP268601'\n", "\\item 'SRP271763'\n", "\\item 'SRP273773'\n", "\\item 'SRP276926'\n", "\\item 'SRP278975'\n", "\\item 'SRP280012'\n", "\\item 'SRP282851'\n", "\\item 'SRP286523'\n", "\\item 'SRP287115'\n", "\\item 'SRP287323'\n", "\\item 'SRP287470'\n", "\\item 'SRP289190'\n", "\\item 'SRP293596'\n", "\\item 'SRP295605'\n", "\\item 'SRP296909'\n", "\\item 'SRP297180'\n", "\\item 'SRP297182'\n", "\\item 'SRP300500'\n", "\\item 'SRP301120'\n", "\\item 'SRP301902'\n", "\\item 'SRP303074'\n", "\\item 'SRP309600'\n", "\\item 'SRP309738'\n", "\\item 'SRP310279'\n", "\\item 'SRP316175'\n", "\\item 'SRP318094'\n", "\\item 'SRP321347'\n", "\\item 'SRP321937'\n", "\\item 'SRP323153'\n", "\\item 'SRP323393'\n", "\\item 'SRP325318'\n", "\\item 'SRP326785'\n", "\\item 'SRP327880'\n", "\\item 'SRP329186'\n", "\\item 'SRP329700'\n", "\\item 'SRP330157'\n", "\\item 'SRP331646'\n", "\\item 'SRP332785'\n", "\\item 'SRP333595'\n", "\\item 
'SRP335517'\n", "\\item 'SRP335728'\n", "\\item 'SRP336271'\n", "\\item 'SRP336520'\n", "\\item 'SRP338028'\n", "\\item 'SRP338321'\n", "\\item 'SRP341126'\n", "\\item 'SRP345699'\n", "\\item 'SRP347143'\n", "\\item 'SRP347150'\n", "\\item 'SRP347151'\n", "\\item 'SRP347159'\n", "\\item 'SRP347162'\n", "\\item 'SRP347301'\n", "\\item 'SRP347305'\n", "\\item 'SRP347526'\n", "\\item 'SRP348408'\n", "\\item 'SRP349292'\n", "\\item 'SRP349675'\n", "\\item 'SRP353286'\n", "\\item 'SRP353848'\n", "\\item 'SRP360517'\n", "\\item 'SRP362460'\n", "\\item 'SRP366221'\n", "\\item 'SRP366895'\n", "\\item 'SRP367312'\n", "\\item 'SRP369521'\n", "\\item 'SRP373323'\n", "\\item 'SRP373402'\n", "\\item 'SRP383573'\n", "\\item 'SRP385632'\n", "\\item 'SRP390051'\n", "\\item 'SRP395037'\n", "\\item 'SRP396943'\n", "\\item 'SRP399486'\n", "\\item 'SRP403478'\n", "\\item 'SRP405969'\n", "\\item 'SRP409344'\n", "\\item 'SRP412451'\n", "\\item 'SRP413636'\n", "\\item 'SRP418585'\n", "\\item 'SRP420177'\n", "\\item 'SRP425972'\n", "\\item 'SRP427089'\n", "\\item 'SRP427803'\n", "\\item 'SRP429415'\n", "\\item 'SRP429660'\n", "\\item 'SRP434040'\n", "\\item 'SRP441076'\n", "\\item 'SRP450409'\n", "\\item 'SRP450883'\n", "\\item 'SRP456927'\n", "\\item 'SRP459553'\n", "\\item 'SRP460271'\n", "\\item 'SRP460577'\n", "\\item 'SRP462725'\n", "\\item 'SRP467219'\n", "\\item 'SRP467389'\n", "\\item 'SRP471943'\n", "\\item 'SRP475554'\n", "\\item 'SRP475910'\n", "\\item 'SRP492541'\n", "\\item 'SRP495213'\n", "\\item 'SRP502519'\n", "\\item 'SRP505210'\n", "\\item 'SRP514379'\n", "\\item 'SRP514725'\n", "\\item 'SRP519950'\n", "\\item 'SRP522157'\n", "\\item 'SRP534898'\n", "\\item 'SRP538739'\n", "\\item 'SRP545341'\n", "\\item 'SRP549999'\n", "\\item 'SRP562763'\n", "\\end{enumerate*}\n" ], "text/markdown": [ "1. 'DRP004203'\n", "2. 'DRP004516'\n", "3. 'DRP007970'\n", "4. 'DRP010751'\n", "5. 'DRP012354'\n", "6. 'DRP012463'\n", "7. 'ERP002625'\n", "8. 'ERP004639'\n", "9. 'ERP006052'\n", "10. 
'ERP118192'\n", "11. 'ERP122879'\n", "12. 'ERP130053'\n", "13. 'ERP133598'\n", "14. 'SRP002417'\n", "15. 'SRP004440'\n", "16. 'SRP011484'\n", "17. 'SRP013261'\n", "18. 'SRP014715'\n", "19. 'SRP015803'\n", "20. 'SRP015870'\n", "21. 'SRP019238'\n", "22. 'SRP019244'\n", "23. 'SRP020361'\n", "24. 'SRP022058'\n", "25. 'SRP022191'\n", "26. 'SRP022586'\n", "27. 'SRP022939'\n", "28. 'SRP024289'\n", "29. 'SRP027395'\n", "30. 'SRP030016'\n", "31. 'SRP032943'\n", "32. 'SRP033246'\n", "33. 'SRP039391'\n", "34. 'SRP041189'\n", "35. 'SRP041288'\n", "36. 'SRP043101'\n", "37. 'SRP045310'\n", "38. 'SRP046740'\n", "39. 'SRP047076'\n", "40. 'SRP048941'\n", "41. 'SRP050250'\n", "42. 'SRP053236'\n", "43. 'SRP053932'\n", "44. 'SRP056006'\n", "45. 'SRP062064'\n", "46. 'SRP062201'\n", "47. 'SRP067582'\n", "48. 'SRP068248'\n", "49. 'SRP068487'\n", "50. 'SRP069272'\n", "51. 'SRP070830'\n", "52. 'SRP075447'\n", "53. 'SRP075526'\n", "54. 'SRP076673'\n", "55. 'SRP080981'\n", "56. 'SRP081226'\n", "57. 'SRP082457'\n", "58. 'SRP082626'\n", "59. 'SRP083114'\n", "60. 'SRP089994'\n", "61. 'SRP090842'\n", "62. 'SRP092516'\n", "63. 'SRP095071'\n", "64. 'SRP095247'\n", "65. 'SRP095846'\n", "66. 'SRP098716'\n", "67. 'SRP099565'\n", "68. 'SRP102676'\n", "69. 'SRP106949'\n", "70. 'SRP107945'\n", "71. 'SRP113298'\n", "72. 'SRP114989'\n", "73. 'SRP117554'\n", "74. 'SRP117804'\n", "75. 'SRP130760'\n", "76. 'SRP132898'\n", "77. 'SRP133690'\n", "78. 'SRP133700'\n", "79. 'SRP136261'\n", "80. 'SRP136506'\n", "81. 'SRP139941'\n", "82. 'SRP140405'\n", "83. 'SRP151761'\n", "84. 'SRP152109'\n", "85. 'SRP153124'\n", "86. 'SRP159176'\n", "87. 'SRP161743'\n", "88. 'SRP162734'\n", "89. 'SRP166960'\n", "90. 'SRP167074'\n", "91. 'SRP173018'\n", "92. 'SRP173746'\n", "93. 'SRP173769'\n", "94. 'SRP185712'\n", "95. 'SRP186879'\n", "96. 'SRP188544'\n", "97. 'SRP188881'\n", "98. 'SRP188882'\n", "99. 'SRP190001'\n", "100. 'SRP191454'\n", "101. 'SRP193058'\n", "102. 'SRP194957'\n", "103. 'SRP194959'\n", "104. 
'SRP194960'\n", "105. 'SRP197606'\n", "106. 'SRP201650'\n", "107. 'SRP219501'\n", "108. 'SRP220650'\n", "109. 'SRP224376'\n", "110. 'SRP226889'\n", "111. 'SRP228312'\n", "112. 'SRP228482'\n", "113. 'SRP234934'\n", "114. 'SRP235124'\n", "115. 'SRP239636'\n", "116. 'SRP242105'\n", "117. 'SRP242676'\n", "118. 'SRP250993'\n", "119. 'SRP253306'\n", "120. 'SRP254532'\n", "121. 'SRP258959'\n", "122. 'SRP259631'\n", "123. 'SRP261514'\n", "124. 'SRP266225'\n", "125. 'SRP267617'\n", "126. 'SRP268601'\n", "127. 'SRP271763'\n", "128. 'SRP273773'\n", "129. 'SRP276926'\n", "130. 'SRP278975'\n", "131. 'SRP280012'\n", "132. 'SRP282851'\n", "133. 'SRP286523'\n", "134. 'SRP287115'\n", "135. 'SRP287323'\n", "136. 'SRP287470'\n", "137. 'SRP289190'\n", "138. 'SRP293596'\n", "139. 'SRP295605'\n", "140. 'SRP296909'\n", "141. 'SRP297180'\n", "142. 'SRP297182'\n", "143. 'SRP300500'\n", "144. 'SRP301120'\n", "145. 'SRP301902'\n", "146. 'SRP303074'\n", "147. 'SRP309600'\n", "148. 'SRP309738'\n", "149. 'SRP310279'\n", "150. 'SRP316175'\n", "151. 'SRP318094'\n", "152. 'SRP321347'\n", "153. 'SRP321937'\n", "154. 'SRP323153'\n", "155. 'SRP323393'\n", "156. 'SRP325318'\n", "157. 'SRP326785'\n", "158. 'SRP327880'\n", "159. 'SRP329186'\n", "160. 'SRP329700'\n", "161. 'SRP330157'\n", "162. 'SRP331646'\n", "163. 'SRP332785'\n", "164. 'SRP333595'\n", "165. 'SRP335517'\n", "166. 'SRP335728'\n", "167. 'SRP336271'\n", "168. 'SRP336520'\n", "169. 'SRP338028'\n", "170. 'SRP338321'\n", "171. 'SRP341126'\n", "172. 'SRP345699'\n", "173. 'SRP347143'\n", "174. 'SRP347150'\n", "175. 'SRP347151'\n", "176. 'SRP347159'\n", "177. 'SRP347162'\n", "178. 'SRP347301'\n", "179. 'SRP347305'\n", "180. 'SRP347526'\n", "181. 'SRP348408'\n", "182. 'SRP349292'\n", "183. 'SRP349675'\n", "184. 'SRP353286'\n", "185. 'SRP353848'\n", "186. 'SRP360517'\n", "187. 'SRP362460'\n", "188. 'SRP366221'\n", "189. 'SRP366895'\n", "190. 'SRP367312'\n", "191. 'SRP369521'\n", "192. 'SRP373323'\n", "193. 'SRP373402'\n", "194. 
'SRP383573'\n", "195. 'SRP385632'\n", "196. 'SRP390051'\n", "197. 'SRP395037'\n", "198. 'SRP396943'\n", "199. 'SRP399486'\n", "200. 'SRP403478'\n", "201. 'SRP405969'\n", "202. 'SRP409344'\n", "203. 'SRP412451'\n", "204. 'SRP413636'\n", "205. 'SRP418585'\n", "206. 'SRP420177'\n", "207. 'SRP425972'\n", "208. 'SRP427089'\n", "209. 'SRP427803'\n", "210. 'SRP429415'\n", "211. 'SRP429660'\n", "212. 'SRP434040'\n", "213. 'SRP441076'\n", "214. 'SRP450409'\n", "215. 'SRP450883'\n", "216. 'SRP456927'\n", "217. 'SRP459553'\n", "218. 'SRP460271'\n", "219. 'SRP460577'\n", "220. 'SRP462725'\n", "221. 'SRP467219'\n", "222. 'SRP467389'\n", "223. 'SRP471943'\n", "224. 'SRP475554'\n", "225. 'SRP475910'\n", "226. 'SRP492541'\n", "227. 'SRP495213'\n", "228. 'SRP502519'\n", "229. 'SRP505210'\n", "230. 'SRP514379'\n", "231. 'SRP514725'\n", "232. 'SRP519950'\n", "233. 'SRP522157'\n", "234. 'SRP534898'\n", "235. 'SRP538739'\n", "236. 'SRP545341'\n", "237. 'SRP549999'\n", "238. 'SRP562763'\n", "\n", "\n" ], "text/plain": [ " [1] \"DRP004203\" \"DRP004516\" \"DRP007970\" \"DRP010751\" \"DRP012354\" \"DRP012463\"\n", " [7] \"ERP002625\" \"ERP004639\" \"ERP006052\" \"ERP118192\" \"ERP122879\" \"ERP130053\"\n", " [13] \"ERP133598\" \"SRP002417\" \"SRP004440\" \"SRP011484\" \"SRP013261\" \"SRP014715\"\n", " [19] \"SRP015803\" \"SRP015870\" \"SRP019238\" \"SRP019244\" \"SRP020361\" \"SRP022058\"\n", " [25] \"SRP022191\" \"SRP022586\" \"SRP022939\" \"SRP024289\" \"SRP027395\" \"SRP030016\"\n", " [31] \"SRP032943\" \"SRP033246\" \"SRP039391\" \"SRP041189\" \"SRP041288\" \"SRP043101\"\n", " [37] \"SRP045310\" \"SRP046740\" \"SRP047076\" \"SRP048941\" \"SRP050250\" \"SRP053236\"\n", " [43] \"SRP053932\" \"SRP056006\" \"SRP062064\" \"SRP062201\" \"SRP067582\" \"SRP068248\"\n", " [49] \"SRP068487\" \"SRP069272\" \"SRP070830\" \"SRP075447\" \"SRP075526\" \"SRP076673\"\n", " [55] \"SRP080981\" \"SRP081226\" \"SRP082457\" \"SRP082626\" \"SRP083114\" \"SRP089994\"\n", " [61] \"SRP090842\" \"SRP092516\" 
\"SRP095071\" \"SRP095247\" \"SRP095846\" \"SRP098716\"\n", " [67] \"SRP099565\" \"SRP102676\" \"SRP106949\" \"SRP107945\" \"SRP113298\" \"SRP114989\"\n", " [73] \"SRP117554\" \"SRP117804\" \"SRP130760\" \"SRP132898\" \"SRP133690\" \"SRP133700\"\n", " [79] \"SRP136261\" \"SRP136506\" \"SRP139941\" \"SRP140405\" \"SRP151761\" \"SRP152109\"\n", " [85] \"SRP153124\" \"SRP159176\" \"SRP161743\" \"SRP162734\" \"SRP166960\" \"SRP167074\"\n", " [91] \"SRP173018\" \"SRP173746\" \"SRP173769\" \"SRP185712\" \"SRP186879\" \"SRP188544\"\n", " [97] \"SRP188881\" \"SRP188882\" \"SRP190001\" \"SRP191454\" \"SRP193058\" \"SRP194957\"\n", "[103] \"SRP194959\" \"SRP194960\" \"SRP197606\" \"SRP201650\" \"SRP219501\" \"SRP220650\"\n", "[109] \"SRP224376\" \"SRP226889\" \"SRP228312\" \"SRP228482\" \"SRP234934\" \"SRP235124\"\n", "[115] \"SRP239636\" \"SRP242105\" \"SRP242676\" \"SRP250993\" \"SRP253306\" \"SRP254532\"\n", "[121] \"SRP258959\" \"SRP259631\" \"SRP261514\" \"SRP266225\" \"SRP267617\" \"SRP268601\"\n", "[127] \"SRP271763\" \"SRP273773\" \"SRP276926\" \"SRP278975\" \"SRP280012\" \"SRP282851\"\n", "[133] \"SRP286523\" \"SRP287115\" \"SRP287323\" \"SRP287470\" \"SRP289190\" \"SRP293596\"\n", "[139] \"SRP295605\" \"SRP296909\" \"SRP297180\" \"SRP297182\" \"SRP300500\" \"SRP301120\"\n", "[145] \"SRP301902\" \"SRP303074\" \"SRP309600\" \"SRP309738\" \"SRP310279\" \"SRP316175\"\n", "[151] \"SRP318094\" \"SRP321347\" \"SRP321937\" \"SRP323153\" \"SRP323393\" \"SRP325318\"\n", "[157] \"SRP326785\" \"SRP327880\" \"SRP329186\" \"SRP329700\" \"SRP330157\" \"SRP331646\"\n", "[163] \"SRP332785\" \"SRP333595\" \"SRP335517\" \"SRP335728\" \"SRP336271\" \"SRP336520\"\n", "[169] \"SRP338028\" \"SRP338321\" \"SRP341126\" \"SRP345699\" \"SRP347143\" \"SRP347150\"\n", "[175] \"SRP347151\" \"SRP347159\" \"SRP347162\" \"SRP347301\" \"SRP347305\" \"SRP347526\"\n", "[181] \"SRP348408\" \"SRP349292\" \"SRP349675\" \"SRP353286\" \"SRP353848\" \"SRP360517\"\n", "[187] \"SRP362460\" \"SRP366221\" 
\"SRP366895\" \"SRP367312\" \"SRP369521\" \"SRP373323\"\n", "[193] \"SRP373402\" \"SRP383573\" \"SRP385632\" \"SRP390051\" \"SRP395037\" \"SRP396943\"\n", "[199] \"SRP399486\" \"SRP403478\" \"SRP405969\" \"SRP409344\" \"SRP412451\" \"SRP413636\"\n", "[205] \"SRP418585\" \"SRP420177\" \"SRP425972\" \"SRP427089\" \"SRP427803\" \"SRP429415\"\n", "[211] \"SRP429660\" \"SRP434040\" \"SRP441076\" \"SRP450409\" \"SRP450883\" \"SRP456927\"\n", "[217] \"SRP459553\" \"SRP460271\" \"SRP460577\" \"SRP462725\" \"SRP467219\" \"SRP467389\"\n", "[223] \"SRP471943\" \"SRP475554\" \"SRP475910\" \"SRP492541\" \"SRP495213\" \"SRP502519\"\n", "[229] \"SRP505210\" \"SRP514379\" \"SRP514725\" \"SRP519950\" \"SRP522157\" \"SRP534898\"\n", "[235] \"SRP538739\" \"SRP545341\" \"SRP549999\" \"SRP562763\"" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Check which library sources survived the filter, then count and list the distinct studies.\n", "unique(sra_rna[['LibrarySource']])\n", "length(unique(sra_rna[['SRAStudy']]))\n", "sort(unique(sra_rna[['SRAStudy']]))" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
  1. 'SRR32285056'
  2. 'SRR32285057'
  3. 'SRR32285059'
  4. 'SRR32285058'
  5. 'SRR32285060'
  6. 'SRR32285061'
\n" ], "text/latex": [ "\\begin{enumerate*}\n", "\\item 'SRR32285056'\n", "\\item 'SRR32285057'\n", "\\item 'SRR32285059'\n", "\\item 'SRR32285058'\n", "\\item 'SRR32285060'\n", "\\item 'SRR32285061'\n", "\\end{enumerate*}\n" ], "text/markdown": [ "1. 'SRR32285056'\n", "2. 'SRR32285057'\n", "3. 'SRR32285059'\n", "4. 'SRR32285058'\n", "5. 'SRR32285060'\n", "6. 'SRR32285061'\n", "\n", "\n" ], "text/plain": [ "[1] \"SRR32285056\" \"SRR32285057\" \"SRR32285059\" \"SRR32285058\" \"SRR32285060\"\n", "[6] \"SRR32285061\"" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# get all runs\n", "expt = sra_rna$Run\n", "head(expt)" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\t\n", "\n", "\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\n", "
A data.frame: 6 × 47
RunReleaseDateLoadDatespotsbasesspots_with_matesavgLengthsize_MBAssemblyNamedownload_pathAffection_StatusAnalyte_TypeHistological_TypeBody_SiteCenterNameSubmissiondbgap_study_accessionConsentRunHashReadHash
<chr><chr><chr><int><dbl><int><int><int><chr><chr><lgl><lgl><lgl><chr><chr><chr><lgl><chr><chr><chr>
6SRR322850562025-02-10 04:23:572025-02-10 04:21:28218910436567312900218910433002024https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos7/sra-pub-zq-41/SRR032/32285/SRR32285056/SRR32285056.lite.1NANANAFUJIAN AGRICULTURE AND FORESTRY UNIVERSITYSRA2075092NApublicFB49D54DB2E676B8ED0B1A8B855BA5C82EC8803B5546954E4015B8AD36447C43
7SRR322850572025-02-10 04:23:572025-02-10 04:21:27193278885798366400193278883001774https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos7/sra-pub-zq-41/SRR032/32285/SRR32285057/SRR32285057.lite.1NANANAFUJIAN AGRICULTURE AND FORESTRY UNIVERSITYSRA2075092NApublic9709911C0CF67F272929C590ABDBDF733E123C4D86F09B11873E04118229DD25
8SRR322850592025-02-10 04:23:572025-02-10 04:22:18180717505421525000180717503001650https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos7/sra-pub-zq-41/SRR032/32285/SRR32285059/SRR32285059.lite.1NANANAFUJIAN AGRICULTURE AND FORESTRY UNIVERSITYSRA2075092NApublicEB1443902F83262E7F8331F92E6B53D3738AA9417DB969CF7B26C83E28912C46
9SRR322850582025-02-10 04:24:362025-02-10 04:22:59218979776569393100218979773002025https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos7/sra-pub-zq-41/SRR032/32285/SRR32285058/SRR32285058.lite.1NANANAFUJIAN AGRICULTURE AND FORESTRY UNIVERSITYSRA2075092NApublic34BDBD8D18C7B746A8BA1B4E6068FCE2270D65EF6C63D5C9568794046AEA8ECE
11SRR322850602025-02-10 04:24:362025-02-10 04:22:49222183616665508300222183613002054https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos7/sra-pub-zq-41/SRR032/32285/SRR32285060/SRR32285060.lite.1NANANAFUJIAN AGRICULTURE AND FORESTRY UNIVERSITYSRA2075092NApublic30045B86AEF20201859D9BB16399E1784BEBAF2149C4C74133F84176964F85BF
12SRR322850612025-02-10 04:31:002025-02-10 04:24:26296212458886373500296212453002734https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos7/sra-pub-zq-41/SRR032/32285/SRR32285061/SRR32285061.lite.1NANANAFUJIAN AGRICULTURE AND FORESTRY UNIVERSITYSRA2075092NApublicFBD32D386A9A7AC3589C56CF7D66C6337E84A19BBF6492AE9D362F940F2A6C82
\n" ], "text/latex": [ "A data.frame: 6 × 47\n", "\\begin{tabular}{r|lllllllllllllllllllll}\n", " & Run & ReleaseDate & LoadDate & spots & bases & spots\\_with\\_mates & avgLength & size\\_MB & AssemblyName & download\\_path & ⋯ & Affection\\_Status & Analyte\\_Type & Histological\\_Type & Body\\_Site & CenterName & Submission & dbgap\\_study\\_accession & Consent & RunHash & ReadHash\\\\\n", " & & & & & & & & & & & ⋯ & & & & & & & & & & \\\\\n", "\\hline\n", "\t6 & SRR32285056 & 2025-02-10 04:23:57 & 2025-02-10 04:21:28 & 21891043 & 6567312900 & 21891043 & 300 & 2024 & & https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos7/sra-pub-zq-41/SRR032/32285/SRR32285056/SRR32285056.lite.1 & ⋯ & NA & NA & NA & & FUJIAN AGRICULTURE AND FORESTRY UNIVERSITY & SRA2075092 & NA & public & FB49D54DB2E676B8ED0B1A8B855BA5C8 & 2EC8803B5546954E4015B8AD36447C43\\\\\n", "\t7 & SRR32285057 & 2025-02-10 04:23:57 & 2025-02-10 04:21:27 & 19327888 & 5798366400 & 19327888 & 300 & 1774 & & https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos7/sra-pub-zq-41/SRR032/32285/SRR32285057/SRR32285057.lite.1 & ⋯ & NA & NA & NA & & FUJIAN AGRICULTURE AND FORESTRY UNIVERSITY & SRA2075092 & NA & public & 9709911C0CF67F272929C590ABDBDF73 & 3E123C4D86F09B11873E04118229DD25\\\\\n", "\t8 & SRR32285059 & 2025-02-10 04:23:57 & 2025-02-10 04:22:18 & 18071750 & 5421525000 & 18071750 & 300 & 1650 & & https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos7/sra-pub-zq-41/SRR032/32285/SRR32285059/SRR32285059.lite.1 & ⋯ & NA & NA & NA & & FUJIAN AGRICULTURE AND FORESTRY UNIVERSITY & SRA2075092 & NA & public & EB1443902F83262E7F8331F92E6B53D3 & 738AA9417DB969CF7B26C83E28912C46\\\\\n", "\t9 & SRR32285058 & 2025-02-10 04:24:36 & 2025-02-10 04:22:59 & 21897977 & 6569393100 & 21897977 & 300 & 2025 & & https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos7/sra-pub-zq-41/SRR032/32285/SRR32285058/SRR32285058.lite.1 & ⋯ & NA & NA & NA & & FUJIAN AGRICULTURE AND FORESTRY UNIVERSITY & SRA2075092 & NA & public & 34BDBD8D18C7B746A8BA1B4E6068FCE2 & 
270D65EF6C63D5C9568794046AEA8ECE\\\\\n", "\t11 & SRR32285060 & 2025-02-10 04:24:36 & 2025-02-10 04:22:49 & 22218361 & 6665508300 & 22218361 & 300 & 2054 & & https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos7/sra-pub-zq-41/SRR032/32285/SRR32285060/SRR32285060.lite.1 & ⋯ & NA & NA & NA & & FUJIAN AGRICULTURE AND FORESTRY UNIVERSITY & SRA2075092 & NA & public & 30045B86AEF20201859D9BB16399E178 & 4BEBAF2149C4C74133F84176964F85BF\\\\\n", "\t12 & SRR32285061 & 2025-02-10 04:31:00 & 2025-02-10 04:24:26 & 29621245 & 8886373500 & 29621245 & 300 & 2734 & & https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos7/sra-pub-zq-41/SRR032/32285/SRR32285061/SRR32285061.lite.1 & ⋯ & NA & NA & NA & & FUJIAN AGRICULTURE AND FORESTRY UNIVERSITY & SRA2075092 & NA & public & FBD32D386A9A7AC3589C56CF7D66C633 & 7E84A19BBF6492AE9D362F940F2A6C82\\\\\n", "\\end{tabular}\n" ], "text/markdown": [ "\n", "A data.frame: 6 × 47\n", "\n", "| | Run <chr> | ReleaseDate <chr> | LoadDate <chr> | spots <int> | bases <dbl> | spots_with_mates <int> | avgLength <int> | size_MB <int> | AssemblyName <chr> | download_path <chr> | ⋯ ⋯ | Affection_Status <lgl> | Analyte_Type <lgl> | Histological_Type <lgl> | Body_Site <chr> | CenterName <chr> | Submission <chr> | dbgap_study_accession <lgl> | Consent <chr> | RunHash <chr> | ReadHash <chr> |\n", "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n", "| 6 | SRR32285056 | 2025-02-10 04:23:57 | 2025-02-10 04:21:28 | 21891043 | 6567312900 | 21891043 | 300 | 2024 | | https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos7/sra-pub-zq-41/SRR032/32285/SRR32285056/SRR32285056.lite.1 | ⋯ | NA | NA | NA | | FUJIAN AGRICULTURE AND FORESTRY UNIVERSITY | SRA2075092 | NA | public | FB49D54DB2E676B8ED0B1A8B855BA5C8 | 2EC8803B5546954E4015B8AD36447C43 |\n", "| 7 | SRR32285057 | 2025-02-10 04:23:57 | 2025-02-10 04:21:27 | 19327888 | 5798366400 | 19327888 | 300 | 1774 | | 
https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos7/sra-pub-zq-41/SRR032/32285/SRR32285057/SRR32285057.lite.1 | ⋯ | NA | NA | NA | | FUJIAN AGRICULTURE AND FORESTRY UNIVERSITY | SRA2075092 | NA | public | 9709911C0CF67F272929C590ABDBDF73 | 3E123C4D86F09B11873E04118229DD25 |\n", "| 8 | SRR32285059 | 2025-02-10 04:23:57 | 2025-02-10 04:22:18 | 18071750 | 5421525000 | 18071750 | 300 | 1650 | | https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos7/sra-pub-zq-41/SRR032/32285/SRR32285059/SRR32285059.lite.1 | ⋯ | NA | NA | NA | | FUJIAN AGRICULTURE AND FORESTRY UNIVERSITY | SRA2075092 | NA | public | EB1443902F83262E7F8331F92E6B53D3 | 738AA9417DB969CF7B26C83E28912C46 |\n", "| 9 | SRR32285058 | 2025-02-10 04:24:36 | 2025-02-10 04:22:59 | 21897977 | 6569393100 | 21897977 | 300 | 2025 | | https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos7/sra-pub-zq-41/SRR032/32285/SRR32285058/SRR32285058.lite.1 | ⋯ | NA | NA | NA | | FUJIAN AGRICULTURE AND FORESTRY UNIVERSITY | SRA2075092 | NA | public | 34BDBD8D18C7B746A8BA1B4E6068FCE2 | 270D65EF6C63D5C9568794046AEA8ECE |\n", "| 11 | SRR32285060 | 2025-02-10 04:24:36 | 2025-02-10 04:22:49 | 22218361 | 6665508300 | 22218361 | 300 | 2054 | | https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos7/sra-pub-zq-41/SRR032/32285/SRR32285060/SRR32285060.lite.1 | ⋯ | NA | NA | NA | | FUJIAN AGRICULTURE AND FORESTRY UNIVERSITY | SRA2075092 | NA | public | 30045B86AEF20201859D9BB16399E178 | 4BEBAF2149C4C74133F84176964F85BF |\n", "| 12 | SRR32285061 | 2025-02-10 04:31:00 | 2025-02-10 04:24:26 | 29621245 | 8886373500 | 29621245 | 300 | 2734 | | https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos7/sra-pub-zq-41/SRR032/32285/SRR32285061/SRR32285061.lite.1 | ⋯ | NA | NA | NA | | FUJIAN AGRICULTURE AND FORESTRY UNIVERSITY | SRA2075092 | NA | public | FBD32D386A9A7AC3589C56CF7D66C633 | 7E84A19BBF6492AE9D362F940F2A6C82 |\n", "\n" ], "text/plain": [ " Run ReleaseDate LoadDate spots bases \n", "6 SRR32285056 2025-02-10 04:23:57 2025-02-10 04:21:28 21891043 6567312900\n", "7 
SRR32285057 2025-02-10 04:23:57 2025-02-10 04:21:27 19327888 5798366400\n", "8 SRR32285059 2025-02-10 04:23:57 2025-02-10 04:22:18 18071750 5421525000\n", "9 SRR32285058 2025-02-10 04:24:36 2025-02-10 04:22:59 21897977 6569393100\n", "11 SRR32285060 2025-02-10 04:24:36 2025-02-10 04:22:49 22218361 6665508300\n", "12 SRR32285061 2025-02-10 04:31:00 2025-02-10 04:24:26 29621245 8886373500\n", " spots_with_mates avgLength size_MB AssemblyName\n", "6 21891043 300 2024 \n", "7 19327888 300 1774 \n", "8 18071750 300 1650 \n", "9 21897977 300 2025 \n", "11 22218361 300 2054 \n", "12 29621245 300 2734 \n", " download_path \n", "6 https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos7/sra-pub-zq-41/SRR032/32285/SRR32285056/SRR32285056.lite.1\n", "7 https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos7/sra-pub-zq-41/SRR032/32285/SRR32285057/SRR32285057.lite.1\n", "8 https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos7/sra-pub-zq-41/SRR032/32285/SRR32285059/SRR32285059.lite.1\n", "9 https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos7/sra-pub-zq-41/SRR032/32285/SRR32285058/SRR32285058.lite.1\n", "11 https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos7/sra-pub-zq-41/SRR032/32285/SRR32285060/SRR32285060.lite.1\n", "12 https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos7/sra-pub-zq-41/SRR032/32285/SRR32285061/SRR32285061.lite.1\n", " ⋯ Affection_Status Analyte_Type Histological_Type Body_Site\n", "6 ⋯ NA NA NA \n", "7 ⋯ NA NA NA \n", "8 ⋯ NA NA NA \n", "9 ⋯ NA NA NA \n", "11 ⋯ NA NA NA \n", "12 ⋯ NA NA NA \n", " CenterName Submission dbgap_study_accession\n", "6 FUJIAN AGRICULTURE AND FORESTRY UNIVERSITY SRA2075092 NA \n", "7 FUJIAN AGRICULTURE AND FORESTRY UNIVERSITY SRA2075092 NA \n", "8 FUJIAN AGRICULTURE AND FORESTRY UNIVERSITY SRA2075092 NA \n", "9 FUJIAN AGRICULTURE AND FORESTRY UNIVERSITY SRA2075092 NA \n", "11 FUJIAN AGRICULTURE AND FORESTRY UNIVERSITY SRA2075092 NA \n", "12 FUJIAN AGRICULTURE AND FORESTRY UNIVERSITY SRA2075092 NA \n", " Consent RunHash ReadHash \n", "6 public 
FB49D54DB2E676B8ED0B1A8B855BA5C8 2EC8803B5546954E4015B8AD36447C43\n", "7 public 9709911C0CF67F272929C590ABDBDF73 3E123C4D86F09B11873E04118229DD25\n", "8 public EB1443902F83262E7F8331F92E6B53D3 738AA9417DB969CF7B26C83E28912C46\n", "9 public 34BDBD8D18C7B746A8BA1B4E6068FCE2 270D65EF6C63D5C9568794046AEA8ECE\n", "11 public 30045B86AEF20201859D9BB16399E178 4BEBAF2149C4C74133F84176964F85BF\n", "12 public FBD32D386A9A7AC3589C56CF7D66C633 7E84A19BBF6492AE9D362F940F2A6C82" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "newsra_runinfo = sra_rna[match(expt, sra_rna$Run),]\n", "head(newsra_runinfo)" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [], "source": [ "# count the runs of every study and keep the studies with at least min_runs runs\n", "min_runs = 10\n", "acclist = unique(sra_rna$SRAStudy)\n", "run_study = as.character(newsra_runinfo$SRAStudy)\n", "\n", "# one pass per study; vapply yields integer(0) for an empty acclist, where a 1:length() loop would run on c(1, 0)\n", "run_counts = vapply(as.character(acclist), function(acc) sum(run_study == acc, na.rm = TRUE),\n", "                    integer(1), USE.NAMES = FALSE)\n", "lens = matrix(run_counts, nrow = length(acclist))\n", "newdf = data.frame(accession = acclist, count = run_counts)\n", "\n", "ids = which(newdf$count >= min_runs)\n", "# columns: index into acclist, study accession, number of runs (cbind coerces all to character)\n", "list2 = cbind(ids, acclist[ids], lens[ids])" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "data": { "text/html": [ "126" ], "text/latex": [ "126" ], "text/markdown": [ "126" ], "text/plain": [ "[1] 126" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# number of studies that pass the run-count threshold\n", "sum(newdf$count >= min_runs)" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
  1. 126
  2. 3
\n" ], "text/latex": [ "\\begin{enumerate*}\n", "\\item 126\n", "\\item 3\n", "\\end{enumerate*}\n" ], "text/markdown": [ "1. 126\n", "2. 3\n", "\n", "\n" ], "text/plain": [ "[1] 126 3" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\n", "\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\n", "
A matrix: 6 × 3 of type chr
ids
[121,]228SRP01148412
[122,]229SRP51437915
[123,]234SRP46027147
[124,]235SRP45088324
[125,]237SRP41363612
[126,]238SRP34967580
\n" ], "text/latex": [ "A matrix: 6 × 3 of type chr\n", "\\begin{tabular}{r|lll}\n", " & ids & & \\\\\n", "\\hline\n", "\t{[}121,{]} & 228 & SRP011484 & 12\\\\\n", "\t{[}122,{]} & 229 & SRP514379 & 15\\\\\n", "\t{[}123,{]} & 234 & SRP460271 & 47\\\\\n", "\t{[}124,{]} & 235 & SRP450883 & 24\\\\\n", "\t{[}125,{]} & 237 & SRP413636 & 12\\\\\n", "\t{[}126,{]} & 238 & SRP349675 & 80\\\\\n", "\\end{tabular}\n" ], "text/markdown": [ "\n", "A matrix: 6 × 3 of type chr\n", "\n", "| | ids | | |\n", "|---|---|---|---|\n", "| [121,] | 228 | SRP011484 | 12 |\n", "| [122,] | 229 | SRP514379 | 15 |\n", "| [123,] | 234 | SRP460271 | 47 |\n", "| [124,] | 235 | SRP450883 | 24 |\n", "| [125,] | 237 | SRP413636 | 12 |\n", "| [126,] | 238 | SRP349675 | 80 |\n", "\n" ], "text/plain": [ " ids \n", "[121,] 228 SRP011484 12\n", "[122,] 229 SRP514379 15\n", "[123,] 234 SRP460271 47\n", "[124,] 235 SRP450883 24\n", "[125,] 237 SRP413636 12\n", "[126,] 238 SRP349675 80" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "dim(list2)\n", "tail(list2)" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\n", "\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\n", "
A matrix: 6 × 3 of type chr
ids
17 SRP462725439
52 SRP353286308
106SRP280012189
184SRP082457180
119SRP250993177
158SRP139941176
\n" ], "text/latex": [ "A matrix: 6 × 3 of type chr\n", "\\begin{tabular}{lll}\n", " ids & & \\\\\n", "\\hline\n", "\t 17 & SRP462725 & 439\\\\\n", "\t 52 & SRP353286 & 308\\\\\n", "\t 106 & SRP280012 & 189\\\\\n", "\t 184 & SRP082457 & 180\\\\\n", "\t 119 & SRP250993 & 177\\\\\n", "\t 158 & SRP139941 & 176\\\\\n", "\\end{tabular}\n" ], "text/markdown": [ "\n", "A matrix: 6 × 3 of type chr\n", "\n", "| ids | | |\n", "|---|---|---|\n", "| 17 | SRP462725 | 439 |\n", "| 52 | SRP353286 | 308 |\n", "| 106 | SRP280012 | 189 |\n", "| 184 | SRP082457 | 180 |\n", "| 119 | SRP250993 | 177 |\n", "| 158 | SRP139941 | 176 |\n", "\n" ], "text/plain": [ " ids \n", "[1,] 17 SRP462725 439\n", "[2,] 52 SRP353286 308\n", "[3,] 106 SRP280012 189\n", "[4,] 184 SRP082457 180\n", "[5,] 119 SRP250993 177\n", "[6,] 158 SRP139941 176" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# re-order from max to min no. of runs per study\n", "# column 3 holds the run counts as character, so convert before ordering\n", "num_datasets = as.numeric(list2[,3])\n", "# drop = FALSE keeps list2 a matrix even when a single study is left\n", "list2 <- list2[rev(order(num_datasets)), , drop = FALSE]\n", "head(list2)" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [], "source": [ "# keep at most the 200 largest studies; nothing is removed here, possible\n", "# single-cell studies are only flagged by the sequencer-model check in a later cell\n", "mini = min(200, nrow(list2))\n", "# seq_len() gives an empty selection when list2 has no rows (1:0 would not)\n", "list3 = list2[seq_len(mini), , drop = FALSE]" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\n", "\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\n", "
A matrix: 20 × 3 of type chr
ids
17 SRP462725439
52 SRP353286308
106SRP280012189
184SRP082457180
119SRP250993177
158SRP139941176
215SRP015870164
45 SRP369521138
32 SRP418585115
90 SRP309600113
70 SRP33652099
136SRP19495796
20 SRP46057794
132SRP19305887
238SRP34967580
3 DRP01235480
82 SRP32531879
139SRP18571275
131SRP21950172
92 SRP30973868
\n" ], "text/latex": [ "A matrix: 20 × 3 of type chr\n", "\\begin{tabular}{lll}\n", " ids & & \\\\\n", "\\hline\n", "\t 17 & SRP462725 & 439\\\\\n", "\t 52 & SRP353286 & 308\\\\\n", "\t 106 & SRP280012 & 189\\\\\n", "\t 184 & SRP082457 & 180\\\\\n", "\t 119 & SRP250993 & 177\\\\\n", "\t 158 & SRP139941 & 176\\\\\n", "\t 215 & SRP015870 & 164\\\\\n", "\t 45 & SRP369521 & 138\\\\\n", "\t 32 & SRP418585 & 115\\\\\n", "\t 90 & SRP309600 & 113\\\\\n", "\t 70 & SRP336520 & 99 \\\\\n", "\t 136 & SRP194957 & 96 \\\\\n", "\t 20 & SRP460577 & 94 \\\\\n", "\t 132 & SRP193058 & 87 \\\\\n", "\t 238 & SRP349675 & 80 \\\\\n", "\t 3 & DRP012354 & 80 \\\\\n", "\t 82 & SRP325318 & 79 \\\\\n", "\t 139 & SRP185712 & 75 \\\\\n", "\t 131 & SRP219501 & 72 \\\\\n", "\t 92 & SRP309738 & 68 \\\\\n", "\\end{tabular}\n" ], "text/markdown": [ "\n", "A matrix: 20 × 3 of type chr\n", "\n", "| ids | | |\n", "|---|---|---|\n", "| 17 | SRP462725 | 439 |\n", "| 52 | SRP353286 | 308 |\n", "| 106 | SRP280012 | 189 |\n", "| 184 | SRP082457 | 180 |\n", "| 119 | SRP250993 | 177 |\n", "| 158 | SRP139941 | 176 |\n", "| 215 | SRP015870 | 164 |\n", "| 45 | SRP369521 | 138 |\n", "| 32 | SRP418585 | 115 |\n", "| 90 | SRP309600 | 113 |\n", "| 70 | SRP336520 | 99 |\n", "| 136 | SRP194957 | 96 |\n", "| 20 | SRP460577 | 94 |\n", "| 132 | SRP193058 | 87 |\n", "| 238 | SRP349675 | 80 |\n", "| 3 | DRP012354 | 80 |\n", "| 82 | SRP325318 | 79 |\n", "| 139 | SRP185712 | 75 |\n", "| 131 | SRP219501 | 72 |\n", "| 92 | SRP309738 | 68 |\n", "\n" ], "text/plain": [ " ids \n", " [1,] 17 SRP462725 439\n", " [2,] 52 SRP353286 308\n", " [3,] 106 SRP280012 189\n", " [4,] 184 SRP082457 180\n", " [5,] 119 SRP250993 177\n", " [6,] 158 SRP139941 176\n", " [7,] 215 SRP015870 164\n", " [8,] 45 SRP369521 138\n", " [9,] 32 SRP418585 115\n", "[10,] 90 SRP309600 113\n", "[11,] 70 SRP336520 99 \n", "[12,] 136 SRP194957 96 \n", "[13,] 20 SRP460577 94 \n", "[14,] 132 SRP193058 87 \n", "[15,] 238 SRP349675 80 \n", "[16,] 3 DRP012354 80 \n", 
"[17,] 82 SRP325318 79 \n", "[18,] 139 SRP185712 75 \n", "[19,] 131 SRP219501 72 \n", "[20,] 92 SRP309738 68 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# check how many runs per acc\n", "list3[1:20,]" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
  1. 'SRP109982'
  2. 'SRP489692'
  3. 'SRP115430'
  4. 'SRP189094'
  5. 'SRP172989'
  6. 'SRP188049'
  7. 'SRP031491'
  8. 'SRP185636'
  9. 'SRP188143'
  10. 'SRP216764'
  11. 'SRP282059'
  12. 'SRP303454'
  13. 'SRP468677'
  14. 'ERP119888'
  15. 'SRP384003'
  16. 'ERP140164'
  17. 'SRP269007'
  18. 'SRP255582'
  19. 'SRP272774'
  20. 'SRP089936'
  21. 'SRP491557'
  22. 'SRP301981'
  23. 'ERP169256'
  24. 'SRP173523'
  25. 'SRP247465'
  26. 'SRP248152'
  27. 'SRP271431'
  28. 'SRP277313'
  29. 'SRP061929'
  30. 'SRP435613'
\n" ], "text/latex": [ "\\begin{enumerate*}\n", "\\item 'SRP109982'\n", "\\item 'SRP489692'\n", "\\item 'SRP115430'\n", "\\item 'SRP189094'\n", "\\item 'SRP172989'\n", "\\item 'SRP188049'\n", "\\item 'SRP031491'\n", "\\item 'SRP185636'\n", "\\item 'SRP188143'\n", "\\item 'SRP216764'\n", "\\item 'SRP282059'\n", "\\item 'SRP303454'\n", "\\item 'SRP468677'\n", "\\item 'ERP119888'\n", "\\item 'SRP384003'\n", "\\item 'ERP140164'\n", "\\item 'SRP269007'\n", "\\item 'SRP255582'\n", "\\item 'SRP272774'\n", "\\item 'SRP089936'\n", "\\item 'SRP491557'\n", "\\item 'SRP301981'\n", "\\item 'ERP169256'\n", "\\item 'SRP173523'\n", "\\item 'SRP247465'\n", "\\item 'SRP248152'\n", "\\item 'SRP271431'\n", "\\item 'SRP277313'\n", "\\item 'SRP061929'\n", "\\item 'SRP435613'\n", "\\end{enumerate*}\n" ], "text/markdown": [ "1. 'SRP109982'\n", "2. 'SRP489692'\n", "3. 'SRP115430'\n", "4. 'SRP189094'\n", "5. 'SRP172989'\n", "6. 'SRP188049'\n", "7. 'SRP031491'\n", "8. 'SRP185636'\n", "9. 'SRP188143'\n", "10. 'SRP216764'\n", "11. 'SRP282059'\n", "12. 'SRP303454'\n", "13. 'SRP468677'\n", "14. 'ERP119888'\n", "15. 'SRP384003'\n", "16. 'ERP140164'\n", "17. 'SRP269007'\n", "18. 'SRP255582'\n", "19. 'SRP272774'\n", "20. 'SRP089936'\n", "21. 'SRP491557'\n", "22. 'SRP301981'\n", "23. 'ERP169256'\n", "24. 'SRP173523'\n", "25. 'SRP247465'\n", "26. 'SRP248152'\n", "27. 'SRP271431'\n", "28. 'SRP277313'\n", "29. 'SRP061929'\n", "30. 
'SRP435613'\n", "\n", "\n" ], "text/plain": [ " [1] \"SRP109982\" \"SRP489692\" \"SRP115430\" \"SRP189094\" \"SRP172989\" \"SRP188049\"\n", " [7] \"SRP031491\" \"SRP185636\" \"SRP188143\" \"SRP216764\" \"SRP282059\" \"SRP303454\"\n", "[13] \"SRP468677\" \"ERP119888\" \"SRP384003\" \"ERP140164\" \"SRP269007\" \"SRP255582\"\n", "[19] \"SRP272774\" \"SRP089936\" \"SRP491557\" \"SRP301981\" \"ERP169256\" \"SRP173523\"\n", "[25] \"SRP247465\" \"SRP248152\" \"SRP271431\" \"SRP277313\" \"SRP061929\" \"SRP435613\"" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "list2[1:30,2]" ] }, { "cell_type": "code", "execution_count": 73, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[1] \"2 SRP412906 tech: Illumina NovaSeq 6000\"\n", "[1] \"4 SRP480849 tech: Illumina NovaSeq 6000\"\n", "[1] \"5 SRP345958 tech: Illumina NovaSeq 6000\"\n", "[1] \"9 SRP493011 tech: Illumina NovaSeq 6000\"\n", "[1] \"14 SRP455672 tech: Illumina NovaSeq 6000\"\n", "[1] \"21 SRP534543 tech: Illumina NovaSeq 6000\"\n", "[1] \"25 SRP332511 tech: Illumina NovaSeq 6000\"\n", "[1] \"26 SRP468046 tech: Illumina NovaSeq 6000\"\n", "[1] \"27 SRP455600 tech: Illumina NovaSeq 6000\"\n", "[1] \"28 SRP485307 tech: Illumina NovaSeq 6000\"\n", "[1] \"29 SRP341921 tech: Illumina NovaSeq 6000\"\n", "[1] \"35 SRP405443 tech: Illumina NovaSeq 6000\"\n", "[1] \"39 SRP530118 tech: Illumina NovaSeq X Plus\"\n", "[1] \"49 SRP387734 tech: Illumina NovaSeq 6000\"\n", "[1] \"51 SRP408295 tech: Illumina NovaSeq 6000\"\n", "[1] \"52 SRP438222 tech: Illumina NovaSeq 6000\"\n", "[1] \"66 SRP294406 tech: Illumina NovaSeq 6000\"\n", "[1] \"77 SRP320722 tech: Illumina NovaSeq 6000\"\n", "[1] \"78 SRP322960 tech: Illumina NovaSeq 6000\"\n", "[1] \"80 SRP400385 tech: Illumina NovaSeq 6000\"\n", "[1] \"83 SRP526473 tech: Illumina NovaSeq X Plus\"\n", "[1] \"84 SRP541945 tech: Illumina NovaSeq 6000\"\n", "[1] \"88 SRP454107 tech: Illumina NovaSeq 6000\"\n", "[1] \"93 SRP521698 tech: 
Illumina NovaSeq 6000\"\n", "[1] \"103 SRP461459 tech: Illumina NovaSeq 6000\"\n", "[1] \"106 SRP443219 tech: Illumina NovaSeq 6000\"\n", "[1] \"112 SRP459314 tech: Illumina NovaSeq 6000\"\n", "[1] \"116 SRP526034 tech: Illumina NovaSeq 6000\"\n", "[1] \"129 SRP318006 tech: Illumina NovaSeq 6000\"\n", "[1] \"134 SRP390844 tech: Illumina NovaSeq 6000\"\n", "[1] \"135 SRP395813 tech: Illumina NovaSeq 6000\"\n", "[1] \"136 SRP398455 tech: Illumina NovaSeq 6000\"\n", "[1] \"138 SRP423420 tech: Illumina NovaSeq 6000\"\n", "[1] \"140 SRP461268 tech: Illumina NovaSeq 6000\"\n", "[1] \"142 SRP483274 tech: Illumina NovaSeq 6000\"\n", "[1] \"144 SRP488552 tech: Illumina NovaSeq 6000\"\n", "[1] \"146 SRP561343 tech: Illumina NovaSeq 6000\"\n", "[1] \"150 SRP331500 tech: Illumina NovaSeq 6000\"\n", "[1] \"165 SRP461235 tech: Illumina NovaSeq 6000\"\n", "[1] \"166 SRP439407 tech: Illumina NovaSeq 6000\"\n", "[1] \"167 SRP517146 tech: Illumina NovaSeq 6000\"\n", "[1] \"179 SRP300102 tech: Illumina NovaSeq 6000\"\n", "[1] \"180 SRP358001 tech: Illumina NovaSeq 6000\"\n", "[1] \"181 SRP382889 tech: Illumina NovaSeq 6000\"\n", "[1] \"184 SRP392080 tech: Illumina NovaSeq 6000\"\n", "[1] \"185 SRP395923 tech: Illumina NovaSeq 6000\"\n", "[1] \"186 SRP354877 tech: Illumina NovaSeq 6000\"\n", "[1] \"187 SRP411511 tech: Illumina NovaSeq 6000\"\n", "[1] \"189 SRP417578 tech: Illumina NovaSeq 6000\"\n", "[1] \"190 SRP438194 tech: Illumina NovaSeq 6000\"\n", "[1] \"193 SRP461388 tech: Illumina NovaSeq 6000\"\n", "[1] \"195 SRP471994 tech: Illumina NovaSeq 6000\"\n", "[1] \"196 SRP486975 tech: Illumina NovaSeq 6000\"\n", "[1] \"198 SRP490436 tech: Illumina NovaSeq 6000\"\n", "[1] \"199 SRP502972 tech: Illumina NovaSeq 6000\"\n", "[1] \"200 SRP503851 tech: Illumina NovaSeq 6000\"\n" ] } ], "source": [ "# single-cell - Too many runs or NovaSeq 6000/AB 5500xl or multiple SRA files tech might be indicative?\n", "# prints the studies in list3 whose sequencer model matches NovaSeq or 5500xl\n", "# NOTE: only the first run of each study is inspected for its model\n", "for(ii in seq_len(nrow(list3))){\n", "    study_rows = which(newsra_runinfo$SRAStudy==list3[ii,2])\n", "    model1 = newsra_runinfo$Model[study_rows[1]]\n", "\n", "    # isTRUE() treats a missing model (NA or no matching run) as no match\n", "    if(isTRUE(grepl('NovaSeq|5500xl', model1))){\n", "        print(paste0(ii, ' ', list3[ii,2], ' tech: ', model1))\n", "    }\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
  1. 718
  2. 3
\n" ], "text/latex": [ "\\begin{enumerate*}\n", "\\item 718\n", "\\item 3\n", "\\end{enumerate*}\n" ], "text/markdown": [ "1. 718\n", "2. 3\n", "\n", "\n" ], "text/plain": [ "[1] 718 3" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\n", "\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\n", "
A matrix: 6 × 3 of type chr
ids
540 SRP288011609
1066SRP000401328
70 SRP431186205
784 SRP165288203
870 SRP136035154
881 SRP133277152
\n" ], "text/latex": [ "A matrix: 6 × 3 of type chr\n", "\\begin{tabular}{lll}\n", " ids & & \\\\\n", "\\hline\n", "\t 540 & SRP288011 & 609\\\\\n", "\t 1066 & SRP000401 & 328\\\\\n", "\t 70 & SRP431186 & 205\\\\\n", "\t 784 & SRP165288 & 203\\\\\n", "\t 870 & SRP136035 & 154\\\\\n", "\t 881 & SRP133277 & 152\\\\\n", "\\end{tabular}\n" ], "text/markdown": [ "\n", "A matrix: 6 × 3 of type chr\n", "\n", "| ids | | |\n", "|---|---|---|\n", "| 540 | SRP288011 | 609 |\n", "| 1066 | SRP000401 | 328 |\n", "| 70 | SRP431186 | 205 |\n", "| 784 | SRP165288 | 203 |\n", "| 870 | SRP136035 | 154 |\n", "| 881 | SRP133277 | 152 |\n", "\n" ], "text/plain": [ " ids \n", "[1,] 540 SRP288011 609\n", "[2,] 1066 SRP000401 328\n", "[3,] 70 SRP431186 205\n", "[4,] 784 SRP165288 203\n", "[5,] 870 SRP136035 154\n", "[6,] 881 SRP133277 152" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "rm_sra = c('SRP076816', 'SRP112706', 'SRP106481', 'SRP186643', 'SRP070155', 'SRP029448',\n", " 'SRP090699', 'SRP045778')\n", "# logical filter: accessions that are not in list2 are simply ignored\n", "# (-match() yields NA for them, which breaks negative indexing)\n", "list2 <- list2[!(list2[,2] %in% rm_sra), , drop = FALSE]\n", "dim(list2)\n", "head(list2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# NOT if we're updating all runs\n", "# drop the studies already listed in cocodf for this species\n", "# TODO confirm: the species name is hard-coded and must match the species being processed\n", "alldone = cocodf$SRA[cocodf$actualSpecies=='Gallus gallus']\n", "rmids = which(list2[,2] %in% alldone)\n", "# guard: list2[-integer(0), ] would drop every row when nothing matches\n", "if (length(rmids) > 0) {\n", "    list2 <- list2[-rmids, , drop = FALSE]\n", "}\n", "\n", "dim(list2)\n", "tail(list2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "# write to individual project acclist files\n", "# one file per study, named <accession>_accList_1.txt, holding one run accession per line\n", "for (j in seq_len(nrow(list2))){\n", "    # take the accession from list2 itself so the loop does not depend on acclist still being in sync\n", "    study = as.character(list2[j,2])\n", "    runlist = newsra_runinfo$Run[which(as.character(newsra_runinfo$SRAStudy)==study)]\n", "    runfilename = paste0(study, '_accList_1.txt')\n", "    write.table(runlist, file = runfilename, sep = ',', row.names = FALSE, col.names = FALSE,\n", "                quote = FALSE)\n", "}"
] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [ "# save the accessions of the first 125 studies in list2 (fewer if list2 is shorter)\n", "# head() avoids the subscript-out-of-bounds error that list2[1:125,2] raises on a short list\n", "write.table(head(list2[,2], 125), file = 'runthis2.txt', sep = ',', row.names = FALSE, col.names = FALSE,\n", "            quote = FALSE)\n", "\n", "# write.csv(cbind(as.character(acclist[as.numeric(list2[,1])]), list2[,3]), file = 'pimp_accs.csv')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "R", "language": "R", "name": "ir" }, "language_info": { "codemirror_mode": "r", "file_extension": ".r", "mimetype": "text/x-r-source", "name": "R", "pygments_lexer": "r", "version": "4.4.2" } }, "nbformat": 4, "nbformat_minor": 4 }