{ "cells": [ { "cell_type": "code", "execution_count": 10, "id": "edd28874", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "from scipy import stats, sparse\n", "import bottleneck\n", "from scipy.stats import mannwhitneyu" ] }, { "cell_type": "code", "execution_count": 11, "id": "29a93616", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "from scipy import stats, sparse\n", "import bottleneck\n", "def run_egad(go, nw, **kwargs):\n", " \"\"\"EGAD running function\n", " \n", " Wrapper to lower level functions for EGAD\n", "\n", " EGAD measures modularity of gene lists in co-expression networks. \n", "\n", " This was translated from the MATLAB version, which does tiled Cross Validation\n", " \n", " The useful kwargs are:\n", " int - nFold : Number of CV folds to do, default is 3, \n", " int - {min,max}_count : limits for number of terms in each gene list, these are exclusive values\n", "\n", "\n", " Arguments:\n", " go {pd.DataFrame} -- dataframe of genes x terms of values [0,1], where 1 is included in gene lists\n", " nw {pd.DataFrame} -- dataframe of co-expression network, genes x genes\n", " **kwargs \n", " \n", " Returns:\n", " pd.DataFrame -- dataframe of terms x metrics where the metrics are \n", " ['AUC', 'AVG_NODE_DEGREE', 'DEGREE_NULL_AUC', 'P_Value']\n", " \"\"\"\n", " assert nw.shape[0] == nw.shape[1] , 'Network is not square'\n", " #print(nw.index)\n", " #nw.columns = nw.columns.astype(int)\n", " #print(nw.columns.astype(int))\n", " assert np.all(nw.index == nw.columns) , 'Network index and columns are not in the same order'\n", "\n", " #nw_mask = nw.isna().sum(axis=1) != nw.shape[1]\n", " #nw = nw.loc[nw_mask, nw_mask].astype('float')\n", " #np.fill_diagonal(nw.values, 1)\n", " return _runNV(go, nw, **kwargs)\n", "\n", "def _runNV(go, nw, nFold=3, min_count=1, max_count=1000000):\n", "\n", " #Make sure genes are same in go and nw\n", " #go.index = go.index.map(str) \n", " #nw.index = nw.index.map(str)\n", " #nw.index = nw.index.str.replace('_', '')\n", " #go.index = go.index.str.replace('_', '')\n", " #print (nw)\n", " genes_intersect = go.index.intersection(nw.index)\n", "\n", "\n", " #print (genes_intersect)\n", " go = go.loc[genes_intersect, :]\n", " nw = nw.loc[genes_intersect, genes_intersect]\n", " #print (go)\n", " print (nw.shape)\n", " print (go.shape)\n", " sparsity = 1.0 - np.count_nonzero(go) / go.size\n", " print (sparsity)\n", " sparsity = 1.0 - np.count_nonzero(nw) / nw.size\n", " print (sparsity)\n", " #print(nw\n", " #print(go\n", " nw_mask = nw.isna().sum(axis=1) != nw.shape[1]\n", " nw = nw.loc[nw_mask, nw_mask].astype('float')\n", " np.fill_diagonal(nw.values, 1)\n", " #Make sure there aren't duplicates\n", " duplicates = nw.index.duplicated(keep='first')\n", " nw = nw.loc[~duplicates, ~duplicates]\n", "\n", " go = go.loc[:, (go.sum(axis=0) > min_count) & (go.sum(axis=0) < max_count)]\n", " go = go.loc[~go.index.duplicated(keep='first'), :]\n", " #print(go)\n", "\n", " roc = _new_egad(go.values, nw.values, nFold)\n", "\n", " col_names = ['AUC', 'AVG_NODE_DEGREE', 'DEGREE_NULL_AUC', 'P_Value']\n", " #Put output in dataframe\n", " return pd.DataFrame(dict(zip(col_names, roc)), index=go.columns), go\n", "\n", "def _new_egad(go, nw, nFold):\n", "\n", " #Build Cross validated Positive\n", " x, y = np.where(go)\n", " #print(x, y)\n", " cvgo = {}\n", " for i in np.arange(nFold):\n", " a = x[i::nFold]\n", " #print(a)\n", " b = y[i::nFold]\n", " dat = np.ones_like(a)\n", " mask = sparse.coo_matrix((dat, (a, b)), shape=go.shape)\n", " cvgo[i] = go - mask.toarray()\n", "\n", " CVgo = np.concatenate(list(cvgo.values()), axis=1)\n", " #print(CVgo)\n", "\n", " sumin = np.matmul(nw.T, CVgo)\n", "\n", " degree = np.sum(nw, axis=0)\n", " #print(degree)\n", " #print(degree[:, None])\n", "\n", " predicts = sumin / degree[:, None]\n", " #print(predicts)\n", "\n", " np.place(predicts, CVgo > 0, np.nan)\n", "\n", " #print(predicts)\n", "\n", " #Calculate ranks of positives\n", " rank_abs = lambda x: stats.rankdata(np.abs(x))\n", " predicts2 = np.apply_along_axis(rank_abs, 0, predicts)\n", " #print(predicts2)\n", "\n", " #Masking Nans that were ranked (how tiedrank works in matlab)\n", " predicts2[np.isnan(predicts)] = np.nan\n", " #print(predicts2)\n", "\n", " filtering = np.tile(go, nFold)\n", " #print(filtering)\n", "\n", " #negatives :filtering == 0\n", " #Sets Ranks of negatives to 0\n", " np.place(predicts2, filtering == 0, 0)\n", "\n", " #Sum of ranks for each prediction\n", " p = bottleneck.nansum(predicts2, axis=0)\n", " n_p = np.sum(filtering, axis=0) - np.sum(CVgo, axis=0)\n", "\n", " #Number of negatives\n", " #Number of GO terms - number of postiive\n", " n_n = filtering.shape[0] - np.sum(filtering, axis=0)\n", "\n", " roc = (p / n_p - (n_p + 1) / 2) / n_n\n", " U = roc * n_p * n_n\n", " Z = (np.abs(U - (n_p * n_n / 2))) / np.sqrt(n_p * n_n *\n", " (n_p + n_n + 1) / 12)\n", " roc = roc.reshape(nFold, go.shape[1])\n", " Z = Z.reshape(nFold, go.shape[1])\n", " #Stouffer Z method\n", " Z = bottleneck.nansum(Z, axis=0) / np.sqrt(nFold)\n", " #Calc ROC of Neighbor Voting\n", " roc = bottleneck.nanmean(roc, axis=0)\n", " P = stats.norm.sf(Z)\n", "\n", " #Average degree for nodes in each go term\n", " avg_degree = degree.dot(go) / np.sum(go, axis=0)\n", "\n", " #Calc null auc for degree\n", " ranks = np.tile(stats.rankdata(degree), (go.shape[1], 1)).T\n", "\n", " np.place(ranks, go == 0, 0)\n", "\n", " n_p = bottleneck.nansum(go, axis=0)\n", " nn = go.shape[0] - n_p\n", " p = bottleneck.nansum(ranks, axis=0)\n", "\n", " roc_null = (p / n_p - ((n_p + 1) / 2)) / nn\n", " #print(roc)\n", " return roc, avg_degree, roc_null, P" ] }, { "cell_type": "code", "execution_count": 12, "id": "cef71417", "metadata": {}, "outputs": [], "source": [ "from hicmatrix import HiCMatrix as hm\n", "from hicmatrix.lib import MatrixFileHandler" ] }, { "cell_type": "code", "execution_count": 13, "id": "9c63a61a", "metadata": {}, "outputs": [], "source": [ "\n", "exp_file_path=f'/grid/gillis/data/lohia/hi_c_data_processing/software/CoCoCoNet/networks/human_prioAggNet.h5'\n", "\n", "jac_exp = hm.hiCMatrix(exp_file_path)\n", "all_genes = [x[3].decode() for x in jac_exp.cut_intervals]\n", "df_exp_corr = pd.DataFrame(jac_exp.matrix.toarray() , index=all_genes, columns = all_genes)" ] }, { "cell_type": "code", "execution_count": 14, "id": "75a5646c", "metadata": {}, "outputs": [], "source": [ "df_gene_chr = pd.DataFrame(list(zip([x[3].decode() for x in jac_exp.cut_intervals], [x[0] for x in jac_exp.cut_intervals])),\n", " columns =['gene', 'chrom'])" ] }, { "cell_type": "code", "execution_count": 54, "id": "144684bc", "metadata": {}, "outputs": [], "source": [ "df_gene_chr['val'] = 1" ] }, { "cell_type": "code", "execution_count": 56, "id": "25cd86f3", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[('chr1', 0, 1, b'ENSG00000278267'),\n", " ('chr1', 0, 1, b'ENSG00000233750'),\n", " ('chr1', 0, 1, b'ENSG00000268903'),\n", " ('chr1', 0, 1, b'ENSG00000269981'),\n", " ('chr1', 0, 1, b'ENSG00000241860'),\n", " ('chr1', 0, 1, b'ENSG00000279928'),\n", " ('chr1', 0, 1, b'ENSG00000279457'),\n", " ('chr1', 0, 1, b'ENSG00000228463'),\n", " ('chr1', 0, 1, b'ENSG00000237094'),\n", " ('chr1', 0, 1, b'ENSG00000225972'),\n", " ('chr1', 0, 1, b'ENSG00000225630'),\n", " ('chr1', 0, 1, b'ENSG00000237973'),\n", " ('chr1', 0, 1, b'ENSG00000229344'),\n", " ('chr1', 0, 1, b'ENSG00000240409'),\n", " ('chr1', 0, 1, b'ENSG00000248527'),\n", " ('chr1', 0, 1, b'ENSG00000198744'),\n", " ('chr1', 0, 1, b'ENSG00000228327'),\n", " ('chr1', 0, 1, b'ENSG00000237491'),\n", " ('chr1', 0, 1, b'ENSG00000230092'),\n", " ('chr1', 0, 1, b'ENSG00000177757'),\n", " ('chr1', 0, 1, b'ENSG00000228794'),\n", " ('chr1', 0, 1, b'ENSG00000225880'),\n", " ('chr1', 0, 1, b'ENSG00000230699'),\n", " ('chr1', 0, 1, b'ENSG00000223764'),\n", " ('chr1', 0, 1, b'ENSG00000187634'),\n", " ('chr1', 0, 1, b'ENSG00000188976'),\n", " ('chr1', 0, 1, b'ENSG00000187961'),\n", " ('chr1', 0, 1, b'ENSG00000187583'),\n", " ('chr1', 0, 1, b'ENSG00000187642'),\n", " ('chr1', 0, 1, b'ENSG00000272512'),\n", " ('chr1', 0, 1, b'ENSG00000188290'),\n", " ('chr1', 0, 1, b'ENSG00000187608'),\n", " ('chr1', 0, 1, b'ENSG00000188157'),\n", " ('chr1', 0, 1, b'ENSG00000217801'),\n", " ('chr1', 0, 1, b'ENSG00000237330'),\n", " ('chr1', 0, 1, b'ENSG00000131591'),\n", " ('chr1', 0, 1, b'ENSG00000162571'),\n", " ('chr1', 0, 1, b'ENSG00000186891'),\n", " ('chr1', 0, 1, b'ENSG00000186827'),\n", " ('chr1', 0, 1, b'ENSG00000078808'),\n", " ('chr1', 0, 1, b'ENSG00000176022'),\n", " ('chr1', 0, 1, b'ENSG00000184163'),\n", " ('chr1', 0, 1, b'ENSG00000260179'),\n", " ('chr1', 0, 1, b'ENSG00000160087'),\n", " ('chr1', 0, 1, b'ENSG00000230415'),\n", " ('chr1', 0, 1, b'ENSG00000162572'),\n", " ('chr1', 0, 1, b'ENSG00000131584'),\n", " ('chr1', 0, 1, b'ENSG00000169972'),\n", " ('chr1', 0, 1, b'ENSG00000127054'),\n", " ('chr1', 0, 1, b'ENSG00000240731'),\n", " ('chr1', 0, 1, b'ENSG00000224051'),\n", " ('chr1', 0, 1, b'ENSG00000169962'),\n", " ('chr1', 0, 1, b'ENSG00000107404'),\n", " ('chr1', 0, 1, b'ENSG00000162576'),\n", " ('chr1', 0, 1, b'ENSG00000175756'),\n", " ('chr1', 0, 1, b'ENSG00000221978'),\n", " ('chr1', 0, 1, b'ENSG00000224870'),\n", " ('chr1', 0, 1, b'ENSG00000242485'),\n", " ('chr1', 0, 1, b'ENSG00000272455'),\n", " ('chr1', 0, 1, b'ENSG00000235098'),\n", " ('chr1', 0, 1, b'ENSG00000225285'),\n", " ('chr1', 0, 1, b'ENSG00000179403'),\n", " ('chr1', 0, 1, b'ENSG00000215915'),\n", " ('chr1', 0, 1, b'ENSG00000160072'),\n", " ('chr1', 0, 1, b'ENSG00000284740'),\n", " ('chr1', 0, 1, b'ENSG00000197785'),\n", " ('chr1', 0, 1, b'ENSG00000205090'),\n", " ('chr1', 0, 1, b'ENSG00000160075'),\n", " ('chr1', 0, 1, b'ENSG00000215014'),\n", " ('chr1', 0, 1, b'ENSG00000228594'),\n", " ('chr1', 0, 1, b'ENSG00000272106'),\n", " ('chr1', 0, 1, b'ENSG00000197530'),\n", " ('chr1', 0, 1, b'ENSG00000189409'),\n", " ('chr1', 0, 1, b'ENSG00000248333'),\n", " ('chr1', 0, 1, b'ENSG00000189339'),\n", " ('chr1', 0, 1, b'ENSG00000269737'),\n", " ('chr1', 0, 1, b'ENSG00000008128'),\n", " ('chr1', 0, 1, b'ENSG00000268575'),\n", " ('chr1', 0, 1, b'ENSG00000227775'),\n", " ('chr1', 0, 1, b'ENSG00000215790'),\n", " ('chr1', 0, 1, b'ENSG00000008130'),\n", " ('chr1', 0, 1, b'ENSG00000078369'),\n", " ('chr1', 0, 1, b'ENSG00000231050'),\n", " ('chr1', 0, 1, b'ENSG00000169885'),\n", " ('chr1', 0, 1, b'ENSG00000178821'),\n", " ('chr1', 0, 1, b'ENSG00000142609'),\n", " ('chr1', 0, 1, b'ENSG00000187730'),\n", " ('chr1', 0, 1, b'ENSG00000067606'),\n", " ('chr1', 0, 1, b'ENSG00000271806'),\n", " ('chr1', 0, 1, b'ENSG00000182873'),\n", " ('chr1', 0, 1, b'ENSG00000162585'),\n", " ('chr1', 0, 1, b'ENSG00000157933'),\n", " ('chr1', 0, 1, b'ENSG00000116151'),\n", " ('chr1', 0, 1, b'ENSG00000269896'),\n", " ('chr1', 0, 1, b'ENSG00000272420'),\n", " ('chr1', 0, 1, b'ENSG00000157916'),\n", " ('chr1', 0, 1, b'ENSG00000157911'),\n", " ('chr1', 0, 1, b'ENSG00000149527'),\n", " ('chr1', 0, 1, b'ENSG00000177133'),\n", " ('chr1', 0, 1, b'ENSG00000142611'),\n", " ('chr1', 0, 1, b'ENSG00000272235'),\n", " ('chr1', 0, 1, b'ENSG00000130762'),\n", " ('chr1', 0, 1, b'ENSG00000162591'),\n", " ('chr1', 0, 1, b'ENSG00000238260'),\n", " ('chr1', 0, 1, b'ENSG00000158109'),\n", " ('chr1', 0, 1, b'ENSG00000116213'),\n", " ('chr1', 0, 1, b'ENSG00000078900'),\n", " ('chr1', 0, 1, b'ENSG00000227372'),\n", " ('chr1', 0, 1, b'ENSG00000235169'),\n", " ('chr1', 0, 1, b'ENSG00000130764'),\n", " ('chr1', 0, 1, b'ENSG00000272153'),\n", " ('chr1', 0, 1, b'ENSG00000116198'),\n", " ('chr1', 0, 1, b'ENSG00000169598'),\n", " ('chr1', 0, 1, b'ENSG00000198912'),\n", " ('chr1', 0, 1, b'ENSG00000236423'),\n", " ('chr1', 0, 1, b'ENSG00000196581'),\n", " ('chr1', 0, 1, b'ENSG00000131697'),\n", " ('chr1', 0, 1, b'ENSG00000069424'),\n", " ('chr1', 0, 1, b'ENSG00000116254'),\n", " ('chr1', 0, 1, b'ENSG00000116251'),\n", " ('chr1', 0, 1, b'ENSG00000226944'),\n", " ('chr1', 0, 1, b'ENSG00000158286'),\n", " ('chr1', 0, 1, b'ENSG00000116237'),\n", " ('chr1', 0, 1, b'ENSG00000158292'),\n", " ('chr1', 0, 1, b'ENSG00000097021'),\n", " ('chr1', 0, 1, b'ENSG00000069812'),\n", " ('chr1', 0, 1, b'ENSG00000187017'),\n", " ('chr1', 0, 1, b'ENSG00000215788'),\n", " ('chr1', 0, 1, b'ENSG00000171680'),\n", " ('chr1', 0, 1, b'ENSG00000162408'),\n", " ('chr1', 0, 1, b'ENSG00000229519'),\n", " ('chr1', 0, 1, b'ENSG00000173662'),\n", " ('chr1', 0, 1, b'ENSG00000204859'),\n", " ('chr1', 0, 1, b'ENSG00000162413'),\n", " ('chr1', 0, 1, b'ENSG00000116273'),\n", " ('chr1', 0, 1, b'ENSG00000041988'),\n", " ('chr1', 0, 1, b'ENSG00000007923'),\n", " ('chr1', 0, 1, b'ENSG00000237436'),\n", " ('chr1', 0, 1, b'ENSG00000171735'),\n", " ('chr1', 0, 1, b'ENSG00000049245'),\n", " ('chr1', 0, 1, b'ENSG00000269925'),\n", " ('chr1', 0, 1, b'ENSG00000049246'),\n", " ('chr1', 0, 1, b'ENSG00000236266'),\n", " ('chr1', 0, 1, b'ENSG00000049247'),\n", " ('chr1', 0, 1, b'ENSG00000049249'),\n", " ('chr1', 0, 1, b'ENSG00000116288'),\n", " ('chr1', 0, 1, b'ENSG00000284747'),\n", " ('chr1', 0, 1, b'ENSG00000116285'),\n", " ('chr1', 0, 1, b'ENSG00000238290'),\n", " ('chr1', 0, 1, b'ENSG00000162426'),\n", " ('chr1', 0, 1, b'ENSG00000142599'),\n", " ('chr1', 0, 1, b'ENSG00000232912'),\n", " ('chr1', 0, 1, b'ENSG00000234619'),\n", " ('chr1', 0, 1, b'ENSG00000224315'),\n", " ('chr1', 0, 1, b'ENSG00000074800'),\n", " ('chr1', 0, 1, b'ENSG00000238249'),\n", " ('chr1', 0, 1, b'ENSG00000232208'),\n", " ('chr1', 0, 1, b'ENSG00000131686'),\n", " ('chr1', 0, 1, b'ENSG00000142583'),\n", " ('chr1', 0, 1, b'ENSG00000180758'),\n", " ('chr1', 0, 1, b'ENSG00000228526'),\n", " ('chr1', 0, 1, b'ENSG00000234546'),\n", " ('chr1', 0, 1, b'ENSG00000049239'),\n", " ('chr1', 0, 1, b'ENSG00000171621'),\n", " ('chr1', 0, 1, b'ENSG00000284652'),\n", " ('chr1', 0, 1, b'ENSG00000284693'),\n", " ('chr1', 0, 1, b'ENSG00000171612'),\n", " ('chr1', 0, 1, b'ENSG00000231181'),\n", " ('chr1', 0, 1, b'ENSG00000188807'),\n", " ('chr1', 0, 1, b'ENSG00000171608'),\n", " ('chr1', 0, 1, b'ENSG00000179840'),\n", " ('chr1', 0, 1, b'ENSG00000231789'),\n", " ('chr1', 0, 1, b'ENSG00000171603'),\n", " ('chr1', 0, 1, b'ENSG00000178585'),\n", " ('chr1', 0, 1, b'ENSG00000162441'),\n", " ('chr1', 0, 1, b'ENSG00000228150'),\n", " ('chr1', 0, 1, b'ENSG00000173614'),\n", " ('chr1', 0, 1, b'ENSG00000162444'),\n", " ('chr1', 0, 1, b'ENSG00000130939'),\n", " ('chr1', 0, 1, b'ENSG00000233623'),\n", " ('chr1', 0, 1, b'ENSG00000054523'),\n", " ('chr1', 0, 1, b'ENSG00000199562'),\n", " ('chr1', 0, 1, b'ENSG00000284735'),\n", " ('chr1', 0, 1, b'ENSG00000284642'),\n", " ('chr1', 0, 1, b'ENSG00000142657'),\n", " ('chr1', 0, 1, b'ENSG00000175279'),\n", " ('chr1', 0, 1, b'ENSG00000241563'),\n", " ('chr1', 0, 1, b'ENSG00000160049'),\n", " ('chr1', 0, 1, b'ENSG00000142655'),\n", " ('chr1', 0, 1, b'ENSG00000130940'),\n", " ('chr1', 0, 1, b'ENSG00000272078'),\n", " ('chr1', 0, 1, b'ENSG00000175262'),\n", " ('chr1', 0, 1, b'ENSG00000120948'),\n", " ('chr1', 0, 1, b'ENSG00000009724'),\n", " ('chr1', 0, 1, b'ENSG00000271895'),\n", " ('chr1', 0, 1, b'ENSG00000116649'),\n", " ('chr1', 0, 1, b'ENSG00000171824'),\n", " ('chr1', 0, 1, b'ENSG00000226849'),\n", " ('chr1', 0, 1, b'ENSG00000198793'),\n", " ('chr1', 0, 1, b'ENSG00000171819'),\n", " ('chr1', 0, 1, b'ENSG00000120942'),\n", " ('chr1', 0, 1, b'ENSG00000238199'),\n", " ('chr1', 0, 1, b'ENSG00000204624'),\n", " ('chr1', 0, 1, b'ENSG00000284708'),\n", " ('chr1', 0, 1, b'ENSG00000116661'),\n", " ('chr1', 0, 1, b'ENSG00000132879'),\n", " ('chr1', 0, 1, b'ENSG00000116663'),\n", " ('chr1', 0, 1, b'ENSG00000116670'),\n", " ('chr1', 0, 1, b'ENSG00000162490'),\n", " ('chr1', 0, 1, b'ENSG00000177674'),\n", " ('chr1', 0, 1, b'ENSG00000177000'),\n", " ('chr1', 0, 1, b'ENSG00000011021'),\n", " ('chr1', 0, 1, b'ENSG00000242349'),\n", " ('chr1', 0, 1, b'ENSG00000175206'),\n", " ('chr1', 0, 1, b'ENSG00000199347'),\n", " ('chr1', 0, 1, b'ENSG00000116685'),\n", " ('chr1', 0, 1, b'ENSG00000083444'),\n", " ('chr1', 0, 1, b'ENSG00000116688'),\n", " ('chr1', 0, 1, b'ENSG00000116691'),\n", " ('chr1', 0, 1, b'ENSG00000120949'),\n", " ('chr1', 0, 1, b'ENSG00000028137'),\n", " ('chr1', 0, 1, b'ENSG00000048707'),\n", " ('chr1', 0, 1, b'ENSG00000162496'),\n", " ('chr1', 0, 1, b'ENSG00000272482'),\n", " ('chr1', 0, 1, b'ENSG00000157330'),\n", " ('chr1', 0, 1, b'ENSG00000162494'),\n", " ('chr1', 0, 1, b'ENSG00000162493'),\n", " ('chr1', 0, 1, b'ENSG00000116731'),\n", " ('chr1', 0, 1, b'ENSG00000189337'),\n", " ('chr1', 0, 1, b'ENSG00000175147'),\n", " ('chr1', 0, 1, b'ENSG00000171729'),\n", " ('chr1', 0, 1, b'ENSG00000204464'),\n", " ('chr1', 0, 1, b'ENSG00000142621'),\n", " ('chr1', 0, 1, b'ENSG00000228140'),\n", " ('chr1', 0, 1, b'ENSG00000142634'),\n", " ('chr1', 0, 1, b'ENSG00000162438'),\n", " ('chr1', 0, 1, b'ENSG00000215704'),\n", " ('chr1', 0, 1, b'ENSG00000132906'),\n", " ('chr1', 0, 1, b'ENSG00000116138'),\n", " ('chr1', 0, 1, b'ENSG00000116771'),\n", " ('chr1', 0, 1, b'ENSG00000237301'),\n", " ('chr1', 0, 1, b'ENSG00000235084'),\n", " ('chr1', 0, 1, b'ENSG00000197312'),\n", " ('chr1', 0, 1, b'ENSG00000116786'),\n", " ('chr1', 0, 1, b'ENSG00000237938'),\n", " ('chr1', 0, 1, b'ENSG00000162461'),\n", " ('chr1', 0, 1, b'ENSG00000162458'),\n", " ('chr1', 0, 1, b'ENSG00000224321'),\n", " ('chr1', 0, 1, b'ENSG00000233954'),\n", " ('chr1', 0, 1, b'ENSG00000178715'),\n", " ('chr1', 0, 1, b'ENSG00000179743'),\n", " ('chr1', 0, 1, b'ENSG00000065526'),\n", " ('chr1', 0, 1, b'ENSG00000116809'),\n", " ('chr1', 0, 1, b'ENSG00000234607'),\n", " ('chr1', 0, 1, b'ENSG00000183888'),\n", " ('chr1', 0, 1, b'ENSG00000173641'),\n", " ('chr1', 0, 1, b'ENSG00000186510'),\n", " ('chr1', 0, 1, b'ENSG00000184908'),\n", " ('chr1', 0, 1, b'ENSG00000185519'),\n", " ('chr1', 0, 1, b'ENSG00000142627'),\n", " ('chr1', 0, 1, b'ENSG00000227959'),\n", " ('chr1', 0, 1, b'ENSG00000142632'),\n", " ('chr1', 0, 1, b'ENSG00000237276'),\n", " ('chr1', 0, 1, b'ENSG00000132881'),\n", " ('chr1', 0, 1, b'ENSG00000233929'),\n", " ('chr1', 0, 1, b'ENSG00000037637'),\n", " ('chr1', 0, 1, b'ENSG00000055070'),\n", " ('chr1', 0, 1, b'ENSG00000226457'),\n", " ('chr1', 0, 1, b'ENSG00000187144'),\n", " ('chr1', 0, 1, b'ENSG00000157191'),\n", " ('chr1', 0, 1, b'ENSG00000226029'),\n", " ('chr1', 0, 1, b'ENSG00000080947'),\n", " ('chr1', 0, 1, b'ENSG00000206652'),\n", " ('chr1', 0, 1, b'ENSG00000261135'),\n", " ('chr1', 0, 1, b'ENSG00000233421'),\n", " ('chr1', 0, 1, b'ENSG00000219481'),\n", " ('chr1', 0, 1, b'ENSG00000215908'),\n", " ('chr1', 0, 1, b'ENSG00000186301'),\n", " ('chr1', 0, 1, b'ENSG00000236698'),\n", " ('chr1', 0, 1, b'ENSG00000058453'),\n", " ('chr1', 0, 1, b'ENSG00000186715'),\n", " ('chr1', 0, 1, b'ENSG00000228549'),\n", " ('chr1', 0, 1, b'ENSG00000238142'),\n", " ('chr1', 0, 1, b'ENSG00000207005'),\n", " ('chr1', 0, 1, b'ENSG00000117122'),\n", " ('chr1', 0, 1, b'ENSG00000159363'),\n", " ('chr1', 0, 1, b'ENSG00000117118'),\n", " ('chr1', 0, 1, b'ENSG00000117115'),\n", " ('chr1', 0, 1, b'ENSG00000074964'),\n", " ('chr1', 0, 1, b'ENSG00000117154'),\n", " ('chr1', 0, 1, b'ENSG00000280222'),\n", " ('chr1', 0, 1, b'ENSG00000179023'),\n", " ('chr1', 0, 1, b'ENSG00000009709'),\n", " ('chr1', 0, 1, b'ENSG00000159423'),\n", " ('chr1', 0, 1, b'ENSG00000169991'),\n", " ('chr1', 0, 1, b'ENSG00000272084'),\n", " ('chr1', 0, 1, b'ENSG00000127481'),\n", " ('chr1', 0, 1, b'ENSG00000230424'),\n", " ('chr1', 0, 1, b'ENSG00000127463'),\n", " ('chr1', 0, 1, b'ENSG00000053372'),\n", " ('chr1', 0, 1, b'ENSG00000211454'),\n", " ('chr1', 0, 1, b'ENSG00000162482'),\n", " ('chr1', 0, 1, b'ENSG00000270728'),\n", " ('chr1', 0, 1, b'ENSG00000053371'),\n", " ('chr1', 0, 1, b'ENSG00000040487'),\n", " ('chr1', 0, 1, b'ENSG00000077549'),\n", " ('chr1', 0, 1, b'ENSG00000173436'),\n", " ('chr1', 0, 1, b'ENSG00000158747'),\n", " ('chr1', 0, 1, b'ENSG00000226396'),\n", " ('chr1', 0, 1, b'ENSG00000158748'),\n", " ('chr1', 0, 1, b'ENSG00000162542'),\n", " ('chr1', 0, 1, b'ENSG00000169914'),\n", " ('chr1', 0, 1, b'ENSG00000188257'),\n", " ('chr1', 0, 1, b'ENSG00000127472'),\n", " ('chr1', 0, 1, b'ENSG00000117215'),\n", " ('chr1', 0, 1, b'ENSG00000225986'),\n", " ('chr1', 0, 1, b'ENSG00000162543'),\n", " ('chr1', 0, 1, b'ENSG00000158816'),\n", " ('chr1', 0, 1, b'ENSG00000162545'),\n", " ('chr1', 0, 1, b'ENSG00000090432'),\n", " ('chr1', 0, 1, b'ENSG00000183114'),\n", " ('chr1', 0, 1, b'ENSG00000158825'),\n", " ('chr1', 0, 1, b'ENSG00000158828'),\n", " ('chr1', 0, 1, b'ENSG00000117242'),\n", " ('chr1', 0, 1, b'ENSG00000244038'),\n", " ('chr1', 0, 1, b'ENSG00000117245'),\n", " ('chr1', 0, 1, b'ENSG00000189410'),\n", " ('chr1', 0, 1, b'ENSG00000127483'),\n", " ('chr1', 0, 1, b'ENSG00000075151'),\n", " ('chr1', 0, 1, b'ENSG00000233072'),\n", " ('chr1', 0, 1, b'ENSG00000235112'),\n", " ('chr1', 0, 1, b'ENSG00000117298'),\n", " ('chr1', 0, 1, b'ENSG00000236936'),\n", " ('chr1', 0, 1, b'ENSG00000231105'),\n", " ('chr1', 0, 1, b'ENSG00000227001'),\n", " ('chr1', 0, 1, b'ENSG00000187952'),\n", " ('chr1', 0, 1, b'ENSG00000142794'),\n", " ('chr1', 0, 1, b'ENSG00000162551'),\n", " ('chr1', 0, 1, b'ENSG00000076864'),\n", " ('chr1', 0, 1, b'ENSG00000090686'),\n", " ('chr1', 0, 1, b'ENSG00000187942'),\n", " ('chr1', 0, 1, b'ENSG00000142798'),\n", " ('chr1', 0, 1, b'ENSG00000218510'),\n", " ('chr1', 0, 1, b'ENSG00000070831'),\n", " ('chr1', 0, 1, b'ENSG00000230068'),\n", " ('chr1', 0, 1, b'ENSG00000271428'),\n", " ('chr1', 0, 1, b'ENSG00000271840'),\n", " ('chr1', 0, 1, b'ENSG00000162552'),\n", " ('chr1', 0, 1, b'ENSG00000184677'),\n", " ('chr1', 0, 1, b'ENSG00000237200'),\n", " ('chr1', 0, 1, b'ENSG00000070886'),\n", " ('chr1', 0, 1, b'ENSG00000173372'),\n", " ('chr1', 0, 1, b'ENSG00000159189'),\n", " ('chr1', 0, 1, b'ENSG00000173369'),\n", " ('chr1', 0, 1, b'ENSG00000133216'),\n", " ('chr1', 0, 1, b'ENSG00000004487'),\n", " ('chr1', 0, 1, b'ENSG00000240553'),\n", " ('chr1', 0, 1, b'ENSG00000169641'),\n", " ('chr1', 0, 1, b'ENSG00000179546'),\n", " ('chr1', 0, 1, b'ENSG00000197880'),\n", " ('chr1', 0, 1, b'ENSG00000142676'),\n", " ('chr1', 0, 1, b'ENSG00000011007'),\n", " ('chr1', 0, 1, b'ENSG00000236810'),\n", " ('chr1', 0, 1, b'ENSG00000057757'),\n", " ('chr1', 0, 1, b'ENSG00000011009'),\n", " ('chr1', 0, 1, b'ENSG00000117308'),\n", " ('chr1', 0, 1, b'ENSG00000117305'),\n", " ('chr1', 0, 1, b'ENSG00000179163'),\n", " ('chr1', 0, 1, b'ENSG00000188822'),\n", " ('chr1', 0, 1, b'ENSG00000229106'),\n", " ('chr1', 0, 1, b'ENSG00000189266'),\n", " ('chr1', 0, 1, b'ENSG00000188529'),\n", " ('chr1', 0, 1, b'ENSG00000142661'),\n", " ('chr1', 0, 1, b'ENSG00000142677'),\n", " ('chr1', 0, 1, b'ENSG00000185436'),\n", " ('chr1', 0, 1, b'ENSG00000158055'),\n", " ('chr1', 0, 1, b'ENSG00000001460'),\n", " ('chr1', 0, 1, b'ENSG00000001461'),\n", " ('chr1', 0, 1, b'ENSG00000117602'),\n", " ('chr1', 0, 1, b'ENSG00000184454'),\n", " ('chr1', 0, 1, b'ENSG00000133226'),\n", " ('chr1', 0, 1, b'ENSG00000169504'),\n", " ('chr1', 0, 1, b'ENSG00000020633'),\n", " ('chr1', 0, 1, b'ENSG00000229162'),\n", " ('chr1', 0, 1, b'ENSG00000117614'),\n", " ('chr1', 0, 1, b'ENSG00000284602'),\n", " ('chr1', 0, 1, b'ENSG00000284657'),\n", " ('chr1', 0, 1, b'ENSG00000117616'),\n", " ('chr1', 0, 1, b'ENSG00000272432'),\n", " ('chr1', 0, 1, b'ENSG00000261349'),\n", " ('chr1', 0, 1, b'ENSG00000187010'),\n", " ('chr1', 0, 1, b'ENSG00000224183'),\n", " ('chr1', 0, 1, b'ENSG00000259984'),\n", " ('chr1', 0, 1, b'ENSG00000183726'),\n", " ('chr1', 0, 1, b'ENSG00000188672'),\n", " ('chr1', 0, 1, b'ENSG00000204178'),\n", " ('chr1', 0, 1, b'ENSG00000157978'),\n", " ('chr1', 0, 1, b'ENSG00000225643'),\n", " ('chr1', 0, 1, b'ENSG00000117643'),\n", " ('chr1', 0, 1, b'ENSG00000233478'),\n", " ('chr1', 0, 1, b'ENSG00000162430'),\n", " ('chr1', 0, 1, b'ENSG00000228172'),\n", " ('chr1', 0, 1, b'ENSG00000117640'),\n", " ('chr1', 0, 1, b'ENSG00000127423'),\n", " ('chr1', 0, 1, b'ENSG00000182749'),\n", " ('chr1', 0, 1, b'ENSG00000117632'),\n", " ('chr1', 0, 1, b'ENSG00000158006'),\n", " ('chr1', 0, 1, b'ENSG00000158008'),\n", " ('chr1', 0, 1, b'ENSG00000158014'),\n", " ('chr1', 0, 1, b'ENSG00000158022'),\n", " ('chr1', 0, 1, b'ENSG00000175087'),\n", " ('chr1', 0, 1, b'ENSG00000197245'),\n", " ('chr1', 0, 1, b'ENSG00000236782'),\n", " ('chr1', 0, 1, b'ENSG00000142675'),\n", " ('chr1', 0, 1, b'ENSG00000236155'),\n", " ('chr1', 0, 1, b'ENSG00000130695'),\n", " ('chr1', 0, 1, b'ENSG00000142669'),\n", " ('chr1', 0, 1, b'ENSG00000158062'),\n", " ('chr1', 0, 1, b'ENSG00000169442'),\n", " ('chr1', 0, 1, b'ENSG00000176092'),\n", " ('chr1', 0, 1, b'ENSG00000176083'),\n", " ('chr1', 0, 1, b'ENSG00000117682'),\n", " ('chr1', 0, 1, b'ENSG00000223583'),\n", " ('chr1', 0, 1, b'ENSG00000225891'),\n", " ('chr1', 0, 1, b'ENSG00000198830'),\n", " ('chr1', 0, 1, b'ENSG00000260063'),\n", " ('chr1', 0, 1, b'ENSG00000117713'),\n", " ('chr1', 0, 1, b'ENSG00000060642'),\n", " ('chr1', 0, 1, b'ENSG00000204160'),\n", " ('chr1', 0, 1, b'ENSG00000175793'),\n", " ('chr1', 0, 1, b'ENSG00000142751'),\n", " ('chr1', 0, 1, b'ENSG00000198746'),\n", " ('chr1', 0, 1, b'ENSG00000090273'),\n", " ('chr1', 0, 1, b'ENSG00000131910'),\n", " ('chr1', 0, 1, b'ENSG00000175707'),\n", " ('chr1', 0, 1, b'ENSG00000253368'),\n", " ('chr1', 0, 1, b'ENSG00000158246'),\n", " ('chr1', 0, 1, b'ENSG00000090020'),\n", " ('chr1', 0, 1, b'ENSG00000225159'),\n", " ('chr1', 0, 1, b'ENSG00000142784'),\n", " ('chr1', 0, 1, b'ENSG00000243659'),\n", " ('chr1', 0, 1, b'ENSG00000186501'),\n", " ('chr1', 0, 1, b'ENSG00000241547'),\n", " ('chr1', 0, 1, b'ENSG00000142765'),\n", " ('chr1', 0, 1, b'ENSG00000142733'),\n", " ('chr1', 0, 1, b'ENSG00000142748'),\n", " ('chr1', 0, 1, b'ENSG00000181773'),\n", " ('chr1', 0, 1, b'ENSG00000158195'),\n", " ('chr1', 0, 1, b'ENSG00000241169'),\n", " ('chr1', 0, 1, b'ENSG00000237429'),\n", " ('chr1', 0, 1, b'ENSG00000126705'),\n", " ('chr1', 0, 1, b'ENSG00000000938'),\n", " ('chr1', 0, 1, b'ENSG00000235912'),\n", " ('chr1', 0, 1, b'ENSG00000126709'),\n", " ('chr1', 0, 1, b'ENSG00000225886'),\n", " ('chr1', 0, 1, b'ENSG00000009780'),\n", " ('chr1', 0, 1, b'ENSG00000117758'),\n", " ('chr1', 0, 1, b'ENSG00000269971'),\n", " ('chr1', 0, 1, b'ENSG00000270031'),\n", " ('chr1', 0, 1, b'ENSG00000117751'),\n", " ('chr1', 0, 1, b'ENSG00000252947'),\n", " ('chr1', 0, 1, b'ENSG00000130775'),\n", " ('chr1', 0, 1, b'ENSG00000117748'),\n", " ('chr1', 0, 1, b'ENSG00000130768'),\n", " ('chr1', 0, 1, b'ENSG00000158156'),\n", " ('chr1', 0, 1, b'ENSG00000158161'),\n", " ('chr1', 0, 1, b'ENSG00000240750'),\n", " ('chr1', 0, 1, b'ENSG00000228589'),\n", " ('chr1', 0, 1, b'ENSG00000169403'),\n", " ('chr1', 0, 1, b'ENSG00000126698'),\n", " ('chr1', 0, 1, b'ENSG00000204138'),\n", " ('chr1', 0, 1, b'ENSG00000180198'),\n", " ('chr1', 0, 1, b'ENSG00000242125'),\n", " ('chr1', 0, 1, b'ENSG00000200087'),\n", " ('chr1', 0, 1, b'ENSG00000279443'),\n", " ('chr1', 0, 1, b'ENSG00000180098'),\n", " ('chr1', 0, 1, b'ENSG00000197989'),\n", " ('chr1', 0, 1, b'ENSG00000221539'),\n", " ('chr1', 0, 1, b'ENSG00000120656'),\n", " ('chr1', 0, 1, b'ENSG00000188060'),\n", " ('chr1', 0, 1, b'ENSG00000229388'),\n", " ('chr1', 0, 1, b'ENSG00000162419'),\n", " ('chr1', 0, 1, b'ENSG00000198492'),\n", " ('chr1', 0, 1, b'ENSG00000116329'),\n", " ('chr1', 0, 1, b'ENSG00000159023'),\n", " ('chr1', 0, 1, b'ENSG00000225616'),\n", " ('chr1', 0, 1, b'ENSG00000253304'),\n", " ('chr1', 0, 1, b'ENSG00000116350'),\n", " ('chr1', 0, 1, b'ENSG00000116353'),\n", " ('chr1', 0, 1, b'ENSG00000237934'),\n", " ('chr1', 0, 1, b'ENSG00000060656'),\n", " ('chr1', 0, 1, b'ENSG00000162510'),\n", " ('chr1', 0, 1, b'ENSG00000186056'),\n", " ('chr1', 0, 1, b'ENSG00000162511'),\n", " ('chr1', 0, 1, b'ENSG00000264773'),\n", " ('chr1', 0, 1, b'ENSG00000162512'),\n", " ('chr1', 0, 1, b'ENSG00000134644'),\n", " ('chr1', 0, 1, b'ENSG00000237329'),\n", " ('chr1', 0, 1, b'ENSG00000084628'),\n", " ('chr1', 0, 1, b'ENSG00000060688'),\n", " ('chr1', 0, 1, b'ENSG00000229447'),\n", " ('chr1', 0, 1, b'ENSG00000121766'),\n", " ('chr1', 0, 1, b'ENSG00000121769'),\n", " ('chr1', 0, 1, b'ENSG00000168528'),\n", " ('chr1', 0, 1, b'ENSG00000284543'),\n", " ('chr1', 0, 1, b'ENSG00000142910'),\n", " ('chr1', 0, 1, b'ENSG00000121764'),\n", " ('chr1', 0, 1, b'ENSG00000162517'),\n", " ('chr1', 0, 1, b'ENSG00000235790'),\n", " ('chr1', 0, 1, b'ENSG00000084636'),\n", " ('chr1', 0, 1, b'ENSG00000121753'),\n", " ('chr1', 0, 1, b'ENSG00000134668'),\n", " ('chr1', 0, 1, b'ENSG00000269967'),\n", " ('chr1', 0, 1, b'ENSG00000184007'),\n", " ('chr1', 0, 1, b'ENSG00000228634'),\n", " ('chr1', 0, 1, b'ENSG00000121774'),\n", " ('chr1', 0, 1, b'ENSG00000203325'),\n", " ('chr1', 0, 1, b'ENSG00000121775'),\n", " ('chr1', 0, 1, b'ENSG00000025800'),\n", " ('chr1', 0, 1, b'ENSG00000084652'),\n", " ('chr1', 0, 1, b'ENSG00000160050'),\n", " ('chr1', 0, 1, b'ENSG00000160051'),\n", " ('chr1', 0, 1, b'ENSG00000222046'),\n", " ('chr1', 0, 1, b'ENSG00000160055'),\n", " ('chr1', 0, 1, b'ENSG00000084623'),\n", " ('chr1', 0, 1, b'ENSG00000220785'),\n", " ('chr1', 0, 1, b'ENSG00000183615'),\n", " ('chr1', 0, 1, b'ENSG00000182866'),\n", " ('chr1', 0, 1, b'ENSG00000116478'),\n", " ('chr1', 0, 1, b'ENSG00000175130'),\n", " ('chr1', 0, 1, b'ENSG00000162526'),\n", " ('chr1', 0, 1, b'ENSG00000225828'),\n", " ('chr1', 0, 1, b'ENSG00000160058'),\n", " ('chr1', 0, 1, b'ENSG00000273274'),\n", " ('chr1', 0, 1, b'ENSG00000160062'),\n", " ('chr1', 0, 1, b'ENSG00000176261'),\n", " ('chr1', 0, 1, b'ENSG00000162521'),\n", " ('chr1', 0, 1, b'ENSG00000162520'),\n", " ('chr1', 0, 1, b'ENSG00000162522'),\n", " ('chr1', 0, 1, b'ENSG00000134684'),\n", " ('chr1', 0, 1, b'ENSG00000116497'),\n", " ('chr1', 0, 1, b'ENSG00000160097'),\n", " ('chr1', 0, 1, b'ENSG00000121905'),\n", " ('chr1', 0, 1, b'ENSG00000121900'),\n", " ('chr1', 0, 1, b'ENSG00000116514'),\n", " ('chr1', 0, 1, b'ENSG00000217644'),\n", " ('chr1', 0, 1, b'ENSG00000236065'),\n", " ('chr1', 0, 1, b'ENSG00000004455'),\n", " ('chr1', 0, 1, b'ENSG00000142920'),\n", " ('chr1', 0, 1, b'ENSG00000278997'),\n", " ('chr1', 0, 1, b'ENSG00000116525'),\n", " ('chr1', 0, 1, b'ENSG00000279179'),\n", " ('chr1', 0, 1, b'ENSG00000160094'),\n", " ('chr1', 0, 1, b'ENSG00000270115'),\n", " ('chr1', 0, 1, b'ENSG00000225313'),\n", " ('chr1', 0, 1, b'ENSG00000134686'),\n", " ('chr1', 0, 1, b'ENSG00000222112'),\n", " ('chr1', 0, 1, b'ENSG00000121903'),\n", " ('chr1', 0, 1, b'ENSG00000121904'),\n", " ('chr1', 0, 1, b'ENSG00000163866'),\n", " ('chr1', 0, 1, b'ENSG00000189280'),\n", " ('chr1', 0, 1, b'ENSG00000188910'),\n", " ('chr1', 0, 1, b'ENSG00000187513'),\n", " ('chr1', 0, 1, b'ENSG00000230163'),\n", " ('chr1', 0, 1, b'ENSG00000116544'),\n", " ('chr1', 0, 1, b'ENSG00000284773'),\n", " ('chr1', 0, 1, b'ENSG00000241014'),\n", " ('chr1', 0, 1, b'ENSG00000163867'),\n", " ('chr1', 0, 1, b'ENSG00000197056'),\n", " ('chr1', 0, 1, b'ENSG00000116560'),\n", " ('chr1', 0, 1, b'ENSG00000146463'),\n", " ('chr1', 0, 1, b'ENSG00000229994'),\n", " ('chr1', 0, 1, b'ENSG00000142687'),\n", " ('chr1', 0, 1, b'ENSG00000236274'),\n", " ('chr1', 0, 1, b'ENSG00000020129'),\n", " ('chr1', 0, 1, b'ENSG00000239636'),\n", " ('chr1', 0, 1, b'ENSG00000116819'),\n", " ('chr1', 0, 1, b'ENSG00000126067'),\n", " ('chr1', 0, 1, b'ENSG00000142686'),\n", " ('chr1', 0, 1, b'ENSG00000092853'),\n", " ('chr1', 0, 1, b'ENSG00000134698'),\n", " ('chr1', 0, 1, b'ENSG00000092847'),\n", " ('chr1', 0, 1, b'ENSG00000126070'),\n", " ('chr1', 0, 1, b'ENSG00000271554'),\n", " ('chr1', 0, 1, b'ENSG00000232862'),\n", " ('chr1', 0, 1, b'ENSG00000092850'),\n", " ('chr1', 0, 1, b'ENSG00000116863'),\n", " ('chr1', 0, 1, b'ENSG00000171812'),\n", " ('chr1', 0, 1, b'ENSG00000054116'),\n", " ('chr1', 0, 1, b'ENSG00000116871'),\n", " ('chr1', 0, 1, b'ENSG00000054118'),\n", " ('chr1', 0, 1, b'ENSG00000214193'),\n", " ('chr1', 0, 1, b'ENSG00000142694'),\n", " ('chr1', 0, 1, b'ENSG00000116883'),\n", " ('chr1', 0, 1, b'ENSG00000196182'),\n", " ('chr1', 0, 1, b'ENSG00000181817'),\n", " ('chr1', 0, 1, b'ENSG00000116885'),\n", " ('chr1', 0, 1, b'ENSG00000201448'),\n", " ('chr1', 0, 1, b'ENSG00000116898'),\n", " ('chr1', 0, 1, b'ENSG00000119535'),\n", " ('chr1', 0, 1, b'ENSG00000163873'),\n", " ('chr1', 0, 1, b'ENSG00000233621'),\n", " ('chr1', 0, 1, b'ENSG00000163874'),\n", " ('chr1', 0, 1, b'ENSG00000163875'),\n", " ('chr1', 0, 1, b'ENSG00000263675'),\n", " ('chr1', 0, 1, b'ENSG00000163877'),\n", " ('chr1', 0, 1, b'ENSG00000232273'),\n", " ('chr1', 0, 1, b'ENSG00000237749'),\n", " ('chr1', 0, 1, b'ENSG00000163879'),\n", " ('chr1', 0, 1, b'ENSG00000134697'),\n", " ('chr1', 0, 1, b'ENSG00000169218'),\n", " ('chr1', 0, 1, b'ENSG00000116922'),\n", " ('chr1', 0, 1, b'ENSG00000134690'),\n", " ('chr1', 0, 1, b'ENSG00000183317'),\n", " ('chr1', 0, 1, b'ENSG00000185090'),\n", " ('chr1', 0, 1, b'ENSG00000196449'),\n", " ('chr1', 0, 1, b'ENSG00000197982'),\n", " ('chr1', 0, 1, b'ENSG00000188786'),\n", " ('chr1', 0, 1, b'ENSG00000204084'),\n", " ('chr1', 0, 1, b'ENSG00000183431'),\n", " ('chr1', 0, 1, b'ENSG00000212541'),\n", " ('chr1', 0, 1, b'ENSG00000183386'),\n", " ('chr1', 0, 1, b'ENSG00000183520'),\n", " ('chr1', 0, 1, b'ENSG00000185668'),\n", " ('chr1', 0, 1, b'ENSG00000215895'),\n", " ('chr1', 0, 1, b'ENSG00000116954'),\n", " ('chr1', 0, 1, b'ENSG00000228436'),\n", " ('chr1', 0, 1, b'ENSG00000214114'),\n", " ('chr1', 0, 1, b'ENSG00000158315'),\n", " ('chr1', 0, 1, b'ENSG00000174574'),\n", " ('chr1', 0, 1, b'ENSG00000168653'),\n", " ('chr1', 0, 1, b'ENSG00000127603'),\n", " ('chr1', 0, 1, b'ENSG00000183682'),\n", " ('chr1', 0, 1, b'ENSG00000237624'),\n", " ('chr1', 0, 1, b'ENSG00000243970'),\n", " ('chr1', 0, 1, b'ENSG00000090621'),\n", " ('chr1', 0, 1, b'ENSG00000228060'),\n", " ('chr1', 0, 1, b'ENSG00000201457'),\n", " ('chr1', 0, 1, b'ENSG00000163909'),\n", " ('chr1', 0, 1, b'ENSG00000116981'),\n", " ('chr1', 0, 1, b'ENSG00000116983'),\n", " ('chr1', 0, 1, b'ENSG00000084072'),\n", " ('chr1', 0, 1, b'ENSG00000116985'),\n", " ('chr1', 0, 1, b'ENSG00000198754'),\n", " ('chr1', 0, 1, b'ENSG00000284719'),\n", " ('chr1', 0, 1, b'ENSG00000043514'),\n", " ('chr1', 0, 1, b'ENSG00000116990'),\n", " ('chr1', 0, 1, b'ENSG00000168389'),\n", " ('chr1', 0, 1, b'ENSG00000228477'),\n", " ('chr1', 0, 1, b'ENSG00000131236'),\n", " ('chr1', 0, 1, b'ENSG00000131238'),\n", " ('chr1', 0, 1, b'ENSG00000117000'),\n", " ('chr1', 0, 1, b'ENSG00000259943'),\n", " ('chr1', 0, 1, b'ENSG00000084073'),\n", " ('chr1', 0, 1, b'ENSG00000049089'),\n", " ('chr1', 0, 1, b'ENSG00000227311'),\n", " ('chr1', 0, 1, b'ENSG00000084070'),\n", " ('chr1', 0, 1, b'ENSG00000187801'),\n", " ('chr1', 0, 1, b'ENSG00000260920'),\n", " ('chr1', 0, 1, b'ENSG00000279667'),\n", " ('chr1', 0, 1, b'ENSG00000187815'),\n", " ('chr1', 0, 1, b'ENSG00000238287'),\n", " ('chr1', 0, 1, b'ENSG00000164002'),\n", " ('chr1', 0, 1, b'ENSG00000238186'),\n", " ('chr1', 0, 1, b'ENSG00000117010'),\n", " ('chr1', 0, 1, b'ENSG00000117016'),\n", " ('chr1', 0, 1, b'ENSG00000272145'),\n", " ('chr1', 0, 1, b'ENSG00000066136'),\n", " ('chr1', 0, 1, b'ENSG00000117013'),\n", " ('chr1', 0, 1, b'ENSG00000179862'),\n", " ('chr1', 0, 1, b'ENSG00000171793'),\n", " ('chr1', 0, 1, b'ENSG00000281207'),\n", " ('chr1', 0, 1, b'ENSG00000171790'),\n", " ('chr1', 0, 1, b'ENSG00000010803'),\n", " ('chr1', 0, 1, b'ENSG00000127129'),\n", " ('chr1', 0, 1, b'ENSG00000127124'),\n", " ('chr1', 0, 1, b'ENSG00000230638'),\n", " ('chr1', 0, 1, b'ENSG00000198815'),\n", " ('chr1', 0, 1, b'ENSG00000227527'),\n", " ('chr1', 0, 1, b'ENSG00000177181'),\n", " ('chr1', 0, 1, b'ENSG00000066185'),\n", " ('chr1', 0, 1, b'ENSG00000127125'),\n", " ('chr1', 0, 1, b'ENSG00000186409'),\n", " ('chr1', 0, 1, b'ENSG00000236876'),\n", " ('chr1', 0, 1, b'ENSG00000171960'),\n", " ('chr1', 0, 1, b'ENSG00000234917'),\n", " ('chr1', 0, 1, b'ENSG00000065978'),\n", " ('chr1', 0, 1, b'ENSG00000117385'),\n", " ('chr1', 0, 1, b'ENSG00000164008'),\n", " ('chr1', 0, 1, b'ENSG00000274386'),\n", " ('chr1', 0, 1, b'ENSG00000177868'),\n", " ('chr1', 0, 1, b'ENSG00000164010'),\n", " ('chr1', 0, 1, b'ENSG00000228192'),\n", " ('chr1', 0, 1, b'ENSG00000164011'),\n", " ('chr1', 0, 1, b'ENSG00000117394'),\n", " ('chr1', 0, 1, b'ENSG00000284138'),\n", " ('chr1', 0, 1, b'ENSG00000227533'),\n", " ('chr1', 0, 1, b'ENSG00000186973'),\n", " ('chr1', 0, 1, b'ENSG00000117395'),\n", " ('chr1', 0, 1, b'ENSG00000243710'),\n", " ('chr1', 0, 1, b'ENSG00000179178'),\n", " ('chr1', 0, 1, b'ENSG00000253313'),\n", " ('chr1', 0, 1, b'ENSG00000066056'),\n", " ('chr1', 0, 1, b'ENSG00000117400'),\n", " ('chr1', 0, 1, b'ENSG00000234694'),\n", " ('chr1', 0, 1, b'ENSG00000117399'),\n", " ('chr1', 0, 1, b'ENSG00000066322'),\n", " ('chr1', 0, 1, b'ENSG00000159479'),\n", " ('chr1', 0, 1, b'ENSG00000229431'),\n", " ('chr1', 0, 1, b'ENSG00000198198'),\n", " ('chr1', 0, 1, b'ENSG00000142949'),\n", " ('chr1', 0, 1, b'ENSG00000066135'),\n", " ('chr1', 0, 1, b'ENSG00000236200'),\n", " ('chr1', 0, 1, b'ENSG00000126091'),\n", " ('chr1', 0, 1, b'ENSG00000117407'),\n", " ('chr1', 0, 1, b'ENSG00000237950'),\n", " ('chr1', 0, 1, b'ENSG00000117408'),\n", " ('chr1', 0, 1, b'ENSG00000132768'),\n", " ('chr1', 0, 1, b'ENSG00000117410'),\n", " ('chr1', 0, 1, b'ENSG00000117411'),\n", " ('chr1', 0, 1, b'ENSG00000159214'),\n", " ('chr1', 0, 1, b'ENSG00000196517'),\n", " ('chr1', 0, 1, b'ENSG00000230615'),\n", " ('chr1', 0, 1, b'ENSG00000178028'),\n", " ('chr1', 0, 1, b'ENSG00000117419'),\n", " ('chr1', 0, 1, b'ENSG00000233602'),\n", " ('chr1', 0, 1, b'ENSG00000187147'),\n", " ('chr1', 0, 1, b'ENSG00000126106'),\n", " ('chr1', 0, 1, b'ENSG00000198520'),\n", " ('chr1', 0, 1, b'ENSG00000199377'),\n", " ('chr1', 0, 1, b'ENSG00000200169'),\n", " ('chr1', 0, 1, b'ENSG00000142945'),\n", " ('chr1', 0, 1, b'ENSG00000142937'),\n", " ('chr1', 0, 1, b'ENSG00000200913'),\n", " ('chr1', 0, 1, b'ENSG00000234093'),\n", " ('chr1', 0, 1, b'ENSG00000142959'),\n", " ('chr1', 0, 1, b'ENSG00000173846'),\n", " ('chr1', 0, 1, b'ENSG00000188396'),\n", " ('chr1', 0, 1, b'ENSG00000222009'),\n", " ('chr1', 0, 1, b'ENSG00000117425'),\n", " ('chr1', 0, 1, b'ENSG00000226499'),\n", " ('chr1', 0, 1, b'ENSG00000070785'),\n", " ('chr1', 0, 1, b'ENSG00000126107'),\n", " ('chr1', 0, 1, b'ENSG00000126088'),\n", " ('chr1', 0, 1, b'ENSG00000162415'),\n", " ('chr1', 0, 1, b'ENSG00000281912'),\n", " ('chr1', 0, 1, b'ENSG00000186603'),\n", " ('chr1', 0, 1, b'ENSG00000132781'),\n", " ('chr1', 0, 1, b'ENSG00000132773'),\n", " ('chr1', 0, 1, b'ENSG00000070759'),\n", " ('chr1', 0, 1, b'ENSG00000280670'),\n", " ('chr1', 0, 1, b'ENSG00000132763'),\n", " ('chr1', 0, 1, b'ENSG00000117450'),\n", " ('chr1', 0, 1, b'ENSG00000117448'),\n", " ('chr1', 0, 1, b'ENSG00000132780'),\n", " ('chr1', 0, 1, b'ENSG00000159588'),\n", " ('chr1', 0, 1, b'ENSG00000159592'),\n", " ('chr1', 0, 1, b'ENSG00000225447'),\n", " ('chr1', 0, 1, b'ENSG00000234329'),\n", " ('chr1', 0, 1, b'ENSG00000159596'),\n", " ('chr1', 0, 1, b'ENSG00000197429'),\n", " ('chr1', 0, 1, b'ENSG00000230896'),\n", " ('chr1', 0, 1, b'ENSG00000086015'),\n", " ('chr1', 0, 1, b'ENSG00000117461'),\n", " ('chr1', 0, 1, b'ENSG00000117472'),\n", " ('chr1', 0, 1, b'ENSG00000085998'),\n", " ('chr1', 0, 1, b'ENSG00000171357'),\n", " ('chr1', 0, 1, b'ENSG00000085999'),\n", " ('chr1', 0, 1, b'ENSG00000132128'),\n", " ('chr1', 0, 1, b'ENSG00000173660'),\n", " ('chr1', 0, 1, b'ENSG00000117481'),\n", " ('chr1', 0, 1, b'ENSG00000117480'),\n", " ('chr1', 0, 1, b'ENSG00000232022'),\n", " ('chr1', 0, 1, b'ENSG00000269956'),\n", " ('chr1', 0, 1, b'ENSG00000079277'),\n", " ('chr1', 0, 1, b'ENSG00000142961'),\n", " ('chr1', 0, 1, b'ENSG00000123472'),\n", " ('chr1', 0, 1, b'ENSG00000228237'),\n", " ('chr1', 0, 1, b'ENSG00000159658'),\n", " ('chr1', 0, 1, b'ENSG00000142973'),\n", " ('chr1', 0, 1, b'ENSG00000187048'),\n", " ('chr1', 0, 1, b'ENSG00000186377'),\n", " ('chr1', 0, 1, b'ENSG00000186160'),\n", " ('chr1', 0, 1, b'ENSG00000225506'),\n", " ('chr1', 0, 1, b'ENSG00000162365'),\n", " ('chr1', 0, 1, b'ENSG00000224805'),\n", " ('chr1', 0, 1, b'ENSG00000162366'),\n", " ('chr1', 0, 1, b'ENSG00000162367'),\n", " ('chr1', 0, 1, b'ENSG00000226252'),\n", " ('chr1', 0, 1, b'ENSG00000123473'),\n", " ('chr1', 0, 1, b'ENSG00000162368'),\n", " ('chr1', 0, 1, b'ENSG00000225762'),\n", " ('chr1', 0, 1, b'ENSG00000237424'),\n", " ('chr1', 0, 1, b'ENSG00000186564'),\n", " ('chr1', 0, 1, b'ENSG00000269113'),\n", " ('chr1', 0, 1, b'ENSG00000117834'),\n", " ('chr1', 0, 1, b'ENSG00000272491'),\n", " ('chr1', 0, 1, b'ENSG00000132122'),\n", " ('chr1', 0, 1, b'ENSG00000224986'),\n", " ('chr1', 0, 1, b'ENSG00000235105'),\n", " ('chr1', 0, 1, b'ENSG00000186094'),\n", " ('chr1', 0, 1, b'ENSG00000162373'),\n", " ('chr1', 0, 1, b'ENSG00000162374'),\n", " ('chr1', 0, 1, b'ENSG00000185104'),\n", " ('chr1', 0, 1, b'ENSG00000225767'),\n", " ('chr1', 0, 1, b'ENSG00000230585'),\n", " ('chr1', 0, 1, b'ENSG00000123080'),\n", " ('chr1', 0, 1, b'ENSG00000123091'),\n", " ('chr1', 0, 1, b'ENSG00000233406'),\n", " ('chr1', 0, 1, b'ENSG00000236434'),\n", " ('chr1', 0, 1, b'ENSG00000085831'),\n", " ('chr1', 0, 1, b'ENSG00000085832'),\n", " ('chr1', 0, 1, b'ENSG00000232027'),\n", " ('chr1', 0, 1, b'ENSG00000238140'),\n", " ('chr1', 0, 1, b'ENSG00000227070'),\n", " ('chr1', 0, 1, b'ENSG00000227742'),\n", " ('chr1', 0, 1, b'ENSG00000117859'),\n", " ('chr1', 0, 1, b'ENSG00000078618'),\n", " ('chr1', 0, 1, b'ENSG00000266993'),\n", " ('chr1', 0, 1, b'ENSG00000169213'),\n", " ('chr1', 0, 1, b'ENSG00000117862'),\n", " ('chr1', 0, 1, b'ENSG00000198841'),\n", " ('chr1', 0, 1, b'ENSG00000228369'),\n", " ('chr1', 0, 1, b'ENSG00000134717'),\n", " ('chr1', 0, 1, b'ENSG00000157077'),\n", " ('chr1', 0, 1, b'ENSG00000228407'),\n", " ('chr1', 0, 1, b'ENSG00000154222'),\n", " ('chr1', 0, 1, b'ENSG00000224680'),\n", " ('chr1', 0, 1, b'ENSG00000085840'),\n", " ('chr1', 0, 1, b'ENSG00000134748'),\n", " ('chr1', 0, 1, b'ENSG00000134744'),\n", " ('chr1', 0, 1, b'ENSG00000116157'),\n", " ('chr1', 0, 1, b'ENSG00000182183'),\n", " ('chr1', 0, 1, b'ENSG00000162377'),\n", " ('chr1', 0, 1, b'ENSG00000162378'),\n", " ('chr1', 0, 1, b'ENSG00000228929'),\n", " ('chr1', 0, 1, b'ENSG00000203995'),\n", " ('chr1', 0, 1, b'ENSG00000121310'),\n", " ('chr1', 0, 1, b'ENSG00000230953'),\n", " ('chr1', 0, 1, b'ENSG00000116171'),\n", " ('chr1', 0, 1, b'ENSG00000174348'),\n", " ('chr1', 0, 1, b'ENSG00000162383'),\n", " ('chr1', 0, 1, b'ENSG00000157184'),\n", " ('chr1', 0, 1, b'ENSG00000162384'),\n", " ('chr1', 0, 1, b'ENSG00000259818'),\n", " ('chr1', 0, 1, b'ENSG00000162385'),\n", " ('chr1', 0, 1, b'ENSG00000226754'),\n", " ('chr1', 0, 1, b'ENSG00000157193'),\n", " ('chr1', 0, 1, b'ENSG00000225675'),\n", " ('chr1', 0, 1, b'ENSG00000230138'),\n", " ('chr1', 0, 1, b'ENSG00000174332'),\n", " ('chr1', 0, 1, b'ENSG00000058804'),\n", " ('chr1', 0, 1, b'ENSG00000058799'),\n", " ('chr1', 0, 1, b'ENSG00000211452'),\n", " ('chr1', 0, 1, b'ENSG00000081870'),\n", " ('chr1', 0, 1, b'ENSG00000116212'),\n", " ('chr1', 0, 1, b'ENSG00000219102'),\n", " ('chr1', 0, 1, b'ENSG00000116209'),\n", " ('chr1', 0, 1, b'ENSG00000280378'),\n", " ('chr1', 0, 1, b'ENSG00000116205'),\n", " ('chr1', 0, 1, b'ENSG00000215883'),\n", " ('chr1', 0, 1, b'ENSG00000116221'),\n", " ('chr1', 0, 1, b'ENSG00000157216'),\n", " ('chr1', 0, 1, b'ENSG00000198711'),\n", " ('chr1', 0, 1, b'ENSG00000225632'),\n", " ('chr1', 0, 1, b'ENSG00000162390'),\n", " ('chr1', 0, 1, b'ENSG00000162391'),\n", " ('chr1', 0, 1, b'ENSG00000184313'),\n", " ('chr1', 0, 1, b'ENSG00000243725'),\n", " ('chr1', 0, 1, b'ENSG00000162396'),\n", " ('chr1', 0, 1, b'ENSG00000006555'),\n", " ('chr1', 0, 1, b'ENSG00000162398'),\n", " ('chr1', 0, 1, b'ENSG00000116133'),\n", " ('chr1', 0, 1, b'ENSG00000169174'),\n", " ('chr1', 0, 1, b'ENSG00000162402'),\n", " ('chr1', 0, 1, b'ENSG00000234810'),\n", " ('chr1', 0, 1, b'ENSG00000260971'),\n", " ('chr1', 0, 1, b'ENSG00000162407'),\n", " ('chr1', 0, 1, b'ENSG00000162409'),\n", " ('chr1', 0, 1, b'ENSG00000187889'),\n", " ('chr1', 0, 1, b'ENSG00000173406'),\n", " ('chr1', 0, 1, b'ENSG00000162600'),\n", " ('chr1', 0, 1, b'ENSG00000184292'),\n", " ('chr1', 0, 1, b'ENSG00000185839'),\n", " ('chr1', 0, 1, b'ENSG00000162601'),\n", " ('chr1', 0, 1, b'ENSG00000283445'),\n", " ('chr1', 0, 1, b'ENSG00000177606'),\n", " ('chr1', 0, 1, b'ENSG00000234807'),\n", " ('chr1', 0, 1, b'ENSG00000272226'),\n", " ('chr1', 0, 1, b'ENSG00000232453'),\n", " ('chr1', 0, 1, b'ENSG00000237352'),\n", " ('chr1', 0, 1, b'ENSG00000172456'),\n", " ('chr1', 0, 1, b'ENSG00000134709'),\n", " ('chr1', 0, 1, b'ENSG00000134716'),\n", " ('chr1', 0, 1, b'ENSG00000162598'),\n", " ('chr1', 0, 1, b'ENSG00000226476'),\n", " ('chr1', 0, 1, b'ENSG00000231252'),\n", " ('chr1', 0, 1, b'ENSG00000162599'),\n", " ('chr1', 0, 1, b'ENSG00000237928'),\n", " ('chr1', 0, 1, b'ENSG00000270742'),\n", " ('chr1', 0, 1, b'ENSG00000237853'),\n", " ('chr1', 0, 1, b'ENSG00000162604'),\n", " ('chr1', 0, 1, b'ENSG00000132849'),\n", " ('chr1', 0, 1, b'ENSG00000240563'),\n", " ('chr1', 0, 1, b'ENSG00000132854'),\n", " ('chr1', 0, 1, b'ENSG00000162607'),\n", " ('chr1', 0, 1, b'ENSG00000116641'),\n", " ('chr1', 0, 1, b'ENSG00000132855'),\n", " ('chr1', 0, 1, b'ENSG00000278967'),\n", " ('chr1', 0, 1, b'ENSG00000235545'),\n", " ('chr1', 0, 1, b'ENSG00000125703'),\n", " ('chr1', 0, 1, b'ENSG00000088035'),\n", " ('chr1', 0, 1, b'ENSG00000142856'),\n", " ('chr1', 0, 1, b'ENSG00000203965'),\n", " ('chr1', 0, 1, b'ENSG00000116652'),\n", " ('chr1', 0, 1, b'ENSG00000079739'),\n", " ('chr1', 0, 1, b'ENSG00000185483'),\n", " ('chr1', 0, 1, b'ENSG00000223949'),\n", " ('chr1', 0, 1, b'ENSG00000158966'),\n", " ('chr1', 0, 1, b'ENSG00000162437'),\n", " ('chr1', 0, 1, b'ENSG00000162434'),\n", " ('chr1', 0, 1, b'ENSG00000234784'),\n", " ('chr1', 0, 1, b'ENSG00000233877'),\n", " ('chr1', 0, 1, b'ENSG00000226891'),\n", " ('chr1', 0, 1, b'ENSG00000185031'),\n", " ('chr1', 0, 1, b'ENSG00000272506'),\n", " ('chr1', 0, 1, b'ENSG00000265996'),\n", " ('chr1', 0, 1, b'ENSG00000231485'),\n", " ('chr1', 0, 1, b'ENSG00000162433'),\n", " ('chr1', 0, 1, b'ENSG00000116675'),\n", " ('chr1', 0, 1, b'ENSG00000213625'),\n", " ('chr1', 0, 1, b'ENSG00000116678'),\n", " ('chr1', 0, 1, b'ENSG00000184588'),\n", " ('chr1', 0, 1, b'ENSG00000118473'),\n", " ('chr1', 0, 1, b'ENSG00000248458'),\n", " ('chr1', 0, 1, b'ENSG00000152760'),\n", " ('chr1', 0, 1, b'ENSG00000152763'),\n", " ('chr1', 0, 1, b'ENSG00000198160'),\n", " ('chr1', 0, 1, b'ENSG00000116704'),\n", " ('chr1', 0, 1, b'ENSG00000275678'),\n", " ('chr1', 0, 1, b'ENSG00000162594'),\n", " ('chr1', 0, 1, b'ENSG00000081985'),\n", " ('chr1', 0, 1, b'ENSG00000142864'),\n", " ('chr1', 0, 1, b'ENSG00000223263'),\n", " ('chr1', 0, 1, b'ENSG00000116717'),\n", " ('chr1', 0, 1, b'ENSG00000172380'),\n", " ('chr1', 0, 1, b'ENSG00000232284'),\n", " ('chr1', 0, 1, b'ENSG00000162595'),\n", " ('chr1', 0, 1, b'ENSG00000116729'),\n", " ('chr1', 0, 1, b'ENSG00000234383'),\n", " ('chr1', 0, 1, b'ENSG00000229133'),\n", " ('chr1', 0, 1, b'ENSG00000116745'),\n", " ('chr1', 0, 1, b'ENSG00000024526'),\n", " ('chr1', 0, 1, b'ENSG00000033122'),\n", " ('chr1', 0, 1, b'ENSG00000066557'),\n", " ('chr1', 0, 1, b'ENSG00000116754'),\n", " ('chr1', 0, 1, b'ENSG00000118454'),\n", " ('chr1', 0, 1, b'ENSG00000197568'),\n", " ('chr1', 0, 1, b'ENSG00000116761'),\n", " ('chr1', 0, 1, b'ENSG00000271992'),\n", " ('chr1', 0, 1, b'ENSG00000050628'),\n", " ('chr1', 0, 1, b'ENSG00000235079'),\n", " ('chr1', 0, 1, b'ENSG00000132485'),\n", " ('chr1', 0, 1, b'ENSG00000207721'),\n", " ('chr1', 0, 1, b'ENSG00000229956'),\n", " ('chr1', 0, 1, b'ENSG00000172260'),\n", " ('chr1', 0, 1, b'ENSG00000233994'),\n", " ('chr1', 0, 1, b'ENSG00000162620'),\n", " ('chr1', 0, 1, b'ENSG00000254685'),\n", " ('chr1', 0, 1, b'ENSG00000259030'),\n", " ('chr1', 0, 1, b'ENSG00000116783'),\n", " ('chr1', 0, 1, b'ENSG00000162621'),\n", " ('chr1', 0, 1, b'ENSG00000178965'),\n", " ('chr1', 0, 1, b'ENSG00000272864'),\n", " ('chr1', 0, 1, b'ENSG00000116791'),\n", " ('chr1', 0, 1, b'ENSG00000162623'),\n", " ('chr1', 0, 1, b'ENSG00000137968'),\n", " ('chr1', 0, 1, b'ENSG00000117054'),\n", " ('chr1', 0, 1, b'ENSG00000181227'),\n", " ('chr1', 0, 1, b'ENSG00000137955'),\n", " ('chr1', 0, 1, b'ENSG00000057468'),\n", " ('chr1', 0, 1, b'ENSG00000184005'),\n", " ('chr1', 0, 1, b'ENSG00000226415'),\n", " ('chr1', 0, 1, b'ENSG00000117069'),\n", " ('chr1', 0, 1, b'ENSG00000230498'),\n", " ('chr1', 0, 1, b'ENSG00000142892'),\n", " ('chr1', 0, 1, b'ENSG00000226084'),\n", " ('chr1', 0, 1, b'ENSG00000154027'),\n", " ('chr1', 0, 1, b'ENSG00000036549'),\n", " ('chr1', 0, 1, b'ENSG00000077254'),\n", " ('chr1', 0, 1, b'ENSG00000180488'),\n", " ('chr1', 0, 1, b'ENSG00000219201'),\n", " ('chr1', 0, 1, b'ENSG00000235613'),\n", " ('chr1', 0, 1, b'ENSG00000235927'),\n", " ('chr1', 0, 1, b'ENSG00000162614'),\n", " ('chr1', 0, 1, b'ENSG00000162613'),\n", " ('chr1', 0, 1, b'ENSG00000162616'),\n", " ('chr1', 0, 1, b'ENSG00000137960'),\n", " ('chr1', 0, 1, b'ENSG00000273338'),\n", " ('chr1', 0, 1, b'ENSG00000122420'),\n", " ...]" ] }, "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ "jac_exp.cut_intervals" ] }, { "cell_type": "code", "execution_count": 52, "id": "89d45039", "metadata": {}, "outputs": [], "source": [ "df_gene_chr = df_gene_chr.pivot_table(index='gene', columns='chrom', values='val', aggfunc='sum')" ] }, { "cell_type": "code", "execution_count": 15, "id": "ec5321dc", "metadata": {}, "outputs": [], "source": [ "df_gene_chr = df_gene_chr.fillna(0)" ] }, { "cell_type": "code", "execution_count": 17, "id": "364fa61b", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(24243, 24243)\n", "(24243, 23)\n", "0.9565217391304348\n", "0.0\n" ] } ], "source": [ "df_2d_jac, go_chrom = run_egad(df_gene_chr, df_exp_corr)" ] }, { "cell_type": "code", "execution_count": 1, "id": "3f27268e", "metadata": {}, "outputs": [], "source": [ "resolution_human = 10000\n", "species = \"human\"\n", "SRP_name = \"aggregates\"\n", "resolution = \"10kbp_raw\"" ] }, { "cell_type": "code", "execution_count": 42, "id": "aa3f2647", "metadata": {}, "outputs": [], "source": [ " input_path=f'/grid/gillis/data/lohia/hi_c_data_processing/data_{species}/{SRP_name}/{resolution}/max/'\n", " bins_bed = pd.read_csv(f'{input_path}/all_bins.bed', names=['chr', 'start', 'end', 'bin_id'])" ] }, { "cell_type": "code", "execution_count": 45, "id": "0b9ea825", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | chr | \n", "start | \n", "end | \n", "bin_id | \n", "
---|---|---|---|---|
0 | \n", "chr1 | \n", "0 | \n", "10000 | \n", "1 | \n", "
1 | \n", "chr1 | \n", "10000 | \n", "20000 | \n", "1 | \n", "
2 | \n", "chr1 | \n", "20000 | \n", "30000 | \n", "1 | \n", "
3 | \n", "chr1 | \n", "30000 | \n", "40000 | \n", "1 | \n", "
4 | \n", "chr1 | \n", "40000 | \n", "50000 | \n", "1 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
287504 | \n", "chr22 | \n", "50770000 | \n", "50780000 | \n", "1 | \n", "
287505 | \n", "chr22 | \n", "50780000 | \n", "50790000 | \n", "1 | \n", "
287506 | \n", "chr22 | \n", "50790000 | \n", "50800000 | \n", "1 | \n", "
287507 | \n", "chr22 | \n", "50800000 | \n", "50810000 | \n", "1 | \n", "
287508 | \n", "chr22 | \n", "50810000 | \n", "50818468 | \n", "1 | \n", "
287509 rows × 4 columns
\n", "\n", " | AUC | \n", "AVG_NODE_DEGREE | \n", "DEGREE_NULL_AUC | \n", "P_Value | \n", "
---|---|---|---|---|
chrom | \n", "\n", " | \n", " | \n", " | \n", " |
chr1 | \n", "0.548086 | \n", "12339.842509 | \n", "0.500163 | \n", "1.654169e-16 | \n", "
chr10 | \n", "0.586479 | \n", "12438.598510 | \n", "0.509500 | \n", "1.143218e-20 | \n", "
chr11 | \n", "0.574297 | \n", "12373.596395 | \n", "0.504759 | \n", "1.818544e-20 | \n", "
chr12 | \n", "0.556375 | \n", "12376.406448 | \n", "0.504662 | \n", "8.466436e-13 | \n", "
chr13 | \n", "0.667056 | \n", "12156.259454 | \n", "0.491709 | \n", "1.109736e-36 | \n", "
chr14 | \n", "0.562514 | \n", "12193.342382 | \n", "0.497277 | \n", "7.912834e-10 | \n", "
chr15 | \n", "0.594549 | \n", "12143.086516 | \n", "0.487864 | \n", "1.052527e-21 | \n", "
chr16 | \n", "0.706183 | \n", "12335.161854 | \n", "0.499546 | \n", "8.773301e-126 | \n", "
chr17 | \n", "0.666923 | \n", "12595.475912 | \n", "0.516480 | \n", "1.190610e-102 | \n", "
chr18 | \n", "0.662012 | \n", "11630.005105 | \n", "0.458888 | \n", "1.417273e-29 | \n", "
chr19 | \n", "0.743358 | \n", "12796.638458 | \n", "0.529496 | \n", "5.206787e-237 | \n", "
chr2 | \n", "0.583047 | \n", "12288.538649 | \n", "0.501277 | \n", "2.970102e-31 | \n", "
chr20 | \n", "0.626716 | \n", "12350.859676 | \n", "0.506664 | \n", "2.054682e-29 | \n", "
chr21 | \n", "0.606783 | \n", "11578.942705 | \n", "0.451534 | \n", "5.082539e-10 | \n", "
chr22 | \n", "0.740890 | \n", "12264.462411 | \n", "0.493158 | \n", "1.192274e-92 | \n", "
chr3 | \n", "0.580678 | \n", "12552.541917 | \n", "0.518726 | \n", "4.347483e-25 | \n", "
chr4 | \n", "0.688013 | \n", "12121.824832 | \n", "0.487091 | \n", "3.659621e-87 | \n", "
chr5 | \n", "0.620566 | \n", "11982.817831 | \n", "0.479412 | \n", "4.853560e-45 | \n", "
chr6 | \n", "0.636830 | \n", "11747.767883 | \n", "0.467278 | \n", "4.285290e-56 | \n", "
chr7 | \n", "0.593755 | \n", "12443.340673 | \n", "0.507129 | \n", "4.118314e-29 | \n", "
chr8 | \n", "0.634348 | \n", "11803.715731 | \n", "0.465617 | \n", "4.540686e-41 | \n", "
chr9 | \n", "0.574253 | \n", "12519.524911 | \n", "0.513562 | \n", "2.091948e-15 | \n", "
chrX | \n", "0.693078 | \n", "12553.542469 | \n", "0.513710 | \n", "5.165830e-77 | \n", "