{ "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "from scipy import stats, sparse\n", "import bottleneck\n", "\n", "def run_egad(go, nw, **kwargs):\n", " \"\"\"EGAD running function\n", " \n", " Wrapper to lower level functions for EGAD\n", "\n", " EGAD measures modularity of gene lists in co-expression networks. \n", "\n", " This was translated from the MATLAB version, which does tiled Cross Validation\n", " \n", " The useful kwargs are:\n", " int - nFold : Number of CV folds to do, default is 3, \n", " int - {min,max}_count : limits for number of terms in each gene list, these are exclusive values\n", "\n", "\n", " Arguments:\n", " go {pd.DataFrame} -- dataframe of genes x terms of values [0,1], where 1 is included in gene lists\n", " nw {pd.DataFrame} -- dataframe of co-expression network, genes x genes\n", " **kwargs \n", " \n", " Returns:\n", " pd.DataFrame -- dataframe of terms x metrics where the metrics are \n", " ['AUC', 'AVG_NODE_DEGREE', 'DEGREE_NULL_AUC', 'P_Value']\n", " \"\"\"\n", " assert nw.shape[0] == nw.shape[1] , 'Network is not square'\n", " #print(nw.index)\n", " #nw.columns = nw.columns.astype(int)\n", " #print(nw.columns.astype(int))\n", " assert np.all(nw.index == nw.columns) , 'Network index and columns are not in the same order'\n", "\n", " #nw_mask = nw.isna().sum(axis=1) != nw.shape[1]\n", " #nw = nw.loc[nw_mask, nw_mask].astype('float')\n", " #np.fill_diagonal(nw.values, 1)\n", " return _runNV(go, nw, **kwargs)\n", "\n", "def _runNV(go, nw, nFold=3, min_count=20, max_count=1000):\n", "\n", " #Make sure genes are same in go and nw\n", " #go.index = go.index.map(str) \n", " #nw.index = nw.index.map(str)\n", " #nw.index = nw.index.str.replace('_', '')\n", " #go.index = go.index.str.replace('_', '')\n", " #print (nw)\n", " genes_intersect = go.index.intersection(nw.index)\n", "\n", "\n", " #print (genes_intersect)\n", " go = go.loc[genes_intersect, :]\n", " nw = nw.loc[genes_intersect, genes_intersect]\n", " #print (go)\n", " print (nw.shape)\n", " print (go.shape)\n", " sparsity = 1.0 - np.count_nonzero(go) / go.size\n", " print (sparsity)\n", " sparsity = 1.0 - np.count_nonzero(nw) / nw.size\n", " print (sparsity)\n", " #print(nw\n", " #print(go\n", " nw_mask = nw.isna().sum(axis=1) != nw.shape[1]\n", " nw = nw.loc[nw_mask, nw_mask].astype('float')\n", " np.fill_diagonal(nw.values, 1)\n", " #Make sure there aren't duplicates\n", " duplicates = nw.index.duplicated(keep='first')\n", " nw = nw.loc[~duplicates, ~duplicates]\n", "\n", " go = go.loc[:, (go.sum(axis=0) > min_count) & (go.sum(axis=0) < max_count)]\n", " go = go.loc[~go.index.duplicated(keep='first'), :]\n", " #print(go)\n", "\n", " roc = _new_egad(go.values, nw.values, nFold)\n", "\n", " col_names = ['AUC', 'AVG_NODE_DEGREE', 'DEGREE_NULL_AUC', 'P_Value']\n", " #Put output in dataframe\n", " return pd.DataFrame(dict(zip(col_names, roc)), index=go.columns)\n", "\n", "def _new_egad(go, nw, nFold):\n", "\n", " #Build Cross validated Positive\n", " x, y = np.where(go)\n", " #print(x, y)\n", " cvgo = {}\n", " for i in np.arange(nFold):\n", " a = x[i::nFold]\n", " #print(a)\n", " b = y[i::nFold]\n", " dat = np.ones_like(a)\n", " mask = sparse.coo_matrix((dat, (a, b)), shape=go.shape)\n", " cvgo[i] = go - mask.toarray()\n", "\n", " CVgo = np.concatenate(list(cvgo.values()), axis=1)\n", " #print(CVgo)\n", "\n", " sumin = np.matmul(nw.T, CVgo)\n", "\n", " degree = np.sum(nw, axis=0)\n", " #print(degree)\n", " #print(degree[:, None])\n", "\n", " predicts = sumin / degree[:, None]\n", " #print(predicts)\n", "\n", " np.place(predicts, CVgo > 0, np.nan)\n", "\n", " #print(predicts)\n", "\n", " #Calculate ranks of positives\n", " rank_abs = lambda x: stats.rankdata(np.abs(x))\n", " predicts2 = np.apply_along_axis(rank_abs, 0, predicts)\n", " #print(predicts2)\n", "\n", " #Masking Nans that were ranked (how tiedrank works in matlab)\n", " predicts2[np.isnan(predicts)] = np.nan\n", " #print(predicts2)\n", "\n", " filtering = np.tile(go, nFold)\n", " #print(filtering)\n", "\n", " #negatives :filtering == 0\n", " #Sets Ranks of negatives to 0\n", " np.place(predicts2, filtering == 0, 0)\n", "\n", " #Sum of ranks for each prediction\n", " p = bottleneck.nansum(predicts2, axis=0)\n", " n_p = np.sum(filtering, axis=0) - np.sum(CVgo, axis=0)\n", "\n", " #Number of negatives\n", " #Number of GO terms - number of postiive\n", " n_n = filtering.shape[0] - np.sum(filtering, axis=0)\n", "\n", " roc = (p / n_p - (n_p + 1) / 2) / n_n\n", " U = roc * n_p * n_n\n", " Z = (np.abs(U - (n_p * n_n / 2))) / np.sqrt(n_p * n_n *(n_p + n_n + 1) / 12)\n", " roc = roc.reshape(nFold, go.shape[1])\n", " Z = Z.reshape(nFold, go.shape[1])\n", " #Stouffer Z method\n", " Z = bottleneck.nansum(Z, axis=0) / np.sqrt(nFold)\n", " #Calc ROC of Neighbor Voting\n", " roc = bottleneck.nanmean(roc, axis=0)\n", " P = stats.norm.sf(Z)\n", "\n", " #Average degree for nodes in each go term\n", " avg_degree = degree.dot(go) / np.sum(go, axis=0)\n", "\n", " #Calc null auc for degree\n", " ranks = np.tile(stats.rankdata(degree), (go.shape[1], 1)).T\n", "\n", " np.place(ranks, go == 0, 0)\n", "\n", " n_p = bottleneck.nansum(go, axis=0)\n", " nn = go.shape[0] - n_p\n", " p = bottleneck.nansum(ranks, axis=0)\n", "\n", " roc_null = (p / n_p - ((n_p + 1) / 2)) / nn\n", " #print(roc)\n", " return roc, avg_degree, roc_null, P\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "--2022-05-18 21:02:42-- http://session.asuscomm.com/database.php\n", "Resolving session.asuscomm.com (session.asuscomm.com)... 178.55.16.55\n", "Connecting to session.asuscomm.com (session.asuscomm.com)|178.55.16.55|:80... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: unspecified [text/html]\n", "Saving to: ‘database.php’\n", "\n", "database.php [ <=> ] 49.98K 177KB/s in 0.3s \n", "\n", "2022-05-18 21:02:42 (177 KB/s) - ‘database.php’ saved [51176]\n", "\n" ] } ], "source": [ "!wget http://session.asuscomm.com/database.php" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ ":2: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support skipfooter; you can avoid this warning by specifying engine='python'.\n", " df = pd.read_csv('/grid/gillis/data/lohia/hi_c_data_processing/data_human/scType_marker_genes.csv', header=1, usecols=[0,1,2,3], skipfooter=2)\n" ] } ], "source": [ "import pandas as pd\n", "df = pd.read_csv('/grid/gillis/data/lohia/hi_c_data_processing/data_human/scType_marker_genes.csv', header=1, usecols=[0,1,2,3], skipfooter=2)\n", "\n", "\n", "tissue_type_list = df['Tissue'].drop_duplicates().tolist()\n", "\n", "all_gene_list = df['Marker genes'].str.cat(sep=',').split(\",\")\n", "\n", "data_tissue = df.groupby(['Tissue']).apply(lambda grp: grp['Marker genes'].str.cat(sep=',').split(\",\"))\n", "\n", "all_gene_list = list(set(all_gene_list))\n", "\n", "nested_gene_marker_tissue_list = []\n", "\n", "for i in tissue_type_list:\n", " gene_for_given_tissue = data[data.index ==i][0]\n", " particular_gene_tissue = [ 1 if x in gene_for_given_tissue else 0 for x in all_gene_list]\n", " nested_gene_marker_tissue_list.append(particular_gene_tissue)\n", "\n", " \n", "\n", "pd.DataFrame(nested_gene_marker_tissue_list, columns = all_gene_list, index=tissue_type_list)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "ename": "JSONDecodeError", "evalue": "Expecting value: line 1 column 1 (char 0)", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mJSONDecodeError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0mencoding\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minfo\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_content_charset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'utf8'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# JSON default\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;31m#print(raw_data) #this is data in string format\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 8\u001b[0;31m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mjson\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloads\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mraw_data\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 9\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m#this would be your json data\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/.conda/envs/hicexplorer/lib/python3.8/json/__init__.py\u001b[0m in \u001b[0;36mloads\u001b[0;34m(s, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)\u001b[0m\n\u001b[1;32m 355\u001b[0m \u001b[0mparse_int\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mparse_float\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32mand\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 356\u001b[0m parse_constant is None and object_pairs_hook is None and not kw):\n\u001b[0;32m--> 357\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_default_decoder\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdecode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 358\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcls\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 359\u001b[0m \u001b[0mcls\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mJSONDecoder\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/.conda/envs/hicexplorer/lib/python3.8/json/decoder.py\u001b[0m in \u001b[0;36mdecode\u001b[0;34m(self, s, _w)\u001b[0m\n\u001b[1;32m 335\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 336\u001b[0m \"\"\"\n\u001b[0;32m--> 337\u001b[0;31m \u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mend\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mraw_decode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0midx\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0m_w\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 338\u001b[0m \u001b[0mend\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_w\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mend\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 339\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mend\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/.conda/envs/hicexplorer/lib/python3.8/json/decoder.py\u001b[0m in \u001b[0;36mraw_decode\u001b[0;34m(self, s, idx)\u001b[0m\n\u001b[1;32m 353\u001b[0m \u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mend\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mscan_once\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0midx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 354\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mStopIteration\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 355\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mJSONDecodeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Expecting value\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0ms\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 356\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mend\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mJSONDecodeError\u001b[0m: Expecting value: line 1 column 1 (char 0)" ] } ], "source": [ "import json\n", "import urllib.request\n", "url = \"http://session.asuscomm.com/database.php\"\n", "x = urllib.request.urlopen(url)\n", "raw_data = x.read()\n", "encoding = x.info().get_content_charset('utf8') # JSON default\n", "#print(raw_data) #this is data in string format\n", "data = json.loads(raw_data)\n", "print(data) #this would be your json data" ] }, { "cell_type": "code", "execution_count": 140, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 145, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Gene stable IDGene name
0ENSG00000210049MT-TF
1ENSG00000211459MT-RNR1
2ENSG00000210077MT-TV
3ENSG00000210082MT-RNR2
4ENSG00000209082MT-TL1
.........
68011ENSG00000163263CFAP141
68012ENSG00000143612C1orf43
68013ENSG00000143569UBAP2L
68014ENSG00000201129SNORA58B
68015ENSG00000143575HAX1
\n", "

68016 rows × 2 columns

\n", "
" ], "text/plain": [ " Gene stable ID Gene name\n", "0 ENSG00000210049 MT-TF\n", "1 ENSG00000211459 MT-RNR1\n", "2 ENSG00000210077 MT-TV\n", "3 ENSG00000210082 MT-RNR2\n", "4 ENSG00000209082 MT-TL1\n", "... ... ...\n", "68011 ENSG00000163263 CFAP141\n", "68012 ENSG00000143612 C1orf43\n", "68013 ENSG00000143569 UBAP2L\n", "68014 ENSG00000201129 SNORA58B\n", "68015 ENSG00000143575 HAX1\n", "\n", "[68016 rows x 2 columns]" ] }, "execution_count": 145, "metadata": {}, "output_type": "execute_result" } ], "source": [ "all_gene_list" ] }, { "cell_type": "code", "execution_count": 151, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 155, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['ENSG00000188612',\n", " 'ENSG00000163331',\n", " 'ENSG00000184302',\n", " 'ENSG00000163631',\n", " 'del',\n", " 'ENSG00000068305',\n", " 'ENSG00000112559',\n", " 'ENSG00000136153',\n", " 'ENSG00000125780',\n", " 'ENSG00000124253',\n", " 'ENSG00000154358',\n", " 'ENSG00000157404',\n", " 'ENSG00000140263',\n", " 'ENSG00000063127',\n", " 'ENSG00000107984',\n", " 'ENSG00000008196',\n", " 'ENSG00000120156',\n", " 'ENSG00000138722',\n", " 'ENSG00000115290',\n", " 'ENSG00000156222',\n", " 'ENSG00000154553',\n", " 'ENSG00000111199',\n", " 'ENSG00000139549',\n", " 'ENSG00000010278',\n", " 'ENSG00000112562',\n", " 'ENSG00000115008',\n", " 'ENSG00000113327',\n", " 'ENSG00000275395',\n", " 'del',\n", " 'ENSG00000154330',\n", " 'ENSG00000136352',\n", " 'ENSG00000134762',\n", " 'ENSG00000180879',\n", " 'ENSG00000130513',\n", " 'ENSG00000125820',\n", " 'del',\n", " 'del',\n", " 'del',\n", " 'ENSG00000112175',\n", " 'del',\n", " 'ENSG00000104921',\n", " 'ENSG00000088386',\n", " 'del',\n", " 'ENSG00000114115',\n", " 'ENSG00000172238',\n", " 'ENSG00000102554',\n", " 'ENSG00000204482',\n", " 'del',\n", " 'ENSG00000187098',\n", " 'ENSG00000107447',\n", " 'ENSG00000163563',\n", " 'del',\n", " 'ENSG00000185010',\n", " 'del',\n", " 'ENSG00000105221',\n", " 'ENSG00000157005',\n", " 'ENSG00000162551',\n", " 'ENSG00000183421',\n", " 'ENSG00000120437',\n", " 'ENSG00000166959',\n", " 'ENSG00000182968',\n", " 'ENSG00000115041',\n", " 'ENSG00000169903',\n", " 'ENSG00000144366',\n", " 'ENSG00000117707',\n", " 'ENSG00000171564',\n", " 'ENSG00000224389',\n", " 'ENSG00000100079',\n", " 'del',\n", " 'ENSG00000109452',\n", " 'del',\n", " 'ENSG00000099958',\n", " 'ENSG00000258947',\n", " 'ENSG00000003436',\n", " 'ENSG00000125845',\n", " 'ENSG00000204262',\n", " 'del',\n", " 'ENSG00000169429',\n", " 'ENSG00000274736',\n", " 'ENSG00000183036',\n", " 'ENSG00000132821',\n", " 'ENSG00000169398',\n", " 'ENSG00000106631',\n", " 'del',\n", " 'ENSG00000127124',\n", " 'ENSG00000142789',\n", " 'ENSG00000143297',\n", " 'ENSG00000107562',\n", " 'ENSG00000123364',\n", " 'ENSG00000182667',\n", " 'ENSG00000122180',\n", " 'ENSG00000177425',\n", " 'ENSG00000284862',\n", " 'ENSG00000129226',\n", " 'ENSG00000138379',\n", " 'del',\n", " 'ENSG00000197614',\n", " 'ENSG00000254709',\n", " 'ENSG00000170180',\n", " 'ENSG00000165215',\n", " 'ENSG00000164893',\n", " 'ENSG00000006468',\n", " 'del',\n", " 'ENSG00000103546',\n", " 'ENSG00000148826',\n", " 'ENSG00000170835',\n", " 'ENSG00000165646',\n", " 'ENSG00000170561',\n", " 'ENSG00000186951',\n", " 'ENSG00000111665',\n", " 'ENSG00000181195',\n", " 'ENSG00000196352',\n", " 'ENSG00000115844',\n", " 'ENSG00000136231',\n", " 'ENSG00000110848',\n", " 'ENSG00000152592',\n", " 'ENSG00000175535',\n", " 'ENSG00000145824',\n", " 'ENSG00000134852',\n", " 'ENSG00000106546',\n", " 'ENSG00000133636',\n", " 'ENSG00000138363',\n", " 'ENSG00000107295',\n", " 'ENSG00000170962',\n", " 'ENSG00000272573',\n", " 'ENSG00000184005',\n", " 'ENSG00000248329',\n", " 'ENSG00000160808',\n", " 'ENSG00000150093',\n", " 'ENSG00000076555',\n", " 'ENSG00000182871',\n", " 'ENSG00000157554',\n", " 'del',\n", " 'ENSG00000123384',\n", " 'ENSG00000164400',\n", " 'ENSG00000131477',\n", " 'ENSG00000196549',\n", " 'ENSG00000153944',\n", " 'ENSG00000198467',\n", " 'ENSG00000137709',\n", " 'ENSG00000134028',\n", " 'ENSG00000197405',\n", " 'ENSG00000077279',\n", " 'ENSG00000078081',\n", " 'ENSG00000116473',\n", " 'ENSG00000091010',\n", " 'ENSG00000070031',\n", " 'ENSG00000150054',\n", " 'del',\n", " 'ENSG00000162654',\n", " 'ENSG00000166819',\n", " 'ENSG00000171533',\n", " 'ENSG00000076706',\n", " 'ENSG00000136111',\n", " 'ENSG00000104888',\n", " 'ENSG00000130649',\n", " 'ENSG00000124664',\n", " 'ENSG00000121577',\n", " 'ENSG00000167434',\n", " 'ENSG00000148965',\n", " 'ENSG00000117322',\n", " 'ENSG00000178301',\n", " 'ENSG00000169245',\n", " 'ENSG00000163221',\n", " 'del',\n", " 'ENSG00000182698',\n", " 'ENSG00000163599',\n", " 'ENSG00000124107',\n", " 'ENSG00000211695',\n", " 'ENSG00000119927',\n", " 'ENSG00000103449',\n", " 'del',\n", " 'ENSG00000112299',\n", " 'ENSG00000130707',\n", " 'ENSG00000185002',\n", " 'ENSG00000125207',\n", " 'ENSG00000211895',\n", " 'ENSG00000072274',\n", " 'ENSG00000178828',\n", " 'del',\n", " 'del',\n", " 'ENSG00000075711',\n", " 'ENSG00000085978',\n", " 'ENSG00000183625',\n", " 'ENSG00000109787',\n", " 'ENSG00000102970',\n", " 'del',\n", " 'ENSG00000145244',\n", " 'ENSG00000086991',\n", " 'ENSG00000130787',\n", " 'ENSG00000116260',\n", " 'ENSG00000149948',\n", " 'ENSG00000169174',\n", " 'ENSG00000197921',\n", " 'ENSG00000185291',\n", " 'del',\n", " 'ENSG00000175592',\n", " 'del',\n", " 'del',\n", " 'del',\n", " 'ENSG00000112992',\n", " 'ENSG00000187714',\n", " 'ENSG00000111701',\n", " 'ENSG00000119917',\n", " 'ENSG00000275302',\n", " 'ENSG00000149596',\n", " 'ENSG00000177989',\n", " 'ENSG00000173786',\n", " 'ENSG00000197181',\n", " 'ENSG00000173578',\n", " 'del',\n", " 'del',\n", " 'ENSG00000153395',\n", " 'ENSG00000127831',\n", " 'del',\n", " 'ENSG00000151690',\n", " 'ENSG00000205038',\n", " 'ENSG00000125798',\n", " 'ENSG00000145113',\n", " 'ENSG00000151715',\n", " 'ENSG00000115935',\n", " 'ENSG00000273706',\n", " 'ENSG00000128591',\n", " 'ENSG00000168542',\n", " 'ENSG00000024422',\n", " 'ENSG00000148803',\n", " 'ENSG00000154118',\n", " 'ENSG00000185985',\n", " 'ENSG00000178257',\n", " 'ENSG00000188554',\n", " 'ENSG00000122420',\n", " 'ENSG00000138083',\n", " 'ENSG00000170324',\n", " 'ENSG00000010327',\n", " 'ENSG00000182010',\n", " 'del',\n", " 'ENSG00000186451',\n", " 'ENSG00000068971',\n", " 'ENSG00000159674',\n", " 'ENSG00000276409',\n", " 'ENSG00000275385',\n", " 'del',\n", " 'del',\n", " 'ENSG00000138798',\n", " 'del',\n", " 'ENSG00000146425',\n", " 'ENSG00000135517',\n", " 'ENSG00000166825',\n", " 'ENSG00000169083',\n", " 'ENSG00000115165',\n", " 'ENSG00000156113',\n", " 'ENSG00000179454',\n", " 'ENSG00000099250',\n", " 'ENSG00000135919',\n", " 'ENSG00000137726',\n", " 'ENSG00000127955',\n", " 'ENSG00000226674',\n", " 'ENSG00000102265',\n", " 'ENSG00000166949',\n", " 'ENSG00000165061',\n", " 'ENSG00000013725',\n", " 'ENSG00000236333',\n", " 'ENSG00000104760',\n", " 'ENSG00000128052',\n", " 'ENSG00000124205',\n", " 'ENSG00000242574',\n", " 'ENSG00000147257',\n", " 'ENSG00000121966',\n", " 'ENSG00000116039',\n", " 'ENSG00000186510',\n", " 'ENSG00000159216',\n", " 'ENSG00000176887',\n", " 'ENSG00000179751',\n", " 'ENSG00000171522',\n", " 'ENSG00000185052',\n", " 'ENSG00000239474',\n", " 'ENSG00000174156',\n", " 'ENSG00000165475',\n", " 'ENSG00000182963',\n", " 'ENSG00000145649',\n", " 'ENSG00000115138',\n", " 'ENSG00000134013',\n", " 'ENSG00000155366',\n", " 'ENSG00000168994',\n", " 'ENSG00000067606',\n", " 'ENSG00000189334',\n", " 'del',\n", " 'ENSG00000135378',\n", " 'ENSG00000130176',\n", " 'ENSG00000134363',\n", " 'ENSG00000116824',\n", " 'ENSG00000111799',\n", " 'ENSG00000223609',\n", " 'ENSG00000152591',\n", " 'ENSG00000172183',\n", " 'ENSG00000161270',\n", " 'ENSG00000160181',\n", " 'ENSG00000134438',\n", " 'del',\n", " 'ENSG00000006210',\n", " 'ENSG00000179914',\n", " 'ENSG00000151418',\n", " 'ENSG00000129514',\n", " 'ENSG00000151090',\n", " 'ENSG00000164600',\n", " 'ENSG00000140488',\n", " 'ENSG00000172016',\n", " 'ENSG00000166278',\n", " 'ENSG00000160654',\n", " 'ENSG00000168925',\n", " 'del',\n", " 'ENSG00000122188',\n", " 'ENSG00000136826',\n", " 'ENSG00000112818',\n", " 'ENSG00000112972',\n", " 'del',\n", " 'ENSG00000185745',\n", " 'ENSG00000161849',\n", " 'del',\n", " 'ENSG00000038382',\n", " 'del',\n", " 'ENSG00000016490',\n", " 'ENSG00000196126',\n", " 'ENSG00000133019',\n", " 'ENSG00000162618',\n", " 'ENSG00000163814',\n", " 'ENSG00000189058',\n", " 'ENSG00000163421',\n", " 'ENSG00000087842',\n", " 'ENSG00000089041',\n", " 'ENSG00000118271',\n", " 'ENSG00000072195',\n", " 'ENSG00000089356',\n", " 'ENSG00000164764',\n", " 'ENSG00000148737',\n", " 'ENSG00000196154',\n", " 'ENSG00000197818',\n", " 'ENSG00000105894',\n", " 'ENSG00000104731',\n", " 'del',\n", " 'ENSG00000134539',\n", " 'ENSG00000181885',\n", " 'ENSG00000186395',\n", " 'ENSG00000080166',\n", " 'ENSG00000147403',\n", " 'ENSG00000198373',\n", " 'ENSG00000232810',\n", " 'ENSG00000110852',\n", " 'ENSG00000072571',\n", " 'del',\n", " 'ENSG00000166828',\n", " 'ENSG00000124564',\n", " 'ENSG00000162951',\n", " 'ENSG00000144711',\n", " 'ENSG00000163751',\n", " 'ENSG00000112964',\n", " 'ENSG00000197249',\n", " 'ENSG00000174236',\n", " 'ENSG00000043355',\n", " 'ENSG00000197561',\n", " 'del',\n", " 'ENSG00000172425',\n", " 'ENSG00000231389',\n", " 'ENSG00000118407',\n", " 'ENSG00000118777',\n", " 'ENSG00000204444',\n", " 'ENSG00000197273',\n", " 'del',\n", " 'ENSG00000185760',\n", " 'ENSG00000106633',\n", " 'ENSG00000012124',\n", " 'ENSG00000180209',\n", " 'ENSG00000021300',\n", " 'ENSG00000239697',\n", " 'ENSG00000096717',\n", " 'ENSG00000107485',\n", " 'ENSG00000144229',\n", " 'ENSG00000118137',\n", " 'ENSG00000134250',\n", " 'ENSG00000070193',\n", " 'ENSG00000143878',\n", " 'ENSG00000159200',\n", " 'ENSG00000145321',\n", " 'ENSG00000106853',\n", " 'ENSG00000043039',\n", " 'ENSG00000211679',\n", " 'ENSG00000154645',\n", " 'del',\n", " 'ENSG00000168269',\n", " 'ENSG00000135437',\n", " 'ENSG00000123836',\n", " 'ENSG00000167741',\n", " 'ENSG00000159224',\n", " 'ENSG00000172023',\n", " 'ENSG00000175161',\n", " 'del',\n", " 'ENSG00000124701',\n", " 'ENSG00000213996',\n", " 'ENSG00000103365',\n", " 'ENSG00000021826',\n", " 'ENSG00000166927',\n", " 'ENSG00000104537',\n", " 'ENSG00000130037',\n", " 'ENSG00000101311',\n", " 'ENSG00000147862',\n", " 'ENSG00000144476',\n", " 'ENSG00000138207',\n", " 'ENSG00000160013',\n", " 'del',\n", " 'del',\n", " 'ENSG00000183023',\n", " 'ENSG00000143921',\n", " 'del',\n", " 'del',\n", " 'ENSG00000174944',\n", " 'ENSG00000179059',\n", " 'del',\n", " 'ENSG00000169562',\n", " 'ENSG00000021645',\n", " 'ENSG00000247809',\n", " 'ENSG00000170365',\n", " 'ENSG00000100453',\n", " 'ENSG00000128567',\n", " 'ENSG00000277893',\n", " 'ENSG00000169252',\n", " 'ENSG00000182732',\n", " 'ENSG00000249459',\n", " 'del',\n", " 'ENSG00000105697',\n", " 'ENSG00000148400',\n", " 'ENSG00000170293',\n", " 'del',\n", " 'del',\n", " 'del',\n", " 'del',\n", " 'ENSG00000163431',\n", " 'ENSG00000175792',\n", " 'ENSG00000198788',\n", " 'ENSG00000065675',\n", " 'ENSG00000101439',\n", " 'ENSG00000113734',\n", " 'ENSG00000113296',\n", " 'ENSG00000138675',\n", " 'ENSG00000185737',\n", " 'ENSG00000204983',\n", " 'del',\n", " 'ENSG00000197702',\n", " 'del',\n", " 'ENSG00000141504',\n", " 'ENSG00000136542',\n", " 'del',\n", " 'ENSG00000132470',\n", " 'ENSG00000137166',\n", " 'ENSG00000136048',\n", " 'ENSG00000071991',\n", " 'ENSG00000145384',\n", " 'ENSG00000198846',\n", " 'ENSG00000145708',\n", " 'ENSG00000161544',\n", " 'ENSG00000196975',\n", " 'ENSG00000160180',\n", " 'ENSG00000100292',\n", " 'ENSG00000179981',\n", " 'ENSG00000150510',\n", " 'del',\n", " 'ENSG00000111049',\n", " 'ENSG00000157890',\n", " 'ENSG00000171346',\n", " 'del',\n", " 'ENSG00000187479',\n", " 'ENSG00000129167',\n", " 'ENSG00000135821',\n", " 'del',\n", " 'ENSG00000167972',\n", " 'ENSG00000196415',\n", " 'ENSG00000106819',\n", " 'ENSG00000112837',\n", " 'ENSG00000137860',\n", " 'ENSG00000058085',\n", " 'ENSG00000111245',\n", " 'ENSG00000170558',\n", " 'ENSG00000095637',\n", " 'ENSG00000154188',\n", " 'ENSG00000233041',\n", " 'ENSG00000138772',\n", " 'ENSG00000074803',\n", " 'ENSG00000008517',\n", " 'del',\n", " 'ENSG00000134285',\n", " 'ENSG00000277734',\n", " 'del',\n", " 'del',\n", " 'ENSG00000106991',\n", " 'ENSG00000148584',\n", " 'del',\n", " 'ENSG00000135374',\n", " 'ENSG00000165685',\n", " 'ENSG00000244734',\n", " 'ENSG00000163497',\n", " 'ENSG00000160862',\n", " 'ENSG00000104856',\n", " 'ENSG00000114166',\n", " 'ENSG00000137673',\n", " 'ENSG00000173991',\n", " 'ENSG00000188517',\n", " 'del',\n", " 'ENSG00000196660',\n", " 'ENSG00000113140',\n", " 'ENSG00000189013',\n", " 'ENSG00000100439',\n", " 'ENSG00000188786',\n", " 'ENSG00000055917',\n", " 'ENSG00000152689',\n", " 'ENSG00000164825',\n", " 'ENSG00000006327',\n", " 'ENSG00000163739',\n", " 'del',\n", " 'ENSG00000119457',\n", " 'ENSG00000157851',\n", " 'ENSG00000150347',\n", " 'ENSG00000069399',\n", " 'ENSG00000106089',\n", " 'ENSG00000129910',\n", " 'ENSG00000176884',\n", " 'ENSG00000165379',\n", " 'ENSG00000188257',\n", " 'ENSG00000182533',\n", " 'ENSG00000148200',\n", " 'ENSG00000106018',\n", " 'ENSG00000204531',\n", " 'ENSG00000196344',\n", " 'ENSG00000143153',\n", " 'ENSG00000203727',\n", " 'del',\n", " 'ENSG00000119139',\n", " 'ENSG00000100526',\n", " 'ENSG00000158516',\n", " 'ENSG00000066279',\n", " 'ENSG00000146555',\n", " 'ENSG00000171444',\n", " 'ENSG00000081051',\n", " 'ENSG00000171236',\n", " 'ENSG00000126785',\n", " 'ENSG00000105852',\n", " 'ENSG00000107731',\n", " 'del',\n", " 'ENSG00000127588',\n", " 'ENSG00000196139',\n", " 'ENSG00000149591',\n", " 'ENSG00000128322',\n", " 'del',\n", " 'ENSG00000115850',\n", " 'ENSG00000115738',\n", " 'ENSG00000109132',\n", " 'ENSG00000140937',\n", " 'del',\n", " 'ENSG00000127920',\n", " 'ENSG00000137699',\n", " 'ENSG00000065613',\n", " 'del',\n", " 'ENSG00000132170',\n", " 'ENSG00000129538',\n", " 'ENSG00000170577',\n", " 'ENSG00000241404',\n", " 'ENSG00000198759',\n", " 'ENSG00000095303',\n", " 'ENSG00000087303',\n", " 'ENSG00000104332',\n", " 'ENSG00000243290',\n", " 'ENSG00000173762',\n", " 'ENSG00000179915',\n", " 'ENSG00000163286',\n", " 'ENSG00000173406',\n", " 'ENSG00000169710',\n", " 'ENSG00000145198',\n", " 'ENSG00000119514',\n", " 'ENSG00000166928',\n", " 'del',\n", " 'ENSG00000198523',\n", " 'ENSG00000149516',\n", " 'ENSG00000110777',\n", " 'ENSG00000146457',\n", " 'ENSG00000198947',\n", " 'del',\n", " 'ENSG00000160255',\n", " 'ENSG00000170890',\n", " 'ENSG00000147872',\n", " 'ENSG00000102145',\n", " 'ENSG00000120436',\n", " 'ENSG00000128881',\n", " 'ENSG00000177301',\n", " 'ENSG00000153253',\n", " 'ENSG00000131095',\n", " 'ENSG00000189056',\n", " 'ENSG00000134827',\n", " 'ENSG00000104219',\n", " 'ENSG00000107165',\n", " 'ENSG00000198216',\n", " 'ENSG00000109047',\n", " 'del',\n", " 'ENSG00000145721',\n", " 'ENSG00000198821',\n", " 'ENSG00000113492',\n", " 'ENSG00000138030',\n", " 'del',\n", " 'del',\n", " 'ENSG00000171109',\n", " 'ENSG00000136011',\n", " 'del',\n", " 'del',\n", " 'ENSG00000050628',\n", " 'ENSG00000008256',\n", " 'ENSG00000162761',\n", " 'ENSG00000219073',\n", " 'ENSG00000164265',\n", " 'ENSG00000185633',\n", " 'ENSG00000168878',\n", " 'ENSG00000075891',\n", " 'ENSG00000062038',\n", " 'del',\n", " 'del',\n", " 'ENSG00000123838',\n", " 'ENSG00000128683',\n", " 'ENSG00000158122',\n", " 'ENSG00000151704',\n", " 'ENSG00000149564',\n", " 'ENSG00000104312',\n", " 'ENSG00000112033',\n", " 'del',\n", " 'ENSG00000129194',\n", " 'ENSG00000080709',\n", " 'ENSG00000135298',\n", " 'ENSG00000085063',\n", " 'del',\n", " 'ENSG00000081479',\n", " 'ENSG00000116745',\n", " 'ENSG00000092295',\n", " 'ENSG00000130702',\n", " 'ENSG00000125851',\n", " 'ENSG00000096088',\n", " 'del',\n", " 'ENSG00000137642',\n", " 'ENSG00000075426',\n", " 'del',\n", " 'ENSG00000215644',\n", " 'ENSG00000262179',\n", " 'ENSG00000060566',\n", " 'ENSG00000118526',\n", " 'ENSG00000138639',\n", " 'ENSG00000163623',\n", " 'ENSG00000165272',\n", " 'ENSG00000007171',\n", " 'ENSG00000178394',\n", " 'ENSG00000188993',\n", " 'ENSG00000099953',\n", " 'del',\n", " 'del',\n", " 'ENSG00000106004',\n", " 'ENSG00000067798',\n", " 'ENSG00000164434',\n", " 'ENSG00000205927',\n", " 'ENSG00000110195',\n", " 'ENSG00000120937',\n", " 'ENSG00000119121',\n", " 'ENSG00000121361',\n", " 'ENSG00000184292',\n", " 'ENSG00000060709',\n", " 'ENSG00000170476',\n", " 'ENSG00000181449',\n", " 'ENSG00000169189',\n", " 'ENSG00000158669',\n", " 'ENSG00000115884',\n", " 'ENSG00000105355',\n", " 'del',\n", " 'ENSG00000176396',\n", " 'ENSG00000110911',\n", " 'ENSG00000056736',\n", " 'ENSG00000100490',\n", " 'ENSG00000174827',\n", " 'ENSG00000105392',\n", " 'ENSG00000088305',\n", " 'ENSG00000112769',\n", " 'ENSG00000233670',\n", " 'ENSG00000163072',\n", " 'ENSG00000102837',\n", " 'ENSG00000158481',\n", " 'ENSG00000122585',\n", " 'ENSG00000159197',\n", " 'ENSG00000112902',\n", " 'ENSG00000170482',\n", " 'ENSG00000108846',\n", " 'ENSG00000178878',\n", " 'ENSG00000143248',\n", " 'ENSG00000235984',\n", " 'ENSG00000197594',\n", " 'ENSG00000119125',\n", " 'ENSG00000154175',\n", " 'ENSG00000183733',\n", " 'ENSG00000165474',\n", " 'ENSG00000145692',\n", " 'ENSG00000100625',\n", " 'del',\n", " 'ENSG00000173253',\n", " 'ENSG00000152270',\n", " 'ENSG00000179639',\n", " 'ENSG00000176387',\n", " 'ENSG00000115665',\n", " 'del',\n", " 'ENSG00000163207',\n", " 'ENSG00000140545',\n", " 'ENSG00000229314',\n", " 'ENSG00000245848',\n", " 'ENSG00000122786',\n", " 'del',\n", " 'ENSG00000086159',\n", " 'ENSG00000121440',\n", " 'ENSG00000105205',\n", " 'del',\n", " 'ENSG00000162676',\n", " 'ENSG00000147459',\n", " 'ENSG00000112378',\n", " 'del',\n", " 'ENSG00000124225',\n", " 'ENSG00000117592',\n", " 'ENSG00000106331',\n", " 'ENSG00000009709',\n", " 'ENSG00000170312',\n", " 'ENSG00000143632',\n", " 'del',\n", " 'del',\n", " 'ENSG00000101680',\n", " 'ENSG00000133063',\n", " 'ENSG00000180176',\n", " 'ENSG00000176153',\n", " 'ENSG00000117595',\n", " 'ENSG00000007516',\n", " 'ENSG00000149534',\n", " 'ENSG00000179388',\n", " 'ENSG00000099260',\n", " 'del',\n", " 'ENSG00000182742',\n", " 'ENSG00000104879',\n", " 'ENSG00000239839',\n", " 'ENSG00000143226',\n", " 'ENSG00000173020',\n", " 'ENSG00000036828',\n", " 'ENSG00000171476',\n", " 'ENSG00000165621',\n", " 'ENSG00000110492',\n", " 'ENSG00000148346',\n", " 'del',\n", " 'ENSG00000261371',\n", " 'ENSG00000169605',\n", " 'ENSG00000169442',\n", " 'ENSG00000126705',\n", " 'ENSG00000131067',\n", " 'ENSG00000203724',\n", " 'ENSG00000157388',\n", " 'ENSG00000135903',\n", " 'ENSG00000177590',\n", " 'ENSG00000121060',\n", " 'ENSG00000169856',\n", " 'ENSG00000152092',\n", " 'ENSG00000187134',\n", " 'ENSG00000092067',\n", " 'ENSG00000167995',\n", " 'ENSG00000180509',\n", " 'ENSG00000133256',\n", " 'ENSG00000156265',\n", " 'ENSG00000240583',\n", " 'ENSG00000186868',\n", " 'ENSG00000162366',\n", " 'ENSG00000138795',\n", " 'ENSG00000169031',\n", " 'ENSG00000105447',\n", " 'del',\n", " 'ENSG00000143799',\n", " 'ENSG00000139629',\n", " 'ENSG00000134884',\n", " 'ENSG00000175567',\n", " 'ENSG00000102854',\n", " 'ENSG00000012223',\n", " 'ENSG00000113722',\n", " 'ENSG00000152672',\n", " 'ENSG00000248746',\n", " 'del',\n", " 'ENSG00000129757',\n", " 'ENSG00000171885',\n", " 'del',\n", " 'ENSG00000169594',\n", " 'del',\n", " 'ENSG00000109205',\n", " 'del',\n", " 'ENSG00000167281',\n", " 'ENSG00000122859',\n", " 'ENSG00000167772',\n", " 'ENSG00000079841',\n", " 'ENSG00000164175',\n", " 'ENSG00000184221',\n", " 'ENSG00000121351',\n", " 'ENSG00000144908',\n", " 'ENSG00000215846',\n", " 'ENSG00000173546',\n", " 'del',\n", " 'ENSG00000137561',\n", " 'ENSG00000140519',\n", " 'ENSG00000173320',\n", " 'ENSG00000082175',\n", " 'ENSG00000118520',\n", " 'ENSG00000142627',\n", " 'ENSG00000166710',\n", " 'del',\n", " 'ENSG00000273079',\n", " 'ENSG00000189001',\n", " 'ENSG00000103569',\n", " 'ENSG00000165655',\n", " 'ENSG00000143839',\n", " 'ENSG00000215148',\n", " 'ENSG00000157765',\n", " 'del',\n", " 'ENSG00000136944',\n", " 'ENSG00000165521',\n", " 'ENSG00000198682',\n", " 'ENSG00000105329',\n", " 'ENSG00000164404',\n", " 'ENSG00000115271',\n", " 'ENSG00000169218',\n", " 'ENSG00000012779',\n", " 'ENSG00000127528',\n", " 'ENSG00000100311',\n", " 'ENSG00000160211',\n", " 'ENSG00000090339',\n", " 'ENSG00000103375',\n", " 'ENSG00000166426',\n", " 'del',\n", " 'ENSG00000109511',\n", " 'ENSG00000205213',\n", " 'ENSG00000173702',\n", " 'ENSG00000177468',\n", " 'ENSG00000169474',\n", " 'ENSG00000150961',\n", " 'ENSG00000124102',\n", " 'del',\n", " 'ENSG00000138794',\n", " 'ENSG00000089685',\n", " 'ENSG00000282608',\n", " 'ENSG00000214128',\n", " 'ENSG00000135097',\n", " 'ENSG00000198178',\n", " 'ENSG00000127616',\n", " 'ENSG00000243566',\n", " 'ENSG00000158955',\n", " 'ENSG00000187017',\n", " 'ENSG00000126337',\n", " 'ENSG00000126778',\n", " 'ENSG00000112782',\n", " 'ENSG00000091879',\n", " 'ENSG00000168610',\n", " 'del',\n", " 'ENSG00000106799',\n", " 'ENSG00000196104',\n", " 'ENSG00000164120',\n", " 'del',\n", " 'ENSG00000163950',\n", " 'ENSG00000100285',\n", " 'ENSG00000139352',\n", " 'ENSG00000036565',\n", " 'del',\n", " 'ENSG00000183662',\n", " 'ENSG00000095917',\n", " 'ENSG00000123095',\n", " 'ENSG00000130598',\n", " 'ENSG00000161055',\n", " 'del',\n", " 'del',\n", " 'del',\n", " 'ENSG00000007062',\n", " 'ENSG00000148848',\n", " 'ENSG00000186832',\n", " 'ENSG00000105357',\n", " 'ENSG00000154678',\n", " 'ENSG00000018280',\n", " 'ENSG00000166523',\n", " 'ENSG00000102038',\n", " 'ENSG00000049768',\n", " 'ENSG00000099194',\n", " 'ENSG00000102879',\n", " 'ENSG00000131142',\n", " 'ENSG00000177575',\n", " 'ENSG00000121879',\n", " 'ENSG00000154864',\n", " 'ENSG00000211772',\n", " 'ENSG00000211689',\n", " 'ENSG00000113593',\n", " 'ENSG00000124731',\n", " 'del',\n", " 'del',\n", " 'ENSG00000243649',\n", " 'ENSG00000174099',\n", " 'ENSG00000198910',\n", " 'del',\n", " 'ENSG00000160339',\n", " 'ENSG00000204257',\n", " 'ENSG00000119922',\n", " 'ENSG00000179348',\n", " 'ENSG00000095981',\n", " 'ENSG00000137462',\n", " 'ENSG00000132693',\n", " 'ENSG00000180155',\n", " 'ENSG00000136235',\n", " 'ENSG00000183963',\n", " 'ENSG00000170549',\n", " 'ENSG00000144648',\n", " 'ENSG00000172209',\n", " 'ENSG00000166143',\n", " 'ENSG00000187021',\n", " 'ENSG00000147571',\n", " 'ENSG00000175084',\n", " 'ENSG00000164916',\n", " 'ENSG00000120341',\n", " 'ENSG00000077522',\n", " 'ENSG00000244405',\n", " 'ENSG00000102760',\n", " 'ENSG00000172232',\n", " 'ENSG00000099985',\n", " 'ENSG00000039600',\n", " 'ENSG00000178764',\n", " 'ENSG00000047457',\n", " 'ENSG00000066405',\n", " 'ENSG00000175793',\n", " 'ENSG00000161798',\n", " 'ENSG00000002586',\n", " 'ENSG00000089199',\n", " 'ENSG00000050767',\n", " 'ENSG00000144810',\n", " 'ENSG00000167244',\n", " 'ENSG00000091138',\n", " 'ENSG00000133048',\n", " 'ENSG00000106404',\n", " 'ENSG00000243955',\n", " 'ENSG00000172164',\n", " 'ENSG00000160200',\n", " 'ENSG00000015520',\n", " 'ENSG00000142319',\n", " 'ENSG00000174059',\n", " 'ENSG00000110245',\n", " 'ENSG00000182492',\n", " 'ENSG00000130226',\n", " 'ENSG00000113494',\n", " 'ENSG00000173372',\n", " 'del',\n", " 'del',\n", " 'ENSG00000050165',\n", " 'ENSG00000249669',\n", " 'ENSG00000051523',\n", " 'ENSG00000104783',\n", " 'ENSG00000145777',\n", " 'ENSG00000175538',\n", " 'ENSG00000164081',\n", " 'ENSG00000120885',\n", " 'ENSG00000123560',\n", " 'ENSG00000100031',\n", " 'ENSG00000144668',\n", " 'ENSG00000010319',\n", " 'ENSG00000083457',\n", " 'ENSG00000188229',\n", " 'ENSG00000152778',\n", " 'ENSG00000198728',\n", " 'ENSG00000196792',\n", " 'del',\n", " 'ENSG00000145287',\n", " 'ENSG00000112149',\n", " 'del',\n", " 'ENSG00000162614',\n", " 'ENSG00000172137',\n", " 'ENSG00000019991',\n", " 'ENSG00000134627',\n", " 'ENSG00000128218',\n", " 'ENSG00000180900',\n", " 'ENSG00000111913',\n", " 'ENSG00000168903',\n", " 'ENSG00000114200',\n", " 'ENSG00000141646',\n", " 'ENSG00000123096',\n", " 'ENSG00000184481',\n", " 'ENSG00000196878',\n", " 'ENSG00000162998',\n", " 'ENSG00000067048',\n", " 'ENSG00000157992',\n", " 'ENSG00000187513',\n", " 'ENSG00000181649',\n", " 'ENSG00000178538',\n", " 'ENSG00000125872',\n", " 'ENSG00000177455',\n", " 'ENSG00000166402',\n", " 'ENSG00000116690',\n", " 'ENSG00000126460',\n", " 'ENSG00000162747',\n", " 'ENSG00000127329',\n", " 'ENSG00000211821',\n", " ...]" ] }, "execution_count": 155, "metadata": {}, "output_type": "execute_result" } ], "source": [ "[dict_gene_name_to_ensg[x] if x in dict_gene_name_to_ensg.keys() else 'del' for x in all_gene_list ]" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [], "source": [ "from hicmatrix import HiCMatrix as hm\n", "from hicmatrix.lib import MatrixFileHandler\n", "SRP_name='aggregates'\n", "resolution='40kbp_raw'\n", "exp_file_path=f'/grid/gillis/data/lohia/hi_c_data_processing/data_mouse/aggregates/40kbp_raw/max/pcc/0/all_bins/KR_KR/hic_gene_corr_gw.h5'\n", "\n", "pcc_sim = hm.hiCMatrix(exp_file_path)\n", "all_pcc_genes = [x[3].decode() for x in pcc_sim.cut_intervals]\n", "pcc_sim_matrix = pcc_sim.matrix.toarray()\n", "df_pcc_sim = pd.DataFrame(pcc_sim_matrix, index=all_pcc_genes, columns = all_pcc_genes)\n" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [], "source": [ " pcc_sim_matrix = pcc_sim.matrix.toarray()\n", " df_pcc_sim = pd.DataFrame(pcc_sim_matrix, index=all_pcc_genes, columns = all_pcc_genes)" ] }, { "cell_type": "code", "execution_count": 139, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(92, 92)\n", "(92, 22996)\n", "0.9942792508337934\n", "0.0\n" ] } ], "source": [ "df_2d_pcc = run_egad(go_table_marker, df_pcc_sim )" ] }, { "cell_type": "code", "execution_count": 140, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.4710205388227698" ] }, "execution_count": 140, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_2d_pcc['AUC'].mean()" ] }, { "cell_type": "code", "execution_count": 62, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(1621, 1621)\n", "(1621, 22996)\n", "0.9944747250520944\n", "0.0\n" ] } ], "source": [ "df_2d_pcc_marker = run_egad(go_table_marker, df_pcc_sim)" ] }, { "cell_type": "code", "execution_count": 138, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
GO:0000002GO:0000003GO:0000009GO:0000010GO:0000012GO:0000014GO:0000015GO:0000016GO:0000017GO:0000018...GO:2001293GO:2001294GO:2001295GO:2001300GO:2001301GO:2001302GO:2001303GO:2001304GO:2001306GO:2001311
NetworkIDs
ENSMUSG000000015200.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
ENSMUSG000000019880.01.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
ENSMUSG000000034110.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
ENSMUSG000000036570.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
ENSMUSG000000043660.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
..................................................................
ENSMUSG000000688590.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
ENSMUSG000000708800.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
ENSMUSG000000752700.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
ENSMUSG000000900630.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
ENSMUSG000000969140.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
\n", "

96 rows × 22996 columns

\n", "
" ], "text/plain": [ " GO:0000002 GO:0000003 GO:0000009 GO:0000010 \\\n", "NetworkIDs \n", "ENSMUSG00000001520 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000001988 0.0 1.0 0.0 0.0 \n", "ENSMUSG00000003411 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000003657 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000004366 0.0 0.0 0.0 0.0 \n", "... ... ... ... ... \n", "ENSMUSG00000068859 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000070880 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000075270 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000090063 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000096914 0.0 0.0 0.0 0.0 \n", "\n", " GO:0000012 GO:0000014 GO:0000015 GO:0000016 \\\n", "NetworkIDs \n", "ENSMUSG00000001520 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000001988 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000003411 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000003657 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000004366 0.0 0.0 0.0 0.0 \n", "... ... ... ... ... \n", "ENSMUSG00000068859 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000070880 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000075270 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000090063 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000096914 0.0 0.0 0.0 0.0 \n", "\n", " GO:0000017 GO:0000018 ... GO:2001293 GO:2001294 \\\n", "NetworkIDs ... \n", "ENSMUSG00000001520 0.0 0.0 ... 0.0 0.0 \n", "ENSMUSG00000001988 0.0 0.0 ... 0.0 0.0 \n", "ENSMUSG00000003411 0.0 0.0 ... 0.0 0.0 \n", "ENSMUSG00000003657 0.0 0.0 ... 0.0 0.0 \n", "ENSMUSG00000004366 0.0 0.0 ... 0.0 0.0 \n", "... ... ... ... ... ... \n", "ENSMUSG00000068859 0.0 0.0 ... 0.0 0.0 \n", "ENSMUSG00000070880 0.0 0.0 ... 0.0 0.0 \n", "ENSMUSG00000075270 0.0 0.0 ... 0.0 0.0 \n", "ENSMUSG00000090063 0.0 0.0 ... 0.0 0.0 \n", "ENSMUSG00000096914 0.0 0.0 ... 0.0 0.0 \n", "\n", " GO:2001295 GO:2001300 GO:2001301 GO:2001302 \\\n", "NetworkIDs \n", "ENSMUSG00000001520 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000001988 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000003411 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000003657 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000004366 0.0 0.0 0.0 0.0 \n", "... ... ... ... ... \n", "ENSMUSG00000068859 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000070880 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000075270 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000090063 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000096914 0.0 0.0 0.0 0.0 \n", "\n", " GO:2001303 GO:2001304 GO:2001306 GO:2001311 \n", "NetworkIDs \n", "ENSMUSG00000001520 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000001988 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000003411 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000003657 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000004366 0.0 0.0 0.0 0.0 \n", "... ... ... ... ... \n", "ENSMUSG00000068859 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000070880 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000075270 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000090063 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000096914 0.0 0.0 0.0 0.0 \n", "\n", "[96 rows x 22996 columns]" ] }, "execution_count": 138, "metadata": {}, "output_type": "execute_result" } ], "source": [ "go_table_marker" ] }, { "cell_type": "code", "execution_count": 75, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.5100544514893378" ] }, "execution_count": 75, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_2d_pcc_marker['AUC'].mean()" ] }, { "cell_type": "code", "execution_count": 66, "metadata": {}, "outputs": [], "source": [ " from hicmatrix import HiCMatrix as hm\n", " from hicmatrix.lib import MatrixFileHandler\n", " \n", "\n", "\n", " exp_file = hm.hiCMatrix('/grid/gillis/data/lohia/hi_c_data_processing/software/CoCoCoNet/networks/mouse_prioAggNet.h5')\n", " #chr_list_1 = exp_file.getChrNames()\n", " \n", " exp_matrix = exp_file.matrix.toarray()\n", " exp_genes_all = [x[3].decode() for x in exp_file.cut_intervals]\n", " df_exp = pd.DataFrame(exp_matrix , index=exp_genes_all , columns = exp_genes_all)\n", " " ] }, { "cell_type": "code", "execution_count": 135, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(96, 96)\n", "(96, 22996)\n", "0.9943604322490868\n", "0.0\n" ] } ], "source": [ "df_2d = run_egad(go_table_marker, df_exp)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ " df = pd.read_csv(f'/grid/gillis/data/lohia/hi_c_data_processing/software/CoCoCoNet/gene2go/mouse_gene2go.csv', delim_whitespace=True)\n", "\n", " df['val'] = 1\n", "\n", " go_table = pd.pivot_table(df, index=['NetworkIDs'],columns=['GO_term'])\n", "\n", " go_table = go_table.fillna(0)\n", " \n", " go_table = pd.DataFrame(go_table.values , index=go_table.index , columns = [x[1] for x in go_table.columns])\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from hicmatrix import HiCMatrix as hm\n", "from hicmatrix.lib import MatrixFileHandler\n", "SRP_name='aggregates'\n", "resolution='40kbp_raw'\n", "exp_file_path=f'/grid/gillis/data/lohia/hi_c_data_processing/data_mouse/aggregates/10kbp_raw/max/hic_gene_inter_KR.h5'\n", "\n", "jac_sim = hm.hiCMatrix(exp_file_path)\n", "all_genes = [x[3].decode() for x in jac_sim.cut_intervals]\n", "jac_sim_matrix = jac_sim.matrix.toarray()\n", "df_jac_sim = pd.DataFrame(jac_sim_matrix, index=all_genes, columns = all_genes)\n", " " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 142, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(92, 92)\n", "(92, 22996)\n", "0.9942792508337934\n", "0.07230623818525517\n" ] } ], "source": [ "df_2d_jac = run_egad(go_table_marker, df_jac_sim)" ] }, { "cell_type": "code", "execution_count": 143, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.5231521483736035" ] }, "execution_count": 143, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_2d_jac['AUC'].mean()" ] }, { "cell_type": "code", "execution_count": 141, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 141, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "#import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", "#plt.plot(data=df_2d, x='AUC', y='DEGREE_NULL_AUC')\n", "plt.scatter(df_2d_pcc['AUC'], df_2d_pcc['DEGREE_NULL_AUC'])\n", "plt.plot([0, 1], [0, 1], c='black')\n", "plt.axvline(x=df_2d['AUC'].mean(),c='black',ls='--')\n", "plt.axhline(y=df_2d['DEGREE_NULL_AUC'].mean(), c='black', ls='--')" ] }, { "cell_type": "code", "execution_count": 71, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 71, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "#import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", "#plt.plot(data=df_2d, x='AUC', y='DEGREE_NULL_AUC')\n", "plt.scatter(df_2d['AUC'], df_2d['DEGREE_NULL_AUC'])\n", "plt.plot([0, 1], [0, 1], c='black')\n", "plt.axvline(x=df_2d['AUC'].mean(),c='black',ls='--')\n", "plt.axhline(y=df_2d['DEGREE_NULL_AUC'].mean(), c='black', ls='--')" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "GO_groups = pd.read_csv('/grid/gillis/data/lohia/hi_c_data_processing/genomes_jlee/aug4.GOslim')\n", "df_slim = df_2d[df_2d.index.isin(GO_groups['go_id'].tolist())]" ] }, { "cell_type": "code", "execution_count": 64, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 64, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "#import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", "#plt.plot(data=df_2d_pcc_marker, x='AUC', y='DEGREE_NULL_AUC')\n", "plt.scatter(df_2d_pcc_marker['AUC'], df_2d_pcc_marker['DEGREE_NULL_AUC'])\n", "plt.plot([0, 1], [0, 1], c='black')\n", "plt.axvline(x=df_2d_pcc_marker['AUC'].mean(),c='black',ls='--')\n", "plt.axhline(y=df_2d_pcc_marker['DEGREE_NULL_AUC'].mean(), c='black', ls='--')" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "#import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", "#plt.plot(data=df_2d, x='AUC', y='DEGREE_NULL_AUC')\n", "plt.scatter(df_2d['AUC'], df_2d['DEGREE_NULL_AUC'])\n", "plt.plot([0, 1], [0, 1], c='black')\n", "plt.axvline(x=df_2d['AUC'].mean(),c='black',ls='--')\n", "plt.axhline(y=df_2d['DEGREE_NULL_AUC'].mean(), c='black', ls='--')" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AUCAVG_NODE_DEGREEDEGREE_NULL_AUCP_Value
GO:00000020.36512919.6030630.5067720.017060
GO:00000180.55247324.2371660.5805850.022531
GO:00000270.50010724.4615630.6009000.175725
GO:00000280.43234431.4982790.7163990.136796
GO:00000300.53237218.9212800.5023400.272772
...............
GO:20012510.49963223.0311820.5677700.293245
GO:20012520.49903121.8374830.5333570.094241
GO:20012570.52281418.7202780.4829620.150789
GO:20012580.45781518.7788350.4864790.036969
GO:20012590.53903218.1032870.4739040.114798
\n", "

5533 rows × 4 columns

\n", "
" ], "text/plain": [ " AUC AVG_NODE_DEGREE DEGREE_NULL_AUC P_Value\n", "GO:0000002 0.365129 19.603063 0.506772 0.017060\n", "GO:0000018 0.552473 24.237166 0.580585 0.022531\n", "GO:0000027 0.500107 24.461563 0.600900 0.175725\n", "GO:0000028 0.432344 31.498279 0.716399 0.136796\n", "GO:0000030 0.532372 18.921280 0.502340 0.272772\n", "... ... ... ... ...\n", "GO:2001251 0.499632 23.031182 0.567770 0.293245\n", "GO:2001252 0.499031 21.837483 0.533357 0.094241\n", "GO:2001257 0.522814 18.720278 0.482962 0.150789\n", "GO:2001258 0.457815 18.778835 0.486479 0.036969\n", "GO:2001259 0.539032 18.103287 0.473904 0.114798\n", "\n", "[5533 rows x 4 columns]" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_2d" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "import seaborn" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "from hicmatrix import HiCMatrix as hm\n", "from hicmatrix.lib import MatrixFileHandler\n", "SRP_name='aggregates'\n", "resolution='40kbp_raw'\n", "exp_file_path=f'/grid/gillis/data/lohia/hi_c_data_processing/data_mouse/aggregates/40kbp_raw/max/pcc/0/all_bins/KR_KR/hic_gene_corr_gw.h5'\n", "\n", "pcc_sim = hm.hiCMatrix(exp_file_path)\n", "all_pcc_genes = [x[3].decode() for x in pcc_sim.cut_intervals]\n", "pcc_sim_matrix = pcc_sim.matrix.toarray()\n", "df_pcc_sim = pd.DataFrame(pcc_sim_matrix, index=all_pcc_genes, columns = all_pcc_genes)\n" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ENSMUSG00000102693ENSMUSG00000064842ENSMUSG00000051951ENSMUSG00000102851ENSMUSG00000103377ENSMUSG00000104017ENSMUSG00000103025ENSMUSG00000089699ENSMUSG00000103201ENSMUSG00000103147...ENSMUSG00000024997ENSMUSG00000003228ENSMUSG00000096578ENSMUSG00000088894ENSMUSG00000074733ENSMUSG00000070263ENSMUSG00000094649ENSMUSG00000069475ENSMUSG00000059326ENSMUSG00000095993
ENSMUSG000001026931.0000000.5712570.5055230.6388820.5372950.5372950.4429600.4276620.4168630.412963...-0.001455-0.001262-0.001243-0.001231-0.001103-0.000789-0.000789-0.000789-0.000853-0.000940
ENSMUSG000000648420.5712571.0000000.5197880.6719240.5545780.5545780.4557760.4390550.4252400.420280...-0.001193-0.001049-0.000989-0.001026-0.000567-0.000496-0.000496-0.000496-0.000529-0.000626
ENSMUSG000000519510.5055230.5197881.0000000.6117460.6127940.6127940.7104650.7343280.7087550.714727...-0.001094-0.000870-0.000900-0.000883-0.000497-0.000611-0.000611-0.000611-0.000707-0.000704
ENSMUSG000001028510.6388820.6719240.6117461.0000000.8673080.8673080.7793320.7487510.7410940.729829...-0.001573-0.001405-0.001275-0.001307-0.000594-0.000760-0.000760-0.000760-0.001115-0.001000
ENSMUSG000001033770.5372950.5545780.6127940.8673081.0000001.0000000.8145320.7948820.8454490.831854...-0.001317-0.001101-0.000989-0.000979-0.000999-0.000678-0.000678-0.000678-0.000690-0.000806
..................................................................
ENSMUSG00000070263-0.000789-0.000496-0.000611-0.000760-0.000678-0.000678-0.000522-0.000493-0.000439-0.000602...0.6146520.6448820.6140070.6697020.5221781.0000001.0000001.0000000.5373420.626563
ENSMUSG00000094649-0.000789-0.000496-0.000611-0.000760-0.000678-0.000678-0.000522-0.000493-0.000439-0.000602...0.6146520.6448820.6140070.6697020.5221781.0000001.0000001.0000000.5373420.626563
ENSMUSG00000069475-0.000789-0.000496-0.000611-0.000760-0.000678-0.000678-0.000522-0.000493-0.000439-0.000602...0.6146520.6448820.6140070.6697020.5221781.0000001.0000001.0000000.5373420.626563
ENSMUSG00000059326-0.000853-0.000529-0.000707-0.001115-0.000690-0.000690-0.000491-0.000612-0.000632-0.000558...0.5455700.6238420.5157800.5437180.7974470.5373420.5373420.5373421.0000000.700237
ENSMUSG00000095993-0.000940-0.000626-0.000704-0.001000-0.000806-0.000806-0.000655-0.000598-0.000574-0.000606...0.4275230.4845230.3959560.4130000.5811450.6265630.6265630.6265630.7002371.000000
\n", "

50225 rows × 50225 columns

\n", "
" ], "text/plain": [ " ENSMUSG00000102693 ENSMUSG00000064842 \\\n", "ENSMUSG00000102693 1.000000 0.571257 \n", "ENSMUSG00000064842 0.571257 1.000000 \n", "ENSMUSG00000051951 0.505523 0.519788 \n", "ENSMUSG00000102851 0.638882 0.671924 \n", "ENSMUSG00000103377 0.537295 0.554578 \n", "... ... ... \n", "ENSMUSG00000070263 -0.000789 -0.000496 \n", "ENSMUSG00000094649 -0.000789 -0.000496 \n", "ENSMUSG00000069475 -0.000789 -0.000496 \n", "ENSMUSG00000059326 -0.000853 -0.000529 \n", "ENSMUSG00000095993 -0.000940 -0.000626 \n", "\n", " ENSMUSG00000051951 ENSMUSG00000102851 \\\n", "ENSMUSG00000102693 0.505523 0.638882 \n", "ENSMUSG00000064842 0.519788 0.671924 \n", "ENSMUSG00000051951 1.000000 0.611746 \n", "ENSMUSG00000102851 0.611746 1.000000 \n", "ENSMUSG00000103377 0.612794 0.867308 \n", "... ... ... \n", "ENSMUSG00000070263 -0.000611 -0.000760 \n", "ENSMUSG00000094649 -0.000611 -0.000760 \n", "ENSMUSG00000069475 -0.000611 -0.000760 \n", "ENSMUSG00000059326 -0.000707 -0.001115 \n", "ENSMUSG00000095993 -0.000704 -0.001000 \n", "\n", " ENSMUSG00000103377 ENSMUSG00000104017 \\\n", "ENSMUSG00000102693 0.537295 0.537295 \n", "ENSMUSG00000064842 0.554578 0.554578 \n", "ENSMUSG00000051951 0.612794 0.612794 \n", "ENSMUSG00000102851 0.867308 0.867308 \n", "ENSMUSG00000103377 1.000000 1.000000 \n", "... ... ... \n", "ENSMUSG00000070263 -0.000678 -0.000678 \n", "ENSMUSG00000094649 -0.000678 -0.000678 \n", "ENSMUSG00000069475 -0.000678 -0.000678 \n", "ENSMUSG00000059326 -0.000690 -0.000690 \n", "ENSMUSG00000095993 -0.000806 -0.000806 \n", "\n", " ENSMUSG00000103025 ENSMUSG00000089699 \\\n", "ENSMUSG00000102693 0.442960 0.427662 \n", "ENSMUSG00000064842 0.455776 0.439055 \n", "ENSMUSG00000051951 0.710465 0.734328 \n", "ENSMUSG00000102851 0.779332 0.748751 \n", "ENSMUSG00000103377 0.814532 0.794882 \n", "... ... ... \n", "ENSMUSG00000070263 -0.000522 -0.000493 \n", "ENSMUSG00000094649 -0.000522 -0.000493 \n", "ENSMUSG00000069475 -0.000522 -0.000493 \n", "ENSMUSG00000059326 -0.000491 -0.000612 \n", "ENSMUSG00000095993 -0.000655 -0.000598 \n", "\n", " ENSMUSG00000103201 ENSMUSG00000103147 ... \\\n", "ENSMUSG00000102693 0.416863 0.412963 ... \n", "ENSMUSG00000064842 0.425240 0.420280 ... \n", "ENSMUSG00000051951 0.708755 0.714727 ... \n", "ENSMUSG00000102851 0.741094 0.729829 ... \n", "ENSMUSG00000103377 0.845449 0.831854 ... \n", "... ... ... ... \n", "ENSMUSG00000070263 -0.000439 -0.000602 ... \n", "ENSMUSG00000094649 -0.000439 -0.000602 ... \n", "ENSMUSG00000069475 -0.000439 -0.000602 ... \n", "ENSMUSG00000059326 -0.000632 -0.000558 ... \n", "ENSMUSG00000095993 -0.000574 -0.000606 ... \n", "\n", " ENSMUSG00000024997 ENSMUSG00000003228 \\\n", "ENSMUSG00000102693 -0.001455 -0.001262 \n", "ENSMUSG00000064842 -0.001193 -0.001049 \n", "ENSMUSG00000051951 -0.001094 -0.000870 \n", "ENSMUSG00000102851 -0.001573 -0.001405 \n", "ENSMUSG00000103377 -0.001317 -0.001101 \n", "... ... ... \n", "ENSMUSG00000070263 0.614652 0.644882 \n", "ENSMUSG00000094649 0.614652 0.644882 \n", "ENSMUSG00000069475 0.614652 0.644882 \n", "ENSMUSG00000059326 0.545570 0.623842 \n", "ENSMUSG00000095993 0.427523 0.484523 \n", "\n", " ENSMUSG00000096578 ENSMUSG00000088894 \\\n", "ENSMUSG00000102693 -0.001243 -0.001231 \n", "ENSMUSG00000064842 -0.000989 -0.001026 \n", "ENSMUSG00000051951 -0.000900 -0.000883 \n", "ENSMUSG00000102851 -0.001275 -0.001307 \n", "ENSMUSG00000103377 -0.000989 -0.000979 \n", "... ... ... \n", "ENSMUSG00000070263 0.614007 0.669702 \n", "ENSMUSG00000094649 0.614007 0.669702 \n", "ENSMUSG00000069475 0.614007 0.669702 \n", "ENSMUSG00000059326 0.515780 0.543718 \n", "ENSMUSG00000095993 0.395956 0.413000 \n", "\n", " ENSMUSG00000074733 ENSMUSG00000070263 \\\n", "ENSMUSG00000102693 -0.001103 -0.000789 \n", "ENSMUSG00000064842 -0.000567 -0.000496 \n", "ENSMUSG00000051951 -0.000497 -0.000611 \n", "ENSMUSG00000102851 -0.000594 -0.000760 \n", "ENSMUSG00000103377 -0.000999 -0.000678 \n", "... ... ... \n", "ENSMUSG00000070263 0.522178 1.000000 \n", "ENSMUSG00000094649 0.522178 1.000000 \n", "ENSMUSG00000069475 0.522178 1.000000 \n", "ENSMUSG00000059326 0.797447 0.537342 \n", "ENSMUSG00000095993 0.581145 0.626563 \n", "\n", " ENSMUSG00000094649 ENSMUSG00000069475 \\\n", "ENSMUSG00000102693 -0.000789 -0.000789 \n", "ENSMUSG00000064842 -0.000496 -0.000496 \n", "ENSMUSG00000051951 -0.000611 -0.000611 \n", "ENSMUSG00000102851 -0.000760 -0.000760 \n", "ENSMUSG00000103377 -0.000678 -0.000678 \n", "... ... ... \n", "ENSMUSG00000070263 1.000000 1.000000 \n", "ENSMUSG00000094649 1.000000 1.000000 \n", "ENSMUSG00000069475 1.000000 1.000000 \n", "ENSMUSG00000059326 0.537342 0.537342 \n", "ENSMUSG00000095993 0.626563 0.626563 \n", "\n", " ENSMUSG00000059326 ENSMUSG00000095993 \n", "ENSMUSG00000102693 -0.000853 -0.000940 \n", "ENSMUSG00000064842 -0.000529 -0.000626 \n", "ENSMUSG00000051951 -0.000707 -0.000704 \n", "ENSMUSG00000102851 -0.001115 -0.001000 \n", "ENSMUSG00000103377 -0.000690 -0.000806 \n", "... ... ... \n", "ENSMUSG00000070263 0.537342 0.626563 \n", "ENSMUSG00000094649 0.537342 0.626563 \n", "ENSMUSG00000069475 0.537342 0.626563 \n", "ENSMUSG00000059326 1.000000 0.700237 \n", "ENSMUSG00000095993 0.700237 1.000000 \n", "\n", "[50225 rows x 50225 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_pcc_sim" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "from hicmatrix import HiCMatrix as hm\n", "from hicmatrix.lib import MatrixFileHandler\n", "SRP_name='aggregates'\n", "resolution='40kbp_raw'\n", "exp_file_path=f'/grid/gillis/data/lohia/hi_c_data_processing/data_human/aggregates/100kbp_raw/max/pcc/0/all_bins/KR_KR/hic_gene_corr_gw.h5'\n", "\n", "pcc_sim = hm.hiCMatrix(exp_file_path)\n", "all_pcc_genes = [x[3].decode() for x in pcc_sim.cut_intervals]\n", "pcc_sim_matrix_human = pcc_sim.matrix.toarray()\n", "df_pcc_sim_human = pd.DataFrame(pcc_sim_matrix_human, index=all_pcc_genes, columns = all_pcc_genes)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ENSG00000119946 - neuron CNNM1 10 \n", "CNR1 - ENSG00000166589 16 \n", "CNTN1 - ENSG00000018236 12 \n", "\n", "epithilal cells\n", "RND3 - ENSG00000115963 2\n", "ASS1 - ENSG00000130707 9 \n", "KRT5 - ENSG00000186081 12 \n", "\n" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "-0.0009672968385351642" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_pcc_sim_human['ENSG00000130707']['ENSG00000115963']" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import seaborn as sns\n", "#import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", "ax = sns.histplot(df_pcc_sim_human)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "\n", "df_jac_corr = pd.DataFrame(jac_sim.matrix.toarray() , index=all_genes, columns = all_genes)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ENSMUSG00000102693ENSMUSG00000064842ENSMUSG00000051951ENSMUSG00000102851ENSMUSG00000103377ENSMUSG00000104017ENSMUSG00000103025ENSMUSG00000089699ENSMUSG00000103201ENSMUSG00000103147...ENSMUSG00000024997ENSMUSG00000003228ENSMUSG00000096578ENSMUSG00000088894ENSMUSG00000074733ENSMUSG00000070263ENSMUSG00000094649ENSMUSG00000069475ENSMUSG00000059326ENSMUSG00000095993
ENSMUSG000001026931.0000000.5712570.5055230.6388820.5372950.5372950.4429600.4276620.4168630.412963...-0.001455-0.001262-0.001243-0.001231-0.001103-0.000789-0.000789-0.000789-0.000853-0.000940
ENSMUSG000000648420.5712571.0000000.5197880.6719240.5545780.5545780.4557760.4390550.4252400.420280...-0.001193-0.001049-0.000989-0.001026-0.000567-0.000496-0.000496-0.000496-0.000529-0.000626
ENSMUSG000000519510.5055230.5197881.0000000.6117460.6127940.6127940.7104650.7343280.7087550.714727...-0.001094-0.000870-0.000900-0.000883-0.000497-0.000611-0.000611-0.000611-0.000707-0.000704
ENSMUSG000001028510.6388820.6719240.6117461.0000000.8673080.8673080.7793320.7487510.7410940.729829...-0.001573-0.001405-0.001275-0.001307-0.000594-0.000760-0.000760-0.000760-0.001115-0.001000
ENSMUSG000001033770.5372950.5545780.6127940.8673081.0000001.0000000.8145320.7948820.8454490.831854...-0.001317-0.001101-0.000989-0.000979-0.000999-0.000678-0.000678-0.000678-0.000690-0.000806
..................................................................
ENSMUSG00000070263-0.000789-0.000496-0.000611-0.000760-0.000678-0.000678-0.000522-0.000493-0.000439-0.000602...0.6146520.6448820.6140070.6697020.5221781.0000001.0000001.0000000.5373420.626563
ENSMUSG00000094649-0.000789-0.000496-0.000611-0.000760-0.000678-0.000678-0.000522-0.000493-0.000439-0.000602...0.6146520.6448820.6140070.6697020.5221781.0000001.0000001.0000000.5373420.626563
ENSMUSG00000069475-0.000789-0.000496-0.000611-0.000760-0.000678-0.000678-0.000522-0.000493-0.000439-0.000602...0.6146520.6448820.6140070.6697020.5221781.0000001.0000001.0000000.5373420.626563
ENSMUSG00000059326-0.000853-0.000529-0.000707-0.001115-0.000690-0.000690-0.000491-0.000612-0.000632-0.000558...0.5455700.6238420.5157800.5437180.7974470.5373420.5373420.5373421.0000000.700237
ENSMUSG00000095993-0.000940-0.000626-0.000704-0.001000-0.000806-0.000806-0.000655-0.000598-0.000574-0.000606...0.4275230.4845230.3959560.4130000.5811450.6265630.6265630.6265630.7002371.000000
\n", "

50225 rows × 50225 columns

\n", "
" ], "text/plain": [ " ENSMUSG00000102693 ENSMUSG00000064842 \\\n", "ENSMUSG00000102693 1.000000 0.571257 \n", "ENSMUSG00000064842 0.571257 1.000000 \n", "ENSMUSG00000051951 0.505523 0.519788 \n", "ENSMUSG00000102851 0.638882 0.671924 \n", "ENSMUSG00000103377 0.537295 0.554578 \n", "... ... ... \n", "ENSMUSG00000070263 -0.000789 -0.000496 \n", "ENSMUSG00000094649 -0.000789 -0.000496 \n", "ENSMUSG00000069475 -0.000789 -0.000496 \n", "ENSMUSG00000059326 -0.000853 -0.000529 \n", "ENSMUSG00000095993 -0.000940 -0.000626 \n", "\n", " ENSMUSG00000051951 ENSMUSG00000102851 \\\n", "ENSMUSG00000102693 0.505523 0.638882 \n", "ENSMUSG00000064842 0.519788 0.671924 \n", "ENSMUSG00000051951 1.000000 0.611746 \n", "ENSMUSG00000102851 0.611746 1.000000 \n", "ENSMUSG00000103377 0.612794 0.867308 \n", "... ... ... \n", "ENSMUSG00000070263 -0.000611 -0.000760 \n", "ENSMUSG00000094649 -0.000611 -0.000760 \n", "ENSMUSG00000069475 -0.000611 -0.000760 \n", "ENSMUSG00000059326 -0.000707 -0.001115 \n", "ENSMUSG00000095993 -0.000704 -0.001000 \n", "\n", " ENSMUSG00000103377 ENSMUSG00000104017 \\\n", "ENSMUSG00000102693 0.537295 0.537295 \n", "ENSMUSG00000064842 0.554578 0.554578 \n", "ENSMUSG00000051951 0.612794 0.612794 \n", "ENSMUSG00000102851 0.867308 0.867308 \n", "ENSMUSG00000103377 1.000000 1.000000 \n", "... ... ... \n", "ENSMUSG00000070263 -0.000678 -0.000678 \n", "ENSMUSG00000094649 -0.000678 -0.000678 \n", "ENSMUSG00000069475 -0.000678 -0.000678 \n", "ENSMUSG00000059326 -0.000690 -0.000690 \n", "ENSMUSG00000095993 -0.000806 -0.000806 \n", "\n", " ENSMUSG00000103025 ENSMUSG00000089699 \\\n", "ENSMUSG00000102693 0.442960 0.427662 \n", "ENSMUSG00000064842 0.455776 0.439055 \n", "ENSMUSG00000051951 0.710465 0.734328 \n", "ENSMUSG00000102851 0.779332 0.748751 \n", "ENSMUSG00000103377 0.814532 0.794882 \n", "... ... ... \n", "ENSMUSG00000070263 -0.000522 -0.000493 \n", "ENSMUSG00000094649 -0.000522 -0.000493 \n", "ENSMUSG00000069475 -0.000522 -0.000493 \n", "ENSMUSG00000059326 -0.000491 -0.000612 \n", "ENSMUSG00000095993 -0.000655 -0.000598 \n", "\n", " ENSMUSG00000103201 ENSMUSG00000103147 ... \\\n", "ENSMUSG00000102693 0.416863 0.412963 ... \n", "ENSMUSG00000064842 0.425240 0.420280 ... \n", "ENSMUSG00000051951 0.708755 0.714727 ... \n", "ENSMUSG00000102851 0.741094 0.729829 ... \n", "ENSMUSG00000103377 0.845449 0.831854 ... \n", "... ... ... ... \n", "ENSMUSG00000070263 -0.000439 -0.000602 ... \n", "ENSMUSG00000094649 -0.000439 -0.000602 ... \n", "ENSMUSG00000069475 -0.000439 -0.000602 ... \n", "ENSMUSG00000059326 -0.000632 -0.000558 ... \n", "ENSMUSG00000095993 -0.000574 -0.000606 ... \n", "\n", " ENSMUSG00000024997 ENSMUSG00000003228 \\\n", "ENSMUSG00000102693 -0.001455 -0.001262 \n", "ENSMUSG00000064842 -0.001193 -0.001049 \n", "ENSMUSG00000051951 -0.001094 -0.000870 \n", "ENSMUSG00000102851 -0.001573 -0.001405 \n", "ENSMUSG00000103377 -0.001317 -0.001101 \n", "... ... ... \n", "ENSMUSG00000070263 0.614652 0.644882 \n", "ENSMUSG00000094649 0.614652 0.644882 \n", "ENSMUSG00000069475 0.614652 0.644882 \n", "ENSMUSG00000059326 0.545570 0.623842 \n", "ENSMUSG00000095993 0.427523 0.484523 \n", "\n", " ENSMUSG00000096578 ENSMUSG00000088894 \\\n", "ENSMUSG00000102693 -0.001243 -0.001231 \n", "ENSMUSG00000064842 -0.000989 -0.001026 \n", "ENSMUSG00000051951 -0.000900 -0.000883 \n", "ENSMUSG00000102851 -0.001275 -0.001307 \n", "ENSMUSG00000103377 -0.000989 -0.000979 \n", "... ... ... \n", "ENSMUSG00000070263 0.614007 0.669702 \n", "ENSMUSG00000094649 0.614007 0.669702 \n", "ENSMUSG00000069475 0.614007 0.669702 \n", "ENSMUSG00000059326 0.515780 0.543718 \n", "ENSMUSG00000095993 0.395956 0.413000 \n", "\n", " ENSMUSG00000074733 ENSMUSG00000070263 \\\n", "ENSMUSG00000102693 -0.001103 -0.000789 \n", "ENSMUSG00000064842 -0.000567 -0.000496 \n", "ENSMUSG00000051951 -0.000497 -0.000611 \n", "ENSMUSG00000102851 -0.000594 -0.000760 \n", "ENSMUSG00000103377 -0.000999 -0.000678 \n", "... ... ... \n", "ENSMUSG00000070263 0.522178 1.000000 \n", "ENSMUSG00000094649 0.522178 1.000000 \n", "ENSMUSG00000069475 0.522178 1.000000 \n", "ENSMUSG00000059326 0.797447 0.537342 \n", "ENSMUSG00000095993 0.581145 0.626563 \n", "\n", " ENSMUSG00000094649 ENSMUSG00000069475 \\\n", "ENSMUSG00000102693 -0.000789 -0.000789 \n", "ENSMUSG00000064842 -0.000496 -0.000496 \n", "ENSMUSG00000051951 -0.000611 -0.000611 \n", "ENSMUSG00000102851 -0.000760 -0.000760 \n", "ENSMUSG00000103377 -0.000678 -0.000678 \n", "... ... ... \n", "ENSMUSG00000070263 1.000000 1.000000 \n", "ENSMUSG00000094649 1.000000 1.000000 \n", "ENSMUSG00000069475 1.000000 1.000000 \n", "ENSMUSG00000059326 0.537342 0.537342 \n", "ENSMUSG00000095993 0.626563 0.626563 \n", "\n", " ENSMUSG00000059326 ENSMUSG00000095993 \n", "ENSMUSG00000102693 -0.000853 -0.000940 \n", "ENSMUSG00000064842 -0.000529 -0.000626 \n", "ENSMUSG00000051951 -0.000707 -0.000704 \n", "ENSMUSG00000102851 -0.001115 -0.001000 \n", "ENSMUSG00000103377 -0.000690 -0.000806 \n", "... ... ... \n", "ENSMUSG00000070263 0.537342 0.626563 \n", "ENSMUSG00000094649 0.537342 0.626563 \n", "ENSMUSG00000069475 0.537342 0.626563 \n", "ENSMUSG00000059326 1.000000 0.700237 \n", "ENSMUSG00000095993 0.700237 1.000000 \n", "\n", "[50225 rows x 50225 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_jac_corr" ] }, { "cell_type": "code", "execution_count": 131, "metadata": {}, "outputs": [], "source": [ "marker_list = pd.read_csv('/grid/gillis/data/lohia/hi_c_data_processing/notebooks/class_level_markers.csv')\n", "marker_list['gene'] = marker_list['gene'].str.upper()\n", "df_ensg_name = pd.read_csv('/grid/gillis/data/lohia/hi_c_data_processing/genomes_jlee/mouse_geneid_symbol.txt',sep='\\t', names=['gene_id', 'gene'])\n", "df_ensg_name['gene'] = df_ensg_name['gene'].str.upper()\n", "marker_list = marker_list.merge(df_ensg_name, right_on='gene', left_on='gene') " ] }, { "cell_type": "code", "execution_count": 132, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
groupcell_typerankgenerecurrenceaurocfold_changefold_change_detectionexpressionprecision...population_sizen_datasetsscSSsnSSscCv2snCv2snCv3MscCv3snCv3Zgene_id
0allGABAergic1GAD170.941159116.9604729.289078820.4634860.659089...10207.0000007TrueTrueTrueTrueTrueTrueTrueENSMUSG00000070880
1allGABAergic2GAD270.928440139.81141513.987046659.1515660.730005...10207.0000007TrueTrueTrueTrueTrueTrueTrueENSMUSG00000026787
2allGABAergic3ERBB470.92144981.7173835.7364152257.1677530.514809...10207.0000007TrueTrueTrueTrueTrueTrueTrueENSMUSG00000062209
3allGABAergic4KCNIP170.91691932.25203810.796420588.5719930.687830...10207.0000007TrueTrueTrueTrueTrueTrueTrueENSMUSG00000053519
4allGABAergic5RBMS370.90209317.0388693.607831340.7017980.442861...10207.0000007TrueTrueTrueTrueTrueTrueTrueENSMUSG00000039607
..................................................................
294allNon-Neuronal96MAG60.648512165.82008116.747920456.9553700.370001...8908.8571437FalseTrueTrueTrueTrueTrueTrueENSMUSG00000036634
295allNon-Neuronal97FNBP160.6480285.5928860.955553454.9319250.063414...8908.8571437FalseTrueTrueTrueTrueTrueTrueENSMUSG00000075415
296allNon-Neuronal98FRMD4B60.6478059.0505472.335387192.1510030.124702...8908.8571437FalseTrueTrueTrueTrueTrueTrueENSMUSG00000030064
297allNon-Neuronal99PLLP60.64554061.88454812.642643190.0391320.314672...8908.8571437FalseTrueTrueTrueTrueTrueTrueENSMUSG00000031775
298allNon-Neuronal100PRR5L60.645414187.90618530.603359334.9964040.447383...8908.8571437FalseTrueTrueTrueTrueTrueTrueENSMUSG00000032841
\n", "

299 rows × 21 columns

\n", "
" ], "text/plain": [ " group cell_type rank gene recurrence auroc fold_change \\\n", "0 all GABAergic 1 GAD1 7 0.941159 116.960472 \n", "1 all GABAergic 2 GAD2 7 0.928440 139.811415 \n", "2 all GABAergic 3 ERBB4 7 0.921449 81.717383 \n", "3 all GABAergic 4 KCNIP1 7 0.916919 32.252038 \n", "4 all GABAergic 5 RBMS3 7 0.902093 17.038869 \n", ".. ... ... ... ... ... ... ... \n", "294 all Non-Neuronal 96 MAG 6 0.648512 165.820081 \n", "295 all Non-Neuronal 97 FNBP1 6 0.648028 5.592886 \n", "296 all Non-Neuronal 98 FRMD4B 6 0.647805 9.050547 \n", "297 all Non-Neuronal 99 PLLP 6 0.645540 61.884548 \n", "298 all Non-Neuronal 100 PRR5L 6 0.645414 187.906185 \n", "\n", " fold_change_detection expression precision ... population_size \\\n", "0 9.289078 820.463486 0.659089 ... 10207.000000 \n", "1 13.987046 659.151566 0.730005 ... 10207.000000 \n", "2 5.736415 2257.167753 0.514809 ... 10207.000000 \n", "3 10.796420 588.571993 0.687830 ... 10207.000000 \n", "4 3.607831 340.701798 0.442861 ... 10207.000000 \n", ".. ... ... ... ... ... \n", "294 16.747920 456.955370 0.370001 ... 8908.857143 \n", "295 0.955553 454.931925 0.063414 ... 8908.857143 \n", "296 2.335387 192.151003 0.124702 ... 8908.857143 \n", "297 12.642643 190.039132 0.314672 ... 8908.857143 \n", "298 30.603359 334.996404 0.447383 ... 8908.857143 \n", "\n", " n_datasets scSS snSS scCv2 snCv2 snCv3M scCv3 snCv3Z \\\n", "0 7 True True True True True True True \n", "1 7 True True True True True True True \n", "2 7 True True True True True True True \n", "3 7 True True True True True True True \n", "4 7 True True True True True True True \n", ".. ... ... ... ... ... ... ... ... \n", "294 7 False True True True True True True \n", "295 7 False True True True True True True \n", "296 7 False True True True True True True \n", "297 7 False True True True True True True \n", "298 7 False True True True True True True \n", "\n", " gene_id \n", "0 ENSMUSG00000070880 \n", "1 ENSMUSG00000026787 \n", "2 ENSMUSG00000062209 \n", "3 ENSMUSG00000053519 \n", "4 ENSMUSG00000039607 \n", ".. ... \n", "294 ENSMUSG00000036634 \n", "295 ENSMUSG00000075415 \n", "296 ENSMUSG00000030064 \n", "297 ENSMUSG00000031775 \n", "298 ENSMUSG00000032841 \n", "\n", "[299 rows x 21 columns]" ] }, "execution_count": 132, "metadata": {}, "output_type": "execute_result" } ], "source": [ "marker_list " ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [], "source": [ "marker_list = marker_list.merge(df_ensg_name, right_on='gene', left_on='gene') " ] }, { "cell_type": "code", "execution_count": 133, "metadata": {}, "outputs": [], "source": [ "marker_gene_list = marker_list[marker_list['cell_type']=='GABAergic']['gene_id'].to_list()" ] }, { "cell_type": "code", "execution_count": 134, "metadata": {}, "outputs": [], "source": [ "go_table_marker = go_table[go_table.index.isin(marker_gene_list)]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 130, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
cell_typeAstroEndoL2/3 ITL4/5 ITL5 ITL5 PTL5/6 NPL6 CTL6 ITL6 IT Car3...Micro-PVMOligoPeriPvalbSMCSncgSstSst ChodlVLMCVip
gene_id
ENSMUSG00000000093NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaN96.0NaNNaNNaNNaNNaN
ENSMUSG00000000126NaNNaNNaN93.0NaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
ENSMUSG0000000020280.0NaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
ENSMUSG00000000305NaNNaNNaNNaNNaNNaNNaNNaN72.0NaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
ENSMUSG00000000392NaNNaNNaNNaNNaNNaNNaNNaNNaN21.0...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
..................................................................
ENSMUSG00000116022NaNNaNNaNNaNNaNNaNNaNNaNNaN95.0...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
ENSMUSG00000116076NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
ENSMUSG00000116258NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...67.0NaNNaNNaNNaNNaNNaNNaNNaNNaN
ENSMUSG00000116470NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...27.0NaNNaNNaNNaNNaNNaNNaNNaNNaN
ENSMUSG00000116510NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaN49.0NaN
\n", "

1748 rows × 23 columns

\n", "
" ], "text/plain": [ "cell_type Astro Endo L2/3 IT L4/5 IT L5 IT L5 PT L5/6 NP \\\n", "gene_id \n", "ENSMUSG00000000093 NaN NaN NaN NaN NaN NaN NaN \n", "ENSMUSG00000000126 NaN NaN NaN 93.0 NaN NaN NaN \n", "ENSMUSG00000000202 80.0 NaN NaN NaN NaN NaN NaN \n", "ENSMUSG00000000305 NaN NaN NaN NaN NaN NaN NaN \n", "ENSMUSG00000000392 NaN NaN NaN NaN NaN NaN NaN \n", "... ... ... ... ... ... ... ... \n", "ENSMUSG00000116022 NaN NaN NaN NaN NaN NaN NaN \n", "ENSMUSG00000116076 NaN NaN NaN NaN NaN NaN NaN \n", "ENSMUSG00000116258 NaN NaN NaN NaN NaN NaN NaN \n", "ENSMUSG00000116470 NaN NaN NaN NaN NaN NaN NaN \n", "ENSMUSG00000116510 NaN NaN NaN NaN NaN NaN NaN \n", "\n", "cell_type L6 CT L6 IT L6 IT Car3 ... Micro-PVM Oligo Peri \\\n", "gene_id ... \n", "ENSMUSG00000000093 NaN NaN NaN ... NaN NaN NaN \n", "ENSMUSG00000000126 NaN NaN NaN ... NaN NaN NaN \n", "ENSMUSG00000000202 NaN NaN NaN ... NaN NaN NaN \n", "ENSMUSG00000000305 NaN 72.0 NaN ... NaN NaN NaN \n", "ENSMUSG00000000392 NaN NaN 21.0 ... NaN NaN NaN \n", "... ... ... ... ... ... ... ... \n", "ENSMUSG00000116022 NaN NaN 95.0 ... NaN NaN NaN \n", "ENSMUSG00000116076 NaN NaN NaN ... NaN NaN NaN \n", "ENSMUSG00000116258 NaN NaN NaN ... 67.0 NaN NaN \n", "ENSMUSG00000116470 NaN NaN NaN ... 27.0 NaN NaN \n", "ENSMUSG00000116510 NaN NaN NaN ... NaN NaN NaN \n", "\n", "cell_type Pvalb SMC Sncg Sst Sst Chodl VLMC Vip \n", "gene_id \n", "ENSMUSG00000000093 NaN 96.0 NaN NaN NaN NaN NaN \n", "ENSMUSG00000000126 NaN NaN NaN NaN NaN NaN NaN \n", "ENSMUSG00000000202 NaN NaN NaN NaN NaN NaN NaN \n", "ENSMUSG00000000305 NaN NaN NaN NaN NaN NaN NaN \n", "ENSMUSG00000000392 NaN NaN NaN NaN NaN NaN NaN \n", "... ... ... ... ... ... ... ... \n", "ENSMUSG00000116022 NaN NaN NaN NaN NaN NaN NaN \n", "ENSMUSG00000116076 NaN NaN NaN NaN NaN NaN NaN \n", "ENSMUSG00000116258 NaN NaN NaN NaN NaN NaN NaN \n", "ENSMUSG00000116470 NaN NaN NaN NaN NaN NaN NaN \n", "ENSMUSG00000116510 NaN NaN NaN NaN NaN 49.0 NaN \n", "\n", "[1748 rows x 23 columns]" ] }, "execution_count": 130, "metadata": {}, "output_type": "execute_result" } ], "source": [ "marker_list.pivot(index='gene_id', columns='cell_type', values='rank')" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [], "source": [ "marker_table = marker_list.pivot(index='gene_id', columns='cell_type', values='rank')" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [], "source": [ "marker_table.fillna(0, inplace=True)" ] }, { "cell_type": "code", "execution_count": 127, "metadata": {}, "outputs": [], "source": [ "marker_list['gene_2'] = marker_list['gene_id']" ] }, { "cell_type": "code", "execution_count": 93, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
cell_typeAstroEndoL2/3 ITL4/5 ITL5 ITL5 PTL5/6 NPL6 CTL6 ITL6 IT Car3...Micro-PVMOligoPeriPvalbSMCSncgSstSst ChodlVLMCVip
gene_id
ENSMUSG000000000930.00.00.00.00.00.00.00.00.00.0...0.00.00.00.01.00.00.00.00.00.0
ENSMUSG000000001260.00.00.01.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
ENSMUSG000000002021.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
ENSMUSG000000003050.00.00.00.00.00.00.00.01.00.0...0.00.00.00.00.00.00.00.00.00.0
ENSMUSG000000003920.00.00.00.00.00.00.00.00.01.0...0.00.00.00.00.00.00.00.00.00.0
..................................................................
ENSMUSG000001160220.00.00.00.00.00.00.00.00.01.0...0.00.00.00.00.00.00.00.00.00.0
ENSMUSG000001160760.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
ENSMUSG000001162580.00.00.00.00.00.00.00.00.00.0...1.00.00.00.00.00.00.00.00.00.0
ENSMUSG000001164700.00.00.00.00.00.00.00.00.00.0...1.00.00.00.00.00.00.00.00.00.0
ENSMUSG000001165100.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
\n", "

1748 rows × 23 columns

\n", "
" ], "text/plain": [ "cell_type Astro Endo L2/3 IT L4/5 IT L5 IT L5 PT L5/6 NP \\\n", "gene_id \n", "ENSMUSG00000000093 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000000126 0.0 0.0 0.0 1.0 0.0 0.0 0.0 \n", "ENSMUSG00000000202 1.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000000305 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000000392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "... ... ... ... ... ... ... ... \n", "ENSMUSG00000116022 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000116076 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000116258 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000116470 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000116510 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "\n", "cell_type L6 CT L6 IT L6 IT Car3 ... Micro-PVM Oligo Peri \\\n", "gene_id ... \n", "ENSMUSG00000000093 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n", "ENSMUSG00000000126 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n", "ENSMUSG00000000202 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n", "ENSMUSG00000000305 0.0 1.0 0.0 ... 0.0 0.0 0.0 \n", "ENSMUSG00000000392 0.0 0.0 1.0 ... 0.0 0.0 0.0 \n", "... ... ... ... ... ... ... ... \n", "ENSMUSG00000116022 0.0 0.0 1.0 ... 0.0 0.0 0.0 \n", "ENSMUSG00000116076 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n", "ENSMUSG00000116258 0.0 0.0 0.0 ... 1.0 0.0 0.0 \n", "ENSMUSG00000116470 0.0 0.0 0.0 ... 1.0 0.0 0.0 \n", "ENSMUSG00000116510 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n", "\n", "cell_type Pvalb SMC Sncg Sst Sst Chodl VLMC Vip \n", "gene_id \n", "ENSMUSG00000000093 0.0 1.0 0.0 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000000126 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000000202 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000000305 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000000392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "... ... ... ... ... ... ... ... \n", "ENSMUSG00000116022 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000116076 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000116258 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000116470 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000116510 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", "\n", "[1748 rows x 23 columns]" ] }, "execution_count": 93, "metadata": {}, "output_type": "execute_result" } ], "source": [ "marker_table" ] }, { "cell_type": "code", "execution_count": 98, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "4.0" ] }, "execution_count": 98, "metadata": {}, "output_type": "execute_result" } ], "source": [ "marker_table.sum(axis=1).max()" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [], "source": [ "marker_table[marker_table != 0] = 1" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(1686, 1686)\n", "(1686, 23)\n", "0.9434473154881634\n", "0.00015267734141610756\n" ] } ], "source": [ "df_2d_marker = run_egad(marker_table, df_jac_sim)" ] }, { "cell_type": "code", "execution_count": 102, "metadata": {}, "outputs": [ { "ename": "ModuleNotFoundError", "evalue": "No module named 'seaborn'", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mseaborn\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0msns\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'seaborn'" ] } ], "source": [ "import seaborn as sns" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "#import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", "#plt.plot(data=df_2d_marker, x='AUC', y='DEGREE_NULL_AUC')\n", "plt.scatter(df_2d_marker['AUC'], df_2d_marker['DEGREE_NULL_AUC'])\n", "plt.plot([0, 1], [0, 1], c='black')\n", "plt.axvline(x=df_2d_marker['AUC'].mean(),c='black',ls='--')\n", "plt.axhline(y=df_2d_marker['DEGREE_NULL_AUC'].mean(), c='black', ls='--')" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AUCAVG_NODE_DEGREEDEGREE_NULL_AUCP_Value
cell_type
Astro0.5032101.9137910.4787750.391881
Endo0.5158182.1708900.5741700.296932
L2/3 IT0.5215181.7746650.4246000.161082
L4/5 IT0.5440231.9449300.5037520.068288
L5 IT0.6236931.9177190.4886620.000173
L5 PT0.5053242.0077710.5220820.010282
L5/6 NP0.4814461.9582540.4938980.193746
L6 CT0.5301661.9278220.4867920.164214
L6 IT0.5278011.9742290.5139810.172077
L6 IT Car30.5946221.7678280.4297820.000774
L6b0.5942252.0993370.5364060.001011
Lamp50.5446321.8421100.4504680.014267
Meis20.5728951.7886340.4248240.007227
Micro-PVM0.4834502.1464250.5585340.173389
Oligo0.4477672.0501760.5274280.041854
Peri0.4826232.1311280.5619210.069004
Pvalb0.4915731.9973020.5092450.350954
SMC0.5371132.0713640.5360030.105072
Sncg0.5385481.8338580.4625460.098128
Sst0.6372081.8523870.4471680.000003
Sst Chodl0.5717571.9042020.4769540.006663
VLMC0.5148681.8189410.4438880.314480
Vip0.5364601.8862730.4847170.116352
\n", "
" ], "text/plain": [ " AUC AVG_NODE_DEGREE DEGREE_NULL_AUC P_Value\n", "cell_type \n", "Astro 0.503210 1.913791 0.478775 0.391881\n", "Endo 0.515818 2.170890 0.574170 0.296932\n", "L2/3 IT 0.521518 1.774665 0.424600 0.161082\n", "L4/5 IT 0.544023 1.944930 0.503752 0.068288\n", "L5 IT 0.623693 1.917719 0.488662 0.000173\n", "L5 PT 0.505324 2.007771 0.522082 0.010282\n", "L5/6 NP 0.481446 1.958254 0.493898 0.193746\n", "L6 CT 0.530166 1.927822 0.486792 0.164214\n", "L6 IT 0.527801 1.974229 0.513981 0.172077\n", "L6 IT Car3 0.594622 1.767828 0.429782 0.000774\n", "L6b 0.594225 2.099337 0.536406 0.001011\n", "Lamp5 0.544632 1.842110 0.450468 0.014267\n", "Meis2 0.572895 1.788634 0.424824 0.007227\n", "Micro-PVM 0.483450 2.146425 0.558534 0.173389\n", "Oligo 0.447767 2.050176 0.527428 0.041854\n", "Peri 0.482623 2.131128 0.561921 0.069004\n", "Pvalb 0.491573 1.997302 0.509245 0.350954\n", "SMC 0.537113 2.071364 0.536003 0.105072\n", "Sncg 0.538548 1.833858 0.462546 0.098128\n", "Sst 0.637208 1.852387 0.447168 0.000003\n", "Sst Chodl 0.571757 1.904202 0.476954 0.006663\n", "VLMC 0.514868 1.818941 0.443888 0.314480\n", "Vip 0.536460 1.886273 0.484717 0.116352" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_2d_pcc" ] }, { "cell_type": "code", "execution_count": 83, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ENSMUSG00000102693ENSMUSG00000064842ENSMUSG00000051951ENSMUSG00000102851ENSMUSG00000103377ENSMUSG00000104017ENSMUSG00000103025ENSMUSG00000089699ENSMUSG00000103201ENSMUSG00000103147...non-genenon-genenon-genenon-genenon-genenon-genenon-genenon-genenon-genenon-gene
ENSMUSG000000259020.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.00.00.00.00.00.00.00.00.00.0
ENSMUSG000000024590.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.00.00.00.00.00.00.00.00.00.0
ENSMUSG000000259050.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.00.00.00.00.00.00.00.00.00.0
ENSMUSG000000337740.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.00.00.00.00.00.00.00.00.00.0
ENSMUSG000000337400.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.00.00.00.00.00.00.00.00.00.0
..................................................................
ENSMUSG000000337170.3661660.2296271.1789690.0000000.9186160.0000000.1841680.8291700.4955070.624326...0.00.00.00.00.00.00.00.00.00.0
ENSMUSG000000250851.3655591.4785652.4912481.0604221.5030661.0248281.0157932.4912482.2438461.349966...0.00.00.00.00.00.00.00.00.00.0
ENSMUSG000000250891.2693981.5891142.3596331.1413122.1941951.0221591.3633942.0281712.0281711.242715...0.00.00.00.00.00.00.00.00.00.0
ENSMUSG000000870950.5746530.6234931.8370310.8522650.8294311.3940880.6235781.0440950.5592500.847598...0.00.00.00.00.00.00.00.00.00.0
ENSMUSG000000439690.0000000.0000001.6847550.2361730.8294311.3797400.9153740.9358360.5592500.258592...0.00.00.00.00.00.00.00.00.00.0
\n", "

1686 rows × 156467 columns

\n", "
" ], "text/plain": [ " ENSMUSG00000102693 ENSMUSG00000064842 \\\n", "ENSMUSG00000025902 0.000000 0.000000 \n", "ENSMUSG00000002459 0.000000 0.000000 \n", "ENSMUSG00000025905 0.000000 0.000000 \n", "ENSMUSG00000033774 0.000000 0.000000 \n", "ENSMUSG00000033740 0.000000 0.000000 \n", "... ... ... \n", "ENSMUSG00000033717 0.366166 0.229627 \n", "ENSMUSG00000025085 1.365559 1.478565 \n", "ENSMUSG00000025089 1.269398 1.589114 \n", "ENSMUSG00000087095 0.574653 0.623493 \n", "ENSMUSG00000043969 0.000000 0.000000 \n", "\n", " ENSMUSG00000051951 ENSMUSG00000102851 \\\n", "ENSMUSG00000025902 0.000000 0.000000 \n", "ENSMUSG00000002459 0.000000 0.000000 \n", "ENSMUSG00000025905 0.000000 0.000000 \n", "ENSMUSG00000033774 0.000000 0.000000 \n", "ENSMUSG00000033740 0.000000 0.000000 \n", "... ... ... \n", "ENSMUSG00000033717 1.178969 0.000000 \n", "ENSMUSG00000025085 2.491248 1.060422 \n", "ENSMUSG00000025089 2.359633 1.141312 \n", "ENSMUSG00000087095 1.837031 0.852265 \n", "ENSMUSG00000043969 1.684755 0.236173 \n", "\n", " ENSMUSG00000103377 ENSMUSG00000104017 \\\n", "ENSMUSG00000025902 0.000000 0.000000 \n", "ENSMUSG00000002459 0.000000 0.000000 \n", "ENSMUSG00000025905 0.000000 0.000000 \n", "ENSMUSG00000033774 0.000000 0.000000 \n", "ENSMUSG00000033740 0.000000 0.000000 \n", "... ... ... \n", "ENSMUSG00000033717 0.918616 0.000000 \n", "ENSMUSG00000025085 1.503066 1.024828 \n", "ENSMUSG00000025089 2.194195 1.022159 \n", "ENSMUSG00000087095 0.829431 1.394088 \n", "ENSMUSG00000043969 0.829431 1.379740 \n", "\n", " ENSMUSG00000103025 ENSMUSG00000089699 \\\n", "ENSMUSG00000025902 0.000000 0.000000 \n", "ENSMUSG00000002459 0.000000 0.000000 \n", "ENSMUSG00000025905 0.000000 0.000000 \n", "ENSMUSG00000033774 0.000000 0.000000 \n", "ENSMUSG00000033740 0.000000 0.000000 \n", "... ... ... \n", "ENSMUSG00000033717 0.184168 0.829170 \n", "ENSMUSG00000025085 1.015793 2.491248 \n", "ENSMUSG00000025089 1.363394 2.028171 \n", "ENSMUSG00000087095 0.623578 1.044095 \n", "ENSMUSG00000043969 0.915374 0.935836 \n", "\n", " ENSMUSG00000103201 ENSMUSG00000103147 ... non-gene \\\n", "ENSMUSG00000025902 0.000000 0.000000 ... 0.0 \n", "ENSMUSG00000002459 0.000000 0.000000 ... 0.0 \n", "ENSMUSG00000025905 0.000000 0.000000 ... 0.0 \n", "ENSMUSG00000033774 0.000000 0.000000 ... 0.0 \n", "ENSMUSG00000033740 0.000000 0.000000 ... 0.0 \n", "... ... ... ... ... \n", "ENSMUSG00000033717 0.495507 0.624326 ... 0.0 \n", "ENSMUSG00000025085 2.243846 1.349966 ... 0.0 \n", "ENSMUSG00000025089 2.028171 1.242715 ... 0.0 \n", "ENSMUSG00000087095 0.559250 0.847598 ... 0.0 \n", "ENSMUSG00000043969 0.559250 0.258592 ... 0.0 \n", "\n", " non-gene non-gene non-gene non-gene non-gene \\\n", "ENSMUSG00000025902 0.0 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000002459 0.0 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000025905 0.0 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000033774 0.0 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000033740 0.0 0.0 0.0 0.0 0.0 \n", "... ... ... ... ... ... \n", "ENSMUSG00000033717 0.0 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000025085 0.0 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000025089 0.0 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000087095 0.0 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000043969 0.0 0.0 0.0 0.0 0.0 \n", "\n", " non-gene non-gene non-gene non-gene \n", "ENSMUSG00000025902 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000002459 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000025905 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000033774 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000033740 0.0 0.0 0.0 0.0 \n", "... ... ... ... ... \n", "ENSMUSG00000033717 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000025085 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000025089 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000087095 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000043969 0.0 0.0 0.0 0.0 \n", "\n", "[1686 rows x 156467 columns]" ] }, "execution_count": 83, "metadata": {}, "output_type": "execute_result" } ], "source": [] }, { "cell_type": "code", "execution_count": 84, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ENSMUSG00000102693ENSMUSG00000064842ENSMUSG00000051951ENSMUSG00000102851ENSMUSG00000103377ENSMUSG00000104017ENSMUSG00000103025ENSMUSG00000089699ENSMUSG00000103201ENSMUSG00000103147...non-genenon-genenon-genenon-genenon-genenon-genenon-genenon-genenon-genenon-gene
ENSMUSG000001026930.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
ENSMUSG000000648420.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
ENSMUSG000000519510.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
ENSMUSG000001028510.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
ENSMUSG000001033770.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
..................................................................
non-gene0.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
non-gene0.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
non-gene0.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
non-gene0.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
non-gene0.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
\n", "

156467 rows × 156467 columns

\n", "
" ], "text/plain": [ " ENSMUSG00000102693 ENSMUSG00000064842 \\\n", "ENSMUSG00000102693 0.0 0.0 \n", "ENSMUSG00000064842 0.0 0.0 \n", "ENSMUSG00000051951 0.0 0.0 \n", "ENSMUSG00000102851 0.0 0.0 \n", "ENSMUSG00000103377 0.0 0.0 \n", "... ... ... \n", "non-gene 0.0 0.0 \n", "non-gene 0.0 0.0 \n", "non-gene 0.0 0.0 \n", "non-gene 0.0 0.0 \n", "non-gene 0.0 0.0 \n", "\n", " ENSMUSG00000051951 ENSMUSG00000102851 \\\n", "ENSMUSG00000102693 0.0 0.0 \n", "ENSMUSG00000064842 0.0 0.0 \n", "ENSMUSG00000051951 0.0 0.0 \n", "ENSMUSG00000102851 0.0 0.0 \n", "ENSMUSG00000103377 0.0 0.0 \n", "... ... ... \n", "non-gene 0.0 0.0 \n", "non-gene 0.0 0.0 \n", "non-gene 0.0 0.0 \n", "non-gene 0.0 0.0 \n", "non-gene 0.0 0.0 \n", "\n", " ENSMUSG00000103377 ENSMUSG00000104017 \\\n", "ENSMUSG00000102693 0.0 0.0 \n", "ENSMUSG00000064842 0.0 0.0 \n", "ENSMUSG00000051951 0.0 0.0 \n", "ENSMUSG00000102851 0.0 0.0 \n", "ENSMUSG00000103377 0.0 0.0 \n", "... ... ... \n", "non-gene 0.0 0.0 \n", "non-gene 0.0 0.0 \n", "non-gene 0.0 0.0 \n", "non-gene 0.0 0.0 \n", "non-gene 0.0 0.0 \n", "\n", " ENSMUSG00000103025 ENSMUSG00000089699 \\\n", "ENSMUSG00000102693 0.0 0.0 \n", "ENSMUSG00000064842 0.0 0.0 \n", "ENSMUSG00000051951 0.0 0.0 \n", "ENSMUSG00000102851 0.0 0.0 \n", "ENSMUSG00000103377 0.0 0.0 \n", "... ... ... \n", "non-gene 0.0 0.0 \n", "non-gene 0.0 0.0 \n", "non-gene 0.0 0.0 \n", "non-gene 0.0 0.0 \n", "non-gene 0.0 0.0 \n", "\n", " ENSMUSG00000103201 ENSMUSG00000103147 ... non-gene \\\n", "ENSMUSG00000102693 0.0 0.0 ... 0.0 \n", "ENSMUSG00000064842 0.0 0.0 ... 0.0 \n", "ENSMUSG00000051951 0.0 0.0 ... 0.0 \n", "ENSMUSG00000102851 0.0 0.0 ... 0.0 \n", "ENSMUSG00000103377 0.0 0.0 ... 0.0 \n", "... ... ... ... ... \n", "non-gene 0.0 0.0 ... 0.0 \n", "non-gene 0.0 0.0 ... 0.0 \n", "non-gene 0.0 0.0 ... 0.0 \n", "non-gene 0.0 0.0 ... 0.0 \n", "non-gene 0.0 0.0 ... 0.0 \n", "\n", " non-gene non-gene non-gene non-gene non-gene \\\n", "ENSMUSG00000102693 0.0 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000064842 0.0 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000051951 0.0 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000102851 0.0 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000103377 0.0 0.0 0.0 0.0 0.0 \n", "... ... ... ... ... ... \n", "non-gene 0.0 0.0 0.0 0.0 0.0 \n", "non-gene 0.0 0.0 0.0 0.0 0.0 \n", "non-gene 0.0 0.0 0.0 0.0 0.0 \n", "non-gene 0.0 0.0 0.0 0.0 0.0 \n", "non-gene 0.0 0.0 0.0 0.0 0.0 \n", "\n", " non-gene non-gene non-gene non-gene \n", "ENSMUSG00000102693 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000064842 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000051951 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000102851 0.0 0.0 0.0 0.0 \n", "ENSMUSG00000103377 0.0 0.0 0.0 0.0 \n", "... ... ... ... ... \n", "non-gene 0.0 0.0 0.0 0.0 \n", "non-gene 0.0 0.0 0.0 0.0 \n", "non-gene 0.0 0.0 0.0 0.0 \n", "non-gene 0.0 0.0 0.0 0.0 \n", "non-gene 0.0 0.0 0.0 0.0 \n", "\n", "[156467 rows x 156467 columns]" ] }, "execution_count": 84, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_jac_sim" ] }, { "cell_type": "code", "execution_count": 88, "metadata": {}, "outputs": [], "source": [ "df_jac_sim_subset_matrix = df_jac_sim[df_jac_sim.index.isin(marker_table.index)]\n", "df_jac_sim_subset_genes = df_jac_sim_subset_matrix.index.tolist()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import h5py\n", "import logging\n", "import numpy as np\n", "import pandas as pd\n", "from hicmatrix import HiCMatrix as hm\n", "from hicmatrix.lib import MatrixFileHandler\n", "from scipy.sparse import csr_matrix, dia_matrix, triu, tril, coo_matrix\n", "import scipy.stats as stats\n", "import os.path\n", "\n", " \n", "def calc_auc(exp_file, jac_sim, outfile, auc_type, given_top_percentile=1 ,jac_sim_species='a', exp_file_species='a', ortho_genes=\"ortho_genes\"):\n", " \n", " exp_genes_all = [x[3].decode() for x in exp_file.cut_intervals]\n", " exp_matrix = exp_file.matrix\n", " \n", " \n", " all_genes_all = [x[3].decode() for x in jac_sim.cut_intervals]\n", " \n", " entire_matrix = jac_sim.matrix\n", " entire_matrix = entire_matrix.astype('float32')\n", " \n", " \n", " \n", " if jac_sim_species != exp_file_species:\n", " df_1_1 = pd.read_csv(ortho_genes)\n", " df_1_1 = df_1_1.drop_duplicates(subset=[jac_sim_species], keep=False)\n", " df_1_1 = df_1_1.drop_duplicates(subset=[exp_file_species], keep=False)\n", " jac_sim_species_ortho_subset = list (set(all_genes_all) & set(df_1_1[jac_sim_species].tolist()))\n", " df_1_1 = df_1_1[df_1_1[jac_sim_species].isin(jac_sim_species_ortho_subset)]\n", " exp_file_species_ortho_subset = list (set(exp_genes_all) & set(df_1_1[exp_file_species].tolist()))\n", " df_1_1 = df_1_1[df_1_1[exp_file_species].isin(exp_file_species_ortho_subset)]\n", " subset_genes_exp_file = df_1_1[exp_file_species].tolist()\n", " subset_genes_jac_sim = df_1_1[jac_sim_species].tolist()\n", " \n", " else:\n", " subset_genes_exp_file = list (set(all_genes_all) & set(exp_genes_all))\n", " subset_genes_jac_sim = list (set(all_genes_all) & set(exp_genes_all))\n", " \n", " exp_genes_index_list = [i for i, value in enumerate(exp_genes_all) if value in subset_genes_exp_file]\n", " exp_genes = [value for i, value in enumerate(exp_genes_all) if value in subset_genes_exp_file]\n", " exp_matrix = csr_matrix(exp_matrix)[exp_genes_index_list, :][:, exp_genes_index_list]\n", " \n", " updated_cut_intervals = [exp_file.cut_intervals[x] for x in exp_genes_index_list]\n", " exp_file.update_matrix(exp_matrix, updated_cut_intervals)\n", " \n", " exp_matrix = exp_matrix.toarray()\n", " \n", " \n", "\n", " rank_abs = lambda x: stats.rankdata(x, method='ordinal')\n", " exp_matrix = np.apply_along_axis(rank_abs, 1, exp_matrix)\n", " \n", " exp_matrix = exp_matrix * 1.0\n", " \n", " exp_matrix = exp_matrix.astype('float32')\n", " \n", " \n", " \n", " \n", " chr_list_2 = exp_file.getChrNames()\n", "\n", " \n", " all_gene_index_list = [i for i, value in enumerate(all_genes_all) if value in subset_genes_jac_sim]\n", " all_genes = [value for i, value in enumerate(all_genes_all) if value in subset_genes_jac_sim]\n", " \n", " entire_matrix = csr_matrix(entire_matrix)[all_gene_index_list, :][:, all_gene_index_list]\n", " \n", " updated_cut_intervals = [jac_sim.cut_intervals[x] for x in all_gene_index_list]\n", " jac_sim.update_matrix(entire_matrix, updated_cut_intervals)\n", " \n", " entire_matrix = entire_matrix.toarray()\n", " entire_matrix = entire_matrix.astype('float32')\n", " \n", " chr_list_1 = jac_sim.getChrNames()\n", "\n", " max_value = entire_matrix.max() + 1 + exp_matrix.max()\n", " max_value = max_value.astype('float32')\n", "\n", " if auc_type == 'inter_only':\n", " \n", " for chrom in chr_list_1:\n", "\n", " start, end = jac_sim.getChrBinRange(chrom)\n", " \n", " entire_matrix[start: end, start: end] = max_value \n", " \n", "\n", " \n", "\n", " \n", " \n", " elif auc_type == 'intra_only':\n", "\n", "\n", " for chrom in chr_list_2:\n", "\n", " start, end = exp_file.getChrBinRange(chrom)\n", " \n", " coordinate_matrix[start: end, start: end] = max_value\n", " \n", " inter_coodinates = np.where(coordinate_matrix != max_value)\n", " x_inter = inter_coodinates[0].tolist()\n", " y_inter = inter_coodinates[1].tolist()\n", " exp_matrix[x_inter, y_inter] = max_value\n", " np.fill_diagonal(exp_matrix , max_value)\n", " #exp_matrix.setdiag(max_value, k=0)\n", "\n", "\n", " else:\n", "\n", " np.fill_diagonal(entire_matrix , max_value)\n", "\n", " np.fill_diagonal(exp_matrix , max_value)\n", " #exp_matrix.setdiag(max_value, k=0)\n", " #entire_matrix.setdiag(max_value, k=0)\n", "\n", "\n", " df_jac = pd.DataFrame(entire_matrix , index=all_genes, columns = all_genes)\n", " df_exp = pd.DataFrame(exp_matrix , index=exp_genes, columns = exp_genes)\n", "\n", "\n", " df_jac_subset = df_jac.loc[subset_genes_jac_sim, subset_genes_jac_sim]\n", "\n", "\n", " coodinates_1 = np.where(df_jac_subset == max_value)\n", "\n", "\n", " x_coo = coodinates_1[0].tolist()\n", " y_coo = coodinates_1[1].tolist() \n", "\n", " df_jac_subset.to_numpy()[x_coo, y_coo] = max_value\n", "\n", "\n", " \n", " rank_abs = lambda x: stats.rankdata(x)\n", " predicts2 = np.apply_along_axis(rank_abs, 1, df_jac_subset)\n", " predicts2 = predicts2.astype('float')\n", " \n", " predicts2[x_coo, y_coo] = np.nan\n", " \n", " top_percentile = 100 - given_top_percentile\n", " \n", " exp_upper = lambda x: np.where(x < np.percentile(x[~np.isnan(x)] , top_percentile), 0, 1.0) if x[~np.isnan(x)].shape[0] > 0 else np.zeros(x.shape[0])\n", " tp = np.apply_along_axis(exp_upper, 1, df_exp_subset)\n", " tp[x_coo, y_coo] = np.nan\n", " exp_upper = lambda x: np.where(x >= np.percentile(x[~np.isnan(x)] , top_percentile), 0, 1.0) if x[~np.isnan(x)].shape[0] > 0 else np.zeros(x.shape[0])\n", " tn = np.apply_along_axis(exp_upper, 1, df_exp_subset)\n", " tn[x_coo, y_coo] = np.nan\n", " pos_rank_sum = tp * predicts2\n", " auc_array = (((np.nansum(pos_rank_sum, axis=1) / (np.nansum(tp, axis=1)) )- (np.nansum(tp, axis=1) + 1)/2)) / np.nansum(tn, axis=1)\n", " \n", " df = pd.DataFrame(columns = ['gene_id_jac_sim', 'chrom_jac_sim', 'gene_id_exp_file', 'chrom_exp_file', 'auc']) \n", " df['auc'] = auc_array.tolist()\n", " df['gene_id_exp_file'] = subset_genes_exp_file\n", " df['gene_id_jac_sim'] = subset_genes_jac_sim\n", " \n", " gene_list = [x[3].decode() for x in jac_sim.cut_intervals]\n", " _chrom_list = [x[0] for x in jac_sim.cut_intervals]\n", " df['chrom_jac_sim'] = [_chrom_list[gene_list.index(x)] for x in df['gene_id_jac_sim']]\n", "\n", " gene_list = [x[3].decode() for x in exp_file.cut_intervals]\n", " _chrom_list = [x[0] for x in exp_file.cut_intervals]\n", " df['chrom_exp_file'] = [_chrom_list[gene_list.index(x)] for x in df['gene_id_exp_file']]\n", " df.to_csv(outfile, sep='\\t', index=False)\n", " \n", " \n", "if __name__ == '__main__':\n", "\n", " \n", " import argparse\n", " parser = argparse.ArgumentParser()\n", "\n", " parser.add_argument('--outfile', default='tss',\n", " help='tss or tss_max')\n", " parser.add_argument('--exp_file_path', default='tss',\n", " help='tss or tss_max')\n", " parser.add_argument('--jac_sim_path', default='tss',\n", " help='tss or tss_max')\n", " parser.add_argument('--auc_type', default='intra_only',\n", " help='tss or tss_max')\n", " parser.add_argument('--jac_sim_species', default='a',\n", " help='tss or tss_max')\n", " parser.add_argument('--exp_file_species', default='a',\n", " help='tss or tss_max')\n", " parser.add_argument('--ortho_genes', default='b',\n", " help='tss or tss_max')\n", " parser.add_argument('--given_top_percentile', default=1,\n", " help='tss or tss_max')\n", " \n", "\n", "\n", " args = parser.parse_args()\n", " \n", " if not os.path.isfile(args.outfile):\n", "\n", " \n", " jac_sim = hm.hiCMatrix(args.jac_sim_path)\n", " exp_file = hm.hiCMatrix(args.exp_file_path)\n", "\n", " print (args.jac_sim_path)\n", " print (args.exp_file_path)\n", "\n", " calc_auc(exp_file, jac_sim, args.outfile, args.auc_type, given_top_percentile=int(args.given_top_percentile), jac_sim_species=args.jac_sim_species, exp_file_species=args.exp_file_species, ortho_genes=args.ortho_genes)\n" ] }, { "cell_type": "code", "execution_count": 117, "metadata": {}, "outputs": [ { "ename": "ImportError", "evalue": "cannot import name 'UMAP' from 'umap' (/grid/gillis/home/lohia/.conda/envs/hicexplorer/lib/python3.8/site-packages/umap/__init__.py)", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mumap\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mUMAP\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0;31m#import plotly.express as px\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mreducer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mUMAP\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_components\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minit\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'random'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrandom_state\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mImportError\u001b[0m: cannot import name 'UMAP' from 'umap' (/grid/gillis/home/lohia/.conda/envs/hicexplorer/lib/python3.8/site-packages/umap/__init__.py)" ] } ], "source": [ "from umap import UMAP\n", "#import plotly.express as px\n", "reducer = UMAP(n_components=2, init='random', random_state=0)" ] }, { "cell_type": "code", "execution_count": 122, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
groupcell_typerankgenerecurrenceaurocfold_changefold_change_detectionexpressionprecision...population_sizen_datasetsscSSsnSSscCv2snCv2snCv3MscCv3snCv3Zgene_id
0Non-NeuronalAstro1SLC1A260.99576979.3279703.9252416011.8769200.627385...2775.1428577FalseTrueTrueTrueTrueTrueTrueENSMUSG00000005089
1Non-NeuronalAstro2NTM60.97897718.6718753.0942783039.7153100.591128...2775.1428577FalseTrueTrueTrueTrueTrueTrueENSMUSG00000059974
2Non-NeuronalAstro3SLC1A360.97247016.1644213.7156633039.4917780.620450...2775.1428577FalseTrueTrueTrueTrueTrueTrueENSMUSG00000005360
3Non-NeuronalAstro4GABRB160.95802648.2320325.0035641849.9133500.683010...2775.1428577FalseTrueTrueTrueTrueTrueTrueENSMUSG00000029212
4Non-NeuronalAstro5TSPAN760.95618612.6827712.1627952402.9567820.507171...2775.1428577FalseTrueTrueTrueTrueTrueTrueENSMUSG00000058254
..................................................................
2263Non-NeuronalVLMC95GM3062420.63182617.6777369.8740280.0000000.384595...473.0000005FalseNaNTrueFalseTrueNaNFalseENSMUSG00000112430
2264Non-NeuronalVLMC96SH3PXD2A20.6309812.4103482.1168790.0000000.127298...473.0000005FalseNaNFalseFalseTrueNaNTrueENSMUSG00000053617
2265Non-NeuronalVLMC97NXN20.6294920.5829570.2565020.0000000.054695...473.0000005FalseNaNFalseFalseTrueNaNTrueENSMUSG00000020844
2266Non-NeuronalVLMC98PRDM620.62541613.18969212.1744970.000000NaN...473.0000005FalseNaNFalseFalseTrueNaNTrueENSMUSG00000069378
2267Non-NeuronalVLMC100SNED120.6134291.2942670.9016680.000000NaN...473.0000005FalseNaNFalseFalseTrueNaNTrueENSMUSG00000047793
\n", "

2268 rows × 21 columns

\n", "
" ], "text/plain": [ " group cell_type rank gene recurrence auroc \\\n", "0 Non-Neuronal Astro 1 SLC1A2 6 0.995769 \n", "1 Non-Neuronal Astro 2 NTM 6 0.978977 \n", "2 Non-Neuronal Astro 3 SLC1A3 6 0.972470 \n", "3 Non-Neuronal Astro 4 GABRB1 6 0.958026 \n", "4 Non-Neuronal Astro 5 TSPAN7 6 0.956186 \n", "... ... ... ... ... ... ... \n", "2263 Non-Neuronal VLMC 95 GM30624 2 0.631826 \n", "2264 Non-Neuronal VLMC 96 SH3PXD2A 2 0.630981 \n", "2265 Non-Neuronal VLMC 97 NXN 2 0.629492 \n", "2266 Non-Neuronal VLMC 98 PRDM6 2 0.625416 \n", "2267 Non-Neuronal VLMC 100 SNED1 2 0.613429 \n", "\n", " fold_change fold_change_detection expression precision ... \\\n", "0 79.327970 3.925241 6011.876920 0.627385 ... \n", "1 18.671875 3.094278 3039.715310 0.591128 ... \n", "2 16.164421 3.715663 3039.491778 0.620450 ... \n", "3 48.232032 5.003564 1849.913350 0.683010 ... \n", "4 12.682771 2.162795 2402.956782 0.507171 ... \n", "... ... ... ... ... ... \n", "2263 17.677736 9.874028 0.000000 0.384595 ... \n", "2264 2.410348 2.116879 0.000000 0.127298 ... \n", "2265 0.582957 0.256502 0.000000 0.054695 ... \n", "2266 13.189692 12.174497 0.000000 NaN ... \n", "2267 1.294267 0.901668 0.000000 NaN ... \n", "\n", " population_size n_datasets scSS snSS scCv2 snCv2 snCv3M scCv3 \\\n", "0 2775.142857 7 False True True True True True \n", "1 2775.142857 7 False True True True True True \n", "2 2775.142857 7 False True True True True True \n", "3 2775.142857 7 False True True True True True \n", "4 2775.142857 7 False True True True True True \n", "... ... ... ... ... ... ... ... ... \n", "2263 473.000000 5 False NaN True False True NaN \n", "2264 473.000000 5 False NaN False False True NaN \n", "2265 473.000000 5 False NaN False False True NaN \n", "2266 473.000000 5 False NaN False False True NaN \n", "2267 473.000000 5 False NaN False False True NaN \n", "\n", " snCv3Z gene_id \n", "0 True ENSMUSG00000005089 \n", "1 True ENSMUSG00000059974 \n", "2 True ENSMUSG00000005360 \n", "3 True ENSMUSG00000029212 \n", "4 True ENSMUSG00000058254 \n", "... ... ... \n", "2263 False ENSMUSG00000112430 \n", "2264 True ENSMUSG00000053617 \n", "2265 True ENSMUSG00000020844 \n", "2266 True ENSMUSG00000069378 \n", "2267 True ENSMUSG00000047793 \n", "\n", "[2268 rows x 21 columns]" ] }, "execution_count": 122, "metadata": {}, "output_type": "execute_result" } ], "source": [ "marker_list[['gene_id', 'cell_type']]" ] }, { "cell_type": "code", "execution_count": 124, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ENSMUSG00000102693ENSMUSG00000064842ENSMUSG00000051951ENSMUSG00000102851ENSMUSG00000103377ENSMUSG00000104017ENSMUSG00000103025ENSMUSG00000089699ENSMUSG00000103201ENSMUSG00000103147...ENSMUSG00000096578ENSMUSG00000088894ENSMUSG00000074733ENSMUSG00000070263ENSMUSG00000094649ENSMUSG00000069475ENSMUSG00000059326ENSMUSG00000095993gene_idcell_type
00.1781460.1667130.1607930.2017590.2098730.2098730.1657550.1595540.1597540.163636...-0.000373-0.000444-0.000516-0.000346-0.000346-0.000346-0.000409-0.000384ENSMUSG00000025902Endo
10.1080310.0975230.0819690.1185870.1198690.1198690.0955990.0944660.0948760.093490...-0.000413-0.000351-0.0003550.0000590.0000590.000059-0.000428-0.000219ENSMUSG00000002459Astro
20.1245280.1189700.0958820.1404690.1338040.1338040.1065760.1025040.1020560.103876...-0.001213-0.001144-0.001116-0.000780-0.000780-0.000780-0.001023-0.000859ENSMUSG00000025905L5 IT
30.1245280.1189700.0958820.1404690.1338040.1338040.1065760.1025040.1020560.103876...-0.001213-0.001144-0.001116-0.000780-0.000780-0.000780-0.001023-0.000859ENSMUSG00000025905L6 IT
40.1245280.1189700.0958820.1404690.1338040.1338040.1065760.1025040.1020560.103876...-0.001213-0.001144-0.001116-0.000780-0.000780-0.000780-0.001023-0.000859ENSMUSG00000025905L6 IT Car3
..................................................................
2188-0.001659-0.001450-0.001149-0.001612-0.001151-0.001151-0.000950-0.000977-0.000920-0.000996...0.0323680.0314940.0283450.0320370.0320370.0320370.0288040.028606ENSMUSG00000033717L6b
2189-0.002251-0.002021-0.001267-0.002103-0.002146-0.002146-0.001643-0.001495-0.001412-0.001449...0.0458090.0448040.0417110.0452280.0452280.0452280.0404450.039271ENSMUSG00000025085Endo
2190-0.001260-0.000996-0.000322-0.001172-0.000955-0.000955-0.000665-0.000767-0.0007670.000069...0.0509670.0503580.0460300.0498950.0498950.0498950.0435350.043980ENSMUSG00000025089L6 IT Car3
2191-0.001703-0.001529-0.001257-0.001613-0.001380-0.001380-0.001215-0.001186-0.001096-0.001132...0.1291760.1266050.1146280.1229670.1229670.1229670.1059430.103637ENSMUSG00000087095L2/3 IT
2192-0.001687-0.001485-0.001246-0.001549-0.001315-0.001315-0.001153-0.001147-0.001077-0.001097...0.1311760.1289060.1182430.1253120.1253120.1253120.1088400.107101ENSMUSG00000043969L2/3 IT
\n", "

2193 rows × 50227 columns

\n", "
" ], "text/plain": [ " ENSMUSG00000102693 ENSMUSG00000064842 ENSMUSG00000051951 \\\n", "0 0.178146 0.166713 0.160793 \n", "1 0.108031 0.097523 0.081969 \n", "2 0.124528 0.118970 0.095882 \n", "3 0.124528 0.118970 0.095882 \n", "4 0.124528 0.118970 0.095882 \n", "... ... ... ... \n", "2188 -0.001659 -0.001450 -0.001149 \n", "2189 -0.002251 -0.002021 -0.001267 \n", "2190 -0.001260 -0.000996 -0.000322 \n", "2191 -0.001703 -0.001529 -0.001257 \n", "2192 -0.001687 -0.001485 -0.001246 \n", "\n", " ENSMUSG00000102851 ENSMUSG00000103377 ENSMUSG00000104017 \\\n", "0 0.201759 0.209873 0.209873 \n", "1 0.118587 0.119869 0.119869 \n", "2 0.140469 0.133804 0.133804 \n", "3 0.140469 0.133804 0.133804 \n", "4 0.140469 0.133804 0.133804 \n", "... ... ... ... \n", "2188 -0.001612 -0.001151 -0.001151 \n", "2189 -0.002103 -0.002146 -0.002146 \n", "2190 -0.001172 -0.000955 -0.000955 \n", "2191 -0.001613 -0.001380 -0.001380 \n", "2192 -0.001549 -0.001315 -0.001315 \n", "\n", " ENSMUSG00000103025 ENSMUSG00000089699 ENSMUSG00000103201 \\\n", "0 0.165755 0.159554 0.159754 \n", "1 0.095599 0.094466 0.094876 \n", "2 0.106576 0.102504 0.102056 \n", "3 0.106576 0.102504 0.102056 \n", "4 0.106576 0.102504 0.102056 \n", "... ... ... ... \n", "2188 -0.000950 -0.000977 -0.000920 \n", "2189 -0.001643 -0.001495 -0.001412 \n", "2190 -0.000665 -0.000767 -0.000767 \n", "2191 -0.001215 -0.001186 -0.001096 \n", "2192 -0.001153 -0.001147 -0.001077 \n", "\n", " ENSMUSG00000103147 ... ENSMUSG00000096578 ENSMUSG00000088894 \\\n", "0 0.163636 ... -0.000373 -0.000444 \n", "1 0.093490 ... -0.000413 -0.000351 \n", "2 0.103876 ... -0.001213 -0.001144 \n", "3 0.103876 ... -0.001213 -0.001144 \n", "4 0.103876 ... -0.001213 -0.001144 \n", "... ... ... ... ... \n", "2188 -0.000996 ... 0.032368 0.031494 \n", "2189 -0.001449 ... 0.045809 0.044804 \n", "2190 0.000069 ... 0.050967 0.050358 \n", "2191 -0.001132 ... 0.129176 0.126605 \n", "2192 -0.001097 ... 0.131176 0.128906 \n", "\n", " ENSMUSG00000074733 ENSMUSG00000070263 ENSMUSG00000094649 \\\n", "0 -0.000516 -0.000346 -0.000346 \n", "1 -0.000355 0.000059 0.000059 \n", "2 -0.001116 -0.000780 -0.000780 \n", "3 -0.001116 -0.000780 -0.000780 \n", "4 -0.001116 -0.000780 -0.000780 \n", "... ... ... ... \n", "2188 0.028345 0.032037 0.032037 \n", "2189 0.041711 0.045228 0.045228 \n", "2190 0.046030 0.049895 0.049895 \n", "2191 0.114628 0.122967 0.122967 \n", "2192 0.118243 0.125312 0.125312 \n", "\n", " ENSMUSG00000069475 ENSMUSG00000059326 ENSMUSG00000095993 \\\n", "0 -0.000346 -0.000409 -0.000384 \n", "1 0.000059 -0.000428 -0.000219 \n", "2 -0.000780 -0.001023 -0.000859 \n", "3 -0.000780 -0.001023 -0.000859 \n", "4 -0.000780 -0.001023 -0.000859 \n", "... ... ... ... \n", "2188 0.032037 0.028804 0.028606 \n", "2189 0.045228 0.040445 0.039271 \n", "2190 0.049895 0.043535 0.043980 \n", "2191 0.122967 0.105943 0.103637 \n", "2192 0.125312 0.108840 0.107101 \n", "\n", " gene_id cell_type \n", "0 ENSMUSG00000025902 Endo \n", "1 ENSMUSG00000002459 Astro \n", "2 ENSMUSG00000025905 L5 IT \n", "3 ENSMUSG00000025905 L6 IT \n", "4 ENSMUSG00000025905 L6 IT Car3 \n", "... ... ... \n", "2188 ENSMUSG00000033717 L6b \n", "2189 ENSMUSG00000025085 Endo \n", "2190 ENSMUSG00000025089 L6 IT Car3 \n", "2191 ENSMUSG00000087095 L2/3 IT \n", "2192 ENSMUSG00000043969 L2/3 IT \n", "\n", "[2193 rows x 50227 columns]" ] }, "execution_count": 124, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_pcc_sim.merge(marker_list[['gene_id', 'cell_type']], left_on=df_pcc_sim.index, right_on='gene_id')" ] }, { "cell_type": "code", "execution_count": 120, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[ 1.00000000e+00, 1.00000000e+00],\n", " [ 5.71256919e-01, 5.71256919e-01],\n", " [ 5.05523164e-01, 5.05523164e-01],\n", " ...,\n", " [-7.89479182e-04, -7.89479182e-04],\n", " [-8.52762882e-04, -8.52762882e-04],\n", " [-9.40203123e-04, -9.40203123e-04]])" ] }, "execution_count": 120, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_pcc_sim[\n", " [\n", " \"ENSMUSG00000102693\",\n", " \"ENSMUSG00000102693\",\n", " ]\n", "].values" ] }, { "cell_type": "code", "execution_count": 121, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[ 1.00000000e+00, 5.71256919e-01, 5.05523164e-01, ...,\n", " -7.89479182e-04, -8.52762882e-04, -9.40203123e-04],\n", " [ 5.71256919e-01, 1.00000000e+00, 5.19787869e-01, ...,\n", " -4.96218317e-04, -5.28529783e-04, -6.26098922e-04],\n", " [ 5.05523164e-01, 5.19787869e-01, 1.00000000e+00, ...,\n", " -6.11435339e-04, -7.07052226e-04, -7.03668866e-04],\n", " ...,\n", " [-7.89479182e-04, -4.96218317e-04, -6.11435339e-04, ...,\n", " 1.00000000e+00, 5.37342233e-01, 6.26562922e-01],\n", " [-8.52762882e-04, -5.28529783e-04, -7.07052226e-04, ...,\n", " 5.37342233e-01, 1.00000000e+00, 7.00236598e-01],\n", " [-9.40203123e-04, -6.26098922e-04, -7.03668866e-04, ...,\n", " 6.26562922e-01, 7.00236598e-01, 1.00000000e+00]])" ] }, "execution_count": 121, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_pcc_sim.values" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "hicexp", "language": "python", "name": "hicexp" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.10" } }, "nbformat": 4, "nbformat_minor": 4 }