{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "f0e9cff5", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "from scipy import stats, sparse\n", "import bottleneck\n", "from scipy.stats import mannwhitneyu" ] }, { "cell_type": "code", "execution_count": 2, "id": "07133a8e", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "from scipy import stats, sparse\n", "import bottleneck\n", "def run_egad(go, nw, **kwargs):\n", " \"\"\"EGAD running function\n", " \n", " Wrapper to lower level functions for EGAD\n", "\n", " EGAD measures modularity of gene lists in co-expression networks. \n", "\n", " This was translated from the MATLAB version, which does tiled Cross Validation\n", " \n", " The useful kwargs are:\n", " int - nFold : Number of CV folds to do, default is 3, \n", " int - {min,max}_count : limits for number of terms in each gene list, these are exclusive values\n", "\n", "\n", " Arguments:\n", " go {pd.DataFrame} -- dataframe of genes x terms of values [0,1], where 1 is included in gene lists\n", " nw {pd.DataFrame} -- dataframe of co-expression network, genes x genes\n", " **kwargs \n", " \n", " Returns:\n", " pd.DataFrame -- dataframe of terms x metrics where the metrics are \n", " ['AUC', 'AVG_NODE_DEGREE', 'DEGREE_NULL_AUC', 'P_Value']\n", " \"\"\"\n", " assert nw.shape[0] == nw.shape[1] , 'Network is not square'\n", " #print(nw.index)\n", " #nw.columns = nw.columns.astype(int)\n", " #print(nw.columns.astype(int))\n", " assert np.all(nw.index == nw.columns) , 'Network index and columns are not in the same order'\n", "\n", " #nw_mask = nw.isna().sum(axis=1) != nw.shape[1]\n", " #nw = nw.loc[nw_mask, nw_mask].astype('float')\n", " #np.fill_diagonal(nw.values, 1)\n", " return _runNV(go, nw, **kwargs)\n", "\n", "def _runNV(go, nw, nFold=3, min_count=5, max_count=1000000):\n", "\n", " #Make sure genes are same in go and nw\n", " #go.index = go.index.map(str) \n", " #nw.index = nw.index.map(str)\n", " #nw.index = nw.index.str.replace('_', '')\n", " #go.index = go.index.str.replace('_', '')\n", " #print (nw)\n", " genes_intersect = go.index.intersection(nw.index)\n", "\n", "\n", " #print (genes_intersect)\n", " go = go.loc[genes_intersect, :]\n", " nw = nw.loc[genes_intersect, genes_intersect]\n", " #print (go)\n", " print (nw.shape)\n", " print (go.shape)\n", " sparsity = 1.0 - np.count_nonzero(go) / go.size\n", " print (sparsity)\n", " sparsity = 1.0 - np.count_nonzero(nw) / nw.size\n", " print (sparsity)\n", " #print(nw\n", " #print(go\n", " nw_mask = nw.isna().sum(axis=1) != nw.shape[1]\n", " nw = nw.loc[nw_mask, nw_mask].astype('float')\n", " np.fill_diagonal(nw.values, 1)\n", " #Make sure there aren't duplicates\n", " duplicates = nw.index.duplicated(keep='first')\n", " nw = nw.loc[~duplicates, ~duplicates]\n", "\n", " go = go.loc[:, (go.sum(axis=0) > min_count) & (go.sum(axis=0) < max_count)]\n", " go = go.loc[~go.index.duplicated(keep='first'), :]\n", " #print(go)\n", "\n", " roc = _new_egad(go.values, nw.values, nFold)\n", "\n", " col_names = ['AUC', 'AVG_NODE_DEGREE', 'DEGREE_NULL_AUC', 'P_Value']\n", " #Put output in dataframe\n", " return pd.DataFrame(dict(zip(col_names, roc)), index=go.columns), go\n", "\n", "def _new_egad(go, nw, nFold):\n", "\n", " #Build Cross validated Positive\n", " x, y = np.where(go)\n", " #print(x, y)\n", " cvgo = {}\n", " for i in np.arange(nFold):\n", " a = x[i::nFold]\n", " #print(a)\n", " b = y[i::nFold]\n", " dat = np.ones_like(a)\n", " mask = sparse.coo_matrix((dat, (a, b)), shape=go.shape)\n", " cvgo[i] = go - mask.toarray()\n", "\n", " CVgo = np.concatenate(list(cvgo.values()), axis=1)\n", " #print(CVgo)\n", "\n", " sumin = np.matmul(nw.T, CVgo)\n", "\n", " degree = np.sum(nw, axis=0)\n", " #print(degree)\n", " #print(degree[:, None])\n", "\n", " predicts = sumin / degree[:, None]\n", " #print(predicts)\n", "\n", " np.place(predicts, CVgo > 0, np.nan)\n", "\n", " #print(predicts)\n", "\n", " #Calculate ranks of positives\n", " rank_abs = lambda x: stats.rankdata(np.abs(x))\n", " predicts2 = np.apply_along_axis(rank_abs, 0, predicts)\n", " #print(predicts2)\n", "\n", " #Masking Nans that were ranked (how tiedrank works in matlab)\n", " predicts2[np.isnan(predicts)] = np.nan\n", " #print(predicts2)\n", "\n", " filtering = np.tile(go, nFold)\n", " #print(filtering)\n", "\n", " #negatives :filtering == 0\n", " #Sets Ranks of negatives to 0\n", " np.place(predicts2, filtering == 0, 0)\n", "\n", " #Sum of ranks for each prediction\n", " p = bottleneck.nansum(predicts2, axis=0)\n", " n_p = np.sum(filtering, axis=0) - np.sum(CVgo, axis=0)\n", "\n", " #Number of negatives\n", " #Number of GO terms - number of postiive\n", " n_n = filtering.shape[0] - np.sum(filtering, axis=0)\n", "\n", " roc = (p / n_p - (n_p + 1) / 2) / n_n\n", " U = roc * n_p * n_n\n", " Z = (np.abs(U - (n_p * n_n / 2))) / np.sqrt(n_p * n_n *\n", " (n_p + n_n + 1) / 12)\n", " roc = roc.reshape(nFold, go.shape[1])\n", " Z = Z.reshape(nFold, go.shape[1])\n", " #Stouffer Z method\n", " Z = bottleneck.nansum(Z, axis=0) / np.sqrt(nFold)\n", " #Calc ROC of Neighbor Voting\n", " roc = bottleneck.nanmean(roc, axis=0)\n", " P = stats.norm.sf(Z)\n", "\n", " #Average degree for nodes in each go term\n", " avg_degree = degree.dot(go) / np.sum(go, axis=0)\n", "\n", " #Calc null auc for degree\n", " ranks = np.tile(stats.rankdata(degree), (go.shape[1], 1)).T\n", "\n", " np.place(ranks, go == 0, 0)\n", "\n", " n_p = bottleneck.nansum(go, axis=0)\n", " nn = go.shape[0] - n_p\n", " p = bottleneck.nansum(ranks, axis=0)\n", "\n", " roc_null = (p / n_p - ((n_p + 1) / 2)) / nn\n", " #print(roc)\n", " return roc, avg_degree, roc_null, P" ] }, { "cell_type": "code", "execution_count": 3, "id": "d33f0c22", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:numexpr.utils:Note: detected 192 virtual cores but NumExpr set to maximum of 64, check \"NUMEXPR_MAX_THREADS\" environment variable.\n", "INFO:numexpr.utils:Note: NumExpr detected 192 cores but \"NUMEXPR_MAX_THREADS\" not set, so enforcing safe limit of 8.\n", "INFO:numexpr.utils:NumExpr defaulting to 8 threads.\n" ] } ], "source": [ "from hicmatrix import HiCMatrix as hm\n", "from hicmatrix.lib import MatrixFileHandler" ] }, { "cell_type": "code", "execution_count": 4, "id": "e9c80797", "metadata": {}, "outputs": [], "source": [ "exp_file_path=f'/grid/gillis/data/lohia/hi_c_data_processing/software/CoCoCoNet/networks/human_prioAggNet.h5'\n", "\n", "jac_exp = hm.hiCMatrix(exp_file_path)\n", "all_genes = [x[3].decode() for x in jac_exp.cut_intervals]\n", "df_exp_corr = pd.DataFrame(jac_exp.matrix.toarray() , index=all_genes, columns = all_genes)" ] }, { "cell_type": "code", "execution_count": 5, "id": "c7518350", "metadata": {}, "outputs": [], "source": [ "resolution_human = 1000\n", "species = \"human\"\n", "SRP_name = \"aggregates\"\n", "resolution = \"1kbp_raw\"\n", "\n", "exp_file_path=f'/grid/gillis/data/lohia/hi_c_data_processing/data_{species}/{SRP_name}/{resolution}/max/spr_cre/hic_gene_corr_inter_excluding_intra_chrom_pairs_hicexp.h5'\n", "\n", "jac_exp = hm.hiCMatrix(exp_file_path)\n", "\n", "all_genes = [x[3].decode() for x in jac_exp.cut_intervals]\n", "\n", "f_m = jac_exp.matrix.toarray()\n", "f_m = f_m + abs(f_m.min())\n", "np.fill_diagonal(f_m, 1)\n", "df_hic_corr = pd.DataFrame(f_m, index=all_genes, columns = all_genes)\n", "\n" ] }, { "cell_type": "code", "execution_count": 19, "id": "2c853985", "metadata": {}, "outputs": [], "source": [ "DF_dism = 1 - df_hic_corr" ] }, { "cell_type": "code", "execution_count": 22, "id": "508b4781", "metadata": {}, "outputs": [], "source": [ "import pandas as pd, seaborn as sns\n", "import scipy.spatial as sp, scipy.cluster.hierarchy as hc\n", "from sklearn.datasets import load_iris\n", "sns.set(font=\"monospace\")\n", "\n", "\n", "DF_dism = df_hic_corr.max().max() - df_hic_corr # distance matrix\n", "DF_dism = DF_dism.to_numpy()\n", "np.fill_diagonal(DF_dism, 0)\n", "linkage_gene = hc.linkage(sp.distance.squareform(DF_dism), method='average')" ] }, { "cell_type": "code", "execution_count": 25, "id": "e1314a1d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.0" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "DF_dism.min()" ] }, { "cell_type": "code", "execution_count": 26, "id": "44a8e134", "metadata": {}, "outputs": [ { "ename": "RecursionError", "evalue": "maximum recursion depth exceeded while getting the str of an object", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mRecursionError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;31m# Create a dendrogram\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mdn\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdendrogram\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlinkage_gene\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdistance_sort\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;31m# Display the dendogram\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/.conda/envs/hicexplorer/lib/python3.8/site-packages/scipy/cluster/hierarchy.py\u001b[0m in \u001b[0;36mdendrogram\u001b[0;34m(Z, p, truncate_mode, color_threshold, get_leaves, orientation, labels, count_sort, distance_sort, show_leaf_counts, no_plot, no_labels, leaf_font_size, leaf_rotation, leaf_label_func, show_contracted, link_color_func, ax, above_threshold_color)\u001b[0m\n\u001b[1;32m 3336\u001b[0m \u001b[0mcontraction_marks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mshow_contracted\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3337\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3338\u001b[0;31m _dendrogram_calculate_info(\n\u001b[0m\u001b[1;32m 3339\u001b[0m \u001b[0mZ\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mZ\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mp\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mp\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3340\u001b[0m \u001b[0mtruncate_mode\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtruncate_mode\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/.conda/envs/hicexplorer/lib/python3.8/site-packages/scipy/cluster/hierarchy.py\u001b[0m in \u001b[0;36m_dendrogram_calculate_info\u001b[0;34m(Z, p, truncate_mode, color_threshold, get_leaves, orientation, labels, count_sort, distance_sort, show_leaf_counts, i, iv, ivl, n, icoord_list, dcoord_list, lvs, mhr, current_color, color_list, currently_below_threshold, leaf_label_func, level, contraction_marks, link_color_func, above_threshold_color)\u001b[0m\n\u001b[1;32m 3643\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3644\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0muivb\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0muwb\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mubh\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mubmd\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m=\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3645\u001b[0;31m _dendrogram_calculate_info(\n\u001b[0m\u001b[1;32m 3646\u001b[0m \u001b[0mZ\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mZ\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mp\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mp\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3647\u001b[0m \u001b[0mtruncate_mode\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtruncate_mode\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "... last 1 frames repeated, from the frame below ...\n", "\u001b[0;32m~/.conda/envs/hicexplorer/lib/python3.8/site-packages/scipy/cluster/hierarchy.py\u001b[0m in \u001b[0;36m_dendrogram_calculate_info\u001b[0;34m(Z, p, truncate_mode, color_threshold, get_leaves, orientation, labels, count_sort, distance_sort, show_leaf_counts, i, iv, ivl, n, icoord_list, dcoord_list, lvs, mhr, current_color, color_list, currently_below_threshold, leaf_label_func, level, contraction_marks, link_color_func, above_threshold_color)\u001b[0m\n\u001b[1;32m 3643\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3644\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0muivb\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0muwb\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mubh\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mubmd\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m=\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3645\u001b[0;31m _dendrogram_calculate_info(\n\u001b[0m\u001b[1;32m 3646\u001b[0m \u001b[0mZ\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mZ\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mp\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mp\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3647\u001b[0m \u001b[0mtruncate_mode\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtruncate_mode\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mRecursionError\u001b[0m: maximum recursion depth exceeded while getting the str of an object" ] } ], "source": [ "from scipy.cluster.hierarchy import dendrogram\n", " \n", "# Create a dendrogram\n", "dn = dendrogram(linkage_gene, distance_sort=True)\n", " \n", "# Display the dendogram\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 58, "id": "7ad93678", "metadata": {}, "outputs": [], "source": [ "from scipy.cluster.hierarchy import fcluster, linkage\n", "cluster_labels = fcluster(linkage_gene, 0.7, criterion='distance')" ] }, { "cell_type": "code", "execution_count": 59, "id": "8142a1f4", "metadata": {}, "outputs": [], "source": [ "from collections import Counter\n", "z = list(cluster_labels)\n", "z = Counter(z)" ] }, { "cell_type": "code", "execution_count": 60, "id": "4d2c3a44", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/grid/gillis/home/lohia/.conda/envs/hicexplorer/lib/python3.8/site-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.\n", " warnings.warn(\n" ] }, { "data": { "text/plain": [ "(0.0, 30.0)" ] }, "execution_count": 60, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAD/CAYAAAAAJProAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAAZIklEQVR4nO3dfZRddX3v8XdmMpOQZxiDENT6dPlCUaAoeqlXl6tCi1K5oXKrrdIrikWtl2sFXNwqq9CKCqhtxdYbsaUqtopPqBGkxkoX+FxByoN8FSXcIgRGNDAgJCHJ/WPviScn55zfPpOZM2N8v9bKysw++zv7t/f57fM5+7f32Wfe9u3bkSSpl6HZboAkae4zLCRJRYaFJKnIsJAkFRkWkqQiw0KSVDR/EAuJiIXA1cBewDzgm8DrM3NLRJwJvLae9Z2ZefEg2iRJam7eID5nERFDwLLM3Fj//nGq8LgauAI4lCpIbgQOz8y7Z7xRkqTGBnJkkZnbgI2w4yhjOVU4rAYuz8wJYCIi1gHHAR8cRLskSc0M5MhiUh0GzwLWAS8HLgTW1w/fAzwN+Hlmntvgzy0AjgTuBrZOe2Mlac80DOwPfBvY1LRoIEcWkzLz6IhYDFwKHNMy/V0AEXF+H3/uSOCa6W2hJP3KeC5wbdOZBxoWAJn5UER8DHgN8FXggJaHD6A6j9HEjvMa9136mcbLH3vFCdx334OMjS1h/NI1jWpWvuJU7rvvwap+bAk/vuR1jeoOOPn9O+okaS4YGprH3nsvhpbX0CYGdTXUKmB7Zt4dEcPAi4DbgM8CayPibKpzGEcDZzT8szuGnrZNPNRXe7Zt217XPdB3DcDWifEp1UnSHNLX8P2gPmexP3BlRNwI3AyMAOdk5veANcANwDeAszNzw4DaJElqaFBXQ30HOLzLYxcAFwyiHZKkqfET3JKkIsNCklRkWEiSigwLSVKRYSFJKjIsJElFhoUkqciwkCQVGRaSpCLDQpJUZFhIkooMC0lSkWEhSSoyLCRJRYaFJKnIsJAkFRkWkqQiw0KSVGRYSJKKDAtJUpFhIUkqMiwkSUWGhSSpyLCQJBUZFpKkIsNCklRkWEiSiuYPYiERsTdwGfAEYDPwZeBNwPOAK4Dv17PelpknDqJNkqTmBhIWwHbg3My8NiKGgS8CJwF3ALdk5jMH1A5J0hQMZBgqMzdm5rX1z1uBW4BVg1i2JGn3DerIYoeIWAYcX/8bAw6MiBuAh4C3Z+baQbdJktTbQMOiHoK6FLgoM2+sg2NVZj4YEUcAV0XEszPzRzPZjpUrlw6kprVu26ObGZo/2qimn3klaRAGfWSxBrg9M98DkJkPTD6QmddFxHeAI4AZDYvx8Ym+X/zHxyeA/kOjte7ba17cqObIUz+/o06SptPQ0DzGxpb0XzcDbekoIi4ERoE3tkx7XH20QUSsAg4FbhpUmyRJzQzq0tlDgDOAm4HrIwLgE8ADwGkR8TDVFVNnZuatg2iTJKm5gYRFZt4MzOvy8EWDaIMkaer8BLckqciwkCQVGRaSpCLDQpJUZFhIkooMC0lSkWEhSSoyLCRJRYaFJKnIsJAkFRkWkqQiw0KSVGRYSJKKDAtJUpFhIUkqMiwkSUWGhSSpyLCQJBUZFpKkIsNCklRkWEiSigwLSVKRYSFJKjIsJElFhoUkqciwkCQVGRaSpCLDQpJUNH8QC4mIvYHLgCcAm4EvA2/KzG0RcSbw2nrWd2bmxYNokySpuUEdWWwHzs3MAA4HDgFOioiDgdfX044C3hYR+w+oTZKkhgYSFpm5MTOvrX/eCtwCrAJWA5dn5kRm3gusA44bRJskSc0NZBiqVUQsA46v/50KrI+IM4B7gDuBA2a6DStXLh1IzWzUSdJMGGhYRMQwcClwUWbeGBEAZOa76sfPH0Q7xscn+n4xHh+fAPp/Ed/dOkmaTkND8xgbW9J/3Qy0pZc1wO2Z+Z769/YjiQOAHw+4TZKkgoGFRURcCIwCb2yZ/Fng+IhYEhErgaOBtYNqkySpmUFdOnsIcAZwM3B9Pfz0icw8LyLWADfUs56dmRsG0SZJUnMDCYvMvBmY1+WxC4ALBtEOSdLU+AluSVKRYSFJKjIsJElFhoUkqciwkCQVGRaSpCLDQpJUZFhIkooMC0lSkWEhSSoyLCRJRYaFJKnIsJAkFRkWkqQiw0KSVGRYSJKKGodFRDymy/Sx6WuOJGku6ufI4kddpl8/HQ2RJM1d/YTFLl+LGhHDwOj0NUeSNBcVv4M7Ij5c/zja8vOkg4Cbpr1VkqQ5pRgWwA/r/7e3/AywDbgGuGy6GyVJmluKYZGZ5wJExBmTP0uSfrX0c87ihTPWCknSnNY4LDLz2plsiCRp7mpyzgKAiDgS+HPgQGBh62OZ+YRpbpckaQ5pHBbAx4DPAX8DbJ6Z5kiS5qJ+wmIZcHpmbpupxkiS5qZ+wuI84M8i4v3Az1ofaBIgEXEW8BrgycDTM/OmiHg+cAXw/Xq22zLzxD7aJEkagH7C4j31/62Xz86j+vzFcIP6r1ANZV3dNv2WzHxmH+2QJA1YP2HxpN1ZUGZ+EyAidufPSJJmQeOwyMw7ZqgNB0bEDcBDwNszc+0MLUeSNEX9XDr7Eaohp11k5h9NcfnXAasy88GIOAK4KiKenZnd7nA7LVauXDqQmtmok6SZ0M8w1G1tvy+j+lT3lD+sl5kPtPx8XUR8BziC7rdDnxbj4xN9vxiPj08A/b+I726dJE2noaF5jI0t6buun2GoXe4LFRFnA+13om0sIh4H3J2ZWyNiFXAo3sVWkuacfo4sOhmieoEviohzgFOA/YB1EXEr8CngtIh4mGqI68zMvHU32yRJmmb9nLO4hp3PWcwHAvhkk/rMPAc4p8NDFzVtgyRpdvRzZPHBtt8fAX6QmddNY3skSXNQP+csPjSTDZEkzV19nbOIiMOA/0513uEeYG1mfmcmGiZJmjsaf59FRLya6pYdTwEmqO7x9KWI+OMZapskaY7o58jiLOB5mbnj0taIeDrwWeAD090wSdLc0c/Xqq4AftA27TaqD+dJkvZg/RxZ/AtwWUT8FbCB6rzFn9bTJUl7sH6OLF4H3AF8CLi+/v//1dMlSXuwfo4sFgIXZOZpkxPq23UsmPZWSZLmlH6OLD4BPLVt2pNp+AluSdIvr37C4jDg39qmXVtPlyTtwfoJiw3AwW3TDq6nS5L2YP2cs3gv8PmIeDewHngicDq/+G5uSdIeqp97Q/1dRPwMeCXweOA/gbdm5j/PUNskSXNEX/eGqoPBcJCkXzH9nLOQJP2KMiwkSUWGhSSpyLCQJBUZFpKkIsNCklRkWEiSigwLSVKRYSFJKjIsJElFhoUkqciwkCQV9XUjwd0REWcBr6H6dr2nZ+ZN9fQzgdfWs70zMy8eVJskSc0M8sjiK8ALgDsmJ0TEwcDrgcOBo4C3RcT+A2yTJKmBgYVFZn4zM9e3TV4NXJ6ZE5l5L7AOOG5QbZIkNTOwYaguDgDWR8QZwD3AnfU0SdIcMtthAUBmvgsgIs4fxPJWrlw6kJrZqJOkmTDbYdF+JHEAcPVML3R8fKLvF+Px8Qmg/xfx3a2TpOk0NDSPsbEl/dfNQFv68Vng+IhYEhErgaOBtbPcJklSm4GFRUScExF3Ao8D1kXE1Zn5PWANcAPwDeDszNwwqDZJkpoZ2DBUZp4DnNNh+gXABYNqhySpf7M9DCVJ+iVgWEiSigwLSVKRYSFJKjIsJElFhoUkqciwkCQVGRaSpCLDQpJUZFhIkooMC0lSkWEhSSoyLCRJRYaFJKnIsJAkFRkWkqQiw0KSVGRYSJKKDAtJUpFhIUkqMiwkSUWGhSSpyLCQJBUZFpKkIsNCklRkWEiSigwLSVLR/NluAEBErAceqf8B/GFm3jJ7LZIktZoTYVE7MTNvmu1GSJJ25TCUJKlorhxZbAc+GhFDwJXAWzNz8yy3SZJUmyth8ZzMvCsilgIfB94MvG2mFrZy5dKB1ExH3dZHNzM8f7RRTT/zSlI/5kRYZOZd9f8TEfFh4BUzubzx8Ym+X8THxyeA/l/8p6Puqr9/UaOa33n1FTvqJKmToaF5jI0t6b9uBtrSl4hYEhH71D8PAy8EvjurjZIk7WQuHFnsC6yNiG1U4fU14LzZbZIkqdWsh0Vm/gj49dluhySpu1kfhpIkzX2GhSSpaNaHoTQzViwfZWR0QaN5t2zexMb7/ViLpO4Miz3UyOgCPn7JsY3mfenJXwQMC0ndOQwlSSoyLCRJRYaFJKnIsJAkFRkWkqQiw0KSVOSls5oWy1eMMDqysNG8m7c8wv0bt8xwiyRNJ8NC02J0ZCHnffx3Gs37lpdeBRgW0i8Th6EkSUWGhSSpyLCQJBUZFpKkIsNCklTk1VDaiZfATq+lKxaycGSk0byPbNnCxMZHZrhFmmn7LF/E8Ohwo3m3bt7KT+//+Qy3aHoYFtrJ6MhC1nyk2SWwp57kJbAlC0dG+N1PfrTRvGtPfDkTGBa/7IZHh9nw7lsbzbvf6QfNcGumj8NQkqQiw0KSVGRYSJKKDAtJUpFhIUkq8moozaplK0ZZMLKg0bybtmzigY2bAVi6YgELR0Yb1T2yZTMTGzfVdVO7lHXQl8AuXbEXC0ea7Z6PbHmUiY0P72bdIhaONLvc85EtW5nYWF3uuWzFIhY0rNu0ZSsPbPw5y1csZnSk+fvUzVu2cf/Gh1ixYjEjfdRt2bKNjRsfYu/li5k/2rzu0c3b+Nn9D7HP8sUM91G3dfM2fnr/Q43nb7fP8r0YHm323G3d/Cg/vf/hKdVtnJha3zQsNKsWjCzg5M8c22jeS074IlCFxcKRUV50+emN6q5Y/W4m2FTXjXDcZy5sVPeFE87ccSnrwpERjvvUB5rVveSPd/sS2IUj83nxJy9vNO/nT1zNREvd6k9+uVHd5Se+oKVumBM/dV2juk++5IgddQtGhjntM//ZqO69JzwegNGRIT746Xsb1QCc8nv7AjAyMsSVH/9J47oXvvQxAMwfHeLblzRf3pEnV8sbHh1i/V9vaFz3xDfu13jeToZH53PP33y90byP/d9H7VR37/uubFS37xteOKW2gcNQkqQGDAtJUtGcGIaKiN8A/hFYDHwXOCkzH57NNkmSfmGuHFn8X+AtmflU4BHgT2a5PZKkFrN+ZBER+wNPBb5QT/oQ8OfAuwqlOy7BGFq6uK9lDg3Nq+uW9V0DMLx05ZTqRpfsO6W6hVOsW7TksVOqW7J4anXLF02tbmyKdfsu2nuKdVN73vddtGSKdc375851i6ZY1+xGkO11Kxc1u7qsvW6fRc2uhmqtW7Kov/epk3V7TbFudMnU6uYva75urXXDy5pdOddaAzC0tNmVgbvW7TWVur5Wbt727dv7mX/aRcQzqYagXgacBZwHXJGZTyqU/jfgmpltnSTtsZ4LXNt05lk/spiUmTcBr4iIgxuWfJtqZe8Gts5YwyRpzzIM7E/1GtrYXAiLHwP7R8S8zNwOHFBPK9lEH6koSdrhh/0WzPoJ7sy8G/gB8KJ60h8Bl89agyRJu5j1sKi9Dnh7RPwQWAT87Sy3R5LUYtZPcEuS5r65cmQhSZrDDAtJUpFhIUkqMiwkSUVz4XMW0yoizgJeAzwZeHr9Yb9Szd7AZcATqL4w4cvAmzJzW6FuIXA1sBcwD/gm8PrM3NJHey8BXpyZj2k4/3qq+2dNfmHCH2bmLQ3q9gc+CBxE9RmV/5WZPb/4ICKeC1zUMumxwNcz8/caLO904FXAFuBb9fI2dZiv4/MVEWcCr61ne2dmXlyqi4i9gM8BRwK3ZeYzmywvIo4H3kb15mkTcHZmXtGw7i+B7fW/d2TmZU3Wr37sMcAtwIcz84yW6V37Y6/t0q0O+N1e61eo67p+PeqeB1wBfL+e9bbMPLH9uaj/xo7+32R7dqqrfz8eOL+uezAzn9U2/3ra9hvgHuAfgCdSfbD3HZn5iba6XfYb4FjgmJbZDgb+R2Z+rq7puN9Q9YNdltdtn25/roExWvoS8EzgEuDdwAmT8032iYhYAXyM6nZKPwVenpk/qB87CLi4btsW4MTM/B497IlHFl8BXgDc0UfNduDczAzgcOAQ4KQGdZuBYzPzsMw8FFgGnNJ0oRFxDNWlwv06MTMPr/8Vg6L2fuC6zHwKVUe7vlSQmde0LOdw4D+oOl9PEXE41U71bOA3gKXAn3aZfZfnq/4U/+upnoujgLfVO23POqod8Hx6P3ed6u4GjsnMpwEvBf4pItqfl051/wYcUW+b3wbWRMTyBnWT3k3nD5Z27I8Ntku3flxav251pfXrtd/c0tJ3ugVFe/9vsj13qYuI/ahehF+Wmb9OFXKdtO83bwV+mJmHUX3O628j4vFtNbvsN5l5Zss+cRzwAHDVZEGP/abX8nZqW6fnuv47k31pDPh94C6qWyXtmK+lT/wf4Kb6Bq0fAP6qZb3+GfjHzDwQeAZwZ5dttsMed2SRmd8EiIh+ajZS77SZuTUibgFWNajbBmysl7cQWE51lFEUEYuBs6k+Y/KCxo2dgojYh+qd0MuhWkeqdxr9/I0nAEcAxzeY/UDgxsx8sK79V6p3cu9sn7HL87UauDwzJ4CJiFhHtVN+sFddZm4G1kXE87s1rEtd620PJj/Zuhz4eaHu/pa6MWAE2OlOcN36Y0QcB/wEuB1Y0lazkc79cTU9tku3usz8UK/161HXc/16tLP4Rq1T/2+yPbvsNy8DrszMG+q/0/Rr8QL4+7pmQ0T8APgtqpuZNt1vTgYu63TUXP+N1v3mlV2W18lqdn2uV2XmFXVfOgv4M+Bfgc936ROrqQIF4KPA+yJiAdWR0D5UAUtmth7VdLXHhcXuiohlVE9skxfFyZp1wLOAdVTvRJp4B/DXQL9f2rsd+GhEDAFXAm+tXyR7+TWqdyB/Ub8ru4NqWGh9H8t9FT12ijY3AkdExEqqMH0x1W1cmjoAWB8RZ1ANFdzZZ/3uWA1kfWeBooj4TaoXgF8DXtvkharuY2dTvRD1/G7Ytv54Kg23S49+vJoe69de13T92urGgAMj4gaq/v32zFzbVtKx/zdYXqe6JwEPRsRXgL2phsDeUt8+aNIu+w1wA3BsRHwa+C/A09h5e/bcbyJiHtV+8dJO26S2Y7+pt0en5XVqW699YBFwX2ZeHxHDwL1d5jsA+ElEfAZ4A3A/sF+9vW4HPhQRhwE31+t1X4/12COHoaas3vCXAhdl5o1N6zLzaKobc81j57HMbss5CnhyZn56Cs18Tn0Y+5tUne3NDWoWUY1zfqseLvsK8L6mC6w78clUdwcuqsc+zwa+CHwN+Hda3qU3lZnvysyP9Fs3VVG9ZbuA+p1kE5n5tcw8GPivwGn1i2bJ+cB76neDvdrTsT+Wtku3utL6daprsn4d6q6jehd8GNWL1CUR8eSW+bv2/17L61G3iOo8wu9TDcX8FvCStnk67TfvoLqp3nepvhLhKnbup6X95gXAw21Hpq3bpX2/6ba8rvt0+3Ndn4dY3taOXn1iU2aekJmt99tbBDwHeH+93HHg3E7r0Mqw2Nka4PbMfE+/hZn5ENW45GsazP5c4JCIuJXqpOCKiCieiK+Xc1f9/wTwYaqdquQuqud6cie7gmr8taljgIluO0WXdv5DZj4jM4+kGjO/vY/ltb9jbnpzySmLiMdR3ZPslZl5W7/1mfkfVDtd8c0C1bmcv6if/zcAJ0fEX3aYr70/Nt0uu/TjhuvXtf8X1m+nusx8YHIIMjOvA75DNRQzqdj/uyyvW91dwFczczyrb9i8mrb+3Wm/qdv5qqzOOR5PtT1b+2lpv3k11Qnmbnbab7otr8s+3e25PoQqcP6p3g6LgVMi4oi2+Wj9G1Fd/LEc2FCv1z2Z+fV6vivbt1cnhkUtIi4ERoE39lGzavJkUv3u6kVA8YUmMy/IzCdl5kFU7042ZnXysbS8JfU46uTyXkj1LqW0vNupxqqfX086CshSXYtTaHhUMakegpoc930jvXeqdp8Fjq/XdyVwNNA+jDFtoroq6QvA6Zn51T7qDo2IkfrnVVQviMW7eWbmEZl5UP38vw+4JDPPbvvbnfpjcbt0qmuyfl3qiuvXpe5xdf+crDsU2BEG3fp/aXk99psvAc+KiMX1cp9FS//utt/U8y+qpx9LNTzzxZbldd1v6r/3Yqojqm522m+6LO+aLvt0x+e6fv7uBI6vt8MGYAL4foc+cTnwivrnPwDW1cPI3wL2il98HUSj14M97t5QEXEO1ZO0H9UJxFsz8/mFmkOoOvPNwKP15E9k5nmFumdQja8OU52Muw54XduJulJ7nwj8eza4dLY+lF8LbKMK+q9RjTUWv688qiuU3k91Av5B4JTMvLVB3WOoxmqfkpkbSvO31F0KHEZ1JdTF3bZlt+crIt5MNUYPnS+d7VZ3M7Av1buoe6nGy/+uVx3Vff1PBX7UsoiT2oZwOtV9ieouyZuo+sB7M3NNk3a2Pb4kd750tmt/7LVdutUBK3qtX486eq1fj7oHgNOAh6nG4y/IzI/SQWv/j4i39Fpet7r699Pr2mGqL0X7k6wvfe+231BdNrqW6tztQ3XNN9qWczgd9puIOA347czseOVVp/2mbvNOy6Pqox336fbnmuooob0PPhH4FNW5KNj50tm9qa56eirwM6pLcicvnT2aajh0PlXg/M/S/r3HhYUkafo5DCVJKjIsJElFhoUkqciwkCQVGRaSpCLDQpJUZFhIkooMC0lS0f8HcNNL4hx1gmgAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "%matplotlib inline\n", "import seaborn as sns\n", "ax=sns.countplot(list(z.values()))\n", "ax.set_ylim([0,30])" ] }, { "cell_type": "code", "execution_count": 61, "id": "d1bfa4f3", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "5009 1006\n", "4986 43\n", "7791 14\n", "7879 25\n", "7890 504\n", "4996 6\n", "7176 79\n", "7724 67\n", "7123 12\n", "7203 79\n", "7856 5\n", "7718 9\n", "7723 5\n", "7628 10\n", "6980 46\n", "7888 6\n", "6995 59\n", "6989 23\n", "5007 5\n", "7160 6\n", "7198 6\n", "7159 9\n", "7161 5\n", "7450 9\n", "6979 20\n", "6697 14\n", "7351 22\n", "6949 5\n", "6978 13\n", "7877 7\n", "4927 11\n", "7388 7\n" ] } ], "source": [ "df_exp_clust = pd.DataFrame(cluster_labels , index=all_genes)\n", "\n", "df_exp_clust['label'] = df_exp_clust[0]\n", "\n", "for clust, clust_len in zip(z.keys(), z.values()):\n", " if clust_len >= 5:\n", " \n", " print (clust, clust_len)\n", " df_exp_clust[clust] = [1 if x == clust else 0 for x in df_exp_clust['label'].tolist()]" ] }, { "cell_type": "code", "execution_count": 62, "id": "cfe1e8e4", "metadata": {}, "outputs": [], "source": [ "df_exp_clust = df_exp_clust.drop(['label', 0], axis=1)" ] }, { "cell_type": "code", "execution_count": 63, "id": "09b3ec7e", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "from scipy import stats, sparse\n", "import bottleneck\n", "def run_egad(go, nw, **kwargs):\n", " \"\"\"EGAD running function\n", " \n", " Wrapper to lower level functions for EGAD\n", "\n", " EGAD measures modularity of gene lists in co-expression networks. \n", "\n", " This was translated from the MATLAB version, which does tiled Cross Validation\n", " \n", " The useful kwargs are:\n", " int - nFold : Number of CV folds to do, default is 3, \n", " int - {min,max}_count : limits for number of terms in each gene list, these are exclusive values\n", "\n", "\n", " Arguments:\n", " go {pd.DataFrame} -- dataframe of genes x terms of values [0,1], where 1 is included in gene lists\n", " nw {pd.DataFrame} -- dataframe of co-expression network, genes x genes\n", " **kwargs \n", " \n", " Returns:\n", " pd.DataFrame -- dataframe of terms x metrics where the metrics are \n", " ['AUC', 'AVG_NODE_DEGREE', 'DEGREE_NULL_AUC', 'P_Value']\n", " \"\"\"\n", " assert nw.shape[0] == nw.shape[1] , 'Network is not square'\n", " #print(nw.index)\n", " #nw.columns = nw.columns.astype(int)\n", " #print(nw.columns.astype(int))\n", " assert np.all(nw.index == nw.columns) , 'Network index and columns are not in the same order'\n", "\n", " #nw_mask = nw.isna().sum(axis=1) != nw.shape[1]\n", " #nw = nw.loc[nw_mask, nw_mask].astype('float')\n", " #np.fill_diagonal(nw.values, 1)\n", " return _runNV(go, nw, **kwargs)\n", "\n", "def _runNV(go, nw, nFold=3, min_count=5, max_count=1000000):\n", "\n", " #Make sure genes are same in go and nw\n", " #go.index = go.index.map(str) \n", " #nw.index = nw.index.map(str)\n", " #nw.index = nw.index.str.replace('_', '')\n", " #go.index = go.index.str.replace('_', '')\n", " #print (nw)\n", " genes_intersect = go.index.intersection(nw.index)\n", "\n", "\n", " #print (genes_intersect)\n", " go = go.loc[genes_intersect, :]\n", " nw = nw.loc[genes_intersect, genes_intersect]\n", " #print (go)\n", " print (nw.shape)\n", " print (go.shape)\n", " sparsity = 1.0 - np.count_nonzero(go) / go.size\n", " print (sparsity)\n", " sparsity = 1.0 - np.count_nonzero(nw) / nw.size\n", " print (sparsity)\n", " #print(nw\n", " #print(go\n", " nw_mask = nw.isna().sum(axis=1) != nw.shape[1]\n", " nw = nw.loc[nw_mask, nw_mask].astype('float')\n", " np.fill_diagonal(nw.values, 1)\n", " #Make sure there aren't duplicates\n", " duplicates = nw.index.duplicated(keep='first')\n", " nw = nw.loc[~duplicates, ~duplicates]\n", "\n", " go = go.loc[:, (go.sum(axis=0) > min_count) & (go.sum(axis=0) < max_count)]\n", " go = go.loc[~go.index.duplicated(keep='first'), :]\n", " #print(go)\n", "\n", " roc = _new_egad(go.values, nw.values, nFold)\n", "\n", " col_names = ['AUC', 'AVG_NODE_DEGREE', 'DEGREE_NULL_AUC', 'P_Value']\n", " #Put output in dataframe\n", " return pd.DataFrame(dict(zip(col_names, roc)), index=go.columns), go\n", "\n", "def _new_egad(go, nw, nFold):\n", "\n", " #Build Cross validated Positive\n", " x, y = np.where(go)\n", " #print(x, y)\n", " cvgo = {}\n", " for i in np.arange(nFold):\n", " a = x[i::nFold]\n", " #print(a)\n", " b = y[i::nFold]\n", " dat = np.ones_like(a)\n", " mask = sparse.coo_matrix((dat, (a, b)), shape=go.shape)\n", " cvgo[i] = go - mask.toarray()\n", "\n", " CVgo = np.concatenate(list(cvgo.values()), axis=1)\n", " #print(CVgo)\n", "\n", " sumin = np.matmul(nw.T, CVgo)\n", "\n", " degree = np.sum(nw, axis=0)\n", " #print(degree)\n", " #print(degree[:, None])\n", "\n", " predicts = sumin / degree[:, None]\n", " #print(predicts)\n", "\n", " np.place(predicts, CVgo > 0, np.nan)\n", "\n", " #print(predicts)\n", "\n", " #Calculate ranks of positives\n", " rank_abs = lambda x: stats.rankdata(np.abs(x))\n", " predicts2 = np.apply_along_axis(rank_abs, 0, predicts)\n", " #print(predicts2)\n", "\n", " #Masking Nans that were ranked (how tiedrank works in matlab)\n", " predicts2[np.isnan(predicts)] = np.nan\n", " #print(predicts2)\n", "\n", " filtering = np.tile(go, nFold)\n", " #print(filtering)\n", "\n", " #negatives :filtering == 0\n", " #Sets Ranks of negatives to 0\n", " np.place(predicts2, filtering == 0, 0)\n", "\n", " #Sum of ranks for each prediction\n", " p = bottleneck.nansum(predicts2, axis=0)\n", " n_p = np.sum(filtering, axis=0) - np.sum(CVgo, axis=0)\n", "\n", " #Number of negatives\n", " #Number of GO terms - number of postiive\n", " n_n = filtering.shape[0] - np.sum(filtering, axis=0)\n", "\n", " roc = (p / n_p - (n_p + 1) / 2) / n_n\n", " U = roc * n_p * n_n\n", " Z = (np.abs(U - (n_p * n_n / 2))) / np.sqrt(n_p * n_n *\n", " (n_p + n_n + 1) / 12)\n", " roc = roc.reshape(nFold, go.shape[1])\n", " Z = Z.reshape(nFold, go.shape[1])\n", " #Stouffer Z method\n", " Z = bottleneck.nansum(Z, axis=0) / np.sqrt(nFold)\n", " #Calc ROC of Neighbor Voting\n", " roc = bottleneck.nanmean(roc, axis=0)\n", " P = stats.norm.sf(Z)\n", "\n", " #Average degree for nodes in each go term\n", " avg_degree = degree.dot(go) / np.sum(go, axis=0)\n", "\n", " #Calc null auc for degree\n", " ranks = np.tile(stats.rankdata(degree), (go.shape[1], 1)).T\n", "\n", " np.place(ranks, go == 0, 0)\n", "\n", " n_p = bottleneck.nansum(go, axis=0)\n", " nn = go.shape[0] - n_p\n", " p = bottleneck.nansum(ranks, axis=0)\n", "\n", " roc_null = (p / n_p - ((n_p + 1) / 2)) / nn\n", " #print(roc)\n", " return roc, avg_degree, roc_null, P" ] }, { "cell_type": "code", "execution_count": 66, "id": "062fcb37", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(23465, 23465)\n", "(23465, 32)\n", "0.9971540059663329\n", "0.0\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/grid/gillis/home/lohia/.conda/envs/hicexplorer/lib/python3.8/site-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.\n", " warnings.warn(\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 66, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAEMCAYAAAA1VZrrAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAA1WklEQVR4nO3deVhV1frA8S8oojiLZKaZmrYytbLMJs0pb2VaZN2yTH8OV7sOKQjigLOoESgO4Rg4G5mZml0rKzW1wSwtzVoOpaY5kuSEinJ+fxwwRODsA+ecfYb38zz3ueyz9z77XYHnPXutvd7lZ7FYEEIIIQrib3YAQggh3J8kCyGEEDZJshBCCGGTJAshhBA2SbIQQghhU3GzA3CSQOAB4Chw1eRYhBDCExQDqgLfAZdy7/TWZPEAsMnsIIQQwgM1AzbnftFbk8VRgNOnz5OZaf88kuDgMqSmnnN4UO5M2uz9vKW9vXp1A2DOnHk2j/WWNtujsG329/ejYsXSkPX5mZu3JourAJmZlkIli+xzfY202ft5Q3tffrkzYLwt3tBmexWxzXl23XtrshBCeKnmzVuaHYJPkqehhBAeZefOn9i58yezw/A5cmchhPAoI0YMAWDlyv+ZHIlvcVmyUEoNAXoCtYGGWutdNo6vAKQAdYC/gE5a673OjlMIIcSNXNkNtR5oDRw0ePxQYJfWug4wB0hwVmBCCCEK5rJkobX+Vmt9wI5TQoFFWT8vAR5XSgU6Oi4hhBC2ufMAdzXglFLqA6AS8Ddws7khCSGEezp06CCvvPICzz33nFPe390HuC9prZ8DUErZfXJwcJlCXzgkpGyhz/VU0mbv5w3tjYuLBYy3xRvaXJCrV6+SmJjIsGHD8PPzY/bs2U5pszsni8P8c3dRCigPHLPnDVJTzxVqckpISFlOnjxr93meTNrs/bylvXXrNgQw1BZvaXN+9uzRhIf347vvvqV16zbExU2hUaO7CtVmf3+/Ar9gu0U3lFJqoVJqYq6XVwKvZv38MvCZ1vqG4lZCCN+ydeu3bN36rdlhmCojI4OEhDhatXqUffv2kJg4h6VLl1O9+q1Ou6YrH50dDfwH67jDZ0qpX7XWLbJ21wAyc50SC7yjlNoHnAZecVGoQgg3NmHCGMB351n8+ON2Bgzoy+7du3j22Q5MmBBHSEiI06/rsmShtR4NjM5nX4s8XjsNPOnUoIQQwkOkp6cTH/8GM2ZMo3LlEObPX0rbtu1cdn13HrMQQggBfP31FsLD+/Hbb/vp1KkLo0fHUL58BZfGIMlCCCHc1NmzZ4iJGc28eW9To0ZNli9fzWOPtTAlFkkWQgjhhj7//FMiI8P4888jvPZaX4YMGU7p0qVNi0eShRDCo4wb94bZIThVamoqI0YMYfnyd1HqTj76aB2NGzcxOyxJFkIIz9Kw4d1mh+AUFouF1as/YOjQSNLS0oiIGExYWCSBge5R5UiShRDCo2zcuB7wrkWQjh07SlTUQD7++CPuvbcR7723mvr1G5gd1nUkWQghPEpCQhzgHcnCYrGwdOkiRo2K5vLlS4waFcNrr/WheHH3+2h2v4iEEMIHHDjwOxER/dm0aSOPPNKUyZOnU7v27WaHlS9JFkII4UJXr17l7bdnMXHiOPz9ixEXN4XOnbvi7+8W1ZfyJclCCCFc5NdffyE8vC/ff7+NNm2eIC5uCrfcUs3ssAyRZCGEEE52+fJlpk2bTEJCHOXKlWPWrCSee+4F/Pz8zA7NMEkWQgiPEh8/1ewQ7LJ9+/eEhfXjl19+pkOHF4iJeZPKlSubHZbdJFkIITxKnTp1zQ7BkAsXLvDmmxOYNestqlS5mUWL3uWJJ54yO6xCk2QhhPAon3yyFsCtP3i3bNnEwIGv8/vvv9G5czdGjRpLuXLlzQ6rSCRZCCE8ysyZ0wH3TBZnzvzN2LGjWLgwmZo1a7FixRqaNn3M7LAcQpKFEEI4wKefrmXQoHCOHz9Gnz79iYoaRlBQkNlhOYwkCyGEKIJTp04xfHgUK1Ysp169u5g3bzH33dfY7LAcTpKFEEIUgsVi4YMPlhMdHcWZM2eIihpG//4DKVGihNmhOYUkCyGEsNOffx4hKiqcTz/9mPvuu5+EhETq1bvL7LCcSpKFEMKjJCbOMe3amZmZLF68gDFjRnDlSgZjx06gZ8/eFCtWzLSYXEWShRDCo1SrVt2U6/72234iIvqzZcsmmjVrTnz8VGrVqm1KLGaQZCGE8CgrV74PQGjo8y653pUrV5gzZyaxsTEULx7A5MnT6dSpi0eV6nAESRZCCI8yf34S4JpksXv3z4SH92X79h948sm2xMZOpmrVW5x+XXckyUIIIXK5dOkSU6bEM3XqJCpUqMCcOfN49tkOPnc3kZMkCyGEyOH7778jPLwfv/76Cy+88BLjxr1BcHCw2WGZTpKFEEIA58+f5403YpgzZwZVq97CkiXLaNPmSbPDchuSLIQQPm/Tpo0MHPg6Bw8eoGvXHowYMYayZcuZHZZbkWQhhPAoSUmLHPZef/+dxpgxI1i8eAG1a9/OqlVrefjhRx32/t5EkoUQwqM4avxg7dqPiIoK5+TJE/TrF8agQUMpVaqUQ97bG0myEEJ4lJSUJQB07NipUOefPHmS6OhBrFy5grvuasCiRSnce+99jgzRK/mbHYAQQtgjJWXJtYRhD4vFwnvvpdC0aWP+9781DBkynHXrNkqiMMhldxZKqUbAfKA0sAPorLVOL+D4O4G5QBmgGDBSa73S6YEKIbzOkSOHGTQojM8++5T773+AKVMSsX7ECKNceWcxC4jWWtcBLgJ9bRwfCyzUWjcCumJNHEIIYVhmZibz5r1Ns2YP8tVXmxk/PpY1az6VRFEILkkWSqmqQB3go6yXFgChNk7LBMpm/VwGOOKU4IQQXmn//r20aNGCwYMHct99jdm48RufqRDrDK7qhqoGHAXqK6WGAOOzXitIGLBGKRUOBAJP2HvR4OAy9p5yTUhIWdsHeRlps/fzhvYGBFg/7PNry5UrV5g8eTKjRo2iZMmSJCcn07VrV58q1eGM37NLn4bSWu8CXlVK1TNweC8gUWs9SynVAkhRStXXWl8xer3U1HNkZlrsjjMkpCwnT561+zxPJm32ft7S3gUL3gXIsy27du0kLKwvP/20g7Zt2/P227MpXrwMp06dc3WYpins79nf36/AL9iuGrM4AlRVSmWn9mrY7lbqD7wPoLXeAFQAbnVSfEIIDxEUFERQUNB1r126dIk33hjHv/7VnD//PEJS0kLmzVtM1apVTYrS+7gkWWitjwJ7gbZZL3UBVmbvV0otVEpNzHXaEaBl1v6GWLuijjo9WCGEW0tOnkty8j/Pu3z33be0bt2UyZPj6NDh32zevJX27UN9qtvJFVzZDdUbmK+UmgZsBxJz7KuBdUA7p27ANKVUNGABOmmtL7okUiGE21q9+gMAXnzxZd54Yxxz586iWrXqpKS8T6tWbUyOznu5LFlorbcD9+Szr0Uer30NPODksIQQHigt7TQtWjzMoUMH6dGjF9HRoyhTxvMH792ZlPsQQniMtLTT7Nu3hxMnTlCnTl1Wr/6Ehx562OywfIKU+xBCeISPPvqQpk2bcOLECapVq84XX2yRROFChpKFUmqGUqpzPvs6KaXmODYsIYSwOn78OD16dKFbt07cdFMV7r77Xm67rSYlS5Y0OzSfYrQb6gVgSD77VgPTsM6LEEIIh7BYLCxb9g4jRgwhPT2d6OhR9OnTn4CAALND80lGk0UgcLWA/fLbEwLAD85cyCDt3GUqlA2kXKni1mf5hF3++OMQkZEDWL/+c5o0eYiEhLeoW/cOs8PyaUaTxXpgglIqIucMaqVUMWAcsMEJsQnhWfzgl0N/M23ZDi5lXCUwoBj9X7yXejXKS8IwyFr4by7jxo0GYOLEOLp164m//z895omJ0wDo27e/GSH6LKPJ4nWsRQCPKKW2AX8BlYD7gZPA084JTwjPceZCxrVEAXAp4yrTlu0gts8jlCslN9+27Nu3l7Cwvmzd+g0tW7YmPn4qt95a44bj1q37GJBk4WqGBri11n9gnSPRC/gOOJf1/68Bd2utDzktQiE8RNq5y9cSBUDlCiV59rHbOXY6nTMXr4BMKM5TRkYGU6dOomXLR9iz51emTZtJSsqKPBOFMI/heRZaawuwKut/QohcKpQNJDCgGJcyrlK5QknaPlKLd9ftkS6pAuzc+SNhYf3YufNH2rcPZcKEOKpUqWJ2WCIPhpKFUio5j5czsd5h/KC1XujQqITwQOVKFaf/i/cybdkOWt1f41qiAC/qknLQAP7FixeZNCmWt96aQqVKwSQnL6Zdu2ccH69wGKN3FvlViC0ORCil7tRaD3NQTEJ4JgvUq1Ge2D6PcOx0+nVdUmBNGGnnL3tusrBnAL+ApPLNN18zcGA/9u3by8svv8qYMeOpUKGi4TBkfoU5DCULrfWI/PblKAwoyUIIC9Zk4Od3rUsqW2BAMSqULmFicEVjeAA/n6RyayV/YsaNJjl5LjVq3MayZStp0aKV3XGkpKxwVJOEHRxR7uM04Ln/AoRwguwuqcCsVd2yPzDLBXnoXQU3DuDDP3dLZ9IzOHTyPGcuXuHcxSs3JJVhbyTTtOmDzJv3Nj17/pcNG74uVKIQ5jE6ZpHfb9Uf6AR87rCIhPAGObqk0s5fpkLpEtZE4c6D2zbGI3IO4GcLDCiGn58fg2d8de0uovfzd1O2dACX0q5yOf0suzcmc3j3emrWrsOHH35KkyYPFinMSZNiAYiIGFyk9xH2MTpmkZTHa5nAeWAzMNBhEQnhLbK6pK510bh5orA1HpFzAD/7mH7/vodZK3667i5i5vs/8VyLOkxOnMeuL2aTcfEcdz78Iu/PTyCkYtHLiG/atBGQZOFqRscsajk7ECGEeQyNR+Rxt3QuPYOjqReue6+/T59kTtwMfvhiLeVvqk2zF8cy8vUOhASX5cx5KYXiqQq9nkVWqY/mQHugnda6rsOiEkK4VEHjEdcNXue+W8oxkG+xWDj88xfs3piMH1cYNGQkz7/Sk8rlgyhXOoBfDkopFE9mV7JQSgVjXUe7HfAkUAZYA4x3fGhCCFfJbzzC1tNb2V1TE+d8zLa1b3Hq0I/c3agJs2fO5Pba/3x/PHNeSqF4OqMD3IOx3kE0AXYD64CXgBVa62edF54QHsrDqs/mNR5x7emtAuK+euUqmz9eysaFY/Dz82NMTByv9eyJv9/1D1oavnMxoGLFSnYdLxzD6J3FROAQ1lpQy7TW5wGUUs6KSwjP5YnVZwvx9NaePZqwsL5s27aV1q3bEBc3herVb83z2MLeueRl3rzFdp8jis7oPIs2wAqsCyD9pZTaqJQagZRGE+IG+Q0Wn7mQYXJkBlkAv/z/aWdkZJCQEEerVo+yf/9eEhPnsHTp8nwTBXjnvBNfY/RpqM+xzqUYqJS6A+uYRTvAXyl1BFipte7rvDCF8ByO7HJxGYN3Qz/+uJ0BA/qye/cunn22AxMmxBESEmL7/XPcuZy7mEFgQHHOX8zgTPoVu7voYmJGAzB8+Gi7miiKxu6nobTWe4DJwGSlVDngKayD3kIIHNvl4iq2Hp1Nv5jO+AkxJM2dQXBwCPMXLKXtU+3su4gFygUFcOTUBaYt+67QXXTbtm21s3XCEYpU7kNrfUZr/a7W+v+yX1NKnSh6WEJ4Lk/scinobujrr7fwaNOHmDNrOrfUa8WDLydQq0GzQnVCe3wXnQ8r9DyLApRywnsK4Tk8sNRHXndD/pmXmDxhGEsXzyOofBUeemEMlWvcgwUK/dirR3bRCcA5ycKN/0kI4SKeVOqDGx+d/evQdvZtmsvJE0d5pUsv/irfiuIB/5QGL+wHvCd20QkrR1SdFUJ4uqy7ocEdFVf3LuKr5WOoWKEcH320jrExb1A6qPR1hxf2A94RXXRVq95C1aq32H1tUTTOuLMQQngYi8XC6lUfMHRoJGlpaUREDCYsLJLAwEDwo1AT9vK+UNG76GbOfNvOiwpHkGQhhI87duwoUVED+fjjj7j33ka8995q6tdv8M8Bjh6D8bAuOmHljGQhE/WE8AAWi4UlSxYyevRwLl++xKhRMbz2Wh+KF8/jY6EwH/BOKnkyfLi1NHlMTGzR30wY5pBkoZSqobU+lLX5VD7HNALmA6WBHUBnrXV6Ae9ZHEjAWpPqHLBEaz3REfEK4esOHPidiIj+bNq0kUceacrkydOpXft2x13AiSVPdu3a6ZgYhV2KPMCtlAoEfs/e1lpvzufQWUC01roOcBGwNeO7H1APUFrrBuS9AJMQwg5Xr15l1qy3aN78IbZv/4H4+KmsWLHGsYkCmU/hjRz1NFSBXU9KqapAHeCjrJcWAKE23rMrMFFrfQlAay2T/YQogp9//pl27dowcuQwmjZ9jM2bt9KlSzf8/R3/UGRB8ynckh/XrSMunek3ctSYha0by2rAUaC+UmoI1vUvqtk4pxZwv1IqHkgHhmutvyhypEL4mMuXLzNt2mQSEuIoV64cs2Yl8dxzL+BXQLHAovKo+RSeWCXYBC59GkprvQt4VSlVz8DhpYAawH3A/cBapVQ1rbXhrybBwWUKFygQElL0tYI9jbTZ+3z33Xd0796dXbt28fLLLzN16lRjhf+KKDjTQvjL95Hwzg/XPoDDX76PWtUr4u9ftCRVv77148Po787WcUdOnMuzy2zqwBZUu6nwnyFmcsbftdHFj/4g/xxr5Dd/BKiqlPLTWluw3lUcsXHOUazVbC3ANqXU1azzfi/4tH+kpp4jM9P+rwYhIWU5efKs3ed5Mmmzd7lw4QKxseOZPTuRKlVuZtGid3n11Rc5efKsy9p8R7WyNzxum5p6rsjvO378JABD7TDyOz526nyeXWbHUs9Rws/zbi0K+3ft7+9X4Bdso3cWr9p95Ry01keVUnuxVqf9COgCrMzer5RaCBzRWg/Ncdo6oCXwmVKqLhAI/FmUOITwBVu2bCI8vB8HDvxOly7dGTlyDOXKlXd9IB4yn8KjusxMZHQ9i40OuFZvYL5SahqwHUjMsa8GkJnr+CggWSm1A+tAfNfswW4hxI3OnPmbMWNGsmjRPGrWrMWKFWto2vQxs8NyuIiI/gBMmjTNIe9X2CVlfU2RxyyUUiWAX7XWtQs6Tmu9Hbgnn30t8njtL2w/MSWEAD79dC2DBoVz/Pgx+vTpT1TUMIKCgswOyyn279/n2Df0wCrBZnDEALcfUNMB7yOEsNOpU6cYPjyKFSuWU6/eXcybt5j77mtsdliex0O6zMzkqkdnhRAOZLFYWLHiPaKjozh79ixRUcPo338gJUpIP7twDikkKISH+fPPI0RFhfPppx9z//2NSUhI5M47jTyNLkThGX10dhH53z0Uc1w4Qoj8ZGZmsmjRfMaMGcHVq1cYO3YCPXv2plgxN/0n6KRCgg0aNDR2DT/rHIpjp8479Pq+yuidha0RpbFFDUQIkb/ffttPRER/tmzZRLNmzZk0aRo1a9YyO6z8OXFW9LVqswVdA5mV7Wh+FotX/perCfwuk/KMkza7pytXrjB79gxiY2MICCjBmDHj6dSpS6FKdbiyvWfSMxg846sb5i4UZt3uwlwDcPr13ZUDJuXVAg7k3m+0G6pLHi9nYi0dvkNrfcMbCyGKZvfunwkP78v27T/w5JNtiY2d7DHLiRZUSLCoH9a9e/8HgKFjp+ZfrNCC067vq4x2Q/Us4Px6Sqn/aq1THBSTED7t0qVLTJkSz9Spk6hQoQJz5szj2Wc7OLXwn6M5c1b00aN/2r6Gn5/MynYwozO4m+W3TynVBFgCSLIQooi2bdtKeHg/tP6VF154iXHj3iA4ONjssOzmilnRBV4DB64bLgDHPDq7DajigPcRwmedP3+eN96IYc6cGVStegtLl77H448/YXZYheeKWdE2rlGvRnmmDmzBsdRzMivbAYyOWeS3Ooo/0B342WERCeFjvvxyAwMH9ufQoQN07dqDESPGULZsObPDKjpXzIou6BoWqHZTmX8qx0qiKBKjdxZXyP8/9S/AK44JRwjf8fffaYwZM4LFixdQu/btrFq1locfftTssNxe48ZNzA7BJxlNFnk90J0JnM8q+OeWevXqxrFjx65tP/PMc3Tv3pMLFy7wyisv3HB8x46d6NixE6dOnSI09Lkb9nft2oPQ0Oc5cuQwffv2umF/796v88QTT7Fv314iIwfcsD88fBDNm7dk586fGDFiyA37hw0bRZMmD7J167dMmDDmhv3jxr1Bw4Z3s3HjehIS4m7YHx8/lTp16vLJJ2uZOXP6DfsTE+dQrVp1Vq58n/nzr1/SPCCgGLNmzSc4OJiUlCWkpCy54fylS5cTFBREcvJcVq/+4Ib9K1f+L+s601i37uPr9pUsWZKUlBUATJoUy6ZN1xcyrlixEvPmLQYgJmY027ZtvW5/1aq3MHPm2wAMHz6YXbt2Xrf/9tvrXKtCGhHR/4Zicw0aNLz2fH7v3v/h6NE/CQgoRkbWAGjjxk0YPnw0AN26vcrp09f/WTdr1pyIiMEAdOzYgYsXL163v02bJ+nb11oNNTS07Q3/bXL/7f31Vyr79+8jIyODW26pRp8+/Xn44UdJTU2lR4/ON5zviL+9F154xi3/9gCSkhYZ/tvL/tvI+d85v7+9gIBiFCsW4HZ/ezk5+m8v5981GP/cS0tLc8h6Frflt0MpBYDW+kuD7yWEzzp58iRa/0pq6imCgoKoV+8uypQpKzWdhNszNCkva6W8vFiAykCg1tqdag7URCbl2UXa7FwWi4Xly99l+PDBnD9/noiIwfTrF0ZAgOue+feW33G3bta12LLvBgriLW22h6mT8rTWt+bcVkoFYF1r4r/ABWCu3ZEJ4SMOH/6DQYPC+PzzdTRu3IQpUxK54w5ldlgeK3c3jXANux6dVUrVAnphHdD+CojRWq93RmBCeLrMzEwWLEhm7NiRWCyZjB8fS/fuvdy38J8QBTD66OxzwGtYb0/eBu7XWp9yZmBCeLL9+/cSHv4633zzFY891pJJk6Zy2201zQ5LiEIzemfxPnAR+AFoD7TPHtjOprX2vsV+hbDTlStXmDnzLeLiJhAYWJKpU2fQsWMnjyrVIURejCaLbk6NQggvsGvXTsLC+vLTTzto27Y9sbGTqFLlZrPD8jrNmjU358K5184IKs6Z845fr8NdGR3gXmD0DZVS72utny98SEJ4losXL5KQ8CbTp0+hQoWKJCUtpF27Z+Vuwkmy5xy4VB5rZ/y3Q0OWfbaHo6kXXL9ehpMWlipIfmU8iqKNE95TCLe0deu3tG7dlISEeJ5//kW2bPmO9u1DJVF4mTMXMq4lCrCWO5+1YifN7q1+bXvash2cuZDh/GCyEtfgGV8xOulbBidu4ZdDf4OT/+SckSyE8Hrnzp0jOjqK9u3/RXp6OikpK5g+fRYVK1YyOzSv17FjBzp27ODSa+a3PkfOD+hra2k4WV6JyxWJyhFVZ4XwKRs2fEFk5AAOHTpIjx69iI4eRZkyZc0Oy2fkLnfhCvmtnZGz68dV62U4c2GpgsidhRAGpaWdZsCAPrz4YiglSpRg9epPmDgxXhKFD8heOyMwwDpHJnvMYtOOw9e2c66l4UzZiSsnVyQqubMQwoA1a1YzZEgEqamnGDAggoiIwZQsWdLssISr5LV2RukAat18v/PW68iHKxaWyoszkoWM7Amvcfz4cYYOjWTNmlU0aHA377yznIYN7zE7LGGG3GtnZLpgvY584nD6wlJ5cEaymOCE9xTCpSwWC+++u5SRI4eSnp5OdPQo+vTp79LCfyJvbdo8aXYI5nPFwlK5GBqzUEqNyrUdmmt7TvbPWuuJDolMCJP88cchOnbsQP/+vbnjjjv54ostDBgQIYnCTfTt2//a+g3CdYwOcEfk2k7Otd3RAbEIYarMzEySkmbTrNmDfPvtN0ycGMfq1R9Tt+4dZocmhOmMdkPlHoewtS2ER9m7dw/h4f3YuvUbWrZsTXz8VG69tYbZYYk8ZK+Ql706nnANo8kid4+Yre0bKKUaAfOB0sAOoLPWOt3AeV2BeUBDrfUuA7EKYVhGRgaJiVOJj3+DoKAgpk2byUsvvSIzsIXIxWiyCFRKjc2xXSrXtpEHfGcB0VrrNUqpxUBfIL6gE5RSVYAXgUMG4xTCsO3bt9OlS1d27fqJ9u1DmTAhjipVqpgdlhBuyeiYxTvArTn+l5Jr+52CTlZKVQXqAB9lvbQA60p7tkwFovHqWo7C1S5evEhMzGgeeOABTpw4TnLyYpKSFkqiEKIARqvOdi3idaoBR4H6SqkhwPis1/KVteDSca319txrZxiVtZ5soYSE+N6sXF9o8+bNm+nRowd79uyhW7duTJo0iYoVK5odlst4w+84IGv2stG2eEOb7eWMNhtdKa+V1vqLHNuVc66Up5TqrLVeZOt9ssYcXlVK1bNxvQrACKClkfjyk5p6jsxM+29KZJF373Pu3FliYkaTnDyXGjVuY9mylfz7389y8uRZr253Tt7yO37qqWcADLXFW9psj8K22d/fr8Av2Ea7oVbm2t6TazvRxvlHgKpKqexRw2pZr+WnPnAT8K1S6tes41crpe4zFq4Q//jii3U89thDzJv3Nj17/pcNG76mRYtWZoclCql79550797T7DB8jksendVaH1VK7QXaYh236EKOBKSUWggc0VoPzTp+C1A9x/4DwDPyNJSwx19/pTJy5DCWLXuHunXv4MMPP6VJkwfNDksU0YULFwAICgpyzQVNWGjIHbns0VmgNzBfKTUN2M71dyM1gEyDsQhRIIvFwpo1qxg8OIK0tNMMHDiIsLBBUvjPS7zyyguAi+ZZ5LFCnktXxHMjRpNFcaVUS/65g8i9XSzv0/6htd4O5FmBTWvdwsa5NQ3GKXzc8ePHGDw4gv/970PuuacRy5atpEGDhmaHJTxUfgsNxfZ5xKlrR7gjo8niBNeX+EjNtX3CYREJUQgWi4WUlCWMHDmMS5cuMmLEWHr37kfx4lKFXxSeWQsNuSOjj87WdHIcQhTawYMHiIgYwJdfruehhx4hIWE6t99e1+ywhBfIb4U8V6yI526KvFKeUqqpUupjRwQjhD2uXr3KnDkzaN78Ib7//jtiYyezcuX/JFEIh8lrhTxXrYjnbozOs6iJtdupMfAL1iqz5YEErI+5znZSfELkSetfCQ/vx7ZtW2ndug1xcVOoXv1Ws8MSLtCxYyfXXcykhYbckdEO3enAYSAO+DeQhHXuQzywUGt9yTnhCXG9jIwMpk9PYPLkNylTpgyJiXN44YWXpPCfD3FpsgBTFhpyR0aTxUNAda31JaXUBuAcUEdr/bvTIhMilx9/3M6AAX3ZvXsXoaEdGD8+jpCQELPDEi6WmpoKQHBwsMmR+BajYxaB2XcPWWXF/5ZEIVwlPT2dsWNH8sQTLUlNPcWCBe8wZ858SRQ+qkePzvTo0dnsMHyO0TuLIKXUlzm2y+baRmv9mOPCEsLqq682M3Dg6/z2235effX/GDVqHOXLVzA7LCF8jtFk0SPXdpKjAxEip7NnzzB27CgWLEiiRo2aLF++mscea2F2WEL4LKPzLBY4OxAhsn322SdERoZx9OifvPZaX4YMGU7p0qXNDksIn2b00dn/AzKzy5ArpS4AgTkO6ae1numE+IQPSU1NZfjwwbz//jKUupO3315H48ZNzA5LCIHxbqhooE2O7ctAdsGd24AZgCQLUSgWi4VVq1YwbNgg0tLSiIwcwoABEQQGBto+Wficrl1z94oLVzCaLG7h+vUnwrTW++Fa+fACV70TIj/Hjh0lKiqcjz/+H/fe24jlyz/krrvqmx2WcGOhoc+7/qLOKlPuQeXPjSaL7FnbiwG01vNz7AsF9jo0KuH1LBYLS5YsZPTo4Vy+fInRo8fTq1dvKfwnbDpy5DAA1apVt3GkgzirTLmHlT83+i9zEPChUup54CvgL6Ai8ADwJPCcc8IT3uj3338jIqI/mzd/ySOPNGXy5OnUrn272WEJD9G3by/ARetZ4Lwy5Z5W/tzQpDyt9QbgLuB34GkgEmgHHAXuybk+txD5uXr1KjNnvkWLFg+zY8d24uOnsmLFGkkUwq0VVKbcHd/XWQzf82ut/wAGOjEW4cV++WU34eF9+eGH7/nXv57kzTcTuOUWGeoS7s9ZZco9rfy5oTsLpdSoXNuhubbnODAm4UUuX75MXNxEHn+8GQcPHmDWrCQWLXpXEoXwGM4qU+5p5c+N3llEAGNybCcDK3NsdwR6OSgm4SW2b/+esLC+/PLLbjp0+DcxMbFUrlzZ7LCEsI+zypR7WPlzo8kid/1nW9vCh124cIHY2PHMnp1IlSo3s2jRuzzxxFNmhyW8RO/er7v+os4qU+5B5c+NJovcTbC1LXzU5s1fEh7ej4MHD9ClS3dGjhxDuXLlzQ5LeBH54mEOo8kiUCk1Nsd2qVzb7jkiI1zmzJm/GTNmJIsWzaNmzVqsWLGGpk2lELFwvH37rNO66tSR5XNdyWiyWArkXLMyJdf2Ow6LSHicTz5Zy6BBYZw4cZw+ffoTFTWMoKAgs8MSXioycgDgunkWDudBs7ZzMlp1thuAUupW4B6gHHAG+DHrkVrhg06dOsXw4VGsWLGcevXqs2DBUho1ut/ssIQwX34JwcNmbedktOpsENZSH88CacB5oAxQXim1CnhVa33BWUEK92KxWFix4j2io6M4e/YsUVHD6N9/ICVKSG+kEAUlBE+btZ2T0WVVY4HSQC2tdbDWuobWuhJQO+v1N5wVoHAvf/55hM6dX6J37/9Qq1ZtPv98M5GRQyRRCJElv4SQfafhSbO2czKaLEKB17TWh3K+qLU+CPQGOjg4LuFmMjMzWbAgmaZNm7Bp00bGjp3AmjXruPPOemaHJoRbKSghZM/azsmdZ23nZHSAu6LW+kBeO7TWvymlKjgsIuF2fvttHwMH9uerrzbTrFlzJk2aRs2atcwOS/io8PBBZodQoILKeGTP2s7dReXOk/GyGU0WpZRSh/LZ5weUdFA8wo1cuXKF2bNnEBsbQ4kSgSQkvMUrr3TGz0/mYArzNG/e0uwQClRgQgBuvak0I3o8yMXLV6hcNtAjEgUYTxatnBqFcDs//7yL8PC+7NixnSeffJo335zMzTdXNTssIdi58ycAGja82+RI8pFfGQ/yHvguV8MzJq0afXR2Y1EvpJRqBMzHOiC+A+istU4v4PgZWJdyPQ8cAv6jtT5R1DhEwS5dusSUKfFMnTqJChUqMHfufJ555jm5mxBuY8SIIYCbz7PIo4zHmXTPfRIKjA9wO8IsIFprXQe4CPS1cfyHwJ1a63uB77E+kSWcaNu2rTz+eDMmTYolNPR5Nm/+jmef7SCJQggH8OQnocBFyUIpVRWoA3yU9dICrE9Y5UtrvVZrnf1f9ies64ALJzh//jzh4eE8/XQbzp49y9Kl7zFjxlwqVQo2OzQhvIYnPwkFrruzqIZ1Vb36SqnFwOGs14zqyvUl0YWDfPnlBpo3f5gpU6bwf//XnU2bvuXxx58wOywhvI6nrV+Rm+GV8hxBa70LeFUpZfjhfKVU9nNys+y9XnBwGXtPuSYkpGyhz/UEaWlpREZGkpSURN26ddm4cSOPPeZ7hf+8/fecmze0NyDrw9ZoW9ypzcGVynB79Qr8dTadSmVLUbVyafz9Hd/N64w2uypZHAGqKqX8tNYWrHcVR2ydpJTqgrW7qk3WeXZJTT1HZqb9z6SFhJTl5Mmzdp/nKdau/YioqHBOnTrJ66+HExk5hBo1bvLqNufF23/PuXlLewcNGg5gqC3u2OYSfnBzuZKAhdTUcw5//8K22d/fr8Av2C5JFlrro0qpvUBbrOMWXcjRraSUWggc0VoPzfFaKBAGtJa6U45x4sQJoqOjWLVqBfXrN2Tx4ne5555GZoclhF2aNHnQ7BB8kiufhuoNTFBK7QeCgMQc+2oAuR/inwtUAtYrpXYopT5wTZjex2KxsGzZOzRr9gBr165h6NARfPrpBkkUwiNt3fotW7d+a3YYxvlZH5s9dPI8Zy5e8dh1RV02ZqG13o61vHle+1rk8VqIs2PyBYcP/8GgQWF8/vk6GjduwpQpidxxhzI7LCEKbcKEMYCbz7PI5sElyXNz5Z2FcKHMzEySk+fSrNmDfP31FsaPj+XDDz+RRCGECxVUgdbTuPRpKOEa+/fvJTz8db755iuaN29JfPxUbrutptlhCeFzCpqI5wmztnOSZOFFrly5wowZ04mLm0DJkqWYNm0mL730iszAFsIkBVWg9TTSDeUldu78iSefbEVMzChat/4XmzdvpWPHTpIohDCRp0/Ey0nuLDzcxYsXmTz5TaZPT6BSpWCSkhbRvv2zZoclhNOMG+dBC3PmV4HWwwa3QZKFR9u69VvCw/uyd+8eXnrpFcaOnUDFipXMDksIp3Lb0uT5yaMCrSeSZOGBzp07x4QJY0hKmkO1atVJSVlBq1aPmx2WEC6xceN6wP0XQfI2kiw8zPr1nxMZOYDDh/+ge/eeREePokwZ96l9I4SzJSTEAZIsXE2ShYdISzvNyJHDSElZQp06dVm16mMeeuhhs8MSQvgISRYeYM2a1QwZEkFq6ikGDIggImIwJUvKsudCCNeRZOHGjh8/ztChkaxZs4oGDe7mnXeW07BhnhVThBC+zM86Wzzt3GUuW/wo4Y/DB9IlWbghi8XCu+8uZeTIoaSnpxMdPYo+ffoTEOB5z2YLIZzMRfWnJFm4mUOHDhIZOYANG76gSZOHSEh4i7p17zA7LCHcRnz8VLNDcCv51Z+K7fOIQ0uKSLJwE9bCf3OIiRmDn58fEyfG063bf/D3l0n2QuRUp05ds0NwK66qPyXJwg3s3buH8PB+bN36DS1btiY+fiq33lrD7LCEcEuffLIWgCeeeMrkSNyDq+pPyddWE2VkZDBlSjwtWz7Cnj2/Mn36LFJSVkiiEKIAM2dOZ+bM6WaH4TZcVX9K7ixM8tNPOwgL68euXT/Rvn0oEyfGc9NNN5kdlhDC0+SqP3VzcBlK+FvkaShPl56ezqRJsSQmTiU4uDLJyYtp1+4Zs8MSQniyHPWnQkLKcPLkWYdfQpKFC33zzdeEh/dl//59vPzyq4wZM54KFSqaHZYQQtgkycIFzp07S0zMaJKT51Kjxm0sW7aSFi1amR2WEEIYJsnCyb74Yh2RkWEcOXKYXr16M2TICMqUKWN2WEJ4rMTEOWaH4JMkWTjJX3+lMnLkMJYte4c77lCsWfMpDzzwoNlhCeHxqlWrbnYIPkmShYNZLBY+/HAlQ4ZEkpZ2moEDBxEeHkVgYKDZoQnhFVaufB+A0NDnTY7Et0iycKDjx48RFTWQtWvXcM89jVi2bCUNGjQ0OywhvMr8+UmAJAtXk0l5DmCxWFi6dBGPPvoA69d/xogRY1m79nNJFEIIryF3FkV08OABIiIG8OWX63n44UeZPHkat98utWuEEN5FkkUhXb16laSk2UyYMBZ//2K8+WYCXbp0k8J/QgivJMmiELT+lbCwvnz//Xe0bt2G+Pip8oSGEMKrSbKww+XLl5k+PYGEhDjKlCnDjBlzef75F/Hz8zM7NCF8RlLSIrND8EmSLAzaseMHwsL6sXv3LkJDOzB+fBwhISFmhyWEzwkODjY7BJ8kycKG9PR03nxzAjNnTick5CYWLHiHp5562uywhPBZKSlLAOjYsZPJkfgWlyULpVQjYD5QGtgBdNZapxdwfAUgBagD/AV00lrvdXqgOXz11WbCw/vx+++/0blzV0aOHEv58hVcGYIQIhdJFuZw5aM7s4BorXUd4CLQ18bxQ4FdWcfPARKcHN81Z86cYdCgcEJD25KZmcn773/IpEnTJFEIIXyWS5KFUqoq1juEj7JeWgCE2jgtFMgeyVoCPK6UcnrNjA0bvqB+/fosWjSP//63Hxs2fE2zZs2dfVkhhHBrruqGqgYcBeorpYYA47Nes3XOKaXUB0A/4G/gZuCg0YsGB9tf3XX8+FGUK1eO5cuX8+CDvlX4LySkrNkhuJyvtdkb2huQtXyo0bZ4Q5vt5Yw2u3SAW2u9C3hVKVXP4CmXtNbPASil7L5eauo5MjPtW1tw5cqPqV69MqdPpztltSl3FRJS1qfaC77XZm9pb0bGVQBDbfGWNtujsG329/cr8Au2q5LFEaCqUspPa23BetdwxMY5h/nn7qIUUB445twwoXTp0hQvLg+JCeGuli5dbnYIPsklYxZa66PAXqBt1ktdgJXZ+5VSC5VSE3OdthJ4Nevnl4HPtNaXnBupEMLdBQUFERQUZHYYPseVX6F7A/OVUtOA7UBijn01gMxcx8cC7yil9gGngVdcEqUQwq0lJ88FoHv3niZH4ltcliy01tuBe/LZ1yKP104DTzo5LCGEh1m9+gNAkoWrSYlUIYQQNkmyEEIIYZMkCyGEEDZ56zOixcD63HBhFeVcTyVt9n7e0N6bb74ZMN4Wb2izvQrT5hznFMtrv5/FYt+kNQ/RFNhkdhBCCOGBmgGbc7/orckiEHgAa4mRqybHIoQQnqAYUBX4DrhhTpu3JgshhBAOJAPcQgghbJJkIYQQwiZJFkIIIWySZCGEEMImSRZCCCFskmQhhBDCJkkWQgghbPLWch8FUko1AuYDpYEdQGetdXoBx1cAUoA6wF9AJ631XqcH6iCFaO8MoA1wHjgE/EdrfcL5kTqOvW3OcV5XYB7QMGsZYI9RiN9zcSABaA+cA5ZorXMvQubWCtHmO4G5QBmsk9BGaq1XOj1QB1JKDQF6ArUx8HfqqM8vX72zmAVEa63rABeBvjaOHwrsyjp+DtZ/YJ7E3vZ+CNyptb4X+B7rQlSext42o5SqAryINUF6Invb3A+oByitdQMgycnxOYO9bY4FFmqtGwFdsSYOT7MeaA0cNHi8Qz6/fC5ZKKWqYs2wH2W9tAAItXFaKLAo6+clwONKqUBnxOdohWmv1nqt1jq7TMpPwC1OC9AJCvk7BpgKRAMeV9agkG3uCkzMXq7YA+8eC9PmTKBs1s9lgCNOCc6JtNbfaq0P2HFKKA74/PK5ZAFUw1ozqr5SajFwOOs1W+ecUkp9AFQC/gZudmqUjlOY9ubUlRzrpXsIu9uslHoOOJ61oqMnKszvuRZwv1Jqu1LqK6VUK2cH6WCFaXMY0E0p9QewHOjm1Ajdg0M+v3wxWQCgtd6ltX7VjlMuaa2f01p73DcRKFR7UUoNyvpxlhNCcjqjbc7q0x0BjHR6UE5m5++5FFADuA/oD7yrlCrhtOCcxM429wIStda3Yu1yTMkau/F2Rf788sVkcQSoqpTKLt5eDdu3ote+sSilSgHlgWNOi9CxCtNelFJdsN6+vqy19rRuGXvbXB+4CfhWKfVr1vGrlVL3OTdMhyrM7/kosFJrbdFab8Naodmeu06zFabN/YH3AbTWG4AKwK1Ois9dOOTzy+eShdb6KLAXaJv1UhdydLMopRYqpXI/EbISyP7m8jLwWXY/r7srTHuVUqFYb9fbaa0vuCRQB7K3zVrrLVrr6lrrO7XWd2L9wHlGa/2DC8MukkL+Xa8DWmbtr4u1tP+fTg/WQQrZ5iP80+aGWNt81OnBuogzP798Lllk6Q1MUErtB4KAxBz7amCt6Z5TLNBQKbUv69xwl0TpOPa2dy7Wvs31SqkdWX2dnsbeNnsDe9schbW/fwfWb9tdPeVLUA72trkbMEgp9SPWQd9OWuuLLonUQZRSo5VSh4HqwGdKqQ05djvt80vWsxBCCGGTr95ZCCGEsIMkCyGEEDZJshBCCGGTJAshhBA2SbIQQghhkyQLIYQQNkmyEMKBlFKtlVIWpdTQHK8dUEo9nmM7Rik1P8d2DaXUKqXUGaXUKaWUR5ZXEd5NkoUQjtUO+APrGhE2KaX8sVZN/QPrJKs6wNdOi06IQvKFAlpCuNLTwBhgtlIqRGt90sbx7bFWAB2otb6c9doCZwYoRGHInYUQDqKUugNr2e9lwM/8U7OoIPcB23IkCiHckiQLIRynPfCN1vos1iJ97QycUwXrUpdCuDVJFkI4Tjvgs6yfPwOeUEoFYF2dLbfs145jLdoohFuTZCGEAyilygOPAsOUUhexloUuCzQHznP9v7XiwLmsn7djXa0uwHXRCmE/SRZCOMaTQCpQSmtdUmtdEvgQa9fUZqCTUqpU1rrRbYHstTJWAyeBSUqpckqpskopu1Y0FMIVJFkI4RhPA6u01jm7nD7A2jUVDZTGurDQj8BaYCFA1vFPYx0YPwIcAlq4LGohDJL1LIQQQtgkdxZCCCFskmQhhBDCJkkWQgghbJJkIYQQwiZJFkIIIWySZCGEEMImSRZCCCFskmQhhBDCJkkWQgghbPp/opy3YAtmXJQAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "df_2d_jac, go_chrom = run_egad(df_exp_clust, df_exp_corr)\n", "import matplotlib.pyplot as plt\n", "sns.scatterplot(df_2d_jac['AUC'], df_2d_jac['DEGREE_NULL_AUC'])\n", "plt.plot([0, 1], [0, 1], c='black')\n", "plt.axvline(x=df_2d_jac['AUC'].mean(),c='black',ls='--')\n", "plt.axhline(y=df_2d_jac['DEGREE_NULL_AUC'].mean(), c='black', ls='--')" ] }, { "cell_type": "code", "execution_count": 65, "id": "6009e1de", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AUCAVG_NODE_DEGREEDEGREE_NULL_AUCP_Value
50090.47749714849.1929530.7340177.107784e-03
49860.69322714704.1593090.7059215.800097e-06
77910.86208710024.0881790.3694731.460376e-06
78790.80358612468.6848550.5409884.875874e-08
78900.48572614874.5525470.7194311.348610e-01
49960.67630814890.8824470.6972096.733674e-02
71760.9371318975.9562600.2941041.625090e-41
77240.66147414837.4344730.7092322.428424e-06
71230.65396513835.6405620.6424523.308297e-02
72030.88011011555.7850940.4617237.914039e-32
77180.69826813318.2302690.6069711.677333e-02
76280.9029687872.4996770.2381502.870997e-05
69800.78019012653.5583820.5424132.100931e-11
78880.48466814616.0047430.6629013.569038e-01
69950.70929914543.6215820.6791461.585328e-08
69890.63240314754.5756190.6888561.689262e-02
71600.8654208632.5905440.2937111.734122e-03
71980.9137937023.3994350.1687273.608949e-04
71590.9242987428.4856540.2149705.191061e-06
74500.9261308085.8016980.2469075.711609e-06
69790.86820111776.6214500.4627667.367518e-09
66970.9977885755.0274060.1077695.856530e-11
73510.98319811917.1483000.4556392.151726e-15
69780.69817612483.1748880.5409516.025233e-03
78770.71849014182.7649720.6482283.443170e-02
49270.83905313381.3152790.5918975.721071e-05
73880.63018113685.7356710.6546291.125468e-01
\n", "
" ], "text/plain": [ " AUC AVG_NODE_DEGREE DEGREE_NULL_AUC P_Value\n", "5009 0.477497 14849.192953 0.734017 7.107784e-03\n", "4986 0.693227 14704.159309 0.705921 5.800097e-06\n", "7791 0.862087 10024.088179 0.369473 1.460376e-06\n", "7879 0.803586 12468.684855 0.540988 4.875874e-08\n", "7890 0.485726 14874.552547 0.719431 1.348610e-01\n", "4996 0.676308 14890.882447 0.697209 6.733674e-02\n", "7176 0.937131 8975.956260 0.294104 1.625090e-41\n", "7724 0.661474 14837.434473 0.709232 2.428424e-06\n", "7123 0.653965 13835.640562 0.642452 3.308297e-02\n", "7203 0.880110 11555.785094 0.461723 7.914039e-32\n", "7718 0.698268 13318.230269 0.606971 1.677333e-02\n", "7628 0.902968 7872.499677 0.238150 2.870997e-05\n", "6980 0.780190 12653.558382 0.542413 2.100931e-11\n", "7888 0.484668 14616.004743 0.662901 3.569038e-01\n", "6995 0.709299 14543.621582 0.679146 1.585328e-08\n", "6989 0.632403 14754.575619 0.688856 1.689262e-02\n", "7160 0.865420 8632.590544 0.293711 1.734122e-03\n", "7198 0.913793 7023.399435 0.168727 3.608949e-04\n", "7159 0.924298 7428.485654 0.214970 5.191061e-06\n", "7450 0.926130 8085.801698 0.246907 5.711609e-06\n", "6979 0.868201 11776.621450 0.462766 7.367518e-09\n", "6697 0.997788 5755.027406 0.107769 5.856530e-11\n", "7351 0.983198 11917.148300 0.455639 2.151726e-15\n", "6978 0.698176 12483.174888 0.540951 6.025233e-03\n", "7877 0.718490 14182.764972 0.648228 3.443170e-02\n", "4927 0.839053 13381.315279 0.591897 5.721071e-05\n", "7388 0.630181 13685.735671 0.654629 1.125468e-01" ] }, "execution_count": 65, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_2d_jac" ] }, { "cell_type": "code", "execution_count": null, "id": "0cc2fdcf", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "hicexp", "language": "python", "name": "hicexp" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.10" } }, "nbformat": 4, "nbformat_minor": 5 }