# %%
import numpy as np
import pandas as pd
from scipy import stats, sparse
from scipy.stats import mannwhitneyu

try:
    # The EGAD functions below only need numpy/scipy/pandas; bottleneck is
    # kept importable (when installed) for code that still refers to it.
    import bottleneck
except ImportError:
    bottleneck = None


# %%
def run_egad(go, nw, **kwargs):
    """Run EGAD (neighbor voting) for a set of gene lists on a network.

    Thin wrapper that validates the network and delegates to ``_runNV``.

    EGAD measures modularity of gene lists in co-expression networks.
    This was translated from the MATLAB version, which does tiled cross
    validation.

    Keyword arguments forwarded to ``_runNV``:
        nFold {int} -- number of CV folds, default 3
        min_count, max_count {int} -- exclusive bounds on the number of genes
            annotated to a term; terms outside the bounds are dropped
        verbose {bool} -- print input shapes and sparsity, default True

    Arguments:
        go {pd.DataFrame} -- genes x terms, values in [0, 1] where 1 means the
            gene is included in the gene list
        nw {pd.DataFrame} -- genes x genes co-expression network

    Returns:
        tuple -- ``(metrics, go_used)`` where ``metrics`` is a terms x metrics
        pd.DataFrame with columns
        ['AUC', 'AVG_NODE_DEGREE', 'DEGREE_NULL_AUC', 'P_Value'] and
        ``go_used`` is the filtered genes x terms frame that was scored.

    Raises:
        AssertionError -- if the network is not square or its index and
            columns are not in the same order.
    """
    assert nw.shape[0] == nw.shape[1] , 'Network is not square'
    assert np.all(nw.index == nw.columns) , 'Network index and columns are not in the same order'
    return _runNV(go, nw, **kwargs)


def _runNV(go, nw, nFold=3, min_count=10, max_count=1000000, verbose=True):
    """Align annotations and network on shared genes, then score every term.

    Arguments:
        go {pd.DataFrame} -- genes x terms annotation matrix
        nw {pd.DataFrame} -- genes x genes network (same labels on both axes)
        nFold {int} -- number of cross-validation folds
        min_count, max_count {int} -- exclusive bounds on genes per term
        verbose {bool} -- print shapes and sparsity of the aligned inputs

    Returns:
        tuple -- (terms x metrics pd.DataFrame, filtered ``go`` frame)

    Raises:
        ValueError -- if ``go`` and ``nw`` share no gene labels.
    """
    # Keep the first occurrence of every repeated label so that the
    # label-based selection below cannot multiply rows.
    go = go.loc[~go.index.duplicated(keep='first'), :]
    nw = nw.loc[~nw.index.duplicated(keep='first'),
                ~nw.columns.duplicated(keep='first')]

    # Make sure genes are the same (and in the same order) in go and nw
    genes_intersect = go.index.intersection(nw.index)
    if len(genes_intersect) == 0:
        raise ValueError('go and nw do not share any gene labels')
    go = go.loc[genes_intersect, :]
    nw = nw.loc[genes_intersect, genes_intersect]

    if verbose:
        print('network shape:', nw.shape)
        print('annotation shape:', go.shape)
        print('annotation sparsity:', 1.0 - np.count_nonzero(go) / go.size)
        print('network sparsity:', 1.0 - np.count_nonzero(nw) / nw.size)

    # Drop genes whose network row is entirely NaN, and drop the same genes
    # from go: both matrices are handed over as bare arrays, so their rows
    # must stay in lockstep.
    has_data = (nw.isna().sum(axis=1) != nw.shape[1]).to_numpy()
    nw = nw.iloc[has_data, has_data]
    go = go.loc[nw.index, :]

    # Work on an explicit copy: writing through ``DataFrame.values`` is not
    # guaranteed to be possible (read-only under pandas Copy-on-Write).
    nw_values = nw.to_numpy(dtype=float, copy=True)
    np.fill_diagonal(nw_values, 1)

    # Exclusive size limits on the gene lists
    term_sizes = go.sum(axis=0)
    go = go.loc[:, (term_sizes > min_count) & (term_sizes < max_count)]

    roc = _new_egad(go.values, nw_values, nFold)

    col_names = ['AUC', 'AVG_NODE_DEGREE', 'DEGREE_NULL_AUC', 'P_Value']
    # Put output in dataframe
    return pd.DataFrame(dict(zip(col_names, roc)), index=go.columns), go


def _rank_abs_ignoring_nan(values):
    """Average-rank ``|values|`` among the non-NaN entries; NaN stays NaN."""
    ranks = np.full(values.shape, np.nan)
    valid = ~np.isnan(values)
    ranks[valid] = stats.rankdata(np.abs(values[valid]))
    return ranks


def _new_egad(go, nw, nFold):
    """Neighbor-voting AUCs with n-fold cross validation on bare arrays.

    Arguments:
        go {np.ndarray} -- genes x terms annotation matrix (non-zero = member)
        nw {np.ndarray} -- genes x genes network, rows aligned with ``go``
        nFold {int} -- number of cross-validation folds

    Returns:
        tuple -- (mean AUC over folds, average node degree, degree-based null
        AUC, Stouffer-combined p-value), each with one entry per term.
    """
    go = np.asarray(go)
    nw = np.asarray(nw)
    n_genes, n_terms = go.shape

    # Build cross validated positives: fold i hides every nFold-th annotation
    # (row-major order, starting at offset i) from the training matrix.
    rows, cols = np.nonzero(go)
    folds = []
    for fold in range(nFold):
        held_out = np.zeros(go.shape, dtype=np.int64)
        held_out[rows[fold::nFold], cols[fold::nFold]] = 1
        folds.append(go - held_out)
    CVgo = np.concatenate(folds, axis=1)

    # Neighbor voting: connectivity to the training positives, normalised by
    # each gene's total connectivity.
    sumin = np.matmul(nw.T, CVgo)
    degree = np.sum(nw, axis=0)
    predicts = sumin / degree[:, None]

    # Training positives are not scored
    predicts[CVgo > 0] = np.nan

    # Calculate ranks of the scored genes. NaNs are left out of the ranking
    # explicitly (how tiedrank works in MATLAB); relying on rankdata's own NaN
    # handling is not portable across SciPy versions.
    predicts2 = np.full(predicts.shape, np.nan)
    for col in range(predicts.shape[1]):
        predicts2[:, col] = _rank_abs_ignoring_nan(predicts[:, col])

    filtering = np.tile(go, nFold)

    # Negatives (filtering == 0) contribute nothing to the rank sum
    predicts2[filtering == 0] = 0

    # Sum of ranks of the held-out positives for each fold/term
    p = np.nansum(predicts2, axis=0)
    n_p = np.sum(filtering, axis=0) - np.sum(CVgo, axis=0)

    # Number of negatives: number of genes - number of positives
    n_n = filtering.shape[0] - np.sum(filtering, axis=0)

    # A fold without held-out positives yields 0/0 = NaN; those folds are
    # skipped by the NaN-aware reductions below, so the warning is noise.
    with np.errstate(divide='ignore', invalid='ignore'):
        roc = (p / n_p - (n_p + 1) / 2) / n_n
        U = roc * n_p * n_n
        Z = (np.abs(U - (n_p * n_n / 2))) / np.sqrt(n_p * n_n *
                                                    (n_p + n_n + 1) / 12)
        roc = roc.reshape(nFold, n_terms)
        Z = Z.reshape(nFold, n_terms)
        # Stouffer Z method
        Z = np.nansum(Z, axis=0) / np.sqrt(nFold)
        # AUC of neighbor voting: mean over the folds that produced a value
        roc = np.nansum(roc, axis=0) / np.sum(~np.isnan(roc), axis=0)
    P = stats.norm.sf(Z)

    # Average degree for nodes in each go term
    avg_degree = degree.dot(go) / np.sum(go, axis=0)

    # Null AUC obtained by ranking genes on node degree alone
    degree_rank = stats.rankdata(degree)
    member_ranks = np.where(go == 0, 0, degree_rank[:, None])

    n_p = np.nansum(go, axis=0)
    nn = n_genes - n_p
    p = np.nansum(member_ranks, axis=0)

    roc_null = (p / n_p - ((n_p + 1) / 2)) / nn
    return roc, avg_degree, roc_null, P
# %%
# All imports used by the analysis cells, gathered in one place.
# (The Jupyter kernel renders matplotlib figures inline by default, so no
# backend magic is needed.)
import logging
import os.path

import bottleneck
import h5py
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as stats
import seaborn as sns
from hicmatrix import HiCMatrix as hm
from hicmatrix.lib import MatrixFileHandler
from scipy import stats, sparse
from scipy.sparse import csr_matrix, dia_matrix, triu, tril, coo_matrix
from scipy.stats import mannwhitneyu

# %%
# Configuration: data set selection and the thresholds used further down.
resolution_human = 10000   # bin width used to snap cCRE start coordinates
species = "human"
SRP_name = "aggregates"
resolution = "10kbp_raw"

input_path = f'/grid/gillis/data/lohia/hi_c_data_processing/data_{species}/{SRP_name}/{resolution}/max/'

CONTACT_PERCENTILE = 99         # per-row percentile a value must exceed to be set to 1
MIN_AVG_EXPRESSION_RANK = 0.7   # genes must have avg_rank above this value
MIN_CRE_PER_BIN = 14            # bins must hold more than this many cCRE records

# %%
# Co-expression network: dense genes x genes frame labelled by the 4th field
# of each cut interval.
exp_file_path = '/grid/gillis/data/lohia/hi_c_data_processing/software/CoCoCoNet/networks/human_prioAggNet.h5'

jac_exp = hm.hiCMatrix(exp_file_path)
all_genes = [x[3].decode() for x in jac_exp.cut_intervals]
df_exp_corr = pd.DataFrame(jac_exp.matrix.toarray(), index=all_genes, columns=all_genes)

# %%
# Count cCRE records per genomic bin and attach the bin ids.
if species == 'human':
    cre_path = '/grid/gillis/data/lohia/hi_c_data_processing/data_human/aggregates/li2022/GRCh38-cCREs.bed'
else:
    cre_path = '/grid/gillis/data/lohia/ATAC_Risa/mm10-cCREs.bed'
cre_raw = pd.read_csv(cre_path, sep='\t', names=['chr', 'start', 'end', 't1', 't2', 't3'])

# Snap every start coordinate down to the start of its bin: "<chr>_<bin start>"
bin_start = (cre_raw['start'] // resolution_human) * resolution_human
cre_raw['start_bin'] = cre_raw['chr'] + '_' + bin_start.astype('str')
df_cre = cre_raw.groupby('start_bin').size().reset_index(name='cre')

bins_bed = pd.read_csv(f'{input_path}/all_bins.bed', names=['chr', 'start', 'end', 'bin_id'])
bins_bed['bin_id'] = bins_bed.index   # bin id = row position in all_bins.bed
bins_bed['pos'] = bins_bed['chr'] + '_' + bins_bed['start'].astype('str')
df_cre_1kb_encode = df_cre.merge(bins_bed, left_on='start_bin', right_on='pos')

# %%
# Gene x bin Hi-C matrix.
# NOTE: from here on `bins_bed` is the 'bins_bed' dataset of the HDF5 file,
# no longer the DataFrame read from all_bins.bed above.
with h5py.File(f'/grid/gillis/data/lohia/hi_c_data_processing/data_{species}/{SRP_name}/{resolution}/max/hic_gene_gw_none_by_allbins_none_ranked_inter.h5', 'r') as hf:
    my_data = hf['matrix'][:]
    gene_list = hf['gene_list'][:]
    bins_bed = hf['bins_bed'][:]

# %%
# Binarise: 1 where a value exceeds the NaN-aware percentile of its own row.
my_percen = np.nanpercentile(my_data, CONTACT_PERCENTILE, axis=1, keepdims=True)
my_data_thresh = (my_data > my_percen).astype(int)

df_gene_tp = pd.DataFrame(my_data_thresh, index=[x.decode() for x in gene_list.tolist()], columns=bins_bed.tolist())

# %%
# Per-gene average expression rank.
exp_genes = pd.read_csv("/grid/gillis/data/lohia/hi_c_data_processing/software/CoCoCoNet/Homo_sapiens_average_rank.csv")
exp_genes['genes'] = [x.split('.')[0] for x in exp_genes['genes']]   # keep the part before the first '.'
exp_genes = exp_genes.set_index('genes')

exp_genes['avg_rank'] = exp_genes.sum(axis=1)
# NOTE(review): shape[1] is read AFTER 'avg_rank' has been assigned, so if the
# CSV has no 'avg_rank' column of its own the divisor is one larger than the
# number of columns that were summed. Behaviour kept as-is -- confirm the CSV
# layout before changing this to a plain mean.
exp_genes['avg_rank'] = exp_genes['avg_rank'] / exp_genes.shape[1]

exp_genes = exp_genes[['avg_rank']].reset_index().drop_duplicates(['genes'])

# %%
high_auc_gene = exp_genes[exp_genes['avg_rank'] > MIN_AVG_EXPRESSION_RANK]['genes'].tolist()
df_gene_tp_high = df_gene_tp[df_gene_tp.index.isin(high_auc_gene)]

# %%
cre_bins = df_cre_1kb_encode[df_cre_1kb_encode['cre'] > MIN_CRE_PER_BIN]['bin_id'].tolist()

# %%
# NOTE(review): the meaning of the 3000 divisor is not visible in this
# notebook -- confirm or replace with a named constant.
len(cre_bins)/3000

# %%
df_gene_tp_sel = df_gene_tp_high[cre_bins]

# %%
# Score every selected bin (one "term" per bin) against the co-expression network.
df_2d_jac, go_chrom = run_egad(df_gene_tp_sel, df_exp_corr)

# %%
# AUC versus the degree-based null AUC, with the identity line and the two means.
# x/y are passed by keyword: seaborn >= 0.12 rejects them as positionals.
fig, ax = plt.subplots()
sns.scatterplot(x=df_2d_jac['AUC'], y=df_2d_jac['DEGREE_NULL_AUC'], ax=ax)
ax.plot([0, 1], [0, 1], c='black')
ax.axvline(x=df_2d_jac['AUC'].mean(), c='black', ls='--')
ax.axhline(y=df_2d_jac['DEGREE_NULL_AUC'].mean(), c='black', ls='--')

# %%
# Attach to every scored bin the column sum of df_gene_tp_sel (number of genes
# flagged in that bin); the sum ends up in the column named 0.
df_t = df_2d_jac.merge(df_gene_tp_sel.sum().reset_index(), left_on=df_2d_jac.index, right_on='index')
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AUCAVG_NODE_DEGREEDEGREE_NULL_AUCP_Valueindex0
00.54864810432.2513300.5000192.071635e-02177492147
10.59345410135.5782460.4425645.234711e-0217755726
20.53645410060.6682460.4078402.975338e-0117756520
30.62454310300.1791060.4781415.764654e-07177566127
40.46189710385.4440570.5161286.432477e-02177580129
.....................
78760.60138910454.0277720.5109801.831122e-08163474249
78770.56014910544.3219320.5298884.074555e-03163477159
78780.52562910027.9083660.4159762.571734e-0116355760
78790.66011110531.9339670.4986883.683678e-0316356232
78800.69296910628.6372470.5471721.431164e-17163634162
\n", "

7881 rows × 6 columns

# %%
df_t

# %%
# Split the bins into 10 equal-sized groups by column 0 (the per-bin column
# sum of df_gene_tp_sel). Labels run from 10 (lowest values) down to 1
# (highest values). The column keeps its historical name 'quintile' because
# later cells select on it, although it holds deciles.
df_t['quintile'] = pd.qcut(df_t[0], 10, labels=np.arange(10, 0, -1))

# %%
# x/y are passed by keyword: seaborn >= 0.12 rejects them as positionals.
sns.boxplot(x=df_t['quintile'], y=df_t['AUC'])
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AUCAVG_NODE_DEGREEDEGREE_NULL_AUCP_Valueindex0quintile
50.61639310677.1105810.5749542.229388e-6817759119831
140.66338910572.3408110.5441119.583433e-10517771615361
150.70715310588.8458150.5397165.304060e-17417773116041
180.70747810582.9089730.5417112.018835e-23417773522061
230.59815810705.3178710.5886702.556946e-6317776326361
........................
77610.64228010809.8336240.6107192.569722e-12215695424421
77660.52647010515.7064320.5290046.451476e-0415699612771
77690.51880310666.1000790.5771768.463337e-0415708925031
77740.57720610723.2433490.5917328.175634e-3815716324671
77780.70211310578.2503680.5377833.649385e-18815721718391
\n", "

787 rows × 7 columns

# %%
# Bins labelled 1, i.e. the group with the highest values of column 0
# (the qcut labels are assigned in descending order).
df_t[df_t['quintile'] == 1]