Commit d1208fbb7c7f8802231af440548e8ce61bbac0bf

Authored by anurag
1 parent 70ad245597
Exists in master

Updated README and Python Code

Showing 1 changed file with 217 additions and 0 deletions   Show diff stats
nbi_simulation_new_for_git.ipynb
... ... @@ -0,0 +1,217 @@
  1 +{
  2 + "cells": [
  3 + {
  4 + "cell_type": "code",
  5 + "execution_count": 1,
  6 + "metadata": {
  7 + "collapsed": false
  8 + },
  9 + "outputs": [],
  10 + "source": [
  11 + "import numpy as np\n",
  12 + "import csv\n",
  13 + "import numpy.matlib\n",
  14 + "from operator import itemgetter, attrgetter\n",
  15 + "from sklearn.model_selection import KFold\n",
  16 + "from sklearn.metrics import roc_curve, auc\n",
  17 + "import matplotlib.pyplot as plt"
  18 + ]
  19 + },
  20 + {
  21 + "cell_type": "code",
  22 + "execution_count": 2,
  23 + "metadata": {
  24 + "collapsed": false
  25 + },
  26 + "outputs": [
  27 + {
  28 + "name": "stdout",
  29 + "output_type": "stream",
  30 + "text": [
  31 + "6319 3772 (3772, 6319) 26093.0 (6319, 3772)\n"
  32 + ]
  33 + }
  34 + ],
  35 + "source": [
  36 + "#fp = open('dt_new_and_fda_unique.csv','r')\n",
  37 + "fp = open('new_dt_from_go_and_db_unique_latest.csv','r')\n",
  38 + "drugid = []\n",
  39 + "targetid = []\n",
  40 + "\n",
  41 + "##1. Reading the drug,target edge list line by line (field 0 = drug id, field 1 = target id)##\n",
  42 + "for line in fp:\n",
  43 + " line = line.strip()\n",
  44 + " tmp = line.split(',')\n",
  45 + " drugid.append(tmp[0])\n",
  46 + " targetid.append(tmp[1])\n",
  47 + "fp.close()\n",
  48 + "##End 1##\n",
  49 + "\n",
  50 + "drug = np.array(drugid)\n",
  51 + "target =np.array(targetid)\n",
  52 + "\n",
  53 + "uni_drugid = np.unique(np.array(drugid))\n",
  54 + "uni_targetid = np.unique(np.array(targetid))\n",
  55 + "\n",
  56 + "##creating the target-by-drug 0/1 matrix A for the graph: start from all zeros, then set A[target, drug] = 1 for every edge##\n",
  57 + "\n",
  58 + "A = np.zeros((uni_targetid.shape[0], uni_drugid.shape[0]))\n",
  59 + "\n",
  60 + "for i in range(len(drugid)):\n",
  61 + " idx1 = np.where(uni_targetid==targetid[i])\n",
  62 + " idx2 = np.where(uni_drugid==drugid[i])\n",
  63 + " A[idx1,idx2] = 1\n",
  64 + "\n",
  65 + "nd = uni_drugid.shape[0]\n",
  66 + "mt = uni_targetid.shape[0]\n",
  67 + "\n",
  68 + "A_T = np.transpose(A)\n",
  69 + "no_edges = np.sum(A)\n",
  70 + "print nd, mt, A.shape, np.sum(A), A_T.shape\n"
  71 + ]
  72 + },
  73 + {
  74 + "cell_type": "code",
  75 + "execution_count": 3,
  76 + "metadata": {
  77 + "collapsed": false
  78 + },
  79 + "outputs": [
  80 + {
  81 + "name": "stdout",
  82 + "output_type": "stream",
  83 + "text": [
  84 + "(3772, 3772)\n",
  85 + "(3772, 6319)\n"
  86 + ]
  87 + }
  88 + ],
  89 + "source": [
  90 + "#NBI calculation for A (rows = targets, columns = drugs)\n",
  91 + "\n",
  92 + "Ky = np.diag((1/sum(A))) \n",
  93 + "n = A.shape[0]\n",
  94 + "m = A.shape[1]\n",
  95 + "#print n, m, Ky.shape\n",
  96 + "Ky[np.isinf(Ky) | np.isnan(Ky)] = 0\n",
  97 + "kx = np.transpose(np.sum(A,1))\n",
  98 + "#print kx.shape\n",
  99 + "Nx = np.matlib.repmat(1/kx,n,1)\n",
  100 + "Nx[np.isinf(Nx) | np.isnan(Nx)] = 0\n",
  101 + "#kx[np.isinf(kx) | np.isnan(kx)] = 0\n",
  102 + "W = np.transpose(np.dot(A, Ky))\n",
  103 + "W1 = np.dot(A, W)\n",
  104 + "W2 = np.multiply(Nx, W1)\n",
  105 + "print W2.shape\n",
  106 + "NBIscore = np.dot(W2, A)\n",
  107 + "print NBIscore.shape"
  108 + ]
  109 + },
  110 + {
  111 + "cell_type": "code",
  112 + "execution_count": 4,
  113 + "metadata": {
  114 + "collapsed": true
  115 + },
  116 + "outputs": [],
  117 + "source": [
  118 + "#NBI calculation for A_T, the transpose of A (rows = drugs, columns = targets)\n",
  119 + "\n",
  120 + "Ky = np.diag((1/sum(A_T)))\n",
  121 + "n = A_T.shape[0]\n",
  122 + "m = A_T.shape[1]\n",
  123 + "#print n, m, Ky.shape\n",
  124 + "Ky[np.isinf(Ky) | np.isnan(Ky)] = 0\n",
  125 + "kx = np.transpose(np.sum(A_T,1))\n",
  126 + "#print kx.shape\n",
  127 + "Nx = np.matlib.repmat(1/kx,n,1)\n",
  128 + "Nx[np.isinf(Nx) | np.isnan(Nx)] = 0\n",
  129 + "#kx[np.isinf(kx) | np.isnan(kx)] = 0\n",
  130 + "W = np.transpose(np.dot(A_T, Ky))\n",
  131 + "W1 = np.dot(A_T, W)\n",
  132 + "W2 = np.multiply(Nx, W1)\n",
  133 + "NBIscore_T = np.dot(W2, A_T)"
  134 + ]
  135 + },
  136 + {
  137 + "cell_type": "code",
  138 + "execution_count": 5,
  139 + "metadata": {
  140 + "collapsed": false
  141 + },
  142 + "outputs": [],
  143 + "source": [
  144 + "##Normalizing NBI scores: each column is divided by that column's maximum score\n",
  145 + "NBIscore = np.true_divide(NBIscore, np.max(NBIscore, axis=0))\n",
  146 + "NBIscore_T = np.true_divide(NBIscore_T, np.max(NBIscore_T, axis=0))"
  147 + ]
  148 + },
  149 + {
  150 + "cell_type": "code",
  151 + "execution_count": 7,
  152 + "metadata": {
  153 + "collapsed": false
  154 + },
  155 + "outputs": [],
  156 + "source": [
  157 + "nbi_idx = np.argsort(NBIscore, axis=0)\n",
  158 + "nbi_idx_T= np.argsort(NBIscore_T, axis=0)"
  159 + ]
  160 + },
  161 + {
  162 + "cell_type": "code",
  163 + "execution_count": 8,
  164 + "metadata": {
  165 + "collapsed": false
  166 + },
  167 + "outputs": [],
  168 + "source": [
  169 + "wp = open('predicted_targets_for_all_drugs_using_percent_diff_0.20_.csv','w')\n",
  170 + "for d in range(nd):\n",
  171 + " idx1 = nbi_idx[:,d]\n",
  172 + " #idx1 = idx1[::-1]\n",
  173 + " idx2 = A[:,d]\n",
  174 + " idx3 = idx2[idx1]\n",
  175 + " idx4 = np.where(idx3 == 0)[0]\n",
  176 + " p_targets_idx = idx1[idx4[-n:]]\n",
  177 + " p_targets_idx = p_targets_idx[::-1]\n",
  178 + " p_targets = NBIscore[p_targets_idx,d]\n",
  179 + " if p_targets[0] == 0.0:\n",
  180 + " continue\n",
  181 + " else:\n",
  182 + " p_diff = np.diff(p_targets)\n",
  183 + " th = p_targets[0]*0.20\n",
  184 + " th_f = p_targets[0]-th\n",
  185 + " f_idx = p_targets_idx[p_targets > th_f]\n",
  186 + " f_scores = p_targets[p_targets > th_f]\n",
  187 + " f_targets = uni_targetid[f_idx]\n",
  188 + " #print p_targets[0], th, th_f, p_targets[p_targets > th_f], p_targets\n",
  189 + " for i,t in enumerate(f_targets):\n",
  190 + " wp.write(uni_drugid[d] + ',' + t + ',' + str(f_scores[i]) + '\\n')\n",
  191 + "\n",
  192 + "wp.close()"
  193 + ]
  194 + }
  195 + ],
  196 + "metadata": {
  197 + "kernelspec": {
  198 + "display_name": "Python 2",
  199 + "language": "python",
  200 + "name": "python2"
  201 + },
  202 + "language_info": {
  203 + "codemirror_mode": {
  204 + "name": "ipython",
  205 + "version": 2
  206 + },
  207 + "file_extension": ".py",
  208 + "mimetype": "text/x-python",
  209 + "name": "python",
  210 + "nbconvert_exporter": "python",
  211 + "pygments_lexer": "ipython2",
  212 + "version": "2.7.6"
  213 + }
  214 + },
  215 + "nbformat": 4,
  216 + "nbformat_minor": 2
  217 +}
... ...