Commit d1208fbb7c7f8802231af440548e8ce61bbac0bf
1 parent
70ad245597
Exists in
master
Updated README and Python Code
Showing
1 changed file
with
217 additions
and
0 deletions
Show diff stats
nbi_simulation_new_for_git.ipynb
File was created | 1 | { | |
2 | "cells": [ | ||
3 | { | ||
4 | "cell_type": "code", | ||
5 | "execution_count": 1, | ||
6 | "metadata": { | ||
7 | "collapsed": false | ||
8 | }, | ||
9 | "outputs": [], | ||
10 | "source": [ | ||
11 | "import numpy as np\n", | ||
12 | "import csv\n", | ||
13 | "import numpy.matlib\n", | ||
14 | "from operator import itemgetter, attrgetter\n", | ||
15 | "from sklearn.model_selection import KFold\n", | ||
16 | "from sklearn.metrics import roc_curve, auc\n", | ||
17 | "import matplotlib.pyplot as plt" | ||
18 | ] | ||
19 | }, | ||
20 | { | ||
21 | "cell_type": "code", | ||
22 | "execution_count": 2, | ||
23 | "metadata": { | ||
24 | "collapsed": false | ||
25 | }, | ||
26 | "outputs": [ | ||
27 | { | ||
28 | "name": "stdout", | ||
29 | "output_type": "stream", | ||
30 | "text": [ | ||
31 | "6319 3772 (3772, 6319) 26093.0 (6319, 3772)\n" | ||
32 | ] | ||
33 | } | ||
34 | ], | ||
35 | "source": [ | ||
36 | "#fp = open('dt_new_and_fda_unique.csv','r')\n", | ||
37 | "fp = open('new_dt_from_go_and_db_unique_latest.csv','r')\n", | ||
38 | "drugid = []\n", | ||
39 | "targetid = []\n", | ||
40 | "\n", | ||
41 | "##1. Reading the edge list line by line (column 0 = drug id, column 1 = target id)##\n", | |
42 | "for line in fp:\n", | ||
43 | " line = line.strip()\n", | ||
44 | " tmp = line.split(',')\n", | ||
45 | " drugid.append(tmp[0])\n", | ||
46 | " targetid.append(tmp[1])\n", | ||
47 | "fp.close()\n", | ||
48 | "##End 1##\n", | ||
49 | "\n", | ||
50 | "drug = np.array(drugid)\n", | ||
51 | "target =np.array(targetid)\n", | ||
52 | "\n", | ||
53 | "uni_drugid = np.unique(np.array(drugid))\n", | ||
54 | "uni_targetid = np.unique(np.array(targetid))\n", | ||
55 | "\n", | ||
56 | "##creating the zero-initialized target-by-drug adjacency matrix (rows = targets, columns = drugs); an entry is set to 1 for each edge##\n", | |
57 | "\n", | ||
58 | "A = np.zeros((uni_targetid.shape[0], uni_drugid.shape[0]))\n", | ||
59 | "\n", | ||
60 | "for i in range(len(drugid)):\n", | ||
61 | " idx1 = np.where(uni_targetid==targetid[i])\n", | ||
62 | " idx2 = np.where(uni_drugid==drugid[i])\n", | ||
63 | " A[idx1,idx2] = 1\n", | ||
64 | "\n", | ||
65 | "nd = uni_drugid.shape[0]\n", | ||
66 | "mt = uni_targetid.shape[0]\n", | ||
67 | "\n", | ||
68 | "A_T = np.transpose(A)\n", | ||
69 | "no_edges = np.sum(A)\n", | ||
70 | "print nd, mt, A.shape, np.sum(A), A_T.shape\n" | ||
71 | ] | ||
72 | }, | ||
73 | { | ||
74 | "cell_type": "code", | ||
75 | "execution_count": 3, | ||
76 | "metadata": { | ||
77 | "collapsed": false | ||
78 | }, | ||
79 | "outputs": [ | ||
80 | { | ||
81 | "name": "stdout", | ||
82 | "output_type": "stream", | ||
83 | "text": [ | ||
84 | "(3772, 3772)\n", | ||
85 | "(3772, 6319)\n" | ||
86 | ] | ||
87 | } | ||
88 | ], | ||
89 | "source": [ | ||
90 | "#NBI calculation for A\n", | ||
91 | "\n", | ||
92 | "Ky = np.diag((1/sum(A))) \n", | ||
93 | "n = A.shape[0]\n", | ||
94 | "m = A.shape[1]\n", | ||
95 | "#print n, m, Ky.shape\n", | ||
96 | "Ky[np.isinf(Ky) | np.isnan(Ky)] = 0\n", | ||
97 | "kx = np.transpose(np.sum(A,1))\n", | ||
98 | "#print kx.shape\n", | ||
99 | "Nx = np.matlib.repmat(1/kx,n,1)\n", | ||
100 | "Nx[np.isinf(Nx) | np.isnan(Nx)] = 0\n", | ||
101 | "#kx[np.isinf(kx) | np.isnan(kx)] = 0\n", | ||
102 | "W = np.transpose(np.dot(A, Ky))\n", | ||
103 | "W1 = np.dot(A, W)\n", | ||
104 | "W2 = np.multiply(Nx, W1)\n", | ||
105 | "print W2.shape\n", | ||
106 | "NBIscore = np.dot(W2, A)\n", | ||
107 | "print NBIscore.shape" | ||
108 | ] | ||
109 | }, | ||
110 | { | ||
111 | "cell_type": "code", | ||
112 | "execution_count": 4, | ||
113 | "metadata": { | ||
114 | "collapsed": true | ||
115 | }, | ||
116 | "outputs": [], | ||
117 | "source": [ | ||
118 | "#NBI calculation for A_T\n", | ||
119 | "\n", | ||
120 | "Ky = np.diag((1/sum(A_T)))\n", | ||
121 | "n = A_T.shape[0]\n", | ||
122 | "m = A_T.shape[1]\n", | ||
123 | "#print n, m, Ky.shape\n", | ||
124 | "Ky[np.isinf(Ky) | np.isnan(Ky)] = 0\n", | ||
125 | "kx = np.transpose(np.sum(A_T,1))\n", | ||
126 | "#print kx.shape\n", | ||
127 | "Nx = np.matlib.repmat(1/kx,n,1)\n", | ||
128 | "Nx[np.isinf(Nx) | np.isnan(Nx)] = 0\n", | ||
129 | "#kx[np.isinf(kx) | np.isnan(kx)] = 0\n", | ||
130 | "W = np.transpose(np.dot(A_T, Ky))\n", | ||
131 | "W1 = np.dot(A_T, W)\n", | ||
132 | "W2 = np.multiply(Nx, W1)\n", | ||
133 | "NBIscore_T = np.dot(W2, A_T)" | ||
134 | ] | ||
135 | }, | ||
136 | { | ||
137 | "cell_type": "code", | ||
138 | "execution_count": 5, | ||
139 | "metadata": { | ||
140 | "collapsed": false | ||
141 | }, | ||
142 | "outputs": [], | ||
143 | "source": [ | ||
144 | "##Normalizing NBI scores: each column is divided by its maximum value\n", | |
145 | "NBIscore = np.true_divide(NBIscore, np.max(NBIscore, axis=0))\n", | ||
146 | "NBIscore_T = np.true_divide(NBIscore_T, np.max(NBIscore_T, axis=0))" | ||
147 | ] | ||
148 | }, | ||
149 | { | ||
150 | "cell_type": "code", | ||
151 | "execution_count": 7, | ||
152 | "metadata": { | ||
153 | "collapsed": false | ||
154 | }, | ||
155 | "outputs": [], | ||
156 | "source": [ | ||
157 | "nbi_idx = np.argsort(NBIscore, axis=0)\n", | ||
158 | "nbi_idx_T= np.argsort(NBIscore_T, axis=0)" | ||
159 | ] | ||
160 | }, | ||
161 | { | ||
162 | "cell_type": "code", | ||
163 | "execution_count": 8, | ||
164 | "metadata": { | ||
165 | "collapsed": false | ||
166 | }, | ||
167 | "outputs": [], | ||
168 | "source": [ | ||
169 | "wp = open('predicted_targets_for_all_drugs_using_percent_diff_0.20_.csv','w')\n", | ||
170 | "for d in range(nd):\n", | ||
171 | " idx1 = nbi_idx[:,d]\n", | ||
172 | " #idx1 = idx1[::-1]\n", | ||
173 | " idx2 = A[:,d]\n", | ||
174 | " idx3 = idx2[idx1]\n", | ||
175 | " idx4 = np.where(idx3 == 0)[0]\n", | ||
176 | " p_targets_idx = idx1[idx4[-n:]]\n", | ||
177 | " p_targets_idx = p_targets_idx[::-1]\n", | ||
178 | " p_targets = NBIscore[p_targets_idx,d]\n", | ||
179 | " if p_targets[0] == 0.0:\n", | ||
180 | " continue\n", | ||
181 | " else:\n", | ||
182 | " p_diff = np.diff(p_targets)\n", | ||
183 | " th = p_targets[0]*0.20\n", | ||
184 | " th_f = p_targets[0]-th\n", | ||
185 | " f_idx = p_targets_idx[p_targets > th_f]\n", | ||
186 | " f_scores = p_targets[p_targets > th_f]\n", | ||
187 | " f_targets = uni_targetid[f_idx]\n", | ||
188 | " #print p_targets[0], th, th_f, p_targets[p_targets > th_f], p_targets\n", | ||
189 | " for i,t in enumerate(f_targets):\n", | ||
190 | " wp.write(uni_drugid[d] + ',' + t + ',' + str(f_scores[i]) + '\\n')\n", | ||
191 | "\n", | ||
192 | "wp.close()" | ||
193 | ] | ||
194 | } | ||
195 | ], | ||
196 | "metadata": { | ||
197 | "kernelspec": { | ||
198 | "display_name": "Python 2", | ||
199 | "language": "python", | ||
200 | "name": "python2" | ||
201 | }, | ||
202 | "language_info": { | ||
203 | "codemirror_mode": { | ||
204 | "name": "ipython", | ||
205 | "version": 2 | ||
206 | }, | ||
207 | "file_extension": ".py", | ||
208 | "mimetype": "text/x-python", | ||
209 | "name": "python", | ||
210 | "nbconvert_exporter": "python", | ||
211 | "pygments_lexer": "ipython2", | ||
212 | "version": "2.7.6" | ||
213 | } | ||
214 | }, | ||
215 | "nbformat": 4, | ||
216 | "nbformat_minor": 2 | ||
217 | } | ||
218 |