Commit d1208fbb7c7f8802231af440548e8ce61bbac0bf
1 parent
70ad245597
Exists in
master
Updated README and Python Code
Showing
1 changed file
with
217 additions
and
0 deletions
Show diff stats
nbi_simulation_new_for_git.ipynb
... | ... | @@ -0,0 +1,217 @@ |
1 | +{ | |
2 | + "cells": [ | |
3 | + { | |
4 | + "cell_type": "code", | |
5 | + "execution_count": 1, | |
6 | + "metadata": { | |
7 | + "collapsed": false | |
8 | + }, | |
9 | + "outputs": [], | |
10 | + "source": [ | |
11 | + "import numpy as np\n", | |
12 | + "import csv\n", | |
13 | + "import numpy.matlib\n", | |
14 | + "from operator import itemgetter, attrgetter\n", | |
15 | + "from sklearn.model_selection import KFold\n", | |
16 | + "from sklearn.metrics import roc_curve, auc\n", | |
17 | + "import matplotlib.pyplot as plt" | |
18 | + ] | |
19 | + }, | |
20 | + { | |
21 | + "cell_type": "code", | |
22 | + "execution_count": 2, | |
23 | + "metadata": { | |
24 | + "collapsed": false | |
25 | + }, | |
26 | + "outputs": [ | |
27 | + { | |
28 | + "name": "stdout", | |
29 | + "output_type": "stream", | |
30 | + "text": [ | |
31 | + "6319 3772 (3772, 6319) 26093.0 (6319, 3772)\n" | |
32 | + ] | |
33 | + } | |
34 | + ], | |
35 | + "source": [ | |
36 | + "#fp = open('dt_new_and_fda_unique.csv','r')\n", | |
37 | + "fp = open('new_dt_from_go_and_db_unique_latest.csv','r')\n", | |
38 | + "drugid = []\n", | |
39 | + "targetid = []\n", | |
40 | + "\n", | |
41 | + "##1. Reading the comma-separated edge list line by line (column 0: drug id, column 1: target id)##\n", | |
42 | + "for line in fp:\n", | |
43 | + " line = line.strip()\n", | |
44 | + " tmp = line.split(',')\n", | |
45 | + " drugid.append(tmp[0])\n", | |
46 | + " targetid.append(tmp[1])\n", | |
47 | + "fp.close()\n", | |
48 | + "##End 1##\n", | |
49 | + "\n", | |
50 | + "drug = np.array(drugid)\n", | |
51 | + "target =np.array(targetid)\n", | |
52 | + "\n", | |
53 | + "uni_drugid = np.unique(np.array(drugid))\n", | |
54 | + "uni_targetid = np.unique(np.array(targetid))\n", | |
55 | + "\n", | |
56 | + "##Creating a zero matrix (rows: unique targets, columns: unique drugs) and setting the entry for each drug-target edge to 1##\n", | |
57 | + "\n", | |
58 | + "A = np.zeros((uni_targetid.shape[0], uni_drugid.shape[0]))\n", | |
59 | + "\n", | |
60 | + "for i in range(len(drugid)):\n", | |
61 | + " idx1 = np.where(uni_targetid==targetid[i])\n", | |
62 | + " idx2 = np.where(uni_drugid==drugid[i])\n", | |
63 | + " A[idx1,idx2] = 1\n", | |
64 | + "\n", | |
65 | + "nd = uni_drugid.shape[0]\n", | |
66 | + "mt = uni_targetid.shape[0]\n", | |
67 | + "\n", | |
68 | + "A_T = np.transpose(A)\n", | |
69 | + "no_edges = np.sum(A)\n", | |
70 | + "print nd, mt, A.shape, np.sum(A), A_T.shape\n" | |
71 | + ] | |
72 | + }, | |
73 | + { | |
74 | + "cell_type": "code", | |
75 | + "execution_count": 3, | |
76 | + "metadata": { | |
77 | + "collapsed": false | |
78 | + }, | |
79 | + "outputs": [ | |
80 | + { | |
81 | + "name": "stdout", | |
82 | + "output_type": "stream", | |
83 | + "text": [ | |
84 | + "(3772, 3772)\n", | |
85 | + "(3772, 6319)\n" | |
86 | + ] | |
87 | + } | |
88 | + ], | |
89 | + "source": [ | |
90 | + "#NBI calculation for A\n", | |
91 | + "\n", | |
92 | + "Ky = np.diag((1/sum(A))) \n", | |
93 | + "n = A.shape[0]\n", | |
94 | + "m = A.shape[1]\n", | |
95 | + "#print n, m, Ky.shape\n", | |
96 | + "Ky[np.isinf(Ky) | np.isnan(Ky)] = 0\n", | |
97 | + "kx = np.transpose(np.sum(A,1))\n", | |
98 | + "#print kx.shape\n", | |
99 | + "Nx = np.matlib.repmat(1/kx,n,1)\n", | |
100 | + "Nx[np.isinf(Nx) | np.isnan(Nx)] = 0\n", | |
101 | + "#kx[np.isinf(kx) | np.isnan(kx)] = 0\n", | |
102 | + "W = np.transpose(np.dot(A, Ky))\n", | |
103 | + "W1 = np.dot(A, W)\n", | |
104 | + "W2 = np.multiply(Nx, W1)\n", | |
105 | + "print W2.shape\n", | |
106 | + "NBIscore = np.dot(W2, A)\n", | |
107 | + "print NBIscore.shape" | |
108 | + ] | |
109 | + }, | |
110 | + { | |
111 | + "cell_type": "code", | |
112 | + "execution_count": 4, | |
113 | + "metadata": { | |
114 | + "collapsed": true | |
115 | + }, | |
116 | + "outputs": [], | |
117 | + "source": [ | |
118 | + "#NBI calculation for A_T\n", | |
119 | + "\n", | |
120 | + "Ky = np.diag((1/sum(A_T)))\n", | |
121 | + "n = A_T.shape[0]\n", | |
122 | + "m = A_T.shape[1]\n", | |
123 | + "#print n, m, Ky.shape\n", | |
124 | + "Ky[np.isinf(Ky) | np.isnan(Ky)] = 0\n", | |
125 | + "kx = np.transpose(np.sum(A_T,1))\n", | |
126 | + "#print kx.shape\n", | |
127 | + "Nx = np.matlib.repmat(1/kx,n,1)\n", | |
128 | + "Nx[np.isinf(Nx) | np.isnan(Nx)] = 0\n", | |
129 | + "#kx[np.isinf(kx) | np.isnan(kx)] = 0\n", | |
130 | + "W = np.transpose(np.dot(A_T, Ky))\n", | |
131 | + "W1 = np.dot(A_T, W)\n", | |
132 | + "W2 = np.multiply(Nx, W1)\n", | |
133 | + "NBIscore_T = np.dot(W2, A_T)" | |
134 | + ] | |
135 | + }, | |
136 | + { | |
137 | + "cell_type": "code", | |
138 | + "execution_count": 5, | |
139 | + "metadata": { | |
140 | + "collapsed": false | |
141 | + }, | |
142 | + "outputs": [], | |
143 | + "source": [ | |
144 | + "##Normalizing NBI scores: each column is divided by its own maximum value\n", | |
145 | + "NBIscore = np.true_divide(NBIscore, np.max(NBIscore, axis=0))\n", | |
146 | + "NBIscore_T = np.true_divide(NBIscore_T, np.max(NBIscore_T, axis=0))" | |
147 | + ] | |
148 | + }, | |
149 | + { | |
150 | + "cell_type": "code", | |
151 | + "execution_count": 7, | |
152 | + "metadata": { | |
153 | + "collapsed": false | |
154 | + }, | |
155 | + "outputs": [], | |
156 | + "source": [ | |
157 | + "nbi_idx = np.argsort(NBIscore, axis=0)\n", | |
158 | + "nbi_idx_T= np.argsort(NBIscore_T, axis=0)" | |
159 | + ] | |
160 | + }, | |
161 | + { | |
162 | + "cell_type": "code", | |
163 | + "execution_count": 8, | |
164 | + "metadata": { | |
165 | + "collapsed": false | |
166 | + }, | |
167 | + "outputs": [], | |
168 | + "source": [ | |
169 | + "wp = open('predicted_targets_for_all_drugs_using_percent_diff_0.20_.csv','w')\n", | |
170 | + "for d in range(nd):\n", | |
171 | + " idx1 = nbi_idx[:,d]\n", | |
172 | + " #idx1 = idx1[::-1]\n", | |
173 | + " idx2 = A[:,d]\n", | |
174 | + " idx3 = idx2[idx1]\n", | |
175 | + " idx4 = np.where(idx3 == 0)[0]\n", | |
176 | + " p_targets_idx = idx1[idx4[-n:]]\n", | |
177 | + " p_targets_idx = p_targets_idx[::-1]\n", | |
178 | + " p_targets = NBIscore[p_targets_idx,d]\n", | |
179 | + " if p_targets[0] == 0.0:\n", | |
180 | + " continue\n", | |
181 | + " else:\n", | |
182 | + " p_diff = np.diff(p_targets)\n", | |
183 | + " th = p_targets[0]*0.20\n", | |
184 | + " th_f = p_targets[0]-th\n", | |
185 | + " f_idx = p_targets_idx[p_targets > th_f]\n", | |
186 | + " f_scores = p_targets[p_targets > th_f]\n", | |
187 | + " f_targets = uni_targetid[f_idx]\n", | |
188 | + " #print p_targets[0], th, th_f, p_targets[p_targets > th_f], p_targets\n", | |
189 | + " for i,t in enumerate(f_targets):\n", | |
190 | + " wp.write(uni_drugid[d] + ',' + t + ',' + str(f_scores[i]) + '\\n')\n", | |
191 | + "\n", | |
192 | + "wp.close()" | |
193 | + ] | |
194 | + } | |
195 | + ], | |
196 | + "metadata": { | |
197 | + "kernelspec": { | |
198 | + "display_name": "Python 2", | |
199 | + "language": "python", | |
200 | + "name": "python2" | |
201 | + }, | |
202 | + "language_info": { | |
203 | + "codemirror_mode": { | |
204 | + "name": "ipython", | |
205 | + "version": 2 | |
206 | + }, | |
207 | + "file_extension": ".py", | |
208 | + "mimetype": "text/x-python", | |
209 | + "name": "python", | |
210 | + "nbconvert_exporter": "python", | |
211 | + "pygments_lexer": "ipython2", | |
212 | + "version": "2.7.6" | |
213 | + } | |
214 | + }, | |
215 | + "nbformat": 4, | |
216 | + "nbformat_minor": 2 | |
217 | +} | ... | ... |