Commit d1208fbb7c7f8802231af440548e8ce61bbac0bf

Authored by anurag
1 parent 70ad245597
Exists in master

Updated README and Python Code

Showing 1 changed file with 217 additions and 0 deletions   Show diff stats
nbi_simulation_new_for_git.ipynb
File was created 1 {
2 "cells": [
3 {
4 "cell_type": "code",
5 "execution_count": 1,
6 "metadata": {
7 "collapsed": false
8 },
9 "outputs": [],
10 "source": [
11 "import numpy as np\n",
12 "import csv\n",
13 "import numpy.matlib\n",
14 "from operator import itemgetter, attrgetter\n",
15 "from sklearn.model_selection import KFold\n",
16 "from sklearn.metrics import roc_curve, auc\n",
17 "import matplotlib.pyplot as plt"
18 ]
19 },
20 {
21 "cell_type": "code",
22 "execution_count": 2,
23 "metadata": {
24 "collapsed": false
25 },
26 "outputs": [
27 {
28 "name": "stdout",
29 "output_type": "stream",
30 "text": [
31 "6319 3772 (3772, 6319) 26093.0 (6319, 3772)\n"
32 ]
33 }
34 ],
35 "source": [
36 "#fp = open('dt_new_and_fda_unique.csv','r')\n",
37 "fp = open('new_dt_from_go_and_db_unique_latest.csv','r')\n",
38 "drugid = []\n",
39 "targetid = []\n",
40 "\n",
41 "##1. Reading edge list line by line##\n",
42 "for line in fp:\n",
43 " line = line.strip()\n",
44 " tmp = line.split(',')\n",
45 " drugid.append(tmp[0])\n",
46 " targetid.append(tmp[1])\n",
47 "fp.close()\n",
48 "##End 1##\n",
49 "\n",
50 "drug = np.array(drugid)\n",
51 "target =np.array(targetid)\n",
52 "\n",
53 "uni_drugid = np.unique(np.array(drugid))\n",
54 "uni_targetid = np.unique(np.array(targetid))\n",
55 "\n",
56 "##creating zero incidence matrix for the graph##\n",
57 "\n",
58 "A = np.zeros((uni_targetid.shape[0], uni_drugid.shape[0]))\n",
59 "\n",
60 "for i in range(len(drugid)):\n",
61 " idx1 = np.where(uni_targetid==targetid[i])\n",
62 " idx2 = np.where(uni_drugid==drugid[i])\n",
63 " A[idx1,idx2] = 1\n",
64 "\n",
65 "nd = uni_drugid.shape[0]\n",
66 "mt = uni_targetid.shape[0]\n",
67 "\n",
68 "A_T = np.transpose(A)\n",
69 "no_edges = np.sum(A)\n",
70 "print nd, mt, A.shape, np.sum(A), A_T.shape\n"
71 ]
72 },
73 {
74 "cell_type": "code",
75 "execution_count": 3,
76 "metadata": {
77 "collapsed": false
78 },
79 "outputs": [
80 {
81 "name": "stdout",
82 "output_type": "stream",
83 "text": [
84 "(3772, 3772)\n",
85 "(3772, 6319)\n"
86 ]
87 }
88 ],
89 "source": [
90 "#NBI calculation for A\n",
91 "\n",
92 "Ky = np.diag((1/sum(A))) \n",
93 "n = A.shape[0]\n",
94 "m = A.shape[1]\n",
95 "#print n, m, Ky.shape\n",
96 "Ky[np.isinf(Ky) | np.isnan(Ky)] = 0\n",
97 "kx = np.transpose(np.sum(A,1))\n",
98 "#print kx.shape\n",
99 "Nx = np.matlib.repmat(1/kx,n,1)\n",
100 "Nx[np.isinf(Nx) | np.isnan(Nx)] = 0\n",
101 "#kx[np.isinf(kx) | np.isnan(kx)] = 0\n",
102 "W = np.transpose(np.dot(A, Ky))\n",
103 "W1 = np.dot(A, W)\n",
104 "W2 = np.multiply(Nx, W1)\n",
105 "print W2.shape\n",
106 "NBIscore = np.dot(W2, A)\n",
107 "print NBIscore.shape"
108 ]
109 },
110 {
111 "cell_type": "code",
112 "execution_count": 4,
113 "metadata": {
114 "collapsed": true
115 },
116 "outputs": [],
117 "source": [
118 "#NBI calculation for A_T\n",
119 "\n",
120 "Ky = np.diag((1/sum(A_T)))\n",
121 "n = A_T.shape[0]\n",
122 "m = A_T.shape[1]\n",
123 "#print n, m, Ky.shape\n",
124 "Ky[np.isinf(Ky) | np.isnan(Ky)] = 0\n",
125 "kx = np.transpose(np.sum(A_T,1))\n",
126 "#print kx.shape\n",
127 "Nx = np.matlib.repmat(1/kx,n,1)\n",
128 "Nx[np.isinf(Nx) | np.isnan(Nx)] = 0\n",
129 "#kx[np.isinf(kx) | np.isnan(kx)] = 0\n",
130 "W = np.transpose(np.dot(A_T, Ky))\n",
131 "W1 = np.dot(A_T, W)\n",
132 "W2 = np.multiply(Nx, W1)\n",
133 "NBIscore_T = np.dot(W2, A_T)"
134 ]
135 },
136 {
137 "cell_type": "code",
138 "execution_count": 5,
139 "metadata": {
140 "collapsed": false
141 },
142 "outputs": [],
143 "source": [
144 "##Normalizing NBI scores\n",
145 "NBIscore = np.true_divide(NBIscore, np.max(NBIscore, axis=0))\n",
146 "NBIscore_T = np.true_divide(NBIscore_T, np.max(NBIscore_T, axis=0))"
147 ]
148 },
149 {
150 "cell_type": "code",
151 "execution_count": 7,
152 "metadata": {
153 "collapsed": false
154 },
155 "outputs": [],
156 "source": [
157 "nbi_idx = np.argsort(NBIscore, axis=0)\n",
158 "nbi_idx_T= np.argsort(NBIscore_T, axis=0)"
159 ]
160 },
161 {
162 "cell_type": "code",
163 "execution_count": 8,
164 "metadata": {
165 "collapsed": false
166 },
167 "outputs": [],
168 "source": [
169 "wp = open('predicted_targets_for_all_drugs_using_percent_diff_0.20_.csv','w')\n",
170 "for d in range(nd):\n",
171 " idx1 = nbi_idx[:,d]\n",
172 " #idx1 = idx1[::-1]\n",
173 " idx2 = A[:,d]\n",
174 " idx3 = idx2[idx1]\n",
175 " idx4 = np.where(idx3 == 0)[0]\n",
176 " p_targets_idx = idx1[idx4[-n:]]\n",
177 " p_targets_idx = p_targets_idx[::-1]\n",
178 " p_targets = NBIscore[p_targets_idx,d]\n",
179 " if p_targets[0] == 0.0:\n",
180 " continue\n",
181 " else:\n",
182 " p_diff = np.diff(p_targets)\n",
183 " th = p_targets[0]*0.20\n",
184 " th_f = p_targets[0]-th\n",
185 " f_idx = p_targets_idx[p_targets > th_f]\n",
186 " f_scores = p_targets[p_targets > th_f]\n",
187 " f_targets = uni_targetid[f_idx]\n",
188 " #print p_targets[0], th, th_f, p_targets[p_targets > th_f], p_targets\n",
189 " for i,t in enumerate(f_targets):\n",
190 " wp.write(uni_drugid[d] + ',' + t + ',' + str(f_scores[i]) + '\\n')\n",
191 "\n",
192 "wp.close()"
193 ]
194 }
195 ],
196 "metadata": {
197 "kernelspec": {
198 "display_name": "Python 2",
199 "language": "python",
200 "name": "python2"
201 },
202 "language_info": {
203 "codemirror_mode": {
204 "name": "ipython",
205 "version": 2
206 },
207 "file_extension": ".py",
208 "mimetype": "text/x-python",
209 "name": "python",
210 "nbconvert_exporter": "python",
211 "pygments_lexer": "ipython2",
212 "version": "2.7.6"
213 }
214 },
215 "nbformat": 4,
216 "nbformat_minor": 2
217 }
218