From d1208fbb7c7f8802231af440548e8ce61bbac0bf Mon Sep 17 00:00:00 2001
From: anurag
Date: Fri, 13 Apr 2018 15:54:09 +0530
Subject: [PATCH] Updated README and Python Code

---
 nbi_simulation_new_for_git.ipynb | 216 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 216 insertions(+)
 create mode 100644 nbi_simulation_new_for_git.ipynb

diff --git a/nbi_simulation_new_for_git.ipynb b/nbi_simulation_new_for_git.ipynb
new file mode 100644
index 0000000..c142bfb
--- /dev/null
+++ b/nbi_simulation_new_for_git.ipynb
@@ -0,0 +1,216 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import csv\n",
+    "import numpy.matlib\n",
+    "from operator import itemgetter, attrgetter\n",
+    "from sklearn.model_selection import KFold\n",
+    "from sklearn.metrics import roc_curve, auc\n",
+    "import matplotlib.pyplot as plt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "6319 3772 (3772, 6319) 26093.0 (6319, 3772)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Alternative input: 'dt_new_and_fda_unique.csv'\n",
+    "drugid = []\n",
+    "targetid = []\n",
+    "\n",
+    "##1. Reading edge list (one 'drug,target' pair per line) line by line##\n",
+    "with open('new_dt_from_go_and_db_unique_latest.csv','r') as fp:\n",
+    "    for line in fp:\n",
+    "        line = line.strip()\n",
+    "        if not line:\n",
+    "            continue  # tolerate blank lines, e.g. at the end of the file\n",
+    "        tmp = line.split(',')\n",
+    "        drugid.append(tmp[0])\n",
+    "        targetid.append(tmp[1])\n",
+    "##End 1##\n",
+    "\n",
+    "drug = np.array(drugid)\n",
+    "target = np.array(targetid)\n",
+    "\n",
+    "# return_inverse yields, for every edge, the position of its drug/target in\n",
+    "# the sorted unique ids, i.e. the column/row of that edge in A\n",
+    "uni_drugid, drug_idx = np.unique(drug, return_inverse=True)\n",
+    "uni_targetid, target_idx = np.unique(target, return_inverse=True)\n",
+    "\n",
+    "##creating the (targets x drugs) adjacency matrix of the bipartite graph##\n",
+    "\n",
+    "A = np.zeros((uni_targetid.shape[0], uni_drugid.shape[0]))\n",
+    "A[target_idx, drug_idx] = 1\n",
+    "\n",
+    "nd = uni_drugid.shape[0]\n",
+    "mt = uni_targetid.shape[0]\n",
+    "\n",
+    "A_T = np.transpose(A)\n",
+    "no_edges = np.sum(A)\n",
+    "print nd, mt, A.shape, np.sum(A), A_T.shape\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(3772, 3772)\n",
+      "(3772, 6319)\n"
+     ]
+    }
+   ],
+   "source": [
+    "def nbi_score(adj):\n",
+    "    \"\"\"Network-based inference (NBI) on a bipartite adjacency matrix.\n",
+    "\n",
+    "    adj is an (n x m) 0/1 matrix. Returns (W2, score), where\n",
+    "    W2[i,j] = (sum_l adj[i,l]*adj[j,l]/ky[l]) / kx[j] is the (n x n) transfer\n",
+    "    matrix (ky = column sums, kx = row sums of adj) and score = W2.adj\n",
+    "    is (n x m). Nodes of degree zero contribute 0 instead of inf/nan.\n",
+    "    \"\"\"\n",
+    "    adj = np.asarray(adj, dtype=float)\n",
+    "    ky = np.sum(adj, axis=0)\n",
+    "    kx = np.sum(adj, axis=1)\n",
+    "    inv_ky = np.zeros_like(ky)\n",
+    "    inv_kx = np.zeros_like(kx)\n",
+    "    np.divide(1.0, ky, out=inv_ky, where=ky > 0)\n",
+    "    np.divide(1.0, kx, out=inv_kx, where=kx > 0)\n",
+    "    # Scaling the columns by inv_ky equals np.dot(adj, np.diag(inv_ky))\n",
+    "    # without materialising the (m x m) diagonal matrix.\n",
+    "    W1 = np.dot(adj, np.transpose(adj * inv_ky))\n",
+    "    # Broadcasting inv_kx over the rows replaces the (n x n) repmat of 1/kx.\n",
+    "    W2 = W1 * inv_kx\n",
+    "    return W2, np.dot(W2, adj)\n",
+    "\n",
+    "#NBI calculation for A\n",
+    "\n",
+    "W2, NBIscore = nbi_score(A)\n",
+    "print W2.shape\n",
+    "print NBIscore.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "#NBI calculation for A_T (uses nbi_score from the previous cell)\n",
+    "\n",
+    "W2_T, NBIscore_T = nbi_score(A_T)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "##Normalizing NBI scores: every column is divided by its maximum\n",
+    "def normalize_columns(score):\n",
+    "    col_max = np.max(score, axis=0)\n",
+    "    out = np.zeros_like(score)\n",
+    "    # all-zero columns stay 0 instead of turning into nan (0/0)\n",
+    "    np.true_divide(score, col_max, out=out, where=col_max > 0)\n",
+    "    return out\n",
+    "\n",
+    "NBIscore = normalize_columns(NBIscore)\n",
+    "NBIscore_T = normalize_columns(NBIscore_T)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "# per column: row indices ordered by ascending score\n",
+    "nbi_idx = np.argsort(NBIscore, axis=0)\n",
+    "nbi_idx_T= np.argsort(NBIscore_T, axis=0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "# A prediction is kept when its score lies within this fraction of the\n",
+    "# best new (not yet known) target of the drug.\n",
+    "percent_diff = 0.20\n",
+    "\n",
+    "with open('predicted_targets_for_all_drugs_using_percent_diff_0.20_.csv','w') as wp:\n",
+    "    for d in range(nd):\n",
+    "        idx1 = nbi_idx[:,d]\n",
+    "        # targets without a known edge to drug d, best score first\n",
+    "        p_targets_idx = idx1[A[idx1,d] == 0][::-1]\n",
+    "        if p_targets_idx.size == 0:\n",
+    "            continue  # drug is already linked to every target\n",
+    "        p_targets = NBIscore[p_targets_idx,d]\n",
+    "        if p_targets[0] == 0.0:\n",
+    "            continue  # no new target received any resource\n",
+    "        th_f = p_targets[0] - p_targets[0]*percent_diff\n",
+    "        keep = p_targets > th_f\n",
+    "        f_scores = p_targets[keep]\n",
+    "        f_targets = uni_targetid[p_targets_idx[keep]]\n",
+    "        for t, score in zip(f_targets, f_scores):\n",
+    "            wp.write(uni_drugid[d] + ',' + t + ',' + str(score) + '\\n')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 2",
+   "language": "python",
+   "name": "python2"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
-- 
2.0.0