nbi_simulation_new_for_git.ipynb 5.52 KB
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import csv\n",
    "import numpy.matlib\n",
    "from operator import itemgetter, attrgetter\n",
    "from sklearn.model_selection import KFold\n",
    "from sklearn.metrics import roc_curve, auc\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "6319 3772 (3772, 6319) 26093.0 (6319, 3772)\n"
     ]
    }
   ],
   "source": [
    "#fp = open('dt_new_and_fda_unique.csv','r')\n",
    "fp = open('new_dt_from_go_and_db_unique_latest.csv','r')\n",
    "drugid = []\n",
    "targetid = []\n",
    "\n",
    "##1. Reading edge list line by line##\n",
    "for line in fp:\n",
    "        line = line.strip()\n",
    "        tmp = line.split(',')\n",
    "        drugid.append(tmp[0])\n",
    "        targetid.append(tmp[1])\n",
    "fp.close()\n",
    "##End 1##\n",
    "\n",
    "drug = np.array(drugid)\n",
    "target =np.array(targetid)\n",
    "\n",
    "uni_drugid = np.unique(np.array(drugid))\n",
    "uni_targetid = np.unique(np.array(targetid))\n",
    "\n",
    "##creating zero incidence matrix for the graph##\n",
    "\n",
    "A = np.zeros((uni_targetid.shape[0], uni_drugid.shape[0]))\n",
    "\n",
    "for i in range(len(drugid)):\n",
    "                idx1 = np.where(uni_targetid==targetid[i])\n",
    "                idx2 = np.where(uni_drugid==drugid[i])\n",
    "                A[idx1,idx2] = 1\n",
    "\n",
    "nd = uni_drugid.shape[0]\n",
    "mt = uni_targetid.shape[0]\n",
    "\n",
    "A_T = np.transpose(A)\n",
    "no_edges = np.sum(A)\n",
    "print nd, mt, A.shape, np.sum(A), A_T.shape\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(3772, 3772)\n",
      "(3772, 6319)\n"
     ]
    }
   ],
   "source": [
    "#NBI calculation for A\n",
    "\n",
    "Ky = np.diag((1/sum(A))) \n",
    "n = A.shape[0]\n",
    "m = A.shape[1]\n",
    "#print n, m, Ky.shape\n",
    "Ky[np.isinf(Ky) | np.isnan(Ky)] = 0\n",
    "kx = np.transpose(np.sum(A,1))\n",
    "#print kx.shape\n",
    "Nx = np.matlib.repmat(1/kx,n,1)\n",
    "Nx[np.isinf(Nx) | np.isnan(Nx)] = 0\n",
    "#kx[np.isinf(kx) | np.isnan(kx)] = 0\n",
    "W = np.transpose(np.dot(A, Ky))\n",
    "W1 = np.dot(A, W)\n",
    "W2 = np.multiply(Nx, W1)\n",
    "print W2.shape\n",
    "NBIscore = np.dot(W2, A)\n",
    "print NBIscore.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#NBI calculation for A_T\n",
    "\n",
    "Ky = np.diag((1/sum(A_T)))\n",
    "n = A_T.shape[0]\n",
    "m = A_T.shape[1]\n",
    "#print n, m, Ky.shape\n",
    "Ky[np.isinf(Ky) | np.isnan(Ky)] = 0\n",
    "kx = np.transpose(np.sum(A_T,1))\n",
    "#print kx.shape\n",
    "Nx = np.matlib.repmat(1/kx,n,1)\n",
    "Nx[np.isinf(Nx) | np.isnan(Nx)] = 0\n",
    "#kx[np.isinf(kx) | np.isnan(kx)] = 0\n",
    "W = np.transpose(np.dot(A_T, Ky))\n",
    "W1 = np.dot(A_T, W)\n",
    "W2 = np.multiply(Nx, W1)\n",
    "NBIscore_T = np.dot(W2, A_T)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "##Normalizing NBI scores\n",
    "NBIscore = np.true_divide(NBIscore, np.max(NBIscore, axis=0))\n",
    "NBIscore_T = np.true_divide(NBIscore_T, np.max(NBIscore_T, axis=0))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "nbi_idx = np.argsort(NBIscore, axis=0)\n",
    "nbi_idx_T= np.argsort(NBIscore_T, axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "wp = open('predicted_targets_for_all_drugs_using_percent_diff_0.20_.csv','w')\n",
    "for d in range(nd):\n",
    "    idx1 = nbi_idx[:,d]\n",
    "    #idx1 = idx1[::-1]\n",
    "    idx2 = A[:,d]\n",
    "    idx3 = idx2[idx1]\n",
    "    idx4 = np.where(idx3 == 0)[0]\n",
    "    p_targets_idx = idx1[idx4[-n:]]\n",
    "    p_targets_idx = p_targets_idx[::-1]\n",
    "    p_targets = NBIscore[p_targets_idx,d]\n",
    "    if p_targets[0] == 0.0:\n",
    "        continue\n",
    "    else:\n",
    "        p_diff = np.diff(p_targets)\n",
    "        th = p_targets[0]*0.20\n",
    "        th_f = p_targets[0]-th\n",
    "        f_idx = p_targets_idx[p_targets > th_f]\n",
    "        f_scores = p_targets[p_targets > th_f]\n",
    "        f_targets = uni_targetid[f_idx]\n",
    "        #print p_targets[0], th, th_f, p_targets[p_targets > th_f], p_targets\n",
    "        for i,t in enumerate(f_targets):\n",
    "            wp.write(uni_drugid[d] + ',' + t + ',' + str(f_scores[i]) + '\\n')\n",
    "\n",
    "wp.close()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}