OpenLAB / RepTB

Browse Code »

Commit d1208fbb7c7f8802231af440548e8ce61bbac0bf

Authored by anurag 2018-04-13 15:54:09 +0530

1 parent 70ad245597

Exists in master

Updated README and Python Code

Showing 1 changed file with 217 additions and 0 deletions Show diff stats

nbi_simulation_new_for_git.ipynb

Diff comments View file @d1208fb

File was created	1	{
	2	"cells": [
	3	{
	4	"cell_type": "code",
	5	"execution_count": 1,
	6	"metadata": {
	7	"collapsed": false
	8	},
	9	"outputs": [],
	10	"source": [
	11	"import numpy as np\n",
	12	"import csv\n",
	13	"import numpy.matlib\n",
	14	"from operator import itemgetter, attrgetter\n",
	15	"from sklearn.model_selection import KFold\n",
	16	"from sklearn.metrics import roc_curve, auc\n",
	17	"import matplotlib.pyplot as plt"
	18	]
	19	},
	20	{
	21	"cell_type": "code",
	22	"execution_count": 2,
	23	"metadata": {
	24	"collapsed": false
	25	},
	26	"outputs": [
	27	{
	28	"name": "stdout",
	29	"output_type": "stream",
	30	"text": [
	31	"6319 3772 (3772, 6319) 26093.0 (6319, 3772)\n"
	32	]
	33	}
	34	],
	35	"source": [
	36	"#fp = open('dt_new_and_fda_unique.csv','r')\n",
	37	"fp = open('new_dt_from_go_and_db_unique_latest.csv','r')\n",
	38	"drugid = []\n",
	39	"targetid = []\n",
	40	"\n",
	41	"##1. Reading edge list line by line##\n",
	42	"for line in fp:\n",
	43	" line = line.strip()\n",
	44	" tmp = line.split(',')\n",
	45	" drugid.append(tmp[0])\n",
	46	" targetid.append(tmp[1])\n",
	47	"fp.close()\n",
	48	"##End 1##\n",
	49	"\n",
	50	"drug = np.array(drugid)\n",
	51	"target =np.array(targetid)\n",
	52	"\n",
	53	"uni_drugid = np.unique(np.array(drugid))\n",
	54	"uni_targetid = np.unique(np.array(targetid))\n",
	55	"\n",
	56	"##creating zero incidence matrix for the graph##\n",
	57	"\n",
	58	"A = np.zeros((uni_targetid.shape[0], uni_drugid.shape[0]))\n",
	59	"\n",
	60	"for i in range(len(drugid)):\n",
	61	" idx1 = np.where(uni_targetid==targetid[i])\n",
	62	" idx2 = np.where(uni_drugid==drugid[i])\n",
	63	" A[idx1,idx2] = 1\n",
	64	"\n",
	65	"nd = uni_drugid.shape[0]\n",
	66	"mt = uni_targetid.shape[0]\n",
	67	"\n",
	68	"A_T = np.transpose(A)\n",
	69	"no_edges = np.sum(A)\n",
	70	"print nd, mt, A.shape, np.sum(A), A_T.shape\n"
	71	]
	72	},
	73	{
	74	"cell_type": "code",
	75	"execution_count": 3,
	76	"metadata": {
	77	"collapsed": false
	78	},
	79	"outputs": [
	80	{
	81	"name": "stdout",
	82	"output_type": "stream",
	83	"text": [
	84	"(3772, 3772)\n",
	85	"(3772, 6319)\n"
	86	]
	87	}
	88	],
	89	"source": [
	90	"#NBI calculation for A\n",
	91	"\n",
	92	"Ky = np.diag((1/sum(A))) \n",
	93	"n = A.shape[0]\n",
	94	"m = A.shape[1]\n",
	95	"#print n, m, Ky.shape\n",
	96	"Ky[np.isinf(Ky) \| np.isnan(Ky)] = 0\n",
	97	"kx = np.transpose(np.sum(A,1))\n",
	98	"#print kx.shape\n",
	99	"Nx = np.matlib.repmat(1/kx,n,1)\n",
	100	"Nx[np.isinf(Nx) \| np.isnan(Nx)] = 0\n",
	101	"#kx[np.isinf(kx) \| np.isnan(kx)] = 0\n",
	102	"W = np.transpose(np.dot(A, Ky))\n",
	103	"W1 = np.dot(A, W)\n",
	104	"W2 = np.multiply(Nx, W1)\n",
	105	"print W2.shape\n",
	106	"NBIscore = np.dot(W2, A)\n",
	107	"print NBIscore.shape"
	108	]
	109	},
	110	{
	111	"cell_type": "code",
	112	"execution_count": 4,
	113	"metadata": {
	114	"collapsed": true
	115	},
	116	"outputs": [],
	117	"source": [
	118	"#NBI calculation for A_T\n",
	119	"\n",
	120	"Ky = np.diag((1/sum(A_T)))\n",
	121	"n = A_T.shape[0]\n",
	122	"m = A_T.shape[1]\n",
	123	"#print n, m, Ky.shape\n",
	124	"Ky[np.isinf(Ky) \| np.isnan(Ky)] = 0\n",
	125	"kx = np.transpose(np.sum(A_T,1))\n",
	126	"#print kx.shape\n",
	127	"Nx = np.matlib.repmat(1/kx,n,1)\n",
	128	"Nx[np.isinf(Nx) \| np.isnan(Nx)] = 0\n",
	129	"#kx[np.isinf(kx) \| np.isnan(kx)] = 0\n",
	130	"W = np.transpose(np.dot(A_T, Ky))\n",
	131	"W1 = np.dot(A_T, W)\n",
	132	"W2 = np.multiply(Nx, W1)\n",
	133	"NBIscore_T = np.dot(W2, A_T)"
	134	]
	135	},
	136	{
	137	"cell_type": "code",
	138	"execution_count": 5,
	139	"metadata": {
	140	"collapsed": false
	141	},
	142	"outputs": [],
	143	"source": [
	144	"##Normalizing NBI scores\n",
	145	"NBIscore = np.true_divide(NBIscore, np.max(NBIscore, axis=0))\n",
	146	"NBIscore_T = np.true_divide(NBIscore_T, np.max(NBIscore_T, axis=0))"
	147	]
	148	},
	149	{
	150	"cell_type": "code",
	151	"execution_count": 7,
	152	"metadata": {
	153	"collapsed": false
	154	},
	155	"outputs": [],
	156	"source": [
	157	"nbi_idx = np.argsort(NBIscore, axis=0)\n",
	158	"nbi_idx_T= np.argsort(NBIscore_T, axis=0)"
	159	]
	160	},
	161	{
	162	"cell_type": "code",
	163	"execution_count": 8,
	164	"metadata": {
	165	"collapsed": false
	166	},
	167	"outputs": [],
	168	"source": [
	169	"wp = open('predicted_targets_for_all_drugs_using_percent_diff_0.20_.csv','w')\n",
	170	"for d in range(nd):\n",
	171	" idx1 = nbi_idx[:,d]\n",
	172	" #idx1 = idx1[::-1]\n",
	173	" idx2 = A[:,d]\n",
	174	" idx3 = idx2[idx1]\n",
	175	" idx4 = np.where(idx3 == 0)[0]\n",
	176	" p_targets_idx = idx1[idx4[-n:]]\n",
	177	" p_targets_idx = p_targets_idx[::-1]\n",
	178	" p_targets = NBIscore[p_targets_idx,d]\n",
	179	" if p_targets[0] == 0.0:\n",
	180	" continue\n",
	181	" else:\n",
	182	" p_diff = np.diff(p_targets)\n",
	183	" th = p_targets[0]*0.20\n",
	184	" th_f = p_targets[0]-th\n",
	185	" f_idx = p_targets_idx[p_targets > th_f]\n",
	186	" f_scores = p_targets[p_targets > th_f]\n",
	187	" f_targets = uni_targetid[f_idx]\n",
	188	" #print p_targets[0], th, th_f, p_targets[p_targets > th_f], p_targets\n",
	189	" for i,t in enumerate(f_targets):\n",
	190	" wp.write(uni_drugid[d] + ',' + t + ',' + str(f_scores[i]) + '\\n')\n",
	191	"\n",
	192	"wp.close()"
	193	]
	194	}
	195	],
	196	"metadata": {
	197	"kernelspec": {
	198	"display_name": "Python 2",
	199	"language": "python",
	200	"name": "python2"
	201	},
	202	"language_info": {
	203	"codemirror_mode": {
	204	"name": "ipython",
	205	"version": 2
	206	},
	207	"file_extension": ".py",
	208	"mimetype": "text/x-python",
	209	"name": "python",
	210	"nbconvert_exporter": "python",
	211	"pygments_lexer": "ipython2",
	212	"version": "2.7.6"
	213	}
	214	},
	215	"nbformat": 4,
	216	"nbformat_minor": 2
	217	}
	218