Vasiliki commited on
Commit
3a25d7a
1 Parent(s): 9688da2

first_implementation_of_RNN

Browse files
Files changed (1) hide show
  1. matching_chains_classificationRNNS.py +602 -0
matching_chains_classificationRNNS.py ADDED
@@ -0,0 +1,602 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from sklearn.metrics.pairwise import cosine_similarity
3
+ import statistics
4
+ import pandas as pd
5
+ from sklearn.ensemble import RandomForestClassifier
6
+ from sklearn.metrics import classification_report, confusion_matrix, \
7
+ accuracy_score, roc_auc_score, roc_curve, f1_score, recall_score, precision_score
8
+ import matplotlib.pyplot as plt
9
+ import copy
10
+ from sklearn import preprocessing, tree
11
+ from sklearn.linear_model import LogisticRegression, LinearRegression
12
+ from sklearn.tree import DecisionTreeClassifier
13
+ from scipy.spatial import distance
14
+ from sklearn.naive_bayes import GaussianNB
15
+ import itertools
16
+ import os
17
+ from sklearn.model_selection import train_test_split
18
+ import matplotlib.pyplot as plt
19
+ import random
20
+ from sklearn.utils import shuffle
21
+ from imblearn.under_sampling import NearMiss,TomekLinks
22
+ from imblearn.over_sampling import SMOTE
23
+ from collections import Counter
24
+ from imblearn.combine import SMOTETomek, SMOTEENN
25
+ from sklearn.model_selection import StratifiedKFold
26
+ from imblearn.pipeline import make_pipeline
27
+
28
+ from matplotlib import pyplot
29
+ from scipy import interp
30
+ from sklearn.metrics import roc_curve,auc
31
+
32
+ #keras
33
+ from keras.models import Sequential
34
+ from keras.layers import Dense, SimpleRNN, LSTM
35
+
36
+ # Read ComE node embs per timestep [id, emb]
37
+
38
+ folder = os.listdir('ComE_per_timestep/embs')
39
+ path = 'ComE_per_timestep/embs'
40
+ ComE_id_embs = []
41
+ for file in folder:
42
+ ComE_id_embs.append(np.genfromtxt(os.path.join(path, file), dtype=None).tolist())
43
+
44
+ # Read ComE labels per timestep
45
+
46
+ folder = os.listdir('ComE_per_timestep/labels_pred')
47
+ path = 'ComE_per_timestep/labels_pred'
48
+ ComE_lbls = []
49
+ for file in folder:
50
+ ComE_lbls.append(np.genfromtxt(os.path.join(path, file), dtype=None).tolist())
51
+
52
+ # Node ids per timestep
53
+
54
+ node_ids = []
55
+ for step in ComE_id_embs:
56
+ tmp = [id_emb[0] for id_emb in step]
57
+ node_ids.append(tmp)
58
+
59
+ # [Node_id, clr] per timestep
60
+
61
+ id_clr = []
62
+ for i in range(len(node_ids)):
63
+ tmp = {}
64
+ for ind,node in enumerate(node_ids[i]):
65
+ tmp[node] = ComE_lbls[i][ind]
66
+ id_clr.append(tmp)
67
+
68
+ # Clustered nodes per timestep
69
+ clustered_nodes_init = []
70
+ for ind,i in enumerate(id_clr):
71
+ clrids_uniq = set(i.values())
72
+ d = {}
73
+ for clrid in clrids_uniq:
74
+ d[clrid] = [k for k in i.keys() if i[k] == clrid]
75
+ clustered_nodes_init.append(d)
76
+
77
+ clustered_nodes = []
78
+ for s in clustered_nodes_init:
79
+ per_step = []
80
+ for k,v in sorted(s.items()):
81
+ per_step.append(v)
82
+ clustered_nodes.append(per_step)
83
+
84
+ # ------------------------------ READ FEATURES -------------------------------
85
+
86
+ # ComE FEATURES
87
+
88
+ folder = os.listdir('ComE_features_per_timestep/')
89
+ path = 'ComE_features_per_timestep/'
90
+ id_ComE_feats_clr = []
91
+ id_ComE_feats_out = []
92
+ id_ComE_feats_gbl = []
93
+ id_ComE_feats_clrout = []
94
+ id_ComE_feats_clrgbl = []
95
+ id_ComE_feats_all = []
96
+ for file in folder:
97
+ df_ComE = pd.read_csv(os.path.join(path,file), names=['node_id', \
98
+ 'distin_med_eucl', 'distin_med_cos', 'distin_med_l1',\
99
+ 'distout_med_eucl', 'distout_med_cos', 'distout_med_l1',\
100
+ 'distin_eucl_max', 'distin_eucl_min', 'distin_eucl_avg',\
101
+ 'distin_cos_max', 'distin_cos_min', 'distin_cos_avg',\
102
+ 'distin_l1_max', 'distin_l1_min', 'distin_l1_avg',\
103
+ 'distout_eucl_max', 'distout_eucl_min', 'distout_eucl_avg',\
104
+ 'distout_cos_max', 'distout_cos_min', 'distout_cos_avg',\
105
+ 'distout_l1_max', 'distout_l1_min', 'distout_l1_avg', \
106
+ 'dist_glob_max_eucl', 'dist_glob_min_eucl', 'dist_glob_avg_eucl', \
107
+ 'dist_glob_max_cos', 'dist_glob_min_cos', 'dist_glob_avg_cos', \
108
+ 'dist_glob_max_l1', 'dist_glob_min_l1', 'dist_glob_avg_l1'], skiprows=1)
109
+ df_ComE_clr = df_ComE[['node_id', 'distin_med_eucl', \
110
+ 'distin_eucl_max', 'distin_eucl_min', 'distin_eucl_avg']]
111
+ df_ComE_out = df_ComE[['node_id', 'distout_med_eucl', \
112
+ 'distout_eucl_max', 'distout_eucl_min', 'distout_eucl_avg']]
113
+ df_ComE_gbl = df_ComE[['node_id', 'distout_med_eucl', \
114
+ 'dist_glob_max_eucl', 'dist_glob_min_eucl', 'dist_glob_avg_eucl']]
115
+ df_ComE_clrout = df_ComE[['node_id', 'distin_med_eucl', 'distout_med_eucl', \
116
+ 'distin_eucl_max', 'distin_eucl_min', 'distin_eucl_avg', \
117
+ 'distout_eucl_max', 'distout_eucl_min', 'distout_eucl_avg']]
118
+ df_ComE_clrgbl = df_ComE[['node_id', 'distin_med_eucl', \
119
+ 'distin_eucl_max', 'distin_eucl_min', 'distin_eucl_avg', \
120
+ 'dist_glob_max_eucl', 'dist_glob_min_eucl', 'dist_glob_avg_eucl']]
121
+ df_ComE_all = df_ComE[['node_id', 'distin_med_eucl', 'distout_med_eucl', \
122
+ 'distin_eucl_max', 'distin_eucl_min', 'distin_eucl_avg', \
123
+ 'distout_eucl_max', 'distout_eucl_min', 'distout_eucl_avg', \
124
+ 'dist_glob_max_eucl', 'dist_glob_min_eucl', 'dist_glob_avg_eucl']]
125
+ df_ComE_clr_lst = df_ComE_clr.values.tolist()
126
+ df_ComE_out_lst = df_ComE_out.values.tolist()
127
+ df_ComE_gbl_lst = df_ComE_gbl.values.tolist()
128
+ df_ComE_clrout_lst = df_ComE_clrout.values.tolist()
129
+ df_ComE_clrgbl_lst = df_ComE_clrgbl.values.tolist()
130
+ df_ComE_all_lst = df_ComE_all.values.tolist()
131
+ id_ComE_feats_clr.append(df_ComE_clr_lst)
132
+ id_ComE_feats_out.append(df_ComE_out_lst)
133
+ id_ComE_feats_gbl.append(df_ComE_gbl_lst)
134
+ id_ComE_feats_clrout.append(df_ComE_clrout_lst)
135
+ id_ComE_feats_clrgbl.append(df_ComE_clrgbl_lst)
136
+ id_ComE_feats_all.append(df_ComE_all_lst)
137
+ #sort by node id
138
+ for i in id_ComE_feats_clr:
139
+ i.sort()
140
+ for i in id_ComE_feats_out:
141
+ i.sort()
142
+ for i in id_ComE_feats_gbl:
143
+ i.sort()
144
+ for i in id_ComE_feats_clrout:
145
+ i.sort()
146
+ for i in id_ComE_feats_clrgbl:
147
+ i.sort()
148
+ for i in id_ComE_feats_all:
149
+ i.sort()
150
+
151
+ # Classic FEATURES
152
+
153
+ folder = os.listdir('classic_features_per_timestep/classic_features')
154
+ path = 'classic_features_per_timestep/classic_features'
155
+ id_classic_clr = []
156
+ id_classic_gbl = []
157
+ id_classic_all = []
158
+ id_classic_nodeg = []
159
+ for file in folder:
160
+ df_classic = pd.read_csv(os.path.join(path,file), names=['node_id', \
161
+ 'degree', 'betweenness', 'closeness', 'eigenvector', \
162
+ 'degree_ntwk', 'betweenness_ntwk', 'closeness_ntwk', 'eigenvector_ntwk'], \
163
+ skiprows=1)
164
+ df_classic_clr = df_classic[['node_id', \
165
+ 'degree', 'betweenness', 'closeness', 'eigenvector']]
166
+ df_classic_gbl = df_classic[['node_id', \
167
+ 'degree_ntwk', 'betweenness_ntwk', 'closeness_ntwk', 'eigenvector_ntwk']]
168
+ df_classic_nodeg = pd.read_csv(os.path.join(path,file), names=['node_id', \
169
+ 'betweenness', 'closeness', 'eigenvector', \
170
+ 'betweenness_ntwk', 'closeness_ntwk', 'eigenvector_ntwk'], \
171
+ skiprows=1)
172
+ df_classic_all_lst = df_classic.values.tolist()
173
+ df_classic_clr_lst = df_classic_clr.values.tolist()
174
+ id_classic_gbl_lst = df_classic_gbl.values.tolist()
175
+ id_classic_nodeg_lst = df_classic_nodeg.values.tolist()
176
+ id_classic_all.append(df_classic_all_lst)
177
+ id_classic_clr.append(df_classic_clr_lst)
178
+ id_classic_gbl.append(id_classic_gbl_lst)
179
+ id_classic_nodeg.append(id_classic_nodeg_lst)
180
+ #sort by node id
181
+ for i in id_classic_all:
182
+ i.sort()
183
+ for i in id_classic_clr:
184
+ i.sort()
185
+ for i in id_classic_gbl:
186
+ i.sort()
187
+ for i in id_classic_nodeg:
188
+ i.sort()
189
+
190
+ id_combo_ComE_clrout_classic_all = []
191
+ for ind,s in enumerate(id_ComE_feats_clrout):
192
+ temp = []
193
+ for inx,row in enumerate(s):
194
+ tmp = row[:]
195
+ tmp.extend(id_classic_all[ind][inx][1:])
196
+ temp.append(tmp)
197
+ id_combo_ComE_clrout_classic_all.append(temp)
198
+
199
+ #-------------------------------- MATCHING ------------------------------------
200
+
201
+ # [clr_x_tn, clr_y_tn+1, common_nodes_tn_tn+1]
202
+ #print(clustered_nodes[0])
203
+ matching = []
204
+ a = 0
205
+ while a<len(clustered_nodes)-1:
206
+ matching_two = []
207
+ for indcurr,clrcurr in enumerate(clustered_nodes[a]):
208
+ tmp = []
209
+ for indnxt,clrnxt in enumerate(clustered_nodes[a+1]):
210
+ num_of_common = len(list(set(clrcurr)&set(clrnxt)))
211
+ tmp.append([indcurr,indnxt,num_of_common])
212
+ tmp_max = max(item[-1] for item in tmp)
213
+ for t in tmp:
214
+ if t[-1] == tmp_max:
215
+ maxtmp = t
216
+ matching_two.append(maxtmp)
217
+ matching.append(matching_two)
218
+ a += 1
219
+ #print(matching,a)
220
+
221
+
222
+
223
+
224
+ #--------------------------------- CHAINS -------------------------------------
225
+ #************ SCD #Stay-Change-Drop
226
+ # 2-chain
227
+ def twoChain_scd(features):
228
+ two_chain_scd = []
229
+ for ind,step in enumerate(matching[:-1]):
230
+ per_step = []
231
+ for inx,clr in enumerate(step):
232
+ for nodeid in clustered_nodes[ind][clr[0]]:
233
+ tmp = [nodeid]
234
+ for idfeatures in features[ind]:
235
+ if nodeid == idfeatures[0]:
236
+ tmp.extend(idfeatures[1:])
237
+ if nodeid in clustered_nodes[ind+1][clr[1]]:
238
+ tmp.append(0)#stay
239
+ #for idfeatures in features[ind+1]: remove second set of features
240
+ #if nodeid == idfeatures[0]:
241
+ #tmp.extend(idfeatures[1:])
242
+ '''for cl in matching[ind+1]:
243
+ if nodeid in clustered_nodes[ind+1][cl[0]]:
244
+ if nodeid in clustered_nodes[ind+2][cl[1]]:
245
+ tmp.append(0)#stay
246
+ break
247
+ elif nodeid in node_ids[ind+2]:
248
+ tmp.append(1)#move
249
+ break
250
+ else:
251
+ tmp.append(2)#drop
252
+ break'''
253
+ elif nodeid in node_ids[ind+1]:
254
+ tmp.append(1)#move
255
+ #for idfeatures in features[ind+1]:
256
+ #if nodeid == idfeatures[0]:
257
+ #tmp.extend(idfeatures[1:])
258
+ #for cl in matching[ind+1]:
259
+ #if nodeid in clustered_nodes[ind+1][cl[0]]:
260
+ #if nodeid in clustered_nodes[ind+2][cl[1]]:
261
+ #tmp.append(0)#stay
262
+ # break
263
+ #elif nodeid in node_ids[ind+2]:
264
+ # tmp.append(1)#move
265
+ # break
266
+ # else:
267
+ # tmp.append(2)#drop
268
+ # break
269
+ else:
270
+ # tmp.extend([-1]*(len(features[0][0][1:])+1)) remove extend vasia
271
+ tmp.append(2)#drop
272
+ per_step.append(tmp)
273
+ two_chain_scd.append(per_step)
274
+ return(two_chain_scd)
275
+
276
+ def chains_scd(prev_chain_scd, features, a):
277
+ curr_chain_scd = copy.deepcopy(prev_chain_scd[:-1])
278
+ for ind,step in enumerate(curr_chain_scd):
279
+ for row in step:
280
+ if row[-1] == 0 or row[-1] == 1:
281
+ for idfeatures in features[ind+2+a]:
282
+ if row[0] == idfeatures[0]:
283
+ row.extend(idfeatures[1:])
284
+ for cl in matching[ind+2+a]:
285
+ if row[0] in clustered_nodes[ind+2+a][cl[0]]:
286
+ if row[0] in clustered_nodes[ind+3+a][cl[1]]:
287
+ row.append(0)#stay
288
+ break
289
+ elif row[0] in node_ids[ind+3+a]:
290
+ row.append(1)#move
291
+ break
292
+ else:
293
+ row.append(2)#drop
294
+ break
295
+ else:
296
+ row[-1:-1] = [-1]*(len(features[0][0][1:])+1)#add -1*(#feats + ev)
297
+ return(curr_chain_scd)
298
+
299
+ # ----------------------------------------------------------------------------
300
+
301
+ #************ SL
302
+ def chains_sl(chainsSCD): #Stay-Leave
303
+ chainsSL = copy.deepcopy(chainsSCD)
304
+ for row in chainsSL:
305
+ if row[-1] == 2:
306
+ row[-1] = 1
307
+ return(chainsSL)
308
+
309
+ # ----------------------------------------------------------------------------
310
+
311
+ #************ SC #Stay-Change
312
+ def chains_sc(chainsSCD):
313
+ chainsSC = []
314
+ for row in chainsSCD:
315
+ if row[-1] != 2:
316
+ chainsSC.append(row)
317
+ return(chainsSC)
318
+
319
+ # ----------------------------------------------------------------------------
320
+
321
+ def per_chain_all_chains_scd(feats):
322
+ two_chain_scd = twoChain_scd(feats)
323
+ three_chain_scd = chains_scd(two_chain_scd, feats, 0)
324
+ four_chain_scd = chains_scd(three_chain_scd, feats, 1)
325
+ five_chain_scd = chains_scd(four_chain_scd, feats, 2)
326
+ six_chain_scd = chains_scd(five_chain_scd, feats, 3)
327
+ seven_chain_scd = chains_scd(six_chain_scd, feats, 4)
328
+ eight_chain_scd = chains_scd(seven_chain_scd, feats, 5)
329
+ nine_chain_scd = chains_scd(eight_chain_scd, feats, 6)
330
+ two_chain_scd = [row for s in two_chain_scd for row in s]#flat
331
+ three_chain_scd = [row for s in three_chain_scd for row in s]#flat
332
+ four_chain_scd = [row for s in four_chain_scd for row in s]#flat
333
+ five_chain_scd = [row for s in five_chain_scd for row in s]#flat
334
+ six_chain_scd = [row for s in six_chain_scd for row in s]#flat
335
+ seven_chain_scd = [row for s in seven_chain_scd for row in s]#flat
336
+ eight_chain_scd = [row for s in eight_chain_scd for row in s]#flat
337
+ nine_chain_scd = [row for s in nine_chain_scd for row in s]#flat
338
+ # merge chains
339
+ all_chains_scd = []
340
+ all_chains_scd.append(two_chain_scd)
341
+ all_chains_scd.append(three_chain_scd)
342
+ all_chains_scd.append(four_chain_scd)
343
+ all_chains_scd.append(five_chain_scd)
344
+ all_chains_scd.append(six_chain_scd)
345
+ all_chains_scd.append(seven_chain_scd)
346
+ all_chains_scd.append(eight_chain_scd)
347
+ all_chains_scd.append(nine_chain_scd)
348
+ all_chains_scd = [row for chain in all_chains_scd for row in chain]
349
+ return(two_chain_scd, three_chain_scd, four_chain_scd, five_chain_scd, \
350
+ six_chain_scd, seven_chain_scd, eight_chain_scd, nine_chain_scd, \
351
+ all_chains_scd)
352
+
353
+ # ----------------------------------------------------------------------------
354
+
355
+ # CHAINS ----------------------
356
+ # ComE
357
+
358
+ # clr
359
+ two_chain_ComE_clr_scd, three_chain_ComE_clr_scd, four_chain_ComE_clr_scd, \
360
+ five_chain_ComE_clr_scd, six_chain_ComE_clr_scd, seven_chain_ComE_clr_scd, \
361
+ eight_chain_ComE_clr_scd, nine_chain_ComE_clr_scd, \
362
+ chains_ComE_clr_scd = per_chain_all_chains_scd(id_ComE_feats_clr)
363
+ # per chain
364
+ two_chain_ComE_clr_sl = chains_sl(two_chain_ComE_clr_scd)
365
+ two_chain_ComE_clr_sc = chains_sc(two_chain_ComE_clr_scd)
366
+ three_chain_ComE_clr_sl = chains_sl(three_chain_ComE_clr_scd)
367
+ three_chain_ComE_clr_sc = chains_sc(three_chain_ComE_clr_scd)
368
+ four_chain_ComE_clr_sl = chains_sl(four_chain_ComE_clr_scd)
369
+ four_chain_ComE_clr_sc = chains_sc(four_chain_ComE_clr_scd)
370
+ five_chain_ComE_clr_sl = chains_sl(five_chain_ComE_clr_scd)
371
+ five_chain_ComE_clr_sc = chains_sc(five_chain_ComE_clr_scd)
372
+ six_chain_ComE_clr_sl = chains_sl(six_chain_ComE_clr_scd)
373
+ six_chain_ComE_clr_sc = chains_sc(six_chain_ComE_clr_scd)
374
+ seven_chain_ComE_clr_sl = chains_sl(seven_chain_ComE_clr_scd)
375
+ seven_chain_ComE_clr_sc = chains_sc(seven_chain_ComE_clr_scd)
376
+ eight_chain_ComE_clr_sl = chains_sl(eight_chain_ComE_clr_scd)
377
+ eight_chain_ComE_clr_sc = chains_sc(eight_chain_ComE_clr_scd)
378
+ nine_chain_ComE_clr_sl = chains_sl(nine_chain_ComE_clr_scd)
379
+ nine_chain_ComE_clr_sc = chains_sc(nine_chain_ComE_clr_scd)
380
+ # SL
381
+ chains_ComE_clr_sl = chains_sl(chains_ComE_clr_scd)
382
+ # SC
383
+ chains_ComE_clr_sc = chains_sc(chains_ComE_clr_scd)
384
+
385
+ # out
386
+ two_chain_ComE_out_scd, three_chain_ComE_out_scd, four_chain_ComE_out_scd, \
387
+ five_chain_ComE_out_scd, six_chain_ComE_out_scd, seven_chain_ComE_out_scd, \
388
+ eight_chain_ComE_out_scd, nine_chain_ComE_out_scd, \
389
+ chains_ComE_out_scd = per_chain_all_chains_scd(id_ComE_feats_out)
390
+ # per chain
391
+ two_chain_ComE_out_sl = chains_sl(two_chain_ComE_out_scd)
392
+ two_chain_ComE_out_sc = chains_sc(two_chain_ComE_out_scd)
393
+ three_chain_ComE_out_sl = chains_sl(three_chain_ComE_out_scd)
394
+ three_chain_ComE_out_sc = chains_sc(three_chain_ComE_out_scd)
395
+ four_chain_ComE_out_sl = chains_sl(four_chain_ComE_out_scd)
396
+ four_chain_ComE_out_sc = chains_sc(four_chain_ComE_out_scd)
397
+ five_chain_ComE_out_sl = chains_sl(five_chain_ComE_out_scd)
398
+ five_chain_ComE_out_sc = chains_sc(five_chain_ComE_out_scd)
399
+ six_chain_ComE_out_sl = chains_sl(six_chain_ComE_out_scd)
400
+ six_chain_ComE_out_sc = chains_sc(six_chain_ComE_out_scd)
401
+ seven_chain_ComE_out_sl = chains_sl(seven_chain_ComE_out_scd)
402
+ seven_chain_ComE_out_sc = chains_sc(seven_chain_ComE_out_scd)
403
+ eight_chain_ComE_out_sl = chains_sl(eight_chain_ComE_out_scd)
404
+ eight_chain_ComE_out_sc = chains_sc(eight_chain_ComE_out_scd)
405
+ nine_chain_ComE_out_sl = chains_sl(nine_chain_ComE_out_scd)
406
+ nine_chain_ComE_out_sc = chains_sc(nine_chain_ComE_out_scd)
407
+ # SL
408
+ chains_ComE_out_sl = chains_sl(chains_ComE_out_scd)
409
+ # SC
410
+ chains_ComE_out_sc = chains_sc(chains_ComE_out_scd)
411
+
412
+ # clrout
413
+ two_chain_ComE_clrout_scd, three_chain_ComE_clrout_scd, four_chain_ComE_clrout_scd, \
414
+ five_chain_ComE_clrout_scd, six_chain_ComE_clrout_scd, seven_chain_ComE_clrout_scd, \
415
+ eight_chain_ComE_clrout_scd, nine_chain_ComE_clrout_scd, \
416
+ chains_ComE_clrout_scd = per_chain_all_chains_scd(id_ComE_feats_clrout)
417
+ # per chain
418
+ two_chain_ComE_clrout_sl = chains_sl(two_chain_ComE_clrout_scd)
419
+ two_chain_ComE_clrout_sc = chains_sc(two_chain_ComE_clrout_scd)
420
+ three_chain_ComE_clrout_sl = chains_sl(three_chain_ComE_clrout_scd)
421
+ three_chain_ComE_clrout_sc = chains_sc(three_chain_ComE_clrout_scd)
422
+ four_chain_ComE_clrout_sl = chains_sl(four_chain_ComE_clrout_scd)
423
+ four_chain_ComE_clrout_sc = chains_sc(four_chain_ComE_clrout_scd)
424
+ five_chain_ComE_clrout_sl = chains_sl(five_chain_ComE_clrout_scd)
425
+ five_chain_ComE_clrout_sc = chains_sc(five_chain_ComE_clrout_scd)
426
+ six_chain_ComE_clrout_sl = chains_sl(six_chain_ComE_clrout_scd)
427
+ six_chain_ComE_clrout_sc = chains_sc(six_chain_ComE_clrout_scd)
428
+ seven_chain_ComE_clrout_sl = chains_sl(seven_chain_ComE_clrout_scd)
429
+ seven_chain_ComE_clrout_sc = chains_sc(seven_chain_ComE_clrout_scd)
430
+ eight_chain_ComE_clrout_sl = chains_sl(eight_chain_ComE_clrout_scd)
431
+ eight_chain_ComE_clrout_sc = chains_sc(eight_chain_ComE_clrout_scd)
432
+ nine_chain_ComE_clrout_sl = chains_sl(nine_chain_ComE_clrout_scd)
433
+ nine_chain_ComE_clrout_sc = chains_sc(nine_chain_ComE_clrout_scd)
434
+ # SL
435
+ chains_ComE_clrout_sl = chains_sl(chains_ComE_clrout_scd)
436
+ # SC
437
+ chains_ComE_clrout_sc = chains_sc(chains_ComE_clrout_scd)
438
+
439
+
440
+
441
+ # ----------------------------------------------------------------------------
442
+
443
+ # Classic
444
+ #clr
445
+ # SCD
446
+ two_chain_classic_clr_scd, three_chain_classic_clr_scd, four_chain_classic_clr_scd, \
447
+ five_chain_classic_clr_scd, six_chain_classic_clr_scd, seven_chain_classic_clr_scd, \
448
+ eight_chain_classic_clr_scd, nine_chain_classic_clr_scd, \
449
+ chains_classic_clr_scd = per_chain_all_chains_scd(id_classic_clr)
450
+ # per chain
451
+ two_chain_classic_clr_sl = chains_sl(two_chain_classic_clr_scd)
452
+ two_chain_classic_clr_sc = chains_sc(two_chain_classic_clr_scd)
453
+ three_chain_classic_clr_sl = chains_sl(three_chain_classic_clr_scd)
454
+ three_chain_classic_clr_sc = chains_sc(three_chain_classic_clr_scd)
455
+ four_chain_classic_clr_sl = chains_sl(four_chain_classic_clr_scd)
456
+ four_chain_classic_clr_sc = chains_sc(four_chain_classic_clr_scd)
457
+ five_chain_classic_clr_sl = chains_sl(five_chain_classic_clr_scd)
458
+ five_chain_classic_clr_sc = chains_sc(five_chain_classic_clr_scd)
459
+ six_chain_classic_clr_sl = chains_sl(six_chain_classic_clr_scd)
460
+ six_chain_classic_clr_sc = chains_sc(six_chain_classic_clr_scd)
461
+ seven_chain_classic_clr_sl = chains_sl(seven_chain_classic_clr_scd)
462
+ seven_chain_classic_clr_sc = chains_sc(seven_chain_classic_clr_scd)
463
+ eight_chain_classic_clr_sl = chains_sl(eight_chain_classic_clr_scd)
464
+ eight_chain_classic_clr_sc = chains_sc(eight_chain_classic_clr_scd)
465
+ nine_chain_classic_clr_sl = chains_sl(nine_chain_classic_clr_scd)
466
+ nine_chain_classic_clr_sc = chains_sc(nine_chain_classic_clr_scd)
467
+
468
+ # SL
469
+ chains_classic_clr_sl = chains_sl(chains_classic_clr_scd)
470
+ # SC
471
+ chains_classic_clr_sc = chains_sc(chains_classic_clr_scd)
472
+
473
+ #gbl
474
+ # SCD
475
+ two_chain_classic_gbl_scd, three_chain_classic_gbl_scd, four_chain_classic_gbl_scd, \
476
+ five_chain_classic_gbl_scd, six_chain_classic_gbl_scd, seven_chain_classic_gbl_scd, \
477
+ eight_chain_classic_gbl_scd, nine_chain_classic_gbl_scd, \
478
+ chains_classic_gbl_scd = per_chain_all_chains_scd(id_classic_gbl)
479
+ # per chain
480
+ two_chain_classic_gbl_sl = chains_sl(two_chain_classic_gbl_scd)
481
+ two_chain_classic_gbl_sc = chains_sc(two_chain_classic_gbl_scd)
482
+ three_chain_classic_gbl_sl = chains_sl(three_chain_classic_gbl_scd)
483
+ three_chain_classic_gbl_sc = chains_sc(three_chain_classic_gbl_scd)
484
+ four_chain_classic_gbl_sl = chains_sl(four_chain_classic_gbl_scd)
485
+ four_chain_classic_gbl_sc = chains_sc(four_chain_classic_gbl_scd)
486
+ five_chain_classic_gbl_sl = chains_sl(five_chain_classic_gbl_scd)
487
+ five_chain_classic_gbl_sc = chains_sc(five_chain_classic_gbl_scd)
488
+ six_chain_classic_gbl_sl = chains_sl(six_chain_classic_gbl_scd)
489
+ six_chain_classic_gbl_sc = chains_sc(six_chain_classic_gbl_scd)
490
+ seven_chain_classic_gbl_sl = chains_sl(seven_chain_classic_gbl_scd)
491
+ seven_chain_classic_gbl_sc = chains_sc(seven_chain_classic_gbl_scd)
492
+ eight_chain_classic_gbl_sl = chains_sl(eight_chain_classic_gbl_scd)
493
+ eight_chain_classic_gbl_sc = chains_sc(eight_chain_classic_gbl_scd)
494
+ nine_chain_classic_gbl_sl = chains_sl(nine_chain_classic_gbl_scd)
495
+ nine_chain_classic_gbl_sc = chains_sc(nine_chain_classic_gbl_scd)
496
+
497
+ # SL
498
+ chains_classic_gbl_sl = chains_sl(chains_classic_gbl_scd)
499
+ # SC
500
+ chains_classic_gbl_sc = chains_sc(chains_classic_gbl_scd)
501
+
502
+ #all
503
+ # SCD
504
+ two_chain_classic_all_scd, three_chain_classic_all_scd, four_chain_classic_all_scd, \
505
+ five_chain_classic_all_scd, six_chain_classic_all_scd, seven_chain_classic_all_scd, \
506
+ eight_chain_classic_all_scd, nine_chain_classic_all_scd, \
507
+ chains_classic_all_scd = per_chain_all_chains_scd(id_classic_all)
508
+ # per chain
509
+ two_chain_classic_all_sl = chains_sl(two_chain_classic_all_scd)
510
+ two_chain_classic_all_sc = chains_sc(two_chain_classic_all_scd)
511
+ three_chain_classic_all_sl = chains_sl(three_chain_classic_all_scd)
512
+ three_chain_classic_all_sc = chains_sc(three_chain_classic_all_scd)
513
+ four_chain_classic_all_sl = chains_sl(four_chain_classic_all_scd)
514
+ four_chain_classic_all_sc = chains_sc(four_chain_classic_all_scd)
515
+ five_chain_classic_all_sl = chains_sl(five_chain_classic_all_scd)
516
+ five_chain_classic_all_sc = chains_sc(five_chain_classic_all_scd)
517
+ six_chain_classic_all_sl = chains_sl(six_chain_classic_all_scd)
518
+ six_chain_classic_all_sc = chains_sc(six_chain_classic_all_scd)
519
+ seven_chain_classic_all_sl = chains_sl(seven_chain_classic_all_scd)
520
+ seven_chain_classic_all_sc = chains_sc(seven_chain_classic_all_scd)
521
+ eight_chain_classic_all_sl = chains_sl(eight_chain_classic_all_scd)
522
+ eight_chain_classic_all_sc = chains_sc(eight_chain_classic_all_scd)
523
+ nine_chain_classic_all_sl = chains_sl(nine_chain_classic_all_scd)
524
+ nine_chain_classic_all_sc = chains_sc(nine_chain_classic_all_scd)
525
+
526
+ # SL
527
+ chains_classic_all_sl = chains_sl(chains_classic_all_scd)
528
+ # SC
529
+ chains_classic_all_sc = chains_sc(chains_classic_all_scd)
530
+
531
+
532
+ #RNN model
533
+ def create_RNN(hidden_units, dense_units, input_shape, activation):
534
+ model=Sequential()
535
+ model.add(LSTM(hidden_units,input_shape=input_shape))
536
+ model.add(Dense(units=dense_units,activation=activation))
537
+ model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
538
+ model.summary()
539
+ return model
540
+
541
+ # stratified kfold
542
+ def Classification(chain):
543
+ #print("chain1 = ",len(chain),chain[0])
544
+
545
+ chain = shuffle(np.array(chain))
546
+ print("chain2 = ",chain.shape)
547
+ X = [i[1:-1] for i in chain.tolist()]
548
+ Y = [i[-1] for i in chain.tolist()]
549
+
550
+ # padding
551
+ longest = len(max(X,key=len))
552
+ print(longest)
553
+
554
+ for row in X:
555
+ while len(row)<longest:
556
+ row.append(-99)
557
+ #######
558
+ X = np.array(X)
559
+ Y = np.array(Y)
560
+ print('Y_dataset:', Counter(Y))
561
+ skf = StratifiedKFold(n_splits=5)
562
+ fold = 0
563
+ k=0
564
+ cvscores = []
565
+ for train_index, test_index in skf.split(X, Y):
566
+ X_train, X_test = X[train_index], X[test_index]
567
+
568
+ y_train, y_test = Y[train_index], Y[test_index]
569
+ y_train_cnt = pd.DataFrame([Counter(y_train)]).transpose()
570
+ print(y_train_cnt)
571
+ y_train_cnt.sort_index(inplace=True)
572
+ print('y_train:', y_train_cnt)
573
+ # print("X",X_train.shape,X_train[0])
574
+
575
+ X_train_3d = X_train.reshape((X_train.shape[0], 4, 1))
576
+ X_test_3d = X_test.reshape((X_test.shape[0], 4, 1))
577
+ # print(X_test_3d)
578
+
579
+ rnn_model = create_RNN(100,2,input_shape=(4,1),activation='sigmoid')
580
+
581
+ rnn_model.fit(X_train_3d, y_train, epochs=50, batch_size=5,verbose=2)
582
+ #test_pred = rnn_model.predict(X_test_3d)
583
+
584
+ scores = rnn_model.evaluate(X_test_3d, y_test, verbose=0)
585
+ print("score",scores)
586
+ exit()
587
+ print("%s: %.2f%%" % (rnn_model.metrics_names[1], scores[1]*100))
588
+ cvscores.append(scores[1] * 100)
589
+ print("%.2f%% (+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))
590
+
591
+
592
+
593
+
594
+ # Classic features
595
+ # clr - 1 chain
596
+ with open('results_details.csv','a') as fd:
597
+ fd.write('two_chain_classic_clr_sl'+'\n')
598
+ with open('results.csv','a') as fd:
599
+ fd.write('two_chain_classic_clr_sl'+'\n')
600
+ Classification(two_chain_classic_clr_sl)
601
+
602
+