giulio98 committed on
Commit
01353be
1 Parent(s): 09f0839

Update dataflow_match.py

Browse files
Files changed (1) hide show
  1. dataflow_match.py +1272 -5
dataflow_match.py CHANGED
@@ -1,14 +1,1281 @@
1
  # Copyright (c) Microsoft Corporation.
2
  # Licensed under the MIT license.
3
 
4
- from parser import DFG_python,DFG_java,DFG_ruby,DFG_go,DFG_php,DFG_javascript,DFG_csharp
5
- from parser import (remove_comments_and_docstrings,
6
- tree_to_token_index,
7
- index_to_code_token,
8
- tree_to_variable_index)
9
  from tree_sitter import Language, Parser
10
  import pdb
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  dfg_function={
13
  'python':DFG_python,
14
  'java':DFG_java,
 
1
  # Copyright (c) Microsoft Corporation.
2
  # Licensed under the MIT license.
3
 
4
+
 
 
 
 
5
  from tree_sitter import Language, Parser
6
  import pdb
7
 
8
+ import re
9
+ from io import StringIO
10
+ import tokenize
11
def remove_comments_and_docstrings(source,lang):
    """Return ``source`` with comments removed and blank lines dropped.

    Args:
        source: program text to clean.
        lang: language name. ``'python'`` is tokenised with ``tokenize`` and
            also loses its docstrings; ``'ruby'`` is returned unchanged; any
            other value is treated as a language with ``//`` and ``/* */``
            comments.

    Returns:
        The cleaned text. Except for ``'ruby'``, lines that are empty or
        whitespace-only after stripping are removed.

    Raises:
        Whatever ``tokenize.generate_tokens`` raises when ``lang`` is
        ``'python'`` and ``source`` cannot be tokenised.
    """
    if lang in ['ruby']:
        return source

    if lang in ['python']:
        pieces = []
        prev_toktype = tokenize.INDENT
        last_lineno = -1
        last_col = 0
        for tok in tokenize.generate_tokens(StringIO(source).readline):
            token_type = tok[0]
            token_string = tok[1]
            start_line, start_col = tok[2]
            end_line, end_col = tok[3]
            if start_line > last_lineno:
                last_col = 0
            # Re-create the horizontal gap between consecutive tokens.
            if start_col > last_col:
                pieces.append(" " * (start_col - last_col))
            if token_type == tokenize.COMMENT:
                # Comments are dropped.
                pass
            elif token_type == tokenize.STRING:
                # A string that directly follows an INDENT or NEWLINE token,
                # or that starts in column 0, is treated as a docstring and
                # dropped; every other string literal is kept.
                if (prev_toktype != tokenize.INDENT
                        and prev_toktype != tokenize.NEWLINE
                        and start_col > 0):
                    pieces.append(token_string)
            else:
                pieces.append(token_string)
            prev_toktype = token_type
            last_col = end_col
            last_lineno = end_line
        cleaned = "".join(pieces)
    else:
        def replacer(match):
            s = match.group(0)
            if s.startswith('/'):
                return " " # note: a space and not an empty string
            return s
        # The string-literal alternatives make quoted text win over comment
        # markers that appear inside it.
        pattern = re.compile(
            r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"',
            re.DOTALL | re.MULTILINE
        )
        cleaned = re.sub(pattern, replacer, source)

    return '\n'.join(line for line in cleaned.split('\n') if line.strip() != "")
69
+
70
def tree_to_token_index(root_node):
    """Collect the ``(start_point, end_point)`` span of every token under a node.

    A node counts as a token when it has no children or when its type is one
    of the string/character literal types; nodes of type ``'comment'`` never
    count. Spans are returned in traversal order.
    """
    literal_types = ['string_literal', 'string', 'character_literal']
    childless = len(root_node.children) == 0
    if root_node.type != 'comment' and (childless or root_node.type in literal_types):
        return [(root_node.start_point, root_node.end_point)]
    spans = []
    for sub_node in root_node.children:
        spans.extend(tree_to_token_index(sub_node))
    return spans
78
+
79
def tree_to_variable_index(root_node,index_to_code):
    """Collect spans of tokens whose text differs from their node type.

    Tokens are found as in ``tree_to_token_index``: childless nodes or
    string/character literal nodes, excluding comments. ``index_to_code`` maps
    a ``(start_point, end_point)`` span to an ``(idx, code)`` pair; a token is
    kept only when ``code`` is not equal to the node's type.
    """
    literal_types = ['string_literal', 'string', 'character_literal']
    childless = len(root_node.children) == 0
    if root_node.type != 'comment' and (childless or root_node.type in literal_types):
        span = (root_node.start_point, root_node.end_point)
        text = index_to_code[span][1]
        return [span] if root_node.type != text else []
    spans = []
    for sub_node in root_node.children:
        spans.extend(tree_to_variable_index(sub_node, index_to_code))
    return spans
92
+
93
def index_to_code_token(index,code):
    """Return the text of ``code`` covered by ``index``.

    ``index`` is a ``(start_point, end_point)`` pair whose points are
    ``(row, column)`` positions, and ``code`` is indexed by row. When the span
    covers several rows the pieces are concatenated without any separator.
    """
    begin, finish = index[0], index[1]
    first_row, last_row = begin[0], finish[0]
    if first_row == last_row:
        return code[first_row][begin[1]:finish[1]]
    pieces = [code[first_row][begin[1]:]]
    pieces.extend(code[row] for row in range(first_row + 1, last_row))
    pieces.append(code[last_row][:finish[1]])
    return "".join(pieces)
105
+
106
+
107
def DFG_python(root_node,index_to_code,states):
    """Extract data-flow edges from a tree-sitter Python subtree.

    Args:
        root_node: node exposing ``type``, ``children``, ``start_point``,
            ``end_point`` and ``child_by_field_name``.
        index_to_code: dict mapping a token's ``(start_point, end_point)``
            span to an ``(idx, code)`` pair.
        states: dict mapping a code string to the list of idx values recorded
            for it so far. The dict is copied before use, so the caller's
            dict is not modified.

    Returns:
        ``(DFG, states)``: ``DFG`` is a list of
        ``(code, idx, relation, parent_codes, parent_idxs)`` tuples sorted by
        ``idx`` (``relation`` is ``'comesFrom'`` or ``'computedFrom'``) and
        ``states`` is the updated copy.
    """
    assignment=['assignment','augmented_assignment','for_in_clause']
    if_statement=['if_statement']
    for_statement=['for_statement']
    while_statement=['while_statement']
    do_first_statement=['for_in_clause']
    def_statement=['default_parameter']
    states=states.copy()

    # Token level: childless nodes and string/character literals, but not comments.
    if (len(root_node.children)==0 or root_node.type in ['string_literal','string','character_literal']) and root_node.type!='comment':
        idx,code=index_to_code[(root_node.start_point,root_node.end_point)]
        if root_node.type==code:
            # Text equals the node type (keywords, punctuation): no edge.
            return [],states
        if code in states:
            return [(code,idx,'comesFrom',[code],states[code].copy())],states
        if root_node.type=='identifier':
            states[code]=[idx]
        return [(code,idx,'comesFrom',[],[])],states

    if root_node.type in def_statement:
        name=root_node.child_by_field_name('name')
        value=root_node.child_by_field_name('value')
        DFG=[]
        if value is None:
            for index in tree_to_variable_index(name,index_to_code):
                idx,code=index_to_code[index]
                DFG.append((code,idx,'comesFrom',[],[]))
                states[code]=[idx]
            return sorted(DFG,key=lambda x:x[1]),states
        name_indexs=tree_to_variable_index(name,index_to_code)
        value_indexs=tree_to_variable_index(value,index_to_code)
        temp,states=DFG_python(value,index_to_code,states)
        DFG+=temp
        for index1 in name_indexs:
            idx1,code1=index_to_code[index1]
            for index2 in value_indexs:
                idx2,code2=index_to_code[index2]
                DFG.append((code1,idx1,'comesFrom',[code2],[idx2]))
            states[code1]=[idx1]
        return sorted(DFG,key=lambda x:x[1]),states

    if root_node.type in assignment:
        if root_node.type=='for_in_clause':
            right_nodes=[root_node.children[-1]]
            left_nodes=[root_node.child_by_field_name('left')]
        else:
            if root_node.child_by_field_name('right') is None:
                return [],states
            left_nodes,right_nodes=_python_split_sides(root_node)
        DFG=[]
        for node in right_nodes:
            temp,states=DFG_python(node,index_to_code,states)
            DFG+=temp
        DFG+=_python_link_sides(left_nodes,right_nodes,index_to_code,states)
        return sorted(DFG,key=lambda x:x[1]),states

    if root_node.type in if_statement:
        DFG=[]
        current_states=states.copy()
        others_states=[]
        tag=False
        for child in root_node.children:
            if 'else' in child.type:
                tag=True
            if child.type not in ['elif_clause','else_clause']:
                temp,current_states=DFG_python(child,index_to_code,current_states)
                DFG+=temp
            else:
                # Alternative branches start from the states seen before the if.
                temp,new_states=DFG_python(child,index_to_code,states)
                DFG+=temp
                others_states.append(new_states)
        others_states.append(current_states)
        if tag is False:
            # No child type contains 'else': the incoming states are merged too.
            others_states.append(states)
        return sorted(DFG,key=lambda x:x[1]),_python_merge_states(others_states)

    if root_node.type in for_statement:
        DFG=[]
        left_nodes,right_nodes=_python_split_sides(root_node)
        # NOTE(review): the body is only analysed when it is the last child;
        # presumably a for/else statement ends with its else clause instead,
        # in which case the body is skipped - confirm against the grammar.
        body=root_node.children[-1]
        # Two passes, so uses that precede a definition inside the loop are
        # linked on the second pass.
        for _ in range(2):
            for node in right_nodes:
                temp,states=DFG_python(node,index_to_code,states)
                DFG+=temp
            DFG+=_python_link_sides(left_nodes,right_nodes,index_to_code,states)
            if body.type=="block":
                temp,states=DFG_python(body,index_to_code,states)
                DFG+=temp
        return _python_merge_edges(DFG),states

    if root_node.type in while_statement:
        DFG=[]
        for _ in range(2):
            for child in root_node.children:
                temp,states=DFG_python(child,index_to_code,states)
                DFG+=temp
        return _python_merge_edges(DFG),states

    DFG=[]
    # for_in_clause children are processed before their siblings.
    for child in root_node.children:
        if child.type in do_first_statement:
            temp,states=DFG_python(child,index_to_code,states)
            DFG+=temp
    for child in root_node.children:
        if child.type not in do_first_statement:
            temp,states=DFG_python(child,index_to_code,states)
            DFG+=temp
    return sorted(DFG,key=lambda x:x[1]),states


def _python_split_sides(root_node):
    """Split the 'left'/'right' fields into comma-free, pairable node lists."""
    left=root_node.child_by_field_name('left')
    right=root_node.child_by_field_name('right')
    left_nodes=[x for x in left.children if x.type!=',']
    right_nodes=[x for x in right.children if x.type!=',']
    if len(right_nodes)!=len(left_nodes):
        left_nodes=[left]
        right_nodes=[right]
    if len(left_nodes)==0:
        left_nodes=[left]
    if len(right_nodes)==0:
        right_nodes=[right]
    return left_nodes,right_nodes


def _python_link_sides(left_nodes,right_nodes,index_to_code,states):
    """Build 'computedFrom' edges for paired nodes and record targets in ``states`` (in place)."""
    edges=[]
    for left_node,right_node in zip(left_nodes,right_nodes):
        left_tokens_index=tree_to_variable_index(left_node,index_to_code)
        right_tokens_index=tree_to_variable_index(right_node,index_to_code)
        for token1_index in left_tokens_index:
            idx1,code1=index_to_code[token1_index]
            edges.append((code1,idx1,'computedFrom',
                          [index_to_code[x][1] for x in right_tokens_index],
                          [index_to_code[x][0] for x in right_tokens_index]))
            states[code1]=[idx1]
    return edges


def _python_merge_states(branch_states):
    """Union per-branch states; each name keeps the sorted set of its idx values."""
    merged={}
    for branch in branch_states:
        for name,positions in branch.items():
            merged.setdefault(name,[]).extend(positions)
    return {name:sorted(set(positions)) for name,positions in merged.items()}


def _python_merge_edges(edges):
    """Collapse edges sharing (code, idx, relation) into one edge, sorted by idx."""
    merged={}
    for code,idx,relation,parent_codes,parent_idxs in edges:
        key=(code,idx,relation)
        if key not in merged:
            merged[key]=[parent_codes,parent_idxs]
        else:
            # dict.fromkeys de-duplicates in first-seen order, so the result
            # does not depend on string hash randomisation.
            merged[key][0]=list(dict.fromkeys(merged[key][0]+parent_codes))
            merged[key][1]=sorted(set(merged[key][1]+parent_idxs))
    ordered=sorted(merged.items(),key=lambda item:item[0][1])
    return [(k[0],k[1],k[2],v[0],v[1]) for k,v in ordered]
274
+
275
+
276
def DFG_java(root_node,index_to_code,states):
    """Extract data-flow edges from a tree-sitter Java subtree.

    Args:
        root_node: node exposing ``type``, ``children``, ``start_point``,
            ``end_point`` and ``child_by_field_name``.
        index_to_code: dict mapping a token's ``(start_point, end_point)``
            span to an ``(idx, code)`` pair.
        states: dict mapping a code string to the list of idx values recorded
            for it so far. The dict is copied before use, so the caller's
            dict is not modified.

    Returns:
        ``(DFG, states)``: ``DFG`` is a list of
        ``(code, idx, relation, parent_codes, parent_idxs)`` tuples sorted by
        ``idx`` and ``states`` is the updated copy.
    """
    assignment=['assignment_expression']
    def_statement=['variable_declarator']
    increment_statement=['update_expression']
    if_statement=['if_statement','else']
    for_statement=['for_statement']
    enhanced_for_statement=['enhanced_for_statement']
    while_statement=['while_statement']
    states=states.copy()

    # Token level: childless nodes and string/character literals, but not comments.
    if (len(root_node.children)==0 or root_node.type in ['string_literal','string','character_literal']) and root_node.type!='comment':
        idx,code=index_to_code[(root_node.start_point,root_node.end_point)]
        if root_node.type==code:
            # Text equals the node type (keywords, punctuation): no edge.
            return [],states
        if code in states:
            return [(code,idx,'comesFrom',[code],states[code].copy())],states
        if root_node.type=='identifier':
            states[code]=[idx]
        return [(code,idx,'comesFrom',[],[])],states

    if root_node.type in def_statement:
        name=root_node.child_by_field_name('name')
        value=root_node.child_by_field_name('value')
        DFG=[]
        if value is None:
            for index in tree_to_variable_index(name,index_to_code):
                idx,code=index_to_code[index]
                DFG.append((code,idx,'comesFrom',[],[]))
                states[code]=[idx]
            return sorted(DFG,key=lambda x:x[1]),states
        name_indexs=tree_to_variable_index(name,index_to_code)
        value_indexs=tree_to_variable_index(value,index_to_code)
        temp,states=DFG_java(value,index_to_code,states)
        DFG+=temp
        DFG+=_java_pair_edges(name_indexs,value_indexs,'comesFrom',index_to_code,states)
        return sorted(DFG,key=lambda x:x[1]),states

    if root_node.type in assignment:
        left_node=root_node.child_by_field_name('left')
        right_node=root_node.child_by_field_name('right')
        DFG=[]
        temp,states=DFG_java(right_node,index_to_code,states)
        DFG+=temp
        name_indexs=tree_to_variable_index(left_node,index_to_code)
        value_indexs=tree_to_variable_index(right_node,index_to_code)
        DFG+=_java_pair_edges(name_indexs,value_indexs,'computedFrom',index_to_code,states)
        return sorted(DFG,key=lambda x:x[1]),states

    if root_node.type in increment_statement:
        # Every variable token in the expression is computed from all of them.
        indexs=tree_to_variable_index(root_node,index_to_code)
        DFG=_java_pair_edges(indexs,indexs,'computedFrom',index_to_code,states)
        return sorted(DFG,key=lambda x:x[1]),states

    if root_node.type in if_statement:
        DFG=[]
        current_states=states.copy()
        others_states=[]
        flag=False
        tag=False
        if 'else' in root_node.type:
            tag=True
        for child in root_node.children:
            if 'else' in child.type:
                tag=True
            if child.type not in if_statement and flag is False:
                temp,current_states=DFG_java(child,index_to_code,current_states)
                DFG+=temp
            else:
                # From the first 'if_statement'/'else' child onwards, every
                # child is analysed from the states seen before the if.
                flag=True
                temp,new_states=DFG_java(child,index_to_code,states)
                DFG+=temp
                others_states.append(new_states)
        others_states.append(current_states)
        if tag is False:
            # No type contains 'else': the incoming states are merged too.
            others_states.append(states)
        return sorted(DFG,key=lambda x:x[1]),_java_merge_states(others_states)

    if root_node.type in for_statement:
        DFG=[]
        for child in root_node.children:
            temp,states=DFG_java(child,index_to_code,states)
            DFG+=temp
        # Second pass over the children that follow the loop's
        # local_variable_declaration, so loop-carried uses are linked.
        flag=False
        for child in root_node.children:
            if flag:
                temp,states=DFG_java(child,index_to_code,states)
                DFG+=temp
            elif child.type=="local_variable_declaration":
                flag=True
        return _java_merge_edges(DFG),states

    if root_node.type in enhanced_for_statement:
        name=root_node.child_by_field_name('name')
        value=root_node.child_by_field_name('value')
        body=root_node.child_by_field_name('body')
        DFG=[]
        for _ in range(2):
            temp,states=DFG_java(value,index_to_code,states)
            DFG+=temp
            name_indexs=tree_to_variable_index(name,index_to_code)
            value_indexs=tree_to_variable_index(value,index_to_code)
            DFG+=_java_pair_edges(name_indexs,value_indexs,'computedFrom',index_to_code,states)
            temp,states=DFG_java(body,index_to_code,states)
            DFG+=temp
        return _java_merge_edges(DFG),states

    if root_node.type in while_statement:
        DFG=[]
        for _ in range(2):
            for child in root_node.children:
                temp,states=DFG_java(child,index_to_code,states)
                DFG+=temp
        return _java_merge_edges(DFG),states

    DFG=[]
    for child in root_node.children:
        temp,states=DFG_java(child,index_to_code,states)
        DFG+=temp
    return sorted(DFG,key=lambda x:x[1]),states


def _java_pair_edges(name_indexs,value_indexs,relation,index_to_code,states):
    """Emit one edge per (name, value) token pair and record names in ``states`` (in place)."""
    edges=[]
    for index1 in name_indexs:
        idx1,code1=index_to_code[index1]
        for index2 in value_indexs:
            idx2,code2=index_to_code[index2]
            edges.append((code1,idx1,relation,[code2],[idx2]))
        states[code1]=[idx1]
    return edges


def _java_merge_states(branch_states):
    """Union per-branch states; each name keeps the sorted set of its idx values."""
    merged={}
    for branch in branch_states:
        for name,positions in branch.items():
            merged.setdefault(name,[]).extend(positions)
    return {name:sorted(set(positions)) for name,positions in merged.items()}


def _java_merge_edges(edges):
    """Collapse edges sharing (code, idx, relation) into one edge, sorted by idx."""
    merged={}
    for code,idx,relation,parent_codes,parent_idxs in edges:
        key=(code,idx,relation)
        if key not in merged:
            merged[key]=[parent_codes,parent_idxs]
        else:
            # dict.fromkeys de-duplicates in first-seen order, so the result
            # does not depend on string hash randomisation.
            merged[key][0]=list(dict.fromkeys(merged[key][0]+parent_codes))
            merged[key][1]=sorted(set(merged[key][1]+parent_idxs))
    ordered=sorted(merged.items(),key=lambda item:item[0][1])
    return [(k[0],k[1],k[2],v[0],v[1]) for k,v in ordered]
451
+
452
def DFG_csharp(root_node,index_to_code,states):
    """Extract data-flow edges from a tree-sitter C# subtree.

    Args:
        root_node: node exposing ``type``, ``children``, ``start_point``,
            ``end_point`` and ``child_by_field_name``.
        index_to_code: dict mapping a token's ``(start_point, end_point)``
            span to an ``(idx, code)`` pair.
        states: dict mapping a code string to the list of idx values recorded
            for it so far. The dict is copied before use, so the caller's
            dict is not modified.

    Returns:
        ``(DFG, states)``: ``DFG`` is a list of
        ``(code, idx, relation, parent_codes, parent_idxs)`` tuples sorted by
        ``idx`` and ``states`` is the updated copy.
    """
    assignment=['assignment_expression']
    def_statement=['variable_declarator']
    increment_statement=['postfix_unary_expression']
    if_statement=['if_statement','else']
    for_statement=['for_statement']
    enhanced_for_statement=['for_each_statement']
    while_statement=['while_statement']
    states=states.copy()

    # Token level: childless nodes and string/character literals, but not comments.
    if (len(root_node.children)==0 or root_node.type in ['string_literal','string','character_literal']) and root_node.type!='comment':
        idx,code=index_to_code[(root_node.start_point,root_node.end_point)]
        if root_node.type==code:
            # Text equals the node type (keywords, punctuation): no edge.
            return [],states
        if code in states:
            return [(code,idx,'comesFrom',[code],states[code].copy())],states
        if root_node.type=='identifier':
            states[code]=[idx]
        return [(code,idx,'comesFrom',[],[])],states

    if root_node.type in def_statement:
        # With exactly two children the second one is treated as the value;
        # any other child count is treated as a declaration without a value.
        name=root_node.children[0]
        value=root_node.children[1] if len(root_node.children)==2 else None
        DFG=[]
        if value is None:
            for index in tree_to_variable_index(name,index_to_code):
                idx,code=index_to_code[index]
                DFG.append((code,idx,'comesFrom',[],[]))
                states[code]=[idx]
            return sorted(DFG,key=lambda x:x[1]),states
        name_indexs=tree_to_variable_index(name,index_to_code)
        value_indexs=tree_to_variable_index(value,index_to_code)
        temp,states=DFG_csharp(value,index_to_code,states)
        DFG+=temp
        DFG+=_csharp_pair_edges(name_indexs,value_indexs,'comesFrom',index_to_code,states)
        return sorted(DFG,key=lambda x:x[1]),states

    if root_node.type in assignment:
        left_node=root_node.child_by_field_name('left')
        right_node=root_node.child_by_field_name('right')
        DFG=[]
        temp,states=DFG_csharp(right_node,index_to_code,states)
        DFG+=temp
        name_indexs=tree_to_variable_index(left_node,index_to_code)
        value_indexs=tree_to_variable_index(right_node,index_to_code)
        DFG+=_csharp_pair_edges(name_indexs,value_indexs,'computedFrom',index_to_code,states)
        return sorted(DFG,key=lambda x:x[1]),states

    if root_node.type in increment_statement:
        # Every variable token in the expression is computed from all of them.
        indexs=tree_to_variable_index(root_node,index_to_code)
        DFG=_csharp_pair_edges(indexs,indexs,'computedFrom',index_to_code,states)
        return sorted(DFG,key=lambda x:x[1]),states

    if root_node.type in if_statement:
        DFG=[]
        current_states=states.copy()
        others_states=[]
        flag=False
        tag=False
        if 'else' in root_node.type:
            tag=True
        for child in root_node.children:
            if 'else' in child.type:
                tag=True
            if child.type not in if_statement and flag is False:
                temp,current_states=DFG_csharp(child,index_to_code,current_states)
                DFG+=temp
            else:
                # From the first 'if_statement'/'else' child onwards, every
                # child is analysed from the states seen before the if.
                flag=True
                temp,new_states=DFG_csharp(child,index_to_code,states)
                DFG+=temp
                others_states.append(new_states)
        others_states.append(current_states)
        if tag is False:
            # No type contains 'else': the incoming states are merged too.
            others_states.append(states)
        return sorted(DFG,key=lambda x:x[1]),_csharp_merge_states(others_states)

    if root_node.type in for_statement:
        DFG=[]
        for child in root_node.children:
            temp,states=DFG_csharp(child,index_to_code,states)
            DFG+=temp
        # Second pass over the children that follow the loop's declaration.
        # NOTE(review): "local_variable_declaration" is the same type name the
        # Java variant checks; confirm the C# grammar emits it, otherwise this
        # second pass never runs.
        flag=False
        for child in root_node.children:
            if flag:
                temp,states=DFG_csharp(child,index_to_code,states)
                DFG+=temp
            elif child.type=="local_variable_declaration":
                flag=True
        return _csharp_merge_edges(DFG),states

    if root_node.type in enhanced_for_statement:
        name=root_node.child_by_field_name('left')
        value=root_node.child_by_field_name('right')
        body=root_node.child_by_field_name('body')
        DFG=[]
        for _ in range(2):
            temp,states=DFG_csharp(value,index_to_code,states)
            DFG+=temp
            name_indexs=tree_to_variable_index(name,index_to_code)
            value_indexs=tree_to_variable_index(value,index_to_code)
            DFG+=_csharp_pair_edges(name_indexs,value_indexs,'computedFrom',index_to_code,states)
            temp,states=DFG_csharp(body,index_to_code,states)
            DFG+=temp
        return _csharp_merge_edges(DFG),states

    if root_node.type in while_statement:
        DFG=[]
        for _ in range(2):
            for child in root_node.children:
                temp,states=DFG_csharp(child,index_to_code,states)
                DFG+=temp
        return _csharp_merge_edges(DFG),states

    DFG=[]
    for child in root_node.children:
        temp,states=DFG_csharp(child,index_to_code,states)
        DFG+=temp
    return sorted(DFG,key=lambda x:x[1]),states


def _csharp_pair_edges(name_indexs,value_indexs,relation,index_to_code,states):
    """Emit one edge per (name, value) token pair and record names in ``states`` (in place)."""
    edges=[]
    for index1 in name_indexs:
        idx1,code1=index_to_code[index1]
        for index2 in value_indexs:
            idx2,code2=index_to_code[index2]
            edges.append((code1,idx1,relation,[code2],[idx2]))
        states[code1]=[idx1]
    return edges


def _csharp_merge_states(branch_states):
    """Union per-branch states; each name keeps the sorted set of its idx values."""
    merged={}
    for branch in branch_states:
        for name,positions in branch.items():
            merged.setdefault(name,[]).extend(positions)
    return {name:sorted(set(positions)) for name,positions in merged.items()}


def _csharp_merge_edges(edges):
    """Collapse edges sharing (code, idx, relation) into one edge, sorted by idx."""
    merged={}
    for code,idx,relation,parent_codes,parent_idxs in edges:
        key=(code,idx,relation)
        if key not in merged:
            merged[key]=[parent_codes,parent_idxs]
        else:
            # dict.fromkeys de-duplicates in first-seen order, so the result
            # does not depend on string hash randomisation.
            merged[key][0]=list(dict.fromkeys(merged[key][0]+parent_codes))
            merged[key][1]=sorted(set(merged[key][1]+parent_idxs))
    ordered=sorted(merged.items(),key=lambda item:item[0][1])
    return [(k[0],k[1],k[2],v[0],v[1]) for k,v in ordered]
631
+
632
+
633
+
634
+
635
def DFG_ruby(root_node,index_to_code,states):
    """Extract data-flow edges from a tree-sitter Ruby subtree.

    Args:
        root_node: node exposing ``type``, ``children``, ``start_point``,
            ``end_point`` and ``child_by_field_name``.
        index_to_code: dict mapping a token's ``(start_point, end_point)``
            span to an ``(idx, code)`` pair.
        states: dict mapping a code string to the list of idx values recorded
            for it so far. The dict is copied before use, so the caller's
            dict is not modified.

    Returns:
        ``(DFG, states)``: ``DFG`` is a list of
        ``(code, idx, relation, parent_codes, parent_idxs)`` tuples sorted by
        ``idx`` and ``states`` is the updated copy.
    """
    assignment=['assignment','operator_assignment']
    if_statement=['if','elsif','else','unless','when']
    for_statement=['for']
    while_statement=['while_modifier','until']
    def_statement=['keyword_parameter']
    # Copy up front for every branch, so a branch that records a definition
    # before any recursive call cannot write into the caller's dict.
    states=states.copy()

    # Token level: childless nodes and string/character literals, but not comments.
    if (len(root_node.children)==0 or root_node.type in ['string_literal','string','character_literal']) and root_node.type!='comment':
        idx,code=index_to_code[(root_node.start_point,root_node.end_point)]
        if root_node.type==code:
            # Text equals the node type (keywords, punctuation): no edge.
            return [],states
        if code in states:
            return [(code,idx,'comesFrom',[code],states[code].copy())],states
        if root_node.type=='identifier':
            states[code]=[idx]
        return [(code,idx,'comesFrom',[],[])],states

    if root_node.type in def_statement:
        name=root_node.child_by_field_name('name')
        value=root_node.child_by_field_name('value')
        DFG=[]
        if value is None:
            for index in tree_to_variable_index(name,index_to_code):
                idx,code=index_to_code[index]
                DFG.append((code,idx,'comesFrom',[],[]))
                states[code]=[idx]
            return sorted(DFG,key=lambda x:x[1]),states
        name_indexs=tree_to_variable_index(name,index_to_code)
        value_indexs=tree_to_variable_index(value,index_to_code)
        temp,states=DFG_ruby(value,index_to_code,states)
        DFG+=temp
        for index1 in name_indexs:
            idx1,code1=index_to_code[index1]
            for index2 in value_indexs:
                idx2,code2=index_to_code[index2]
                DFG.append((code1,idx1,'comesFrom',[code2],[idx2]))
            states[code1]=[idx1]
        return sorted(DFG,key=lambda x:x[1]),states

    if root_node.type in assignment:
        left=root_node.child_by_field_name('left')
        right=root_node.child_by_field_name('right')
        left_nodes=[x for x in left.children if x.type!=',']
        right_nodes=[x for x in right.children if x.type!=',']
        if len(right_nodes)!=len(left_nodes):
            left_nodes=[left]
            right_nodes=[right]
        if len(left_nodes)==0:
            left_nodes=[left]
        if len(right_nodes)==0:
            right_nodes=[right]
        if root_node.type=="operator_assignment":
            # Operator assignments always pair the first and last child.
            left_nodes=[root_node.children[0]]
            right_nodes=[root_node.children[-1]]
        DFG=[]
        for node in right_nodes:
            temp,states=DFG_ruby(node,index_to_code,states)
            DFG+=temp
        DFG+=_ruby_link_sides(left_nodes,right_nodes,index_to_code,states)
        return sorted(DFG,key=lambda x:x[1]),states

    if root_node.type in if_statement:
        DFG=[]
        current_states=states.copy()
        others_states=[]
        tag=False
        if 'else' in root_node.type:
            tag=True
        for child in root_node.children:
            if 'else' in child.type:
                tag=True
            if child.type not in if_statement:
                temp,current_states=DFG_ruby(child,index_to_code,current_states)
                DFG+=temp
            else:
                # Alternative branches start from the states seen before the if.
                temp,new_states=DFG_ruby(child,index_to_code,states)
                DFG+=temp
                others_states.append(new_states)
        others_states.append(current_states)
        if tag is False:
            # No type contains 'else': the incoming states are merged too.
            others_states.append(states)
        return sorted(DFG,key=lambda x:x[1]),_ruby_merge_states(others_states)

    if root_node.type in for_statement:
        DFG=[]
        left_nodes=[root_node.child_by_field_name('pattern')]
        right_nodes=[root_node.child_by_field_name('value')]
        body=root_node.child_by_field_name('body')
        # Two passes, so uses that precede a definition inside the loop are
        # linked on the second pass.
        for _ in range(2):
            for node in right_nodes:
                temp,states=DFG_ruby(node,index_to_code,states)
                DFG+=temp
            DFG+=_ruby_link_sides(left_nodes,right_nodes,index_to_code,states)
            temp,states=DFG_ruby(body,index_to_code,states)
            DFG+=temp
        return _ruby_merge_edges(DFG),states

    if root_node.type in while_statement:
        DFG=[]
        for _ in range(2):
            for child in root_node.children:
                temp,states=DFG_ruby(child,index_to_code,states)
                DFG+=temp
        return _ruby_merge_edges(DFG),states

    DFG=[]
    for child in root_node.children:
        temp,states=DFG_ruby(child,index_to_code,states)
        DFG+=temp
    return sorted(DFG,key=lambda x:x[1]),states


def _ruby_link_sides(left_nodes,right_nodes,index_to_code,states):
    """Build 'computedFrom' edges for paired nodes and record targets in ``states`` (in place)."""
    edges=[]
    for left_node,right_node in zip(left_nodes,right_nodes):
        left_tokens_index=tree_to_variable_index(left_node,index_to_code)
        right_tokens_index=tree_to_variable_index(right_node,index_to_code)
        for token1_index in left_tokens_index:
            idx1,code1=index_to_code[token1_index]
            edges.append((code1,idx1,'computedFrom',
                          [index_to_code[x][1] for x in right_tokens_index],
                          [index_to_code[x][0] for x in right_tokens_index]))
            states[code1]=[idx1]
    return edges


def _ruby_merge_states(branch_states):
    """Union per-branch states; each name keeps the sorted set of its idx values."""
    merged={}
    for branch in branch_states:
        for name,positions in branch.items():
            merged.setdefault(name,[]).extend(positions)
    return {name:sorted(set(positions)) for name,positions in merged.items()}


def _ruby_merge_edges(edges):
    """Collapse edges sharing (code, idx, relation) into one edge, sorted by idx."""
    merged={}
    for code,idx,relation,parent_codes,parent_idxs in edges:
        key=(code,idx,relation)
        if key not in merged:
            merged[key]=[parent_codes,parent_idxs]
        else:
            # dict.fromkeys de-duplicates in first-seen order, so the result
            # does not depend on string hash randomisation.
            merged[key][0]=list(dict.fromkeys(merged[key][0]+parent_codes))
            merged[key][1]=sorted(set(merged[key][1]+parent_idxs))
    ordered=sorted(merged.items(),key=lambda item:item[0][1])
    return [(k[0],k[1],k[2],v[0],v[1]) for k,v in ordered]
793
+
794
def _go_link_edges(target_indexes, source_indexes, relation, index_to_code, states):
    """Build one edge per (target, source) pair and mark every target as redefined.

    ``states`` is updated in place: each target code is mapped to a
    single-element list holding the target's own token index.
    """
    edges = []
    for target in target_indexes:
        target_idx, target_code = index_to_code[target]
        for source in source_indexes:
            source_idx, source_code = index_to_code[source]
            edges.append((target_code, target_idx, relation, [source_code], [source_idx]))
        states[target_code] = [target_idx]
    return edges


def _go_merge_edges(edges):
    """Collapse edges sharing (code, idx, relation) and order them by token index.

    The first occurrence keeps its parent lists untouched; later occurrences
    are unioned in (parent codes de-duplicated, parent indexes de-duplicated
    and sorted).
    """
    merged = {}
    for code, idx, relation, parent_codes, parent_idxs in edges:
        key = (code, idx, relation)
        if key not in merged:
            merged[key] = [parent_codes, parent_idxs]
        else:
            merged[key][0] = list(set(merged[key][0] + parent_codes))
            merged[key][1] = sorted(set(merged[key][1] + parent_idxs))
    return [
        (key[0], key[1], key[2], parents[0], parents[1])
        for key, parents in sorted(merged.items(), key=lambda item: item[0][1])
    ]


def DFG_go(root_node, index_to_code, states):
    """Extract data-flow edges for the subtree rooted at ``root_node`` (Go).

    Args:
        root_node: syntax-tree node exposing ``type``, ``children``,
            ``start_point``, ``end_point`` and ``child_by_field_name``.
        index_to_code: mapping ``(start_point, end_point) -> (idx, code)``
            for every token of the program.
        states: mapping ``code -> [idx, ...]`` of the token indexes each
            name may currently come from. It is copied, never mutated.

    Returns:
        ``(edges, states)`` where ``edges`` is a list of
        ``(code, idx, relation, parent_codes, parent_idxs)`` tuples sorted by
        ``idx`` and ``states`` is the updated copy of the input mapping.
    """
    assignment = ['assignment_statement']
    def_statement = ['var_spec']
    increment_statement = ['inc_statement']
    if_statement = ['if_statement', 'else']
    for_statement = ['for_statement']
    states = states.copy()
    node_type = root_node.type

    is_token = (len(root_node.children) == 0
                or node_type in ['string_literal', 'string', 'character_literal'])
    if is_token and node_type != 'comment':
        idx, code = index_to_code[(root_node.start_point, root_node.end_point)]
        if node_type == code:
            # Keywords / punctuation: the node type equals its own text.
            return [], states
        if code in states:
            return [(code, idx, 'comesFrom', [code], states[code].copy())], states
        if node_type == 'identifier':
            states[code] = [idx]
        return [(code, idx, 'comesFrom', [], [])], states

    if node_type in def_statement:
        name = root_node.child_by_field_name('name')
        value = root_node.child_by_field_name('value')
        edges = []
        if value is None:
            for index in tree_to_variable_index(name, index_to_code):
                idx, code = index_to_code[index]
                edges.append((code, idx, 'comesFrom', [], []))
                states[code] = [idx]
            return sorted(edges, key=lambda edge: edge[1]), states
        name_indexes = tree_to_variable_index(name, index_to_code)
        value_indexes = tree_to_variable_index(value, index_to_code)
        temp, states = DFG_go(value, index_to_code, states)
        edges += temp
        edges += _go_link_edges(name_indexes, value_indexes, 'comesFrom', index_to_code, states)
        return sorted(edges, key=lambda edge: edge[1]), states

    if node_type in assignment:
        left = root_node.child_by_field_name('left')
        right = root_node.child_by_field_name('right')
        # The right-hand side is evaluated against the pre-assignment state.
        edges, states = DFG_go(right, index_to_code, states)
        name_indexes = tree_to_variable_index(left, index_to_code)
        value_indexes = tree_to_variable_index(right, index_to_code)
        edges += _go_link_edges(name_indexes, value_indexes, 'computedFrom', index_to_code, states)
        return sorted(edges, key=lambda edge: edge[1]), states

    if node_type in increment_statement:
        indexes = tree_to_variable_index(root_node, index_to_code)
        # Every variable of the statement is computed from all of them.
        edges = _go_link_edges(indexes, indexes, 'computedFrom', index_to_code, states)
        return sorted(edges, key=lambda edge: edge[1]), states

    if node_type in if_statement:
        edges = []
        branch_states = states.copy()
        alternatives = []
        reached_alternative = False
        for child in root_node.children:
            if child.type not in if_statement and not reached_alternative:
                temp, branch_states = DFG_go(child, index_to_code, branch_states)
                edges += temp
            else:
                # From the first else / nested if onwards every child starts
                # again from the state that held before this statement.
                reached_alternative = True
                temp, alternative = DFG_go(child, index_to_code, states)
                edges += temp
                alternatives.append(alternative)
        # The pre-branch state is always one of the merge candidates.
        merged = {}
        for snapshot in alternatives + [branch_states, states]:
            for code, origins in snapshot.items():
                merged.setdefault(code, set()).update(origins)
        new_states = {code: sorted(origins) for code, origins in merged.items()}
        return sorted(edges, key=lambda edge: edge[1]), new_states

    if node_type in for_statement:
        edges = []
        for child in root_node.children:
            temp, states = DFG_go(child, index_to_code, states)
            edges += temp
        # Second pass models the loop back edge: re-run the update clause and
        # everything that follows the for clause with the post-body state.
        after_clause = False
        for child in root_node.children:
            if after_clause:
                temp, states = DFG_go(child, index_to_code, states)
                edges += temp
            elif child.type == "for_clause":
                update = child.child_by_field_name('update')
                if update is not None:
                    temp, states = DFG_go(update, index_to_code, states)
                    edges += temp
                after_clause = True
        return _go_merge_edges(edges), states

    edges = []
    for child in root_node.children:
        temp, states = DFG_go(child, index_to_code, states)
        edges += temp
    return sorted(edges, key=lambda edge: edge[1]), states
935
+
936
+
937
+
938
+
939
def _php_link_edges(target_indexes, source_indexes, relation, index_to_code, states):
    """Build one edge per (target, source) pair and mark every target as redefined.

    ``states`` is updated in place: each target code is mapped to a
    single-element list holding the target's own token index.
    """
    edges = []
    for target in target_indexes:
        target_idx, target_code = index_to_code[target]
        for source in source_indexes:
            source_idx, source_code = index_to_code[source]
            edges.append((target_code, target_idx, relation, [source_code], [source_idx]))
        states[target_code] = [target_idx]
    return edges


def _php_merge_edges(edges):
    """Collapse edges sharing (code, idx, relation) and order them by token index.

    The first occurrence keeps its parent lists untouched; later occurrences
    are unioned in (parent codes de-duplicated, parent indexes de-duplicated
    and sorted).
    """
    merged = {}
    for code, idx, relation, parent_codes, parent_idxs in edges:
        key = (code, idx, relation)
        if key not in merged:
            merged[key] = [parent_codes, parent_idxs]
        else:
            merged[key][0] = list(set(merged[key][0] + parent_codes))
            merged[key][1] = sorted(set(merged[key][1] + parent_idxs))
    return [
        (key[0], key[1], key[2], parents[0], parents[1])
        for key, parents in sorted(merged.items(), key=lambda item: item[0][1])
    ]


def DFG_php(root_node, index_to_code, states):
    """Extract data-flow edges for the subtree rooted at ``root_node`` (PHP).

    Args:
        root_node: syntax-tree node exposing ``type``, ``children``,
            ``start_point``, ``end_point`` and ``child_by_field_name``.
        index_to_code: mapping ``(start_point, end_point) -> (idx, code)``
            for every token of the program.
        states: mapping ``code -> [idx, ...]`` of the token indexes each
            name may currently come from. It is copied, never mutated.

    Returns:
        ``(edges, states)`` where ``edges`` is a list of
        ``(code, idx, relation, parent_codes, parent_idxs)`` tuples sorted by
        ``idx`` and ``states`` is the updated copy of the input mapping.
    """
    assignment = ['assignment_expression', 'augmented_assignment_expression']
    def_statement = ['simple_parameter']
    increment_statement = ['update_expression']
    if_statement = ['if_statement', 'else_clause']
    for_statement = ['for_statement']
    enhanced_for_statement = ['foreach_statement']
    while_statement = ['while_statement']
    states = states.copy()
    node_type = root_node.type

    is_token = (len(root_node.children) == 0
                or node_type in ['string_literal', 'string', 'character_literal'])
    if is_token and node_type != 'comment':
        idx, code = index_to_code[(root_node.start_point, root_node.end_point)]
        if node_type == code:
            # Keywords / punctuation: the node type equals its own text.
            return [], states
        if code in states:
            return [(code, idx, 'comesFrom', [code], states[code].copy())], states
        if node_type == 'identifier':
            states[code] = [idx]
        return [(code, idx, 'comesFrom', [], [])], states

    if node_type in def_statement:
        name = root_node.child_by_field_name('name')
        value = root_node.child_by_field_name('default_value')
        edges = []
        if value is None:
            for index in tree_to_variable_index(name, index_to_code):
                idx, code = index_to_code[index]
                edges.append((code, idx, 'comesFrom', [], []))
                states[code] = [idx]
            return sorted(edges, key=lambda edge: edge[1]), states
        name_indexes = tree_to_variable_index(name, index_to_code)
        value_indexes = tree_to_variable_index(value, index_to_code)
        temp, states = DFG_php(value, index_to_code, states)
        edges += temp
        edges += _php_link_edges(name_indexes, value_indexes, 'comesFrom', index_to_code, states)
        return sorted(edges, key=lambda edge: edge[1]), states

    if node_type in assignment:
        left = root_node.child_by_field_name('left')
        right = root_node.child_by_field_name('right')
        # The right-hand side is evaluated against the pre-assignment state.
        edges, states = DFG_php(right, index_to_code, states)
        name_indexes = tree_to_variable_index(left, index_to_code)
        value_indexes = tree_to_variable_index(right, index_to_code)
        edges += _php_link_edges(name_indexes, value_indexes, 'computedFrom', index_to_code, states)
        return sorted(edges, key=lambda edge: edge[1]), states

    if node_type in increment_statement:
        indexes = tree_to_variable_index(root_node, index_to_code)
        # Every variable of the expression is computed from all of them.
        edges = _php_link_edges(indexes, indexes, 'computedFrom', index_to_code, states)
        return sorted(edges, key=lambda edge: edge[1]), states

    if node_type in if_statement:
        edges = []
        branch_states = states.copy()
        alternatives = []
        reached_alternative = False
        for child in root_node.children:
            if child.type not in if_statement and not reached_alternative:
                temp, branch_states = DFG_php(child, index_to_code, branch_states)
                edges += temp
            else:
                # From the first else clause / nested if onwards every child
                # starts again from the state that held before this statement.
                reached_alternative = True
                temp, alternative = DFG_php(child, index_to_code, states)
                edges += temp
                alternatives.append(alternative)
        # The pre-branch state is always one of the merge candidates.
        merged = {}
        for snapshot in alternatives + [branch_states, states]:
            for code, origins in snapshot.items():
                merged.setdefault(code, set()).update(origins)
        new_states = {code: sorted(origins) for code, origins in merged.items()}
        return sorted(edges, key=lambda edge: edge[1]), new_states

    if node_type in for_statement:
        edges = []
        for child in root_node.children:
            temp, states = DFG_php(child, index_to_code, states)
            edges += temp
        # Second pass models the loop back edge: re-run every child that
        # follows the first direct assignment_expression child.
        after_init = False
        for child in root_node.children:
            if after_init:
                temp, states = DFG_php(child, index_to_code, states)
                edges += temp
            elif child.type == "assignment_expression":
                after_init = True
        return _php_merge_edges(edges), states

    if node_type in enhanced_for_statement:
        # Only direct variable_name children are considered: the first one is
        # the iterated value, the second one the loop variable.
        source = None
        target = None
        for child in root_node.children:
            if child.type == 'variable_name':
                if source is None:
                    source = child
                else:
                    target = child
                    break
        body = root_node.child_by_field_name('body')
        edges = []
        for _ in range(2):  # two passes so the body's definitions reach the header
            if source is not None:
                temp, states = DFG_php(source, index_to_code, states)
                edges += temp
            # With fewer than two direct variable_name children there is no
            # (source, target) pair to bind; skip the binding instead of
            # passing None into the tree walkers.
            if source is not None and target is not None:
                name_indexes = tree_to_variable_index(target, index_to_code)
                value_indexes = tree_to_variable_index(source, index_to_code)
                edges += _php_link_edges(name_indexes, value_indexes, 'computedFrom',
                                         index_to_code, states)
            if body is not None:
                temp, states = DFG_php(body, index_to_code, states)
                edges += temp
        return _php_merge_edges(edges), states

    if node_type in while_statement:
        edges = []
        for _ in range(2):  # second pass feeds body definitions back into the condition
            for child in root_node.children:
                temp, states = DFG_php(child, index_to_code, states)
                edges += temp
        return _php_merge_edges(edges), states

    edges = []
    for child in root_node.children:
        temp, states = DFG_php(child, index_to_code, states)
        edges += temp
    return sorted(edges, key=lambda edge: edge[1]), states
1123
+
1124
+
1125
def _js_link_edges(target_indexes, source_indexes, relation, index_to_code, states):
    """Build one edge per (target, source) pair and mark every target as redefined.

    ``states`` is updated in place: each target code is mapped to a
    single-element list holding the target's own token index.
    """
    edges = []
    for target in target_indexes:
        target_idx, target_code = index_to_code[target]
        for source in source_indexes:
            source_idx, source_code = index_to_code[source]
            edges.append((target_code, target_idx, relation, [source_code], [source_idx]))
        states[target_code] = [target_idx]
    return edges


def _js_merge_edges(edges):
    """Collapse edges sharing (code, idx, relation) and order them by token index.

    The first occurrence keeps its parent lists untouched; later occurrences
    are unioned in (parent codes de-duplicated, parent indexes de-duplicated
    and sorted).
    """
    merged = {}
    for code, idx, relation, parent_codes, parent_idxs in edges:
        key = (code, idx, relation)
        if key not in merged:
            merged[key] = [parent_codes, parent_idxs]
        else:
            merged[key][0] = list(set(merged[key][0] + parent_codes))
            merged[key][1] = sorted(set(merged[key][1] + parent_idxs))
    return [
        (key[0], key[1], key[2], parents[0], parents[1])
        for key, parents in sorted(merged.items(), key=lambda item: item[0][1])
    ]


def DFG_javascript(root_node, index_to_code, states):
    """Extract data-flow edges for the subtree rooted at ``root_node`` (JavaScript).

    Args:
        root_node: syntax-tree node exposing ``type``, ``children``,
            ``start_point``, ``end_point`` and ``child_by_field_name``.
        index_to_code: mapping ``(start_point, end_point) -> (idx, code)``
            for every token of the program.
        states: mapping ``code -> [idx, ...]`` of the token indexes each
            name may currently come from. It is copied, never mutated.

    Returns:
        ``(edges, states)`` where ``edges`` is a list of
        ``(code, idx, relation, parent_codes, parent_idxs)`` tuples sorted by
        ``idx`` and ``states`` is the updated copy of the input mapping.
    """
    assignment = ['assignment_pattern', 'augmented_assignment_expression']
    def_statement = ['variable_declarator']
    increment_statement = ['update_expression']
    if_statement = ['if_statement', 'else']
    for_statement = ['for_statement']
    while_statement = ['while_statement']
    states = states.copy()
    node_type = root_node.type

    is_token = (len(root_node.children) == 0
                or node_type in ['string_literal', 'string', 'character_literal'])
    if is_token and node_type != 'comment':
        idx, code = index_to_code[(root_node.start_point, root_node.end_point)]
        if node_type == code:
            # Keywords / punctuation: the node type equals its own text.
            return [], states
        if code in states:
            return [(code, idx, 'comesFrom', [code], states[code].copy())], states
        if node_type == 'identifier':
            states[code] = [idx]
        return [(code, idx, 'comesFrom', [], [])], states

    if node_type in def_statement:
        name = root_node.child_by_field_name('name')
        value = root_node.child_by_field_name('value')
        edges = []
        if value is None:
            for index in tree_to_variable_index(name, index_to_code):
                idx, code = index_to_code[index]
                edges.append((code, idx, 'comesFrom', [], []))
                states[code] = [idx]
            return sorted(edges, key=lambda edge: edge[1]), states
        name_indexes = tree_to_variable_index(name, index_to_code)
        value_indexes = tree_to_variable_index(value, index_to_code)
        temp, states = DFG_javascript(value, index_to_code, states)
        edges += temp
        edges += _js_link_edges(name_indexes, value_indexes, 'comesFrom', index_to_code, states)
        return sorted(edges, key=lambda edge: edge[1]), states

    if node_type in assignment:
        left = root_node.child_by_field_name('left')
        right = root_node.child_by_field_name('right')
        # The right-hand side is evaluated against the pre-assignment state.
        edges, states = DFG_javascript(right, index_to_code, states)
        name_indexes = tree_to_variable_index(left, index_to_code)
        value_indexes = tree_to_variable_index(right, index_to_code)
        edges += _js_link_edges(name_indexes, value_indexes, 'computedFrom', index_to_code, states)
        return sorted(edges, key=lambda edge: edge[1]), states

    if node_type in increment_statement:
        indexes = tree_to_variable_index(root_node, index_to_code)
        # Every variable of the expression is computed from all of them.
        edges = _js_link_edges(indexes, indexes, 'computedFrom', index_to_code, states)
        return sorted(edges, key=lambda edge: edge[1]), states

    if node_type in if_statement:
        edges = []
        branch_states = states.copy()
        alternatives = []
        reached_alternative = False
        for child in root_node.children:
            if child.type not in if_statement and not reached_alternative:
                temp, branch_states = DFG_javascript(child, index_to_code, branch_states)
                edges += temp
            else:
                # From the first else / nested if onwards every child starts
                # again from the state that held before this statement.
                reached_alternative = True
                temp, alternative = DFG_javascript(child, index_to_code, states)
                edges += temp
                alternatives.append(alternative)
        # The pre-branch state is always one of the merge candidates.
        merged = {}
        for snapshot in alternatives + [branch_states, states]:
            for code, origins in snapshot.items():
                merged.setdefault(code, set()).update(origins)
        new_states = {code: sorted(origins) for code, origins in merged.items()}
        return sorted(edges, key=lambda edge: edge[1]), new_states

    if node_type in for_statement:
        edges = []
        for child in root_node.children:
            temp, states = DFG_javascript(child, index_to_code, states)
            edges += temp
        # Second pass models the loop back edge: re-run every child that
        # follows the first direct variable_declaration child.
        after_init = False
        for child in root_node.children:
            if after_init:
                temp, states = DFG_javascript(child, index_to_code, states)
                edges += temp
            elif child.type == "variable_declaration":
                after_init = True
        return _js_merge_edges(edges), states

    if node_type in while_statement:
        edges = []
        for _ in range(2):  # second pass feeds body definitions back into the condition
            for child in root_node.children:
                temp, states = DFG_javascript(child, index_to_code, states)
                edges += temp
        return _js_merge_edges(edges), states

    edges = []
    for child in root_node.children:
        temp, states = DFG_javascript(child, index_to_code, states)
        edges += temp
    return sorted(edges, key=lambda edge: edge[1]), states
1278
+
1279
  dfg_function={
1280
  'python':DFG_python,
1281
  'java':DFG_java,