giulio98 committed on
Commit
09f0839
1 Parent(s): 78c0c06

Update syntax_match.py

Browse files
Files changed (1) hide show
  1. syntax_match.py +1272 -5
syntax_match.py CHANGED
@@ -1,13 +1,1280 @@
1
  # Copyright (c) Microsoft Corporation.
2
  # Licensed under the MIT license.
3
 
4
- from .parser.DFG import DFG_python,DFG_java,DFG_ruby,DFG_go,DFG_php,DFG_javascript,DFG_csharp
5
- from parser import (remove_comments_and_docstrings,
6
- tree_to_token_index,
7
- index_to_code_token,
8
- tree_to_variable_index)
9
  from tree_sitter import Language, Parser
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  dfg_function={
12
  'python':DFG_python,
13
  'java':DFG_java,
 
1
  # Copyright (c) Microsoft Corporation.
2
  # Licensed under the MIT license.
3
 
4
+
 
 
 
 
5
  from tree_sitter import Language, Parser
6
 
7
+ import re
8
+ from io import StringIO
9
+ import tokenize
10
def remove_comments_and_docstrings(source, lang):
    """Return ``source`` with comments removed and blank lines dropped.

    * ``lang == 'python'``: the text is re-assembled token by token with
      ``tokenize``; comment tokens are skipped, and string tokens that look
      like docstrings (first token after an INDENT/NEWLINE, or starting at
      column 0) are skipped as well.
    * ``lang == 'ruby'``: ``source`` is returned unchanged.
    * any other language: ``//`` line comments and ``/* */`` block comments
      are each replaced by a single space; quoted strings are left intact.
    """
    if lang == 'ruby':
        return source

    if lang == 'python':
        pieces = []
        prev_type = tokenize.INDENT
        prev_end_row = -1
        prev_end_col = 0
        reader = StringIO(source).readline
        for tok_type, tok_text, (row, col), (end_row, end_col), _line in tokenize.generate_tokens(reader):
            # A token on a new line starts measuring its gap from column 0.
            if row > prev_end_row:
                prev_end_col = 0
            # Re-create the horizontal gap between consecutive tokens.
            if col > prev_end_col:
                pieces.append(" " * (col - prev_end_col))

            if tok_type == tokenize.COMMENT:
                keep = False
            elif tok_type == tokenize.STRING:
                # A string is kept only when it is clearly part of an
                # expression; otherwise it is treated as a docstring.
                keep = (prev_type != tokenize.INDENT
                        and prev_type != tokenize.NEWLINE
                        and col > 0)
            else:
                keep = True
            if keep:
                pieces.append(tok_text)

            prev_type = tok_type
            prev_end_col = end_col
            prev_end_row = end_row
        cleaned = "".join(pieces)
    else:
        # Strings are matched too so that comment markers inside them survive.
        pattern = re.compile(
            r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"',
            re.DOTALL | re.MULTILINE
        )

        def _blank_out_comment(match):
            text = match.group(0)
            # note: a space and not an empty string
            return " " if text.startswith('/') else text

        cleaned = re.sub(pattern, _blank_out_comment, source)

    return '\n'.join(line for line in cleaned.split('\n') if line.strip() != "")
68
+
69
def tree_to_token_index(root_node):
    """Collect ``(start_point, end_point)`` spans of the tokens under ``root_node``.

    A node counts as a token when it has no children or is a string/character
    literal; nodes of type ``'comment'`` never count. Spans are returned in
    left-to-right (pre-order) order.
    """
    literal_types = ('string_literal', 'string', 'character_literal')
    spans = []
    pending = [root_node]
    while pending:
        node = pending.pop()
        is_token = (len(node.children) == 0 or node.type in literal_types) and node.type != 'comment'
        if is_token:
            spans.append((node.start_point, node.end_point))
        else:
            # Push children reversed so they are popped left to right.
            pending.extend(reversed(node.children))
    return spans
77
+
78
def tree_to_variable_index(root_node, index_to_code):
    """Collect spans of tokens under ``root_node`` whose text differs from their node type.

    ``index_to_code`` maps ``(start_point, end_point)`` to an ``(index, text)``
    pair. Tokens whose node type equals their text are left out. Token
    detection follows the same rule as ``tree_to_token_index``: childless
    nodes and string/character literals, never comments.
    """
    literal_types = ('string_literal', 'string', 'character_literal')
    spans = []
    pending = [root_node]
    while pending:
        node = pending.pop()
        is_token = (len(node.children) == 0 or node.type in literal_types) and node.type != 'comment'
        if not is_token:
            # Push children reversed so they are popped left to right.
            pending.extend(reversed(node.children))
            continue
        span = (node.start_point, node.end_point)
        _, text = index_to_code[span]
        if node.type != text:
            spans.append(span)
    return spans
91
+
92
def index_to_code_token(index, code):
    """Return the text covered by ``index`` in ``code``.

    ``index`` is a ``(start_point, end_point)`` pair of ``(row, column)``
    points and ``code`` is a sequence of source lines. For a span that crosses
    lines, the pieces are concatenated directly, with no separator inserted
    between lines.
    """
    start, end = index[0], index[1]
    if start[0] == end[0]:
        return code[start[0]][start[1]:end[1]]

    parts = [code[start[0]][start[1]:]]
    parts.extend(code[row] for row in range(start[0] + 1, end[0]))
    parts.append(code[end[0]][:end[1]])
    return "".join(parts)
104
+
105
+
106
def _python_merge_states(snapshots):
    """Union several variable-state dicts into one with sorted, de-duplicated index lists."""
    combined = {}
    for snapshot in snapshots:
        for name in snapshot:
            combined.setdefault(name, []).extend(snapshot[name])
    return {name: sorted(list(set(positions))) for name, positions in combined.items()}


def _python_dedupe_edges(edges):
    """Merge edges sharing (token, index, relation), unioning their parents; result is ordered by index."""
    grouped = {}
    for token, position, relation, parent_tokens, parent_positions in edges:
        key = (token, position, relation)
        if key in grouped:
            slot = grouped[key]
            slot[0] = list(set(slot[0] + parent_tokens))
            slot[1] = sorted(list(set(slot[1] + parent_positions)))
        else:
            grouped[key] = [parent_tokens, parent_positions]
    ordered = sorted(grouped.items(), key=lambda item: item[0][1])
    return [key + (parents[0], parents[1]) for key, parents in ordered]


def _python_split_sides(node):
    """Pair up the comma-separated 'left' and 'right' children; fall back to the whole sides."""
    left = node.child_by_field_name('left')
    right = node.child_by_field_name('right')
    left_nodes = [child for child in left.children if child.type != ',']
    right_nodes = [child for child in right.children if child.type != ',']
    if len(right_nodes) != len(left_nodes):
        left_nodes, right_nodes = [left], [right]
    if len(left_nodes) == 0:
        left_nodes = [left]
    if len(right_nodes) == 0:
        right_nodes = [right]
    return left_nodes, right_nodes


def _python_link_assignment(left_nodes, right_nodes, index_to_code, states):
    """Emit one 'computedFrom' edge per left token, listing every token of the paired right node."""
    edges = []
    for target, origin in zip(left_nodes, right_nodes):
        target_spans = tree_to_variable_index(target, index_to_code)
        origin_spans = tree_to_variable_index(origin, index_to_code)
        for span in target_spans:
            position, token = index_to_code[span]
            edges.append((token, position, 'computedFrom',
                          [index_to_code[s][1] for s in origin_spans],
                          [index_to_code[s][0] for s in origin_spans]))
            # The assigned token becomes the latest definition of that name.
            states[token] = [position]
    return edges


def DFG_python(root_node, index_to_code, states):
    """Extract data-flow edges for the Python syntax (sub)tree rooted at ``root_node``.

    Args:
        root_node: syntax-tree node exposing ``type``, ``children``,
            ``start_point``, ``end_point`` and ``child_by_field_name``.
        index_to_code: maps ``(start_point, end_point)`` to ``(index, text)``.
        states: maps a variable name to the token indices of its current
            definitions. It is copied on entry, so the caller's dict is not
            modified.

    Returns:
        ``(edges, states)``, where every edge is the tuple
        ``(text, index, relation, parent_texts, parent_indices)`` with
        ``relation`` being ``'comesFrom'`` or ``'computedFrom'``, ordered by
        token index, and ``states`` is the updated variable-state dict.
    """
    by_position = lambda edge: edge[1]
    states = states.copy()
    kind = root_node.type

    is_token = (len(root_node.children) == 0
                or kind in ('string_literal', 'string', 'character_literal')) and kind != 'comment'
    if is_token:
        position, token = index_to_code[(root_node.start_point, root_node.end_point)]
        if kind == token:
            # Keywords and punctuation carry no data flow.
            return [], states
        if token in states:
            return [(token, position, 'comesFrom', [token], states[token].copy())], states
        if kind == 'identifier':
            states[token] = [position]
        return [(token, position, 'comesFrom', [], [])], states

    if kind == 'default_parameter':
        name = root_node.child_by_field_name('name')
        value = root_node.child_by_field_name('value')
        edges = []
        if value is None:
            for span in tree_to_variable_index(name, index_to_code):
                position, token = index_to_code[span]
                edges.append((token, position, 'comesFrom', [], []))
                states[token] = [position]
            return sorted(edges, key=by_position), states
        name_spans = tree_to_variable_index(name, index_to_code)
        value_spans = tree_to_variable_index(value, index_to_code)
        sub_edges, states = DFG_python(value, index_to_code, states)
        edges += sub_edges
        for name_span in name_spans:
            position, token = index_to_code[name_span]
            for value_span in value_spans:
                origin_position, origin_token = index_to_code[value_span]
                edges.append((token, position, 'comesFrom', [origin_token], [origin_position]))
            states[token] = [position]
        return sorted(edges, key=by_position), states

    if kind in ('assignment', 'augmented_assignment', 'for_in_clause'):
        if kind == 'for_in_clause':
            right_nodes = [root_node.children[-1]]
            left_nodes = [root_node.child_by_field_name('left')]
        else:
            if root_node.child_by_field_name('right') is None:
                return [], states
            left_nodes, right_nodes = _python_split_sides(root_node)
        edges = []
        # Right-hand sides are analysed first so they see the pre-assignment states.
        for node in right_nodes:
            sub_edges, states = DFG_python(node, index_to_code, states)
            edges += sub_edges
        edges += _python_link_assignment(left_nodes, right_nodes, index_to_code, states)
        return sorted(edges, key=by_position), states

    if kind == 'if_statement':
        edges = []
        taken_states = states.copy()
        alternatives = []
        has_else = False
        for child in root_node.children:
            if 'else' in child.type:
                has_else = True
            if child.type in ('elif_clause', 'else_clause'):
                # Alternative branches start from the states before the `if`.
                sub_edges, branch_states = DFG_python(child, index_to_code, states)
                edges += sub_edges
                alternatives.append(branch_states)
            else:
                sub_edges, taken_states = DFG_python(child, index_to_code, taken_states)
                edges += sub_edges
        alternatives.append(taken_states)
        if not has_else:
            # Without an else branch the incoming states may flow through unchanged.
            alternatives.append(states)
        return sorted(edges, key=by_position), _python_merge_states(alternatives)

    if kind == 'for_statement':
        edges = []
        # Two passes let definitions made in the body reach the next iteration.
        for _ in range(2):
            left_nodes, right_nodes = _python_split_sides(root_node)
            for node in right_nodes:
                sub_edges, states = DFG_python(node, index_to_code, states)
                edges += sub_edges
            edges += _python_link_assignment(left_nodes, right_nodes, index_to_code, states)
            body = root_node.children[-1]
            if body.type == "block":
                sub_edges, states = DFG_python(body, index_to_code, states)
                edges += sub_edges
        return _python_dedupe_edges(edges), states

    if kind == 'while_statement':
        edges = []
        for _ in range(2):
            for child in root_node.children:
                sub_edges, states = DFG_python(child, index_to_code, states)
                edges += sub_edges
        return _python_dedupe_edges(edges), states

    # Generic node: `for_in_clause` children are visited before their siblings.
    edges = []
    leading = [child for child in root_node.children if child.type == 'for_in_clause']
    trailing = [child for child in root_node.children if child.type != 'for_in_clause']
    for child in leading + trailing:
        sub_edges, states = DFG_python(child, index_to_code, states)
        edges += sub_edges
    return sorted(edges, key=by_position), states
273
+
274
+
275
def _java_merge_states(snapshots):
    """Union several variable-state dicts into one with sorted, de-duplicated index lists."""
    combined = {}
    for snapshot in snapshots:
        for name in snapshot:
            combined.setdefault(name, []).extend(snapshot[name])
    return {name: sorted(list(set(positions))) for name, positions in combined.items()}


def _java_dedupe_edges(edges):
    """Merge edges sharing (token, index, relation), unioning their parents; result is ordered by index."""
    grouped = {}
    for token, position, relation, parent_tokens, parent_positions in edges:
        key = (token, position, relation)
        if key in grouped:
            slot = grouped[key]
            slot[0] = list(set(slot[0] + parent_tokens))
            slot[1] = sorted(list(set(slot[1] + parent_positions)))
        else:
            grouped[key] = [parent_tokens, parent_positions]
    ordered = sorted(grouped.items(), key=lambda item: item[0][1])
    return [key + (parents[0], parents[1]) for key, parents in ordered]


def _java_link_pairs(target_spans, origin_spans, relation, index_to_code, states):
    """Emit one edge per (target, origin) token pair and mark each target as the latest definition."""
    edges = []
    for target in target_spans:
        position, token = index_to_code[target]
        for origin in origin_spans:
            origin_position, origin_token = index_to_code[origin]
            edges.append((token, position, relation, [origin_token], [origin_position]))
        states[token] = [position]
    return edges


def DFG_java(root_node, index_to_code, states):
    """Extract data-flow edges for the Java syntax (sub)tree rooted at ``root_node``.

    Args:
        root_node: syntax-tree node exposing ``type``, ``children``,
            ``start_point``, ``end_point`` and ``child_by_field_name``.
        index_to_code: maps ``(start_point, end_point)`` to ``(index, text)``.
        states: maps a variable name to the token indices of its current
            definitions. It is copied on entry, so the caller's dict is not
            modified.

    Returns:
        ``(edges, states)``, where every edge is the tuple
        ``(text, index, relation, parent_texts, parent_indices)`` with
        ``relation`` being ``'comesFrom'`` or ``'computedFrom'``, ordered by
        token index, and ``states`` is the updated variable-state dict.
    """
    by_position = lambda edge: edge[1]
    branch_kinds = ('if_statement', 'else')
    states = states.copy()
    kind = root_node.type

    is_token = (len(root_node.children) == 0
                or kind in ('string_literal', 'string', 'character_literal')) and kind != 'comment'
    if is_token:
        position, token = index_to_code[(root_node.start_point, root_node.end_point)]
        if kind == token:
            # Keywords and punctuation carry no data flow.
            return [], states
        if token in states:
            return [(token, position, 'comesFrom', [token], states[token].copy())], states
        if kind == 'identifier':
            states[token] = [position]
        return [(token, position, 'comesFrom', [], [])], states

    if kind == 'variable_declarator':
        name = root_node.child_by_field_name('name')
        value = root_node.child_by_field_name('value')
        edges = []
        if value is None:
            for span in tree_to_variable_index(name, index_to_code):
                position, token = index_to_code[span]
                edges.append((token, position, 'comesFrom', [], []))
                states[token] = [position]
            return sorted(edges, key=by_position), states
        name_spans = tree_to_variable_index(name, index_to_code)
        value_spans = tree_to_variable_index(value, index_to_code)
        sub_edges, states = DFG_java(value, index_to_code, states)
        edges += sub_edges
        edges += _java_link_pairs(name_spans, value_spans, 'comesFrom', index_to_code, states)
        return sorted(edges, key=by_position), states

    if kind == 'assignment_expression':
        target = root_node.child_by_field_name('left')
        origin = root_node.child_by_field_name('right')
        # The right-hand side is analysed first so it sees the pre-assignment states.
        edges, states = DFG_java(origin, index_to_code, states)
        edges = list(edges)
        target_spans = tree_to_variable_index(target, index_to_code)
        origin_spans = tree_to_variable_index(origin, index_to_code)
        edges += _java_link_pairs(target_spans, origin_spans, 'computedFrom', index_to_code, states)
        return sorted(edges, key=by_position), states

    if kind == 'update_expression':
        # Every variable token in the expression is computed from all of them.
        spans = tree_to_variable_index(root_node, index_to_code)
        edges = _java_link_pairs(spans, spans, 'computedFrom', index_to_code, states)
        return sorted(edges, key=by_position), states

    if kind in branch_kinds:
        edges = []
        taken_states = states.copy()
        alternatives = []
        reached_alternative = False
        has_else = 'else' in kind
        for child in root_node.children:
            if 'else' in child.type:
                has_else = True
            if reached_alternative or child.type in branch_kinds:
                # From the first branch marker on, children start from the
                # states that held before the `if`.
                reached_alternative = True
                sub_edges, branch_states = DFG_java(child, index_to_code, states)
                edges += sub_edges
                alternatives.append(branch_states)
            else:
                sub_edges, taken_states = DFG_java(child, index_to_code, taken_states)
                edges += sub_edges
        alternatives.append(taken_states)
        if not has_else:
            # Without an else branch the incoming states may flow through unchanged.
            alternatives.append(states)
        return sorted(edges, key=by_position), _java_merge_states(alternatives)

    if kind == 'for_statement':
        edges = []
        for child in root_node.children:
            sub_edges, states = DFG_java(child, index_to_code, states)
            edges += sub_edges
        # Second pass: revisit everything after the loop's variable declaration.
        past_declaration = False
        for child in root_node.children:
            if past_declaration:
                sub_edges, states = DFG_java(child, index_to_code, states)
                edges += sub_edges
            elif child.type == "local_variable_declaration":
                past_declaration = True
        return _java_dedupe_edges(edges), states

    if kind == 'enhanced_for_statement':
        name = root_node.child_by_field_name('name')
        value = root_node.child_by_field_name('value')
        body = root_node.child_by_field_name('body')
        edges = []
        # Two passes let definitions made in the body reach the next iteration.
        for _ in range(2):
            sub_edges, states = DFG_java(value, index_to_code, states)
            edges += sub_edges
            name_spans = tree_to_variable_index(name, index_to_code)
            value_spans = tree_to_variable_index(value, index_to_code)
            edges += _java_link_pairs(name_spans, value_spans, 'computedFrom', index_to_code, states)
            sub_edges, states = DFG_java(body, index_to_code, states)
            edges += sub_edges
        return _java_dedupe_edges(edges), states

    if kind == 'while_statement':
        edges = []
        for _ in range(2):
            for child in root_node.children:
                sub_edges, states = DFG_java(child, index_to_code, states)
                edges += sub_edges
        return _java_dedupe_edges(edges), states

    # Generic node: visit the children in order, threading the states through.
    edges = []
    for child in root_node.children:
        sub_edges, states = DFG_java(child, index_to_code, states)
        edges += sub_edges
    return sorted(edges, key=by_position), states
450
+
451
def _csharp_merge_states(snapshots):
    """Union several variable-state dicts into one with sorted, de-duplicated index lists."""
    combined = {}
    for snapshot in snapshots:
        for name in snapshot:
            combined.setdefault(name, []).extend(snapshot[name])
    return {name: sorted(list(set(positions))) for name, positions in combined.items()}


def _csharp_dedupe_edges(edges):
    """Merge edges sharing (token, index, relation), unioning their parents; result is ordered by index."""
    grouped = {}
    for token, position, relation, parent_tokens, parent_positions in edges:
        key = (token, position, relation)
        if key in grouped:
            slot = grouped[key]
            slot[0] = list(set(slot[0] + parent_tokens))
            slot[1] = sorted(list(set(slot[1] + parent_positions)))
        else:
            grouped[key] = [parent_tokens, parent_positions]
    ordered = sorted(grouped.items(), key=lambda item: item[0][1])
    return [key + (parents[0], parents[1]) for key, parents in ordered]


def _csharp_link_pairs(target_spans, origin_spans, relation, index_to_code, states):
    """Emit one edge per (target, origin) token pair and mark each target as the latest definition."""
    edges = []
    for target in target_spans:
        position, token = index_to_code[target]
        for origin in origin_spans:
            origin_position, origin_token = index_to_code[origin]
            edges.append((token, position, relation, [origin_token], [origin_position]))
        states[token] = [position]
    return edges


def DFG_csharp(root_node, index_to_code, states):
    """Extract data-flow edges for the C# syntax (sub)tree rooted at ``root_node``.

    Args:
        root_node: syntax-tree node exposing ``type``, ``children``,
            ``start_point``, ``end_point`` and ``child_by_field_name``.
        index_to_code: maps ``(start_point, end_point)`` to ``(index, text)``.
        states: maps a variable name to the token indices of its current
            definitions. It is copied on entry, so the caller's dict is not
            modified.

    Returns:
        ``(edges, states)``, where every edge is the tuple
        ``(text, index, relation, parent_texts, parent_indices)`` with
        ``relation`` being ``'comesFrom'`` or ``'computedFrom'``, ordered by
        token index, and ``states`` is the updated variable-state dict.
    """
    by_position = lambda edge: edge[1]
    branch_kinds = ('if_statement', 'else')
    states = states.copy()
    kind = root_node.type

    is_token = (len(root_node.children) == 0
                or kind in ('string_literal', 'string', 'character_literal')) and kind != 'comment'
    if is_token:
        position, token = index_to_code[(root_node.start_point, root_node.end_point)]
        if kind == token:
            # Keywords and punctuation carry no data flow.
            return [], states
        if token in states:
            return [(token, position, 'comesFrom', [token], states[token].copy())], states
        if kind == 'identifier':
            states[token] = [position]
        return [(token, position, 'comesFrom', [], [])], states

    if kind == 'variable_declarator':
        # Exactly two children are read as (name, initialiser); any other
        # shape is treated as a declaration without a value.
        name = root_node.children[0]
        value = root_node.children[1] if len(root_node.children) == 2 else None
        edges = []
        if value is None:
            for span in tree_to_variable_index(name, index_to_code):
                position, token = index_to_code[span]
                edges.append((token, position, 'comesFrom', [], []))
                states[token] = [position]
            return sorted(edges, key=by_position), states
        name_spans = tree_to_variable_index(name, index_to_code)
        value_spans = tree_to_variable_index(value, index_to_code)
        sub_edges, states = DFG_csharp(value, index_to_code, states)
        edges += sub_edges
        edges += _csharp_link_pairs(name_spans, value_spans, 'comesFrom', index_to_code, states)
        return sorted(edges, key=by_position), states

    if kind == 'assignment_expression':
        target = root_node.child_by_field_name('left')
        origin = root_node.child_by_field_name('right')
        # The right-hand side is analysed first so it sees the pre-assignment states.
        edges, states = DFG_csharp(origin, index_to_code, states)
        edges = list(edges)
        target_spans = tree_to_variable_index(target, index_to_code)
        origin_spans = tree_to_variable_index(origin, index_to_code)
        edges += _csharp_link_pairs(target_spans, origin_spans, 'computedFrom', index_to_code, states)
        return sorted(edges, key=by_position), states

    if kind == 'postfix_unary_expression':
        # Every variable token in the expression is computed from all of them.
        spans = tree_to_variable_index(root_node, index_to_code)
        edges = _csharp_link_pairs(spans, spans, 'computedFrom', index_to_code, states)
        return sorted(edges, key=by_position), states

    if kind in branch_kinds:
        edges = []
        taken_states = states.copy()
        alternatives = []
        reached_alternative = False
        has_else = 'else' in kind
        for child in root_node.children:
            if 'else' in child.type:
                has_else = True
            if reached_alternative or child.type in branch_kinds:
                # From the first branch marker on, children start from the
                # states that held before the `if`.
                reached_alternative = True
                sub_edges, branch_states = DFG_csharp(child, index_to_code, states)
                edges += sub_edges
                alternatives.append(branch_states)
            else:
                sub_edges, taken_states = DFG_csharp(child, index_to_code, taken_states)
                edges += sub_edges
        alternatives.append(taken_states)
        if not has_else:
            # Without an else branch the incoming states may flow through unchanged.
            alternatives.append(states)
        return sorted(edges, key=by_position), _csharp_merge_states(alternatives)

    if kind == 'for_statement':
        edges = []
        for child in root_node.children:
            sub_edges, states = DFG_csharp(child, index_to_code, states)
            edges += sub_edges
        # Second pass: revisit everything after a `local_variable_declaration` child.
        past_declaration = False
        for child in root_node.children:
            if past_declaration:
                sub_edges, states = DFG_csharp(child, index_to_code, states)
                edges += sub_edges
            elif child.type == "local_variable_declaration":
                past_declaration = True
        return _csharp_dedupe_edges(edges), states

    if kind == 'for_each_statement':
        name = root_node.child_by_field_name('left')
        value = root_node.child_by_field_name('right')
        body = root_node.child_by_field_name('body')
        edges = []
        # Two passes let definitions made in the body reach the next iteration.
        for _ in range(2):
            sub_edges, states = DFG_csharp(value, index_to_code, states)
            edges += sub_edges
            name_spans = tree_to_variable_index(name, index_to_code)
            value_spans = tree_to_variable_index(value, index_to_code)
            edges += _csharp_link_pairs(name_spans, value_spans, 'computedFrom', index_to_code, states)
            sub_edges, states = DFG_csharp(body, index_to_code, states)
            edges += sub_edges
        return _csharp_dedupe_edges(edges), states

    if kind == 'while_statement':
        edges = []
        for _ in range(2):
            for child in root_node.children:
                sub_edges, states = DFG_csharp(child, index_to_code, states)
                edges += sub_edges
        return _csharp_dedupe_edges(edges), states

    # Generic node: visit the children in order, threading the states through.
    edges = []
    for child in root_node.children:
        sub_edges, states = DFG_csharp(child, index_to_code, states)
        edges += sub_edges
    return sorted(edges, key=by_position), states
630
+
631
+
632
+
633
+
634
def _ruby_merge_states(snapshots):
    """Union several variable-state dicts into one with sorted, de-duplicated index lists."""
    combined = {}
    for snapshot in snapshots:
        for name in snapshot:
            combined.setdefault(name, []).extend(snapshot[name])
    return {name: sorted(list(set(positions))) for name, positions in combined.items()}


def _ruby_dedupe_edges(edges):
    """Merge edges sharing (token, index, relation), unioning their parents; result is ordered by index."""
    grouped = {}
    for token, position, relation, parent_tokens, parent_positions in edges:
        key = (token, position, relation)
        if key in grouped:
            slot = grouped[key]
            slot[0] = list(set(slot[0] + parent_tokens))
            slot[1] = sorted(list(set(slot[1] + parent_positions)))
        else:
            grouped[key] = [parent_tokens, parent_positions]
    ordered = sorted(grouped.items(), key=lambda item: item[0][1])
    return [key + (parents[0], parents[1]) for key, parents in ordered]


def _ruby_link_assignment(left_nodes, right_nodes, index_to_code, states):
    """Emit one 'computedFrom' edge per left token, listing every token of the paired right node."""
    edges = []
    for target, origin in zip(left_nodes, right_nodes):
        target_spans = tree_to_variable_index(target, index_to_code)
        origin_spans = tree_to_variable_index(origin, index_to_code)
        for span in target_spans:
            position, token = index_to_code[span]
            edges.append((token, position, 'computedFrom',
                          [index_to_code[s][1] for s in origin_spans],
                          [index_to_code[s][0] for s in origin_spans]))
            # The assigned token becomes the latest definition of that name.
            states[token] = [position]
    return edges


def DFG_ruby(root_node, index_to_code, states):
    """Extract data-flow edges for the Ruby syntax (sub)tree rooted at ``root_node``.

    Args:
        root_node: syntax-tree node exposing ``type``, ``children``,
            ``start_point``, ``end_point`` and ``child_by_field_name``.
        index_to_code: maps ``(start_point, end_point)`` to ``(index, text)``.
        states: maps a variable name to the token indices of its current
            definitions. It is copied on entry for every node kind, so the
            caller's dict is never modified.

    Returns:
        ``(edges, states)``, where every edge is the tuple
        ``(text, index, relation, parent_texts, parent_indices)`` with
        ``relation`` being ``'comesFrom'`` or ``'computedFrom'``, ordered by
        token index, and ``states`` is the updated variable-state dict.
    """
    by_position = lambda edge: edge[1]
    branch_kinds = ('if', 'elsif', 'else', 'unless', 'when')
    # Copy up front (not only for leaf tokens) so that the assignment,
    # parameter and loop branches below cannot write into the caller's dict.
    states = states.copy()
    kind = root_node.type

    is_token = (len(root_node.children) == 0
                or kind in ('string_literal', 'string', 'character_literal')) and kind != 'comment'
    if is_token:
        position, token = index_to_code[(root_node.start_point, root_node.end_point)]
        if kind == token:
            # Keywords and punctuation carry no data flow.
            return [], states
        if token in states:
            return [(token, position, 'comesFrom', [token], states[token].copy())], states
        if kind == 'identifier':
            states[token] = [position]
        return [(token, position, 'comesFrom', [], [])], states

    if kind == 'keyword_parameter':
        name = root_node.child_by_field_name('name')
        value = root_node.child_by_field_name('value')
        edges = []
        if value is None:
            for span in tree_to_variable_index(name, index_to_code):
                position, token = index_to_code[span]
                edges.append((token, position, 'comesFrom', [], []))
                states[token] = [position]
            return sorted(edges, key=by_position), states
        name_spans = tree_to_variable_index(name, index_to_code)
        value_spans = tree_to_variable_index(value, index_to_code)
        sub_edges, states = DFG_ruby(value, index_to_code, states)
        edges += sub_edges
        for name_span in name_spans:
            position, token = index_to_code[name_span]
            for value_span in value_spans:
                origin_position, origin_token = index_to_code[value_span]
                edges.append((token, position, 'comesFrom', [origin_token], [origin_position]))
            states[token] = [position]
        return sorted(edges, key=by_position), states

    if kind in ('assignment', 'operator_assignment'):
        left = root_node.child_by_field_name('left')
        right = root_node.child_by_field_name('right')
        left_nodes = [child for child in left.children if child.type != ',']
        right_nodes = [child for child in right.children if child.type != ',']
        # Pair element-wise only when both sides have the same arity.
        if len(right_nodes) != len(left_nodes):
            left_nodes, right_nodes = [left], [right]
        if len(left_nodes) == 0:
            left_nodes = [left]
        if len(right_nodes) == 0:
            right_nodes = [right]
        if kind == "operator_assignment":
            left_nodes = [root_node.children[0]]
            right_nodes = [root_node.children[-1]]
        edges = []
        # Right-hand sides are analysed first so they see the pre-assignment states.
        for node in right_nodes:
            sub_edges, states = DFG_ruby(node, index_to_code, states)
            edges += sub_edges
        edges += _ruby_link_assignment(left_nodes, right_nodes, index_to_code, states)
        return sorted(edges, key=by_position), states

    if kind in branch_kinds:
        edges = []
        taken_states = states.copy()
        alternatives = []
        has_else = 'else' in kind
        for child in root_node.children:
            if 'else' in child.type:
                has_else = True
            if child.type in branch_kinds:
                # Alternative branches start from the states before this node.
                sub_edges, branch_states = DFG_ruby(child, index_to_code, states)
                edges += sub_edges
                alternatives.append(branch_states)
            else:
                sub_edges, taken_states = DFG_ruby(child, index_to_code, taken_states)
                edges += sub_edges
        alternatives.append(taken_states)
        if not has_else:
            # Without an else branch the incoming states may flow through unchanged.
            alternatives.append(states)
        return sorted(edges, key=by_position), _ruby_merge_states(alternatives)

    if kind == 'for':
        edges = []
        # Two passes let definitions made in the body reach the next iteration.
        for _ in range(2):
            left_nodes = [root_node.child_by_field_name('pattern')]
            right_nodes = [root_node.child_by_field_name('value')]
            for node in right_nodes:
                sub_edges, states = DFG_ruby(node, index_to_code, states)
                edges += sub_edges
            edges += _ruby_link_assignment(left_nodes, right_nodes, index_to_code, states)
            sub_edges, states = DFG_ruby(root_node.child_by_field_name('body'), index_to_code, states)
            edges += sub_edges
        return _ruby_dedupe_edges(edges), states

    if kind in ('while_modifier', 'until'):
        edges = []
        for _ in range(2):
            for child in root_node.children:
                sub_edges, states = DFG_ruby(child, index_to_code, states)
                edges += sub_edges
        return _ruby_dedupe_edges(edges), states

    # Generic node: visit the children in order, threading the states through.
    edges = []
    for child in root_node.children:
        sub_edges, states = DFG_ruby(child, index_to_code, states)
        edges += sub_edges
    return sorted(edges, key=by_position), states
792
+
793
def DFG_go(root_node, index_to_code, states):
    """Collect data-flow edges for the Go syntax subtree rooted at ``root_node``.

    Args:
        root_node: Syntax-tree node exposing ``type``, ``children``,
            ``start_point``, ``end_point`` and ``child_by_field_name``.
        index_to_code: Mapping from ``(start_point, end_point)`` to the
            ``(idx, code)`` pair of the token at that position.
        states: Mapping from a token's code to the list of token indices it
            currently comes from. It is shallow-copied on entry, so the
            caller's dict is never reassigned into.

    Returns:
        A pair ``(edges, states)``. Each edge is a tuple
        ``(code, idx, relation, source_codes, source_idxs)`` whose ``relation``
        is ``'comesFrom'`` or ``'computedFrom'``; edges are ordered by ``idx``.
        ``states`` is the updated copy.
    """
    assignment_types = ('assignment_statement',)
    declaration_types = ('var_spec',)
    increment_types = ('inc_statement',)
    branch_types = ('if_statement', 'else')
    loop_types = ('for_statement',)
    literal_types = ('string_literal', 'string', 'character_literal')
    # Child types that must be visited before their siblings (none for Go).
    run_first_types = ()

    node_type = root_node.type
    states = states.copy()

    # --- Leaf token, or a literal that is handled as a single token. ---
    if (len(root_node.children) == 0 or node_type in literal_types) and node_type != 'comment':
        idx, code = index_to_code[(root_node.start_point, root_node.end_point)]
        if node_type == code:
            # The node type is the token text itself (presumably keywords and
            # punctuation); such tokens carry no data flow.
            return [], states
        if code in states:
            return [(code, idx, 'comesFrom', [code], states[code].copy())], states
        if node_type == 'identifier':
            states[code] = [idx]
        return [(code, idx, 'comesFrom', [], [])], states

    # --- Variable declaration: name [= value]. ---
    if node_type in declaration_types:
        name = root_node.child_by_field_name('name')
        value = root_node.child_by_field_name('value')
        edges = []
        if value is None:
            for key in tree_to_variable_index(name, index_to_code):
                idx, code = index_to_code[key]
                edges.append((code, idx, 'comesFrom', [], []))
                states[code] = [idx]
            return sorted(edges, key=lambda edge: edge[1]), states
        target_keys = tree_to_variable_index(name, index_to_code)
        source_keys = tree_to_variable_index(value, index_to_code)
        sub_edges, states = DFG_go(value, index_to_code, states)
        edges.extend(sub_edges)
        for target_key in target_keys:
            target_idx, target_code = index_to_code[target_key]
            edges.extend(
                (target_code, target_idx, 'comesFrom', [source_code], [source_idx])
                for source_idx, source_code in (index_to_code[k] for k in source_keys)
            )
            states[target_code] = [target_idx]
        return sorted(edges, key=lambda edge: edge[1]), states

    # --- Assignment: every left variable is computed from every right one. ---
    if node_type in assignment_types:
        left = root_node.child_by_field_name('left')
        right = root_node.child_by_field_name('right')
        edges = []
        sub_edges, states = DFG_go(right, index_to_code, states)
        edges.extend(sub_edges)
        target_keys = tree_to_variable_index(left, index_to_code)
        source_keys = tree_to_variable_index(right, index_to_code)
        for target_key in target_keys:
            target_idx, target_code = index_to_code[target_key]
            edges.extend(
                (target_code, target_idx, 'computedFrom', [source_code], [source_idx])
                for source_idx, source_code in (index_to_code[k] for k in source_keys)
            )
            states[target_code] = [target_idx]
        return sorted(edges, key=lambda edge: edge[1]), states

    # --- Increment/decrement: each variable is computed from all of them. ---
    if node_type in increment_types:
        edges = []
        keys = tree_to_variable_index(root_node, index_to_code)
        for target_key in keys:
            target_idx, target_code = index_to_code[target_key]
            edges.extend(
                (target_code, target_idx, 'computedFrom', [source_code], [source_idx])
                for source_idx, source_code in (index_to_code[k] for k in keys)
            )
            states[target_code] = [target_idx]
        return sorted(edges, key=lambda edge: edge[1]), states

    # --- if / else: analyse each branch from the incoming states, then merge. ---
    if node_type in branch_types:
        edges = []
        main_states = states.copy()
        snapshots = []
        has_else = 'else' in node_type or any('else' in child.type for child in root_node.children)
        reached_branch = False
        for child in root_node.children:
            if reached_branch or child.type in branch_types:
                # From the first nested branch onward, every child starts again
                # from the states that were live before the whole statement.
                reached_branch = True
                sub_edges, branch_states = DFG_go(child, index_to_code, states)
                edges.extend(sub_edges)
                snapshots.append(branch_states)
            else:
                sub_edges, main_states = DFG_go(child, index_to_code, main_states)
                edges.extend(sub_edges)
        snapshots.append(main_states)
        if not has_else:
            snapshots.append(states)
        merged = {}
        for snapshot in snapshots:
            for name, positions in snapshot.items():
                if name in merged:
                    merged[name] += positions
                else:
                    merged[name] = positions.copy()
        for name in states:
            if name in merged:
                merged[name] += states[name]
            else:
                merged[name] = states[name]
        merged = {name: sorted(set(positions)) for name, positions in merged.items()}
        return sorted(edges, key=lambda edge: edge[1]), merged

    # --- for loop: visit everything once, then revisit the update and body. ---
    if node_type in loop_types:
        edges = []
        for child in root_node.children:
            sub_edges, states = DFG_go(child, index_to_code, states)
            edges.extend(sub_edges)
        past_clause = False
        for child in root_node.children:
            if past_clause:
                sub_edges, states = DFG_go(child, index_to_code, states)
                edges.extend(sub_edges)
            elif child.type == "for_clause":
                update = child.child_by_field_name('update')
                if update is not None:
                    sub_edges, states = DFG_go(update, index_to_code, states)
                    edges.extend(sub_edges)
                past_clause = True
        # Collapse edges that share (code, idx, relation), uniting their sources.
        grouped = {}
        for code, idx, relation, source_codes, source_idxs in edges:
            key = (code, idx, relation)
            if key not in grouped:
                grouped[key] = [source_codes, source_idxs]
            else:
                slot = grouped[key]
                slot[0] = list(set(slot[0] + source_codes))
                slot[1] = sorted(set(slot[1] + source_idxs))
        ordered = sorted(grouped.items(), key=lambda item: item[0][1])
        return [(key[0], key[1], key[2], slot[0], slot[1]) for key, slot in ordered], states

    # --- Any other node: recurse into the children in order. ---
    edges = []
    prioritized = [child for child in root_node.children if child.type in run_first_types]
    remaining = [child for child in root_node.children if child.type not in run_first_types]
    for child in prioritized + remaining:
        sub_edges, states = DFG_go(child, index_to_code, states)
        edges.extend(sub_edges)
    return sorted(edges, key=lambda edge: edge[1]), states
934
+
935
+
936
+
937
+
938
def DFG_php(root_node, index_to_code, states):
    """Collect data-flow edges for the PHP syntax subtree rooted at ``root_node``.

    Args:
        root_node: Syntax-tree node exposing ``type``, ``children``,
            ``start_point``, ``end_point`` and ``child_by_field_name``.
        index_to_code: Mapping from ``(start_point, end_point)`` to the
            ``(idx, code)`` pair of the token at that position.
        states: Mapping from a token's code to the list of token indices it
            currently comes from. It is shallow-copied on entry, so the
            caller's dict is never reassigned into.

    Returns:
        A pair ``(edges, states)``. Each edge is a tuple
        ``(code, idx, relation, source_codes, source_idxs)`` whose ``relation``
        is ``'comesFrom'`` or ``'computedFrom'``; edges are ordered by ``idx``.
        ``states`` is the updated copy.
    """
    assignment_types = ('assignment_expression', 'augmented_assignment_expression')
    declaration_types = ('simple_parameter',)
    increment_types = ('update_expression',)
    branch_types = ('if_statement', 'else_clause')
    loop_types = ('for_statement',)
    foreach_types = ('foreach_statement',)
    while_types = ('while_statement',)
    literal_types = ('string_literal', 'string', 'character_literal')
    # Child types that must be visited before their siblings (none for PHP).
    run_first_types = ()

    node_type = root_node.type
    states = states.copy()

    # --- Leaf token, or a literal that is handled as a single token. ---
    if (len(root_node.children) == 0 or node_type in literal_types) and node_type != 'comment':
        idx, code = index_to_code[(root_node.start_point, root_node.end_point)]
        if node_type == code:
            # The node type is the token text itself (presumably keywords and
            # punctuation); such tokens carry no data flow.
            return [], states
        if code in states:
            return [(code, idx, 'comesFrom', [code], states[code].copy())], states
        if node_type == 'identifier':
            states[code] = [idx]
        return [(code, idx, 'comesFrom', [], [])], states

    # --- Parameter declaration: name [= default_value]. ---
    if node_type in declaration_types:
        name = root_node.child_by_field_name('name')
        value = root_node.child_by_field_name('default_value')
        edges = []
        if value is None:
            for key in tree_to_variable_index(name, index_to_code):
                idx, code = index_to_code[key]
                edges.append((code, idx, 'comesFrom', [], []))
                states[code] = [idx]
            return sorted(edges, key=lambda edge: edge[1]), states
        target_keys = tree_to_variable_index(name, index_to_code)
        source_keys = tree_to_variable_index(value, index_to_code)
        sub_edges, states = DFG_php(value, index_to_code, states)
        edges.extend(sub_edges)
        for target_key in target_keys:
            target_idx, target_code = index_to_code[target_key]
            edges.extend(
                (target_code, target_idx, 'comesFrom', [source_code], [source_idx])
                for source_idx, source_code in (index_to_code[k] for k in source_keys)
            )
            states[target_code] = [target_idx]
        return sorted(edges, key=lambda edge: edge[1]), states

    # --- Assignment: every left variable is computed from every right one. ---
    if node_type in assignment_types:
        left = root_node.child_by_field_name('left')
        right = root_node.child_by_field_name('right')
        edges = []
        sub_edges, states = DFG_php(right, index_to_code, states)
        edges.extend(sub_edges)
        target_keys = tree_to_variable_index(left, index_to_code)
        source_keys = tree_to_variable_index(right, index_to_code)
        for target_key in target_keys:
            target_idx, target_code = index_to_code[target_key]
            edges.extend(
                (target_code, target_idx, 'computedFrom', [source_code], [source_idx])
                for source_idx, source_code in (index_to_code[k] for k in source_keys)
            )
            states[target_code] = [target_idx]
        return sorted(edges, key=lambda edge: edge[1]), states

    # --- Increment/decrement: each variable is computed from all of them. ---
    if node_type in increment_types:
        edges = []
        keys = tree_to_variable_index(root_node, index_to_code)
        for target_key in keys:
            target_idx, target_code = index_to_code[target_key]
            edges.extend(
                (target_code, target_idx, 'computedFrom', [source_code], [source_idx])
                for source_idx, source_code in (index_to_code[k] for k in keys)
            )
            states[target_code] = [target_idx]
        return sorted(edges, key=lambda edge: edge[1]), states

    # --- if / else: analyse each branch from the incoming states, then merge. ---
    if node_type in branch_types:
        edges = []
        main_states = states.copy()
        snapshots = []
        reached_branch = False
        for child in root_node.children:
            if reached_branch or child.type in branch_types:
                # From the first nested branch onward, every child starts again
                # from the states that were live before the whole statement.
                reached_branch = True
                sub_edges, branch_states = DFG_php(child, index_to_code, states)
                edges.extend(sub_edges)
                snapshots.append(branch_states)
            else:
                sub_edges, main_states = DFG_php(child, index_to_code, main_states)
                edges.extend(sub_edges)
        snapshots.append(main_states)
        merged = {}
        for snapshot in snapshots:
            for name, positions in snapshot.items():
                if name in merged:
                    merged[name] += positions
                else:
                    merged[name] = positions.copy()
        # The incoming states are always folded into the merged result.
        for name in states:
            if name in merged:
                merged[name] += states[name]
            else:
                merged[name] = states[name]
        merged = {name: sorted(set(positions)) for name, positions in merged.items()}
        return sorted(edges, key=lambda edge: edge[1]), merged

    # --- for loop: visit everything once, then revisit what follows the init. ---
    if node_type in loop_types:
        edges = []
        for child in root_node.children:
            sub_edges, states = DFG_php(child, index_to_code, states)
            edges.extend(sub_edges)
        past_init = False
        for child in root_node.children:
            if past_init:
                sub_edges, states = DFG_php(child, index_to_code, states)
                edges.extend(sub_edges)
            elif child.type == "assignment_expression":
                past_init = True
        grouped = {}
        for code, idx, relation, source_codes, source_idxs in edges:
            key = (code, idx, relation)
            if key not in grouped:
                grouped[key] = [source_codes, source_idxs]
            else:
                slot = grouped[key]
                slot[0] = list(set(slot[0] + source_codes))
                slot[1] = sorted(set(slot[1] + source_idxs))
        ordered = sorted(grouped.items(), key=lambda item: item[0][1])
        return [(key[0], key[1], key[2], slot[0], slot[1]) for key, slot in ordered], states

    # --- foreach: the second variable_name is computed from the first. ---
    if node_type in foreach_types:
        variables = [child for child in root_node.children if child.type == 'variable_name']
        value = variables[0] if variables else None
        # NOTE(review): with fewer than two variable_name children, ``name``
        # (and possibly ``value``) stays None and is handed to the helpers
        # below as-is — confirm that this case cannot occur or is handled.
        name = variables[1] if len(variables) > 1 else None
        body = root_node.child_by_field_name('body')
        edges = []
        # Two passes, so that state produced by the body is visible to a
        # second visit of the header and body.
        for _ in range(2):
            sub_edges, states = DFG_php(value, index_to_code, states)
            edges.extend(sub_edges)
            target_keys = tree_to_variable_index(name, index_to_code)
            source_keys = tree_to_variable_index(value, index_to_code)
            for target_key in target_keys:
                target_idx, target_code = index_to_code[target_key]
                edges.extend(
                    (target_code, target_idx, 'computedFrom', [source_code], [source_idx])
                    for source_idx, source_code in (index_to_code[k] for k in source_keys)
                )
                states[target_code] = [target_idx]
            sub_edges, states = DFG_php(body, index_to_code, states)
            edges.extend(sub_edges)
        grouped = {}
        for code, idx, relation, source_codes, source_idxs in edges:
            key = (code, idx, relation)
            if key not in grouped:
                grouped[key] = [source_codes, source_idxs]
            else:
                slot = grouped[key]
                slot[0] = list(set(slot[0] + source_codes))
                slot[1] = sorted(set(slot[1] + source_idxs))
        ordered = sorted(grouped.items(), key=lambda item: item[0][1])
        return [(key[0], key[1], key[2], slot[0], slot[1]) for key, slot in ordered], states

    # --- while loop: visit all children twice, then collapse duplicates. ---
    if node_type in while_types:
        edges = []
        for _ in range(2):
            for child in root_node.children:
                sub_edges, states = DFG_php(child, index_to_code, states)
                edges.extend(sub_edges)
        grouped = {}
        for code, idx, relation, source_codes, source_idxs in edges:
            key = (code, idx, relation)
            if key not in grouped:
                grouped[key] = [source_codes, source_idxs]
            else:
                slot = grouped[key]
                slot[0] = list(set(slot[0] + source_codes))
                slot[1] = sorted(set(slot[1] + source_idxs))
        ordered = sorted(grouped.items(), key=lambda item: item[0][1])
        return [(key[0], key[1], key[2], slot[0], slot[1]) for key, slot in ordered], states

    # --- Any other node: recurse into the children in order. ---
    edges = []
    prioritized = [child for child in root_node.children if child.type in run_first_types]
    remaining = [child for child in root_node.children if child.type not in run_first_types]
    for child in prioritized + remaining:
        sub_edges, states = DFG_php(child, index_to_code, states)
        edges.extend(sub_edges)
    return sorted(edges, key=lambda edge: edge[1]), states
1122
+
1123
+
1124
def DFG_javascript(root_node, index_to_code, states):
    """Collect data-flow edges for the JavaScript subtree rooted at ``root_node``.

    Args:
        root_node: Syntax-tree node exposing ``type``, ``children``,
            ``start_point``, ``end_point`` and ``child_by_field_name``.
        index_to_code: Mapping from ``(start_point, end_point)`` to the
            ``(idx, code)`` pair of the token at that position.
        states: Mapping from a token's code to the list of token indices it
            currently comes from. It is shallow-copied on entry, so the
            caller's dict is never reassigned into.

    Returns:
        A pair ``(edges, states)``. Each edge is a tuple
        ``(code, idx, relation, source_codes, source_idxs)`` whose ``relation``
        is ``'comesFrom'`` or ``'computedFrom'``; edges are ordered by ``idx``.
        ``states`` is the updated copy.
    """
    assignment_types = ('assignment_pattern', 'augmented_assignment_expression')
    declaration_types = ('variable_declarator',)
    increment_types = ('update_expression',)
    branch_types = ('if_statement', 'else')
    loop_types = ('for_statement',)
    while_types = ('while_statement',)
    literal_types = ('string_literal', 'string', 'character_literal')
    # Child types that must be visited before their siblings (none for JS).
    run_first_types = ()

    node_type = root_node.type
    states = states.copy()

    # --- Leaf token, or a literal that is handled as a single token. ---
    if (len(root_node.children) == 0 or node_type in literal_types) and node_type != 'comment':
        idx, code = index_to_code[(root_node.start_point, root_node.end_point)]
        if node_type == code:
            # The node type is the token text itself (presumably keywords and
            # punctuation); such tokens carry no data flow.
            return [], states
        if code in states:
            return [(code, idx, 'comesFrom', [code], states[code].copy())], states
        if node_type == 'identifier':
            states[code] = [idx]
        return [(code, idx, 'comesFrom', [], [])], states

    # --- Variable declarator: name [= value]. ---
    if node_type in declaration_types:
        name = root_node.child_by_field_name('name')
        value = root_node.child_by_field_name('value')
        edges = []
        if value is None:
            for key in tree_to_variable_index(name, index_to_code):
                idx, code = index_to_code[key]
                edges.append((code, idx, 'comesFrom', [], []))
                states[code] = [idx]
            return sorted(edges, key=lambda edge: edge[1]), states
        target_keys = tree_to_variable_index(name, index_to_code)
        source_keys = tree_to_variable_index(value, index_to_code)
        sub_edges, states = DFG_javascript(value, index_to_code, states)
        edges.extend(sub_edges)
        for target_key in target_keys:
            target_idx, target_code = index_to_code[target_key]
            edges.extend(
                (target_code, target_idx, 'comesFrom', [source_code], [source_idx])
                for source_idx, source_code in (index_to_code[k] for k in source_keys)
            )
            states[target_code] = [target_idx]
        return sorted(edges, key=lambda edge: edge[1]), states

    # --- Assignment: every left variable is computed from every right one. ---
    if node_type in assignment_types:
        left = root_node.child_by_field_name('left')
        right = root_node.child_by_field_name('right')
        edges = []
        sub_edges, states = DFG_javascript(right, index_to_code, states)
        edges.extend(sub_edges)
        target_keys = tree_to_variable_index(left, index_to_code)
        source_keys = tree_to_variable_index(right, index_to_code)
        for target_key in target_keys:
            target_idx, target_code = index_to_code[target_key]
            edges.extend(
                (target_code, target_idx, 'computedFrom', [source_code], [source_idx])
                for source_idx, source_code in (index_to_code[k] for k in source_keys)
            )
            states[target_code] = [target_idx]
        return sorted(edges, key=lambda edge: edge[1]), states

    # --- Increment/decrement: each variable is computed from all of them. ---
    if node_type in increment_types:
        edges = []
        keys = tree_to_variable_index(root_node, index_to_code)
        for target_key in keys:
            target_idx, target_code = index_to_code[target_key]
            edges.extend(
                (target_code, target_idx, 'computedFrom', [source_code], [source_idx])
                for source_idx, source_code in (index_to_code[k] for k in keys)
            )
            states[target_code] = [target_idx]
        return sorted(edges, key=lambda edge: edge[1]), states

    # --- if / else: analyse each branch from the incoming states, then merge. ---
    if node_type in branch_types:
        edges = []
        main_states = states.copy()
        snapshots = []
        has_else = 'else' in node_type or any('else' in child.type for child in root_node.children)
        reached_branch = False
        for child in root_node.children:
            if reached_branch or child.type in branch_types:
                # From the first nested branch onward, every child starts again
                # from the states that were live before the whole statement.
                reached_branch = True
                sub_edges, branch_states = DFG_javascript(child, index_to_code, states)
                edges.extend(sub_edges)
                snapshots.append(branch_states)
            else:
                sub_edges, main_states = DFG_javascript(child, index_to_code, main_states)
                edges.extend(sub_edges)
        snapshots.append(main_states)
        if not has_else:
            snapshots.append(states)
        merged = {}
        for snapshot in snapshots:
            for name, positions in snapshot.items():
                if name in merged:
                    merged[name] += positions
                else:
                    merged[name] = positions.copy()
        for name in states:
            if name in merged:
                merged[name] += states[name]
            else:
                merged[name] = states[name]
        merged = {name: sorted(set(positions)) for name, positions in merged.items()}
        return sorted(edges, key=lambda edge: edge[1]), merged

    # --- for loop: visit everything once, then revisit what follows the init. ---
    if node_type in loop_types:
        edges = []
        for child in root_node.children:
            sub_edges, states = DFG_javascript(child, index_to_code, states)
            edges.extend(sub_edges)
        past_init = False
        for child in root_node.children:
            if past_init:
                sub_edges, states = DFG_javascript(child, index_to_code, states)
                edges.extend(sub_edges)
            elif child.type == "variable_declaration":
                past_init = True
        grouped = {}
        for code, idx, relation, source_codes, source_idxs in edges:
            key = (code, idx, relation)
            if key not in grouped:
                grouped[key] = [source_codes, source_idxs]
            else:
                slot = grouped[key]
                slot[0] = list(set(slot[0] + source_codes))
                slot[1] = sorted(set(slot[1] + source_idxs))
        ordered = sorted(grouped.items(), key=lambda item: item[0][1])
        return [(key[0], key[1], key[2], slot[0], slot[1]) for key, slot in ordered], states

    # --- while loop: visit all children twice, then collapse duplicates. ---
    if node_type in while_types:
        edges = []
        for _ in range(2):
            for child in root_node.children:
                sub_edges, states = DFG_javascript(child, index_to_code, states)
                edges.extend(sub_edges)
        grouped = {}
        for code, idx, relation, source_codes, source_idxs in edges:
            key = (code, idx, relation)
            if key not in grouped:
                grouped[key] = [source_codes, source_idxs]
            else:
                slot = grouped[key]
                slot[0] = list(set(slot[0] + source_codes))
                slot[1] = sorted(set(slot[1] + source_idxs))
        ordered = sorted(grouped.items(), key=lambda item: item[0][1])
        return [(key[0], key[1], key[2], slot[0], slot[1]) for key, slot in ordered], states

    # --- Any other node: recurse into the children in order. ---
    edges = []
    prioritized = [child for child in root_node.children if child.type in run_first_types]
    remaining = [child for child in root_node.children if child.type not in run_first_types]
    for child in prioritized + remaining:
        sub_edges, states = DFG_javascript(child, index_to_code, states)
        edges.extend(sub_edges)
    return sorted(edges, key=lambda edge: edge[1]), states
1277
+
1278
  dfg_function={
1279
  'python':DFG_python,
1280
  'java':DFG_java,