# codebleu/parser_DFG.py
# (repository page residue: "dvitel's picture", commit "init", dbf7be3)
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
from tree_sitter import Language, Parser
from .parser_utils import (remove_comments_and_docstrings,
tree_to_token_index,
index_to_code_token,
tree_to_variable_index)
def DFG_python(root_node, index_to_code, states):
    """Build the data-flow graph (DFG) of a Python tree-sitter subtree.

    The tree is walked recursively; the node type selects how the node is
    handled (token, default parameter, assignment, ``if``, ``for``,
    ``while`` or generic container).

    Args:
        root_node: Syntax-tree node. Only ``type``, ``children``,
            ``start_point``, ``end_point`` and ``child_by_field_name`` are
            used.
        index_to_code: Maps ``(start_point, end_point)`` of a token to
            ``(token_index, token_text)``.
        states: Maps a variable name to the token indices that currently
            define it. It is copied on entry, so the caller's dict is left
            untouched.

    Returns:
        ``(edges, states)``. ``edges`` is a list of
        ``(name, index, relation, parent_names, parent_indices)`` tuples
        sorted by ``index``, with ``relation`` being ``'comesFrom'`` or
        ``'computedFrom'``; ``states`` is the updated mapping.
    """
    assignment = ['assignment', 'augmented_assignment', 'for_in_clause']
    if_statement = ['if_statement']
    for_statement = ['for_statement']
    while_statement = ['while_statement']
    do_first_statement = ['for_in_clause']
    def_statement = ['default_parameter']
    states = states.copy()

    # --- single token (or string literal treated as one token) -------------
    if ((not root_node.children
         or root_node.type in ['string_literal', 'string', 'character_literal'])
            and root_node.type != 'comment'):
        idx, code = index_to_code[(root_node.start_point, root_node.end_point)]
        if root_node.type == code:
            # Node type equals its own text (keyword/punctuation): no edge.
            return [], states
        if code in states:
            return [(code, idx, 'comesFrom', [code], states[code].copy())], states
        if root_node.type == 'identifier':
            states[code] = [idx]
        return [(code, idx, 'comesFrom', [], [])], states

    # --- parameter with optional default value ------------------------------
    if root_node.type in def_statement:
        name = root_node.child_by_field_name('name')
        value = root_node.child_by_field_name('value')
        DFG = []
        if value is None:
            for index in tree_to_variable_index(name, index_to_code):
                idx, code = index_to_code[index]
                DFG.append((code, idx, 'comesFrom', [], []))
                states[code] = [idx]
            return sorted(DFG, key=lambda x: x[1]), states
        name_indexs = tree_to_variable_index(name, index_to_code)
        value_indexs = tree_to_variable_index(value, index_to_code)
        temp, states = DFG_python(value, index_to_code, states)
        DFG += temp
        for index1 in name_indexs:
            idx1, code1 = index_to_code[index1]
            for index2 in value_indexs:
                idx2, code2 = index_to_code[index2]
                DFG.append((code1, idx1, 'comesFrom', [code2], [idx2]))
            states[code1] = [idx1]
        return sorted(DFG, key=lambda x: x[1]), states

    # --- assignment / augmented assignment / comprehension clause ----------
    if root_node.type in assignment:
        if root_node.type == 'for_in_clause':
            right_nodes = [root_node.children[-1]]
            left_nodes = [root_node.child_by_field_name('left')]
        else:
            left = root_node.child_by_field_name('left')
            right = root_node.child_by_field_name('right')
            if right is None:
                return [], states
            left_nodes = [x for x in left.children if x.type != ',']
            right_nodes = [x for x in right.children if x.type != ',']
            # Pair element-wise only when both sides split into the same,
            # non-zero number of parts; otherwise pair the whole sides.
            if len(right_nodes) != len(left_nodes) or not left_nodes:
                left_nodes = [left]
                right_nodes = [right]
        DFG = []
        for node in right_nodes:
            temp, states = DFG_python(node, index_to_code, states)
            DFG += temp
        for left_node, right_node in zip(left_nodes, right_nodes):
            left_tokens_index = tree_to_variable_index(left_node, index_to_code)
            right_tokens_index = tree_to_variable_index(right_node, index_to_code)
            temp = []
            for token1_index in left_tokens_index:
                idx1, code1 = index_to_code[token1_index]
                temp.append((code1, idx1, 'computedFrom',
                             [index_to_code[x][1] for x in right_tokens_index],
                             [index_to_code[x][0] for x in right_tokens_index]))
                states[code1] = [idx1]
            DFG += temp
        return sorted(DFG, key=lambda x: x[1]), states

    # --- if / elif / else ----------------------------------------------------
    if root_node.type in if_statement:
        DFG = []
        current_states = states.copy()
        others_states = []
        has_else = 'else' in root_node.type
        for child in root_node.children:
            if 'else' in child.type:
                has_else = True
            if child.type not in ['elif_clause', 'else_clause']:
                temp, current_states = DFG_python(child, index_to_code, current_states)
                DFG += temp
            else:
                # Alternative branches start from the state before the `if`.
                temp, new_states = DFG_python(child, index_to_code, states)
                DFG += temp
                others_states.append(new_states)
        others_states.append(current_states)
        if not has_else:
            # Without an else branch the incoming definitions may survive.
            others_states.append(states)
        new_states = {}
        for branch_states in others_states:
            for key, indices in branch_states.items():
                new_states.setdefault(key, []).extend(indices)
        new_states = {key: sorted(set(indices)) for key, indices in new_states.items()}
        return sorted(DFG, key=lambda x: x[1]), new_states

    # --- loops: walked twice so the second pass sees the first pass' states -
    if root_node.type in for_statement or root_node.type in while_statement:
        DFG = []
        if root_node.type in for_statement:
            left = root_node.child_by_field_name('left')
            right = root_node.child_by_field_name('right')
            right_nodes = [x for x in right.children if x.type != ',']
            left_nodes = [x for x in left.children if x.type != ',']
            if len(right_nodes) != len(left_nodes) or not left_nodes:
                left_nodes = [left]
                right_nodes = [right]
            for _ in range(2):
                for node in right_nodes:
                    temp, states = DFG_python(node, index_to_code, states)
                    DFG += temp
                for left_node, right_node in zip(left_nodes, right_nodes):
                    left_tokens_index = tree_to_variable_index(left_node, index_to_code)
                    right_tokens_index = tree_to_variable_index(right_node, index_to_code)
                    temp = []
                    for token1_index in left_tokens_index:
                        idx1, code1 = index_to_code[token1_index]
                        temp.append((code1, idx1, 'computedFrom',
                                     [index_to_code[x][1] for x in right_tokens_index],
                                     [index_to_code[x][0] for x in right_tokens_index]))
                        states[code1] = [idx1]
                    DFG += temp
                if root_node.children[-1].type == "block":
                    temp, states = DFG_python(root_node.children[-1], index_to_code, states)
                    DFG += temp
        else:
            for _ in range(2):
                for child in root_node.children:
                    temp, states = DFG_python(child, index_to_code, states)
                    DFG += temp
        # Fold edges that share (name, index, relation). Parent names keep
        # first-seen order so the output does not depend on hash randomisation.
        merged = {}
        for code, idx, relation, names, indices in DFG:
            key = (code, idx, relation)
            if key not in merged:
                merged[key] = [names, indices]
            else:
                merged[key][0] = list(dict.fromkeys(merged[key][0] + names))
                merged[key][1] = sorted(set(merged[key][1] + indices))
        DFG = [key + (names, indices) for key, (names, indices) in merged.items()]
        return sorted(DFG, key=lambda x: x[1]), states

    # --- any other node: comprehension clauses first, then the rest ---------
    DFG = []
    for child in root_node.children:
        if child.type in do_first_statement:
            temp, states = DFG_python(child, index_to_code, states)
            DFG += temp
    for child in root_node.children:
        if child.type not in do_first_statement:
            temp, states = DFG_python(child, index_to_code, states)
            DFG += temp
    return sorted(DFG, key=lambda x: x[1]), states
def DFG_java(root_node, index_to_code, states):
    """Build the data-flow graph (DFG) of a Java tree-sitter subtree.

    Args:
        root_node: Syntax-tree node. Only ``type``, ``children``,
            ``start_point``, ``end_point`` and ``child_by_field_name`` are
            used.
        index_to_code: Maps ``(start_point, end_point)`` of a token to
            ``(token_index, token_text)``.
        states: Maps a variable name to the token indices that currently
            define it. It is copied on entry, so the caller's dict is left
            untouched.

    Returns:
        ``(edges, states)``. ``edges`` is a list of
        ``(name, index, relation, parent_names, parent_indices)`` tuples
        sorted by ``index``, with ``relation`` being ``'comesFrom'`` or
        ``'computedFrom'``; ``states`` is the updated mapping.
    """
    assignment = ['assignment_expression']
    def_statement = ['variable_declarator']
    increment_statement = ['update_expression']
    if_statement = ['if_statement', 'else']
    for_statement = ['for_statement']
    enhanced_for_statement = ['enhanced_for_statement']
    while_statement = ['while_statement']
    states = states.copy()

    # --- single token (or string literal treated as one token) -------------
    if ((not root_node.children
         or root_node.type in ['string_literal', 'string', 'character_literal'])
            and root_node.type != 'comment'):
        idx, code = index_to_code[(root_node.start_point, root_node.end_point)]
        if root_node.type == code:
            # Node type equals its own text (keyword/punctuation): no edge.
            return [], states
        if code in states:
            return [(code, idx, 'comesFrom', [code], states[code].copy())], states
        if root_node.type == 'identifier':
            states[code] = [idx]
        return [(code, idx, 'comesFrom', [], [])], states

    # --- variable declarator with optional initialiser ----------------------
    if root_node.type in def_statement:
        name = root_node.child_by_field_name('name')
        value = root_node.child_by_field_name('value')
        DFG = []
        if value is None:
            for index in tree_to_variable_index(name, index_to_code):
                idx, code = index_to_code[index]
                DFG.append((code, idx, 'comesFrom', [], []))
                states[code] = [idx]
            return sorted(DFG, key=lambda x: x[1]), states
        name_indexs = tree_to_variable_index(name, index_to_code)
        value_indexs = tree_to_variable_index(value, index_to_code)
        temp, states = DFG_java(value, index_to_code, states)
        DFG += temp
        for index1 in name_indexs:
            idx1, code1 = index_to_code[index1]
            for index2 in value_indexs:
                idx2, code2 = index_to_code[index2]
                DFG.append((code1, idx1, 'comesFrom', [code2], [idx2]))
            states[code1] = [idx1]
        return sorted(DFG, key=lambda x: x[1]), states

    # --- assignment expression ----------------------------------------------
    if root_node.type in assignment:
        left_nodes = root_node.child_by_field_name('left')
        right_nodes = root_node.child_by_field_name('right')
        DFG = []
        temp, states = DFG_java(right_nodes, index_to_code, states)
        DFG += temp
        name_indexs = tree_to_variable_index(left_nodes, index_to_code)
        value_indexs = tree_to_variable_index(right_nodes, index_to_code)
        for index1 in name_indexs:
            idx1, code1 = index_to_code[index1]
            for index2 in value_indexs:
                idx2, code2 = index_to_code[index2]
                DFG.append((code1, idx1, 'computedFrom', [code2], [idx2]))
            states[code1] = [idx1]
        return sorted(DFG, key=lambda x: x[1]), states

    # --- increment / decrement: every variable is computed from all of them -
    if root_node.type in increment_statement:
        DFG = []
        indexs = tree_to_variable_index(root_node, index_to_code)
        for index1 in indexs:
            idx1, code1 = index_to_code[index1]
            for index2 in indexs:
                idx2, code2 = index_to_code[index2]
                DFG.append((code1, idx1, 'computedFrom', [code2], [idx2]))
            states[code1] = [idx1]
        return sorted(DFG, key=lambda x: x[1]), states

    # --- if / else -----------------------------------------------------------
    if root_node.type in if_statement:
        DFG = []
        current_states = states.copy()
        others_states = []
        in_alternative = False
        has_else = 'else' in root_node.type
        for child in root_node.children:
            if 'else' in child.type:
                has_else = True
            if child.type not in if_statement and not in_alternative:
                temp, current_states = DFG_java(child, index_to_code, current_states)
                DFG += temp
            else:
                # From the first if/else child on, every child starts from
                # the state before the `if`.
                in_alternative = True
                temp, new_states = DFG_java(child, index_to_code, states)
                DFG += temp
                others_states.append(new_states)
        others_states.append(current_states)
        if not has_else:
            # Without an else branch the incoming definitions may survive.
            others_states.append(states)
        new_states = {}
        for branch_states in others_states:
            for key, indices in branch_states.items():
                new_states.setdefault(key, []).extend(indices)
        new_states = {key: sorted(set(indices)) for key, indices in new_states.items()}
        return sorted(DFG, key=lambda x: x[1]), new_states

    # --- loops: parts are walked again so later passes see earlier states ---
    if root_node.type in for_statement + enhanced_for_statement + while_statement:
        DFG = []
        if root_node.type in for_statement:
            for child in root_node.children:
                temp, states = DFG_java(child, index_to_code, states)
                DFG += temp
            # Second pass: only the children after the loop variable
            # declaration are revisited.
            after_declaration = False
            for child in root_node.children:
                if after_declaration:
                    temp, states = DFG_java(child, index_to_code, states)
                    DFG += temp
                elif child.type == "local_variable_declaration":
                    after_declaration = True
        elif root_node.type in enhanced_for_statement:
            name = root_node.child_by_field_name('name')
            value = root_node.child_by_field_name('value')
            body = root_node.child_by_field_name('body')
            for _ in range(2):
                temp, states = DFG_java(value, index_to_code, states)
                DFG += temp
                name_indexs = tree_to_variable_index(name, index_to_code)
                value_indexs = tree_to_variable_index(value, index_to_code)
                for index1 in name_indexs:
                    idx1, code1 = index_to_code[index1]
                    for index2 in value_indexs:
                        idx2, code2 = index_to_code[index2]
                        DFG.append((code1, idx1, 'computedFrom', [code2], [idx2]))
                    states[code1] = [idx1]
                temp, states = DFG_java(body, index_to_code, states)
                DFG += temp
        else:
            for _ in range(2):
                for child in root_node.children:
                    temp, states = DFG_java(child, index_to_code, states)
                    DFG += temp
        # Fold edges that share (name, index, relation). Parent names keep
        # first-seen order so the output does not depend on hash randomisation.
        merged = {}
        for code, idx, relation, names, indices in DFG:
            key = (code, idx, relation)
            if key not in merged:
                merged[key] = [names, indices]
            else:
                merged[key][0] = list(dict.fromkeys(merged[key][0] + names))
                merged[key][1] = sorted(set(merged[key][1] + indices))
        DFG = [key + (names, indices) for key, (names, indices) in merged.items()]
        return sorted(DFG, key=lambda x: x[1]), states

    # --- any other node: visit children in order ----------------------------
    DFG = []
    for child in root_node.children:
        temp, states = DFG_java(child, index_to_code, states)
        DFG += temp
    return sorted(DFG, key=lambda x: x[1]), states
def DFG_csharp(root_node, index_to_code, states):
    """Build the data-flow graph (DFG) of a C# tree-sitter subtree.

    Args:
        root_node: Syntax-tree node. Only ``type``, ``children``,
            ``start_point``, ``end_point`` and ``child_by_field_name`` are
            used.
        index_to_code: Maps ``(start_point, end_point)`` of a token to
            ``(token_index, token_text)``.
        states: Maps a variable name to the token indices that currently
            define it. It is copied on entry, so the caller's dict is left
            untouched.

    Returns:
        ``(edges, states)``. ``edges`` is a list of
        ``(name, index, relation, parent_names, parent_indices)`` tuples
        sorted by ``index``, with ``relation`` being ``'comesFrom'`` or
        ``'computedFrom'``; ``states`` is the updated mapping.
    """
    assignment = ['assignment_expression']
    def_statement = ['variable_declarator']
    increment_statement = ['postfix_unary_expression']
    if_statement = ['if_statement', 'else']
    for_statement = ['for_statement']
    enhanced_for_statement = ['for_each_statement']
    while_statement = ['while_statement']
    states = states.copy()

    # --- single token (or string literal treated as one token) -------------
    if ((not root_node.children
         or root_node.type in ['string_literal', 'string', 'character_literal'])
            and root_node.type != 'comment'):
        idx, code = index_to_code[(root_node.start_point, root_node.end_point)]
        if root_node.type == code:
            # Node type equals its own text (keyword/punctuation): no edge.
            return [], states
        if code in states:
            return [(code, idx, 'comesFrom', [code], states[code].copy())], states
        if root_node.type == 'identifier':
            states[code] = [idx]
        return [(code, idx, 'comesFrom', [], [])], states

    # --- variable declarator -------------------------------------------------
    if root_node.type in def_statement:
        # Exactly two children are read as (name, value); any other shape is
        # treated as a declaration without a value.
        name = root_node.children[0]
        value = root_node.children[1] if len(root_node.children) == 2 else None
        DFG = []
        if value is None:
            for index in tree_to_variable_index(name, index_to_code):
                idx, code = index_to_code[index]
                DFG.append((code, idx, 'comesFrom', [], []))
                states[code] = [idx]
            return sorted(DFG, key=lambda x: x[1]), states
        name_indexs = tree_to_variable_index(name, index_to_code)
        value_indexs = tree_to_variable_index(value, index_to_code)
        temp, states = DFG_csharp(value, index_to_code, states)
        DFG += temp
        for index1 in name_indexs:
            idx1, code1 = index_to_code[index1]
            for index2 in value_indexs:
                idx2, code2 = index_to_code[index2]
                DFG.append((code1, idx1, 'comesFrom', [code2], [idx2]))
            states[code1] = [idx1]
        return sorted(DFG, key=lambda x: x[1]), states

    # --- assignment expression ----------------------------------------------
    if root_node.type in assignment:
        left_nodes = root_node.child_by_field_name('left')
        right_nodes = root_node.child_by_field_name('right')
        DFG = []
        temp, states = DFG_csharp(right_nodes, index_to_code, states)
        DFG += temp
        name_indexs = tree_to_variable_index(left_nodes, index_to_code)
        value_indexs = tree_to_variable_index(right_nodes, index_to_code)
        for index1 in name_indexs:
            idx1, code1 = index_to_code[index1]
            for index2 in value_indexs:
                idx2, code2 = index_to_code[index2]
                DFG.append((code1, idx1, 'computedFrom', [code2], [idx2]))
            states[code1] = [idx1]
        return sorted(DFG, key=lambda x: x[1]), states

    # --- postfix ++ / --: every variable is computed from all of them -------
    if root_node.type in increment_statement:
        DFG = []
        indexs = tree_to_variable_index(root_node, index_to_code)
        for index1 in indexs:
            idx1, code1 = index_to_code[index1]
            for index2 in indexs:
                idx2, code2 = index_to_code[index2]
                DFG.append((code1, idx1, 'computedFrom', [code2], [idx2]))
            states[code1] = [idx1]
        return sorted(DFG, key=lambda x: x[1]), states

    # --- if / else -----------------------------------------------------------
    if root_node.type in if_statement:
        DFG = []
        current_states = states.copy()
        others_states = []
        in_alternative = False
        has_else = 'else' in root_node.type
        for child in root_node.children:
            if 'else' in child.type:
                has_else = True
            if child.type not in if_statement and not in_alternative:
                temp, current_states = DFG_csharp(child, index_to_code, current_states)
                DFG += temp
            else:
                # From the first if/else child on, every child starts from
                # the state before the `if`.
                in_alternative = True
                temp, new_states = DFG_csharp(child, index_to_code, states)
                DFG += temp
                others_states.append(new_states)
        others_states.append(current_states)
        if not has_else:
            # Without an else branch the incoming definitions may survive.
            others_states.append(states)
        new_states = {}
        for branch_states in others_states:
            for key, indices in branch_states.items():
                new_states.setdefault(key, []).extend(indices)
        new_states = {key: sorted(set(indices)) for key, indices in new_states.items()}
        return sorted(DFG, key=lambda x: x[1]), new_states

    # --- loops: parts are walked again so later passes see earlier states ---
    if root_node.type in for_statement + enhanced_for_statement + while_statement:
        DFG = []
        if root_node.type in for_statement:
            for child in root_node.children:
                temp, states = DFG_csharp(child, index_to_code, states)
                DFG += temp
            # Second pass: only the children after a
            # "local_variable_declaration" child are revisited.
            after_declaration = False
            for child in root_node.children:
                if after_declaration:
                    temp, states = DFG_csharp(child, index_to_code, states)
                    DFG += temp
                elif child.type == "local_variable_declaration":
                    after_declaration = True
        elif root_node.type in enhanced_for_statement:
            name = root_node.child_by_field_name('left')
            value = root_node.child_by_field_name('right')
            body = root_node.child_by_field_name('body')
            for _ in range(2):
                temp, states = DFG_csharp(value, index_to_code, states)
                DFG += temp
                name_indexs = tree_to_variable_index(name, index_to_code)
                value_indexs = tree_to_variable_index(value, index_to_code)
                for index1 in name_indexs:
                    idx1, code1 = index_to_code[index1]
                    for index2 in value_indexs:
                        idx2, code2 = index_to_code[index2]
                        DFG.append((code1, idx1, 'computedFrom', [code2], [idx2]))
                    states[code1] = [idx1]
                temp, states = DFG_csharp(body, index_to_code, states)
                DFG += temp
        else:
            for _ in range(2):
                for child in root_node.children:
                    temp, states = DFG_csharp(child, index_to_code, states)
                    DFG += temp
        # Fold edges that share (name, index, relation). Parent names keep
        # first-seen order so the output does not depend on hash randomisation.
        merged = {}
        for code, idx, relation, names, indices in DFG:
            key = (code, idx, relation)
            if key not in merged:
                merged[key] = [names, indices]
            else:
                merged[key][0] = list(dict.fromkeys(merged[key][0] + names))
                merged[key][1] = sorted(set(merged[key][1] + indices))
        DFG = [key + (names, indices) for key, (names, indices) in merged.items()]
        return sorted(DFG, key=lambda x: x[1]), states

    # --- any other node: visit children in order ----------------------------
    DFG = []
    for child in root_node.children:
        temp, states = DFG_csharp(child, index_to_code, states)
        DFG += temp
    return sorted(DFG, key=lambda x: x[1]), states
def DFG_ruby(root_node, index_to_code, states):
    """Build the data-flow graph (DFG) of a Ruby tree-sitter subtree.

    Args:
        root_node: Syntax-tree node. Only ``type``, ``children``,
            ``start_point``, ``end_point`` and ``child_by_field_name`` are
            used.
        index_to_code: Maps ``(start_point, end_point)`` of a token to
            ``(token_index, token_text)``.
        states: Maps a variable name to the token indices that currently
            define it. It is copied on entry, so the caller's dict is left
            untouched.

    Returns:
        ``(edges, states)``. ``edges`` is a list of
        ``(name, index, relation, parent_names, parent_indices)`` tuples
        sorted by ``index``, with ``relation`` being ``'comesFrom'`` or
        ``'computedFrom'``; ``states`` is the updated mapping.
    """
    assignment = ['assignment', 'operator_assignment']
    if_statement = ['if', 'elsif', 'else', 'unless', 'when']
    for_statement = ['for']
    while_statement = ['while_modifier', 'until']
    def_statement = ['keyword_parameter']
    # Copy for every node kind (not only tokens) so that branches which
    # write into ``states`` never modify the dict owned by the caller.
    states = states.copy()

    # --- single token (or string literal treated as one token) -------------
    if ((not root_node.children
         or root_node.type in ['string_literal', 'string', 'character_literal'])
            and root_node.type != 'comment'):
        idx, code = index_to_code[(root_node.start_point, root_node.end_point)]
        if root_node.type == code:
            # Node type equals its own text (keyword/punctuation): no edge.
            return [], states
        if code in states:
            return [(code, idx, 'comesFrom', [code], states[code].copy())], states
        if root_node.type == 'identifier':
            states[code] = [idx]
        return [(code, idx, 'comesFrom', [], [])], states

    # --- keyword parameter with optional default value ----------------------
    if root_node.type in def_statement:
        name = root_node.child_by_field_name('name')
        value = root_node.child_by_field_name('value')
        DFG = []
        if value is None:
            for index in tree_to_variable_index(name, index_to_code):
                idx, code = index_to_code[index]
                DFG.append((code, idx, 'comesFrom', [], []))
                states[code] = [idx]
            return sorted(DFG, key=lambda x: x[1]), states
        name_indexs = tree_to_variable_index(name, index_to_code)
        value_indexs = tree_to_variable_index(value, index_to_code)
        temp, states = DFG_ruby(value, index_to_code, states)
        DFG += temp
        for index1 in name_indexs:
            idx1, code1 = index_to_code[index1]
            for index2 in value_indexs:
                idx2, code2 = index_to_code[index2]
                DFG.append((code1, idx1, 'comesFrom', [code2], [idx2]))
            states[code1] = [idx1]
        return sorted(DFG, key=lambda x: x[1]), states

    # --- assignment / operator assignment -----------------------------------
    if root_node.type in assignment:
        left = root_node.child_by_field_name('left')
        right = root_node.child_by_field_name('right')
        left_nodes = [x for x in left.children if x.type != ',']
        right_nodes = [x for x in right.children if x.type != ',']
        # Pair element-wise only when both sides split into the same,
        # non-zero number of parts; otherwise pair the whole sides.
        if len(right_nodes) != len(left_nodes) or not left_nodes:
            left_nodes = [left]
            right_nodes = [right]
        if root_node.type == "operator_assignment":
            left_nodes = [root_node.children[0]]
            right_nodes = [root_node.children[-1]]
        DFG = []
        for node in right_nodes:
            temp, states = DFG_ruby(node, index_to_code, states)
            DFG += temp
        for left_node, right_node in zip(left_nodes, right_nodes):
            left_tokens_index = tree_to_variable_index(left_node, index_to_code)
            right_tokens_index = tree_to_variable_index(right_node, index_to_code)
            temp = []
            for token1_index in left_tokens_index:
                idx1, code1 = index_to_code[token1_index]
                temp.append((code1, idx1, 'computedFrom',
                             [index_to_code[x][1] for x in right_tokens_index],
                             [index_to_code[x][0] for x in right_tokens_index]))
                states[code1] = [idx1]
            DFG += temp
        return sorted(DFG, key=lambda x: x[1]), states

    # --- if / elsif / else / unless / when ----------------------------------
    if root_node.type in if_statement:
        DFG = []
        current_states = states.copy()
        others_states = []
        has_else = 'else' in root_node.type
        for child in root_node.children:
            if 'else' in child.type:
                has_else = True
            if child.type not in if_statement:
                temp, current_states = DFG_ruby(child, index_to_code, current_states)
                DFG += temp
            else:
                # Conditional children start from the state before this node.
                temp, new_states = DFG_ruby(child, index_to_code, states)
                DFG += temp
                others_states.append(new_states)
        others_states.append(current_states)
        if not has_else:
            # Without an else branch the incoming definitions may survive.
            others_states.append(states)
        new_states = {}
        for branch_states in others_states:
            for key, indices in branch_states.items():
                new_states.setdefault(key, []).extend(indices)
        new_states = {key: sorted(set(indices)) for key, indices in new_states.items()}
        return sorted(DFG, key=lambda x: x[1]), new_states

    # --- loops: walked twice so the second pass sees the first pass' states -
    if root_node.type in for_statement + while_statement:
        DFG = []
        if root_node.type in for_statement:
            left_node = root_node.child_by_field_name('pattern')
            right_node = root_node.child_by_field_name('value')
            for _ in range(2):
                temp, states = DFG_ruby(right_node, index_to_code, states)
                DFG += temp
                left_tokens_index = tree_to_variable_index(left_node, index_to_code)
                right_tokens_index = tree_to_variable_index(right_node, index_to_code)
                temp = []
                for token1_index in left_tokens_index:
                    idx1, code1 = index_to_code[token1_index]
                    temp.append((code1, idx1, 'computedFrom',
                                 [index_to_code[x][1] for x in right_tokens_index],
                                 [index_to_code[x][0] for x in right_tokens_index]))
                    states[code1] = [idx1]
                DFG += temp
                temp, states = DFG_ruby(root_node.child_by_field_name('body'), index_to_code, states)
                DFG += temp
        else:
            for _ in range(2):
                for child in root_node.children:
                    temp, states = DFG_ruby(child, index_to_code, states)
                    DFG += temp
        # Fold edges that share (name, index, relation). Parent names keep
        # first-seen order so the output does not depend on hash randomisation.
        merged = {}
        for code, idx, relation, names, indices in DFG:
            key = (code, idx, relation)
            if key not in merged:
                merged[key] = [names, indices]
            else:
                merged[key][0] = list(dict.fromkeys(merged[key][0] + names))
                merged[key][1] = sorted(set(merged[key][1] + indices))
        DFG = [key + (names, indices) for key, (names, indices) in merged.items()]
        return sorted(DFG, key=lambda x: x[1]), states

    # --- any other node: visit children in order ----------------------------
    DFG = []
    for child in root_node.children:
        temp, states = DFG_ruby(child, index_to_code, states)
        DFG += temp
    return sorted(DFG, key=lambda x: x[1]), states
def DFG_go(root_node, index_to_code, states):
    """Build the data-flow graph (DFG) of a Go tree-sitter subtree.

    Args:
        root_node: Syntax-tree node. Only ``type``, ``children``,
            ``start_point``, ``end_point`` and ``child_by_field_name`` are
            used.
        index_to_code: Maps ``(start_point, end_point)`` of a token to
            ``(token_index, token_text)``.
        states: Maps a variable name to the token indices that currently
            define it. It is copied on entry, so the caller's dict is left
            untouched.

    Returns:
        ``(edges, states)``. ``edges`` is a list of
        ``(name, index, relation, parent_names, parent_indices)`` tuples
        sorted by ``index``, with ``relation`` being ``'comesFrom'`` or
        ``'computedFrom'``; ``states`` is the updated mapping.
    """
    assignment = ['assignment_statement']
    def_statement = ['var_spec']
    increment_statement = ['inc_statement']
    if_statement = ['if_statement', 'else']
    for_statement = ['for_statement']
    states = states.copy()

    # --- single token (or string literal treated as one token) -------------
    if ((not root_node.children
         or root_node.type in ['string_literal', 'string', 'character_literal'])
            and root_node.type != 'comment'):
        idx, code = index_to_code[(root_node.start_point, root_node.end_point)]
        if root_node.type == code:
            # Node type equals its own text (keyword/punctuation): no edge.
            return [], states
        if code in states:
            return [(code, idx, 'comesFrom', [code], states[code].copy())], states
        if root_node.type == 'identifier':
            states[code] = [idx]
        return [(code, idx, 'comesFrom', [], [])], states

    # --- var spec with optional value ---------------------------------------
    if root_node.type in def_statement:
        name = root_node.child_by_field_name('name')
        value = root_node.child_by_field_name('value')
        DFG = []
        if value is None:
            for index in tree_to_variable_index(name, index_to_code):
                idx, code = index_to_code[index]
                DFG.append((code, idx, 'comesFrom', [], []))
                states[code] = [idx]
            return sorted(DFG, key=lambda x: x[1]), states
        name_indexs = tree_to_variable_index(name, index_to_code)
        value_indexs = tree_to_variable_index(value, index_to_code)
        temp, states = DFG_go(value, index_to_code, states)
        DFG += temp
        for index1 in name_indexs:
            idx1, code1 = index_to_code[index1]
            for index2 in value_indexs:
                idx2, code2 = index_to_code[index2]
                DFG.append((code1, idx1, 'comesFrom', [code2], [idx2]))
            states[code1] = [idx1]
        return sorted(DFG, key=lambda x: x[1]), states

    # --- assignment statement -----------------------------------------------
    if root_node.type in assignment:
        left_nodes = root_node.child_by_field_name('left')
        right_nodes = root_node.child_by_field_name('right')
        DFG = []
        temp, states = DFG_go(right_nodes, index_to_code, states)
        DFG += temp
        name_indexs = tree_to_variable_index(left_nodes, index_to_code)
        value_indexs = tree_to_variable_index(right_nodes, index_to_code)
        for index1 in name_indexs:
            idx1, code1 = index_to_code[index1]
            for index2 in value_indexs:
                idx2, code2 = index_to_code[index2]
                DFG.append((code1, idx1, 'computedFrom', [code2], [idx2]))
            states[code1] = [idx1]
        return sorted(DFG, key=lambda x: x[1]), states

    # --- increment statement: every variable is computed from all of them ---
    if root_node.type in increment_statement:
        DFG = []
        indexs = tree_to_variable_index(root_node, index_to_code)
        for index1 in indexs:
            idx1, code1 = index_to_code[index1]
            for index2 in indexs:
                idx2, code2 = index_to_code[index2]
                DFG.append((code1, idx1, 'computedFrom', [code2], [idx2]))
            states[code1] = [idx1]
        return sorted(DFG, key=lambda x: x[1]), states

    # --- if / else -----------------------------------------------------------
    if root_node.type in if_statement:
        DFG = []
        current_states = states.copy()
        others_states = []
        in_alternative = False
        for child in root_node.children:
            if child.type not in if_statement and not in_alternative:
                temp, current_states = DFG_go(child, index_to_code, current_states)
                DFG += temp
            else:
                # From the first if/else child on, every child starts from
                # the state before the `if`.
                in_alternative = True
                temp, new_states = DFG_go(child, index_to_code, states)
                DFG += temp
                others_states.append(new_states)
        others_states.append(current_states)
        # The incoming definitions are always merged back in, whether or not
        # an else branch exists.
        others_states.append(states)
        new_states = {}
        for branch_states in others_states:
            for key, indices in branch_states.items():
                new_states.setdefault(key, []).extend(indices)
        new_states = {key: sorted(set(indices)) for key, indices in new_states.items()}
        return sorted(DFG, key=lambda x: x[1]), new_states

    # --- for loop ------------------------------------------------------------
    if root_node.type in for_statement:
        DFG = []
        for child in root_node.children:
            temp, states = DFG_go(child, index_to_code, states)
            DFG += temp
        # Second pass: re-run the update part of the for clause (if any) and
        # every child that follows the clause.
        after_clause = False
        for child in root_node.children:
            if after_clause:
                temp, states = DFG_go(child, index_to_code, states)
                DFG += temp
            elif child.type == "for_clause":
                update = child.child_by_field_name('update')
                if update is not None:
                    temp, states = DFG_go(update, index_to_code, states)
                    DFG += temp
                after_clause = True
        # Fold edges that share (name, index, relation). Parent names keep
        # first-seen order so the output does not depend on hash randomisation.
        merged = {}
        for code, idx, relation, names, indices in DFG:
            key = (code, idx, relation)
            if key not in merged:
                merged[key] = [names, indices]
            else:
                merged[key][0] = list(dict.fromkeys(merged[key][0] + names))
                merged[key][1] = sorted(set(merged[key][1] + indices))
        DFG = [key + (names, indices) for key, (names, indices) in merged.items()]
        return sorted(DFG, key=lambda x: x[1]), states

    # --- any other node: visit children in order ----------------------------
    DFG = []
    for child in root_node.children:
        temp, states = DFG_go(child, index_to_code, states)
        DFG += temp
    return sorted(DFG, key=lambda x: x[1]), states
def DFG_php(root_node, index_to_code, states):
    """Build the data-flow graph (DFG) of a PHP tree-sitter subtree.

    Args:
        root_node: Syntax-tree node. Only ``type``, ``children``,
            ``start_point``, ``end_point`` and ``child_by_field_name`` are
            used.
        index_to_code: Maps ``(start_point, end_point)`` of a token to
            ``(token_index, token_text)``.
        states: Maps a variable name to the token indices that currently
            define it. It is copied on entry, so the caller's dict is left
            untouched.

    Returns:
        ``(edges, states)``. ``edges`` is a list of
        ``(name, index, relation, parent_names, parent_indices)`` tuples
        sorted by ``index``, with ``relation`` being ``'comesFrom'`` or
        ``'computedFrom'``; ``states`` is the updated mapping.
    """
    assignment = ['assignment_expression', 'augmented_assignment_expression']
    def_statement = ['simple_parameter']
    increment_statement = ['update_expression']
    if_statement = ['if_statement', 'else_clause']
    for_statement = ['for_statement']
    enhanced_for_statement = ['foreach_statement']
    while_statement = ['while_statement']
    states = states.copy()

    # --- single token (or string literal treated as one token) -------------
    if ((not root_node.children
         or root_node.type in ['string_literal', 'string', 'character_literal'])
            and root_node.type != 'comment'):
        idx, code = index_to_code[(root_node.start_point, root_node.end_point)]
        if root_node.type == code:
            # Node type equals its own text (keyword/punctuation): no edge.
            return [], states
        if code in states:
            return [(code, idx, 'comesFrom', [code], states[code].copy())], states
        if root_node.type == 'identifier':
            states[code] = [idx]
        return [(code, idx, 'comesFrom', [], [])], states

    # --- parameter with optional default value ------------------------------
    if root_node.type in def_statement:
        name = root_node.child_by_field_name('name')
        value = root_node.child_by_field_name('default_value')
        DFG = []
        if value is None:
            for index in tree_to_variable_index(name, index_to_code):
                idx, code = index_to_code[index]
                DFG.append((code, idx, 'comesFrom', [], []))
                states[code] = [idx]
            return sorted(DFG, key=lambda x: x[1]), states
        name_indexs = tree_to_variable_index(name, index_to_code)
        value_indexs = tree_to_variable_index(value, index_to_code)
        temp, states = DFG_php(value, index_to_code, states)
        DFG += temp
        for index1 in name_indexs:
            idx1, code1 = index_to_code[index1]
            for index2 in value_indexs:
                idx2, code2 = index_to_code[index2]
                DFG.append((code1, idx1, 'comesFrom', [code2], [idx2]))
            states[code1] = [idx1]
        return sorted(DFG, key=lambda x: x[1]), states

    # --- assignment / augmented assignment ----------------------------------
    if root_node.type in assignment:
        left_nodes = root_node.child_by_field_name('left')
        right_nodes = root_node.child_by_field_name('right')
        DFG = []
        temp, states = DFG_php(right_nodes, index_to_code, states)
        DFG += temp
        name_indexs = tree_to_variable_index(left_nodes, index_to_code)
        value_indexs = tree_to_variable_index(right_nodes, index_to_code)
        for index1 in name_indexs:
            idx1, code1 = index_to_code[index1]
            for index2 in value_indexs:
                idx2, code2 = index_to_code[index2]
                DFG.append((code1, idx1, 'computedFrom', [code2], [idx2]))
            states[code1] = [idx1]
        return sorted(DFG, key=lambda x: x[1]), states

    # --- increment / decrement: every variable is computed from all of them -
    if root_node.type in increment_statement:
        DFG = []
        indexs = tree_to_variable_index(root_node, index_to_code)
        for index1 in indexs:
            idx1, code1 = index_to_code[index1]
            for index2 in indexs:
                idx2, code2 = index_to_code[index2]
                DFG.append((code1, idx1, 'computedFrom', [code2], [idx2]))
            states[code1] = [idx1]
        return sorted(DFG, key=lambda x: x[1]), states

    # --- if / else -----------------------------------------------------------
    if root_node.type in if_statement:
        DFG = []
        current_states = states.copy()
        others_states = []
        in_alternative = False
        for child in root_node.children:
            if child.type not in if_statement and not in_alternative:
                temp, current_states = DFG_php(child, index_to_code, current_states)
                DFG += temp
            else:
                # From the first if/else child on, every child starts from
                # the state before the `if`.
                in_alternative = True
                temp, new_states = DFG_php(child, index_to_code, states)
                DFG += temp
                others_states.append(new_states)
        others_states.append(current_states)
        # The incoming definitions are always merged back in, whether or not
        # an else branch exists.
        others_states.append(states)
        new_states = {}
        for branch_states in others_states:
            for key, indices in branch_states.items():
                new_states.setdefault(key, []).extend(indices)
        new_states = {key: sorted(set(indices)) for key, indices in new_states.items()}
        return sorted(DFG, key=lambda x: x[1]), new_states

    # --- loops: parts are walked again so later passes see earlier states ---
    if root_node.type in for_statement + enhanced_for_statement + while_statement:
        DFG = []
        if root_node.type in for_statement:
            for child in root_node.children:
                temp, states = DFG_php(child, index_to_code, states)
                DFG += temp
            # Second pass: only the children after the first
            # assignment_expression child are revisited.
            after_init = False
            for child in root_node.children:
                if after_init:
                    temp, states = DFG_php(child, index_to_code, states)
                    DFG += temp
                elif child.type == "assignment_expression":
                    after_init = True
        elif root_node.type in enhanced_for_statement:
            # The first variable_name child is the iterated value, the
            # second one the loop variable.
            # NOTE(review): with fewer than two variable_name children,
            # `name` (or `value`) stays None and the calls below fail on it
            # -- confirm whether such foreach forms must be supported.
            name = None
            value = None
            for child in root_node.children:
                if child.type == 'variable_name' and value is None:
                    value = child
                elif child.type == 'variable_name' and name is None:
                    name = child
                    break
            body = root_node.child_by_field_name('body')
            for _ in range(2):
                temp, states = DFG_php(value, index_to_code, states)
                DFG += temp
                name_indexs = tree_to_variable_index(name, index_to_code)
                value_indexs = tree_to_variable_index(value, index_to_code)
                for index1 in name_indexs:
                    idx1, code1 = index_to_code[index1]
                    for index2 in value_indexs:
                        idx2, code2 = index_to_code[index2]
                        DFG.append((code1, idx1, 'computedFrom', [code2], [idx2]))
                    states[code1] = [idx1]
                temp, states = DFG_php(body, index_to_code, states)
                DFG += temp
        else:
            for _ in range(2):
                for child in root_node.children:
                    temp, states = DFG_php(child, index_to_code, states)
                    DFG += temp
        # Fold edges that share (name, index, relation). Parent names keep
        # first-seen order so the output does not depend on hash randomisation.
        merged = {}
        for code, idx, relation, names, indices in DFG:
            key = (code, idx, relation)
            if key not in merged:
                merged[key] = [names, indices]
            else:
                merged[key][0] = list(dict.fromkeys(merged[key][0] + names))
                merged[key][1] = sorted(set(merged[key][1] + indices))
        DFG = [key + (names, indices) for key, (names, indices) in merged.items()]
        return sorted(DFG, key=lambda x: x[1]), states

    # --- any other node: visit children in order ----------------------------
    DFG = []
    for child in root_node.children:
        temp, states = DFG_php(child, index_to_code, states)
        DFG += temp
    return sorted(DFG, key=lambda x: x[1]), states
def _js_merge_edges(edges):
    """Collapse edges that share (code, idx, relation) into a single edge.

    The first edge seen for a key keeps its own name/index lists untouched.
    When further edges with the same key arrive, the source names are
    de-duplicated in first-seen order (deterministic, unlike iterating a
    ``set`` of strings) and the source indexes are de-duplicated and sorted.

    Returns the merged edges sorted by their token index.
    """
    merged = {}
    for code, idx, relation, names, idxs in edges:
        key = (code, idx, relation)
        entry = merged.get(key)
        if entry is None:
            merged[key] = [names, idxs]
        else:
            entry[0] = list(dict.fromkeys(entry[0] + names))
            entry[1] = sorted(set(entry[1] + idxs))
    flattened = [
        (code, idx, relation, names, idxs)
        for (code, idx, relation), (names, idxs) in merged.items()
    ]
    return sorted(flattened, key=lambda edge: edge[1])


def _js_link_edges(target_indexs, source_indexs, relation, index_to_code, states):
    """Build one ``relation`` edge from every target to every source.

    ``states`` is updated in place so that each target code points at its own
    token index (one update per target, even when there are no sources).
    """
    edges = []
    for target in target_indexs:
        target_idx, target_code = index_to_code[target]
        for source in source_indexs:
            source_idx, source_code = index_to_code[source]
            edges.append(
                (target_code, target_idx, relation, [source_code], [source_idx])
            )
        states[target_code] = [target_idx]
    return edges


def DFG_javascript(root_node, index_to_code, states):
    """Extract data-flow edges for a JavaScript syntax (sub)tree.

    Args:
        root_node: node exposing ``type``, ``children``, ``start_point``,
            ``end_point`` and ``child_by_field_name`` (a tree-sitter node in
            practice).
        index_to_code: mapping ``(start_point, end_point) -> (idx, code)``
            for the leaf tokens.
        states: mapping ``code -> list of idx`` describing where each name
            was last defined. It is copied, never mutated.

    Returns:
        ``(edges, states)`` where ``edges`` is a list of
        ``(code, idx, relation, source_codes, source_idxs)`` tuples sorted by
        ``idx`` and ``states`` is the updated definition map.
    """
    assignment = ('assignment_pattern', 'augmented_assignment_expression')
    def_statement = ('variable_declarator',)
    increment_statement = ('update_expression',)
    if_statement = ('if_statement', 'else')
    for_statement = ('for_statement',)
    while_statement = ('while_statement',)
    # No JavaScript node type has to be visited ahead of its siblings.
    do_first_statement = ()
    literal_types = ('string_literal', 'string', 'character_literal')

    def by_idx(edge):
        return edge[1]

    states = states.copy()
    node_type = root_node.type

    is_leaf = len(root_node.children) == 0 or node_type in literal_types
    if is_leaf and node_type != 'comment':
        idx, code = index_to_code[(root_node.start_point, root_node.end_point)]
        if node_type == code:
            # Keywords/punctuation: the node type is the token text itself.
            return [], states
        if code in states:
            return [(code, idx, 'comesFrom', [code], states[code].copy())], states
        if node_type == 'identifier':
            states[code] = [idx]
        return [(code, idx, 'comesFrom', [], [])], states

    if node_type in def_statement:
        name = root_node.child_by_field_name('name')
        value = root_node.child_by_field_name('value')
        if value is None:
            # Declaration without initializer: the name is defined here.
            DFG = []
            for index in tree_to_variable_index(name, index_to_code):
                idx, code = index_to_code[index]
                DFG.append((code, idx, 'comesFrom', [], []))
                states[code] = [idx]
            return sorted(DFG, key=by_idx), states
        name_indexs = tree_to_variable_index(name, index_to_code)
        value_indexs = tree_to_variable_index(value, index_to_code)
        DFG, states = DFG_javascript(value, index_to_code, states)
        DFG = DFG + _js_link_edges(
            name_indexs, value_indexs, 'comesFrom', index_to_code, states
        )
        return sorted(DFG, key=by_idx), states

    if node_type in assignment:
        left_nodes = root_node.child_by_field_name('left')
        right_nodes = root_node.child_by_field_name('right')
        # The right-hand side is evaluated before the target is rebound.
        DFG, states = DFG_javascript(right_nodes, index_to_code, states)
        name_indexs = tree_to_variable_index(left_nodes, index_to_code)
        value_indexs = tree_to_variable_index(right_nodes, index_to_code)
        DFG = DFG + _js_link_edges(
            name_indexs, value_indexs, 'computedFrom', index_to_code, states
        )
        return sorted(DFG, key=by_idx), states

    if node_type in increment_statement:
        # Every variable in the update expression is computed from all of them.
        indexs = tree_to_variable_index(root_node, index_to_code)
        DFG = _js_link_edges(indexs, indexs, 'computedFrom', index_to_code, states)
        return sorted(DFG, key=by_idx), states

    if node_type in if_statement:
        DFG = []
        current_states = states.copy()
        others_states = []
        saw_branch = False
        for child in root_node.children:
            if child.type not in if_statement and not saw_branch:
                # Condition and first body run on top of each other.
                temp, current_states = DFG_javascript(
                    child, index_to_code, current_states
                )
            else:
                # From the first nested branch on, every remaining child
                # restarts from the states that were live before the `if`.
                saw_branch = True
                temp, branch_states = DFG_javascript(child, index_to_code, states)
                others_states.append(branch_states)
            DFG += temp
        others_states.append(current_states)
        # NOTE: the previous implementation tracked whether an `else` was
        # present but then merged the incoming states unconditionally; that
        # net behaviour is kept, so pre-branch definitions always survive.
        others_states.append(states)
        collected = {}
        for branch in others_states:
            for key, idxs in branch.items():
                collected.setdefault(key, set()).update(idxs)
        new_states = {key: sorted(idxs) for key, idxs in collected.items()}
        return sorted(DFG, key=by_idx), new_states

    if node_type in for_statement:
        DFG = []
        for child in root_node.children:
            temp, states = DFG_javascript(child, index_to_code, states)
            DFG += temp
        # Second pass for loop-carried flow: revisit only the children that
        # follow a `variable_declaration`, not the declaration itself.
        after_declaration = False
        for child in root_node.children:
            if after_declaration:
                temp, states = DFG_javascript(child, index_to_code, states)
                DFG += temp
            elif child.type == "variable_declaration":
                after_declaration = True
        return _js_merge_edges(DFG), states

    if node_type in while_statement:
        DFG = []
        # Two passes so definitions from the body reach the next iteration.
        for _ in range(2):
            for child in root_node.children:
                temp, states = DFG_javascript(child, index_to_code, states)
                DFG += temp
        return _js_merge_edges(DFG), states

    DFG = []
    for child in root_node.children:
        if child.type in do_first_statement:
            temp, states = DFG_javascript(child, index_to_code, states)
            DFG += temp
    for child in root_node.children:
        if child.type not in do_first_statement:
            temp, states = DFG_javascript(child, index_to_code, states)
            DFG += temp
    return sorted(DFG, key=by_idx), states