Update commonsenseConstraint.py
Browse files- commonsenseConstraint.py +0 -188
commonsenseConstraint.py
CHANGED
@@ -545,191 +545,3 @@ def boolean_evaluation(query_data, tested_data):
|
|
545 |
print(return_info[key][1])
|
546 |
return False
|
547 |
return True
|
548 |
-
|
549 |
-
# if __name__ == '__main__':
|
550 |
-
# number_list = extract_numbers_from_filenames('/home/xj/toolAugEnv/code/toolConstraint/data/annotation/lrz')
|
551 |
-
# # json_data = json.load(open('/home/xj/toolAugEnv/code/toolConstraint/data/annotation/x/annotation_4.json'))
|
552 |
-
# query_data = load_line_json_data('/home/xj/toolAugEnv/code/toolConstraint/data/query/lrz.jsonl')
|
553 |
-
# for idx in number_list:
|
554 |
-
# json_data = json.load(open(f'/home/xj/toolAugEnv/code/toolConstraint/data/annotation/lrz/annotation_{idx}.json'))
|
555 |
-
# print(str(idx), evaluation(query_data[idx-1], json_data))
|
556 |
-
# # json_data = json.load(open(f'/home/xj/toolAugEnv/code/toolConstraint/results/turbo16k-turbo16k/plan_{idx}.json'))
|
557 |
-
# # query_data = load_line_json_data('/home/xj/toolAugEnv/code/toolConstraint/data/query/test.jsonl')[idx-1]
|
558 |
-
# # help me write all function name in this file, just the name
|
559 |
-
# #
|
560 |
-
# # list all function name in this file
|
561 |
-
# # ['is_reasonalbe_visiting_city', 'is_valiable_restaurants', 'is_valiable_attractions', 'is_valiable_transportation', 'is_valid_information_in_current_city', 'is_valid_information_in_sandbox']
|
562 |
-
# # print(is_valiable_restaurants(query_data, json_data))
|
563 |
-
|
564 |
-
# if __name__ == "__main__":
|
565 |
-
# user = 'zk'
|
566 |
-
# query_data_list = load_line_json_data(f'/home/xj/toolAugEnv/code/toolConstraint/data/query/{user}.jsonl')
|
567 |
-
# idx_number_list = extract_numbers_from_filenames(f'/home/xj/toolAugEnv/code/toolConstraint/data/annotation/{user}')
|
568 |
-
# commonsense_statistic= {level:{day:[] for day in [3,5,7]} for level in ['easy','medium','hard']}
|
569 |
-
# for idx in idx_number_list:
|
570 |
-
# print(idx)
|
571 |
-
# query_data = query_data_list[idx-1]
|
572 |
-
# generated_plan = json.load(open(f'/home/xj/toolAugEnv/code/toolConstraint/results/turbo16k-turbo16k/{user}/plan_{idx}.json'))
|
573 |
-
# # generated_plan = generated_plan[:-1]
|
574 |
-
# if generated_plan[-1]['gpt-3.5-turbo-16k-result'] != 'Plan Fail':
|
575 |
-
# info_box = evaluation(query_data, generated_plan[-1]['gpt-3.5-turbo-16k-result'])
|
576 |
-
# generated_plan[-1]['toolAug-commonsense'] = info_box
|
577 |
-
# else:
|
578 |
-
# generated_plan[-1]['toolAug-commonsense'] = None
|
579 |
-
# info_box = None
|
580 |
-
# commonsense_statistic[query_data['level']][query_data['days']].append(info_box)
|
581 |
-
# with open(f'/home/xj/toolAugEnv/code/toolConstraint/results/turbo16k-turbo16k/{user}/plan_{idx}.json','w') as f:
|
582 |
-
# json.dump(generated_plan,f)
|
583 |
-
|
584 |
-
# with open(f'/home/xj/toolAugEnv/code/toolConstraint/results/turbo16k-turbo16k/{user}/commonsense_statistic.json','w') as f:
|
585 |
-
# json.dump(commonsense_statistic,f)
|
586 |
-
|
587 |
-
# if __name__ == "__main__":
|
588 |
-
# user = 'all'
|
589 |
-
# model_type = ['chatgpt','gpt4','greedy_search'][2]
|
590 |
-
# query_data_list = load_line_json_data(f'/home/xj/toolAugEnv/code/toolConstraint/data/query/{user}.jsonl')
|
591 |
-
# # idx_number_list = extract_numbers_from_filenames(f'/home/xj/toolAugEnv/code/toolConstraint/data/annotation/{user}')
|
592 |
-
# idx_number_list = [i for i in range(1,501)]
|
593 |
-
# commonsense_statistic= {level:{day:[] for day in [3,5,7]} for level in ['easy','medium','hard']}
|
594 |
-
|
595 |
-
# for idx in idx_number_list:
|
596 |
-
# print(idx)
|
597 |
-
# query_data = query_data_list[idx-1]
|
598 |
-
# generated_plan = json.load(open(f'/home/xj/toolAugEnv/code/toolConstraint/results/pre2/{user}/plan_{idx}.json'))
|
599 |
-
# # generated_plan = generated_plan[:-1]
|
600 |
-
# if model_type == 'greedy_search':
|
601 |
-
# info_box = evaluation(query_data, generated_plan[-1][f'greedy_search_plan'])
|
602 |
-
# else:
|
603 |
-
# info_box = evaluation(query_data, generated_plan[-1][f'{model_type}_human_collected_info_results_parsed'])
|
604 |
-
# generated_plan[-1][f'{model_type}_with_human_collected_commonsense'] = info_box
|
605 |
-
# commonsense_statistic[query_data['level']][query_data['days']].append(info_box)
|
606 |
-
|
607 |
-
# with open(f'/home/xj/toolAugEnv/code/toolConstraint/results/pre2/{user}/plan_{idx}.json','w') as f:
|
608 |
-
# json.dump(generated_plan,f)
|
609 |
-
|
610 |
-
# with open(f'/home/xj/toolAugEnv/code/toolConstraint/results/pre2/{user}/{model_type}_with_human_collected_commonsense_statistic.json','w') as f:
|
611 |
-
# json.dump(commonsense_statistic,f)
|
612 |
-
|
613 |
-
|
614 |
-
# if __name__ == "__main__":
|
615 |
-
# user = 'all'
|
616 |
-
# query_data_list = load_line_json_data(f'/home/xj/toolAugEnv/code/toolConstraint/data/query/{user}.jsonl')
|
617 |
-
# idx_number_list = extract_numbers_from_filenames(f'/home/xj/toolAugEnv/code/toolConstraint/data/annotation/{user}')
|
618 |
-
# hardConstraint_statistic= {level:{day:[] for day in [3,5,7]} for level in ['easy','medium','hard']}
|
619 |
-
# not_satified = []
|
620 |
-
# for idx in tqdm(idx_number_list):
|
621 |
-
# # print(idx)
|
622 |
-
# query_data = query_data_list[idx-1]
|
623 |
-
# generated_plan = json.load(open(f'/home/xj/toolAugEnv/code/toolConstraint/data/annotation/{user}/annotation_{idx}.json'))
|
624 |
-
|
625 |
-
# if not boolean_evaluation(query_data, generated_plan):
|
626 |
-
# not_satified.append(idx)
|
627 |
-
# print(idx)
|
628 |
-
# generated_plan = generated_plan[:-1]
|
629 |
-
# print(not_satified)
|
630 |
-
|
631 |
-
if __name__ == "__main__":
|
632 |
-
set_type = ["train",'dev','test'][0]
|
633 |
-
query_data_list = load_line_json_data(f'/home/xj/toolAugEnv/code/toolConstraint/data/final_data/{set_type}/query/query.jsonl')
|
634 |
-
# idx_number_list = extract_numbers_from_filenames(f'/home/xj/toolAugEnv/code/toolConstraint/data/final_data/{set_type}/plan')
|
635 |
-
commonsense_statistic= {level:{day:[] for day in [3,5,7]} for level in ['easy','medium','hard']}
|
636 |
-
not_satified = []
|
637 |
-
# print( idx_number_list)
|
638 |
-
for idx in tqdm(range(1,len(query_data_list)+1)):
|
639 |
-
# print(idx)
|
640 |
-
query_data = query_data_list[idx-1]
|
641 |
-
generated_plan = json.load(open(f'/home/xj/toolAugEnv/code/toolConstraint/data/final_data/{set_type}/plan/plan_{idx}.json'))
|
642 |
-
try:
|
643 |
-
store_plan = json.load(open(f'/home/xj/toolAugEnv/code/toolConstraint/results/{set_type}/plan_{idx}.json'))
|
644 |
-
except FileNotFoundError:
|
645 |
-
store_plan = [{}]
|
646 |
-
info_box = evaluation(query_data,generated_plan[1])
|
647 |
-
# if not boolean_evaluation(query_data, generated_plan[1]):
|
648 |
-
# not_satified.append(idx)
|
649 |
-
# print(idx)
|
650 |
-
# print(store_plan[-1])
|
651 |
-
store_plan[-1][f'human_anno_commonsense_constraint'] = info_box
|
652 |
-
with open(f'/home/xj/toolAugEnv/code/toolConstraint/results/{set_type}/plan_{idx}.json','w') as f:
|
653 |
-
json.dump(store_plan,f)
|
654 |
-
commonsense_statistic[query_data['level']][query_data['days']].append(info_box)
|
655 |
-
print(not_satified)
|
656 |
-
with open(f'/home/xj/toolAugEnv/code/toolConstraint/results/{set_type}/human_anno_commonsense_constraint.json','w') as f:
|
657 |
-
json.dump(commonsense_statistic,f)
|
658 |
-
|
659 |
-
# if __name__ == "__main__":
|
660 |
-
# user = 'all'
|
661 |
-
# model_type = ['chatgpt','gpt4'][1]
|
662 |
-
# query_data_list = load_line_json_data(f'/home/xj/toolAugEnv/code/toolConstraint/data/query/{user}.jsonl')
|
663 |
-
# # idx_number_list = extract_numbers_from_filenames(f'/home/xj/toolAugEnv/code/toolConstraint/data/annotation/{user}')
|
664 |
-
# idx_number_list = [i for i in range(1,501)]
|
665 |
-
# commonsense_statistic= {level:{day:[] for day in [3,5,7]} for level in ['easy','medium','hard']}
|
666 |
-
# cnt = 0
|
667 |
-
# for idx in idx_number_list:
|
668 |
-
# # print(idx)
|
669 |
-
# query_data = query_data_list[idx-1]
|
670 |
-
# generated_plan = json.load(open(f'/home/xj/toolAugEnv/code/toolConstraint/results/pre/{user}/plan_{idx}.json'))[-1]['gpt4_human_collected_info_results_parsed']
|
671 |
-
# # generated_plan = generated_plan[:-1]
|
672 |
-
|
673 |
-
# if not boolean_evaluation(query_data, generated_plan):
|
674 |
-
# cnt += 1
|
675 |
-
# print(idx)
|
676 |
-
# print(cnt)
|
677 |
-
|
678 |
-
# if __name__ == "__main__":
|
679 |
-
# parser = argparse.ArgumentParser(description="")
|
680 |
-
# # model_type = ['gpt-3.5-turbo-1106','gpt-4-1106-preview','greedy_search','mistral-7B-32K','gemini2','mixtral','gpt-3.5-turbo-11062'][-1]
|
681 |
-
# # method = ['direct','cot','react','reflexion','tool-use'][-1]
|
682 |
-
# # set_type = ['dev','test'][0]
|
683 |
-
# parser.add_argument("--model_type", type=str, default="gpt-3.5-turbo-1106")
|
684 |
-
# parser.add_argument("--method", type=str, default="direct")
|
685 |
-
# parser.add_argument("--set_type", type=str, default="dev")
|
686 |
-
# args = parser.parse_args()
|
687 |
-
# directory = f'/home/xj/toolAugEnv/code/toolConstraint/data/final_data/{args.set_type}'
|
688 |
-
# query_data_list = load_line_json_data(os.path.join(directory, 'query/query.jsonl'))
|
689 |
-
# # idx_number_list = extract_numbers_from_filenames(f'/home/xj/toolAugEnv/code/toolConstraint/data/annotation/{user}')
|
690 |
-
# idx_number_list = [i for i in range(1,len(query_data_list)+1)]
|
691 |
-
# commonsense_statistic= {level:{day:[] for day in [3,5,7]} for level in ['easy','medium','hard']}
|
692 |
-
# deliver_cnt = 0
|
693 |
-
# if args.method == 'tool-use':
|
694 |
-
# suffix = ''
|
695 |
-
# else:
|
696 |
-
# suffix = '_with_human_info'
|
697 |
-
# for idx in tqdm(idx_number_list):
|
698 |
-
# # print(idx)
|
699 |
-
# query_data = query_data_list[idx-1]
|
700 |
-
# generated_plan = json.load(open(f'/home/xj/toolAugEnv/code/toolConstraint/results/{args.set_type}/plan_{idx}.json'))
|
701 |
-
# # generated_plan = generated_plan[:-1]
|
702 |
-
# if args.model_type == 'greedy_search':
|
703 |
-
# info_box = evaluation(query_data, generated_plan[-1][f'greedy_search_plan'])
|
704 |
-
# else:
|
705 |
-
# if args.method == 'tool-use':
|
706 |
-
# suffix2 = ''
|
707 |
-
# else:
|
708 |
-
# suffix2 = '_collected'
|
709 |
-
# if generated_plan[-1][f'{args.model_type}_{args.method}{suffix2}_info_results'] and generated_plan[-1][f'{args.model_type}_{args.method}{suffix2}_info_results']!='Max Token Length Exceeded.':
|
710 |
-
# try:
|
711 |
-
# info_box = evaluation(query_data, generated_plan[-1][f'{args.model_type}_{args.method}{suffix}_results_parsed'])
|
712 |
-
# except KeyError:
|
713 |
-
# info_box = None
|
714 |
-
# generated_plan[-1][f'{args.model_type}_{args.method}{suffix2}_info_results'] = ""
|
715 |
-
# except IndexError:
|
716 |
-
# info_box = None
|
717 |
-
# generated_plan[-1][f'{args.model_type}_{args.method}{suffix2}_info_results'] = ""
|
718 |
-
# else:
|
719 |
-
# info_box = None
|
720 |
-
# if info_box:
|
721 |
-
# deliver_cnt += 1
|
722 |
-
# generated_plan[-1][f'{args.model_type}_{args.method}{suffix}_commonsense_constraint'] = info_box
|
723 |
-
# commonsense_statistic[query_data['level']][query_data['days']].append(info_box)
|
724 |
-
|
725 |
-
# with open(f'/home/xj/toolAugEnv/code/toolConstraint/results/{args.set_type}/plan_{idx}.json','w') as f:
|
726 |
-
# json.dump(generated_plan,f)
|
727 |
-
|
728 |
-
# with open(f'/home/xj/toolAugEnv/code/toolConstraint/results/{args.set_type}/{args.model_type}_{args.method}{suffix}_commonsense_constraint.json','w') as f:
|
729 |
-
# json.dump(commonsense_statistic,f)
|
730 |
-
|
731 |
-
# if args.set_type == 'dev':
|
732 |
-
# print(f"Model:{args.model_type} Method:{args.method} Set: {args.set_type} \nDeliver Rate: {deliver_cnt/180}" )
|
733 |
-
# elif args.set_type == 'test':
|
734 |
-
# print(f"Model:{args.model_type} Method:{args.method} Set: {args.set_type} \nDeliver Rate: {deliver_cnt/1000}" )
|
735 |
-
|
|
|
545 |
print(return_info[key][1])
|
546 |
return False
|
547 |
return True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|