# An argparse script: point it at an evaluation directory, run the evaluation
# on every answer file found there, and save the resulting JSON files.
# usage: python .\batch_eval_script.py ..\deid_resaut

import os
import json
import argparse

import streamlit as st
from huggingface_hub import hf_hub_download

from utils.Evaluation_answer_txt import Evaluation_answer_txt
from utils.upload_hub import file_name_decode


# Download the gold-answer file for a dataset from the Hugging Face Hub.
def download_gold_answer(repo, filename, token, force_download=False):
    ret = hf_hub_download(repo_id=repo, repo_type='dataset', filename=filename,
                          token=token, force_download=force_download)
    return ret


# Hub token comes from Streamlit secrets; the repo holds the gold answers.
HUB_TOKEN = st.secrets['hf']
ANSWER_REPO = 'zhaorui-nb/leaderboard-answer'

# Local paths of the downloaded gold-answer files, keyed by setting name.
GET_GOLD_ANSWER_PATH = {
    'Setting1': download_gold_answer(ANSWER_REPO, 'dataset/Setting1_test_answer.txt', HUB_TOKEN),
    'Setting2': download_gold_answer(ANSWER_REPO, 'dataset/Setting2_test_answer.txt', HUB_TOKEN),
    'Setting3': download_gold_answer(ANSWER_REPO, 'dataset/Setting3_test_answer.txt', HUB_TOKEN),
}


# Evaluate an uploaded answer file against the gold answers for its setting.
def eval_answer_txt(set_name, uploaded_file_path):
    if set_name not in GET_GOLD_ANSWER_PATH:
        return None
    gold_answer_txt = GET_GOLD_ANSWER_PATH[set_name]
    evaluator = Evaluation_answer_txt(gold_answer_txt, uploaded_file_path)
    score_json = evaluator.eval()
    return score_json


# Walk the input directory, evaluate every recognized file, and write one
# leaderboard JSON per file into the output directory.
def evaluate_directory(input_dir, output_dir='./.output'):
    os.makedirs(output_dir, exist_ok=True)
    for root, _, files in os.walk(input_dir):
        for file in files:
            filename_info = file_name_decode(file)
            if not filename_info:
                continue

            model_name_input = filename_info['model_name']
            dataset_input = filename_info['dataset']
            method_input = filename_info['method']
            file_name = filename_info['file_name']

            # Get the absolute path of the file.
            file_path = os.path.abspath(os.path.join(root, file))

            score_json = eval_answer_txt(dataset_input, file_path)
            if score_json:
                leaderboard_dict = {
                    "model name": model_name_input,
                    "dataset": dataset_input,
                    "method": method_input,
                    "file name": file_name,
                    "submitter": 'zhaorui',
                    "MICRO precision": score_json["MICRO_AVERAGE"]["precision"],
                    "MICRO recall": score_json["MICRO_AVERAGE"]["recall"],
                    "MICRO f1": score_json["MICRO_AVERAGE"]["f1"],
                    "MACRO precision": score_json["MACRO_AVERAGE"]["precision"],
                    "MACRO recall": score_json["MACRO_AVERAGE"]["recall"],
                    "MACRO f1": score_json["MACRO_AVERAGE"]["f1"],
                    "detail result": json.dumps(score_json, indent=4),
                }

                # e.g. train-[01-ai@Yi-1.5-6B-Chat][Setting1][icl][answer.txt].json
                repo_file_name = f'train-[{model_name_input}][{dataset_input}][{method_input}][{file_name}].json'
                output_path = os.path.join(output_dir, repo_file_name)
                with open(output_path, 'w') as f:
                    json.dump(leaderboard_dict, f, indent=4)
            else:
                print(f"Failed to evaluate {file_path}")


# Parse command-line arguments and run the batch evaluation.
def main():
    parser = argparse.ArgumentParser(description="Evaluate all text files in the given directory.")
    parser.add_argument('input_dir', type=str, help='Path to the directory containing text files.')
    parser.add_argument('--output_dir', type=str, default='./.output',
                        help='Path to the directory to save the output json files.')
    args = parser.parse_args()

    evaluate_directory(args.input_dir, args.output_dir)
    print(f"Evaluation completed. Results saved to {args.output_dir}")


if __name__ == "__main__":
    main()