# An argparse script: point it at an eval directory, run the evaluation on every
# file in that directory, and save the resulting JSON files to the output directory.
# usage: python .\batch_eval_script.py ..\deid_resaut
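# Files whose names cannot be parsed by utils.upload_hub.file_name_decode are skipped silently.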

import os
import json
import argparse
import streamlit as st

from huggingface_hub import hf_hub_download

from utils.Evaluation_answer_txt import Evaluation_answer_txt
from utils.upload_hub import file_name_decode

# Function to download gold answer based on dataset name
def download_gold_answer(repo, filename, token, force_download=False):
    ret = hf_hub_download(repo_id=repo, repo_type='dataset', filename=filename, token=token, force_download=force_download)
    return ret

HUB_TOKEN = st.secrets['hf']
ANSWER_REPO = 'zhaorui-nb/leaderboard-answer'
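# Gold answers are fetched once at import time; hf_hub_download caches them locally,
# so repeated runs reuse the cached copies unless force_download=True is passed.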
GET_GOLD_ANSWER_PATH = {
    'Setting1': download_gold_answer(ANSWER_REPO, 'dataset/Setting1_test_answer.txt', HUB_TOKEN),
    'Setting2': download_gold_answer(ANSWER_REPO, 'dataset/Setting2_test_answer.txt', HUB_TOKEN),
    'Setting3': download_gold_answer(ANSWER_REPO, 'dataset/Setting3_test_answer.txt', HUB_TOKEN)
}

# Function to evaluate answer text
def eval_answer_txt(set_name, uploaded_file_path):
    if set_name not in GET_GOLD_ANSWER_PATH:
        return None
    gold_answer_txt = GET_GOLD_ANSWER_PATH[set_name]
    evaluator = Evaluation_answer_txt(gold_answer_txt, uploaded_file_path)
    score_json = evaluator.eval()
    return score_json

# Function to traverse directory and evaluate files
def evaluate_directory(input_dir, output_dir='./.output'):
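    # Walk input_dir recursively and write one leaderboard JSON per file that can be scored.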
    os.makedirs(output_dir, exist_ok=True)
    for root, _, files in os.walk(input_dir):
        for file in files:
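            # Decode model name, dataset, method, and original file name from the upload filename.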
            filename_info = file_name_decode(file)
            if filename_info:
                model_name_input = filename_info['model_name']
                dataset_input = filename_info['dataset']
                method_input = filename_info['method']
                file_name = filename_info['file_name']
                
                # resolve the absolute path of the file before evaluating it
                file_path = os.path.abspath(os.path.join(root, file))
                score_json = eval_answer_txt(dataset_input, file_path)
                # print(f"sss" , GET_GOLD_ANSWER_PATH[dataset_input], file_path)
                if score_json:
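                    # Flatten the micro / macro averages into a single leaderboard record.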
                    leaderboard_dict = {
                        "model name": model_name_input,
                        "dataset": dataset_input,
                        "method": method_input,
                        "file name": file_name,
                        "submitter": 'zhaorui',

                        "MICRO precision": score_json["MICRO_AVERAGE"]["precision"],
                        "MICRO recall": score_json["MICRO_AVERAGE"]["recall"],
                        "MICRO f1": score_json["MICRO_AVERAGE"]["f1"],
                        "MACRO precision": score_json["MACRO_AVERAGE"]["precision"],
                        "MACRO recall": score_json["MACRO_AVERAGE"]["recall"],
                        "MACRO f1": score_json["MACRO_AVERAGE"]["f1"],
                        "detail result": json.dumps(score_json,indent=4) #score_json
                    }
                    
                    # train-[01-ai@Yi-1.5-6B-Chat][Setting1][icl][answer.txt].json
                    repo_file_name = f'train-[{model_name_input}][{dataset_input}][{method_input}][{file_name}].json'
                    output_path = os.path.join(output_dir, repo_file_name)
                    with open(output_path, 'w') as f:
                        json.dump(leaderboard_dict, f, indent=4)
                else:
                    print(f"Failed to evaluate {file_path}")



# Main function to handle argparse
def main():
    parser = argparse.ArgumentParser(description="Evaluate all text files in the given directory.")
    parser.add_argument('input_dir', type=str, help='Path to the directory containing text files.')
    parser.add_argument('--output_dir', type=str, default='./.output', help='Path to the directory to save the output json files.')

    args = parser.parse_args()
    
    evaluate_directory(args.input_dir, args.output_dir)
    
    print(f"Evaluation completed. Results saved to evaluation_results.json")

if __name__ == "__main__":
    main()