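"""Evaluate machine-translated SRT subtitles against a ground-truth SRT file.

The two files are aligned, each sentence pair is scored (BLEU, COMET, LLM),
and per-sentence and averaged results are written to CSV files.
"""
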
import argparse
import pandas as pd
from alignment import alignment
from scores.multi_scores import multi_scores

class Evaluator:
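    """Aligns a predicted SRT file with a ground-truth SRT file and writes
    per-sentence scores and averaged scores to CSV files."""
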
    def __init__(self, pred_path, gt_path, eval_path, res_path):
        self.pred_path = pred_path
        self.gt_path = gt_path
        self.eval_path = eval_path
        self.res_path = res_path

    def eval(self):
        # Align two SRT files
        aligned_srt = alignment(self.pred_path, self.gt_path)

        # Get sentence scores
        scorer = multi_scores()
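        # multi_scores.get_scores(source, prediction, reference) is expected to
        # return a dict with 'bleu_score', 'comet_score', 'llm_score', and
        # 'llm_explanation' keys (see the CSV columns written below).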
        result_data = []
        for (pred_s, gt_s) in aligned_srt:
            print("pred_s.source_text: ", pred_s.source_text)
            print("pred_s.translation: ", pred_s.translation)
            print("gt_s.source_text: ", gt_s.source_text)
            print("gt_s.translation: ", gt_s.translation)

            # Use the ground-truth translation as the reference when it is
            # non-empty; otherwise fall back to the ground-truth source text.
            if gt_s.translation != "":
                scores_dict = scorer.get_scores(pred_s.source_text, pred_s.translation, gt_s.translation)
            else:
                scores_dict = scorer.get_scores(pred_s.source_text, pred_s.translation, gt_s.source_text)

            print("scores_dict: ", scores_dict)

            scores_dict['Source'] = pred_s.source_text
            scores_dict['Prediction'] = pred_s.translation
            scores_dict['Ground Truth'] = gt_s.source_text
            result_data.append(scores_dict)

        eval_df = pd.DataFrame(result_data)
        eval_df.to_csv(
            self.eval_path,
            index=False,
            columns=['Source', 'Prediction', 'Ground Truth',
                     'bleu_score', 'comet_score', 'llm_score', 'llm_explanation'],
        )

        # Get average scores
        avg_llm = eval_df['llm_score'].mean()
        avg_bleu = eval_df['bleu_score'].mean()
        avg_comet = eval_df['comet_score'].mean()

        res_data = {
            'Metric': ['Avg LLM', 'Avg BLEU', 'Avg COMET'],
            'Score': [avg_llm, avg_bleu, avg_comet]
        }
        res_df = pd.DataFrame(res_data)
        res_df.to_csv(self.res_path, index=False)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Evaluate SRT files.')
    parser.add_argument('-bi_path', default='evaluation/test5_tiny/test5_bi.srt', help='Path to predicted SRT file')
    parser.add_argument('-zh_path', default='evaluation/test5_tiny/test5_gt.srt', help='Path to ground truth SRT file')
    parser.add_argument('-eval_output', default='evaluation/test5_tiny/eval.csv', help='Path to eval CSV file')
    parser.add_argument('-res_output', default='evaluation/test5_tiny/res.csv', help='Path to result CSV file')
    args = parser.parse_args()

    evaluator = Evaluator(args.bi_path, args.zh_path, args.eval_output, args.res_output)
    evaluator.eval()


# python evaluation.py -bi_path /home/jiaenliu/project-t/results/test1/test1_bi.srt -zh_path test5_tiny/test1_gt.srt -eval_output /home/jiaenliu/project-t/evaluation/results/test1_large/eval.csv -res_output /home/jiaenliu/project-t/evaluation/results/test1_large/res.csv