File size: 3,055 Bytes
be9d6f9
 
9387453
 
be9d6f9
3853a8e
11ff02b
be9d6f9
11ff02b
8abf414
11ff02b
be9d6f9
4abcb2a
 
 
 
 
 
 
 
 
 
8abf414
 
 
 
11ff02b
4abcb2a
 
11ff02b
 
 
 
 
 
 
 
 
 
4abcb2a
 
 
 
 
11ff02b
 
 
 
8abf414
3853a8e
4abcb2a
11ff02b
 
 
8abf414
 
4abcb2a
 
 
 
11ff02b
4abcb2a
be9d6f9
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
from comet import download_model, load_from_checkpoint
from sacrebleu.metrics import BLEU, CHRF, TER
from scores import LLM_eval
# import LLM_eval

class multi_scores:
    """Bundle COMET, BLEU and LLM-based scoring for machine translations.

    The COMET checkpoint, the sacreBLEU scorer and the LLM evaluator are
    created once in the constructor and reused by every scoring method.
    """

    def __init__(self, source_lang: str = "en", target_lang: str = "zh", domain: str = "starcraft 2") -> None:
        """Load the scoring back ends.

        Args:
            source_lang: Source language, forwarded to the LLM evaluator.
            target_lang: Target language, forwarded to the LLM evaluator and
                also passed to sacreBLEU as the tokenizer name.
            domain: Domain description forwarded to the LLM evaluator.
        """
        self.comet_model = load_from_checkpoint(download_model("Unbabel/wmt22-comet-da"))
        # NOTE(review): target_lang is used verbatim as the sacreBLEU tokenizer
        # name. That works for "zh"; presumably it fails for language codes that
        # are not tokenizer names (e.g. "en") -- confirm before changing defaults.
        self.bleu_model = BLEU(tokenize=target_lang)
        self.LLM_model = LLM_eval.init_evaluator(source_lang=source_lang, target_lang=target_lang, domain=domain)

    def __preprocess(self, src: str, mt: str, ref: str) -> dict:
        """Strip leading/trailing whitespace from the three sentences.

        Returns:
            A dict with the keys 'src', 'mt' and 'ref' holding the stripped
            strings (the same shape the COMET model is fed with).
        """
        return {'src': src.strip(), 'mt': mt.strip(), 'ref': ref.strip()}

    def calculate_comet_llm(self, src: str, mt: str, ref: str) -> dict:
        """Score one translation with COMET and the LLM evaluator.

        Args:
            src: Original sentence.
            mt: Machine translation.
            ref: Reference translation.

        Returns:
            A dict with the keys 'comet_score', 'llm_score' and
            'llm_explanation'.
        """
        sample = self.__preprocess(src, mt, ref)
        # Read the cleaned values by key: unpacking the dict directly would
        # yield its keys ('src', 'mt', 'ref') instead of the sentences.
        src, mt, ref = sample['src'], sample['mt'], sample['ref']
        comet_score = self.comet_model.predict([sample], batch_size=8, gpus=0).scores[0]
        # The second return value (completeness) is not reported by this method.
        llm_acc, _llm_completeness = LLM_eval.evaluate_prediction(src, ref, mt, self.LLM_model)
        return {'comet_score': comet_score, 'llm_score': llm_acc[0], 'llm_explanation': llm_acc[1]}

    def calculate_bleu(self, mts: list, refs: list) -> dict:
        """Compute the corpus-level BLEU score.

        Args:
            mts: List of machine-translated sentences.
            refs: References in the layout `corpus_score` expects; callers in
                this file pass a list of reference lists, e.g. `[[ref]]`.

        Returns:
            A dict with the single key 'bleu_score'.
        """
        bleu_score = self.bleu_model.corpus_score(mts, refs).score
        return {'bleu_score': bleu_score}

    def get_scores(self, src: str, mt: str, ref: str) -> dict:
        """Score one translation with BLEU, COMET and the LLM evaluator.

        The inputs are whitespace-stripped once so that all three metrics see
        the same text as `calculate_comet_llm` does.

        Args:
            src: Original sentence.
            mt: Machine translation.
            ref: Reference translation.

        Returns:
            A dict with the keys 'bleu_score', 'comet_score', 'llm_score' and
            'llm_explanation' (in that order).
        """
        sample = self.__preprocess(src, mt, ref)
        scores = self.calculate_bleu([sample['mt']], [[sample['ref']]])
        scores.update(self.calculate_comet_llm(sample['src'], sample['mt'], sample['ref']))
        return scores

    
if __name__ == "__main__":
    # Manual smoke test: score one English -> Chinese sample with BLEU only.
    source_text = "South Korea playing with the Blue Proto's Probes"
    hypothesis = "位于对角线的另一个角落  使用蓝色的Proto's Probes"
    reference = " 在对角落里使用蓝色神族探机 他的名字是..."

    scorer = multi_scores()
    # Other entry points that can be tried with the same sample:
    #   scorer.get_scores(source_text, hypothesis, reference)
    #   scorer.calculate_comet_llm(source_text, hypothesis, reference)
    bleu_result = scorer.calculate_bleu([hypothesis], [[reference]])
    print(bleu_result)