JiaenLiu commited on
Commit
4abcb2a
1 Parent(s): 11ff02b

Former-commit-id: 787a3bc38b29041f667fb2dbad0faca7089b83ba

evaluation/scores/__init__.py ADDED
File without changes
evaluation/scores/multi_scores.py CHANGED
@@ -10,11 +10,23 @@ class multi_scores:
10
  self.LLM_model = LLM_eval.init_evaluator(source_lang=source_lang, target_lang=target_lang, domain=domain)
11
  # self.score = {}
12
 
 
 
 
 
 
 
 
 
 
 
13
  # The function to get the scores
14
  # src: original sentence
15
  # mt: machine translation
16
  # ref: reference translation
17
  def calculate_comet_llm(self, src:str, mt:str, ref:str) -> dict:
 
 
18
  comet_score = self.comet_model.predict([{"src":src, "mt":mt, "ref":ref}], batch_size=8, gpus=0).scores[0]
19
  # bleu_score = self.bleu_model.corpus_score([mt], [ref]).score
20
  llm_acc, llm_completeness = LLM_eval.evaluate_prediction(src, ref, mt, self.LLM_model)
@@ -25,22 +37,27 @@ class multi_scores:
25
  # self.score['llm_explanation'] = llm_explanation
26
 
27
  def calculate_bleu(self, mts:list, refs:list) -> dict:
 
 
 
 
 
28
  # mt and ref are list of sentences
29
  bleu_score = self.bleu_model.corpus_score(mts, refs).score
30
  return {'bleu_score':bleu_score}
31
 
32
  def get_scores(self, src:str, mt:str, ref:str) -> dict:
33
  comet_score = self.comet_model.predict([{"src":src, "mt":mt, "ref":ref}], batch_size=8, gpus=0).scores[0]
34
- bleu_score = self.bleu_model.corpus_score([mt], [ref]).score
35
  llm_acc, llm_completeness = LLM_eval.evaluate_prediction(src, ref, mt, self.LLM_model)
36
  return {'bleu_score':bleu_score ,'comet_score':comet_score, 'llm_score':llm_acc[0], 'llm_explanation': llm_acc[1]}
37
 
38
 
39
if __name__ == "__main__":
    # Smoke test: score one English->Chinese example end to end.
    src = "this is an test sentences"
    mt = "这是一个测试句子。"
    ref = "这不是一个测试语句。"
    scorer = multi_scores()
    print(scorer.get_scores(src, mt, ref))
    # Alternative entry points:
    # print(scorer.calculate_comet_llm(src, mt, ref))
    # print(scorer.calculate_bleu([mt], [ref]))
46
 
 
10
  self.LLM_model = LLM_eval.init_evaluator(source_lang=source_lang, target_lang=target_lang, domain=domain)
11
  # self.score = {}
12
 
13
+ def __preprocess(self, src:str, mt:str, ref:str) -> dict:
14
+ # remove the space in the beginning and end of the sentence\
15
+ src = src.strip()
16
+ mt = mt.strip()
17
+ ref = ref.strip()
18
+ print(src, mt, ref)
19
+ return {'src':src, 'mt':mt, 'ref':ref}
20
+
21
+
22
+
23
  # The function to get the scores
24
  # src: original sentence
25
  # mt: machine translation
26
  # ref: reference translation
27
  def calculate_comet_llm(self, src:str, mt:str, ref:str) -> dict:
28
+ # preprocess the input
29
+ src, mt, ref = self.__preprocess(src, mt, ref)
30
  comet_score = self.comet_model.predict([{"src":src, "mt":mt, "ref":ref}], batch_size=8, gpus=0).scores[0]
31
  # bleu_score = self.bleu_model.corpus_score([mt], [ref]).score
32
  llm_acc, llm_completeness = LLM_eval.evaluate_prediction(src, ref, mt, self.LLM_model)
 
37
  # self.score['llm_explanation'] = llm_explanation
38
 
39
  def calculate_bleu(self, mts:list, refs:list) -> dict:
40
+ # src, mt, ref = self.__preprocess(src, mt, ref)
41
+ # remove the space in the beginning and end of the sentence for each sentence
42
+ # mts = [mt.strip() for mt in mts]
43
+ # refs = [ref.strip() for ref in refs]
44
+ # print(mts, refs)
45
  # mt and ref are list of sentences
46
  bleu_score = self.bleu_model.corpus_score(mts, refs).score
47
  return {'bleu_score':bleu_score}
48
 
49
  def get_scores(self, src:str, mt:str, ref:str) -> dict:
50
  comet_score = self.comet_model.predict([{"src":src, "mt":mt, "ref":ref}], batch_size=8, gpus=0).scores[0]
51
+ bleu_score = self.bleu_model.corpus_score([mt], [[ref]]).score
52
  llm_acc, llm_completeness = LLM_eval.evaluate_prediction(src, ref, mt, self.LLM_model)
53
  return {'bleu_score':bleu_score ,'comet_score':comet_score, 'llm_score':llm_acc[0], 'llm_explanation': llm_acc[1]}
54
 
55
 
56
if __name__ == "__main__":
    # Smoke test: BLEU only (COMET / LLM calls left commented out).
    src = "South Korea playing with the Blue Proto's Probes"
    mt = "位于对角线的另一个角落 使用蓝色的Proto's Probes"
    ref = " 在对角落里使用蓝色神族探机 他的名字是..."
    scorer = multi_scores()
    # print(scorer.get_scores(src, mt, ref))
    # print(scorer.calculate_comet_llm(src, mt, ref))
    print(scorer.calculate_bleu([mt], [[ref]]))