JiaenLiu commited on
Commit
4abcb2a
1 Parent(s): 11ff02b

Former-commit-id: 787a3bc38b29041f667fb2dbad0faca7089b83ba

evaluation/scores/__init__.py ADDED
File without changes
evaluation/scores/multi_scores.py CHANGED
@@ -10,11 +10,23 @@ class multi_scores:
10
  self.LLM_model = LLM_eval.init_evaluator(source_lang=source_lang, target_lang=target_lang, domain=domain)
11
  # self.score = {}
12
 
 
 
 
 
 
 
 
 
 
 
13
  # The function to get the scores
14
  # src: original sentence
15
  # mt: machine translation
16
  # ref: reference translation
17
  def calculate_comet_llm(self, src:str, mt:str, ref:str) -> dict:
 
 
18
  comet_score = self.comet_model.predict([{"src":src, "mt":mt, "ref":ref}], batch_size=8, gpus=0).scores[0]
19
  # bleu_score = self.bleu_model.corpus_score([mt], [ref]).score
20
  llm_acc, llm_completeness = LLM_eval.evaluate_prediction(src, ref, mt, self.LLM_model)
@@ -25,22 +37,27 @@ class multi_scores:
25
  # self.score['llm_explanation'] = llm_explanation
26
 
27
  def calculate_bleu(self, mts:list, refs:list) -> dict:
 
 
 
 
 
28
  # mt and ref are list of sentences
29
  bleu_score = self.bleu_model.corpus_score(mts, refs).score
30
  return {'bleu_score':bleu_score}
31
 
32
  def get_scores(self, src:str, mt:str, ref:str) -> dict:
33
  comet_score = self.comet_model.predict([{"src":src, "mt":mt, "ref":ref}], batch_size=8, gpus=0).scores[0]
34
- bleu_score = self.bleu_model.corpus_score([mt], [ref]).score
35
  llm_acc, llm_completeness = LLM_eval.evaluate_prediction(src, ref, mt, self.LLM_model)
36
  return {'bleu_score':bleu_score ,'comet_score':comet_score, 'llm_score':llm_acc[0], 'llm_explanation': llm_acc[1]}
37
 
38
 
39
if __name__ == "__main__":
    # Smoke test: score one English->Chinese example end to end.
    src = "this is an test sentences"
    mt = "这是一个测试句子。"
    ref = "这不是一个测试语句。"
    scorer = multi_scores()
    print(scorer.get_scores(src, mt, ref))
    # Alternative entry points:
    # print(scorer.calculate_comet_llm(src, mt, ref))
    # print(scorer.calculate_bleu([mt], [ref]))
46
 
 
10
  self.LLM_model = LLM_eval.init_evaluator(source_lang=source_lang, target_lang=target_lang, domain=domain)
11
  # self.score = {}
12
 
13
+ def __preprocess(self, src:str, mt:str, ref:str) -> dict:
14
+ # remove the space in the beginning and end of the sentence\
15
+ src = src.strip()
16
+ mt = mt.strip()
17
+ ref = ref.strip()
18
+ print(src, mt, ref)
19
+ return {'src':src, 'mt':mt, 'ref':ref}
20
+
21
+
22
+
23
  # The function to get the scores
24
  # src: original sentence
25
  # mt: machine translation
26
  # ref: reference translation
27
  def calculate_comet_llm(self, src:str, mt:str, ref:str) -> dict:
28
+ # preprocess the input
29
+ src, mt, ref = self.__preprocess(src, mt, ref)
30
  comet_score = self.comet_model.predict([{"src":src, "mt":mt, "ref":ref}], batch_size=8, gpus=0).scores[0]
31
  # bleu_score = self.bleu_model.corpus_score([mt], [ref]).score
32
  llm_acc, llm_completeness = LLM_eval.evaluate_prediction(src, ref, mt, self.LLM_model)
 
37
  # self.score['llm_explanation'] = llm_explanation
38
 
39
  def calculate_bleu(self, mts:list, refs:list) -> dict:
40
+ # src, mt, ref = self.__preprocess(src, mt, ref)
41
+ # remove the space in the beginning and end of the sentence for each sentence
42
+ # mts = [mt.strip() for mt in mts]
43
+ # refs = [ref.strip() for ref in refs]
44
+ # print(mts, refs)
45
  # mt and ref are list of sentences
46
  bleu_score = self.bleu_model.corpus_score(mts, refs).score
47
  return {'bleu_score':bleu_score}
48
 
49
  def get_scores(self, src:str, mt:str, ref:str) -> dict:
50
  comet_score = self.comet_model.predict([{"src":src, "mt":mt, "ref":ref}], batch_size=8, gpus=0).scores[0]
51
+ bleu_score = self.bleu_model.corpus_score([mt], [[ref]]).score
52
  llm_acc, llm_completeness = LLM_eval.evaluate_prediction(src, ref, mt, self.LLM_model)
53
  return {'bleu_score':bleu_score ,'comet_score':comet_score, 'llm_score':llm_acc[0], 'llm_explanation': llm_acc[1]}
54
 
55
 
56
if __name__ == "__main__":
    # Smoke test: BLEU only (COMET / LLM calls left commented out).
    src = "South Korea playing with the Blue Proto's Probes"
    mt = "位于对角线的另一个角落 使用蓝色的Proto's Probes"
    ref = " 在对角落里使用蓝色神族探机 他的名字是..."
    scorer = multi_scores()
    # print(scorer.get_scores(src, mt, ref))
    # print(scorer.calculate_comet_llm(src, mt, ref))
    print(scorer.calculate_bleu([mt], [[ref]]))