"""Detokenization helpers built on the sacremoses Moses detokenizer."""

import re
import sys

from sacremoses import MosesDetokenizer

# Shared detokenizer instances, created once at import time.
md_en = MosesDetokenizer(lang='en')
md_zh = MosesDetokenizer(lang='zh')

# Per-language cache so moses_detokenize() does not rebuild a detokenizer on
# every call; pre-seeded with the module-level instances.
_detokenizers = {'en': md_en, 'zh': md_zh}


def _get_detokenizer(language):
    """Return a cached MosesDetokenizer for *language*, creating it on demand."""
    detokenizer = _detokenizers.get(language)
    if detokenizer is None:
        detokenizer = _detokenizers[language] = MosesDetokenizer(lang=language)
    return detokenizer


def moses_detokenize(tokens, language='en'):
    """Join *tokens* into a sentence with the Moses detokenizer.

    Args:
        tokens: Sequence of token strings.
        language: Language code passed to ``MosesDetokenizer`` (default 'en').

    Returns:
        The detokenized sentence with leading/trailing whitespace stripped.
    """
    sentence = _get_detokenizer(language).detokenize(tokens, return_str=True)
    # Return the processed sentence.
    return sentence.strip()


def detokenize(tokens, mode):
    """Turn *tokens* back into text according to the translation *mode*.

    When *mode* is "汉译英" the tokens are Moses-detokenized with the default
    language and any " n't" is re-attached to the preceding word. For every
    other mode the tokens are concatenated with no separator.

    Args:
        tokens: Sequence of token strings.
        mode: Translation direction label.

    Returns:
        The reconstructed text.
    """
    if mode == "汉译英":
        text = moses_detokenize(tokens)
        # Glue contractions back together, e.g. "do n't" -> "don't".
        return text.replace(" n't", "n't")
    return ''.join(tokens)


def detokenize2(tokens, mode):
    """Detokenize BPE-segmented *tokens* and strip the "@@ " markers.

    Uses the module-level English detokenizer when *mode* is "汉译英" and the
    Chinese one otherwise, then removes every "@@ " occurrence from the
    result. A "@@" that is not followed by a space is left untouched.

    Args:
        tokens: Sequence of token strings, possibly carrying "@@" suffixes.
        mode: Translation direction label.

    Returns:
        The detokenized text with "@@ " markers removed.
    """
    detokenizer = md_en if mode == "汉译英" else md_zh
    merged = detokenizer.detokenize(tokens, return_str=True)
    return merged.replace("@@ ", "")