translate / detokenizer.py
Dy3257's picture
Upload 9 files
7c19755 verified
raw
history blame
910 Bytes
import re
import sys
from sacremoses import MosesDetokenizer
# Module-level Moses detokenizers, constructed once at import time.
# detokenize2() uses md_en for the "汉译英" mode and md_zh for every other mode.
md_en = MosesDetokenizer(lang='en')
md_zh = MosesDetokenizer(lang='zh')
def moses_detokenize(tokens, language='en'):
    """Join a list of tokens into a sentence using a Moses detokenizer.

    Args:
        tokens: Sequence of token strings to detokenize.
        language: Language code handed to ``MosesDetokenizer``; defaults
            to ``'en'``.

    Returns:
        The detokenized sentence with leading and trailing whitespace
        stripped.
    """
    # A new detokenizer is created per call, keyed on the requested language.
    return MosesDetokenizer(lang=language).detokenize(
        tokens, return_str=True
    ).strip()
def detokenize(tokens, mode):
    """Turn a token list back into text according to the translation mode.

    Args:
        tokens: Sequence of token strings.
        mode: Translation direction. ``"汉译英"`` (Chinese-to-English)
            selects Moses detokenization; any other value concatenates
            the tokens with no separator.

    Returns:
        The reconstructed text.
    """
    # Anything other than Chinese-to-English: glue the tokens together as-is.
    if mode != "汉译英":
        return ''.join(tokens)

    sentence = moses_detokenize(tokens)
    # Re-attach the contraction suffix that is still separated by a space
    # (e.g. "do n't" -> "don't").
    return re.sub(r" n't", "n't", sentence)
def detokenize2(tokens, mode):
    """Detokenize BPE-segmented tokens and remove the ``"@@ "`` markers.

    Args:
        tokens: Sequence of token strings, possibly carrying ``@@``
            continuation markers.
        mode: Translation direction. ``"汉译英"`` (Chinese-to-English)
            uses the module-level English detokenizer ``md_en``; any
            other value uses the Chinese one, ``md_zh``.

    Returns:
        The detokenized text with every ``"@@ "`` occurrence removed.
    """
    detokenizer = md_en if mode == "汉译英" else md_zh
    joined = detokenizer.detokenize(tokens, return_str=True)
    # Strip the "@@ " continuation markers so split subwords merge back.
    return re.sub(r"@@ ", "", joined)