sugoi-v4 / server /model.py
playmak3r's picture
add model testing
96aee55
raw
history blame
1.31 kB
import ctranslate2
import sentencepiece as spm
modelDir = "./model"
sp_source_model = "./model/spm.ja.nopretok.model"
sp_target_model = "./model/spm.en.nopretok.model"
# inter_threads: quantas operações independentes podem ser executadas simultaneamente
translator = ctranslate2.Translator(modelDir, device="cpu", intra_threads=4, inter_threads=1)
def tokenizeBatch(text):
sp = spm.SentencePieceProcessor(sp_source_model)
if isinstance(text, list): return sp.encode(text, out_type=str)
elif isinstance(text, str):
return [sp.encode(text, out_type=str)]
def detokenizeBatch(text: str):
sp = spm.SentencePieceProcessor(sp_target_model)
translation = sp.decode(text)
return translation
def translate(text: str):
translated = translator.translate_batch(
source=tokenizeBatch(text),
num_hypotheses= 1,
return_alternatives= False,
replace_unknowns= False,
no_repeat_ngram_size= 3, # repetition_penalty
disable_unk= True,
beam_size= 5,
sampling_temperature= 0,
)
return [''.join( detokenizeBatch(result.hypotheses[0]) ) for result in translated]
if __name__ == "__main__":
translated = translate("ダンガンロンパ 希望の学園と絶望の高校生")
print(translated)