# en_to_zh / test.py
# gy567's picture
# initial commit
# 020812d
# raw
# history blame contribute delete
# 676 Bytes
# -*- coding: utf-8 -*-
from transformers import AutoModel, AutoModelForSeq2SeqLM, AutoTokenizer
# Smoke test: run one English sentence through the locally stored T5
# checkpoint and print the decoded output.
MODEL_PATH = "SECode/Gradio/t5-base"
# Maximum number of input tokens fed to the model.
MAX_INPUT_TOKENS = 512

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
# generate() needs the language-modelling head; the bare AutoModel class loads
# the encoder-decoder without it, so the seq2seq-LM auto class is used here.
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_PATH)
text = "In terms of time."
# Tokenize the text. return_tensors="pt" yields 2-D tensors that can be
# unpacked straight into generate(); truncation caps the input at
# MAX_INPUT_TOKENS inside the tokenizer, which (unlike slicing the ids by
# hand) keeps the end-of-sequence token on over-long inputs.
batch = tokenizer(
    [text],
    return_tensors="pt",
    truncation=True,
    max_length=MAX_INPUT_TOKENS,
)
# Perform the translation and decode the output
translation = model.generate(**batch)
result = tokenizer.batch_decode(translation, skip_special_tokens=True)
print(result)