# -*- coding: utf-8 -*-
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# Load the tokenizer and a seq2seq model (the LM head is required for .generate())
tokenizer = AutoTokenizer.from_pretrained("SECode/Gradio/t5-base")
model = AutoModelForSeq2SeqLM.from_pretrained("SECode/Gradio/t5-base")
text = "In terms of time."
# Tokenize the text and return PyTorch tensors
batch = tokenizer(text, return_tensors="pt")
# Make sure that the tokenized text does not exceed the maximum
# allowed size of 512 tokens
batch["input_ids"] = batch["input_ids"][:, :512]
batch["attention_mask"] = batch["attention_mask"][:, :512]
# Perform the translation and decode the output
translation = model.generate(**batch)
result = tokenizer.batch_decode(translation, skip_special_tokens=True)
print(result)