Create README.md
Browse files
README.md
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# T5: Text-To-Text Transfer Transformer for English-Vietnamese translation

#### Example usage

```python
|
2 |
+
import torch
|
3 |
+
|
4 |
+
from transformers import MT5ForConditionalGeneration, T5Tokenizer
|
5 |
+
import torch
|
6 |
+
if torch.cuda.is_available():
|
7 |
+
device = torch.device("cuda")
|
8 |
+
|
9 |
+
print('There are %d GPU(s) available.' % torch.cuda.device_count())
|
10 |
+
|
11 |
+
print('We will use the GPU:', torch.cuda.get_device_name(0))
|
12 |
+
else:
|
13 |
+
print('No GPU available, using the CPU instead.')
|
14 |
+
device = torch.device("cpu")
|
15 |
+
|
16 |
+
model = MT5ForConditionalGeneration.from_pretrained("NlpHUST/t5-en-vi-small")
|
17 |
+
tokenizer = T5Tokenizer.from_pretrained("NlpHUST/t5-en-vi-small")
|
18 |
+
model.to(device)
|
19 |
+
|
20 |
+
src = "In school , we spent a lot of time studying the history of Kim Il-Sung , but we never learned much about the outside world , except that America , South Korea , Japan are the enemies ."
|
21 |
+
tokenized_text = tokenizer.encode(src, return_tensors="pt").to(device)
|
22 |
+
model.eval()
|
23 |
+
summary_ids = model.generate(
|
24 |
+
tokenized_text,
|
25 |
+
max_length=128,
|
26 |
+
num_beams=5,
|
27 |
+
repetition_penalty=2.5,
|
28 |
+
length_penalty=1.0,
|
29 |
+
early_stopping=True
|
30 |
+
)
|
31 |
+
output = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
|
32 |
+
print(output)
|
33 |
+
```

Output:

```
Ở trường, chúng tôi dành nhiều thời gian để nghiên cứu về lịch sử Kim Il-Sung, nhưng chúng tôi chưa bao giờ học được nhiều về thế giới bên ngoài, ngoại trừ Mỹ, Hàn Quốc, Nhật Bản là kẻ thù.
```
|