Update app.py
Browse files
app.py
CHANGED
@@ -6,10 +6,17 @@ device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
|
6 |
|
7 |
LANG_CODES = {
|
8 |
"English":"en",
|
9 |
-
"Toki Pona":"tl",
|
10 |
"Romanian":"ro"
|
|
|
|
|
|
|
|
|
|
|
11 |
}
|
12 |
|
|
|
|
|
|
|
13 |
def translate(text, src_lang, tgt_lang, candidates:int):
|
14 |
"""
|
15 |
Translate the text from source lang to target lang
|
@@ -18,25 +25,6 @@ def translate(text, src_lang, tgt_lang, candidates:int):
|
|
18 |
src = LANG_CODES.get(src_lang)
|
19 |
tgt = LANG_CODES.get(tgt_lang)
|
20 |
|
21 |
-
if tgt == tl and src == en:
|
22 |
-
model = AutoModelForSeq2SeqLM.from_pretrained("Jayyydyyy/m2m100_418m_tokipona").to(device)
|
23 |
-
tokenizer = AutoTokenizer.from_pretrained("facebook/m2m100_418M")
|
24 |
-
elif tgt == en and src == tl:
|
25 |
-
model = AutoModelForSeq2SeqLM.from_pretrained("Jayyydyyy/m2m100_418m_tokipona").to(device)
|
26 |
-
tokenizer = AutoTokenizer.from_pretrained("facebook/m2m100_418M")
|
27 |
-
elif tgt == en and src == en:
|
28 |
-
model = AutoModelForSeq2SeqLM.from_pretrained("Jayyydyyy/m2m100_418m_tokipona").to(device)
|
29 |
-
tokenizer = AutoTokenizer.from_pretrained("facebook/m2m100_418M")
|
30 |
-
elif tgt == tl and src == tl:
|
31 |
-
model = AutoModelForSeq2SeqLM.from_pretrained("Jayyydyyy/m2m100_418m_tokipona").to(device)
|
32 |
-
tokenizer = AutoTokenizer.from_pretrained("facebook/m2m100_418M")
|
33 |
-
elif tgt == en and src == ro:
|
34 |
-
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/m2m100_418M").to(device)
|
35 |
-
tokenizer = AutoTokenizer.from_pretrained("facebook/m2m100_418M")
|
36 |
-
elif tgt == ro and src == en:
|
37 |
-
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/m2m100_418M").to(device)
|
38 |
-
tokenizer = AutoTokenizer.from_pretrained("facebook/m2m100_418M")
|
39 |
-
|
40 |
tokenizer.src_lang = src
|
41 |
tokenizer.tgt_lang = tgt
|
42 |
|
@@ -68,12 +56,10 @@ with gr.Blocks() as app:
|
|
68 |
|
69 |
Input your text to translate, a source language and target language, and desired number of return sequences!
|
70 |
|
71 |
-
Right now, this only supports
|
72 |
|
73 |
### Model and Data
|
74 |
-
This app
|
75 |
-
|
76 |
-
The Toki Pona variant of the model was fine-tuned on the English/toki pona bitexts found at [https://tatoeba.org/](https://tatoeba.org/)
|
77 |
|
78 |
### This app is a machine and not all translations will be perfect.
|
79 |
"""
|
@@ -81,27 +67,23 @@ with gr.Blocks() as app:
|
|
81 |
with gr.Row():
|
82 |
gr.Markdown(markdown)
|
83 |
with gr.Column():
|
84 |
-
input_text = gr.components.Textbox(label="Input Text", value="
|
85 |
source_lang = gr.components.Dropdown(label="Source Language", value="English", choices=list(LANG_CODES.keys()))
|
86 |
-
target_lang = gr.components.Dropdown(label="Target Language", value="
|
87 |
return_seqs = gr.Slider(label="Number of return sequences", value=3, minimum=1, maximum=128, step=1)
|
88 |
|
89 |
inputs=[input_text, source_lang, target_lang, return_seqs]
|
90 |
outputs = gr.Textbox()
|
91 |
|
92 |
-
translate_btn = gr.Button("Translate!
|
93 |
translate_btn.click(translate, inputs=inputs, outputs=outputs)
|
94 |
|
95 |
gr.Examples(
|
96 |
[
|
97 |
-
["Hello! How are you?", "English", "
|
98 |
-
["
|
99 |
-
["
|
100 |
-
["
|
101 |
-
["I love this tool!", "English", "Toki Pona", 3],
|
102 |
-
["toki pona li toki pona.", "Toki Pona", "English", 3],
|
103 |
-
["pona toki a", "Toki Pona", "Toki Pona", 3],
|
104 |
-
["I want some bread and rice.", "English", "English", 3],
|
105 |
],
|
106 |
inputs=inputs
|
107 |
)
|
|
|
6 |
|
7 |
# Maps the language names offered in the UI dropdowns to the language codes
# that are assigned to the tokenizer's src_lang / tgt_lang before translating.
LANG_CODES = {
    "English": "en",
    "Romanian": "ro",
    "Spanish": "es",
    "Italian": "it",
    "German": "de",
    "Portuguese": "pt",
    "French": "fr",
}
|
16 |
|
17 |
+
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/m2m100_418M").to(device)
|
18 |
+
tokenizer = AutoTokenizer.from_pretrained("facebook/m2m100_418M")
|
19 |
+
|
20 |
def translate(text, src_lang, tgt_lang, candidates:int):
|
21 |
"""
|
22 |
Translate the text from source lang to target lang
|
|
|
25 |
src = LANG_CODES.get(src_lang)
|
26 |
tgt = LANG_CODES.get(tgt_lang)
|
27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
tokenizer.src_lang = src
|
29 |
tokenizer.tgt_lang = tgt
|
30 |
|
|
|
56 |
|
57 |
Input your text to translate, a source language and target language, and desired number of return sequences!
|
58 |
|
59 |
+
Right now, this only supports 7 languages. I will add more later! So stay tuned!
|
60 |
|
61 |
### Model and Data
|
62 |
+
This app uses Facebook/Meta AI's M2M100 418M param model for translation.
|
|
|
|
|
63 |
|
64 |
### This app is a machine and not all translations will be perfect.
|
65 |
"""
|
|
|
67 |
with gr.Row():
|
68 |
gr.Markdown(markdown)
|
69 |
with gr.Column():
|
70 |
+
input_text = gr.components.Textbox(label="Input Text", value="Hello, world! Have a nice day!")
|
71 |
source_lang = gr.components.Dropdown(label="Source Language", value="English", choices=list(LANG_CODES.keys()))
|
72 |
+
target_lang = gr.components.Dropdown(label="Target Language", value="Romanian", choices=list(LANG_CODES.keys()))
|
73 |
return_seqs = gr.Slider(label="Number of return sequences", value=3, minimum=1, maximum=128, step=1)
|
74 |
|
75 |
inputs=[input_text, source_lang, target_lang, return_seqs]
|
76 |
outputs = gr.Textbox()
|
77 |
|
78 |
+
translate_btn = gr.Button("Translate!")
|
79 |
translate_btn.click(translate, inputs=inputs, outputs=outputs)
|
80 |
|
81 |
# Clickable sample inputs. Each row is
# [text, source language, target language, number of return sequences],
# in the same order as `inputs`. Every language name used here must be a key
# of LANG_CODES, otherwise the lookup in translate() yields None.
gr.Examples(
    [
        ["Hello! How are you?", "English", "Romanian", 3],
        ["Mă numesc Popa Mihai și am 13 ani.", "Romanian", "English", 3],
        ["Tu vreau cafea.", "Romanian", "English", 3],
        ["Do you needs coffee?", "English", "English", 3],
    ],
    inputs=inputs
)
|