Jordan Myers
committed on
Commit
•
8210be8
1
Parent(s):
6318241
return sequences added
Browse files
app.py
CHANGED
@@ -6,13 +6,12 @@ import torch
|
|
6 |
model = AutoModelForSeq2SeqLM.from_pretrained("Jayyydyyy/m2m100_418m_tokipona")
|
7 |
tokenizer = AutoTokenizer.from_pretrained("facebook/m2m100_418M")
|
8 |
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
9 |
-
LANGS = ["English", "toki pona"]
|
10 |
LANG_CODES = {
|
11 |
"English":"en",
|
12 |
"toki pona":"tl"
|
13 |
}
|
14 |
|
15 |
-
def translate(text, src_lang, tgt_lang):
|
16 |
"""
|
17 |
Translate the text from source lang to target lang
|
18 |
"""
|
@@ -30,8 +29,8 @@ def translate(text, src_lang, tgt_lang):
|
|
30 |
'output_scores': True,
|
31 |
'output_hidden_states': True,
|
32 |
'length_penalty': 0.0, # don't encourage longer or shorter output,
|
33 |
-
'num_return_sequences':
|
34 |
-
'num_beams':
|
35 |
'forced_bos_token_id': tokenizer.lang_code_to_id[tgt]
|
36 |
}
|
37 |
|
@@ -45,11 +44,12 @@ app = gr.Interface(
|
|
45 |
fn=translate,
|
46 |
inputs=[
|
47 |
gr.components.Textbox(label="Text"),
|
48 |
-
gr.components.Dropdown(label="Source Language", choices=
|
49 |
-
gr.components.Dropdown(label="Target Language", choices=
|
|
|
50 |
],
|
51 |
outputs=["text"],
|
52 |
-
examples=[["This is an example
|
53 |
cache_examples=False,
|
54 |
title="A simple English / toki pona Neural Translation App",
|
55 |
description="A simple English / toki pona Neural Translation App"
|
|
|
6 |
model = AutoModelForSeq2SeqLM.from_pretrained("Jayyydyyy/m2m100_418m_tokipona")
|
7 |
tokenizer = AutoTokenizer.from_pretrained("facebook/m2m100_418M")
|
8 |
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
|
|
9 |
LANG_CODES = {
|
10 |
"English":"en",
|
11 |
"toki pona":"tl"
|
12 |
}
|
13 |
|
14 |
+
def translate(text, src_lang, tgt_lang, candidates:int):
|
15 |
"""
|
16 |
Translate the text from source lang to target lang
|
17 |
"""
|
|
|
29 |
'output_scores': True,
|
30 |
'output_hidden_states': True,
|
31 |
'length_penalty': 0.0, # don't encourage longer or shorter output,
|
32 |
+
'num_return_sequences': candidates,
|
33 |
+
'num_beams':candidates,
|
34 |
'forced_bos_token_id': tokenizer.lang_code_to_id[tgt]
|
35 |
}
|
36 |
|
|
|
44 |
fn=translate,
|
45 |
inputs=[
|
46 |
gr.components.Textbox(label="Text"),
|
47 |
+
gr.components.Dropdown(label="Source Language", choices=LANG_CODES.keys()),
|
48 |
+
gr.components.Dropdown(label="Target Language", choices=LANG_CODES.keys()),
|
49 |
+
gr.Slider(label="Number of return sequences", value=1, minimum=1, maximum=12)
|
50 |
],
|
51 |
outputs=["text"],
|
52 |
+
examples=[["This is an example statement. It will be translated from English to toki pona.", "English", "toki pona"]],
|
53 |
cache_examples=False,
|
54 |
title="A simple English / toki pona Neural Translation App",
|
55 |
description="A simple English / toki pona Neural Translation App"
|