maxbittker committed on
Commit
22ca3fa
1 Parent(s): ed6c2e2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -36
app.py CHANGED
@@ -5,10 +5,9 @@ import torch
5
  model = AutoModelForSeq2SeqLM.from_pretrained("Jayyydyyy/m2m100_418m_tokipona")
6
  tokenizer = AutoTokenizer.from_pretrained("facebook/m2m100_418M")
7
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
8
- LANG_CODES = {
9
- "English":"en",
10
- "toki pona":"tl"
11
- }
12
 
13
  def translate(text):
14
  """
@@ -20,51 +19,47 @@ def translate(text):
20
 
21
  tokenizer.src_lang = "en"
22
  tokenizer.tgt_lang = "tl"
23
-
24
- ins = tokenizer(text, return_tensors='pt').to(device)
25
 
26
  gen_args = {
27
- 'return_dict_in_generate': True,
28
- 'output_scores': True,
29
- 'output_hidden_states': True,
30
- 'length_penalty': 0.0, # don't encourage longer or shorter output,
31
- 'num_return_sequences': 1,
32
- 'num_beams':1,
33
- 'forced_bos_token_id': tokenizer.lang_code_to_id["tl"]
34
- }
35
-
36
 
37
  outs = model.generate(**{**ins, **gen_args})
38
  output = tokenizer.batch_decode(outs.sequences, skip_special_tokens=True)
39
- text2 = '\n'.join(output)
40
-
41
  ##################
42
 
43
  tokenizer.src_lang = "tl"
44
  tokenizer.tgt_lang = "en"
45
 
46
- ins = tokenizer(text2, return_tensors='pt').to(device)
47
 
48
  gen_args = {
49
- 'return_dict_in_generate': True,
50
- 'output_scores': True,
51
- 'output_hidden_states': True,
52
- 'length_penalty': 0.0, # don't encourage longer or shorter output,
53
- 'num_return_sequences': 1,
54
- 'num_beams':1,
55
- 'forced_bos_token_id': tokenizer.lang_code_to_id["en"]
56
- }
57
-
58
 
59
  outs2 = model.generate(**{**ins, **gen_args})
60
  output2 = tokenizer.batch_decode(outs2.sequences, skip_special_tokens=True)
61
 
 
62
 
63
 
64
- return '\n'.join(output2)
65
-
66
  with gr.Blocks() as app:
67
- markdown="""
68
  # An English / toki pona Neural Machine Translation App!
69
 
70
  ### toki a! 💬
@@ -101,12 +96,15 @@ with gr.Blocks() as app:
101
  with gr.Row():
102
  gr.Markdown(markdown)
103
  with gr.Column():
104
- input_text = gr.components.Textbox(label="Input Text", value="Raccoons are fascinating creatures, but I prefer opossums.")
 
 
 
105
  # source_lang = gr.components.Dropdown(label="Source Language", value="English", choices=list(LANG_CODES.keys()))
106
  # target_lang = gr.components.Dropdown(label="Target Language", value="toki pona", choices=list(LANG_CODES.keys()))
107
  # return_seqs = gr.Slider(label="Number of return sequences", value=3, minimum=1, maximum=12, step=1)
108
-
109
- inputs=[input_text]
110
  outputs = gr.Textbox()
111
 
112
  translate_btn = gr.Button("Translate! | o ante toki!")
@@ -115,10 +113,10 @@ with gr.Blocks() as app:
115
  gr.Examples(
116
  [
117
  ["Hello! How are you?", "English", "toki pona", 3],
118
- ["toki a! ilo pi ante toki ni li pona!", "toki pona", "English", 3],
119
  ["mi li toki e toki pona", "toki pona", "toki pona", 3],
120
  ],
121
- inputs=inputs
122
  )
123
 
124
- app.launch()
 
5
  model = AutoModelForSeq2SeqLM.from_pretrained("Jayyydyyy/m2m100_418m_tokipona")
6
  tokenizer = AutoTokenizer.from_pretrained("facebook/m2m100_418M")
7
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
8
+ model.to(device)
9
+ LANG_CODES = {"English": "en", "toki pona": "tl"}
10
+
 
11
 
12
  def translate(text):
13
  """
 
19
 
20
  tokenizer.src_lang = "en"
21
  tokenizer.tgt_lang = "tl"
22
+ ins = tokenizer(text, return_tensors="pt").to(device)
 
23
 
24
  gen_args = {
25
+ "return_dict_in_generate": True,
26
+ "output_scores": True,
27
+ "output_hidden_states": True,
28
+ "length_penalty": 0.0, # don't encourage longer or shorter output,
29
+ "num_return_sequences": 1,
30
+ "num_beams": 1,
31
+ "forced_bos_token_id": tokenizer.lang_code_to_id["tl"],
32
+ }
 
33
 
34
  outs = model.generate(**{**ins, **gen_args})
35
  output = tokenizer.batch_decode(outs.sequences, skip_special_tokens=True)
36
+ text2 = "\n".join(output)
37
+
38
  ##################
39
 
40
  tokenizer.src_lang = "tl"
41
  tokenizer.tgt_lang = "en"
42
 
43
+ ins = tokenizer(text2, return_tensors="pt").to(device)
44
 
45
  gen_args = {
46
+ "return_dict_in_generate": True,
47
+ "output_scores": True,
48
+ "output_hidden_states": True,
49
+ "length_penalty": 0.0, # don't encourage longer or shorter output,
50
+ "num_return_sequences": 1,
51
+ "num_beams": 1,
52
+ "forced_bos_token_id": tokenizer.lang_code_to_id["en"],
53
+ }
 
54
 
55
  outs2 = model.generate(**{**ins, **gen_args})
56
  output2 = tokenizer.batch_decode(outs2.sequences, skip_special_tokens=True)
57
 
58
+ return "\n".join(output2)
59
 
60
 
 
 
61
  with gr.Blocks() as app:
62
+ markdown = """
63
  # An English / toki pona Neural Machine Translation App!
64
 
65
  ### toki a! 💬
 
96
  with gr.Row():
97
  gr.Markdown(markdown)
98
  with gr.Column():
99
+ input_text = gr.components.Textbox(
100
+ label="Input Text",
101
+ value="Raccoons are fascinating creatures, but I prefer opossums.",
102
+ )
103
  # source_lang = gr.components.Dropdown(label="Source Language", value="English", choices=list(LANG_CODES.keys()))
104
  # target_lang = gr.components.Dropdown(label="Target Language", value="toki pona", choices=list(LANG_CODES.keys()))
105
  # return_seqs = gr.Slider(label="Number of return sequences", value=3, minimum=1, maximum=12, step=1)
106
+
107
+ inputs = [input_text]
108
  outputs = gr.Textbox()
109
 
110
  translate_btn = gr.Button("Translate! | o ante toki!")
 
113
  gr.Examples(
114
  [
115
  ["Hello! How are you?", "English", "toki pona", 3],
116
+ ["toki a! ilo pi ante toki ni li pona!", "toki pona", "English", 3],
117
  ["mi li toki e toki pona", "toki pona", "toki pona", 3],
118
  ],
119
+ inputs=inputs,
120
  )
121
 
122
+ app.launch()