huseinzol05 committed on
Commit
a4577da
•
1 Parent(s): bfe12e3
Files changed (2) hide show
  1. README.md +1 -1
  2. app.py +24 -4
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🌍
4
  colorFrom: pink
5
  colorTo: pink
6
  sdk: gradio
7
- sdk_version: 4.14.0
8
  app_file: app.py
9
  pinned: false
10
  ---
 
4
  colorFrom: pink
5
  colorTo: pink
6
  sdk: gradio
7
+ sdk_version: 4.32.1
8
  app_file: app.py
9
  pinned: false
10
  ---
app.py CHANGED
@@ -10,6 +10,8 @@ import gradio as gr
10
  import logging
11
  import os
12
 
 
 
13
  logging.basicConfig(level=logging.INFO)
14
 
15
  TO_LANG = {
@@ -42,8 +44,14 @@ tokenizer = AutoTokenizer.from_pretrained(
42
  use_fast=False,
43
  )
44
 
45
-
46
  def translate(text, to_lang):
 
 
 
 
 
 
 
47
  global model
48
  to_lang = TO_LANG[to_lang]
49
  if model is None:
@@ -66,7 +74,7 @@ def translate(text, to_lang):
66
  [[i for i in o if i not in [0, 1, 2]] for o in results],
67
  spaces_between_special_tokens=False,
68
  )
69
- return results[0]
70
 
71
 
72
  hansard = """
@@ -96,13 +104,22 @@ my_array = np.reshape(my_array, (2, 5)) # menjadi array 2D dengan 2 baris dan 5
96
  Itulah beberapa operasi dasar numpy. Anda dapat menemukan dokumentasi resmi numpy di https://numpy.org/doc/stable/.
97
  """.strip()
98
 
 
 
 
 
 
 
99
  demo = gr.Interface(
100
  fn=translate,
101
  inputs=[
102
  gr.components.Textbox(label='Input Text'),
103
  gr.components.Dropdown(label='Output language', choices=TO_LANG_KEYS, value='Malay'),
104
  ],
105
- outputs=['text'],
 
 
 
106
  examples=[
107
  ['Syed Saddiq berkata, mereka seharusnya mengingati bahawa semasa menjadi Perdana Menteri Pakatan Harapan', 'English'],
108
  ['SHAH ALAM - Pertubuhan Kebajikan Anak Bersatu Selangor bersetuju pihak kerajaan mewujudkan Suruhanjaya Siasatan Diraja untuk menyiasat isu kartel daging.', 'English'],
@@ -116,9 +133,12 @@ demo = gr.Interface(
116
  [code, 'Malay']
117
 
118
  ],
 
 
 
119
  cache_examples=False,
120
  title='Malaysian NMT',
121
- description='This translation model able to translate malay, pasar malay (social media texts or local context), english, manglish, javanese, banjarese and indonesian to target language. It also able to maintain the text structure as it is and only translate necessary texts, eg, programming code.'
122
  )
123
 
124
  demo.launch(server_name='0.0.0.0')
 
10
  import logging
11
  import os
12
 
13
+ HF_TOKEN = os.getenv('HF_TOKEN')
14
+ hf_writer = gr.HuggingFaceDatasetSaver(HF_TOKEN, 'mesolitica/crowdsourced-malaysian-translation')
15
  logging.basicConfig(level=logging.INFO)
16
 
17
  TO_LANG = {
 
44
  use_fast=False,
45
  )
46
 
 
47
  def translate(text, to_lang):
48
+
49
+ if len(text) < 2:
50
+ raise gr.Error('text input must longer than 1 character')
51
+
52
+ if to_lang is None or len(to_lang) < 1:
53
+ raise gr.Error('Please select target language')
54
+
55
  global model
56
  to_lang = TO_LANG[to_lang]
57
  if model is None:
 
74
  [[i for i in o if i not in [0, 1, 2]] for o in results],
75
  spaces_between_special_tokens=False,
76
  )
77
+ return results[0], results[0]
78
 
79
 
80
  hansard = """
 
104
  Itulah beberapa operasi dasar numpy. Anda dapat menemukan dokumentasi resmi numpy di https://numpy.org/doc/stable/.
105
  """.strip()
106
 
107
+ description = """
108
+ <a href="https://huggingface.co/mesolitica/translation-t5-small-standard-bahasa-cased-v2">mesolitica/translation-t5-small-standard-bahasa-cased-v2</a> able to translate malay, pasar malay (social media texts or local context), english, manglish, javanese, banjarese and indonesian to target language. It also able to maintain the text structure as it is and only translate necessary texts, eg, programming code.
109
+
110
+ If you found out the translation is wrong, by simply fix the second box output and click `Flag as Wrong` button. You can help us to improve the future model, the dataset will be open source at <a href="https://huggingface.co/datasets/mesolitica/crowdsourced-malaysian-translation">mesolitica/crowdsourced-malaysian-translation</a>
111
+ """.strip()
112
+
113
  demo = gr.Interface(
114
  fn=translate,
115
  inputs=[
116
  gr.components.Textbox(label='Input Text'),
117
  gr.components.Dropdown(label='Output language', choices=TO_LANG_KEYS, value='Malay'),
118
  ],
119
+ outputs=[
120
+ gr.components.Textbox(label='Output', interactive = False),
121
+ gr.components.Textbox(label='Does it correct? fix me!', interactive = True),
122
+ ],
123
  examples=[
124
  ['Syed Saddiq berkata, mereka seharusnya mengingati bahawa semasa menjadi Perdana Menteri Pakatan Harapan', 'English'],
125
  ['SHAH ALAM - Pertubuhan Kebajikan Anak Bersatu Selangor bersetuju pihak kerajaan mewujudkan Suruhanjaya Siasatan Diraja untuk menyiasat isu kartel daging.', 'English'],
 
133
  [code, 'Malay']
134
 
135
  ],
136
+ allow_flagging='manual',
137
+ flagging_options=['Wrong'],
138
+ flagging_callback=hf_writer,
139
  cache_examples=False,
140
  title='Malaysian NMT',
141
+ description=description,
142
  )
143
 
144
  demo.launch(server_name='0.0.0.0')