dpc committed on
Commit
1c69775
1 Parent(s): 283f4e0

add more languages

Browse files
Files changed (2) hide show
  1. README.md +17 -20
  2. app.py +54 -12
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: Vietnamese - English Translation
3
  emoji: 🐠
4
  colorFrom: yellow
5
  colorTo: gray
@@ -8,30 +8,27 @@ app_file: app.py
8
  pinned: false
9
  ---
10
 
11
- # Configuration
12
 
13
- `title`: _string_
14
- Display title for the Space
15
 
16
- `emoji`: _string_
17
- Space emoji (emoji-only character allowed)
18
 
19
- `colorFrom`: _string_
20
- Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
21
 
22
- `colorTo`: _string_
23
- Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
24
 
25
- `sdk`: _string_
26
- Can be either `gradio` or `streamlit`
 
 
 
 
 
 
27
 
28
- `sdk_version` : _string_
29
- Only applicable for `streamlit` SDK.
30
- See [doc](https://hf.co/docs/hub/spaces) for more info on supported versions.
31
 
32
- `app_file`: _string_
33
- Path to your main application file (which contains either `gradio` or `streamlit` Python code).
34
- Path is relative to the root of the repository.
35
 
36
- `pinned`: _boolean_
37
- Whether the Space stays on top of your list.
 
 
1
  ---
2
+ title: Text Translation
3
  emoji: 🐠
4
  colorFrom: yellow
5
  colorTo: gray
 
8
  pinned: false
9
  ---
10
 
11
+ ## Info
12
 
 
 
13
 
14
+ This app uses the pre-trained facebook/m2m100_1.2B model.
 
15
 
16
+ facebook/m2m100_1.2B supports 100 languages.
 
17
 
18
+ This app uses and tests only the following languages.
 
19
 
20
+ ```
21
+ Chinese(zh)
22
+ English(en)
23
+ Hindi(hi)
24
+ Japanese(ja)
25
+ Sinhalese(si)
26
+ Thai(th)
27
+ Vietnamese(vi)
28
 
29
+ ```
 
 
30
 
 
 
 
31
 
32
+ ## Read more:
33
+
34
+ https://huggingface.co/facebook/m2m100_1.2B
app.py CHANGED
@@ -3,30 +3,72 @@
3
 
4
  import gradio as gr
5
  from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
 
6
  model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_1.2B")
 
7
  tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_1.2B")
8
 
9
- # from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
10
- # tokenizer = AutoTokenizer.from_pretrained("facebook/m2m100_1.2B")
11
- # model = AutoModelForSeq2SeqLM.from_pretrained("facebook/m2m100_1.2B")
 
 
 
 
 
 
 
 
12
 
13
 
14
- def vi_en(Vietnamese_Text):
15
- tokenizer.src_lang = "vi"
16
- encoded_vi = tokenizer(Vietnamese_Text, return_tensors="pt")
 
 
17
  generated_tokens = model.generate(
18
- **encoded_vi, forced_bos_token_id=tokenizer.get_lang_id("en"))
 
19
  res = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
 
20
  return res[0]
21
  # if res:
22
- # return '\n'.join(res)
23
 
24
 
25
  iface = gr.Interface(
26
- fn=vi_en,
27
- title="Vietnamese to English",
28
- description="Using/testing facebook/m2m100_1.2B pre-trained model",
29
- inputs=gr.inputs.Textbox(lines=5, placeholder="Enter text in Vietnamese"),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  outputs="text")
31
 
32
  iface.launch()
 
3
 
4
  import gradio as gr
5
  from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
6
+
7
  model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_1.2B")
8
+
9
  tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_1.2B")
10
 
11
+
12
+ this_description = '''
13
+ Using facebook/m2m100_1.2B pre-trained model. Language code:
14
+ Chinese(zh)
15
+ English(en)
16
+ Hindi(hi)
17
+ Japanese(ja)
18
+ Sinhalese(si)
19
+ Thai(th)
20
+ Vietnamese(vi)
21
+ '''
22
 
23
 
24
+ def m2m_translate(Input_Text, from_lang, to_lang):
25
+ tokenizer.src_lang = from_lang
26
+
27
+ encoded_from_lang = tokenizer(Input_Text, return_tensors="pt")
28
+
29
  generated_tokens = model.generate(
30
+ **encoded_from_lang, forced_bos_token_id=tokenizer.get_lang_id(to_lang))
31
+
32
  res = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
33
+
34
  return res[0]
35
  # if res:
36
+ # return '\n'.join(res)
37
 
38
 
39
  iface = gr.Interface(
40
+ fn=m2m_translate,
41
+
42
+ title="M2M100 Translation",
43
+ description=this_description,
44
+
45
+ inputs=[
46
+ gr.inputs.Textbox(lines=5, placeholder="Enter text"),
47
+
48
+ gr.inputs.Radio(
49
+ choices=[
50
+ 'zh',
51
+ 'en',
52
+ 'hi',
53
+ 'ja',
54
+ 'si',
55
+ 'th',
56
+ 'vi'],
57
+ default='vi',
58
+ label='From language'),
59
+
60
+ gr.inputs.Radio(
61
+ choices=[
62
+ 'zh',
63
+ 'en',
64
+ 'hi',
65
+ 'ja',
66
+ 'si',
67
+ 'th',
68
+ 'vi'],
69
+ default='en',
70
+ label='To language'),
71
+ ],
72
  outputs="text")
73
 
74
  iface.launch()