qgyd2021 committed
Commit 68eb545 · 1 Parent(s): 665d723

[update]add main

Files changed (5)
  1. .gitignore +8 -0
  2. README.md +2 -2
  3. main.py +79 -0
  4. project_settings.py +12 -0
  5. requirements.txt +2 -0
.gitignore ADDED
@@ -0,0 +1,8 @@
+
+.git/
+.idea/
+
+flagged/
+hub_datasets/
+
+**/__pycache__/
README.md CHANGED
@@ -4,8 +4,8 @@ emoji: ⚡
 colorFrom: red
 colorTo: red
 sdk: gradio
-sdk_version: 3.44.4
-app_file: app.py
+sdk_version: 3.20.1
+app_file: main.py
 pinned: false
 ---
 
main.py ADDED
@@ -0,0 +1,79 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+import argparse
+import os
+
+from project_settings import project_path
+
+hf_hub_cache = (project_path / "cache/huggingface/hub").as_posix()
+
+os.environ["HUGGINGFACE_HUB_CACHE"] = hf_hub_cache
+
+import gradio as gr
+from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
+
+
+def get_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--pretrained_model_name_or_path",
+        default="facebook/m2m100_418M",
+        type=str
+    )
+    args = parser.parse_args()
+    return args
+
+
+def main():
+    args = get_args()
+
+    model = M2M100ForConditionalGeneration.from_pretrained(args.pretrained_model_name_or_path)
+    tokenizer = M2M100Tokenizer.from_pretrained(args.pretrained_model_name_or_path)
+
+    def multilingual_translate(src_text: str,
+                               src_lang: str,
+                               tgt_lang: str,
+                               ):
+        tokenizer.src_lang = src_lang
+        encoded_src = tokenizer(src_text, return_tensors="pt")
+        generated_tokens = model.generate(**encoded_src, forced_bos_token_id=tokenizer.get_lang_id(tgt_lang))
+        result = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
+
+        return result
+
+    title = "Beyond English-Centric Multilingual Machine Translation"
+
+    description = "M2M100 is a multilingual encoder-decoder (seq-to-seq) model trained for Many-to-Many multilingual translation. It was introduced in this [paper](https://arxiv.org/abs/2010.11125) and first released in [this](https://github.com/pytorch/fairseq/tree/master/examples/m2m_100) repository."
+
+    examples = [
+        [
+            "Hello world!",
+            "en",
+            "zh",
+        ],
+    ]
+
+    inputs = [
+        gr.Textbox(lines=4, value="", label="Input Text"),
+        gr.Textbox(lines=1, value="", label="Source Language"),
+        gr.Textbox(lines=4, value="", label="Target Language"),
+    ]
+
+    output = gr.outputs.Textbox(label="Output Text")
+
+    app = gr.Interface(
+        fn=multilingual_translate,
+        inputs=inputs,
+        outputs=output,
+        examples=examples,
+        title=title,
+        description=description,
+        cache_examples=True
+    )
+    app.launch(debug=True, enable_queue=True)
+
+    return
+
+
+if __name__ == '__main__':
+    main()
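
For reference, the multilingual_translate closure in main.py follows the standard M2M100 usage from transformers: set the tokenizer's source language, encode the input, then generate with forced_bos_token_id for the target language. A minimal sketch of that flow outside Gradio (assuming facebook/m2m100_418M can be downloaded and the pinned transformers 4.30.2 is installed):

    from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer

    model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M")
    tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M")

    # Translate English -> Chinese, mirroring the Space's "Hello world!" example.
    tokenizer.src_lang = "en"
    encoded = tokenizer("Hello world!", return_tensors="pt")
    generated = model.generate(
        **encoded,
        forced_bos_token_id=tokenizer.get_lang_id("zh"),  # decode in the target language
    )
    print(tokenizer.batch_decode(generated, skip_special_tokens=True))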
project_settings.py ADDED
@@ -0,0 +1,12 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+import os
+from pathlib import Path
+
+
+project_path = os.path.abspath(os.path.dirname(__file__))
+project_path = Path(project_path)
+
+
+if __name__ == '__main__':
+    pass
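
project_settings.py exposes the repository root as a pathlib.Path so main.py can place the Hugging Face hub cache inside the Space's working directory before transformers is imported. A minimal sketch of how that path is consumed (illustrative only):

    from project_settings import project_path

    # main.py builds <repo_root>/cache/huggingface/hub and exports it as
    # HUGGINGFACE_HUB_CACHE before importing transformers.
    print((project_path / "cache/huggingface/hub").as_posix())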
requirements.txt ADDED
@@ -0,0 +1,2 @@
+gradio==3.20.1
+transformers==4.30.2