nisheeth committed on
Commit
6c1c798
·
verified ·
1 Parent(s): 4aecf52

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +5 -5
  2. app.py +120 -0
  3. requirements.txt +3 -0
README.md CHANGED
@@ -1,13 +1,13 @@
1
  ---
2
- title: MachineTranslation
3
- emoji: 👀
4
  colorFrom: pink
5
- colorTo: green
6
  sdk: gradio
7
- sdk_version: 4.22.0
8
  app_file: app.py
9
  pinned: false
10
- license: apache-2.0
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Ep
3
+ emoji: 🏢
4
  colorFrom: pink
5
+ colorTo: pink
6
  sdk: gradio
7
+ sdk_version: 3.27.0
8
  app_file: app.py
9
  pinned: false
10
+ license: mit
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ import gradio as gr
4
+ import time
5
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
6
+
7
# (display name, language code) pairs for every language offered in the UI.
# The display names become the dropdown choices; the codes are what the
# translation pipeline receives as src_lang / tgt_lang.
_LANGUAGE_CODES = (
    ('Assamese', 'asm_Beng'),
    ('Awadhi', 'awa_Deva'),
    ('Bengali', 'ben_Beng'),
    ('Bhojpuri', 'bho_Deva'),
    ('Standard Tibetan', 'bod_Tibt'),
    ('Dzongkha', 'dzo_Tibt'),
    ('English', 'eng_Latn'),
    ('Gujarati', 'guj_Gujr'),
    ('Hindi', 'hin_Deva'),
    ('Chhattisgarhi', 'hne_Deva'),
    ('Kannada', 'kan_Knda'),
    ('Kashmiri (Arabic script)', 'kas_Arab'),
    ('Kashmiri (Devanagari script)', 'kas_Deva'),
    ('Mizo', 'lus_Latn'),
    ('Magahi', 'mag_Deva'),
    ('Maithili', 'mai_Deva'),
    ('Malayalam', 'mal_Mlym'),
    ('Marathi', 'mar_Deva'),
    ('Meitei (Bengali script)', 'mni_Beng'),
    ('Burmese', 'mya_Mymr'),
    ('Nepali', 'npi_Deva'),
    ('Odia', 'ory_Orya'),
    ('Punjabi', 'pan_Guru'),
    ('Sanskrit', 'san_Deva'),
    ('Santali', 'sat_Olck'),
    ('Sindhi', 'snd_Arab'),
    ('Tamil', 'tam_Taml'),
    ('Telugu', 'tel_Telu'),
    ('Urdu', 'urd_Arab'),
    ('Vietnamese', 'vie_Latn'),
)

# One "<display name>\t<code>" record per line. The tab is written as an
# explicit escape: an invisible literal tab inside a triple-quoted string is
# easily converted to spaces by an editor or a copy/paste, and the start-up
# code that splits each record on '\t' would then fail to unpack it.
codes_as_string = '\n'.join(
    f'{name}\t{code}' for name, code in _LANGUAGE_CODES
)
37
+
38
+
39
+
40
def load_models():
    """Load every configured translation model together with a tokenizer.

    Returns:
        dict: for each short model name ``<name>`` the loaded model is stored
        under ``'<name>_model'`` and its tokenizer under
        ``'<name>_tokenizer'``.
    """
    # Short name used inside the app -> checkpoint id given to from_pretrained.
    checkpoints = {
        'nllb-1.3B': "ychenNLP/nllb-200-distilled-1.3B-easyproject",
    }

    loaded = {}
    for short_name, checkpoint_id in checkpoints.items():
        print(f'\tLoading model: {short_name}')
        seq2seq_model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint_id)
        # NOTE(review): the tokenizer is always taken from this fixed
        # checkpoint rather than from `checkpoint_id`; presumably the two
        # share a vocabulary -- confirm before adding other models here.
        shared_tokenizer = AutoTokenizer.from_pretrained(
            "facebook/nllb-200-distilled-600M"
        )
        loaded[f'{short_name}_model'] = seq2seq_model
        loaded[f'{short_name}_tokenizer'] = shared_tokenizer

    return loaded
56
+
57
+
58
def translation(source, target, text):
    """Translate ``text`` from the ``source`` language into ``target``.

    Reads two module-level globals set up at start-up: ``flores_codes``
    (display name -> language code) and ``model_dict`` (the mapping returned
    by ``load_models``).

    Args:
        source: display name of the input language; must be a key of
            ``flores_codes``.
        target: display name of the output language; must be a key of
            ``flores_codes``.
        text: the text to translate.

    Returns:
        str: the translated text, or ``''`` when ``text`` is empty, ``None``
        or only whitespace.

    Raises:
        KeyError: if ``source`` or ``target`` is not a known language name,
            or if the expected model entries are missing from ``model_dict``.
    """
    # Nothing to translate: skip the model call entirely.
    if not text or not text.strip():
        return ''

    # Only one model is configured. Assign the name unconditionally so the
    # function can never reach the lookups below with it unbound (the former
    # `len(model_dict) == 2` guard left it undefined for any other size).
    model_name = 'nllb-1.3B'

    src_code = flores_codes[source]
    tgt_code = flores_codes[target]

    translator = pipeline(
        'translation',
        model=model_dict[model_name + '_model'],
        tokenizer=model_dict[model_name + '_tokenizer'],
        src_lang=src_code,
        tgt_lang=tgt_code,
    )
    results = translator(text, max_length=400)

    # The pipeline returns a list with one dict per input string.
    return results[0]['translation_text']
82
+
83
+
84
if __name__ == '__main__':
    print('\tinit models')

    # Display name -> language code, parsed from the module-level table.
    # Each record is "<display name><whitespace><code>"; splitting once from
    # the right keeps multi-word names such as "Standard Tibetan" intact and
    # works whether the separator is a tab or spaces (codes contain neither).
    flores_codes = dict(
        record.rsplit(maxsplit=1)
        for record in codes_as_string.split('\n')
        if record.strip()
    )

    # Module-level name read by translation(); no `global` statement is
    # needed because this code already runs at module scope.
    model_dict = load_models()

    # define gradio demo
    lang_codes = list(flores_codes)

    # Top-level components replace the deprecated `gr.inputs.*` namespace
    # (which was also being used for the output box).
    inputs = [
        gr.Dropdown(choices=lang_codes, value='English', label='Source'),
        gr.Dropdown(choices=lang_codes, value='Hindi', label='Target'),
        gr.Textbox(lines=5, label="Input text"),
    ]

    outputs = gr.Textbox(label="Output text")

    title = "Machine Translation Demo"

    demo_status = "Machine Translation System."
    description = f"{demo_status}"

    # `examples` was previously passed to gr.Interface without ever being
    # defined, which raised NameError before the app could start. Each row
    # supplies one value per input component: source, target, text.
    examples = [
        ['English', 'Hindi', 'Hello, how are you?'],
    ]

    gr.Interface(
        translation,
        inputs,
        outputs,
        title=title,
        description=description,
        examples=examples,
        examples_per_page=50,
        theme="JohnSmith9982/small_and_pretty",
    ).launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ git+https://github.com/huggingface/transformers
2
+ gradio
3
+ torch