vteam27 committed on
Commit
79868fd
1 Parent(s): ef6d6f0

Added upload button

Browse files
Files changed (2) hide show
  1. app.py +9 -7
  2. lang_list.py +1 -93
app.py CHANGED
@@ -16,7 +16,9 @@ processor = AutoProcessor.from_pretrained("facebook/hf-seamless-m4t-medium")
16
  # print(translated_text_from_text)
17
 
18
 
19
- def run_t2tt(input_text: str, source_language: str, target_language: str) -> str:
 
 
20
  source_language_code = LANGUAGE_NAME_TO_CODE[source_language]
21
  target_language_code = LANGUAGE_NAME_TO_CODE[target_language]
22
  text_inputs = processor(text = input_text, src_lang=source_language_code , return_tensors="pt")
@@ -25,17 +27,17 @@ def run_t2tt(input_text: str, source_language: str, target_language: str) -> str
25
  return str(output)
26
 
27
 
28
-
29
  with gr.Blocks() as demo_t2tt:
30
  with gr.Row():
31
  with gr.Column():
32
  with gr.Group():
 
33
  input_text = gr.Textbox(label="Input text")
34
  with gr.Row():
35
  source_language = gr.Dropdown(
36
  label="Source language",
37
  choices=TEXT_SOURCE_LANGUAGE_NAMES,
38
- value="English",
39
  )
40
  target_language = gr.Dropdown(
41
  label="Target language",
@@ -49,7 +51,7 @@ with gr.Blocks() as demo_t2tt:
49
  gr.Examples(
50
  examples=[
51
  [
52
- "The sinister destruction of the holy Akal Takht and the ruthless massacre of thousands of innocent pilgrims had unmasked the deep-seated hatred and animosity that the Indian Government had been nurturing against Sikhs ever since inde- pendence",
53
  "English",
54
  "Punjabi",
55
  ],
@@ -69,17 +71,17 @@ with gr.Blocks() as demo_t2tt:
69
  "English",
70
  ],
71
  ],
72
- inputs=[input_text, source_language, target_language],
73
  outputs=output_text,
74
  fn=run_t2tt,
75
- cache_examples=True,
76
  api_name=False,
77
  )
78
 
79
  gr.on(
80
  triggers=[input_text.submit, btn.click],
81
  fn=run_t2tt,
82
- inputs=[input_text, source_language, target_language],
83
  outputs=output_text,
84
  api_name="t2tt",
85
  )
 
16
  # print(translated_text_from_text)
17
 
18
 
19
+ def run_t2tt(file_uploader , input_text: str, source_language: str, target_language: str) -> str:
20
+ if file_uploader is not None:
21
+ input_text = file_uploader.read().decode("utf-8")
22
  source_language_code = LANGUAGE_NAME_TO_CODE[source_language]
23
  target_language_code = LANGUAGE_NAME_TO_CODE[target_language]
24
  text_inputs = processor(text = input_text, src_lang=source_language_code , return_tensors="pt")
 
27
  return str(output)
28
 
29
 
 
30
  with gr.Blocks() as demo_t2tt:
31
  with gr.Row():
32
  with gr.Column():
33
  with gr.Group():
34
+ file_uploader = gr.File(label="Upload a text file (Optional)", type="txt")
35
  input_text = gr.Textbox(label="Input text")
36
  with gr.Row():
37
  source_language = gr.Dropdown(
38
  label="Source language",
39
  choices=TEXT_SOURCE_LANGUAGE_NAMES,
40
+ value="Punjabi",
41
  )
42
  target_language = gr.Dropdown(
43
  label="Target language",
 
51
  gr.Examples(
52
  examples=[
53
  [
54
+ "The sinister destruction of the holy Akal Takht and the ruthless massacre of thousands of innocent pilgrims had unmasked the deep-seated hatred and animosity that the Indian Government had been nurturing against Sikhs ever since independence",
55
  "English",
56
  "Punjabi",
57
  ],
 
71
  "English",
72
  ],
73
  ],
74
+ inputs=[file_uploader ,input_text, source_language, target_language],
75
  outputs=output_text,
76
  fn=run_t2tt,
77
+ cache_examples=False,
78
  api_name=False,
79
  )
80
 
81
  gr.on(
82
  triggers=[input_text.submit, btn.click],
83
  fn=run_t2tt,
84
+ inputs=[file_uploader, input_text, source_language, target_language],
85
  outputs=output_text,
86
  api_name="t2tt",
87
  )
lang_list.py CHANGED
@@ -108,101 +108,9 @@ LANGUAGE_NAME_TO_CODE = {v: k for k, v in language_code_to_name.items()}
108
  # Source langs: S2ST / S2TT / ASR don't need source lang
109
  # T2TT / T2ST use this
110
  text_source_language_codes = [
111
- "afr",
112
- "amh",
113
- "arb",
114
- "ary",
115
- "arz",
116
- "asm",
117
- "azj",
118
- "bel",
119
- "ben",
120
- "bos",
121
- "bul",
122
- "cat",
123
- "ceb",
124
- "ces",
125
- "ckb",
126
- "cmn",
127
- "cym",
128
- "dan",
129
- "deu",
130
- "ell",
131
- "eng",
132
- "est",
133
- "eus",
134
- "fin",
135
- "fra",
136
- "gaz",
137
- "gle",
138
- "glg",
139
- "guj",
140
- "heb",
141
  "hin",
142
- "hrv",
143
- "hun",
144
- "hye",
145
- "ibo",
146
- "ind",
147
- "isl",
148
- "ita",
149
- "jav",
150
- "jpn",
151
- "kan",
152
- "kat",
153
- "kaz",
154
- "khk",
155
- "khm",
156
- "kir",
157
- "kor",
158
- "lao",
159
- "lit",
160
- "lug",
161
- "luo",
162
- "lvs",
163
- "mai",
164
- "mal",
165
- "mar",
166
- "mkd",
167
- "mlt",
168
- "mni",
169
- "mya",
170
- "nld",
171
- "nno",
172
- "nob",
173
- "npi",
174
- "nya",
175
- "ory",
176
  "pan",
177
- "pbt",
178
- "pes",
179
- "pol",
180
- "por",
181
- "ron",
182
- "rus",
183
- "slk",
184
- "slv",
185
- "sna",
186
- "snd",
187
- "som",
188
- "spa",
189
- "srp",
190
- "swe",
191
- "swh",
192
- "tam",
193
- "tel",
194
- "tgk",
195
- "tgl",
196
- "tha",
197
- "tur",
198
- "ukr",
199
- "urd",
200
- "uzn",
201
- "vie",
202
- "yor",
203
- "yue",
204
- "zsm",
205
- "zul",
206
  ]
207
  TEXT_SOURCE_LANGUAGE_NAMES = sorted([language_code_to_name[code] for code in text_source_language_codes])
208
 
 
108
  # Source langs: S2ST / S2TT / ASR don't need source lang
109
  # T2TT / T2ST use this
110
  text_source_language_codes = [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
  "hin",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  "pan",
113
+ "eng",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  ]
115
  TEXT_SOURCE_LANGUAGE_NAMES = sorted([language_code_to_name[code] for code in text_source_language_codes])
116