Pranjal12345 committed on
Commit
d1e1d4f
1 Parent(s): 5dd794f

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +60 -0
  2. requirements.txt +4 -0
  3. utils.py +6 -0
app.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from faster_whisper import WhisperModel
3
+ from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
4
+ from utils import lang_ids
5
+
6
# Whisper checkpoint size handed to faster-whisper.
model_size = "medium"

# Speech model shared by every request; configured to run on CPU with int8 compute.
ts_model = WhisperModel(model_size, device="cpu", compute_type="int8")

# Language names offered in the UI dropdown, in the order they appear in ``lang_ids``.
lang_list = list(lang_ids)
10
+
11
# Hugging Face checkpoint used for the English -> target-language step.
_MBART_CHECKPOINT = "facebook/mbart-large-50-many-to-many-mmt"

# Holds the loaded (model, tokenizer) pair under the key "pair" once it has
# been loaded, so the checkpoint is not re-read on every request.
_mbart_cache = {}


def _load_mbart():
    """Return the mBART-50 ``(model, tokenizer)`` pair, loading it on first use."""
    if "pair" not in _mbart_cache:
        model = MBartForConditionalGeneration.from_pretrained(_MBART_CHECKPOINT)
        tokenizer = MBart50TokenizerFast.from_pretrained(_MBART_CHECKPOINT)
        # Store both together so a failed load never leaves a half-filled cache.
        _mbart_cache["pair"] = (model, tokenizer)
    return _mbart_cache["pair"]


def translate_audio(inputs, target_language):
    """Translate the speech in an audio input into the requested language.

    The audio is run through the module-level ``ts_model`` with
    ``task="translate"``, producing English text segments. For an English
    target the segment texts are concatenated and returned; for any other
    target each segment is translated from English with mBART-50 and the
    translated segments are joined into one string.

    Args:
        inputs: Audio to process, passed straight to ``ts_model.transcribe``
            (the UI supplies a file path). ``None`` means nothing was uploaded.
        target_language: Display name of the target language; must be a key
            of ``lang_ids`` (for example ``"French"``).

    Returns:
        The translated text as a single string.

    Raises:
        gr.Error: If no audio was submitted or ``target_language`` is not a
            key of ``lang_ids``.
    """
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload an audio file before submitting your request.")

    # Validate the language up front so a bad request fails with a readable
    # UI error instead of a bare KeyError.
    if target_language not in lang_ids:
        raise gr.Error(f"Unsupported target language: {target_language!r}")
    target_lang = lang_ids[target_language]

    segments, _ = ts_model.transcribe(inputs, task="translate")

    if target_language == 'English':
        # The transcription pass already yields English; just stitch it together.
        return "".join(segment.text for segment in segments)

    model, tokenizer = _load_mbart()
    tokenizer.src_lang = "en_XX"
    # Loop-invariant: the decoder is forced to start in the target language.
    forced_bos_token_id = tokenizer.lang_code_to_id[target_lang]

    translated_chunks = []
    for segment in segments:
        encoded_chunk = tokenizer(segment.text, return_tensors="pt")
        generated_tokens = model.generate(
            **encoded_chunk,
            forced_bos_token_id=forced_bos_token_id,
        )
        translated_chunk = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
        translated_chunks.append(translated_chunk[0])

    # Separate segments with a space so consecutive sentences do not run
    # together. NOTE(review): appropriate for the space-delimited languages
    # currently in ``lang_ids``; revisit if a language without word spacing
    # is added.
    return " ".join(translated_chunks)
42
+
43
# Gradio UI: one audio upload plus a target-language dropdown, wired to
# ``translate_audio`` and rendering its return value as plain text.
translation_interface = gr.Interface(
    fn=translate_audio,
    inputs=[
        # Use the top-level component class, consistent with ``gr.Dropdown``
        # below; ``type="filepath"`` hands the function a path on disk.
        gr.Audio(source="upload", type="filepath", label="Audio file"),
        gr.Dropdown(lang_list, value="English", label="Target Language"),
    ],
    outputs="text",
    layout="horizontal",
    theme="huggingface",
    title="Translate Audio",
    description=(
        "Upload an audio file to translate its speech into the selected "
        "target language."
    ),
    allow_flagging="never",
)

# Only start the web server when this file is executed as a script.
if __name__ == "__main__":
    translation_interface.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ torch
2
+ transformers
3
+ faster_whisper
4
+ requests
utils.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
# Maps the language names shown in the UI to the language codes passed to the
# mBART-50 tokenizer (see the ``lang_code_to_id`` lookup in app.py).
lang_ids = {
    "English": "en_XX",
    "French": "fr_XX",
    "Spanish": "es_XX",
}
6
+