kollis committed on
Commit
c51ea2c
1 Parent(s): 8c6b576

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +92 -0
app.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import torchaudio
4
+
5
+ from transformers import AutoProcessor, SeamlessM4TModel
6
# Single checkpoint id so the processor and the model always come from the same repo.
MODEL_ID = "facebook/hf-seamless-m4t-medium"

# Prefer the GPU, but fall back to CPU so the script still starts on hosts without CUDA.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

processor = AutoProcessor.from_pretrained(MODEL_ID)
model = SeamlessM4TModel.from_pretrained(MODEL_ID)
model.to(DEVICE)
9
+
10
# Human-readable language name (shown in the dropdowns) -> three-letter
# language code looked up by the translation function.
language_dict = {
    "Modern Standard Arabic": "arb",
    "Bengali": "ben",
    "Catalan": "cat",
    "Czech": "ces",
    "Mandarin Chinese": "cmn",
    "Welsh": "cym",
    "Danish": "dan",
    "German": "deu",
    "English": "eng",
    "Estonian": "est",
    "Finnish": "fin",
    "French": "fra",
    "Hindi": "hin",
    "Indonesian": "ind",
    "Italian": "ita",
    "Japanese": "jpn",
    "Korean": "kor",
    "Maltese": "mlt",
    "Dutch": "nld",
    "Western Persian": "pes",
    "Polish": "pol",
    "Portuguese": "por",
    "Romanian": "ron",
    "Russian": "rus",
    "Slovak": "slk",
    "Spanish": "spa",
    "Swedish": "swe",
    "Swahili": "swh",
    "Telugu": "tel",
    "Tagalog": "tgl",
    "Thai": "tha",
    "Turkish": "tur",
    "Ukrainian": "ukr",
    "Urdu": "urd",
    "Northern Uzbek": "uzn",
    "Vietnamese": "vie",
}

# Display names in insertion order; iterating a dict yields its keys.
languages = [name for name in language_dict]
49
+
50
+
51
def _to_mono_16k(sample_rate, audio_data, target_rate=16_000):
    """Return *audio_data* as a mono float32 tensor resampled to *target_rate*.

    Args:
        sample_rate: Sampling rate of ``audio_data`` in Hz.
        audio_data: NumPy array of samples. A 2-D array is assumed to be laid
            out as ``(samples, channels)`` -- TODO confirm against the Gradio
            version in use.
        target_rate: Sampling rate of the returned tensor in Hz.

    Returns:
        A float32 CPU tensor. Amplitudes are converted to float but not
        rescaled.
    """
    waveform = torch.from_numpy(audio_data).to(torch.float32)
    if waveform.ndim > 1:
        # resample() works along the last axis; without the downmix a
        # (samples, channels) array would be resampled across channels.
        waveform = waveform.mean(dim=-1)
    return torchaudio.functional.resample(
        waveform, orig_freq=sample_rate, new_freq=target_rate
    )


def png(source_lang, target_lang, audio, text):
    """Translate speech or text and return the translation as audio and text.

    Args:
        source_lang: Display name of the input language (a key of
            ``language_dict``).
        target_lang: Display name of the output language (a key of
            ``language_dict``).
        audio: ``(sample_rate, samples)`` tuple, or ``None`` to translate
            ``text`` instead.
        text: Source-language text; only used when ``audio`` is ``None``.

    Returns:
        ``((sample_rate, waveform), translated_text)`` where ``waveform`` is a
        NumPy array.

    Raises:
        gr.Error: If a language has not been selected, or if neither audio
            nor non-blank text was supplied.
    """
    if source_lang not in language_dict or target_lang not in language_dict:
        raise gr.Error("Please select both a source and a target language.")
    if audio is None and not (text and text.strip()):
        raise gr.Error("Please provide audio or text to translate.")

    source_lang_code = language_dict[source_lang]
    target_lang_code = language_dict[target_lang]

    if audio is None:
        processed_inputs = processor(
            text, src_lang=source_lang_code, return_tensors="pt"
        )
    else:
        sample_rate, audio_data = audio
        # Resampling runs on the CPU: the processor takes CPU data anyway, so
        # a round trip through the GPU would only add transfers.
        processed_inputs = processor(
            audios=_to_mono_16k(sample_rate, audio_data),
            sampling_rate=16_000,
            return_tensors="pt",
        )

    # Follow whatever device the model was loaded on instead of assuming CUDA.
    processed_inputs = processed_inputs.to(model.device)

    # One generation pass yields both the waveform and the intermediate text
    # tokens, so the model does not have to run a second time for the text.
    outputs = model.generate(
        **processed_inputs,
        tgt_lang=target_lang_code,
        return_intermediate_token_ids=True,
    )
    generated_audio = outputs.waveform.cpu().numpy().squeeze()
    generated_text = processor.decode(
        outputs.sequences[0].tolist(), skip_special_tokens=True
    )

    return (model.config.sampling_rate, generated_audio), generated_text
73
+
74
# Input widgets, listed in the positional order of png's parameters:
# source language, target language, audio, text.
_input_components = [
    gr.Dropdown(languages, label="Source Language"),
    gr.Dropdown(languages, label="Target Language"),
    gr.Audio(),
    gr.Textbox(label="Enter Text in Source Language"),
]

# Output widgets, matching png's return value: audio first, then text.
_output_components = [
    gr.Audio(label="Translated Audio"),
    gr.Textbox(label="Translated Text"),
]

iface = gr.Interface(
    fn=png,
    inputs=_input_components,
    outputs=_output_components,
    title="Language Translation App",
    description="Select source and target languages for translation.",
)

# Start the web UI with Gradio's debug mode enabled.
iface.launch(debug=True)
91
+
92
+