frogcho123 committed on
Commit
d84f405
·
1 Parent(s): 2b4cf3a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -0
app.py CHANGED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import whisper
4
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
5
+ from gtts import gTTS
6
+
7
def translate_voice(file, target_lang):
    """Transcribe an uploaded audio file, translate the text, and speak it.

    Pipeline: Whisper speech-to-text -> SMALL-100 translation -> gTTS
    text-to-speech.

    Args:
        file: Uploaded file object. Only its ``name`` attribute (a path on
            disk) is read.
        target_lang: Language code handed to both the translation tokenizer
            and gTTS (the UI offers 'en', 'ru', 'de', 'fr').

    Returns:
        A 4-tuple ``(audio_path, original_text, translated_text,
        target_lang)`` where ``audio_path`` is the MP3 written by gTTS.

    Raises:
        ValueError: If Whisper produces no text for the audio, so there is
            nothing to translate or synthesise.
    """
    # --- Speech-to-text -------------------------------------------------
    # NOTE(review): both models are re-loaded on every call, which is slow.
    # Caching them at module level would help, but Whisper's decoder installs
    # hooks on the model, so a shared instance would need locking if requests
    # can run concurrently.
    stt_model = whisper.load_model("base")

    audio = whisper.load_audio(file.name)
    # Whisper's encoder takes a fixed-length window (30 seconds per the
    # Whisper README), so longer recordings are truncated here.
    audio = whisper.pad_or_trim(audio)
    mel = whisper.log_mel_spectrogram(audio).to(stt_model.device)

    # DecodingOptions defaults to fp16, which Whisper does not support on CPU;
    # only keep half precision when the model lives on an accelerator.
    # With no language specified, decode() detects the spoken language itself,
    # so a separate detect_language() pass is unnecessary.
    options = whisper.DecodingOptions(fp16=stt_model.device.type != "cpu")
    result = whisper.decode(stt_model, mel, options)

    text = result.text
    if not text or not text.strip():
        raise ValueError("No speech could be transcribed from the uploaded audio.")

    # --- Translation ----------------------------------------------------
    tokenizer = AutoTokenizer.from_pretrained("alirezamsh/small100")
    translator = AutoModelForSeq2SeqLM.from_pretrained("alirezamsh/small100")

    # NOTE(review): the *target* code is assigned to ``src_lang``. This looks
    # intentional for SMALL-100 (the target-language token is expected on the
    # encoder input) -- confirm against the model card before changing it.
    tokenizer.src_lang = target_lang
    encoded = tokenizer(text, return_tensors="pt")
    generated_tokens = translator.generate(**encoded)
    translated_text = tokenizer.batch_decode(
        generated_tokens, skip_special_tokens=True
    )[0]

    # --- Text-to-speech -------------------------------------------------
    # NOTE(review): the output path is fixed, so simultaneous requests would
    # overwrite each other's audio.
    filename = "to_speech.mp3"
    gTTS(text=translated_text, lang=target_lang).save(filename)

    return filename, text, translated_text, target_lang
36
+
37
# Gradio UI: takes an uploaded audio file and a target-language code, and shows
# the synthesised translation alongside the original and translated text.
iface = gr.Interface(
    fn=translate_voice,
    inputs=[
        # "file" hands the callback a temp-file object; translate_voice reads
        # its ``.name`` path. ("audio" is not a valid File type.)
        gr.inputs.File(type="file", label="Your Audio"),
        gr.inputs.Dropdown(choices=['en', 'ru', 'de', 'fr'], label="Target Language"),
    ],
    outputs=[
        gr.outputs.Audio(type="auto", label="Translated Audio"),
        gr.outputs.Textbox(label="Original Text"),
        gr.outputs.Textbox(label="Translated Text"),
        gr.outputs.Textbox(label="Target Language"),
    ],
)
iface.launch()