frogcho123 committed on
Commit
36bec1c
1 Parent(s): 7a02c16

Add application file

Browse files
Files changed (1) hide show
  1. app.py +54 -0
app.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import whisper
4
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
5
+ from gtts import gTTS
6
+ from tempfile import NamedTemporaryFile
7
+
8
+ # Define translation function
9
# Loaded neural models are kept here so each one is built once per process
# rather than once per request.
_MODEL_CACHE = {}


def _get_cached(key, loader):
    """Return the object cached under ``key``, calling ``loader()`` on first use."""
    if key not in _MODEL_CACHE:
        _MODEL_CACHE[key] = loader()
    return _MODEL_CACHE[key]


def _transcribe(audio_path):
    """Return Whisper's transcription of the audio file at ``audio_path``."""
    stt_model = _get_cached("whisper-base", lambda: whisper.load_model("base"))
    # NOTE: pad_or_trim fits the clip to Whisper's fixed-length input window,
    # so a recording longer than that window is cut off rather than transcribed
    # in full.
    audio = whisper.pad_or_trim(whisper.load_audio(audio_path))
    mel = whisper.log_mel_spectrogram(audio).to(stt_model.device)
    # DecodingOptions defaults to half precision, which CPU-only hosts cannot
    # run; request it only when the model actually lives on a CUDA device.
    options = whisper.DecodingOptions(fp16=stt_model.device.type == "cuda")
    # With no language set in the options, decode() picks the spoken language
    # itself, so a separate detect_language() pass is not needed.
    return whisper.decode(stt_model, mel, options).text


def _translate(text, target_language):
    """Return ``text`` translated into ``target_language`` by the SMALL-100 model."""
    # The tokenizer is rebuilt per call on purpose: it is mutated below, and a
    # shared instance could be reconfigured by another request mid-translation.
    tokenizer = AutoTokenizer.from_pretrained("alirezamsh/small100")
    translator = _get_cached(
        "small100",
        lambda: AutoModelForSeq2SeqLM.from_pretrained("alirezamsh/small100"),
    )
    # NOTE(review): the *target* code is assigned to `src_lang` exactly as in
    # the original. This looks deliberate (SMALL-100 appears to expect the
    # target-language token on the encoder side) -- confirm against the model
    # card before changing it.
    tokenizer.src_lang = target_language
    encoded = tokenizer(text, return_tensors="pt")
    generated_tokens = translator.generate(**encoded)
    return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]


def translate_audio(input_file, target_language):
    """Transcribe an uploaded recording, translate it, and synthesise speech.

    Args:
        input_file: The uploaded audio, either as a filesystem path
            (``str`` / ``os.PathLike``) or as a binary file-like object
            exposing ``read()``.
        target_language: Language code for the translation and the generated
            speech (for example ``"en"`` or ``"fr"``).

    Returns:
        Path of a newly created ``.mp3`` file holding the translated speech.
        The file is not deleted automatically.

    Raises:
        ValueError: If no audio file or no target language was supplied, or
            if no speech could be transcribed from the audio.
    """
    # Validate up front so a missing widget value gives a clear message
    # instead of an AttributeError deep inside the pipeline.
    if input_file is None:
        raise ValueError("No audio file was provided.")
    if not target_language:
        raise ValueError("No target language was selected.")

    # Audio to text (STT)
    if isinstance(input_file, (str, os.PathLike)):
        # Already on disk: hand the path straight to Whisper.
        text = _transcribe(os.fspath(input_file))
    else:
        # File-like upload: Whisper loads audio by path, so stage the bytes in
        # a temporary file that lives for the duration of the transcription.
        with NamedTemporaryFile(suffix=".wav") as temp_audio:
            temp_audio.write(input_file.read())
            temp_audio.flush()
            text = _transcribe(temp_audio.name)

    if not text.strip():
        raise ValueError("No speech could be transcribed from the audio.")

    # Translate
    translated_text = _translate(text, target_language)

    # Text-to-audio (TTS)
    tts = gTTS(text=translated_text, lang=target_language)
    # delete=False keeps the file on disk after this function returns; it is
    # closed first so gTTS can reopen it by name.
    output_file = NamedTemporaryFile(suffix=".mp3", delete=False)
    output_file.close()
    tts.save(output_file.name)
    return output_file.name
40
+
41
+ # Define Gradio interface
42
# Input widgets, listed in the order translate_audio receives its arguments:
# the uploaded recording first, then the language code to translate into.
inputs = [
    gr.File(label="Upload Audio File"),
    gr.Dropdown(label="Target Language", choices=['en', 'es', 'fr', 'de', 'ru']),
]

# The single output widget offers the generated audio file for download.
outputs = [gr.File(label="Translated Audio")]

# Text shown at the top of the page.
title = "Audio Translation"
description = "Upload an audio file, translate the speech to a target language, and download the translated audio."

# Build the app, then start serving it (share=True is passed through to launch).
demo = gr.Interface(
    fn=translate_audio,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
)
demo.launch(share=True)