Spaces:
Runtime error
Runtime error
File size: 1,965 Bytes
d7dfa49 da5250a c5a94f2 d7dfa49 726d965 da5250a 726d965 da5250a d7dfa49 726d965 d7dfa49 726d965 d7dfa49 da5250a 726d965 da5250a 726d965 da5250a d7dfa49 da5250a 726d965 da5250a d7dfa49 da5250a c5a94f2 da5250a d7dfa49 da5250a b2604a4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
import gradio as gr
import os
from openai_whisper import whisper
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from gtts import gTTS
# Load every model once at import time; speech_to_speech() reuses these
# module-level objects on each request.
# NOTE(review): the openai-whisper package is normally imported with
# `import whisper` -- confirm that the `openai_whisper` import at the top of
# this file actually resolves in the deployment environment.

# Translation: tokenizer and seq2seq model come from the same checkpoint.
tokenizer_translation = AutoTokenizer.from_pretrained("alirezamsh/small100")
model_translation = AutoModelForSeq2SeqLM.from_pretrained("alirezamsh/small100")

# Speech-to-text: Whisper "base" checkpoint.
model_stt = whisper.load_model("base")
def speech_to_speech(input_audio, to_lang):
    """Translate spoken audio into speech in another language.

    The pipeline is: Whisper detects the spoken language and transcribes the
    audio, the seq2seq translation model translates the transcript, and gTTS
    synthesises the translated text.

    Args:
        input_audio: The uploaded audio. Either a file-like object whose
            ``.name`` attribute is the on-disk path of the upload, or a
            plain path (``str`` / ``os.PathLike``).
        to_lang: Target language code. It is used both as the tokenizer's
            target language and as the gTTS voice language.

    Returns:
        The path of the MP3 file ("output_audio.mp3") holding the spoken
        translation.

    Raises:
        ValueError: If no text could be transcribed from the audio.
    """
    # The upload already lives on disk, so read it from its path. Tempfile-style
    # upload objects expose that path as `.name` and have no `save()` method,
    # so copying the file first (as the previous code tried to) fails.
    if isinstance(input_audio, (str, os.PathLike)):
        input_path = os.fspath(input_audio)
    else:
        input_path = input_audio.name

    # Speech-to-Text (STT)
    audio = whisper.load_audio(input_path)
    audio = whisper.pad_or_trim(audio)
    mel = whisper.log_mel_spectrogram(audio).to(model_stt.device)

    # Pick the most probable spoken language as the translation source.
    _, probs = model_stt.detect_language(mel)
    lang = max(probs, key=probs.get)

    options = whisper.DecodingOptions()
    result = whisper.decode(model_stt, mel, options)
    text = result.text

    # Fail early with a clear message instead of letting the TTS step choke
    # on an empty string.
    if not text or not text.strip():
        raise ValueError("No speech could be transcribed from the uploaded audio.")

    # Translate
    # NOTE(review): presumably the tokenizer loaded for this checkpoint honours
    # src_lang/tgt_lang set this way -- confirm against the model card.
    tokenizer_translation.src_lang = lang
    tokenizer_translation.tgt_lang = to_lang
    encoded = tokenizer_translation(text, return_tensors="pt")
    generated_tokens = model_translation.generate(**encoded)
    translated_text = tokenizer_translation.batch_decode(
        generated_tokens, skip_special_tokens=True
    )[0]

    # Text-to-Speech (TTS)
    tts = gTTS(text=translated_text, lang=to_lang)
    output_file = "output_audio.mp3"
    tts.save(output_file)
    return output_file
# Target languages offered in the dropdown (examples only):
# Russian, French, Spanish, German.
languages = ["ru", "fr", "es", "de"]

# Input and output widgets for the web UI.
file_input = gr.inputs.File(label="Upload Audio")
dropdown = gr.inputs.Dropdown(languages, label="Translation Language")
audio_output = gr.outputs.Audio(type="file", label="Translated Voice")

# Wire the widgets to speech_to_speech() and start serving the app.
interface = gr.Interface(
    fn=speech_to_speech,
    inputs=[file_input, dropdown],
    outputs=audio_output,
    title="Speech-to-Speech Translator",
    description="Upload an audio file (MP3, WAV, or FLAC) and choose the target language for translation.",
    theme="default",
)
interface.launch()
|