irmtou's picture
Edited README.md and removed flagging button
64d3e15
raw
history blame
No virus
3.56 kB
import gradio as gr
import whisper
from translate import Translator
import os
# Set the Coqui terms-of-service flag before any TTS model is loaded.
# NOTE(review): presumably this suppresses the interactive licence prompt when
# the XTTS weights are downloaded - confirm against the Coqui TTS docs.
os.environ["COQUI_TOS_AGREED"] = "1"
# Load the Whisper "base" checkpoint once at import time so that
# speech_to_text can reuse it on every request.
model = whisper.load_model("base")
def speech_to_text(audio_file):
    """
    Transcribe an audio file with the module-level Whisper model.

    :param audio_file: Input audio file handed to model.transcribe
    :return: The "text" entry of the transcription result
    """
    transcript = model.transcribe(audio_file)["text"]
    # Echo the transcript to the console so each run can be followed in the logs.
    print(transcript)
    return transcript
# Defining the Translate Function
def translate(text, language):
    """
    Translate text into the requested language.

    :param text: Input text to be translated
    :param language: Target language, passed to Translator as to_lang
    :return: The translation returned by Translator.translate
    """
    # A new Translator is built per call because the target language can
    # differ from one request to the next.
    return Translator(to_lang=language).translate(text)
# Build the TTS model once at module level so that s2s does not have to
# reinitialize it on every call.
from TTS.api import TTS
# XTTS v2 multilingual checkpoint; gpu=False requests CPU execution.
tts_model = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
# Speech to Speech Function
def s2s(audio, language):
    """
    Run the full speech-to-speech pipeline on one recording.

    The recording is transcribed with speech_to_text, the transcript is
    translated with translate, and the translation is synthesized by the
    module-level tts_model using the same recording as the speaker reference.

    :param audio: Path of the input audio file
    :param language: Target language code, used for translation and synthesis
    :return: [transcribed text, translated text, path of the synthesized audio]
    :raises gr.Error: If no audio was provided
    """
    # Without a recording there is nothing to transcribe or to clone the voice
    # from; tell the user instead of failing somewhere inside the models.
    if not audio:
        raise gr.Error("Please upload or record some audio first.")

    print(audio)

    result_text = speech_to_text(audio)
    translated_text = translate(result_text, language)

    # Generate speech using the input audio as the speaker's voice
    tts_model.tts_to_file(
        text=translated_text,
        file_path="output.wav",
        speaker_wav=audio,
        language=language,
    )

    return [result_text, translated_text, "output.wav"]
# Display names and their language codes. The two lists are parallel: the
# name at position i belongs to the code at position i.
language_names = ["Arabic", "Portuguese", "Chinese", "Czech", "Dutch",
                  "English", "French", "German", "Italian", "Polish",
                  "Russian", "Spanish", "Turkish", "Korean",
                  "Hungarian", "Hindi"]
language_options = ["ar", "pt", "zh-cn", "cs", "nl", "en", "fr", "de",
                    "it", "pl", "ru", "es", "tr", "ko", "hu", "hi"]
# Materialize the (label, value) pairs as a real list: a bare zip() object
# can only be iterated once, so it must not be handed over as the choices.
language_dropdown = gr.Dropdown(
    choices=list(zip(language_names, language_options)),
    value="es",
    label="Target Language",
)
# NOTE(review): translate_button is not passed to gr.Interface in the visible
# code - confirm whether it is still needed.
translate_button = gr.Button(value="Synthesize and Translate my Voice!")
# Output widgets, in the order of the values returned by s2s:
# transcript, translation, synthesized audio file.
transcribed_text = gr.Textbox(label="Transcribed Text")
output_text = gr.Textbox(label="Translated Text")
output_speech = gr.Audio(label="Synthesized Audio", type="filepath")
# Audio input: accepts an upload or a microphone recording and hands s2s a
# file path to a WAV file.
_audio_input = gr.Audio(
    sources=["upload", "microphone"],
    type="filepath",
    format='wav',
    show_download_button=True,
    waveform_options=gr.WaveformOptions(
        waveform_color="#01C6FF",
        waveform_progress_color="#FF69B4",
        skip_length=2,
        show_controls=False,
    ),
)

_description = (
    "This speech-to-speech translator uses the Whisper model for speech-to-text "
    "transcription, the translate library for translation, and the Coqui TTS model for text-to-speech "
    "synthesis."
)

# Gradio interface with s2s as the main function: inputs are the recording and
# the target language, outputs are the transcript, translation and audio.
demo = gr.Interface(
    fn=s2s,
    inputs=[_audio_input, language_dropdown],
    outputs=[transcribed_text, output_text, output_speech],
    theme=gr.themes.Soft(),
    title="Speech Translation Synthesis",
    description=_description,
    allow_flagging="never",
)
# Start the app when the script runs; debug and share are both switched on.
demo.launch(debug=True, share=True)