Spaces:

ik
/

twi-ewe-mss-tss

Sleeping

File size: 3,117 Bytes

import gradio as gr
import speech_recognition as sr
from ttsmms import TTS
from deep_translator import GoogleTranslator

# Load one TTS model per language from its model directory
ewe = TTS("data/ewe")
twi = TTS("data/aka")

# Supported languages, each paired with the TTS model that voices it.
# The "lang" value is both the dropdown choice shown in the UI and the
# translation target passed to GoogleTranslator.
langs = [
    {"lang": "ewe", "tts": ewe},
    {"lang": "twi", "tts": twi},
]


# Function to convert speech to text using Google's speech recognition API
def speech_to_text(audio_file):
    """Transcribe an audio file with the recognizer's Google backend.

    Args:
        audio_file: Path (or file-like object) accepted by ``sr.AudioFile``.

    Returns:
        The recognized text, or ``None`` when the speech could not be
        understood or the recognition request failed.
    """
    recognizer = sr.Recognizer()

    # Read the whole file into memory before attempting recognition
    with sr.AudioFile(audio_file) as audio_source:
        recording = recognizer.record(audio_source)

    try:
        transcript = recognizer.recognize_google(recording)
    except sr.UnknownValueError:
        # The audio was read but no speech could be made out
        transcript = None
    except sr.RequestError:
        print("Speech recognition service unavailable.")
        transcript = None
    return transcript


# Function to convert text to speech
def text_to_speech(text, lang):
    """Translate ``text`` into ``lang`` and synthesise the translation as speech.

    Args:
        text: Text to translate; the source language is auto-detected by the
            translator.
        lang: Name of a supported language, i.e. the ``"lang"`` value of an
            entry in the module-level ``langs`` list.

    Returns:
        A ``(wav_path, translated)`` tuple: the path of the generated WAV file
        and the translated text.

    Raises:
        ValueError: If ``lang`` is not supported, or ``text`` is not a
            non-blank string.
    """
    import tempfile

    # Find the selected language in the list of supported languages
    selected_lang = next((lang_item for lang_item in langs if lang_item["lang"] == lang), None)
    if selected_lang is None:
        raise ValueError(f"Language '{lang}' is not supported.")
    selected_tts = selected_lang["tts"]

    # Fail early with a clear message instead of sending nothing to the
    # translator / synthesiser.
    if not isinstance(text, str) or not text.strip():
        raise ValueError("No text provided for speech synthesis.")

    # Translate the text to the selected language using Google Translator
    translated = GoogleTranslator(source='auto', target=lang).translate(text)

    # Use a unique file per call: a single shared "output.wav" would be
    # overwritten when several requests are processed at the same time.
    # The file is intentionally left on disk so the caller can read it.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
        wav_path = tmp_file.name

    # Generate speech synthesis and save it as a WAV file
    selected_tts.synthesis(translated, wav_path=wav_path)
    return wav_path, translated


# Function to handle the speech to text app
def speech_to_text_app(audio_file):
    """Transcribe ``audio_file`` and always return a displayable string.

    Returns the transcription, or a fixed fallback message when
    ``speech_to_text`` yields nothing (``None`` or an empty string).
    """
    transcript = speech_to_text(audio_file)
    if not transcript:
        return "Unable to transcribe audio."
    return transcript


# Function to handle the text to speech output
def text_to_speech_output(text, lang):
    """Run ``text_to_speech`` and pass its result through unchanged.

    Returns:
        A ``(wav_path, translated)`` tuple, exactly as produced by
        ``text_to_speech``.
    """
    audio_path, translation = text_to_speech(text, lang)
    return audio_path, translation


# Function to handle the speech to text and text to speech app
def speech_to_text_and_tts_app(lang_input, audio_file, text_input):
    """Translate recorded speech or typed text and synthesise it as audio.

    A recording takes priority over typed text when both are supplied.

    Args:
        lang_input: Target language name selected in the UI (one of the
            ``"lang"`` values in ``langs``).
        audio_file: Path to the recorded audio, or a falsy value when nothing
            was recorded.
        text_input: Text typed by the user; used only when there is no audio.

    Returns:
        A ``(text, wav_path)`` tuple: the translated text and the path of the
        synthesised WAV file. When there is nothing to translate, ``text`` is
        an explanatory message and ``wav_path`` is ``None``.
    """
    if audio_file:
        print("Converting audio to text:", audio_file)
        text = speech_to_text(audio_file)
        if not text:
            # speech_to_text returns None when recognition fails; forwarding
            # that to the translator would raise instead of informing the user.
            return "Unable to transcribe audio.", None
    else:
        text = text_input
        if not isinstance(text, str) or not text.strip():
            # Nothing recorded and nothing typed: there is nothing to translate.
            return "Please record audio or enter some text.", None

    wav_path, translated = text_to_speech_output(text, lang_input)
    return translated, wav_path


# Gradio input components: microphone recording, free text and target language
audio_input = gr.inputs.Audio(
    source="microphone",
    type="filepath",
    label="Record Audio",
)
text_input = gr.inputs.Textbox(label="Enter your text here")
lang_input = gr.inputs.Dropdown(
    choices=[entry["lang"] for entry in langs],
    label="Language",
)

# Gradio output components.
# NOTE(review): this text box receives the translated text returned by the
# handler, although it is labelled "Transcription".
output_text = gr.outputs.Textbox(label="Transcription")
output_audio = gr.outputs.Audio(label="Text-to-Speech Audio", type="filepath")

# Build the Gradio interface; the order of `inputs` matches the handler's
# parameters (language, audio, text) and the order of `outputs` matches its
# return value (text, audio path).
interface = gr.Interface(
    title="English to Twi - Ewe Speech Generator(MMS TTS)",
    description="Translate English to Twi and Ewe Language(from Ghana)",
    fn=speech_to_text_and_tts_app,
    inputs=[lang_input, audio_input, text_input],
    outputs=[output_text, output_audio],
)

# Start serving the interface
interface.launch()