import gradio as gr
import whisper
from translate import Translator
from TTS.api import TTS
import uuid
import os
import gc
import torch

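# Setting COQUI_TOS_AGREED accepts Coqui's CPML licence (https://coqui.ai/cpml)
# up front so the XTTS v2 checkpoint downloads without an interactive prompt.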
os.environ["COQUI_TOS_AGREED"] = "1"

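# Load both models once at startup: Whisper "base" for transcription and
# language detection, and Coqui XTTS v2 for multilingual voice-cloning TTS.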
model = whisper.load_model("base")
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")

output_dir = "output_audio"
os.makedirs(output_dir, exist_ok=True)

# Defined before the request handler that calls it in its finally block.
def cleanup_memory():
    """Free Python objects and, when CUDA is available, empty the GPU cache."""
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    print("Memory cleaned up")

def transcribe_audio(audiofile):
    """Transcribe `audiofile` with Whisper and detect its spoken language."""
    print("Transcribing audio...")
    result = model.transcribe(audiofile)

    if "text" not in result:
        print("Transcription failed.")
        return {"status": "error", "error": "Transcription failed"}

    # Language detection runs on a 30-second, padded/trimmed log-Mel
    # spectrogram, independently of the transcription above.
    audio = whisper.load_audio(audiofile)
    audio = whisper.pad_or_trim(audio)
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    _, probs = model.detect_language(mel)
    detected_language = max(probs, key=probs.get)
    print(f"Detected language: {detected_language}")

    return {"text": result["text"], "language": detected_language}

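# The `translate` package defaults to the free MyMemory backend, which
# rate-limits requests; if translations start failing, another provider can
# be selected via Translator's provider argument.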
def translate_text(text, source_lang, target_lang):
    """Translate `text` from the detected source language to the target."""
    try:
        translator = Translator(from_lang=source_lang, to_lang=target_lang)
        translated_text = translator.translate(text)
        print(f"Translated text: {translated_text}")
        return translated_text
    except Exception as e:
        print(f"Error translating to {target_lang}: {e}")
        return f"Error: Could not translate to {target_lang}"

def read_translation(text, audiofile, language):
    """Synthesise `text` with XTTS v2, cloning the voice in `audiofile`."""
    output_path = os.path.join(output_dir, f"{language}_{uuid.uuid4()}.wav")
    print(f"Generating TTS for text: {text}")
    tts.tts_to_file(text=text, file_path=output_path, speaker_wav=audiofile, language=language)
    print(f"Generated audio file at: {output_path}")
    return output_path

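# End-to-end handler for the Submit button: transcribe, then translate, then
# synthesise, reporting progress to the UI at each stage.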
def v2v_translate(audiofile, selected_lang, tos_agreed, progress=gr.Progress()):
    if not tos_agreed:
        gr.Warning("Please accept the Terms & Conditions!")
        # One value per output component: (translated audio, translated text).
        return None, None

    progress(0, desc="Starting process...")
    try:
        progress(0.2, desc="Transcribing audio...")
        transcription_result = transcribe_audio(audiofile)

        if isinstance(transcription_result, dict) and transcription_result.get("status") == "error":
            raise gr.Error(transcription_result["error"])

        text = transcription_result["text"]
        detected_language = transcription_result["language"]

        progress(0.4, desc="Translating text...")
        translated_text = translate_text(text, detected_language, selected_lang)

        progress(0.7, desc="Generating audio...")
        audio_path = read_translation(translated_text, audiofile, selected_lang)

        progress(1.0, desc="Process complete!")
        return audio_path, translated_text
    except gr.Error:
        # Preserve the specific error messages raised above.
        raise
    except Exception as e:
        raise gr.Error(f"An error occurred: {e}") from e
    finally:
        cleanup_memory()

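# Gradio UI: microphone recorder, target-language dropdown, and licence
# checkbox on the left; translated audio and text appear below.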
with gr.Blocks() as demo:
    gr.Markdown("## Record yourself in any language and immediately receive voice translations.")

    with gr.Row():
        with gr.Column():
            audio_input = gr.Audio(
                sources=["microphone"],
                type="filepath",
                show_download_button=True,
                max_length=15,  # cap recordings at 15 seconds
                label="Record your voice",
                waveform_options=gr.WaveformOptions(
                    waveform_color="#01C6FF",
                    waveform_progress_color="#0066B4",
                    skip_length=2,
                    show_controls=False,
                ),
            )
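            # Target codes XTTS v2 can synthesise; Whisper's detected code
            # supplies the translation source language.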
            language_gr = gr.Dropdown(
                label="Language",
                info="Select an output language for the synthesised speech",
                choices=[
                    "en",
                    "es",
                    "fr",
                    "de",
                    "it",
                    "pt",
                    "pl",
                    "tr",
                    "ru",
                    "nl",
                    "cs",
                    "ar",
                    "zh-cn",
                    "ja",
                    "ko",
                    "hu",
                    "hi",
                ],
                value="es",
            )
            tos_gr = gr.Checkbox(
                label="Agree",
                value=False,
                info="I agree to the terms of the CPML: https://coqui.ai/cpml",
            )
            submit = gr.Button("Submit", variant="primary")
            reset = gr.Button("Reset")

    with gr.Row():
        output_audio = gr.Audio(label="Translated Audio", interactive=False)
        output_text = gr.Markdown()

    output_components = [output_audio, output_text]

    submit.click(
        fn=v2v_translate,
        inputs=[audio_input, language_gr, tos_gr],
        outputs=output_components,
        show_progress=True,
    )
    # The reset handler must return one value per output component
    # (translated audio, translated text, and the recorded input).
    reset.click(
        fn=lambda: (None, None, None),
        inputs=None,
        outputs=output_components + [audio_input],
    )

if __name__ == "__main__":
    # Local launch; pass share=True to demo.launch() for a temporary public link.
    demo.launch()