Spaces:
Runtime error
Runtime error
File size: 5,140 Bytes
3d13e83 6853642 3d13e83 36bb9b3 3d13e83 bdc5baa 3d13e83 4995b83 bdc5baa 7fe9462 36bb9b3 3d13e83 36bb9b3 814a890 36bb9b3 814a890 3d13e83 814a890 36bb9b3 814a890 36bb9b3 814a890 36bb9b3 814a890 36bb9b3 814a890 36bb9b3 814a890 36bb9b3 814a890 36bb9b3 814a890 36bb9b3 814a890 36bb9b3 814a890 3d13e83 36bb9b3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 |
import gradio as gr
import whisper
from translate import Translator
from TTS.api import TTS
import uuid
import os
from pathlib import Path
import gc
import torch
# Must be set before the TTS model is constructed below.
# NOTE(review): presumably pre-accepts the Coqui model licence prompt so the
# XTTS download does not block waiting for input - confirm against TTS docs.
os.environ["COQUI_TOS_AGREED"] = "1"
# Whisper "base" checkpoint, loaded once at import time; used by transcribeaudio.
model = whisper.load_model("base")
# Multilingual XTTS v2 model, loaded once at import time; used by readtranslation.
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
# Directory that receives the generated .wav files; created if it is missing.
output_dir = "output_audio"
os.makedirs(output_dir, exist_ok=True)
def transcribeaudio(audiofile):
    """Transcribe a recording with Whisper and report its spoken language.

    Args:
        audiofile: Path of the audio file to transcribe.

    Returns:
        On success, a dict ``{"text": <transcript>, "language": <code>}``.
        If the Whisper result carries no ``"text"`` entry, the dict
        ``{"status": "error", "error": "Transcription failed"}``.
    """
    print("Transcribing audio...")
    tresult = model.transcribe(audiofile)
    if "text" not in tresult:
        print("Transcription failed.")
        return {"status": "error", "error": "Transcription failed"}

    # transcribe() already detects the language and reports it in its result,
    # so reuse that value instead of reloading the audio and decoding again.
    detected_language = tresult.get("language")
    if not detected_language:
        # Fallback: run the explicit detection pass on the leading audio window.
        audio = whisper.load_audio(audiofile)
        audio = whisper.pad_or_trim(audio)
        mel = whisper.log_mel_spectrogram(audio).to(model.device)
        _, probs = model.detect_language(mel)
        detected_language = max(probs, key=probs.get)

    print(f"Detected language: {detected_language}")
    return {"text": tresult["text"], "language": detected_language}
def translatetext(text, source_lang, target_lang):
    """Translate ``text`` from ``source_lang`` into ``target_lang``.

    Args:
        text: The text to translate.
        source_lang: Language code of ``text``.
        target_lang: Language code to translate into.

    Returns:
        The translated text. ``text`` is returned unchanged when it is blank
        or when both language codes are equal. If the translator raises, the
        string ``"Error: Could not translate to <target_lang>"`` is returned
        instead of propagating the exception.
    """
    # Nothing to translate: skip the translator call entirely so a no-op
    # request cannot come back as a service message instead of a translation.
    if not text.strip() or source_lang == target_lang:
        return text

    try:
        translator = Translator(from_lang=source_lang, to_lang=target_lang)
        translated_text = translator.translate(text)
    except Exception as e:
        print(f"Error translating to {target_lang}: {str(e)}")
        return f"Error: Could not translate to {target_lang}"

    print(f"Translated text: {translated_text}")
    return translated_text
def readtranslation(text, audiofile, language):
    """Synthesise ``text`` to a new .wav file and return the file's path.

    Args:
        text: The text to speak.
        audiofile: Path of the recording handed to the TTS model as
            ``speaker_wav``.
        language: Language code for the synthesis; also used as the prefix
            of the generated file name.

    Returns:
        Path of the generated file inside ``output_dir``.
    """
    # A random UUID keeps file names from colliding between requests.
    wav_name = f"{language}_{uuid.uuid4()}.wav"
    output_path = os.path.join(output_dir, wav_name)

    print(f"Generating TTS for text: {text}")
    tts.tts_to_file(
        text=text,
        file_path=output_path,
        speaker_wav=audiofile,
        language=language,
    )
    print(f"Generated audio file at: {output_path}")

    return output_path
def v2vtranslate(audiofile, selected_lang, COQUI_TOS_AGREED, progress=gr.Progress()):
    """Run the full pipeline: transcribe, translate, then synthesise speech.

    Args:
        audiofile: Path of the recorded audio, or ``None`` if nothing was
            recorded.
        selected_lang: Target language code chosen in the UI.
        COQUI_TOS_AGREED: Whether the terms checkbox is ticked.
        progress: Gradio progress tracker used to report pipeline stages.

    Returns:
        A ``(audio_path, translated_text)`` pair, one value per output
        component. ``(None, None)`` is returned when the terms were not
        accepted or no recording was supplied.

    Raises:
        gr.Error: If transcription fails or any pipeline stage raises.
    """
    if not COQUI_TOS_AGREED:
        gr.Warning("Please accept the Terms & Condition!")
        # Exactly one value per output component (audio, text).
        return None, None

    if not audiofile:
        # Without a recording the transcription step would only fail with an
        # opaque error, so stop early with a clear message.
        gr.Warning("Please record some audio first!")
        return None, None

    progress(0, desc="Starting process...")
    try:
        progress(0.2, desc="Transcribing audio...")
        transcription_result = transcribeaudio(audiofile)
        if isinstance(transcription_result, dict) and transcription_result.get("status") == "error":
            raise gr.Error(transcription_result["error"])
        text = transcription_result["text"]
        detected_language = transcription_result["language"]

        progress(0.4, desc="Translating text...")
        translated_text = translatetext(text, detected_language, selected_lang)

        progress(0.7, desc="Generating audio...")
        audio_path = readtranslation(translated_text, audiofile, selected_lang)

        progress(1.0, desc="Process complete!")
        return audio_path, translated_text
    except gr.Error:
        # Already a user-facing error; do not wrap it a second time.
        raise
    except Exception as e:
        raise gr.Error(f"An error occurred: {str(e)}") from e
    finally:
        # Runs on success and on failure alike.
        cleanup_memory()
# UI definition: a recorder, a target-language picker and a terms checkbox on
# the input side; the translated audio and its text on the output side.
with gr.Blocks() as demo:
    gr.Markdown("## Record yourself in any language and immediately receive voice translations.")
    with gr.Row():
        with gr.Column():
            audio_input = gr.Audio(
                sources=["microphone"],
                type="filepath",
                show_download_button=True,
                max_length=15,
                label="Record your voice",
                waveform_options=gr.WaveformOptions(
                    waveform_color="#01C6FF",
                    waveform_progress_color="#0066B4",
                    skip_length=2,
                    show_controls=False,
                ),
            )
            language_gr = gr.Dropdown(
                label="Language",
                info="Select an output language for the synthesised speech",
                choices=[
                    "en",
                    "es",
                    "fr",
                    "de",
                    "it",
                    "pt",
                    "pl",
                    "tr",
                    "ru",
                    "nl",
                    "cs",
                    "ar",
                    "zh-cn",
                    "ja",
                    "ko",
                    "hu",
                    "hi",
                ],
                max_choices=1,
                value="es",
            )
            tos_gr = gr.Checkbox(
                label="Agree",
                value=False,
                info="I agree to the terms of the CPML: https://coqui.ai/cpml",
            )
            submit = gr.Button("Submit", variant="primary")
            reset = gr.Button("Reset")
    with gr.Row():
        output_audio = gr.Audio(label="Translated Audio", interactive=False)
        output_text = gr.Markdown()

    output_components = [output_audio, output_text]
    submit.click(
        fn=v2vtranslate,
        inputs=[audio_input, language_gr, tos_gr],
        outputs=output_components,
        show_progress=True,
    )

    # Reset clears both outputs and the recorder. The handler returns one
    # None per target component so the value count matches `outputs`.
    reset_targets = output_components + [audio_input]
    reset.click(
        fn=lambda: (None,) * len(reset_targets),
        inputs=None,
        outputs=reset_targets,
    )
def cleanup_memory():
    """Run a garbage-collection pass, then empty the CUDA cache.

    Prints a confirmation line when done. Returns nothing.
    """
    # Collect Python garbage first, then ask torch to empty its CUDA cache.
    gc.collect()
    torch.cuda.empty_cache()

    print("Memory cleaned up")
if __name__ == "__main__":
    # Launch the Gradio app only when this file is run as a script.
    demo.launch()
|