Spaces:
Runtime error
Runtime error
Pawan Kumar Pradhan
committed on
Commit
·
814a890
1
Parent(s):
7fe9462
update lang dropdown
Browse files
app.py
CHANGED
@@ -16,8 +16,6 @@ tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
|
|
16 |
output_dir = "output_audio"
|
17 |
os.makedirs(output_dir, exist_ok=True)
|
18 |
|
19 |
-
|
20 |
-
|
21 |
def transcribeaudio(audiofile):
|
22 |
print("Transcribing audio...")
|
23 |
tresult = model.transcribe(audiofile)
|
@@ -36,102 +34,124 @@ def transcribeaudio(audiofile):
|
|
36 |
|
37 |
return {"text": tresult["text"], "language": detected_language}
|
38 |
|
39 |
-
def translatetext(text, source_lang):
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
print(f"{lang_name} Translation: {translated_text}")
|
49 |
-
except Exception as e:
|
50 |
-
print(f"Error translating to {lang_name}: {str(e)}")
|
51 |
-
translations[lang_code] = f"Error: Could not translate to {lang_name}"
|
52 |
-
|
53 |
-
return [translations[lang] for lang in ["es", "fr", "hi"]]
|
54 |
|
55 |
def readtranslation(text, audiofile, language):
|
56 |
output_path = os.path.join(output_dir, f"{language}_{uuid.uuid4()}.wav")
|
57 |
print(f"Generating TTS for text: {text}")
|
58 |
-
tts.tts_to_file(text=text,
|
59 |
-
file_path=output_path,
|
60 |
-
speaker_wav=audiofile,
|
61 |
-
language=language)
|
62 |
print(f"Generated audio file at: {output_path}")
|
63 |
return output_path
|
64 |
|
65 |
-
def
|
|
|
|
|
|
|
66 |
progress(0, desc="Starting process...")
|
67 |
try:
|
68 |
progress(0.2, desc="Transcribing audio...")
|
69 |
transcription_result = transcribeaudio(audiofile)
|
70 |
-
|
71 |
if isinstance(transcription_result, dict) and transcription_result.get("status") == "error":
|
72 |
raise gr.Error(transcription_result["error"])
|
73 |
-
|
74 |
text = transcription_result["text"]
|
75 |
detected_language = transcription_result["language"]
|
76 |
-
|
77 |
progress(0.4, desc="Translating text...")
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
progress((i + 1) * 0.1 + 0.5, desc=f"Generating {lang} audio...")
|
84 |
-
try:
|
85 |
-
audio_path = readtranslation(translation, audiofile, lang)
|
86 |
-
audio_paths.append(audio_path)
|
87 |
-
except Exception as e:
|
88 |
-
print(f"Error generating audio for {lang}: {str(e)}")
|
89 |
-
audio_paths.append(None)
|
90 |
-
|
91 |
progress(1.0, desc="Process complete!")
|
92 |
-
return
|
93 |
except Exception as e:
|
94 |
raise gr.Error(f"An error occurred: {str(e)}")
|
95 |
finally:
|
96 |
cleanup_memory()
|
97 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
with gr.Blocks() as demo:
|
99 |
gr.Markdown("## Record yourself in any language and immediately receive voice translations.")
|
|
|
100 |
with gr.Row():
|
101 |
with gr.Column():
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
114 |
|
115 |
with gr.Row():
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
submit.click(fn=voice_to_voice, inputs=audio_input, outputs=output_components, show_progress=True)
|
129 |
-
|
130 |
-
def cleanup_memory():
|
131 |
-
gc.collect()
|
132 |
-
torch.cuda.empty_cache()
|
133 |
-
print("Memory cleaned up")
|
134 |
|
135 |
if __name__ == "__main__":
|
136 |
demo.launch()
|
137 |
-
cleanup_memory()
|
|
|
16 |
output_dir = "output_audio"
|
17 |
os.makedirs(output_dir, exist_ok=True)
|
18 |
|
|
|
|
|
19 |
def transcribeaudio(audiofile):
|
20 |
print("Transcribing audio...")
|
21 |
tresult = model.transcribe(audiofile)
|
|
|
34 |
|
35 |
return {"text": tresult["text"], "language": detected_language}
|
36 |
|
37 |
+
def translatetext(text, source_lang, target_lang):
    """Translate *text* from source_lang into target_lang.

    Returns the translated string on success. On any translator failure
    the error is logged and an "Error: ..." message string is returned
    instead, so callers always receive displayable text.
    """
    try:
        engine = Translator(from_lang=source_lang, to_lang=target_lang)
        result = engine.translate(text)
        print(f"Translated text: {result}")
        return result
    except Exception as exc:
        # Best-effort: surface the failure as text rather than raising.
        print(f"Error translating to {target_lang}: {str(exc)}")
        return f"Error: Could not translate to {target_lang}"
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
|
47 |
def readtranslation(text, audiofile, language):
    """Render *text* as speech in *language*, cloning the voice from audiofile.

    Writes a uniquely named .wav under the module-level output_dir and
    returns its path.
    """
    # uuid4 in the filename prevents collisions between concurrent requests.
    wav_path = os.path.join(output_dir, f"{language}_{uuid.uuid4()}.wav")
    print(f"Generating TTS for text: {text}")
    tts.tts_to_file(
        text=text,
        file_path=wav_path,
        speaker_wav=audiofile,
        language=language,
    )
    print(f"Generated audio file at: {wav_path}")
    return wav_path
|
53 |
|
54 |
+
def v2vtranslate(audiofile, selected_lang, COQUI_TOS_AGREED, progress=gr.Progress()):
    """Voice-to-voice pipeline: transcribe, translate, then synthesize speech.

    Parameters:
        audiofile: filepath of the recorded input audio (from gr.Audio).
        selected_lang: target language code chosen in the dropdown.
        COQUI_TOS_AGREED: checkbox state; the pipeline only runs when truthy.
        progress: Gradio progress tracker (injected by Gradio).

    Returns:
        (audio_path, translated_text) on success, or (None, None) when the
        terms have not been accepted — one value per wired output component.

    Raises:
        gr.Error: on any failure inside the pipeline.
    """
    # Guard clause: refuse to run until the Coqui CPML terms are accepted.
    if not COQUI_TOS_AGREED:
        gr.Warning("Please accept the Terms & Condition!")
        # BUG FIX: this handler feeds exactly two output components
        # (translated audio + markdown text), so return two values,
        # not four — a length mismatch trips Gradio's output check.
        return None, None

    progress(0, desc="Starting process...")
    try:
        progress(0.2, desc="Transcribing audio...")
        transcription_result = transcribeaudio(audiofile)

        # transcribeaudio signals failure via a {"status": "error"} dict.
        if isinstance(transcription_result, dict) and transcription_result.get("status") == "error":
            raise gr.Error(transcription_result["error"])

        text = transcription_result["text"]
        detected_language = transcription_result["language"]

        progress(0.4, desc="Translating text...")
        translated_text = translatetext(text, detected_language, selected_lang)

        progress(0.7, desc="Generating audio...")
        audio_path = readtranslation(translated_text, audiofile, selected_lang)

        progress(1.0, desc="Process complete!")
        return audio_path, translated_text
    except Exception as e:
        # Re-raise anything (including gr.Error) as a user-visible error.
        raise gr.Error(f"An error occurred: {str(e)}")
    finally:
        # Release GC garbage and CUDA cache after every accepted request.
        cleanup_memory()
|
90 |
+
|
91 |
with gr.Blocks() as demo:
    gr.Markdown("## Record yourself in any language and immediately receive voice translations.")

    with gr.Row():
        with gr.Column():
            # Microphone-only recorder; "filepath" type hands the handler a wav path.
            audio_input = gr.Audio(
                sources=["microphone"],
                type="filepath",
                show_download_button=True,
                max_length=15,
                label="Record your voice",
                waveform_options=gr.WaveformOptions(
                    waveform_color="#01C6FF",
                    waveform_progress_color="#0066B4",
                    skip_length=2,
                    show_controls=False,
                ),
            )
            # Output-language codes accepted by the XTTS v2 model.
            language_gr = gr.Dropdown(
                label="Language",
                info="Select an output language for the synthesised speech",
                choices=[
                    "en", "es", "fr", "de", "it", "pt", "pl", "tr", "ru",
                    "nl", "cs", "ar", "zh-cn", "ja", "ko", "hu", "hi",
                ],
                max_choices=1,
                value="es",
            )
            tos_gr = gr.Checkbox(
                label="Agree",
                value=False,
                info="I agree to the terms of the CPML: https://coqui.ai/cpml",
            )
            submit = gr.Button("Submit", variant="primary")
            reset = gr.Button("Reset")

    with gr.Row():
        output_audio = gr.Audio(label="Translated Audio", interactive=False)
        output_text = gr.Markdown()

    output_components = [output_audio, output_text]

    submit.click(fn=v2vtranslate, inputs=[audio_input, language_gr, tos_gr], outputs=output_components, show_progress=True)
    # BUG FIX: reset clears three components, so the callback must return
    # one value per component; a single bare None can trip Gradio's
    # output-count check instead of clearing the fields.
    reset.click(fn=lambda: (None, None, None), inputs=None, outputs=output_components + [audio_input])
|
150 |
+
|
151 |
+
def cleanup_memory():
    """Free Python garbage and release cached GPU memory after a request."""
    gc.collect()
    # Guard: only touch the CUDA allocator when a GPU is actually present,
    # so CPU-only deployments skip the call entirely.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    print("Memory cleaned up")
|
|
|
|
|
|
|
|
|
|
|
|
|
155 |
|
156 |
# Script entry point: start the Gradio server (blocking call) only when
# executed directly, not when imported as a module.
if __name__ == "__main__":
    demo.launch()
|
|