Spaces:
Running
Running
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import numpy as np
|
3 |
+
import gradio as gr
|
4 |
+
import assemblyai as aai
|
5 |
+
from translate import Translator
|
6 |
+
import uuid
|
7 |
+
from elevenlabs import VoiceSettings
|
8 |
+
from elevenlabs.client import ElevenLabs
|
9 |
+
from pathlib import Path
|
10 |
+
|
11 |
+
|
12 |
+
# API credentials are read from the process environment; each value is None
# when the corresponding variable is not set.
ELEVENLABS_API = os.getenv("ELEVENLABS_API")
ASSEMBLYAI_API = os.getenv("ASSEMBLYAI_API")
|
15 |
+
|
16 |
+
def voice_to_voice(audio_file):
    """Transcribe a recording, translate it, and synthesize each translation.

    Args:
        audio_file: Path of the recorded/uploaded audio, as delivered by the
            ``gr.Audio(type="filepath")`` input; ``None`` when nothing was
            provided.

    Returns:
        A flat tuple ``(audio_path_1, text_1, audio_path_2, text_2, ...)``
        with one (audio, text) pair per language, in the order produced by
        ``translate_text``. The pairs are interleaved because the UI registers
        its output components as Audio, Markdown, Audio, Markdown, ...

    Raises:
        gr.Error: If no audio was supplied or the transcription failed.
    """
    if audio_file is None:
        raise gr.Error("Please record or upload an audio clip first.")

    transcript = transcribe_audio(audio_file)
    if transcript.status == aai.TranscriptStatus.error:
        raise gr.Error(transcript.error)

    outputs = []
    for translation in translate_text(transcript.text):
        audio_path = Path(text_to_speech(translation))
        # Keep each audio file next to its text so the values line up
        # positionally with the (Audio, Markdown) component pairs.
        outputs.extend((audio_path, translation))

    return tuple(outputs)
|
32 |
+
|
33 |
+
def transcribe_audio(audio_file):
    """Transcribe an audio file with AssemblyAI.

    Args:
        audio_file: Audio source handed to ``aai.Transcriber.transcribe``.

    Returns:
        The transcript object returned by the AssemblyAI SDK; callers inspect
        its ``status``, ``error`` and ``text`` attributes.
    """
    # AssemblyAI must be authenticated with its own key, not the ElevenLabs one.
    aai.settings.api_key = ASSEMBLYAI_API
    transcriber = aai.Transcriber()
    return transcriber.transcribe(audio_file)
|
38 |
+
|
39 |
+
def translate_text(text):
    """Translate English text into each supported target language.

    Args:
        text: The English source text.

    Returns:
        A list with one translation per target language code, in the order
        ru, tr, sv, de, es, ja, id.
    """
    target_codes = ("ru", "tr", "sv", "de", "es", "ja", "id")
    return [
        Translator(from_lang="en", to_lang=code).translate(text)
        for code in target_codes
    ]
|
49 |
+
|
50 |
+
def text_to_speech(text):
    """Synthesize speech for ``text`` with ElevenLabs and save it as an MP3.

    Args:
        text: The text to be spoken.

    Returns:
        The relative file name (``<uuid4>.mp3``) the audio was written to.
    """
    voice_settings = VoiceSettings(
        stability=0.5,
        similarity_boost=0.8,
        style=0.5,
        use_speaker_boost=True,
    )
    elevenlabs_client = ElevenLabs(api_key=ELEVENLABS_API)
    audio_stream = elevenlabs_client.text_to_speech.convert(
        voice_id="<your-voice-id>",
        optimize_streaming_latency="0",
        output_format="mp3_22050_32",
        text=text,
        model_id="eleven_multilingual_v2",
        voice_settings=voice_settings,
    )

    # A random UUID keeps files from concurrent requests from colliding.
    output_name = f"{uuid.uuid4()}.mp3"
    with open(output_name, "wb") as mp3_file:
        # Empty chunks are skipped; everything else is written in order.
        mp3_file.writelines(piece for piece in audio_stream if piece)

    return output_name
|
73 |
+
|
74 |
+
# Build the Gradio UI: one audio input, and one (Audio, Markdown) output pair
# per target language.
with gr.Blocks() as demo:
    gr.Markdown("## audio Translator")
    # Markdown links are written [text](url); the reversed form does not render.
    gr.Markdown(
        """
        The API Key you need:
        [AssemblyAI API key](https://www.assemblyai.com/?utm_source=youtube&utm_medium=referral&utm_campaign=yt_mis_66)<br>
        [Elevenlabs API key](https://elevenlabs.io/)<br>
        Note: you need at least 30 minutes of a voice recording of yourself for the *Professional voice cloning. But there is also a simpler voice cloning option that only requires 30 seconds of voice recording. *Professional voice cloning is a paid feature.

        """
    )
    audio_input = gr.Audio(type="filepath", show_download_button=True)
    submit = gr.Button("Submit", variant="primary")
    clear_button = gr.ClearButton(audio_input, "Clear")

    output_components = []
    # Label order must match the language-code order used by translate_text
    # (ru, tr, sv, de, es, ja, id), because outputs are assigned positionally.
    languages = ["Russian", "Turkish", "Swedish", "German", "Spanish", "Japanese", "indonesian"]

    for lang in languages:
        with gr.Group():
            output_components.append(gr.Audio(label=lang, interactive=False))
            output_components.append(gr.Markdown())

    submit.click(fn=voice_to_voice, inputs=audio_input, outputs=output_components, show_progress=True)

if __name__ == "__main__":
    demo.launch()
|