Spaces:
Sleeping
Sleeping
Ubuntu
committed on
Commit
·
da70d80
1
Parent(s):
893eb12
add speaker selection
Browse files
app.py
CHANGED
@@ -24,6 +24,14 @@ DEVELOPER_PASSWORD = os.getenv("DEV_PWD")
|
|
24 |
# RapidAPI key, read from the RAPID_API_KEY environment variable
|
25 |
RAPID_API_KEY = os.getenv("RAPID_API_KEY")
|
26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
def fetch_youtube_id(youtube_url: str) -> str:
|
28 |
if 'v=' in youtube_url:
|
29 |
return youtube_url.split("v=")[1].split("&")[0]
|
@@ -108,7 +116,7 @@ def inference_via_llm_api(input_text, min_new_tokens=2, max_new_tokens=64):
|
|
108 |
else:
|
109 |
return "The system got some error during vLLM generation. Please try it again."
|
110 |
|
111 |
-
def transcribe_and_speak(audio, source_lang, target_lang, youtube_url=None):
|
112 |
if youtube_url:
|
113 |
audio = download_youtube_audio(youtube_url)
|
114 |
if not audio:
|
@@ -141,7 +149,7 @@ def transcribe_and_speak(audio, source_lang, target_lang, youtube_url=None):
|
|
141 |
tts_params = {
|
142 |
'language': target_lang,
|
143 |
'speed': 1.1,
|
144 |
-
'speaker':
|
145 |
'text': translated_text
|
146 |
}
|
147 |
|
@@ -156,8 +164,8 @@ def transcribe_and_speak(audio, source_lang, target_lang, youtube_url=None):
|
|
156 |
def check_password(password):
    """Return True when ``password`` matches the configured developer password.

    Args:
        password: The candidate password entered by the user.

    Returns:
        bool: True only if a developer password is configured and
        ``password`` is a string equal to it.
    """
    import hmac

    # DEVELOPER_PASSWORD is read with os.getenv and is None when the variable
    # is unset; without this guard a None input would compare equal and pass.
    if not isinstance(password, str) or not isinstance(DEVELOPER_PASSWORD, str):
        return False
    # Constant-time comparison so the check does not leak how much matched.
    return hmac.compare_digest(
        password.encode("utf-8"), DEVELOPER_PASSWORD.encode("utf-8")
    )
|
158 |
|
159 |
-
def run_speech_translation(audio, source_lang, target_lang, youtube_url):
    """Run the transcribe/translate/speak pipeline for the UI button.

    Forwards all arguments to ``transcribe_and_speak`` and returns its three
    results as a tuple: (transcription, translated_text, audio_url).
    """
    results = transcribe_and_speak(audio, source_lang, target_lang, youtube_url)
    # Unpacking enforces that exactly three values came back.
    text, translation, speech_url = results
    return text, translation, speech_url
|
163 |
|
@@ -172,7 +180,8 @@ with gr.Blocks() as demo:
|
|
172 |
user_youtube_url = gr.Textbox(label="YouTube URL (optional)")
|
173 |
user_source_lang = gr.Dropdown(choices=["en", "ma", "ta", "zh"], label="Source Language", value="en")
|
174 |
user_target_lang = gr.Dropdown(choices=["en", "ma", "ta", "zh"], label="Target Language", value="zh")
|
175 |
-
|
|
|
176 |
with gr.Row():
|
177 |
user_button = gr.Button("Translate and Speak", interactive=False)
|
178 |
|
@@ -200,7 +209,7 @@ with gr.Blocks() as demo:
|
|
200 |
|
201 |
user_button.click(
|
202 |
fn=run_speech_translation,
|
203 |
-
inputs=[user_audio_input, user_source_lang, user_target_lang, user_youtube_url],
|
204 |
outputs=[user_transcription_output, user_translation_output, user_audio_output]
|
205 |
)
|
206 |
|
@@ -219,4 +228,13 @@ with gr.Blocks() as demo:
|
|
219 |
outputs=[user_video_output]
|
220 |
)
|
221 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
222 |
demo.launch(auth=(os.getenv("DEV_USER"), os.getenv("DEV_PWD")))
|
|
|
24 |
# RapidAPI key, read from the RAPID_API_KEY environment variable
|
25 |
RAPID_API_KEY = os.getenv("RAPID_API_KEY")
|
26 |
|
27 |
+
# Available TTS speakers per language code; the first entry is the default
|
28 |
+
AVAILABLE_SPEAKERS = {
|
29 |
+
"en": ["MS"],
|
30 |
+
"ma": ["msFemale"],
|
31 |
+
"ta": ["ta_female1"],
|
32 |
+
"zh": ["childChinese2"]
|
33 |
+
}
|
34 |
+
|
35 |
def fetch_youtube_id(youtube_url: str) -> str:
|
36 |
if 'v=' in youtube_url:
|
37 |
return youtube_url.split("v=")[1].split("&")[0]
|
|
|
116 |
else:
|
117 |
return "The system got some error during vLLM generation. Please try it again."
|
118 |
|
119 |
+
def transcribe_and_speak(audio, source_lang, target_lang, youtube_url=None, target_speaker=None):
|
120 |
if youtube_url:
|
121 |
audio = download_youtube_audio(youtube_url)
|
122 |
if not audio:
|
|
|
149 |
tts_params = {
|
150 |
'language': target_lang,
|
151 |
'speed': 1.1,
|
152 |
+
'speaker': target_speaker or AVAILABLE_SPEAKERS[target_lang][0], # Use the first speaker as default
|
153 |
'text': translated_text
|
154 |
}
|
155 |
|
|
|
164 |
def check_password(password):
    """Return True when ``password`` matches the configured developer password.

    Args:
        password: The candidate password entered by the user.

    Returns:
        bool: True only if a developer password is configured and
        ``password`` is a string equal to it.
    """
    import hmac

    # DEVELOPER_PASSWORD is read with os.getenv and is None when the variable
    # is unset; without this guard a None input would compare equal and pass.
    if not isinstance(password, str) or not isinstance(DEVELOPER_PASSWORD, str):
        return False
    # Constant-time comparison so the check does not leak how much matched.
    return hmac.compare_digest(
        password.encode("utf-8"), DEVELOPER_PASSWORD.encode("utf-8")
    )
|
166 |
|
167 |
+
def run_speech_translation(audio, source_lang, target_lang, youtube_url, target_speaker):
    """Run the transcribe/translate/speak pipeline for the UI button.

    Forwards all arguments, including the chosen speaker, to
    ``transcribe_and_speak`` and returns its three results as a tuple:
    (transcription, translated_text, audio_url).
    """
    results = transcribe_and_speak(
        audio, source_lang, target_lang, youtube_url, target_speaker
    )
    # Unpacking enforces that exactly three values came back.
    text, translation, speech_url = results
    return text, translation, speech_url
|
171 |
|
|
|
180 |
user_youtube_url = gr.Textbox(label="YouTube URL (optional)")
|
181 |
user_source_lang = gr.Dropdown(choices=["en", "ma", "ta", "zh"], label="Source Language", value="en")
|
182 |
user_target_lang = gr.Dropdown(choices=["en", "ma", "ta", "zh"], label="Target Language", value="zh")
|
183 |
+
user_target_speaker = gr.Dropdown(choices=[], label="Target Speaker")
|
184 |
+
|
185 |
with gr.Row():
|
186 |
user_button = gr.Button("Translate and Speak", interactive=False)
|
187 |
|
|
|
209 |
|
210 |
user_button.click(
|
211 |
fn=run_speech_translation,
|
212 |
+
inputs=[user_audio_input, user_source_lang, user_target_lang, user_youtube_url, user_target_speaker],
|
213 |
outputs=[user_transcription_output, user_translation_output, user_audio_output]
|
214 |
)
|
215 |
|
|
|
228 |
outputs=[user_video_output]
|
229 |
)
|
230 |
|
231 |
+
def update_target_speakers(target_lang):
    """Refresh the speaker dropdown to match the selected target language.

    Args:
        target_lang: Language code selected in the target-language dropdown.

    Returns:
        A ``gr.Dropdown`` whose choices are the speakers listed for
        ``target_lang`` in ``AVAILABLE_SPEAKERS`` and whose value is the first
        of them. When the language has no speakers, the choices are empty and
        the value is ``None``.
    """
    # .get avoids a KeyError (and a failed UI event) for a language code that
    # has no entry in AVAILABLE_SPEAKERS; `or []` also covers an empty entry.
    speakers = AVAILABLE_SPEAKERS.get(target_lang) or []
    default_speaker = speakers[0] if speakers else None
    return gr.Dropdown(choices=speakers, value=default_speaker)
|
233 |
+
|
234 |
+
user_target_lang.change(
|
235 |
+
fn=update_target_speakers,
|
236 |
+
inputs=[user_target_lang],
|
237 |
+
outputs=[user_target_speaker]
|
238 |
+
)
|
239 |
+
|
240 |
demo.launch(auth=(os.getenv("DEV_USER"), os.getenv("DEV_PWD")))
|