Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -49,6 +49,8 @@ other_language = {
|
|
49 |
|
50 |
# Инициализация модели TTS
|
51 |
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
|
|
|
|
|
52 |
|
53 |
# Функции для голосового клонирования
|
54 |
def check_audio_length(audio_path, max_duration=120):
|
@@ -64,8 +66,9 @@ def check_audio_length(audio_path, max_duration=120):
|
|
64 |
return False
|
65 |
|
66 |
def synthesize_and_convert_voice(text, language_iso, voice_audio_path, speed):
|
67 |
-
# Синтез речи с помощью TTS
|
68 |
tts_synthesis = TTS(model_name=f"tts_models/{language_iso}/fairseq/vits")
|
|
|
|
|
69 |
wav_data = tts_synthesis.tts(text, speed=speed)
|
70 |
|
71 |
# Преобразование wav_data из списка в NumPy массив с типом float32
|
@@ -100,7 +103,7 @@ def synthesize_and_convert_voice(text, language_iso, voice_audio_path, speed):
|
|
100 |
torchaudio.save(temp_denoised_wav_path, denoised_wav_tensor.unsqueeze(0).cpu(), denoised_sample_rate)
|
101 |
|
102 |
# Преобразование голоса с использованием денойзенного аудио
|
103 |
-
|
104 |
|
105 |
# Подготовка временного выходного файла
|
106 |
with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_output_wav_file:
|
@@ -167,10 +170,7 @@ def synthesize_speech(text, speaker_wav_path, language_iso, speed):
|
|
167 |
torchaudio.save(temp_vc_input_path, wav_tensor.cpu(), sample_rate)
|
168 |
|
169 |
# Инициализация модели voice conversion
|
170 |
-
|
171 |
-
model_name="voice_conversion_models/multilingual/vctk/freevc24",
|
172 |
-
progress_bar=False
|
173 |
-
)
|
174 |
|
175 |
# Подготовка временного выходного файла
|
176 |
with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_output_wav_file:
|
@@ -322,13 +322,21 @@ with gr.Blocks() as app:
|
|
322 |
gr.HTML("<div style='width:300px;'></div>")
|
323 |
reload_button = gr.Button("Перезапустить")
|
324 |
|
|
|
325 |
synthesize_button.click(
|
326 |
-
|
327 |
-
inputs=[text_input, speaker_wav_input, language_input, speed_input],
|
328 |
-
outputs=output_audio
|
|
|
329 |
)
|
330 |
|
331 |
-
reload_button
|
|
|
|
|
|
|
|
|
|
|
|
|
332 |
|
333 |
with gr.TabItem("Lipsync"):
|
334 |
# Интерфейс для липсинка
|
@@ -352,9 +360,10 @@ with gr.Blocks() as app:
|
|
352 |
result = gr.Video(label="Результат")
|
353 |
|
354 |
generate_btn.click(
|
355 |
-
generate,
|
356 |
inputs=[video, audio, checkpoint, no_smooth, resize_factor, pad_top, pad_bottom, pad_left, pad_right, save_as_video],
|
357 |
outputs=result,
|
|
|
358 |
)
|
359 |
|
360 |
def launch_gradio():
|
|
|
49 |
|
50 |
# Инициализация модели TTS
|
51 |
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
|
52 |
+
tts_conversion = TTS(model_name="voice_conversion_models/multilingual/vctk/freevc24", progress_bar=False)
|
53 |
+
|
54 |
|
55 |
# Функции для голосового клонирования
|
56 |
def check_audio_length(audio_path, max_duration=120):
|
|
|
66 |
return False
|
67 |
|
68 |
def synthesize_and_convert_voice(text, language_iso, voice_audio_path, speed):
|
|
|
69 |
tts_synthesis = TTS(model_name=f"tts_models/{language_iso}/fairseq/vits")
|
70 |
+
# Синтез речи с помощью TTS
|
71 |
+
|
72 |
wav_data = tts_synthesis.tts(text, speed=speed)
|
73 |
|
74 |
# Преобразование wav_data из списка в NumPy массив с типом float32
|
|
|
103 |
torchaudio.save(temp_denoised_wav_path, denoised_wav_tensor.unsqueeze(0).cpu(), denoised_sample_rate)
|
104 |
|
105 |
# Преобразование голоса с использованием денойзенного аудио
|
106 |
+
|
107 |
|
108 |
# Подготовка временного выходного файла
|
109 |
with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_output_wav_file:
|
|
|
170 |
torchaudio.save(temp_vc_input_path, wav_tensor.cpu(), sample_rate)
|
171 |
|
172 |
# Инициализация модели voice conversion
|
173 |
+
|
|
|
|
|
|
|
174 |
|
175 |
# Подготовка временного выходного файла
|
176 |
with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_output_wav_file:
|
|
|
322 |
gr.HTML("<div style='width:300px;'></div>")
|
323 |
reload_button = gr.Button("Перезапустить")
|
324 |
|
325 |
+
# Corrected: Bind synthesize_button to process_speech
|
326 |
synthesize_button.click(
|
327 |
+
process_speech, # Function to call
|
328 |
+
inputs=[text_input, speaker_wav_input, language_input, speed_input], # Inputs for voice synthesis
|
329 |
+
outputs=output_audio, # Output audio
|
330 |
+
queue=False # Optional: Disable queueing
|
331 |
)
|
332 |
|
333 |
+
# Bind reload_button to restart_program
|
334 |
+
reload_button.click(
|
335 |
+
fn=restart_program,
|
336 |
+
inputs=None,
|
337 |
+
outputs=None,
|
338 |
+
queue=False
|
339 |
+
)
|
340 |
|
341 |
with gr.TabItem("Lipsync"):
|
342 |
# Интерфейс для липсинка
|
|
|
360 |
result = gr.Video(label="Результат")
|
361 |
|
362 |
generate_btn.click(
|
363 |
+
generate, # Function to call for Lipsync
|
364 |
inputs=[video, audio, checkpoint, no_smooth, resize_factor, pad_top, pad_bottom, pad_left, pad_right, save_as_video],
|
365 |
outputs=result,
|
366 |
+
queue=False # Optional: Disable queueing
|
367 |
)
|
368 |
|
369 |
def launch_gradio():
|