Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,16 +1,14 @@
|
|
1 |
import os
|
2 |
-
import subprocess
|
3 |
import sys
|
4 |
import uuid
|
|
|
5 |
import gradio as gr
|
6 |
from pydub import AudioSegment
|
7 |
from TTS.api import TTS
|
8 |
|
9 |
-
#
|
10 |
-
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", )
|
11 |
-
# tts.to("cuda")
|
12 |
|
13 |
-
#
|
14 |
language_options = {
|
15 |
"English (en)": "en",
|
16 |
"Spanish (es)": "es",
|
@@ -39,6 +37,9 @@ other_language = {
|
|
39 |
"Philippine": "tgl"
|
40 |
}
|
41 |
|
|
|
|
|
|
|
42 |
def clean_audio(audio_path):
|
43 |
out_filename = f"output/cleaned_{uuid.uuid4()}.wav"
|
44 |
lowpass_highpass = "lowpass=8000,highpass=75,"
|
@@ -70,9 +71,9 @@ def synthesize_and_convert_voice(text, language_iso, voice_audio_path, speed):
|
|
70 |
tts_conversion = TTS(model_name="voice_conversion_models/multilingual/vctk/freevc24", progress_bar=False)
|
71 |
output_file = "output/docout.wav"
|
72 |
os.makedirs("output", exist_ok=True)
|
73 |
-
tts_conversion.voice_conversion_to_file(wav_data, target_wav=voice_audio_path,
|
74 |
-
|
75 |
-
return
|
76 |
|
77 |
def synthesize_speech(text, speaker_wav_path, language_iso, speed):
|
78 |
output_file_xtts = "output/undocout.wav"
|
@@ -81,9 +82,9 @@ def synthesize_speech(text, speaker_wav_path, language_iso, speed):
|
|
81 |
tts_conversion = TTS(model_name="voice_conversion_models/multilingual/vctk/freevc24", progress_bar=False)
|
82 |
output_file = "output/docout.wav"
|
83 |
os.makedirs("output", exist_ok=True)
|
84 |
-
tts_conversion.voice_conversion_to_file(output_file_xtts, target_wav=speaker_wav_path,
|
85 |
-
|
86 |
-
return
|
87 |
|
88 |
def get_language_code(selected_language):
|
89 |
if selected_language in language_options:
|
@@ -108,34 +109,53 @@ def process_speech(text, speaker_wav, selected_language, speed):
|
|
108 |
cleaned_wav_path = clean_audio(speaker_wav)
|
109 |
|
110 |
if selected_language in other_language:
|
111 |
-
|
112 |
else:
|
113 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
114 |
|
115 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
116 |
|
117 |
-
def generate_lipsync(video_path, audio_path, pad_top, pad_bottom, pad_left, pad_right, no_smooth, save_as_video):
|
118 |
output_dir = "outputs"
|
119 |
os.makedirs(output_dir, exist_ok=True)
|
|
|
120 |
output_path = os.path.join(output_dir, "output.mp4")
|
|
|
121 |
|
122 |
args = [
|
123 |
-
"--checkpoint_path", "checkpoints/
|
124 |
-
"--segmentation_path", "checkpoints/face_segmentation.pth",
|
125 |
-
"--no_seg",
|
126 |
-
"--no_sr",
|
127 |
"--face", video_path,
|
128 |
"--audio", audio_path,
|
129 |
"--outfile", output_path,
|
130 |
-
"--resize_factor",
|
131 |
-
"--face_det_batch_size", "4",
|
132 |
-
"--wav2lip_batch_size", "64",
|
133 |
"--fps", "30",
|
134 |
"--pads", str(pad_top), str(pad_bottom), str(pad_left), str(pad_right)
|
135 |
]
|
136 |
|
137 |
if no_smooth:
|
138 |
args.append("--nosmooth")
|
|
|
139 |
if save_as_video:
|
140 |
args.append("--save_as_video")
|
141 |
|
@@ -152,26 +172,17 @@ def generate_lipsync(video_path, audio_path, pad_top, pad_bottom, pad_left, pad_
|
|
152 |
return "Не удалось создать выходное видео."
|
153 |
|
154 |
print(f"Выходной файл создан по пути: {output_path}")
|
155 |
-
return output_path
|
156 |
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
result = generate_lipsync(video_path, audio_path, pad_top, pad_bottom, pad_left, pad_right, no_smooth, save_as_video)
|
165 |
-
return result
|
166 |
-
|
167 |
-
with gr.Blocks() as demo:
|
168 |
-
gr.Markdown("# Объединение Voice Clone и Lipsync")
|
169 |
-
|
170 |
-
with gr.Row():
|
171 |
-
with gr.Column():
|
172 |
-
gr.Markdown("### Шаг 1: Настройки синтеза речи")
|
173 |
text_input = gr.Textbox(label="Введите текст для генерации", placeholder="Введите ваш текст здесь...")
|
174 |
-
speaker_wav_input = gr.Audio(label="Загрузите аудио говорящего (WAV формат)", type="filepath")
|
175 |
|
176 |
all_languages = list(language_options.keys()) + list(other_language.keys())
|
177 |
language_input = gr.Dropdown(
|
@@ -189,37 +200,56 @@ with gr.Blocks() as demo:
|
|
189 |
info="Выберите скорость"
|
190 |
)
|
191 |
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
223 |
|
224 |
if __name__ == "__main__":
|
225 |
launch_gradio()
|
|
|
1 |
import os
|
|
|
2 |
import sys
|
3 |
import uuid
|
4 |
+
import subprocess
|
5 |
import gradio as gr
|
6 |
from pydub import AudioSegment
|
7 |
from TTS.api import TTS
|
8 |
|
9 |
+
# Импорт необходимых модулей для обеих функций
|
|
|
|
|
10 |
|
11 |
+
# Глобальные переменные и настройки
|
12 |
language_options = {
|
13 |
"English (en)": "en",
|
14 |
"Spanish (es)": "es",
|
|
|
37 |
"Philippine": "tgl"
|
38 |
}
|
39 |
|
40 |
+
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
|
41 |
+
|
42 |
+
# Функции для голосового клонирования
|
43 |
def clean_audio(audio_path):
|
44 |
out_filename = f"output/cleaned_{uuid.uuid4()}.wav"
|
45 |
lowpass_highpass = "lowpass=8000,highpass=75,"
|
|
|
71 |
tts_conversion = TTS(model_name="voice_conversion_models/multilingual/vctk/freevc24", progress_bar=False)
|
72 |
output_file = "output/docout.wav"
|
73 |
os.makedirs("output", exist_ok=True)
|
74 |
+
converted_audio = tts_conversion.voice_conversion_to_file(wav_data, target_wav=voice_audio_path,
|
75 |
+
file_path=output_file)
|
76 |
+
return converted_audio
|
77 |
|
78 |
def synthesize_speech(text, speaker_wav_path, language_iso, speed):
|
79 |
output_file_xtts = "output/undocout.wav"
|
|
|
82 |
tts_conversion = TTS(model_name="voice_conversion_models/multilingual/vctk/freevc24", progress_bar=False)
|
83 |
output_file = "output/docout.wav"
|
84 |
os.makedirs("output", exist_ok=True)
|
85 |
+
converted_audio = tts_conversion.voice_conversion_to_file(output_file_xtts, target_wav=speaker_wav_path,
|
86 |
+
file_path=output_file)
|
87 |
+
return converted_audio
|
88 |
|
89 |
def get_language_code(selected_language):
|
90 |
if selected_language in language_options:
|
|
|
109 |
cleaned_wav_path = clean_audio(speaker_wav)
|
110 |
|
111 |
if selected_language in other_language:
|
112 |
+
return synthesize_and_convert_voice(text, language_code, cleaned_wav_path, speed)
|
113 |
else:
|
114 |
+
return synthesize_speech(text, cleaned_wav_path, language_code, speed)
|
115 |
+
|
116 |
+
def restart_program():
    """Restart the application by re-executing the current interpreter.

    Replaces the running process with a fresh invocation of
    ``sys.executable`` using the same command-line arguments
    (``sys.argv``). On success ``os.execl`` does not return.
    """
    # exec* swaps the process image immediately and does not flush open
    # file objects, so push out any buffered console output first.
    for stream in (sys.stdout, sys.stderr):
        # A standard stream can be None (e.g. no attached console).
        if stream is not None:
            stream.flush()

    interpreter = sys.executable
    os.execl(interpreter, interpreter, *sys.argv)
|
119 |
+
|
120 |
+
# Функции для липсинка
|
121 |
+
def generate(video, audio, checkpoint, no_smooth, resize_factor, pad_top, pad_bottom, pad_left, pad_right, save_as_video):
|
122 |
+
if video is None or audio is None or checkpoint is None:
|
123 |
+
return "Пожалуйста, загрузите видео/изображение и аудио файл, а также выберите чекпойнт."
|
124 |
|
125 |
+
print(f"Текущая рабочая директория: {os.getcwd()}")
|
126 |
+
print(f"Содержимое текущей директории: {os.listdir('.')}")
|
127 |
+
print(f"Проверка наличия 'inference.py': {os.path.exists('inference.py')}")
|
128 |
+
|
129 |
+
video_path = video # Путь к видео или изображению
|
130 |
+
audio_path = audio # Путь к аудио
|
131 |
+
|
132 |
+
print(f"Путь к видео: {video_path}")
|
133 |
+
print(f"Путь к аудио: {audio_path}")
|
134 |
|
|
|
135 |
output_dir = "outputs"
|
136 |
os.makedirs(output_dir, exist_ok=True)
|
137 |
+
|
138 |
output_path = os.path.join(output_dir, "output.mp4")
|
139 |
+
print(f"Путь к выходному файлу: {output_path}")
|
140 |
|
141 |
args = [
|
142 |
+
"--checkpoint_path", f"checkpoints/{checkpoint}.pth",
|
143 |
+
"--segmentation_path", "checkpoints/face_segmentation.pth",
|
144 |
+
"--no_seg",
|
145 |
+
"--no_sr",
|
146 |
"--face", video_path,
|
147 |
"--audio", audio_path,
|
148 |
"--outfile", output_path,
|
149 |
+
"--resize_factor", str(resize_factor),
|
150 |
+
"--face_det_batch_size", "4",
|
151 |
+
"--wav2lip_batch_size", "64",
|
152 |
"--fps", "30",
|
153 |
"--pads", str(pad_top), str(pad_bottom), str(pad_left), str(pad_right)
|
154 |
]
|
155 |
|
156 |
if no_smooth:
|
157 |
args.append("--nosmooth")
|
158 |
+
|
159 |
if save_as_video:
|
160 |
args.append("--save_as_video")
|
161 |
|
|
|
172 |
return "Не удалось создать выходное видео."
|
173 |
|
174 |
print(f"Выходной файл создан по пути: {output_path}")
|
175 |
+
return output_path # Возвращаем путь к выходному видео
|
176 |
|
177 |
+
# Создание Gradio интерфейса с вкладками
|
178 |
+
with gr.Blocks() as app:
|
179 |
+
gr.Markdown("# Voice Clone Union")
|
180 |
|
181 |
+
with gr.Tabs():
|
182 |
+
with gr.TabItem("Voice Clone"):
|
183 |
+
# Интерфейс для голосового клонирования
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
184 |
text_input = gr.Textbox(label="Введите текст для генерации", placeholder="Введите ваш текст здесь...")
|
185 |
+
speaker_wav_input = gr.Audio(label="Загрузите аудио файла говорящего (WAV формат)", type="filepath")
|
186 |
|
187 |
all_languages = list(language_options.keys()) + list(other_language.keys())
|
188 |
language_input = gr.Dropdown(
|
|
|
200 |
info="Выберите скорость"
|
201 |
)
|
202 |
|
203 |
+
output_audio = gr.Audio(label="Сгенерированное аудио", type="filepath")
|
204 |
+
|
205 |
+
with gr.Row():
|
206 |
+
synthesize_button = gr.Button("Сгенерировать")
|
207 |
+
gr.HTML("<div style='width:300px;'></div>")
|
208 |
+
reload_button = gr.Button("Перезапустить")
|
209 |
+
|
210 |
+
synthesize_button.click(
|
211 |
+
fn=process_speech,
|
212 |
+
inputs=[text_input, speaker_wav_input, language_input, speed_input],
|
213 |
+
outputs=output_audio
|
214 |
+
)
|
215 |
+
|
216 |
+
reload_button.click(fn=restart_program, inputs=None, outputs=None)
|
217 |
+
|
218 |
+
with gr.TabItem("Lipsync"):
|
219 |
+
# Интерфейс для липсинка
|
220 |
+
gr.Markdown("## Lipsync")
|
221 |
+
with gr.Row():
|
222 |
+
video = gr.File(label="Видео или Изображение", type="filepath")
|
223 |
+
audio = gr.File(label="Аудио", type="filepath")
|
224 |
+
with gr.Column():
|
225 |
+
checkpoint = gr.Radio(["wav2lip", "wav2lip_gan"], label="Чекпойнт", value="wav2lip_gan", visible=False)
|
226 |
+
no_smooth = gr.Checkbox(label="Без сглаживания", value=False)
|
227 |
+
resize_factor = gr.Slider(minimum=1, maximum=4, step=1, label="Фактор изменения размера", value=2)
|
228 |
+
with gr.Row():
|
229 |
+
with gr.Column():
|
230 |
+
pad_top = gr.Slider(minimum=0, maximum=50, step=1, value=0, label="Отступ сверху")
|
231 |
+
pad_bottom = gr.Slider(minimum=0, maximum=50, step=1, value=10, label="Отступ снизу")
|
232 |
+
pad_left = gr.Slider(minimum=0, maximum=50, step=1, value=0, label="Отступ слева")
|
233 |
+
pad_right = gr.Slider(minimum=0, maximum=50, step=1, value=0, label="Отступ справа")
|
234 |
+
save_as_video = gr.Checkbox(label="Сохранять как видео", value=True)
|
235 |
+
generate_btn = gr.Button("Сгенерировать")
|
236 |
+
with gr.Column():
|
237 |
+
result = gr.Video(label="Результат")
|
238 |
+
|
239 |
+
generate_btn.click(
|
240 |
+
generate,
|
241 |
+
inputs=[video, audio, checkpoint, no_smooth, resize_factor, pad_top, pad_bottom, pad_left, pad_right, save_as_video],
|
242 |
+
outputs=result,
|
243 |
+
# concurrency_limit=30
|
244 |
+
)
|
245 |
+
|
246 |
+
def launch_gradio():
    """Launch the Gradio app, listening on 0.0.0.0 port 8600.

    ``share`` is passed as true when the literal argument ``True`` appears
    in ``sys.argv``; ``inbrowser`` is passed as true when ``--open`` does.
    """
    cli_args = sys.argv
    launch_options = {
        "share": "True" in cli_args,
        "inbrowser": "--open" in cli_args,
        "server_port": 8600,
        "server_name": "0.0.0.0",
    }
    app.launch(**launch_options)
|
253 |
|
254 |
if __name__ == "__main__":
|
255 |
launch_gradio()
|