Create app.py
app.py
ADDED
@@ -0,0 +1,466 @@
import gradio as gr
from gradio_client import Client, handle_file

import os
import shutil
from huggingface_hub import snapshot_download
from mutagen.mp3 import MP3
from pydub import AudioSegment
from PIL import Image
import ffmpeg

os.chdir(os.path.dirname(os.path.abspath(__file__)))
from scripts.inference import inference_process
import argparse
import uuid


# hallo_dir = snapshot_download(repo_id="fudan-generative-ai/hallo", local_dir="pretrained_models")

AUDIO_MAX_DURATION = 40000

#############
# UTILITIES #
#############

def is_mp3(file_path):
    try:
        audio = MP3(file_path)
        return True
    except Exception:
        return False

def convert_mp3_to_wav(mp3_file_path, wav_file_path):
    # Load the MP3 file
    audio = AudioSegment.from_mp3(mp3_file_path)
    # Export as WAV file
    audio.export(wav_file_path, format="wav")
    return wav_file_path


def trim_audio(file_path, output_path, max_duration):
    # Load the audio file
    audio = AudioSegment.from_wav(file_path)

    # Check the length of the audio in milliseconds
    audio_length = len(audio)

    # If the audio is longer than the maximum duration, trim it
    if audio_length > max_duration:
        trimmed_audio = audio[:max_duration]
    else:
        trimmed_audio = audio

    # Export the trimmed audio to a new file
    trimmed_audio.export(output_path, format="wav")

    return output_path


def add_silence_to_wav(wav_file_path, duration_s=1):
    # Load the WAV file
    audio = AudioSegment.from_wav(wav_file_path)
    # Create the requested amount of silence
    silence = AudioSegment.silent(duration=duration_s * 1000)  # duration is in milliseconds
    # Add silence to the end of the audio file
    audio_with_silence = audio + silence
    # Export the modified audio
    audio_with_silence.export(wav_file_path, format="wav")
    return wav_file_path

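# Illustrative use of the audio helpers above (a sketch, not executed by the app;
# the file names are placeholders):
#   wav_path = convert_mp3_to_wav("speech.mp3", "speech.wav")
#   wav_path = trim_audio(wav_path, "speech_trimmed.wav", AUDIO_MAX_DURATION)
#   wav_path = add_silence_to_wav(wav_path, duration_s=1)
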
def check_mp3(file_path):

    if is_mp3(file_path):
        unique_id = uuid.uuid4()
        wav_file_path = f"{os.path.splitext(file_path)[0]}-{unique_id}.wav"
        converted_audio = convert_mp3_to_wav(file_path, wav_file_path)
        print(f"File converted to {wav_file_path}")

        return converted_audio, gr.update(value=converted_audio, visible=True)
    else:
        print("The file is not an MP3 file.")

        return file_path, gr.update(value=file_path, visible=True)

def check_and_convert_webp_to_png(input_path, output_path):
    try:
        # Open the image file
        with Image.open(input_path) as img:
            # Check if the image is in WebP format
            if img.format == 'WEBP':
                # Convert and save as PNG
                img.save(output_path, 'PNG')
                print(f"Converted {input_path} to {output_path}")
                return output_path
            else:
                print(f"The file {input_path} is not in WebP format.")
                return input_path
    except IOError:
        print(f"Cannot open {input_path}. The file might not exist or is not an image.")
        # Fall back to the original path so the caller still gets a usable value
        return input_path

def convert_user_uploaded_webp(input_path):

    # convert to png if necessary
    input_file = input_path
    unique_id = uuid.uuid4()
    output_file = f"converted_to_png_portrait-{unique_id}.png"
    ready_png = check_and_convert_webp_to_png(input_file, output_file)
    print(f"PORTRAIT PNG FILE: {ready_png}")
    return ready_png

def clear_audio_elms():
    return gr.update(value=None, visible=False)

def change_video_codec(input_file, output_file, codec='libx264', audio_codec='aac'):
    try:
        (
            ffmpeg
            .input(input_file)
            .output(output_file, vcodec=codec, acodec=audio_codec)
            .run(overwrite_output=True)
        )
        print(f'Successfully changed codec of {input_file} and saved as {output_file}')
    except ffmpeg.Error as e:
        print(f'Error occurred: {e.stderr.decode()}')


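# Illustrative call (a sketch, not part of the app flow; the paths are placeholders):
#   change_video_codec("dreamtalk_output.mp4", "converted_output.mp4")
# This re-encodes the clip to H.264 video with AAC audio so the gr.Video component
# can play it in the browser.
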
def get_talk(image_in, speech):
    client = Client("fffiloni/dreamtalk")
    result = client.predict(
        audio_input=handle_file(speech),
        image_path=handle_file(image_in),
        emotional_style="M030_front_neutral_level1_001.mat",
        api_name="/infer"
    )
    print(result)
    return result['video']

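# Note: get_talk() delegates the actual talking-head generation to the public
# "fffiloni/dreamtalk" Space via gradio_client. handle_file() uploads the local
# audio and image files to that Space, and the /infer endpoint returns a result
# whose 'video' entry is the path of the generated clip.
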
#######################################################
# Gradio APIs for optional image and voice generation #
#######################################################

def generate_portrait(prompt_image):
    if prompt_image is None or prompt_image == "":
        raise gr.Error("Can't generate a portrait without a prompt!")

    try:
        client = Client("ByteDance/SDXL-Lightning")
    except Exception:
        raise gr.Error("ByteDance/SDXL-Lightning Space's API might not be ready, please wait, or upload an image instead.")

    result = client.predict(
        prompt = prompt_image,
        ckpt = "4-Step",
        api_name = "/generate_image"
    )
    print(result)

    # convert to png if necessary
    input_file = result
    unique_id = uuid.uuid4()
    output_file = f"converted_to_png_portrait-{unique_id}.png"
    ready_png = check_and_convert_webp_to_png(input_file, output_file)
    print(f"PORTRAIT PNG FILE: {ready_png}")

    return ready_png

def generate_voice_with_parler(prompt_audio, voice_description):
    if prompt_audio is None or prompt_audio == "":
        raise gr.Error("Can't generate a voice without text to synthesize!")
    if voice_description is None or voice_description == "":
        gr.Info(
            "For better control, you may want to provide a voice character description next time.",
            duration = 10,
            visible = True
        )
    try:
        client = Client("parler-tts/parler_tts_mini")
    except Exception:
        raise gr.Error("parler-tts/parler_tts_mini Space's API might not be ready, please wait, or upload an audio instead.")

    result = client.predict(
        text = prompt_audio,
        description = voice_description,
        api_name = "/gen_tts"
    )
    print(result)
    return result, gr.update(value=result, visible=True)

def get_whisperspeech(prompt_audio_whisperspeech, audio_to_clone):
    try:
        client = Client("collabora/WhisperSpeech")
    except Exception:
        raise gr.Error("collabora/WhisperSpeech Space's API might not be ready, please wait, or upload an audio instead.")

    result = client.predict(
        multilingual_text = prompt_audio_whisperspeech,
        speaker_audio = handle_file(audio_to_clone),
        speaker_url = "",
        cps = 14,
        api_name = "/whisper_speech_demo"
    )
    print(result)
    return result, gr.update(value=result, visible=True)


########################
# TALKING PORTRAIT GEN #
########################


def generate_talking_portrait(portrait, voice):

    # Generate the talking-head video for the given portrait and voice track
    talking_portrait_vid = get_talk(portrait, voice)

    # Convert video to a readable format; keep only the file name so the
    # converted copy is written next to app.py
    final_output_file = f"converted_{os.path.basename(talking_portrait_vid)}"
    change_video_codec(talking_portrait_vid, final_output_file)

    return final_output_file


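# End-to-end flow wired to the "Go talking Portrait!" button:
# portrait image + voice audio -> dreamtalk Space -> H.264/AAC re-encode -> gr.Video result.

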
css = '''
#col-container {
    margin: 0 auto;
}
#column-names {
    margin-top: 50px;
}
#main-group {
    background-color: none;
}
.tabs {
    background-color: unset;
}
#image-block {
    flex: 1;
}
#video-block {
    flex: 9;
}
#audio-block, #audio-clone-elm {
    flex: 1;
}
div#audio-clone-elm > .audio-container > button {
    height: 180px!important;
}
div#audio-clone-elm > .audio-container > button > .wrap {
    font-size: 0.9em;
}
#text-synth, #voice-desc {
    height: 130px;
}
#text-synth-wsp {
    height: 120px;
}
#audio-column, #result-column {
    display: flex;
}
#gen-voice-btn {
    flex: 1;
}
#parler-tab, #whisperspeech-tab {
    padding: 0;
}
#main-submit {
    flex: 1;
}
#pro-tips {
    margin-top: 50px;
}
div#warning-ready {
    background-color: #ecfdf5;
    padding: 0 16px 16px;
    margin: 20px 0;
    color: #030303!important;
}
div#warning-ready > .gr-prose > h2, div#warning-ready > .gr-prose > p {
    color: #057857!important;
}
div#warning-duplicate {
    background-color: #ebf5ff;
    padding: 0 16px 16px;
    margin: 20px 0;
    color: #030303!important;
}
div#warning-duplicate > .gr-prose > h2, div#warning-duplicate > .gr-prose > p {
    color: #0f4592!important;
}
div#warning-duplicate strong {
    color: #0f4592;
}
p.actions {
    display: flex;
    align-items: center;
    margin: 20px 0;
}
div#warning-duplicate .actions a {
    display: inline-block;
    margin-right: 10px;
}
.dark #warning-duplicate {
    background-color: #0c0c0c !important;
    border: 1px solid white !important;
}
'''

with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("""
        # CPS 584 Deep Learning Project by Vignesh Yanamalamanda and Srija Tatineni

        This is achieved with the help of several open-source models: Stable Diffusion XL Lightning | Parler TTS | WhisperSpeech | Hallo

        Thanks to Professor Mehdi for inspiring us to be creative while learning. FYI: 4-5 seconds of audio will take ~5 minutes per inference, so please be patient.
        """)
        with gr.Row(elem_id="column-names"):
            gr.Markdown("## 1. Load Image or Type")
            gr.Markdown("## 2. Load Voice or Type")
            gr.Markdown("## 3. Result")
        with gr.Group(elem_id="main-group"):
            with gr.Row():
                with gr.Column():

                    portrait = gr.Image(
                        sources = ["upload"],
                        type = "filepath",
                        format = "png",
                        elem_id = "image-block"
                    )

                    prompt_image = gr.Textbox(
                        label = "Generate image",
                        lines = 2,
                        max_lines = 2
                    )

                    gen_image_btn = gr.Button("Generate portrait (optional)")

                with gr.Column(elem_id="audio-column"):

                    voice = gr.Audio(
                        type = "filepath",
                        elem_id = "audio-block"
                    )

                    preprocess_audio_file = gr.File(visible=False)

                    with gr.Tab("Parler TTS", elem_id="parler-tab"):

                        prompt_audio = gr.Textbox(
                            label = "Text to synthesize",
                            lines = 3,
                            max_lines = 3,
                            elem_id = "text-synth"
                        )

                        voice_description = gr.Textbox(
                            label = "Voice description",
                            lines = 3,
                            max_lines = 3,
                            elem_id = "voice-desc"
                        )

                        gen_voice_btn = gr.Button("Generate voice (optional)")

                    with gr.Tab("WhisperSpeech", elem_id="whisperspeech-tab"):
                        prompt_audio_whisperspeech = gr.Textbox(
                            label = "Text to synthesize",
                            lines = 2,
                            max_lines = 2,
                            elem_id = "text-synth-wsp"
                        )
                        audio_to_clone = gr.Audio(
                            label = "Voice to clone",
                            type = "filepath",
                            elem_id = "audio-clone-elm"
                        )
                        gen_wsp_voice_btn = gr.Button("Generate voice clone (optional)")

                with gr.Column(elem_id="result-column"):

                    result = gr.Video(
                        elem_id="video-block"
                    )

                    submit_btn = gr.Button("Go talking Portrait!", elem_id="main-submit")

        with gr.Row(elem_id="pro-tips"):
            gr.Markdown("""
            # Project done in Summer 2024 at the University of Dayton, Dayton, OH
            """)

            gr.Markdown("""
            # The application is built with Gradio; see the accompanying files for reference
            """)

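    # Event wiring: preprocess uploads (WebP -> PNG, MP3 -> WAV), hook up the
    # optional portrait/voice generators, and bind the main submit button.
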
    portrait.upload(
        fn = convert_user_uploaded_webp,
        inputs = [portrait],
        outputs = [portrait],
        queue = False,
        show_api = False
    )

    voice.upload(
        fn = check_mp3,
        inputs = [voice],
        outputs = [voice, preprocess_audio_file],
        queue = False,
        show_api = False
    )

    voice.clear(
        fn = clear_audio_elms,
        inputs = None,
        outputs = [preprocess_audio_file],
        queue = False,
        show_api = False
    )

    gen_image_btn.click(
        fn = generate_portrait,
        inputs = [prompt_image],
        outputs = [portrait],
        queue = False,
        show_api = False
    )

    gen_voice_btn.click(
        fn = generate_voice_with_parler,
        inputs = [prompt_audio, voice_description],
        outputs = [voice, preprocess_audio_file],
        queue = False,
        show_api = False
    )

    gen_wsp_voice_btn.click(
        fn = get_whisperspeech,
        inputs = [prompt_audio_whisperspeech, audio_to_clone],
        outputs = [voice, preprocess_audio_file],
        queue = False,
        show_api = False
    )

    submit_btn.click(
        fn = generate_talking_portrait,
        inputs = [portrait, voice],
        outputs = [result],
        show_api = False
    )

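# The shared queue caps pending jobs at 100 to protect the slow remote inference
# calls; share=True additionally exposes a temporary public gradio.live URL.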
demo.queue(max_size=100).launch(show_error=True, show_api=False, share=True)