fix PATH
Browse files
- api.py +15 -14
- landscape2soundscape.py +2 -2
api.py
CHANGED
@@ -21,7 +21,8 @@ from audiocraft.audiogen import AudioGen, audio_write
|
|
21 |
sound_generator = AudioGen.get_pretrained('facebook/audiogen-medium')
|
22 |
sound_generator.set_generation_params(duration=6)
|
23 |
|
24 |
-
|
|
|
25 |
|
26 |
# SSH AGENT
|
27 |
# eval $(ssh-agent -s)
|
@@ -127,15 +128,15 @@ def serve_wav():
|
|
127 |
|
128 |
# Physically Save Client Files
|
129 |
for filename, obj in request.files.items():
|
130 |
-
obj.save(f'
|
131 |
|
132 |
print('Saved all files on Server Side\n\n')
|
133 |
|
134 |
-
args = SimpleNamespace(text=None if r.get('text') is None else
|
135 |
-
video=None if r.get('video') is None else
|
136 |
-
image=None if r.get('image') is None else
|
137 |
voice=r.get('voice')[0],
|
138 |
-
native=None if r.get('native') is None else
|
139 |
affective = r.get('affective')[0],
|
140 |
scene=r.get('scene')[0]
|
141 |
)
|
@@ -291,7 +292,7 @@ def serve_wav():
|
|
291 |
# ==== TTS .srt ====
|
292 |
|
293 |
if do_video_dub:
|
294 |
-
OUT_FILE = '
|
295 |
subtitles = text
|
296 |
MAX_LEN = int(subtitles[-1][2] + 17) * 24000
|
297 |
# 17 extra seconds fail-safe for long-last-segment
|
@@ -321,7 +322,7 @@ def serve_wav():
|
|
321 |
(.64 * total + .27 * x_native)[:, None],
|
322 |
24000)
|
323 |
else: # Video from plain (.txt)
|
324 |
-
OUT_FILE = '
|
325 |
x = tts_multi_sentence(text=text,
|
326 |
precomputed_style_vector=precomputed_style_vector,
|
327 |
voice=args.voice,
|
@@ -333,7 +334,7 @@ def serve_wav():
|
|
333 |
if args.image is not None:
|
334 |
|
335 |
STATIC_FRAME = args.image # 'assets/image_from_T31.jpg'
|
336 |
-
OUT_FILE = '
|
337 |
|
338 |
# SILENT CLIP
|
339 |
|
@@ -346,7 +347,7 @@ def serve_wav():
|
|
346 |
scene=args.scene
|
347 |
)
|
348 |
soundfile.write(AUDIO_TRACK, x, 24000)
|
349 |
-
|
350 |
# write final output video
|
351 |
subprocess.call(
|
352 |
["ffmpeg",
|
@@ -361,7 +362,7 @@ def serve_wav():
|
|
361 |
"0:v:0",
|
362 |
"-map",
|
363 |
" 1:a:0",
|
364 |
-
OUT_FILE])
|
365 |
|
366 |
print(f'\noutput video is saved as {OUT_FILE}')
|
367 |
|
@@ -372,8 +373,8 @@ def serve_wav():
|
|
372 |
precomputed_style_vector=precomputed_style_vector,
|
373 |
voice=args.voice,
|
374 |
scene=args.scene)
|
375 |
-
OUT_FILE = '
|
376 |
-
soundfile.write(OUT_FILE, x, 24000)
|
377 |
|
378 |
|
379 |
|
@@ -393,7 +394,7 @@ def serve_wav():
|
|
393 |
|
394 |
# send server's output as default file -> srv_result.xx
|
395 |
print(f'\n=SERVER saved as {OUT_FILE=}\n')
|
396 |
-
response = send_from_directory(
|
397 |
response.headers['suffix-file-type'] = OUT_FILE
|
398 |
return response
|
399 |
|
|
|
21 |
sound_generator = AudioGen.get_pretrained('facebook/audiogen-medium')
|
22 |
sound_generator.set_generation_params(duration=6)
|
23 |
|
24 |
+
CACHE_DIR = 'flask_cache/'
|
25 |
+
Path(CACHE_DIR).mkdir(parents=True, exist_ok=True)
|
26 |
|
27 |
# SSH AGENT
|
28 |
# eval $(ssh-agent -s)
|
|
|
128 |
|
129 |
# Physically Save Client Files
|
130 |
for filename, obj in request.files.items():
|
131 |
+
obj.save(f'{CACHE_DIR}{filename.replace("/","")}')
|
132 |
|
133 |
print('Saved all files on Server Side\n\n')
|
134 |
|
135 |
+
args = SimpleNamespace(text=None if r.get('text') is None else CACHE_DIR + r.get('text')[0].replace("/",""),
|
136 |
+
video=None if r.get('video') is None else CACHE_DIR + r.get('video')[0].replace("/",""),
|
137 |
+
image=None if r.get('image') is None else CACHE_DIR + r.get('image')[0].replace("/",""),
|
138 |
voice=r.get('voice')[0],
|
139 |
+
native=None if r.get('native') is None else CACHE_DIR + r.get('native')[0].replace("/",""),
|
140 |
affective = r.get('affective')[0],
|
141 |
scene=r.get('scene')[0]
|
142 |
)
|
|
|
292 |
# ==== TTS .srt ====
|
293 |
|
294 |
if do_video_dub:
|
295 |
+
OUT_FILE = 'tmp.mp4' #args.out_file + '_video_dub.mp4'
|
296 |
subtitles = text
|
297 |
MAX_LEN = int(subtitles[-1][2] + 17) * 24000
|
298 |
# 17 extra seconds fail-safe for long-last-segment
|
|
|
322 |
(.64 * total + .27 * x_native)[:, None],
|
323 |
24000)
|
324 |
else: # Video from plain (.txt)
|
325 |
+
OUT_FILE = 'tmp.mp4'
|
326 |
x = tts_multi_sentence(text=text,
|
327 |
precomputed_style_vector=precomputed_style_vector,
|
328 |
voice=args.voice,
|
|
|
334 |
if args.image is not None:
|
335 |
|
336 |
STATIC_FRAME = args.image # 'assets/image_from_T31.jpg'
|
337 |
+
OUT_FILE = 'tmp.mp4' #args.out_file + '_image_to_speech.mp4'
|
338 |
|
339 |
# SILENT CLIP
|
340 |
|
|
|
347 |
scene=args.scene
|
348 |
)
|
349 |
soundfile.write(AUDIO_TRACK, x, 24000)
|
350 |
+
if args.video or args.image:
|
351 |
# write final output video
|
352 |
subprocess.call(
|
353 |
["ffmpeg",
|
|
|
362 |
"0:v:0",
|
363 |
"-map",
|
364 |
" 1:a:0",
|
365 |
+
CACHE_DIR + OUT_FILE])
|
366 |
|
367 |
print(f'\noutput video is saved as {OUT_FILE}')
|
368 |
|
|
|
373 |
precomputed_style_vector=precomputed_style_vector,
|
374 |
voice=args.voice,
|
375 |
scene=args.scene)
|
376 |
+
OUT_FILE = 'tmp.wav'
|
377 |
+
soundfile.write(CACHE_DIR + OUT_FILE, x, 24000)
|
378 |
|
379 |
|
380 |
|
|
|
394 |
|
395 |
# send server's output as default file -> srv_result.xx
|
396 |
print(f'\n=SERVER saved as {OUT_FILE=}\n')
|
397 |
+
response = send_from_directory(CACHE_DIR, path=OUT_FILE)
|
398 |
response.headers['suffix-file-type'] = OUT_FILE
|
399 |
return response
|
400 |
|
landscape2soundscape.py
CHANGED
@@ -56,7 +56,7 @@ DESCRIPTIONS = [
|
|
56 |
'01_Schick_AII840_001.jpg', # image
|
57 |
'01_Schick_AII840_001.txt', # text
|
58 |
'Statue in shire hill on autumn beach.', # audiocraft
|
59 |
-
'Gottlieb
|
60 |
'en_US/m-ailabs_low#mary_ann',
|
61 |
],
|
62 |
# 2
|
@@ -156,7 +156,7 @@ SILENT_VIDEO = '_silent_video.mp4'
|
|
156 |
# SILENT CLIP
|
157 |
|
158 |
|
159 |
-
for _img_, _text_, soundscape_text, _title_, _voice_ in DESCRIPTIONS[:
|
160 |
|
161 |
# cv2put txt
|
162 |
im = cv2.imread(PIC_DIR + _img_) # IMG must have EVEN shape
|
|
|
56 |
'01_Schick_AII840_001.jpg', # image
|
57 |
'01_Schick_AII840_001.txt', # text
|
58 |
'Statue in shire hill on autumn beach.', # audiocraft
|
59 |
+
'Gottlieb Schick - Bildnis der Heinrike Dannecker - 1802', # cv2 puttext title
|
60 |
'en_US/m-ailabs_low#mary_ann',
|
61 |
],
|
62 |
# 2
|
|
|
156 |
# SILENT CLIP
|
157 |
|
158 |
|
159 |
+
for _img_, _text_, soundscape_text, _title_, _voice_ in DESCRIPTIONS[:20]:
|
160 |
|
161 |
# cv2put txt
|
162 |
im = cv2.imread(PIC_DIR + _img_) # IMG must have EVEN shape
|