Dionyssos committed on
Commit
0572d9a
·
1 Parent(s): a8efc9a
Files changed (2) hide show
  1. api.py +15 -14
  2. landscape2soundscape.py +2 -2
api.py CHANGED
@@ -21,7 +21,8 @@ from audiocraft.audiogen import AudioGen, audio_write
21
  sound_generator = AudioGen.get_pretrained('facebook/audiogen-medium')
22
  sound_generator.set_generation_params(duration=6)
23
 
24
- Path('./flask_cache').mkdir(parents=True, exist_ok=True)
 
25
 
26
  # SSH AGENT
27
  # eval $(ssh-agent -s)
@@ -127,15 +128,15 @@ def serve_wav():
127
 
128
  # Physically Save Client Files
129
  for filename, obj in request.files.items():
130
- obj.save(f'flask_cache/{filename.replace("/","")}')
131
 
132
  print('Saved all files on Server Side\n\n')
133
 
134
- args = SimpleNamespace(text=None if r.get('text') is None else 'flask_cache/' + r.get('text')[0].replace("/",""),
135
- video=None if r.get('video') is None else 'flask_cache/' + r.get('video')[0].replace("/",""),
136
- image=None if r.get('image') is None else 'flask_cache/' + r.get('image')[0].replace("/",""),
137
  voice=r.get('voice')[0],
138
- native=None if r.get('native') is None else 'flask_cache/' + r.get('native')[0].replace("/",""),
139
  affective = r.get('affective')[0],
140
  scene=r.get('scene')[0]
141
  )
@@ -291,7 +292,7 @@ def serve_wav():
291
  # ==== TTS .srt ====
292
 
293
  if do_video_dub:
294
- OUT_FILE = './flask_cache/tmp.mp4' #args.out_file + '_video_dub.mp4'
295
  subtitles = text
296
  MAX_LEN = int(subtitles[-1][2] + 17) * 24000
297
  # 17 extra seconds fail-safe for long-last-segment
@@ -321,7 +322,7 @@ def serve_wav():
321
  (.64 * total + .27 * x_native)[:, None],
322
  24000)
323
  else: # Video from plain (.txt)
324
- OUT_FILE = './flask_cache/tmp.mp4' #args.out_file + '_video_from_txt.mp4'
325
  x = tts_multi_sentence(text=text,
326
  precomputed_style_vector=precomputed_style_vector,
327
  voice=args.voice,
@@ -333,7 +334,7 @@ def serve_wav():
333
  if args.image is not None:
334
 
335
  STATIC_FRAME = args.image # 'assets/image_from_T31.jpg'
336
- OUT_FILE = './flask_cache/tmp.mp4' #args.out_file + '_image_to_speech.mp4'
337
 
338
  # SILENT CLIP
339
 
@@ -346,7 +347,7 @@ def serve_wav():
346
  scene=args.scene
347
  )
348
  soundfile.write(AUDIO_TRACK, x, 24000)
349
- elif args.video or args.image:
350
  # write final output video
351
  subprocess.call(
352
  ["ffmpeg",
@@ -361,7 +362,7 @@ def serve_wav():
361
  "0:v:0",
362
  "-map",
363
  " 1:a:0",
364
- OUT_FILE])
365
 
366
  print(f'\noutput video is saved as {OUT_FILE}')
367
 
@@ -372,8 +373,8 @@ def serve_wav():
372
  precomputed_style_vector=precomputed_style_vector,
373
  voice=args.voice,
374
  scene=args.scene)
375
- OUT_FILE = './flask_cache/tmp.wav' #args.out_file + '.wav'
376
- soundfile.write(OUT_FILE, x, 24000)
377
 
378
 
379
 
@@ -393,7 +394,7 @@ def serve_wav():
393
 
394
  # send server's output as default file -> srv_result.xx
395
  print(f'\n=SERVER saved as {OUT_FILE=}\n')
396
- response = send_from_directory('flask_cache/', path=OUT_FILE)
397
  response.headers['suffix-file-type'] = OUT_FILE
398
  return response
399
 
 
21
  sound_generator = AudioGen.get_pretrained('facebook/audiogen-medium')
22
  sound_generator.set_generation_params(duration=6)
23
 
24
+ CACHE_DIR = 'flask_cache/'
25
+ Path(CACHE_DIR).mkdir(parents=True, exist_ok=True)
26
 
27
  # SSH AGENT
28
  # eval $(ssh-agent -s)
 
128
 
129
  # Physically Save Client Files
130
  for filename, obj in request.files.items():
131
+ obj.save(f'{CACHE_DIR}{filename.replace("/","")}')
132
 
133
  print('Saved all files on Server Side\n\n')
134
 
135
+ args = SimpleNamespace(text=None if r.get('text') is None else CACHE_DIR + r.get('text')[0].replace("/",""),
136
+ video=None if r.get('video') is None else CACHE_DIR + r.get('video')[0].replace("/",""),
137
+ image=None if r.get('image') is None else CACHE_DIR + r.get('image')[0].replace("/",""),
138
  voice=r.get('voice')[0],
139
+ native=None if r.get('native') is None else CACHE_DIR + r.get('native')[0].replace("/",""),
140
  affective = r.get('affective')[0],
141
  scene=r.get('scene')[0]
142
  )
 
292
  # ==== TTS .srt ====
293
 
294
  if do_video_dub:
295
+ OUT_FILE = 'tmp.mp4' #args.out_file + '_video_dub.mp4'
296
  subtitles = text
297
  MAX_LEN = int(subtitles[-1][2] + 17) * 24000
298
  # 17 extra seconds fail-safe for long-last-segment
 
322
  (.64 * total + .27 * x_native)[:, None],
323
  24000)
324
  else: # Video from plain (.txt)
325
+ OUT_FILE = 'tmp.mp4'
326
  x = tts_multi_sentence(text=text,
327
  precomputed_style_vector=precomputed_style_vector,
328
  voice=args.voice,
 
334
  if args.image is not None:
335
 
336
  STATIC_FRAME = args.image # 'assets/image_from_T31.jpg'
337
+ OUT_FILE = 'tmp.mp4' #args.out_file + '_image_to_speech.mp4'
338
 
339
  # SILENT CLIP
340
 
 
347
  scene=args.scene
348
  )
349
  soundfile.write(AUDIO_TRACK, x, 24000)
350
+ if args.video or args.image:
351
  # write final output video
352
  subprocess.call(
353
  ["ffmpeg",
 
362
  "0:v:0",
363
  "-map",
364
  " 1:a:0",
365
+ CACHE_DIR + OUT_FILE])
366
 
367
  print(f'\noutput video is saved as {OUT_FILE}')
368
 
 
373
  precomputed_style_vector=precomputed_style_vector,
374
  voice=args.voice,
375
  scene=args.scene)
376
+ OUT_FILE = 'tmp.wav'
377
+ soundfile.write(CACHE_DIR + OUT_FILE, x, 24000)
378
 
379
 
380
 
 
394
 
395
  # send server's output as default file -> srv_result.xx
396
  print(f'\n=SERVER saved as {OUT_FILE=}\n')
397
+ response = send_from_directory(CACHE_DIR, path=OUT_FILE)
398
  response.headers['suffix-file-type'] = OUT_FILE
399
  return response
400
 
landscape2soundscape.py CHANGED
@@ -56,7 +56,7 @@ DESCRIPTIONS = [
56
  '01_Schick_AII840_001.jpg', # image
57
  '01_Schick_AII840_001.txt', # text
58
  'Statue in shire hill on autumn beach.', # audiocraft
59
- 'Gottlieb Chick - Bildnis der Heinrike Dannecker - 1802', # cv2 puttext title
60
  'en_US/m-ailabs_low#mary_ann',
61
  ],
62
  # 2
@@ -156,7 +156,7 @@ SILENT_VIDEO = '_silent_video.mp4'
156
  # SILENT CLIP
157
 
158
 
159
- for _img_, _text_, soundscape_text, _title_, _voice_ in DESCRIPTIONS[:1]:
160
 
161
  # cv2put txt
162
  im = cv2.imread(PIC_DIR + _img_) # IMG must have EVEN shape
 
56
  '01_Schick_AII840_001.jpg', # image
57
  '01_Schick_AII840_001.txt', # text
58
  'Statue in shire hill on autumn beach.', # audiocraft
59
+ 'Gottlieb Schick - Bildnis der Heinrike Dannecker - 1802', # cv2 puttext title
60
  'en_US/m-ailabs_low#mary_ann',
61
  ],
62
  # 2
 
156
  # SILENT CLIP
157
 
158
 
159
+ for _img_, _text_, soundscape_text, _title_, _voice_ in DESCRIPTIONS[:20]:
160
 
161
  # cv2put txt
162
  im = cv2.imread(PIC_DIR + _img_) # IMG must have EVEN shape