fx
Browse files- README.md +15 -6
- api.py +1 -0
- landscape2soundscape.py +5 -90
- logo_raw_smb_aud.png +0 -0
README.md
CHANGED
@@ -18,12 +18,14 @@ tags:
|
|
18 |
---
|
19 |
|
20 |
|
21 |
-
# Affective TTS &
|
22 |
|
23 |
-
|
24 |
-
-
|
|
|
|
|
|
|
25 |
- `134` built-in affective voices available, tuned for [StyleTTS2](https://github.com/yl4579/StyleTTS2).
|
26 |
-
- [GitHub](https://github.com/audeering/shift)
|
27 |
|
28 |
### Available Voices
|
29 |
|
@@ -40,7 +42,7 @@ cd shift/
|
|
40 |
pip install -r requirements.txt
|
41 |
```
|
42 |
|
43 |
-
|
44 |
|
45 |
```
|
46 |
CUDA_DEVICE_ORDER=PCI_BUS_ID HF_HOME=./hf_home CUDA_VISIBLE_DEVICES=2 python api.py
|
@@ -48,7 +50,7 @@ CUDA_DEVICE_ORDER=PCI_BUS_ID HF_HOME=./hf_home CUDA_VISIBLE_DEVICES=2 python api
|
|
48 |
|
49 |
## Inference
|
50 |
|
51 |
-
The following need `api.py` to be running
|
52 |
|
53 |
**Text 2 Speech**
|
54 |
|
@@ -77,6 +79,13 @@ python tts.py --text assets/head_of_fortuna_en.srt --video assets/head_of_fortun
|
|
77 |
python tts.py --text assets/head_of_fortuna_GPT.txt --video assets/head_of_fortuna.mp4
|
78 |
```
|
79 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
## Examples
|
81 |
|
82 |
Substitute Native voice via TTS
|
|
|
18 |
---
|
19 |
|
20 |
|
21 |
+
# Affective TTS & Soundscapes
|
22 |
|
23 |
+
Synthesize affective TTS using the [SHIFT TTS tool](https://github.com/audeering/shift), as well as audio soundscapes.
|
24 |
+
- Affective TTS is based on this [phenomenon](https://huggingface.co/dkounadis/artificial-styletts2/discussions/2)
|
25 |
+
- Soundscapes, e.g. trees, water, leaves, are text-described generations from [AudioGen](https://huggingface.co/dkounadis/artificial-styletts2/discussions/3)
|
26 |
+
- `landscape2soundscape.py` shows an example of how to use it.
|
27 |
+
- Accepts plain text or subtitles (.srt) & overlays the result onto videos.
|
28 |
- `134` built-in affective voices available, tuned for [StyleTTS2](https://github.com/yl4579/StyleTTS2).
|
|
|
29 |
|
30 |
### Available Voices
|
31 |
|
|
|
42 |
pip install -r requirements.txt
|
43 |
```
|
44 |
|
45 |
+
Flask
|
46 |
|
47 |
```
|
48 |
CUDA_DEVICE_ORDER=PCI_BUS_ID HF_HOME=./hf_home CUDA_VISIBLE_DEVICES=2 python api.py
|
|
|
50 |
|
51 |
## Inference
|
52 |
|
53 |
+
The following commands need `api.py` to be running, e.g. in a `tmux` session.
|
54 |
|
55 |
**Text 2 Speech**
|
56 |
|
|
|
79 |
python tts.py --text assets/head_of_fortuna_GPT.txt --video assets/head_of_fortuna.mp4
|
80 |
```
|
81 |
|
82 |
+
**Landscape 2 Soundscape**
|
83 |
+
|
84 |
+
```python
|
85 |
+
# TTS & soundscape - overlay to .mp4
|
86 |
+
python landscape2soundscape.py
|
87 |
+
```
|
88 |
+
|
89 |
## Examples
|
90 |
|
91 |
Substitute Native voice via TTS
|
api.py
CHANGED
@@ -396,6 +396,7 @@ def serve_wav():
|
|
396 |
print(f'\n=SERVER saved as {OUT_FILE=}\n')
|
397 |
response = send_from_directory(CACHE_DIR, path=OUT_FILE)
|
398 |
response.headers['suffix-file-type'] = OUT_FILE
|
|
|
399 |
return response
|
400 |
|
401 |
|
|
|
396 |
print(f'\n=SERVER saved as {OUT_FILE=}\n')
|
397 |
response = send_from_directory(CACHE_DIR, path=OUT_FILE)
|
398 |
response.headers['suffix-file-type'] = OUT_FILE
|
399 |
+
print('_________________________________________________________\n ? \n_______________')
|
400 |
return response
|
401 |
|
402 |
|
landscape2soundscape.py
CHANGED
@@ -3,28 +3,18 @@ import subprocess
|
|
3 |
import cv2
|
4 |
|
5 |
# with subprocess and an extra argument 'scene' and a 'resized image saved as png' we can call the server
|
6 |
-
|
7 |
# yt-dlp is installed in .d4
|
8 |
# Download Part of Video
|
9 |
# yt-dlp https://www.youtube.com/watch?v=UZ9uyQI3pF0 --downloader ffmpeg --downloader-args "ffmpeg_i:-ss 997 -to 2512"
|
10 |
# ffmpeg -i Sandra\ Kotevska\,\ Painting\ Rose\ bush\,\ mixed\ media\,\ 2017.\ \[NMzC_036MtE\].mkv -f mp3 -ar 22050 -vn out44.wav -ac 1
|
11 |
-
|
12 |
# https://superuser.com/questions/583393/how-to-extract-subtitle-from-video-using-ffmpeg
|
13 |
-
|
14 |
-
def _shift(x):
|
15 |
-
n = x.shape[0]
|
16 |
-
i = np.random.randint(.24 * n, .74 * n)
|
17 |
-
return np.roll(x, i)
|
18 |
-
|
19 |
#___________________________________________________________________________________________________
|
20 |
# VIDEO FROM IMAGE with CAPTIONS
|
21 |
#
|
22 |
# UPLOAD to: Simaviro: Documents General WORK PACKAGES WP1 ContentRepository ANBPR_ROMANIA TTSvideos
|
23 |
# __________________________________________________________________________________________________
|
24 |
-
|
25 |
# TO DOWNLOAD SRT for YouTube
|
26 |
# yt-dlp --write-sub --sub-lang en --convert-subs "srt" https://www.youtube.com/watch?v=F1Ib7TAu7eg&list=PL4x2B6LSwFewdDvRnUTpBM7jkmpwouhPv&index=2
|
27 |
-
|
28 |
# _voice = 'en_US/vctk_low#p330'
|
29 |
# _voice = 'en_US/cmu-arctic_low#lnh' #en_US/vctk_low#p249' # 'en_US/vctk_low#p282'
|
30 |
# _voice = ''en_US/vctk_low#p351''
|
@@ -93,7 +83,7 @@ DESCRIPTIONS = [
|
|
93 |
],
|
94 |
# 6
|
95 |
[
|
96 |
-
'06_Menzel_AI900_001.jpg'
|
97 |
'06_Menzel_AI900_001.txt',
|
98 |
'Olive trees in Seville',
|
99 |
'Adolph Menzel - Bauplatz mit Weiden - 1846',
|
@@ -181,92 +171,17 @@ for _img_, _text_, soundscape_text, _title_, _voice_ in DESCRIPTIONS[:20]:
|
|
181 |
offset_h = 24
|
182 |
im[offset_h:h+offset_h, :w, :] = (.4 * im[offset_h:h+offset_h, :w, :] + .6 * fram).astype(np.uint8)
|
183 |
# cv2.imshow('i', im); cv2.waitKey(); cv2.destroyAllWindows()
|
184 |
-
|
185 |
-
# logo aud
|
186 |
-
|
187 |
-
logo = cv2.imread('assets/audeering_logo.jpg')[:740, :, :]
|
188 |
-
logo = cv2.resize(logo, (logo.shape[1]//2, logo.shape[0]//2))
|
189 |
-
h, w, _ = logo.shape
|
190 |
-
offset_h = im.shape[0] - h
|
191 |
-
im[offset_h:h+offset_h, :w, :] = (.23 * im[offset_h:h+offset_h, :w, :] + .77 * logo).astype(np.uint8)
|
192 |
-
|
193 |
-
# logo SMB
|
194 |
-
|
195 |
-
logo = cv2.imread('assets/SMB_logo.png')#[:740, :, :]
|
196 |
-
logo = cv2.resize(logo, (logo.shape[1]//2, logo.shape[0]//2))
|
197 |
-
h, w, _ = logo.shape
|
198 |
-
offset_h = im.shape[0] - h
|
199 |
-
# fill logo SMB with the pixels of im - where SMB is empty
|
200 |
-
ptc = im[offset_h:h+offset_h, :w, :]
|
201 |
-
logo[logo == 0] = ptc[logo == 0] # fill empty
|
202 |
-
im[offset_h:h+offset_h, :w, :] = (.13 * im[offset_h:h+offset_h, :w, :] + .86 * logo).astype(np.uint8)
|
203 |
-
|
204 |
-
# # logo shift
|
205 |
-
|
206 |
-
# logo = cv2.imread('assets/shift_logo.png')#[:740, :, :]
|
207 |
-
# logo = cv2.resize(logo, (logo.shape[1]//2, logo.shape[0]//2))
|
208 |
-
# h, w, _ = logo.shape
|
209 |
-
# offset_h = im.shape[0] - h #-274
|
210 |
-
# offset_w = im.shape[1] - w #400
|
211 |
-
# # # fill logo SMB with the pixels of im - where SMB is empty
|
212 |
-
# ptc = im[offset_h:h+offset_h, :w, :]
|
213 |
-
# # msk = np.tile(logo[:, :,0:1] > 252, [1,1,3])
|
214 |
-
# # logo[msk] = ptc[msk] # fill empty
|
215 |
-
# im[offset_h:h+offset_h, offset_w:w+offset_w, :] = (.0 * im[offset_h:h+offset_h, offset_w:w+offset_w, :] + 1 * logo).astype(np.uint8)
|
216 |
-
|
217 |
-
# silent video - img
|
218 |
-
# im = cv2.resize(im, (700, 700))
|
219 |
-
cv2.imwrite('pic_logo_emb.png', im)
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
# raw, _ = soundfile.read(soundscape_file) # 12345, 2
|
225 |
-
|
226 |
-
# # fill
|
227 |
-
# soundscape = []
|
228 |
-
# for _replica in range(math.ceil(len(total) / raw.shape[0])+1):
|
229 |
-
# soundscape.append(raw) # _shift non defined for stereo
|
230 |
-
# soundscape = np.concatenate(soundscape, 0)
|
231 |
-
|
232 |
-
# total = .36 * np.concatenate([total[:, None],
|
233 |
-
# total[:, None]], 1) + .64 * soundscape[:len(total), :]
|
234 |
-
|
235 |
-
# outfile
|
236 |
-
|
237 |
OUT_FILE = _img_.split('/')[-1].replace('.','__') + '.mp4' # assets / -1
|
238 |
print(f'{OUT_FILE=}\n')
|
239 |
-
# call API passing img
|
240 |
-
|
241 |
subprocess.run(
|
242 |
[
|
243 |
"python",
|
244 |
"tts.py",
|
245 |
"--text", PIC_DIR + _text_,
|
246 |
-
'--image', '
|
247 |
# "--title", _title_,
|
248 |
# '--soundscape_text', soundscape_text,
|
249 |
'--voice', _voice_,
|
250 |
-
'--out_file', OUT_FILE,
|
251 |
-
])
|
252 |
-
|
253 |
-
# soundfile.write(AUDIO_TRACK, total, 22050)
|
254 |
-
# subprocess.call(
|
255 |
-
# ["ffmpeg",
|
256 |
-
# "-y",
|
257 |
-
# "-i",
|
258 |
-
# SILENT_VIDEO,
|
259 |
-
# "-i",
|
260 |
-
# AUDIO_TRACK,
|
261 |
-
# #"-c:v",
|
262 |
-
# #"copy",
|
263 |
-
# "-map",
|
264 |
-
# "0:v:0",
|
265 |
-
# "-map",
|
266 |
-
# " 1:a:0",
|
267 |
-
# "-vf",
|
268 |
-
# "pad",
|
269 |
-
# OUT_FILE])
|
270 |
-
|
271 |
-
|
272 |
-
|
|
|
3 |
import cv2
|
4 |
|
5 |
# with subprocess and an extra argument 'scene' and a 'resized image saved as png' we can call the server
|
|
|
6 |
# yt-dlp is installed in .d4
|
7 |
# Download Part of Video
|
8 |
# yt-dlp https://www.youtube.com/watch?v=UZ9uyQI3pF0 --downloader ffmpeg --downloader-args "ffmpeg_i:-ss 997 -to 2512"
|
9 |
# ffmpeg -i Sandra\ Kotevska\,\ Painting\ Rose\ bush\,\ mixed\ media\,\ 2017.\ \[NMzC_036MtE\].mkv -f mp3 -ar 22050 -vn out44.wav -ac 1
|
|
|
10 |
# https://superuser.com/questions/583393/how-to-extract-subtitle-from-video-using-ffmpeg
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
#___________________________________________________________________________________________________
|
12 |
# VIDEO FROM IMAGE with CAPTIONS
|
13 |
#
|
14 |
# UPLOAD to: Simaviro: Documents General WORK PACKAGES WP1 ContentRepository ANBPR_ROMANIA TTSvideos
|
15 |
# __________________________________________________________________________________________________
|
|
|
16 |
# TO DOWNLOAD SRT for YouTube
|
17 |
# yt-dlp --write-sub --sub-lang en --convert-subs "srt" https://www.youtube.com/watch?v=F1Ib7TAu7eg&list=PL4x2B6LSwFewdDvRnUTpBM7jkmpwouhPv&index=2
|
|
|
18 |
# _voice = 'en_US/vctk_low#p330'
|
19 |
# _voice = 'en_US/cmu-arctic_low#lnh' #en_US/vctk_low#p249' # 'en_US/vctk_low#p282'
|
20 |
# _voice = ''en_US/vctk_low#p351''
|
|
|
83 |
],
|
84 |
# 6
|
85 |
[
|
86 |
+
'06_Menzel_AI900_001.jpg',
|
87 |
'06_Menzel_AI900_001.txt',
|
88 |
'Olive trees in Seville',
|
89 |
'Adolph Menzel - Bauplatz mit Weiden - 1846',
|
|
|
171 |
offset_h = 24
|
172 |
im[offset_h:h+offset_h, :w, :] = (.4 * im[offset_h:h+offset_h, :w, :] + .6 * fram).astype(np.uint8)
|
173 |
# cv2.imshow('i', im); cv2.waitKey(); cv2.destroyAllWindows()
|
174 |
+
cv2.imwrite('_tmp_banner.png', im)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
175 |
OUT_FILE = _img_.split('/')[-1].replace('.','__') + '.mp4' # assets / -1
|
176 |
print(f'{OUT_FILE=}\n')
|
|
|
|
|
177 |
subprocess.run(
|
178 |
[
|
179 |
"python",
|
180 |
"tts.py",
|
181 |
"--text", PIC_DIR + _text_,
|
182 |
+
'--image', '_tmp_banner.png',
|
183 |
# "--title", _title_,
|
184 |
# '--soundscape_text', soundscape_text,
|
185 |
'--voice', _voice_,
|
186 |
+
'--out_file', OUT_FILE, # save to correct location is handled in client
|
187 |
+
])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
logo_raw_smb_aud.png
ADDED