audiocraft x Nrepeat
Browse files- README.md +4 -4
- api.py +33 -32
- uc_spk_Landscape2Soundscape_Masterpieces_pics/02_Constable_AI555_001.txt +5 -4
README.md
CHANGED
@@ -20,10 +20,10 @@ tags:
|
|
20 |
|
21 |
|
22 |
# Affective TTS - SoundScape
|
23 |
-
-
|
24 |
-
- Soundscapes
|
25 |
-
- `landscape2soundscape.py` shows how to overlay TTS & Soundscape to Images
|
26 |
-
- `134`
|
27 |
|
28 |
## Available Voices
|
29 |
|
|
|
20 |
|
21 |
|
22 |
# Affective TTS - SoundScape
|
23 |
+
- [SHIFT TTS tool](https://github.com/audeering/shift)
|
24 |
+
- Soundscapes, e.g. `trees, water, leaves`, generated via [AudioGen](https://huggingface.co/dkounadis/artificial-styletts2/discussions/3)
|
25 |
+
- `landscape2soundscape.py` shows how to overlay TTS & Soundscape to Images and create videos
|
26 |
+
- `134` TTS Voices - Affective / Non-affective mode for every voice
|
27 |
|
28 |
## Available Voices
|
29 |
|
api.py
CHANGED
@@ -17,13 +17,14 @@ from flask import Flask, request, send_from_directory
|
|
17 |
from flask_cors import CORS
|
18 |
from moviepy.editor import *
|
19 |
from audiocraft.audiogen import AudioGen, audio_write
|
20 |
-
|
21 |
-
sound_generator = AudioGen.get_pretrained('facebook/audiogen-medium')
|
22 |
-
sound_generator.set_generation_params(duration=6)
|
23 |
-
|
24 |
CACHE_DIR = 'flask_cache/'
|
|
|
|
|
|
|
|
|
25 |
Path(CACHE_DIR).mkdir(parents=True, exist_ok=True)
|
26 |
|
|
|
27 |
# SSH AGENT
|
28 |
# eval $(ssh-agent -s)
|
29 |
# ssh-add ~/.ssh/id_ed25519_github2024
|
@@ -35,25 +36,35 @@ def _shift(x):
|
|
35 |
n = x.shape[0]
|
36 |
i = np.random.randint(.24 * n, max(1, .74 * n)) # high should be above >= 0
|
37 |
x = np.roll(x, i)
|
38 |
-
#
|
39 |
-
|
40 |
-
|
41 |
-
|
|
|
42 |
|
43 |
-
def overlay(x,
|
44 |
-
if
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
print(f'\n====SOUND BACKGROUND SHAPE\n{sound_background.shape=}{np.abs(sound_background.max())=}\n{x.shape=}\n- - - -')
|
56 |
x = .9 * x + .1 * sound_background[:len_speech]
|
|
|
|
|
57 |
return x
|
58 |
|
59 |
def tts_multi_sentence(precomputed_style_vector=None,
|
@@ -68,17 +79,7 @@ def tts_multi_sentence(precomputed_style_vector=None,
|
|
68 |
voice : string or None (falls to styleTTS)
|
69 |
scene : 'A castle in far away lands' -> if passed will generate background sound scene
|
70 |
'''
|
71 |
-
|
72 |
-
if scene is not None:
|
73 |
-
|
74 |
-
sound_background = sound_generator.generate([scene])[0]
|
75 |
-
sound_background = audio_write(None,
|
76 |
-
sound_background.cpu(),
|
77 |
-
24000, # sound_generator.sample_rate,
|
78 |
-
strategy="loudness",
|
79 |
-
loudness_compressor=True)
|
80 |
-
else:
|
81 |
-
sound_background = None
|
82 |
|
83 |
# StyleTTS2
|
84 |
if ('en_US/' in voice) or ('en_UK/' in voice) or (voice is None):
|
@@ -93,7 +94,7 @@ def tts_multi_sentence(precomputed_style_vector=None,
|
|
93 |
embedding_scale=1))
|
94 |
x = np.concatenate(x)
|
95 |
|
96 |
-
return overlay(x,
|
97 |
|
98 |
# Fallback - Mimic-3
|
99 |
text_utils.store_ssml(text=text, voice=voice) # Text has to be list of single sentences
|
|
|
17 |
from flask_cors import CORS
|
18 |
from moviepy.editor import *
|
19 |
from audiocraft.audiogen import AudioGen, audio_write
|
|
|
|
|
|
|
|
|
20 |
CACHE_DIR = 'flask_cache/'
|
21 |
+
SOUNDSCAPE_DURATION = 6
|
22 |
+
sound_generator = AudioGen.get_pretrained('facebook/audiogen-medium')
|
23 |
+
sound_generator.set_generation_params(duration=SOUNDSCAPE_DURATION)
|
24 |
+
print(f'{sound_generator.sample_rate=}')
|
25 |
Path(CACHE_DIR).mkdir(parents=True, exist_ok=True)
|
26 |
|
27 |
+
|
28 |
# SSH AGENT
|
29 |
# eval $(ssh-agent -s)
|
30 |
# ssh-add ~/.ssh/id_ed25519_github2024
|
|
|
36 |
n = x.shape[0]
|
37 |
i = np.random.randint(.24 * n, max(1, .74 * n)) # high should be above >= 0
|
38 |
x = np.roll(x, i)
|
39 |
+
# we can add the one or fade it and then amplify
|
40 |
+
# the audio is so short 6s that is difficult to not hear the shift somewhere
|
41 |
+
# Just concatenate - raw - and then shift - the longconcat audio - many times may fix it
|
42 |
+
# fade_in = 1 - .5 * np.tanh(-4*(np.linspace(-10, 10, n) - 9.4)) + .5 * np.tanh(4*(np.linspace(-10, 10, n) + 9.4))
|
43 |
+
return x #* fade_in # silence this
|
44 |
|
45 |
+
def overlay(x, scene=None):
    """Mix a generated background soundscape under a speech waveform.

    Args:
        x: 1-D speech waveform (numpy array). The background is resampled
            to 24000 Hz below, so `x` is presumably at 24 kHz - confirm
            against the TTS caller.
        scene: Text prompt handed to `sound_generator`. When None, no
            background is generated and `x` is returned untouched.

    Returns:
        `x` itself when `scene` is None; otherwise a new array equal to
        ``.9 * x + .1 * background`` with the same length as `x`.
    """
    if scene is None:
        print('sound_background = None')
        return x

    len_speech = len(x)

    # At least one clip is always generated: for speech shorter than one
    # (SOUNDSCAPE_DURATION + 1) second window the integer division gives 0
    # and np.concatenate() would fail on an empty list.
    n_repeat = max(1, len_speech // ((SOUNDSCAPE_DURATION + 1) * 24000))

    clips = []
    for i in range(n_repeat):
        print(f'AudioCraft: {i} of {n_repeat}', end='\r')
        # The clip lives in its own variable so the speech held in `x`
        # is not overwritten by the generated audio.
        clip = sound_generator.generate(
            [scene]
        )[0].detach().cpu().numpy()[0, :]
        clip = audresample.resample(clip,
                                    original_rate=sound_generator.sample_rate,
                                    target_rate=24000)[0, :]
        clips.append(clip)
    sound_background = np.concatenate(clips)

    for _ in range(10):
        # roll long audio to concentrate the peaks
        sound_background = _shift(sound_background)

    # Loop (or trim) the background so it matches the speech length exactly;
    # the generated clips are not guaranteed to add up to len_speech samples.
    sound_background = np.resize(sound_background, len_speech)

    print(f'\n====SOUND BACKGROUND SHAPE\n{sound_background.shape=}{np.abs(sound_background.max())=}\n{x.shape=}\n- - - -')
    return .9 * x + .1 * sound_background
|
69 |
|
70 |
def tts_multi_sentence(precomputed_style_vector=None,
|
|
|
79 |
voice : string or None (falls to styleTTS)
|
80 |
scene : 'A castle in far away lands' -> if passed will generate background sound scene
|
81 |
'''
|
82 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
|
84 |
# StyleTTS2
|
85 |
if ('en_US/' in voice) or ('en_UK/' in voice) or (voice is None):
|
|
|
94 |
embedding_scale=1))
|
95 |
x = np.concatenate(x)
|
96 |
|
97 |
+
return overlay(x, scene=scene)
|
98 |
|
99 |
# Fallback - Mimic-3
|
100 |
text_utils.store_ssml(text=text, voice=voice) # Text has to be list of single sentences
|
uc_spk_Landscape2Soundscape_Masterpieces_pics/02_Constable_AI555_001.txt
CHANGED
@@ -1,9 +1,10 @@
|
|
1 |
-
From the hand of John Constable, the world of modern landscape painting received its deepest inspiration. "He is the true founder of the
|
2 |
|
3 |
-
In 1802, still a student of the Academy, Constable confided in his friend John Dunthorne, declaring that he would return to his birthplace in Bergholt to develop an honest
|
4 |
|
5 |
-
Among his early masterpieces
|
|
|
6 |
|
7 |
-
Tschudi once observed,
|
8 |
|
9 |
In this timeless scene, Constable’s brush paints not just a landscape, but the spirit of England itself—a place of gentle beauty, serenity, and the quiet drama of nature unfolding.
|
|
|
1 |
+
From the hand of John Constable, the world of modern landscape painting received its deepest inspiration. "He is the true founder of the paysage intime," wrote Hugo von Tschudi in 1896 as he reflected on the acquisition of several of Constable’s works. It is on his achievements that the Barbizon school essentially rests. Constable’s journey began not in the hallowed halls of a master’s workshop, but as a self-taught artist, only finding his way to the Royal Academy at the age of 23, in the year 1799.
|
2 |
|
3 |
+
In 1802, still a student of the Academy, Constable confided in his friend John Dunthorne, declaring that he would return to his birthplace in Bergholt to develop an honest original style, striving to become a true painter of nature.
|
4 |
|
5 |
+
Among his early masterpieces is the view of Higham Village by the River Stour, painted in 1804.
|
6 |
+
With the village of Higham nestled in the county of Suffolk, near Constable’s own birthplace of East Bergholt, the scene offers a gaze over the rooftops of the village and across the Dedham Vale, stretching toward Stratford St. Mary and Gun Hill. Remarkably, this view remains almost unchanged even today.
|
7 |
|
8 |
+
Tschudi once observed that the portrayal of the English landscape is extraordinarily true, with subtle echoes of 17th-century Dutch masters in Constable's finely tuned but uniformly brownish tones. Yet, Constable’s vision was grounded in a precise observation of nature. He captured the valley with its rising mist in soft, muted hues. Cows graze in the foreground, while birds soar above the lush trees to the right. Through the cloudy sky, rays of sunlight break free, casting dappled light upon the land.
|
9 |
|
10 |
In this timeless scene, Constable’s brush paints not just a landscape, but the spirit of England itself—a place of gentle beauty, serenity, and the quiet drama of nature unfolding.
|