freq = 16000
Browse files
- audiocraft/audiogen.py +1 -1
- live_api.py +16 -12
- live_demo.py +17 -17
- visualize_tts_plesantness.py +1 -1
audiocraft/audiogen.py
CHANGED
@@ -87,7 +87,7 @@ class AudioGen(BaseGenModel):
|
|
87 |
|
88 |
def set_generation_params(self, use_sampling: bool = True, top_k: int = 250,
|
89 |
top_p: float = 0.0, temperature: float = 1.0,
|
90 |
-
duration: float = 10.0, cfg_coef: float =
|
91 |
two_step_cfg: bool = False, extend_stride: float = 2):
|
92 |
"""Set the generation parameters for AudioGen.
|
93 |
|
|
|
87 |
|
88 |
def set_generation_params(self, use_sampling: bool = True, top_k: int = 250,
|
89 |
top_p: float = 0.0, temperature: float = 1.0,
|
90 |
+
duration: float = 10.0, cfg_coef: float = 2.4,
|
91 |
two_step_cfg: bool = False, extend_stride: float = 2):
|
92 |
"""Set the generation parameters for AudioGen.
|
93 |
|
live_api.py
CHANGED
@@ -14,7 +14,7 @@ from pathlib import Path
|
|
14 |
from types import SimpleNamespace
|
15 |
from flask import Flask, request, send_from_directory
|
16 |
from flask_cors import CORS
|
17 |
-
from audiocraft.audiogen import AudioGen
|
18 |
|
19 |
sound_generator = AudioGen.get_pretrained('facebook/audiogen-medium')
|
20 |
sound_generator.set_generation_params(duration=4)
|
@@ -46,16 +46,20 @@ Path(CACHE_DIR).mkdir(parents=True, exist_ok=True)
|
|
46 |
|
47 |
|
48 |
def tts_multi_sentence(scene=None):
|
49 |
-
if scene is not None:
|
|
|
|
|
50 |
|
51 |
-
|
52 |
-
sound_background = audio_write(None,
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
|
|
57 |
else:
|
58 |
-
|
|
|
59 |
|
60 |
# # StyleTTS2
|
61 |
# if ('en_US/' in voice) or ('en_UK/' in voice) or (voice is None):
|
@@ -72,7 +76,7 @@ def tts_multi_sentence(scene=None):
|
|
72 |
|
73 |
# return overlay(x, sound_background)
|
74 |
|
75 |
-
return
|
76 |
|
77 |
|
78 |
|
@@ -109,9 +113,9 @@ def serve_wav():
|
|
109 |
|
110 |
|
111 |
x = tts_multi_sentence(args.scene)
|
112 |
-
|
113 |
OUT_FILE = 'tmp.wav'
|
114 |
-
soundfile.write(CACHE_DIR + OUT_FILE, x,
|
115 |
|
116 |
|
117 |
|
|
|
14 |
from types import SimpleNamespace
|
15 |
from flask import Flask, request, send_from_directory
|
16 |
from flask_cors import CORS
|
17 |
+
from audiocraft.audiogen import AudioGen #, audio_write
|
18 |
|
19 |
sound_generator = AudioGen.get_pretrained('facebook/audiogen-medium')
|
20 |
sound_generator.set_generation_params(duration=4)
|
|
|
46 |
|
47 |
|
48 |
def tts_multi_sentence(scene=None):
|
49 |
+
if scene is not None and len(scene) >= 4:
|
50 |
+
print(f'Processing: {scene} ..')
|
51 |
+
x = sound_generator.generate([scene])[0].detach().cpu().numpy()[0, :]
|
52 |
|
53 |
+
x /= np.abs(x).max() + 1e-7
|
54 |
+
# sound_background = audio_write(None,
|
55 |
+
# sound_background.cpu(),
|
56 |
+
# 16000, #24000, # Same as styleTTs sample_rate,
|
57 |
+
# strategy="loudness",
|
58 |
+
# loudness_compressor=True)
|
59 |
+
print(f'Craft Finished for: {scene}\n\n\n\n____{x.shape}')
|
60 |
else:
|
61 |
+
print(scene, '\nDrop\n')
|
62 |
+
x = np.zeros(400)
|
63 |
|
64 |
# # StyleTTS2
|
65 |
# if ('en_US/' in voice) or ('en_UK/' in voice) or (voice is None):
|
|
|
76 |
|
77 |
# return overlay(x, sound_background)
|
78 |
|
79 |
+
return x
|
80 |
|
81 |
|
82 |
|
|
|
113 |
|
114 |
|
115 |
x = tts_multi_sentence(args.scene)
|
116 |
+
|
117 |
OUT_FILE = 'tmp.wav'
|
118 |
+
soundfile.write(CACHE_DIR + OUT_FILE, x, 16000)
|
119 |
|
120 |
|
121 |
|
live_demo.py
CHANGED
@@ -39,15 +39,15 @@ def send_to_server(args):
|
|
39 |
|
40 |
response = requests.post(url, data=payload) # NONEs do not arrive to servers dict
|
41 |
|
42 |
-
# Check the response from the server
|
43 |
-
if response.status_code == 200:
|
44 |
-
|
45 |
-
|
46 |
|
47 |
-
else:
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
return response
|
52 |
|
53 |
|
@@ -60,15 +60,15 @@ def cli(): # args.out_file is not send to server - server writes tmp - copied by
|
|
60 |
# _text, _scene = args.text.split('|')
|
61 |
# args.text = _text
|
62 |
args.scene = args.text #_scene
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
|
73 |
if __name__ == '__main__':
|
74 |
cli()
|
|
|
39 |
|
40 |
response = requests.post(url, data=payload) # NONEs do not arrive to servers dict
|
41 |
|
42 |
+
# # Check the response from the server
|
43 |
+
# if response.status_code == 200:
|
44 |
+
# print("\nRequest was successful!")
|
45 |
+
# # print("Response:", respdonse.__dict__.keys(), '\n=====\n')
|
46 |
|
47 |
+
# else:
|
48 |
+
# print("Failed to send the request")
|
49 |
+
# print("Status Code:", response.status_code)
|
50 |
+
# print("Response:", response.text)
|
51 |
return response
|
52 |
|
53 |
|
|
|
60 |
# _text, _scene = args.text.split('|')
|
61 |
# args.text = _text
|
62 |
args.scene = args.text #_scene
|
63 |
+
if len(args.text) >= 4:
|
64 |
+
response = send_to_server(args)
|
65 |
+
out_file = '_gen_.wav' #+ response.headers['suffix-file-type'].split('.')[-1]
|
66 |
+
with open(out_file, 'wb') as f:
|
67 |
+
f.write(response.content)
|
68 |
+
subprocess.run(["paplay", out_file])
|
69 |
+
else:
|
70 |
+
print(f'__\n{args.text}\n')
|
71 |
+
|
72 |
|
73 |
if __name__ == '__main__':
|
74 |
cli()
|
visualize_tts_plesantness.py
CHANGED
@@ -452,6 +452,6 @@ for lang in ['english',
|
|
452 |
|
453 |
|
454 |
|
455 |
-
plt.savefig(f'fig_{lang}_{WIN=}_{HOP=}
|
456 |
plt.close()
|
457 |
|
|
|
452 |
|
453 |
|
454 |
|
455 |
+
plt.savefig(f'fig_{lang}_{WIN=}_{HOP=}_HFdisc.png', bbox_inches='tight')
|
456 |
plt.close()
|
457 |
|