Dionyssos committed on
Commit
2d0e2b6
1 Parent(s): c4effd2

freq = 16000

Browse files
audiocraft/audiogen.py CHANGED
@@ -87,7 +87,7 @@ class AudioGen(BaseGenModel):
87
 
88
  def set_generation_params(self, use_sampling: bool = True, top_k: int = 250,
89
  top_p: float = 0.0, temperature: float = 1.0,
90
- duration: float = 10.0, cfg_coef: float = 3.0,
91
  two_step_cfg: bool = False, extend_stride: float = 2):
92
  """Set the generation parameters for AudioGen.
93
 
 
87
 
88
  def set_generation_params(self, use_sampling: bool = True, top_k: int = 250,
89
  top_p: float = 0.0, temperature: float = 1.0,
90
+ duration: float = 10.0, cfg_coef: float = 2.4,
91
  two_step_cfg: bool = False, extend_stride: float = 2):
92
  """Set the generation parameters for AudioGen.
93
 
live_api.py CHANGED
@@ -14,7 +14,7 @@ from pathlib import Path
14
  from types import SimpleNamespace
15
  from flask import Flask, request, send_from_directory
16
  from flask_cors import CORS
17
- from audiocraft.audiogen import AudioGen, audio_write
18
 
19
  sound_generator = AudioGen.get_pretrained('facebook/audiogen-medium')
20
  sound_generator.set_generation_params(duration=4)
@@ -46,16 +46,20 @@ Path(CACHE_DIR).mkdir(parents=True, exist_ok=True)
46
 
47
 
48
  def tts_multi_sentence(scene=None):
49
- if scene is not None:
 
 
50
 
51
- sound_background = sound_generator.generate([scene])[0]
52
- sound_background = audio_write(None,
53
- sound_background.cpu(),
54
- 24000, # Same as styleTTs sample_rate,
55
- strategy="loudness",
56
- loudness_compressor=True).detach().cpu().numpy()[0, :]
 
57
  else:
58
- sound_background = None
 
59
 
60
  # # StyleTTS2
61
  # if ('en_US/' in voice) or ('en_UK/' in voice) or (voice is None):
@@ -72,7 +76,7 @@ def tts_multi_sentence(scene=None):
72
 
73
  # return overlay(x, sound_background)
74
 
75
- return sound_background
76
 
77
 
78
 
@@ -109,9 +113,9 @@ def serve_wav():
109
 
110
 
111
  x = tts_multi_sentence(args.scene)
112
- # print('\n\n\n\n Obtai TTS output shape', x.shape)
113
  OUT_FILE = 'tmp.wav'
114
- soundfile.write(CACHE_DIR + OUT_FILE, x, 24000)
115
 
116
 
117
 
 
14
  from types import SimpleNamespace
15
  from flask import Flask, request, send_from_directory
16
  from flask_cors import CORS
17
+ from audiocraft.audiogen import AudioGen #, audio_write
18
 
19
  sound_generator = AudioGen.get_pretrained('facebook/audiogen-medium')
20
  sound_generator.set_generation_params(duration=4)
 
46
 
47
 
48
  def tts_multi_sentence(scene=None):
49
+ if scene is not None and len(scene) >= 4:
50
+ print(f'Processing: {scene} ..')
51
+ x = sound_generator.generate([scene])[0].detach().cpu().numpy()[0, :]
52
 
53
+ x /= np.abs(x).max() + 1e-7
54
+ # sound_background = audio_write(None,
55
+ # sound_background.cpu(),
56
+ # 16000, #24000, # Same as styleTTs sample_rate,
57
+ # strategy="loudness",
58
+ # loudness_compressor=True)
59
+ print(f'Craft Finished for: {scene}\n\n\n\n____{x.shape}')
60
  else:
61
+ print(scene, '\nDrop\n')
62
+ x = np.zeros(400)
63
 
64
  # # StyleTTS2
65
  # if ('en_US/' in voice) or ('en_UK/' in voice) or (voice is None):
 
76
 
77
  # return overlay(x, sound_background)
78
 
79
+ return x
80
 
81
 
82
 
 
113
 
114
 
115
  x = tts_multi_sentence(args.scene)
116
+
117
  OUT_FILE = 'tmp.wav'
118
+ soundfile.write(CACHE_DIR + OUT_FILE, x, 16000)
119
 
120
 
121
 
live_demo.py CHANGED
@@ -39,15 +39,15 @@ def send_to_server(args):
39
 
40
  response = requests.post(url, data=payload) # NONEs do not arrive to servers dict
41
 
42
- # Check the response from the server
43
- if response.status_code == 200:
44
- print("\nRequest was successful!")
45
- # print("Response:", respdonse.__dict__.keys(), '\n=====\n')
46
 
47
- else:
48
- print("Failed to send the request")
49
- print("Status Code:", response.status_code)
50
- print("Response:", response.text)
51
  return response
52
 
53
 
@@ -60,15 +60,15 @@ def cli(): # args.out_file is not send to server - server writes tmp - copied by
60
  # _text, _scene = args.text.split('|')
61
  # args.text = _text
62
  args.scene = args.text #_scene
63
- response = send_to_server(args)
64
- out_file = '_gen_.wav' #+ response.headers['suffix-file-type'].split('.')[-1]
65
-
66
- with open(out_file, 'wb') as f:
67
- f.write(response.content)
68
- # print('REsponse AT client []\n----------------------------', response.headers)
69
-
70
- subprocess.run(["paplay", out_file])
71
-
72
 
73
  if __name__ == '__main__':
74
  cli()
 
39
 
40
  response = requests.post(url, data=payload) # NONEs do not arrive to servers dict
41
 
42
+ # # Check the response from the server
43
+ # if response.status_code == 200:
44
+ # print("\nRequest was successful!")
45
+ # # print("Response:", respdonse.__dict__.keys(), '\n=====\n')
46
 
47
+ # else:
48
+ # print("Failed to send the request")
49
+ # print("Status Code:", response.status_code)
50
+ # print("Response:", response.text)
51
  return response
52
 
53
 
 
60
  # _text, _scene = args.text.split('|')
61
  # args.text = _text
62
  args.scene = args.text #_scene
63
+ if len(args.text) >= 4:
64
+ response = send_to_server(args)
65
+ out_file = '_gen_.wav' #+ response.headers['suffix-file-type'].split('.')[-1]
66
+ with open(out_file, 'wb') as f:
67
+ f.write(response.content)
68
+ subprocess.run(["paplay", out_file])
69
+ else:
70
+ print(f'__\n{args.text}\n')
71
+
72
 
73
  if __name__ == '__main__':
74
  cli()
visualize_tts_plesantness.py CHANGED
@@ -452,6 +452,6 @@ for lang in ['english',
452
 
453
 
454
 
455
- plt.savefig(f'fig_{lang}_{WIN=}_{HOP=}_fin0.pdf', bbox_inches='tight')
456
  plt.close()
457
 
 
452
 
453
 
454
 
455
+ plt.savefig(f'fig_{lang}_{WIN=}_{HOP=}_HFdisc.png', bbox_inches='tight')
456
  plt.close()
457