jbilcke-hf HF staff committed on
Commit
5ca5d91
1 Parent(s): ef0447a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -10
app.py CHANGED
@@ -4,10 +4,9 @@ import os
4
  from io import BytesIO
5
  import base64
6
  import numpy as np
7
-
8
  from parler_tts import ParlerTTSForConditionalGeneration
9
  from transformers import AutoTokenizer, AutoFeatureExtractor, set_seed
10
- from scipy.io.wavfile import write
11
 
12
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
13
 
@@ -36,14 +35,23 @@ def gen_tts(secret_token, text, description):
36
  )
37
  audio_arr = generation.cpu().numpy().squeeze()
38
 
39
- # Write the numpy array as a WAV file
40
- buffer = BytesIO()
41
- write(buffer, SAMPLE_RATE, audio_arr.astype(np.int16))
42
- buffer.seek(0)
43
-
44
- # Encode the WAV file in base64
45
- audio_base64 = base64.b64encode(buffer.read()).decode('utf-8')
46
- data_uri = 'data:audio/wav;base64,' + audio_base64
 
 
 
 
 
 
 
 
 
47
 
48
  return data_uri
49
 
 
4
  from io import BytesIO
5
  import base64
6
  import numpy as np
7
+ from pydub import AudioSegment
8
  from parler_tts import ParlerTTSForConditionalGeneration
9
  from transformers import AutoTokenizer, AutoFeatureExtractor, set_seed
 
10
 
11
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
12
 
 
35
  )
36
  audio_arr = generation.cpu().numpy().squeeze()
37
 
38
+ # Create an AudioSegment directly from numpy array data
39
+ samples = np.array(audio_arr * (2**15 - 1), dtype=np.int16)
40
+ sound = AudioSegment(
41
+ samples.tobytes(),
42
+ frame_rate=SAMPLE_RATE,
43
+ sample_width=samples.dtype.itemsize,
44
+ channels=1
45
+ )
46
+
47
+ # Export to MP3
48
+ buff_mp3 = BytesIO()
49
+ sound.export(buff_mp3, format="mp3")
50
+ buff_mp3.seek(0)
51
+
52
+ # Encode the MP3 file in base64
53
+ audio_base64 = base64.b64encode(buff_mp3.read()).decode('utf-8')
54
+ data_uri = 'data:audio/mp3;base64,' + audio_base64
55
 
56
  return data_uri
57