jbilcke-hf HF staff committed on
Commit
4156639
1 Parent(s): 9df4ebb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -1
app.py CHANGED
@@ -197,6 +197,30 @@ def format_prompt_zephyr(message, history, system_message=system_message):
197
  print(prompt)
198
  return prompt
199
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
  def generate_local(
201
  prompt,
202
  history,
@@ -587,7 +611,7 @@ def generate_speech_for_sentence(history, chatbot_role, sentence, return_as_byte
587
  wav_bytestream = wav_bytestream.tobytes()
588
 
589
  # Directly encode the WAV bytestream to base64
590
- base64_audio = base64.b64encode(wav_bytestream).decode('utf8')
591
 
592
  if audio_stream is not None:
593
  return (history, base64_audio)
 
197
  print(prompt)
198
  return prompt
199
 
200
+ import struct
201
+
202
+ # Generated by GPT-4
203
def pcm_to_wav(pcm_data, sample_rate=24000, channels=1, bit_depth=16):
    """Wrap raw PCM audio bytes in a minimal RIFF/WAVE container.

    Args:
        pcm_data: Bytes-like object holding either raw little-endian PCM
            samples or an already complete WAV file.
        sample_rate: Samples per second written to the ``fmt `` subchunk.
        channels: Number of interleaved channels.
        bit_depth: Bits per sample.

    Returns:
        ``pcm_data`` unchanged when it already carries a RIFF/WAVE header,
        otherwise a new ``bytes`` object made of a 44-byte canonical PCM
        header followed by the samples (plus one zero pad byte when the
        payload length is odd).

    Raises:
        struct.error: If a header field does not fit its fixed-width slot
            (for example a payload of 4 GiB or more).
    """
    # A real WAV file has "RIFF" at offset 0 *and* "WAVE" at offset 8.
    # Checking both avoids mistaking raw samples that merely happen to
    # start with the bytes "RIFF" for an existing container.
    if pcm_data[:4] == b"RIFF" and pcm_data[8:12] == b"WAVE":
        return pcm_data

    fmt_subchunk_size = 16  # fixed size of the PCM "fmt " payload
    data_subchunk_size = len(pcm_data)

    # RIFF chunks must be word-aligned: an odd-sized payload is followed by
    # one pad byte that counts toward the outer RIFF size but not toward the
    # data subchunk's own size field.
    padding = b"\x00" if data_subchunk_size % 2 else b""

    # "WAVE" tag + ("fmt " header + payload) + ("data" header + payload + pad)
    chunk_size = (
        4
        + (8 + fmt_subchunk_size)
        + (8 + data_subchunk_size + len(padding))
    )

    riff_header = struct.pack('<4sI4s', b'RIFF', chunk_size, b'WAVE')
    fmt_subchunk = struct.pack(
        '<4sIHHIIHH',
        b'fmt ',
        fmt_subchunk_size,
        1,  # audio format tag 1 = uncompressed linear PCM
        channels,
        sample_rate,
        sample_rate * channels * bit_depth // 8,  # byte rate
        channels * bit_depth // 8,  # block align (bytes per sample frame)
        bit_depth,
    )
    data_header = struct.pack('<4sI', b'data', data_subchunk_size)

    return b"".join((riff_header, fmt_subchunk, data_header, pcm_data, padding))
223
+
224
  def generate_local(
225
  prompt,
226
  history,
 
611
  wav_bytestream = wav_bytestream.tobytes()
612
 
613
  # Directly encode the WAV bytestream to base64
614
+ base64_audio = base64.b64encode(pcm_to_wav(wav_bytestream)).decode('utf8')
615
 
616
  if audio_stream is not None:
617
  return (history, base64_audio)