jauntybrain committed on
Commit
180d507
1 Parent(s): 7555d4c

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +22 -3
handler.py CHANGED
@@ -1,6 +1,11 @@
1
  from typing import Dict, List, Any
2
  from transformers import AutoProcessor, MusicgenForConditionalGeneration
3
  import torch
 
 
 
 
 
4
 
5
  class EndpointHandler:
6
  def __init__(self, path=""):
@@ -28,7 +33,21 @@ class EndpointHandler:
28
  with torch.autocast("cuda"):
29
  outputs = self.model.generate(**inputs, do_sample=False, max_new_tokens=400)
30
 
31
- # postprocess the prediction
32
- prediction = outputs[0].cpu().numpy().tolist()
33
 
34
- return [{"generated_audio": prediction}]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from typing import Dict, List, Any
2
  from transformers import AutoProcessor, MusicgenForConditionalGeneration
3
  import torch
4
+ import array
5
+ import base64
6
+ import io
7
+ import wave
8
+ import numpy as np
9
 
10
  class EndpointHandler:
11
  def __init__(self, path=""):
 
33
  with torch.autocast("cuda"):
34
  outputs = self.model.generate(**inputs, do_sample=False, max_new_tokens=400)
35
 
36
+ # postprocess the prediction
37
+ audio_samples = outputs[0].cpu().numpy()[0].tolist()
38
 
39
+ audio_samples = [int(min(max(sample * 32767, -32768), 32767)) for sample in audio_samples]
40
+
41
+ # Create BytesIO object to capture the audio in-memory
42
+ audio_io = io.BytesIO()
43
+
44
+ # Create WAV file
45
+ with wave.open(audio_io, 'wb') as wf:
46
+ wf.setnchannels(1)
47
+ wf.setsampwidth(2) # 2 bytes for 16-bit PCM
48
+ wf.setframerate(sampling_rate)
49
+ wf.writeframes(array.array('h', audio_samples).tobytes())
50
+
51
+ audio_base64 = base64.b64encode(audio_io.getvalue()).decode('utf-8')
52
+
53
+ return [{'sampling_rate': sampling_rate, 'audio': audio_base64}]