pbotsaris committed on
Commit
edf8016
1 Parent(s): 9634b4e

added temp sine wave to test base64 encoding

Browse files
__pycache__/handler.cpython-310.pyc CHANGED
Binary files a/__pycache__/handler.cpython-310.pyc and b/__pycache__/handler.cpython-310.pyc differ
 
__pycache__/handler.cpython-311.pyc ADDED
Binary file (4.42 kB). View file
 
handler.py CHANGED
@@ -4,6 +4,43 @@ from transformers import AutoProcessor, MusicgenForConditionalGeneration
4
  import torch
5
  import io
6
  import base64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  def create_params(params, fr):
9
  # default
@@ -38,7 +75,7 @@ class EndpointHandler:
38
  self.model = MusicgenForConditionalGeneration.from_pretrained(path, torch_dtype=torch.float16)
39
  self.model.to('cuda')
40
 
41
- def __call__(self, data: Dict[str, Any]) -> str:
42
  """
43
  Args:
44
  data (:dict:):
@@ -47,37 +84,37 @@ class EndpointHandler:
47
  Returns: wav file in bytes
48
  """
49
 
50
- inputs = data.pop("inputs", data)
51
- params = data.pop("parameters", None)
52
 
53
- inputs = self.processor(
54
- text=[inputs],
55
- padding=True,
56
- return_tensors="pt"
57
- ).to('cuda')
58
 
59
- params = create_params(params, self.model.config.audio_encoder.frame_rate)
60
 
61
- with torch.cuda.amp.autocast():
62
- outputs = self.model.generate(**inputs, **params)
63
 
64
- pred = outputs[0, 0].cpu().numpy()
65
- sr = 32000
66
 
67
- try:
68
- sr = self.model.config.audio_encoder.sampling_rate
69
 
70
- except:
71
- sr = 32000
72
 
73
- wav_buffer = io.BytesIO()
74
- wavfile.write(wav_buffer, rate=sr, data=pred)
 
75
 
76
- wav_data = wav_buffer.getvalue()
77
- base64_encoded_wav = base64.b64encode(wav_data).decode('utf-8')
78
 
79
- # will return the audio in base64 and client must request with accept: image/jpeg and decode the base64
80
- return base64_encoded_wav
81
 
82
 
83
  if __name__ == "__main__":
 
4
  import torch
5
  import io
6
  import base64
7
+ import wave
8
+ import array
9
+ import math
10
+
11
def generate_sine_wave(freq, duration, sample_rate, amplitude):
    """Return a sine tone as an array of signed short samples.

    Args:
        freq: Tone frequency in Hz.
        duration: Length of the tone in seconds.
        sample_rate: Number of samples per second.
        amplitude: Peak sample value; each sample is
            ``amplitude * sin(2 * pi * freq * x / sample_rate)``.

    Returns:
        An ``array.array`` with type code ``"h"`` containing
        ``int(sample_rate * duration)`` samples.

    Raises:
        OverflowError: If a rounded sample does not fit in a signed short.
    """
    n_samples = int(sample_rate * duration)

    # round() really does round to the nearest integer; int() would truncate
    # toward zero and bias every sample slightly toward silence.
    samples = (
        round(amplitude * math.sin(2 * math.pi * freq * x / sample_rate))
        for x in range(n_samples)
    )

    return array.array("h", samples)  # array of short integers
20
+
21
+
22
def sine_to_base64(frequency=440.0, duration=1.0, volume=0.5, sample_rate=44100):
    """Render a sine tone to an in-memory WAV file and return it as base64 text.

    Called with no arguments it produces the same tone as before: 440 Hz,
    one second, half volume, 44100 samples per second.

    Args:
        frequency: Tone frequency in Hz.
        duration: Length of the tone in seconds.
        volume: Loudness from 0.0 to 1.0, scaled to the 16-bit peak of 32767.
        sample_rate: Samples per second; converted to ``int`` for the header.

    Returns:
        The base64 encoding (as a ``str``) of a mono, 2-bytes-per-sample,
        uncompressed WAV file.

    Raises:
        ValueError: If ``volume`` is outside the 0.0 to 1.0 range.
    """
    if not 0.0 <= volume <= 1.0:
        raise ValueError("volume must be between 0.0 and 1.0")

    sample_rate = int(sample_rate)
    amplitude = int(volume * 32767)  # 16-bit audio

    sine_wave = generate_sine_wave(frequency, duration, sample_rate, amplitude)

    wav_buffer = io.BytesIO()
    with wave.open(wav_buffer, "wb") as wav_file:
        # (nchannels, sampwidth, framerate, nframes, comptype, compname)
        wav_file.setparams(
            (1, 2, sample_rate, len(sine_wave), "NONE", "not compressed")
        )
        wav_file.writeframes(sine_wave.tobytes())

    # Read the buffer only after the writer has closed, so the header is final.
    return base64.b64encode(wav_buffer.getvalue()).decode("utf-8")
43
+
44
 
45
  def create_params(params, fr):
46
  # default
 
75
  self.model = MusicgenForConditionalGeneration.from_pretrained(path, torch_dtype=torch.float16)
76
  self.model.to('cuda')
77
 
78
+ def __call__(self, data: Dict[str, Any]) -> List[Dict[str, str]]:
79
  """
80
  Args:
81
  data (:dict:):
 
84
  Returns: wav file in bytes
85
  """
86
 
87
+ # inputs = data.pop("inputs", data)
88
+ # params = data.pop("parameters", None)
89
 
90
+ # inputs = self.processor(
91
+ # text=[inputs],
92
+ # padding=True,
93
+ # return_tensors="pt"
94
+ # ).to('cuda')
95
 
96
+ # params = create_params(params, self.model.config.audio_encoder.frame_rate)
97
 
98
+ # with torch.cuda.amp.autocast():
99
+ # outputs = self.model.generate(**inputs, **params)
100
 
101
+ # pred = outputs[0, 0].cpu().numpy()
102
+ # sr = 32000
103
 
104
+ # try:
105
+ # sr = self.model.config.audio_encoder.sampling_rate
106
 
107
+ # except:
108
+ # sr = 32000
109
 
110
+ # wav_buffer = io.BytesIO()
111
+ # wavfile.write(wav_buffer, rate=sr, data=pred)
112
+ # wav_data = wav_buffer.getvalue()
113
 
114
+ # base64_encoded_wav = base64.b64encode(wav_data).decode('utf-8')
 
115
 
116
+ base64_encoded_wav = sine_to_base64()
117
+ return [{"audio": base64_encoded_wav}]
118
 
119
 
120
  if __name__ == "__main__":
test.js ADDED
The diff for this file is too large to render. See raw diff