KIFF committed
Commit 3c44976
Parent: 5bca4d0

Update handler.py

Files changed (1): handler.py (+8, -6)
handler.py CHANGED
@@ -2,17 +2,16 @@ from typing import Dict
 from pyannote.audio import Pipeline
 from transformers.pipelines.audio_utils import ffmpeg_read
 import torch
+import base64
+import numpy as np
 
 SAMPLE_RATE = 16000
 
-
-
 class EndpointHandler():
     def __init__(self, path=""):
         # load the model
         self.pipeline = Pipeline.from_pretrained("philschmid/pyannote-speaker-diarization-endpoint")
 
-
     def __call__(self, data: Dict[str, bytes]) -> Dict[str, str]:
         """
         Args:
@@ -23,14 +22,16 @@ class EndpointHandler():
         """
         # process input
         inputs = data.pop("inputs", data)
-        parameters = data.pop("parameters", None) # min_speakers=2, max_speakers=5
+        parameters = data.pop("parameters", None) # min_speakers=2, max_speakers=5
 
+        # decode the base64 audio data
+        audio_data = base64.b64decode(inputs)
+        audio_nparray = np.frombuffer(audio_data, dtype=np.int16)
 
         # prepare pyannote input
-        audio_nparray = ffmpeg_read(inputs, SAMPLE_RATE)
         audio_tensor = torch.from_numpy(audio_nparray).unsqueeze(0)
         pyannote_input = {"waveform": audio_tensor, "sample_rate": SAMPLE_RATE}
-
+
         # apply pretrained pipeline
         # pass inputs with all kwargs in data
         if parameters is not None:
@@ -45,3 +46,4 @@ class EndpointHandler():
         ]
 
         return {"diarization": processed_diarization}
+
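With this change the handler no longer runs the raw request bytes through ffmpeg_read; it now expects "inputs" to carry base64-encoded raw 16-bit PCM samples at the hard-coded SAMPLE_RATE of 16000 Hz. A minimal client-side sketch of building and sending such a payload follows. The endpoint URL and token are placeholders, and the use of requests plus the standard-library wave module is an illustrative assumption, not part of the commit:

import base64
import json
import wave

import requests  # assumed HTTP client, not part of the commit

# Hypothetical deployment details -- replace with a real endpoint and token.
ENDPOINT_URL = "https://my-diarization-endpoint.example.com"
HF_TOKEN = "hf_..."


def build_payload(wav_path: str) -> dict:
    # Read a mono 16 kHz, 16-bit PCM WAV file; readframes() returns only the
    # raw sample bytes, matching the handler's np.frombuffer(..., dtype=np.int16).
    with wave.open(wav_path, "rb") as wav:
        assert wav.getframerate() == 16000, "handler assumes SAMPLE_RATE = 16000"
        assert wav.getsampwidth() == 2, "handler decodes 16-bit samples"
        pcm_bytes = wav.readframes(wav.getnframes())
    return {
        "inputs": base64.b64encode(pcm_bytes).decode("utf-8"),
        "parameters": {"min_speakers": 2, "max_speakers": 5},
    }


response = requests.post(
    ENDPOINT_URL,
    headers={"Authorization": f"Bearer {HF_TOKEN}", "Content-Type": "application/json"},
    data=json.dumps(build_payload("meeting.wav")),
)
print(response.json()["diarization"])

One caveat: np.frombuffer(..., dtype=np.int16) yields integer samples, so torch.from_numpy produces an int16 waveform tensor. pyannote pipelines generally expect floating-point waveforms, so a deployment may additionally need a conversion such as audio_nparray.astype(np.float32) / 32768.0 before building the tensor; that conversion is not part of this commit.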