KIFF committed on
Commit
7c12535
·
1 Parent(s): be15a16

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +6 -7
handler.py CHANGED
@@ -1,6 +1,5 @@
1
- from typing import Dict
2
  from pyannote.audio import Pipeline
3
- from transformers.pipelines.audio_utils import ffmpeg_read
4
  import torch
5
  import base64
6
  import numpy as np
@@ -22,16 +21,16 @@ class EndpointHandler():
22
  """
23
  # process input
24
  inputs = data.pop("inputs", data)
25
- parameters = data.pop("parameters", None) # min_speakers=2, max_speakers=5
26
 
27
  # decode the base64 audio data
28
  audio_data = base64.b64decode(inputs)
29
  audio_nparray = np.frombuffer(audio_data, dtype=np.int16)
30
-
31
  # prepare pyannote input
32
- audio_tensor= torch.from_numpy(audio_nparray).unsqueeze(0)
33
  pyannote_input = {"waveform": audio_tensor, "sample_rate": SAMPLE_RATE}
34
-
35
  # apply pretrained pipeline
36
  # pass inputs with all kwargs in data
37
  if parameters is not None:
@@ -45,4 +44,4 @@ class EndpointHandler():
45
  for segment, _, label in diarization.itertracks(yield_label=True)
46
  ]
47
 
48
- return {"diarization": processed_diarization}
 
1
+ from typing import Dict
2
  from pyannote.audio import Pipeline
 
3
  import torch
4
  import base64
5
  import numpy as np
 
21
  """
22
  # process input
23
  inputs = data.pop("inputs", data)
24
+ parameters = data.pop("parameters", None) # min_speakers=2, max_speakers=5
25
 
26
  # decode the base64 audio data
27
  audio_data = base64.b64decode(inputs)
28
  audio_nparray = np.frombuffer(audio_data, dtype=np.int16)
29
+
30
  # prepare pyannote input
31
+ audio_tensor= torch.from_numpy(audio_nparray).float().unsqueeze(0)
32
  pyannote_input = {"waveform": audio_tensor, "sample_rate": SAMPLE_RATE}
33
+
34
  # apply pretrained pipeline
35
  # pass inputs with all kwargs in data
36
  if parameters is not None:
 
44
  for segment, _, label in diarization.itertracks(yield_label=True)
45
  ]
46
 
47
+ return {"diarization": processed_diarization}