KIFF committed on
Commit
d9d7bad
1 Parent(s): a2774a3

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +7 -6
handler.py CHANGED
@@ -1,5 +1,6 @@
1
- from typing import Dict
2
  from pyannote.audio import Pipeline
 
3
  import torch
4
  import base64
5
  import numpy as np
@@ -21,16 +22,16 @@ class EndpointHandler():
21
  """
22
  # process input
23
  inputs = data.pop("inputs", data)
24
- parameters = data.pop("parameters", None) # min_speakers=2, max_speakers=5
25
 
26
  # decode the base64 audio data
27
  audio_data = base64.b64decode(inputs)
28
  audio_nparray = np.frombuffer(audio_data, dtype=np.int16)
29
-
30
  # prepare pyannote input
31
- audio_tensor= torch.from_numpy(audio_nparray).float().unsqueeze(0)
32
  pyannote_input = {"waveform": audio_tensor, "sample_rate": SAMPLE_RATE}
33
-
34
  # apply pretrained pipeline
35
  # pass inputs with all kwargs in data
36
  if parameters is not None:
@@ -44,4 +45,4 @@ class EndpointHandler():
44
  for segment, _, label in diarization.itertracks(yield_label=True)
45
  ]
46
 
47
- return {"diarization": processed_diarization}
 
1
+ from typing import Dict
2
  from pyannote.audio import Pipeline
3
+ from transformers.pipelines.audio_utils import ffmpeg_read
4
  import torch
5
  import base64
6
  import numpy as np
 
22
  """
23
  # process input
24
  inputs = data.pop("inputs", data)
25
+ parameters = data.pop("parameters", None) # min_speakers=2, max_speakers=5
26
 
27
  # decode the base64 audio data
28
  audio_data = base64.b64decode(inputs)
29
  audio_nparray = np.frombuffer(audio_data, dtype=np.int16)
30
+
31
  # prepare pyannote input
32
+ audio_tensor= torch.from_numpy(audio_nparray).unsqueeze(0)
33
  pyannote_input = {"waveform": audio_tensor, "sample_rate": SAMPLE_RATE}
34
+
35
  # apply pretrained pipeline
36
  # pass inputs with all kwargs in data
37
  if parameters is not None:
 
45
  for segment, _, label in diarization.itertracks(yield_label=True)
46
  ]
47
 
48
+ return {"diarization": processed_diarization}