salmanmapkar committed on
Commit
7da7864
1 Parent(s): 9764cff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -166,7 +166,7 @@ def Transcribe_V2(num_speakers, speaker_names, audio="temp_audio.wav"):
166
  global SPEAKERS
167
  SPEAKERS = [speaker.strip() for speaker in sp.split(',')]
168
 
169
- audio = Audio()
170
  GenerateSpeakerDict(speaker_names)
171
  def get_output(segments):
172
  # print(segments)
@@ -183,7 +183,7 @@ def Transcribe_V2(num_speakers, speaker_names, audio="temp_audio.wav"):
183
  with contextlib.closing(wave.open(path,'r')) as f:
184
  frames = f.getnframes()
185
  rate = f.getframerate()
186
- return frames / float(rate)
187
 
188
  def make_embeddings(path, segments, duration):
189
  embeddings = np.zeros(shape=(len(segments), 192))
@@ -196,7 +196,7 @@ def Transcribe_V2(num_speakers, speaker_names, audio="temp_audio.wav"):
196
  # Whisper overshoots the end timestamp in the last segment
197
  end = min(duration, segment["end"])
198
  clip = Segment(start, end)
199
- waveform, sample_rate = audio.crop(path, clip)
200
  return embedding_model(waveform[None])
201
 
202
  def add_speaker_labels(segments, embeddings, num_speakers):
 
166
  global SPEAKERS
167
  SPEAKERS = [speaker.strip() for speaker in sp.split(',')]
168
 
169
+ # audio = Audio()
170
  GenerateSpeakerDict(speaker_names)
171
  def get_output(segments):
172
  # print(segments)
 
183
  with contextlib.closing(wave.open(path,'r')) as f:
184
  frames = f.getnframes()
185
  rate = f.getframerate()
186
+ return frames / float(rate)
187
 
188
  def make_embeddings(path, segments, duration):
189
  embeddings = np.zeros(shape=(len(segments), 192))
 
196
  # Whisper overshoots the end timestamp in the last segment
197
  end = min(duration, segment["end"])
198
  clip = Segment(start, end)
199
+ waveform, sample_rate = Audio().crop(path, clip)
200
  return embedding_model(waveform[None])
201
 
202
  def add_speaker_labels(segments, embeddings, num_speakers):