dwarkesh committed on
Commit
4731011
1 Parent(s): c97242d

fix first speaker bug

Browse files
Files changed (1) hide show
  1. app.py +3 -2
app.py CHANGED
@@ -21,7 +21,8 @@ import numpy as np
21
  model = whisper.load_model("large-v2")
22
  embedding_model = PretrainedSpeakerEmbedding(
23
  "speechbrain/spkrec-ecapa-voxceleb",
24
- device=torch.device("cuda"))
 
25
 
26
  def transcribe(audio, num_speakers):
27
  path = convert_to_wav(audio)
@@ -84,7 +85,7 @@ def get_output(segments):
84
  for (i, segment) in enumerate(segments):
85
  if i > 0:
86
  output += '\n\n'
87
- if segments[i - 1]["speaker"] != segment["speaker"]:
88
  output += segment["speaker"] + ' ' + str(time(segment["start"])) + '\n\n'
89
  output += segment["text"][1:] + ' '
90
  return output
 
21
  model = whisper.load_model("large-v2")
22
  embedding_model = PretrainedSpeakerEmbedding(
23
  "speechbrain/spkrec-ecapa-voxceleb",
24
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
25
+ )
26
 
27
  def transcribe(audio, num_speakers):
28
  path = convert_to_wav(audio)
 
85
  for (i, segment) in enumerate(segments):
86
  if i > 0:
87
  output += '\n\n'
88
+ if i == 0 or segments[i - 1]["speaker"] != segment["speaker"]:
89
  output += segment["speaker"] + ' ' + str(time(segment["start"])) + '\n\n'
90
  output += segment["text"][1:] + ' '
91
  return output