salmanmapkar committed on
Commit
2391914
1 Parent(s): 588b426

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -4
app.py CHANGED
@@ -25,6 +25,7 @@ import contextlib
25
  from sklearn.cluster import AgglomerativeClustering
26
  import numpy as np
27
  import json
 
28
 
29
  __FILES = set()
30
 
@@ -191,17 +192,17 @@ def Transcribe_V2(num_speakers, speaker_names, audio="temp_audio.wav"):
191
  for (i, segment) in enumerate(segments):
192
  # print(f"{i}, {segment["speaker"]}, {segments[i - 1]["speaker"]}, {}")
193
  if not len(conversation):
194
- conversation.append([GetSpeaker(segment["speaker"]), segment["text"].lstrip()])
195
  elif conversation[-1][0] == GetSpeaker(segment["speaker"]):
196
- conversation[-1][1] += segment["text"].lstrip()
197
  else:
198
- conversation.append([GetSpeaker(segment["speaker"]), segment["text"].lstrip()])
199
  # if i == 0 or segments[i - 1]["speaker"] != segment["speaker"]:
200
  # if i != 0:
201
  # conversation.append([GetSpeaker(segment["speaker"]), segment["text"][1:]]) # segment["speaker"] + ' ' + str(time(segment["start"])) + '\n\n'
202
  # conversation[-1][1] += segment["text"][1:]
203
  # return output
204
- return ("".join([f"{speaker} --> {text}\n" for speaker, text in conversation])), ({ "data": [{"speaker": speaker, "text": text} for speaker, text in conversation]})
205
 
206
  def get_duration(path):
207
  with contextlib.closing(wave.open(path,'r')) as f:
 
25
  from sklearn.cluster import AgglomerativeClustering
26
  import numpy as np
27
  import json
28
+ from datetime import timedelta
29
 
30
  __FILES = set()
31
 
 
192
  for (i, segment) in enumerate(segments):
193
  # print(f"{i}, {segment["speaker"]}, {segments[i - 1]["speaker"]}, {}")
194
  if not len(conversation):
195
+ conversation.append([str(timedelta(seconds=float(segment['start']))),str(timedelta(seconds=float(segment['end']))),GetSpeaker(segment["speaker"]), segment["text"].lstrip()])
196
  elif conversation[-1][0] == GetSpeaker(segment["speaker"]):
197
+ conversation[-1][3] += segment["text"].lstrip()
198
  else:
199
+ conversation.append([str(timedelta(seconds=float(segment['start']))),str(timedelta(seconds=float(segment['end']))),GetSpeaker(segment["speaker"]), segment["text"].lstrip()])
200
  # if i == 0 or segments[i - 1]["speaker"] != segment["speaker"]:
201
  # if i != 0:
202
  # conversation.append([GetSpeaker(segment["speaker"]), segment["text"][1:]]) # segment["speaker"] + ' ' + str(time(segment["start"])) + '\n\n'
203
  # conversation[-1][1] += segment["text"][1:]
204
  # return output
205
+ return ("".join([f"[{start}] - {speaker} \n{text}\n" for start, end, speaker, text in conversation])), ({ "data": [{"start": start, "end":end, "speaker": speaker, "text": text} for start, end, speaker, text in conversation]})
206
 
207
  def get_duration(path):
208
  with contextlib.closing(wave.open(path,'r')) as f: