Spaces:
Runtime error
Runtime error
Commit
·
628d0d9
1
Parent(s):
2f9b63d
Update app.py
Browse files
app.py
CHANGED
@@ -24,6 +24,7 @@ import wave
|
|
24 |
import contextlib
|
25 |
from sklearn.cluster import AgglomerativeClustering
|
26 |
import numpy as np
|
|
|
27 |
|
28 |
__FILES = set()
|
29 |
|
@@ -157,6 +158,32 @@ def Transcribe_V2(num_speakers, speaker_names, audio="temp_audio.wav"):
|
|
157 |
return speaker
|
158 |
|
159 |
# audio = Audio()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
160 |
def get_output(segments):
|
161 |
# print(segments)
|
162 |
conversation=[]
|
@@ -209,6 +236,8 @@ def Transcribe_V2(num_speakers, speaker_names, audio="temp_audio.wav"):
|
|
209 |
return "Audio duration too long"
|
210 |
|
211 |
result = model.transcribe(audio)
|
|
|
|
|
212 |
|
213 |
segments = result["segments"]
|
214 |
|
|
|
24 |
import contextlib
|
25 |
from sklearn.cluster import AgglomerativeClustering
|
26 |
import numpy as np
|
27 |
+
import json
|
28 |
|
29 |
__FILES = set()
|
30 |
|
|
|
158 |
return speaker
|
159 |
|
160 |
# audio = Audio()
|
161 |
+
def diarization(audio):
    """Run speaker diarization on ``audio`` and return the speaker turns.

    The module-level ``pipeline`` is called on the file, its textual
    rendering is written to ``diarization_<audio>.txt`` (path obtained
    from ``CreateFile``), and that text is parsed into a list of turns.

    ``num_speakers`` is not a parameter: it is read from the enclosing
    scope. When it is truthy it is forwarded to the pipeline, otherwise
    the pipeline is left to decide the speaker count by itself.

    Args:
        audio: Path of the audio file handed to the pipeline.

    Returns:
        A list of ``[start_ms, end_ms, speaker]`` entries, one per parsed
        line, where the times are integer milliseconds and ``speaker`` is
        whatever ``GetSpeaker`` returns for the ``SPEAKER_NN`` label.
    """
    # Raw strings: "\." in a plain literal is an invalid escape sequence.
    timestamp_re = re.compile(r"[0-9]+:[0-9]+:[0-9]+\.[0-9]+")
    # Two or more digits so a label such as SPEAKER_100 is not truncated.
    speaker_re = re.compile(r"SPEAKER_[0-9]{2,}")

    def millisec(timeStr):
        """Convert an ``H:M:S.fff`` timestamp string to whole milliseconds."""
        hours, minutes, seconds = timeStr.split(":")
        total_seconds = int(hours) * 3600 + int(minutes) * 60 + float(seconds)
        # round(), not int(): 1.001 * 1000 evaluates to 1000.9999999999999,
        # which truncation would turn into 1000 instead of 1001.
        return round(total_seconds * 1000)

    DEMO_FILE = {'uri': 'blabal', 'audio': audio}

    # Raise the segmentation onset by 0.1 for this run only. The pipeline
    # object is shared, so the previous value is restored afterwards;
    # otherwise every call would add another 0.1 on top of the last one.
    # NOTE(review): assumes parameters(instantiated=True) reports the
    # currently instantiated values - confirm against pyannote.pipeline.
    hparams = pipeline.parameters(instantiated=True)
    base_onset = hparams["segmentation_onset"]
    hparams["segmentation_onset"] = base_onset + 0.1
    pipeline.instantiate(hparams)
    try:
        if num_speakers:
            dz = pipeline(DEMO_FILE, num_speakers=num_speakers)
        else:
            dz = pipeline(DEMO_FILE)
    finally:
        hparams["segmentation_onset"] = base_onset
        pipeline.instantiate(hparams)

    # Keep the on-disk copy of the result, but parse the in-memory text
    # instead of re-opening the file that was just written.
    # NOTE(review): CreateFile is defined elsewhere; presumably it returns
    # the path to write to and tracks it for cleanup - verify.
    dz_text = str(dz)
    with open(CreateFile(f"diarization_{audio}.txt"), "w") as text_file:
        text_file.write(dz_text)

    dzList = []
    for line in dz_text.splitlines():
        stamps = timestamp_re.findall(line)
        speaker = speaker_re.search(line)
        if len(stamps) != 2 or speaker is None:
            # Not a "<start> ... <end> ... SPEAKER_NN" line (e.g. blank):
            # skip it rather than fail on the unpack / index below.
            continue
        start, end = (millisec(stamp) for stamp in stamps)
        dzList.append([start, end, GetSpeaker(speaker.group(0))])
    return dzList
|
186 |
+
|
187 |
def get_output(segments):
|
188 |
# print(segments)
|
189 |
conversation=[]
|
|
|
236 |
return "Audio duration too long"
|
237 |
|
238 |
result = model.transcribe(audio)
|
239 |
+
json.dumps(result)
|
240 |
+
json.dumps(diarization(audio))
|
241 |
|
242 |
segments = result["segments"]
|
243 |
|