Spaces:
Runtime error
Runtime error
datasciencedojo
committed on
Commit
•
a6370cb
1
Parent(s):
c5239ac
Upload 3 files
Browse files- .gitattributes +1 -0
- TestAudio1.wav +3 -0
- app.py +44 -0
- requirements.txt +6 -0
.gitattributes
CHANGED
@@ -29,3 +29,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
29 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
30 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
31 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
29 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
30 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
31 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
32 |
+
TestAudio1.wav filter=lfs diff=lfs merge=lfs -text
|
TestAudio1.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:63619394ce122f680985b597df2eb2721309811662f0e53a71c688ad3d073426
|
3 |
+
size 4611246
|
app.py
ADDED
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from pyannote.audio import Pipeline
|
3 |
+
from transformers import pipeline
|
4 |
+
|
5 |
+
|
6 |
+
asr = pipeline(
|
7 |
+
"automatic-speech-recognition",
|
8 |
+
model="facebook/wav2vec2-large-960h-lv60-self",
|
9 |
+
feature_extractor="facebook/wav2vec2-large-960h-lv60-self",
|
10 |
+
|
11 |
+
)
|
12 |
+
speaker_segmentation = Pipeline.from_pretrained("pyannote/speaker-segmentation")
|
13 |
+
|
14 |
+
def segmentation(audio):
|
15 |
+
speaker_output = speaker_segmentation(audio)
|
16 |
+
text_output = asr(audio,return_timestamps="word")
|
17 |
+
|
18 |
+
full_text = text_output['text'].lower()
|
19 |
+
chunks = text_output['chunks']
|
20 |
+
|
21 |
+
diarized_output = ""
|
22 |
+
i = 0
|
23 |
+
for turn, _, speaker in speaker_output.itertracks(yield_label=True):
|
24 |
+
diarized = ""
|
25 |
+
while i < len(chunks) and chunks[i]['timestamp'][1] <= turn.end:
|
26 |
+
diarized += chunks[i]['text'].lower() + ' '
|
27 |
+
i += 1
|
28 |
+
|
29 |
+
if diarized != "":
|
30 |
+
diarized_output += "{}: ''{}'' from {:.3f}-{:.3f}\n".format(speaker,diarized,turn.start,turn.end)
|
31 |
+
|
32 |
+
return diarized_output, full_text
|
33 |
+
|
34 |
+
inputs = gr.inputs.Audio(source="upload", type="filepath", label="Upload your audio file here:")
|
35 |
+
outputs = [gr.outputs.Textbox(type="auto", label="Diarized Output"),
|
36 |
+
gr.outputs.Textbox(type="auto",label="Full Text")]
|
37 |
+
examples = [["TestAudio1.wav"],]
|
38 |
+
|
39 |
+
app = gr.Interface(fn=segmentation,
|
40 |
+
inputs=inputs,
|
41 |
+
outputs=outputs,
|
42 |
+
examples=examples,
|
43 |
+
allow_flagging=False)
|
44 |
+
app.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
pyannote.audio
|
2 |
+
https://github.com/pyannote/pyannote-audio/archive/develop.zip
|
3 |
+
speechbrain
|
4 |
+
gradio
|
5 |
+
Jinja2
|
6 |
+
transformers
|