datasciencedojo committed
Commit a6370cb
1 Parent(s): c5239ac

Upload 3 files

Files changed (4)
  1. .gitattributes +1 -0
  2. TestAudio1.wav +3 -0
  3. app.py +44 -0
  4. requirements.txt +6 -0
.gitattributes CHANGED
@@ -29,3 +29,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ TestAudio1.wav filter=lfs diff=lfs merge=lfs -text
TestAudio1.wav ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:63619394ce122f680985b597df2eb2721309811662f0e53a71c688ad3d073426
+ size 4611246
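What is committed for TestAudio1.wav is not the audio itself but its Git LFS pointer: a spec version line, the blob's sha256 oid, and its size in bytes. As a minimal sketch (the file paths here are hypothetical), checking a downloaded blob against a pointer like this one could look like:

import hashlib

def verify_lfs_pointer(pointer_path, blob_path):
    # The pointer is plain "key value" lines: version, oid, size.
    fields = dict(line.split(" ", 1) for line in open(pointer_path).read().splitlines())
    expected_oid = fields["oid"].split(":", 1)[1]  # drop the "sha256:" prefix
    expected_size = int(fields["size"])
    with open(blob_path, "rb") as f:
        data = f.read()
    return len(data) == expected_size and hashlib.sha256(data).hexdigest() == expected_oid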
app.py ADDED
@@ -0,0 +1,44 @@
+ import gradio as gr
+ from pyannote.audio import Pipeline
+ from transformers import pipeline
+
+
+ asr = pipeline(
+     "automatic-speech-recognition",
+     model="facebook/wav2vec2-large-960h-lv60-self",
+     feature_extractor="facebook/wav2vec2-large-960h-lv60-self",
+
+ )
+ speaker_segmentation = Pipeline.from_pretrained("pyannote/speaker-segmentation")
+
+ def segmentation(audio):
+     speaker_output = speaker_segmentation(audio)
+     text_output = asr(audio, return_timestamps="word")
+
+     full_text = text_output['text'].lower()
+     chunks = text_output['chunks']
+
+     diarized_output = ""
+     i = 0
+     for turn, _, speaker in speaker_output.itertracks(yield_label=True):
+         diarized = ""
+         while i < len(chunks) and chunks[i]['timestamp'][1] <= turn.end:
+             diarized += chunks[i]['text'].lower() + ' '
+             i += 1
+
+         if diarized != "":
+             diarized_output += "{}: ''{}'' from {:.3f}-{:.3f}\n".format(speaker, diarized, turn.start, turn.end)
+
+     return diarized_output, full_text
+
+ inputs = gr.inputs.Audio(source="upload", type="filepath", label="Upload your audio file here:")
+ outputs = [gr.outputs.Textbox(type="auto", label="Diarized Output"),
+            gr.outputs.Textbox(type="auto", label="Full Text")]
+ examples = [["TestAudio1.wav"],]
+
+ app = gr.Interface(fn=segmentation,
+                    inputs=inputs,
+                    outputs=outputs,
+                    examples=examples,
+                    allow_flagging=False)
+ app.launch()
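The one subtle piece of app.py is the alignment loop: the word-level ASR chunks are walked once, in order, and every word whose end timestamp falls on or before the current pyannote turn's end is attributed to that turn's speaker. A minimal sketch of that logic on invented data (the chunk texts, timestamps, and speaker labels below are hypothetical, not real model output; the chunks follow the shape the transformers pipeline returns for return_timestamps="word", and wav2vec2's uppercase output is why the app lowercases):

chunks = [
    {"text": "HELLO", "timestamp": (0.2, 0.6)},
    {"text": "THERE", "timestamp": (0.7, 1.1)},
    {"text": "HI",    "timestamp": (1.8, 2.0)},
]

# Stand-ins for pyannote's itertracks(yield_label=True) turns.
turns = [(0.0, 1.5, "SPEAKER_00"), (1.5, 2.5, "SPEAKER_01")]

i = 0
for start, end, speaker in turns:
    words = []
    # i persists across turns, so each word is consumed exactly once.
    while i < len(chunks) and chunks[i]["timestamp"][1] <= end:
        words.append(chunks[i]["text"].lower())
        i += 1
    if words:
        print("{}: ''{}'' from {:.3f}-{:.3f}".format(speaker, " ".join(words), start, end))

# Prints:
# SPEAKER_00: ''hello there'' from 0.000-1.500
# SPEAKER_01: ''hi'' from 1.500-2.500

One consequence of this scheme is that a word spanning a turn boundary is attributed to the later speaker, since only its end timestamp is compared against the turn's end.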
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ pyannote.audio
+ https://github.com/pyannote/pyannote-audio/archive/develop.zip
+ speechbrain
+ gradio
+ Jinja2
+ transformers