whisper-speaker-diarization

Runtime error

sanchit-gandhi HF staff committed on Jan 25, 2023

Commit

4b01587

•

1 Parent(s): 7860c23

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -2,17 +2,15 @@ import torch
 import gradio as gr
 import pytube as pt
-from transformers import pipeline
 from huggingface_hub import model_info
-MODEL_NAME = "openai/whisper-small" #this always needs to stay in line 8 :D sorry for the hackiness
-lang = "en"
 device = 0 if torch.cuda.is_available() else "cpu"
-pipe = pipeline(
-    task="automatic-speech-recognition",
-    model=MODEL_NAME,
-    chunk_length_s=30,
     device=device,
 )
@@ -31,9 +29,9 @@ def transcribe(microphone, file_upload):
     file = microphone if microphone is not None else file_upload
-    text = pipe(file)["text"]
-    return warn_output + text
 def _return_yt_html_embed(yt_url):
@@ -51,9 +49,9 @@ def yt_transcribe(yt_url):
     stream = yt.streams.filter(only_audio=True)[0]
     stream.download(filename="audio.mp3")
-    text = pipe("audio.mp3")["text"]
-    return html_embed_str, text
 demo = gr.Blocks()

 import gradio as gr
 import pytube as pt
+from asr_diarizer import ASRDiarizationPipeline
 from huggingface_hub import model_info
+MODEL_NAME = "openai/whisper-small"
 device = 0 if torch.cuda.is_available() else "cpu"
+pipe = ASRDiarizationPipeline(
+    asr_model=MODEL_NAME,
     device=device,
 )
     file = microphone if microphone is not None else file_upload
+    text = pipe(file)
+    return warn_output + "\n\n".join([chunk["speaker"] + chunk["text"] for chunk in text])
 def _return_yt_html_embed(yt_url):
     stream = yt.streams.filter(only_audio=True)[0]
     stream.download(filename="audio.mp3")
+    text = pipe("audio.mp3")
+    return html_embed_str, "\n\n".join([chunk["speaker"] + chunk["text"] for chunk in text])
 demo = gr.Blocks()