Spaces:

distil-whisper
/

whisper-vs-distil-whisper

Runtime error

App Files Files Community

sanchit-gandhi HF staff committed on Nov 2, 2023

Commit

7091430

•

1 Parent(s): bd5a509

create app.py

Browse files

Files changed (2) hide show

app.py +150 -0
requirements.txt +5 -0

app.py ADDED Viewed

	@@ -0,0 +1,150 @@

+from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
+from transformers.utils import is_flash_attn_2_available
+import torch
+import gradio as gr
+import matplotlib.pyplot as plt
+import time
+import os
+BATCH_SIZE = 16
+TOKEN = os.environ.get("HF_TOKEN", None)
+device = "cuda:0" if torch.cuda.is_available() else "cpu"
+torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+use_flash_attention_2 = is_flash_attn_2_available()
+model = AutoModelForSpeechSeq2Seq.from_pretrained(
+    "openai/whisper-large-v2", torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True, use_flash_attention_2=use_flash_attention_2
+)
+distilled_model = AutoModelForSpeechSeq2Seq.from_pretrained(
+    "sanchit-gandhi/distil-large-v2-private", torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True, use_flash_attention_2=use_flash_attention_2, token=TOKEN
+)
+if not use_flash_attention_2:
+    model = model.bettertransformer()
+    distilled_model = distilled_model.bettertransformer()
+processor = AutoProcessor.from_pretrained("openai/whisper-tiny.en")
+model.to(device)
+distilled_model.to(device)
+pipe = pipeline(
+    "automatic-speech-recognition",
+    model=model,
+    tokenizer=processor.tokenizer,
+    feature_extractor=processor.feature_extractor,
+    max_new_tokens=128,
+    chunk_length_s=30,
+    torch_dtype=torch_dtype,
+    device=device,
+    language="en",
+    task="transcribe",
+)
+pipe_forward = pipe._forward
+distil_pipe = pipeline(
+    "automatic-speech-recognition",
+    model=distilled_model,
+    tokenizer=processor.tokenizer,
+    feature_extractor=processor.feature_extractor,
+    max_new_tokens=128,
+    chunk_length_s=15,
+    torch_dtype=torch_dtype,
+    device=device,
+    language="en",
+    task="transcribe",
+)
+distil_pipe_forward = distil_pipe._forward
+def transcribe(inputs):
+    if inputs is None:
+        raise gr.Error("No audio file submitted! Please record or upload an audio file before submitting your request.")
+    def _forward_distil_time(*args, **kwargs):
+        global distil_runtime
+        start_time = time.time()
+        result = distil_pipe_forward(*args, **kwargs)
+        distil_runtime = time.time() - start_time
+        return result
+    distil_pipe._forward = _forward_distil_time
+    distil_text = distil_pipe(inputs, batch_size=BATCH_SIZE)["text"]
+    yield distil_text, distil_runtime, None, None, None
+    def _forward_time(*args, **kwargs):
+        global runtime
+        start_time = time.time()
+        result = pipe_forward(*args, **kwargs)
+        runtime = time.time() - start_time
+        return result
+    pipe._forward = _forward_time
+    text = pipe(inputs, batch_size=BATCH_SIZE)["text"]
+    relative_latency = runtime / distil_runtime
+    # Create figure and axis
+    fig, ax = plt.subplots(figsize=(5, 5))
+    # Define bar width and positions
+    bar_width = 0.1
+    positions = [0, 0.1]  # Adjusted positions to bring bars closer
+    # Plot data
+    ax.bar(positions[0], distil_runtime, bar_width, edgecolor='black')
+    ax.bar(positions[1], runtime, bar_width, edgecolor='black')
+    # Set title, labels, and xticks
+    ax.set_ylabel('Transcription time (s)')
+    ax.set_xticks(positions)
+    ax.set_xticklabels(['Distil-Whisper', 'Whisper'])
+    # Gridlines and other styling
+    ax.grid(which='major', axis='y', linestyle='--', linewidth=0.5)
+    # Use tight layout to avoid overlaps
+    plt.tight_layout()
+    yield distil_text, distil_runtime, text, runtime, plt
+if __name__ == "__main__":
+    with gr.Blocks() as demo:
+        gr.HTML(
+            """
+                <div style="text-align: center; max-width: 700px; margin: 0 auto;">
+                  <div
+                    style="
+                      display: inline-flex; align-items: center; gap: 0.8rem; font-size: 1.75rem;
+                    "
+                  >
+                    <h1 style="font-weight: 900; margin-bottom: 7px; line-height: normal;">
+                      Distil-Whisper VS Whisper
+                    </h1>
+                  </div>
+                </div>
+            """
+        )
+        gr.HTML(
+            f"""
+            This demo evaluates the <a href="https://huggingface.co/distil-whisper/distil-large-v2"> Distil-Whisper </a> model
+            against the <a href="https://huggingface.co/openai/whisper-large-v2"> Whisper </a> model.
+            """
+        )
+        audio = gr.components.Audio(source="upload", type="filepath", label="Audio file")
+        button = gr.Button("Transcribe")
+        plot = gr.components.Plot()
+        with gr.Row():
+            distil_runtime = gr.components.Textbox(label="Distil-Whisper Transcription Time (s)")
+            runtime = gr.components.Textbox(label="Whisper Transcription Time (s)")
+        with gr.Row():
+            distil_transcription = gr.components.Textbox(label="Distil-Whisper Transcription").style(show_copy_button=True)
+            transcription = gr.components.Textbox(label="Whisper Transcription").style(show_copy_button=True)
+        button.click(
+            fn=transcribe,
+            inputs=audio,
+            outputs=[distil_transcription, distil_runtime, transcription, runtime, plot],
+        )
+    demo.queue().launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+--extra-index-url https://download.pytorch.org/whl/cu113
+torch
+# A requirements file lists requirement specifiers, not shell commands:
+# install transformers from source with a bare VCS URL.
+git+https://github.com/huggingface/transformers
+accelerate
+optimum