adrian-saez-martinez committed
Commit 6046e53 • 1 Parent(s): c9c13a6

naming base model

Files changed (1): app.py (+16 −16)
app.py CHANGED
@@ -7,7 +7,7 @@ import time
 
 # Load both models
 MODEL_NAME_TURBO = "openai/whisper-large-v3-turbo"
-MODEL_NAME_STANDARD = "openai/whisper-large-v3"
+MODEL_NAME_BASE = "openai/whisper-large-v3"
 
 device = 0 if torch.cuda.is_available() else "cpu"
 
@@ -19,9 +19,9 @@ pipe_turbo = pipeline(
     device=device,
 )
 
-pipe_standard = pipeline(
+pipe_base = pipeline(
     task="automatic-speech-recognition",
-    model=MODEL_NAME_STANDARD,
+    model=MODEL_NAME_BASE,
     chunk_length_s=30,
     device=device,
 )
@@ -34,13 +34,13 @@ def transcribe_turbo(audio):
     elapsed_time = time.time() - start_time
     return text_turbo, elapsed_time
 
-# Function to transcribe audio using the standard model
+# Function to transcribe audio using the base model
 @spaces.GPU
-def transcribe_standard(audio):
+def transcribe_base(audio):
     start_time = time.time()
-    text_standard = pipe_standard(audio)["text"]
+    text_base = pipe_base(audio)["text"]
     elapsed_time = time.time() - start_time
-    return text_standard, elapsed_time
+    return text_base, elapsed_time
 
 # Function to compare transcriptions and speed
 @spaces.GPU
@@ -51,14 +51,14 @@ def compare_transcriptions(audio):
     # Run both transcriptions in parallel
     with concurrent.futures.ThreadPoolExecutor() as executor:
         future_turbo = executor.submit(transcribe_turbo, audio)
-        future_standard = executor.submit(transcribe_standard, audio)
+        future_base = executor.submit(transcribe_base, audio)
 
     # Get the results
     text_turbo, time_turbo = future_turbo.result()
-    text_standard, time_standard = future_standard.result()
+    text_base, time_base = future_base.result()
 
     # Return both transcriptions and processing times
-    return (text_standard, f"{time_standard:.2f} seconds"), (text_turbo, f"{time_turbo:.2f} seconds")
+    return (text_base, f"{time_base:.2f} seconds"), (text_turbo, f"{time_turbo:.2f} seconds")
 
 css = """
 h1 {
@@ -70,8 +70,8 @@ h1 {
 # Gradio Interface
 with gr.Blocks(css=css) as demo:
     # Title and description
-    gr.Markdown("# Whisper large-v3-turbo ...vs... Whisper large-v3")
-    gr.Markdown("This app compares the transcription performance and processing time between openAI 'Whisper large-v3' and 'Whisper large-v3-turbo' models")
+    gr.Markdown("# Whisper large-v3-turbo vs Whisper large-v3")
+    gr.Markdown("This app compares the transcription performance and processing time between OpenAI's Whisper large-v3-turbo and its base model, Whisper large-v3")
 
     with gr.Column():
         with gr.Row():
@@ -82,16 +82,16 @@ with gr.Blocks(css=css) as demo:
         with gr.Row():
             with gr.Row():
                 with gr.Group():
-                    gr.Markdown("### 📝 **Standard model**")
-                    standard_output = gr.Textbox(label="Transcription")
-                    standard_time = gr.Textbox(label="Processing Time")
+                    gr.Markdown("### 📝 **Base model**")
+                    base_output = gr.Textbox(label="Transcription")
+                    base_time = gr.Textbox(label="Processing Time")
                 with gr.Group():
                     gr.Markdown("### ⚡ **Turbo model**")
                     turbo_output = gr.Textbox(label="Transcription")
                     turbo_time = gr.Textbox(label="Processing Time")
 
     # Set up the interaction
-    transcribe_button.click(fn=compare_transcriptions, inputs=audio_input, outputs=[standard_output, standard_time, turbo_output, turbo_time])
+    transcribe_button.click(fn=compare_transcriptions, inputs=audio_input, outputs=[base_output, base_time, turbo_output, turbo_time])
 
 # Launch the demo
 demo.launch()
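
For context, a minimal standalone sketch of the renamed base pipeline outside the Gradio app. It reuses only the calls shown in the diff; sample.wav is a hypothetical local audio file, and transformers and torch are assumed to be installed.

import torch
from transformers import pipeline

MODEL_NAME_BASE = "openai/whisper-large-v3"

# Use the first GPU when available, otherwise fall back to CPU
device = 0 if torch.cuda.is_available() else "cpu"

# Same pipeline configuration as in app.py, minus the Gradio wiring
pipe_base = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME_BASE,
    chunk_length_s=30,
    device=device,
)

# Transcribe a local file; the pipeline returns a dict with a "text" key
print(pipe_base("sample.wav")["text"])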