adrian-saez-martinez committed
Commit 6046e53 • 1 Parent(s): c9c13a6

naming base model

Files changed (1): app.py (+16 −16)
app.py CHANGED
@@ -7,7 +7,7 @@ import time
 
 # Load both models
 MODEL_NAME_TURBO = "openai/whisper-large-v3-turbo"
-MODEL_NAME_STANDARD = "openai/whisper-large-v3"
+MODEL_NAME_BASE = "openai/whisper-large-v3"
 
 device = 0 if torch.cuda.is_available() else "cpu"
 
@@ -19,9 +19,9 @@ pipe_turbo = pipeline(
     device=device,
 )
 
-pipe_standard = pipeline(
+pipe_base = pipeline(
     task="automatic-speech-recognition",
-    model=MODEL_NAME_STANDARD,
+    model=MODEL_NAME_BASE,
     chunk_length_s=30,
     device=device,
 )
@@ -34,13 +34,13 @@ def transcribe_turbo(audio):
     elapsed_time = time.time() - start_time
     return text_turbo, elapsed_time
 
-# Function to transcribe audio using the standard model
+# Function to transcribe audio using the base model
 @spaces.GPU
-def transcribe_standard(audio):
+def transcribe_base(audio):
     start_time = time.time()
-    text_standard = pipe_standard(audio)["text"]
+    text_base = pipe_base(audio)["text"]
     elapsed_time = time.time() - start_time
-    return text_standard, elapsed_time
+    return text_base, elapsed_time
 
 # Function to compare transcriptions and speed
 @spaces.GPU
@@ -51,14 +51,14 @@ def compare_transcriptions(audio):
     # Run both transcriptions in parallel
     with concurrent.futures.ThreadPoolExecutor() as executor:
         future_turbo = executor.submit(transcribe_turbo, audio)
-        future_standard = executor.submit(transcribe_standard, audio)
+        future_base = executor.submit(transcribe_base, audio)
 
     # Get the results
     text_turbo, time_turbo = future_turbo.result()
-    text_standard, time_standard = future_standard.result()
+    text_base, time_base = future_base.result()
 
     # Return both transcriptions and processing times
-    return (text_standard, f"{time_standard:.2f} seconds"), (text_turbo, f"{time_turbo:.2f} seconds")
+    return (text_base, f"{time_base:.2f} seconds"), (text_turbo, f"{time_turbo:.2f} seconds")
 
 css = """
 h1 {
@@ -70,8 +70,8 @@ h1 {
 # Gradio Interface
 with gr.Blocks(css=css) as demo:
     # Title and description
-    gr.Markdown("# Whisper large-v3-turbo ...vs... Whisper large-v3")
-    gr.Markdown("This app compares the transcription performance and processing time between openAI 'Whisper large-v3' and 'Whisper large-v3-turbo' models")
+    gr.Markdown("# Whisper large-v3-turbo vs Whisper large-v3")
+    gr.Markdown("This app compares the transcription performance and processing time between OpenAI's Whisper large-v3-turbo and its base model, Whisper large-v3")
 
     with gr.Column():
         with gr.Row():
@@ -82,16 +82,16 @@ with gr.Blocks(css=css) as demo:
         with gr.Row():
             with gr.Row():
                 with gr.Group():
-                    gr.Markdown("### 📝 **Standard model**")
-                    standard_output = gr.Textbox(label="Transcription")
-                    standard_time = gr.Textbox(label="Processing Time")
+                    gr.Markdown("### 📝 **Base model**")
+                    base_output = gr.Textbox(label="Transcription")
+                    base_time = gr.Textbox(label="Processing Time")
                 with gr.Group():
                     gr.Markdown("### ⚡ **Turbo model**")
                     turbo_output = gr.Textbox(label="Transcription")
                     turbo_time = gr.Textbox(label="Processing Time")
 
     # Set up the interaction
-    transcribe_button.click(fn=compare_transcriptions, inputs=audio_input, outputs=[standard_output, standard_time, turbo_output, turbo_time])
+    transcribe_button.click(fn=compare_transcriptions, inputs=audio_input, outputs=[base_output, base_time, turbo_output, turbo_time])
 
 # Launch the demo
 demo.launch()
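
For context, a minimal standalone sketch of the renamed base pipeline outside the Gradio app. It reuses only the calls shown in the diff; sample.wav is a hypothetical local audio file, and transformers and torch are assumed to be installed.

import torch
from transformers import pipeline

MODEL_NAME_BASE = "openai/whisper-large-v3"

# Use the first GPU when available, otherwise fall back to CPU
device = 0 if torch.cuda.is_available() else "cpu"

# Same pipeline configuration as in app.py, minus the Gradio wiring
pipe_base = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME_BASE,
    chunk_length_s=30,
    device=device,
)

# Transcribe a local file; the pipeline returns a dict with a "text" key
print(pipe_base("sample.wav")["text"])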