ardaatahan committed on
Commit
50ce297
β€’
1 Parent(s): b54f134

fix explanation texts

Browse files
Files changed (3) hide show
  1. constants.py +0 -3
  2. main.py +3 -3
  3. utils.py +1 -1
constants.py CHANGED
@@ -73,7 +73,6 @@ METHODOLOGY_TEXT = dedent(
73
  - **WER (Word Error Rate)** (⬇️): The ratio of words incorrectly transcribed when comparing the model's output to reference transcriptions, with lower values indicating better accuracy.
74
  - **QoI (Quality of Inference)** (⬆️): The ratio of examples where WhisperKit performs no worse than the reference model.
75
  - This metric does not capture improvements to the reference. It only measures potential regressions.
76
- - **Parity %**: The percentage difference between a model's Average WER on a given device and its Average WER on the Apple M2 Ultra, where a negative value indicates worse performance compared to the M2 Ultra.
77
  - **Multilingual results**: Separated into "language hinted" and "language predicted" categories to evaluate performance with and without prior knowledge of the input language.
78
 
79
  ## Data
@@ -107,7 +106,6 @@ PERFORMANCE_TEXT = dedent(
107
  ## Metrics
108
  - **Speed factor** (⬆️): Computed as the ratio of input audio length to end-to-end WhisperKit latency for transcribing that audio. A speed factor of N means N seconds of input audio was transcribed in 1 second.
109
  - **Tok/s (Tokens per second)** (⬆️): Total number of text decoder forward passes divided by the end-to-end processing time.
110
- - **Parity %**: The percentage difference between a model's Average WER on a given device and its Average WER on the Apple M2 Ultra, where a negative value indicates worse performance compared to the M2 Ultra.
111
 
112
  ## Data
113
 
@@ -136,7 +134,6 @@ COL_NAMES = {
136
  "model": "Model",
137
  "device": "Device",
138
  "os": "OS",
139
- "parity": "Parity %",
140
  "english_wer": "English WER",
141
  "multilingual_wer": "Multilingual WER",
142
  }
 
73
  - **WER (Word Error Rate)** (⬇️): The ratio of words incorrectly transcribed when comparing the model's output to reference transcriptions, with lower values indicating better accuracy.
74
  - **QoI (Quality of Inference)** (⬆️): The ratio of examples where WhisperKit performs no worse than the reference model.
75
  - This metric does not capture improvements to the reference. It only measures potential regressions.
 
76
  - **Multilingual results**: Separated into "language hinted" and "language predicted" categories to evaluate performance with and without prior knowledge of the input language.
77
 
78
  ## Data
 
106
  ## Metrics
107
  - **Speed factor** (⬆️): Computed as the ratio of input audio length to end-to-end WhisperKit latency for transcribing that audio. A speed factor of N means N seconds of input audio was transcribed in 1 second.
108
  - **Tok/s (Tokens per second)** (⬆️): Total number of text decoder forward passes divided by the end-to-end processing time.
 
109
 
110
  ## Data
111
 
 
134
  "model": "Model",
135
  "device": "Device",
136
  "os": "OS",
 
137
  "english_wer": "English WER",
138
  "multilingual_wer": "Multilingual WER",
139
  }
main.py CHANGED
@@ -522,7 +522,7 @@ with gr.Blocks(css=css, theme=gr.themes.Base(font=font)) as demo:
522
  )
523
  with gr.Column(scale=4, elem_classes="exclude_models_column"):
524
  exclude_performance_models = gr.Textbox(
525
- placeholder="πŸ” Exclude (separate multiple queries with ';')",
526
  label="Exclude Models",
527
  )
528
  with gr.Row():
@@ -757,7 +757,7 @@ with gr.Blocks(css=css, theme=gr.themes.Base(font=font)) as demo:
757
  )
758
  with gr.Column(scale=4, elem_classes="exclude_models_column"):
759
  exclude_quality_models = gr.Textbox(
760
- placeholder="πŸ” Exclude Model (separate multiple models with ';')",
761
  label="Exclude Models",
762
  )
763
  with gr.Row():
@@ -1184,7 +1184,7 @@ with gr.Blocks(css=css, theme=gr.themes.Base(font=font)) as demo:
1184
  )
1185
  with gr.Column(scale=4, elem_classes="exclude_models_column"):
1186
  exclude_support_models = gr.Textbox(
1187
- placeholder="πŸ” Exclude Model (separate multiple models with ';')",
1188
  label="Exclude Models",
1189
  )
1190
  with gr.Row():
 
522
  )
523
  with gr.Column(scale=4, elem_classes="exclude_models_column"):
524
  exclude_performance_models = gr.Textbox(
525
+ placeholder="πŸ” Exclude Model (separate multiple queries with ';')",
526
  label="Exclude Models",
527
  )
528
  with gr.Row():
 
757
  )
758
  with gr.Column(scale=4, elem_classes="exclude_models_column"):
759
  exclude_quality_models = gr.Textbox(
760
+ placeholder="πŸ” Exclude Model (separate multiple queries with ';')",
761
  label="Exclude Models",
762
  )
763
  with gr.Row():
 
1184
  )
1185
  with gr.Column(scale=4, elem_classes="exclude_models_column"):
1186
  exclude_support_models = gr.Textbox(
1187
+ placeholder="πŸ” Exclude Model (separate multiple queries with ';')",
1188
  label="Exclude Models",
1189
  )
1190
  with gr.Row():
utils.py CHANGED
@@ -545,7 +545,7 @@ def create_initial_performance_column_dict():
545
  :return: A list of column dictionaries
546
 
547
  This function defines the basic structure of the performance table,
548
- including columns for model, device, OS, parity, average WER, QoI, speed, and tokens per second.
549
  """
550
  return [
551
  [
 
545
  :return: A list of column dictionaries
546
 
547
  This function defines the basic structure of the performance table,
548
+ including columns for model, device, OS, average WER, QoI, speed, and tokens per second.
549
  """
550
  return [
551
  [