Commit 50ce297 by ardaatahan • 1 parent: b54f134

fix explanation texts

Files changed:
- constants.py +0 -3
- main.py +3 -3
- utils.py +1 -1
constants.py
CHANGED

@@ -73,7 +73,6 @@ METHODOLOGY_TEXT = dedent(
     - **WER (Word Error Rate)** (⬇️): The ratio of words incorrectly transcribed when comparing the model's output to reference transcriptions, with lower values indicating better accuracy.
     - **QoI (Quality of Inference)** (⬆️): The ratio of examples where WhisperKit performs no worse than the reference model.
         - This metric does not capture improvements to the reference. It only measures potential regressions.
-    - **Parity %**: The percentage difference between a model's Average WER on a given device and its Average WER on the Apple M2 Ultra, where a negative value indicates worse performance compared to the M2 Ultra.
     - **Multilingual results**: Separated into "language hinted" and "language predicted" categories to evaluate performance with and without prior knowledge of the input language.

     ## Data
@@ -107,7 +106,6 @@ PERFORMANCE_TEXT = dedent(
     ## Metrics
     - **Speed factor** (⬆️): Computed as the ratio of input audio length to end-to-end WhisperKit latency for transcribing that audio. A speed factor of N means N seconds of input audio was transcribed in 1 second.
     - **Tok/s (Tokens per second)** (⬆️): Total number of text decoder forward passes divided by the end-to-end processing time.
-    - **Parity %**: The percentage difference between a model's Average WER on a given device and its Average WER on the Apple M2 Ultra, where a negative value indicates worse performance compared to the M2 Ultra.

     ## Data

@@ -136,7 +134,6 @@ COL_NAMES = {
     "model": "Model",
     "device": "Device",
     "os": "OS",
-    "parity": "Parity %",
     "english_wer": "English WER",
     "multilingual_wer": "Multilingual WER",
 }
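For readers who want to sanity-check the metric definitions quoted above, here is a minimal sketch of how they could be computed. This is illustrative only and not code from this Space; the function names and signatures are assumptions, and the formulas follow directly from the text (speed factor = audio length / latency, Tok/s = decoder forward passes / latency, QoI = share of examples where the test model is no worse than the reference).

```python
# Illustrative sketch only -- not part of this Space's code.
# Function names and signatures are assumptions; the formulas mirror the
# descriptions in METHODOLOGY_TEXT and PERFORMANCE_TEXT.

def speed_factor(audio_seconds: float, latency_seconds: float) -> float:
    """Input audio length divided by end-to-end transcription latency.

    A value of N means N seconds of audio were transcribed in 1 second
    (higher is better).
    """
    return audio_seconds / latency_seconds


def tokens_per_second(decoder_forward_passes: int, latency_seconds: float) -> float:
    """Total text decoder forward passes divided by end-to-end processing time."""
    return decoder_forward_passes / latency_seconds


def quality_of_inference(test_wers: list[float], reference_wers: list[float]) -> float:
    """Share of examples where the test model's WER is no worse than the reference's.

    Improvements over the reference are not rewarded beyond counting as
    "no worse"; the metric only surfaces potential regressions.
    """
    no_worse = sum(t <= r for t, r in zip(test_wers, reference_wers))
    return no_worse / len(test_wers)
```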
main.py
CHANGED

@@ -522,7 +522,7 @@ with gr.Blocks(css=css, theme=gr.themes.Base(font=font)) as demo:
             )
             with gr.Column(scale=4, elem_classes="exclude_models_column"):
                 exclude_performance_models = gr.Textbox(
-                    placeholder="🔍 Exclude (separate multiple queries with ';')",
+                    placeholder="🔍 Exclude Model (separate multiple queries with ';')",
                     label="Exclude Models",
                 )
         with gr.Row():
@@ -757,7 +757,7 @@ with gr.Blocks(css=css, theme=gr.themes.Base(font=font)) as demo:
             )
             with gr.Column(scale=4, elem_classes="exclude_models_column"):
                 exclude_quality_models = gr.Textbox(
-                    placeholder="🔍 Exclude Model (separate multiple
+                    placeholder="🔍 Exclude Model (separate multiple queries with ';')",
                     label="Exclude Models",
                 )
         with gr.Row():
@@ -1184,7 +1184,7 @@ with gr.Blocks(css=css, theme=gr.themes.Base(font=font)) as demo:
             )
             with gr.Column(scale=4, elem_classes="exclude_models_column"):
                 exclude_support_models = gr.Textbox(
-                    placeholder="🔍 Exclude Model (separate multiple
+                    placeholder="🔍 Exclude Model (separate multiple queries with ';')",
                     label="Exclude Models",
                 )
         with gr.Row():
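The placeholder text above tells users to separate multiple exclude queries with ';'. As a rough illustration of how such a textbox value might be consumed, here is a hypothetical parser and filter; these helpers are not taken from main.py, and the Space's actual filtering logic may differ.

```python
# Hypothetical helpers -- not taken from main.py. They show one plausible way
# a ';'-separated "Exclude Models" query could be applied to a list of names.

def parse_exclude_query(raw: str) -> list[str]:
    """Split the textbox value on ';' and drop empty or whitespace-only entries."""
    return [part.strip() for part in raw.split(";") if part.strip()]


def filter_models(model_names: list[str], raw_query: str) -> list[str]:
    """Keep models whose names match none of the excluded substrings (case-insensitive)."""
    excluded = [q.lower() for q in parse_exclude_query(raw_query)]
    return [m for m in model_names if not any(q in m.lower() for q in excluded)]


# Example with made-up model names: excluding "large-v3" and "distil" keeps
# only the models that match neither query.
print(filter_models(["whisper-tiny", "whisper-large-v3", "distil-whisper-small"],
                    "large-v3; distil"))  # -> ['whisper-tiny']
```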
utils.py
CHANGED

@@ -545,7 +545,7 @@ def create_initial_performance_column_dict():
     :return: A list of column dictionaries

     This function defines the basic structure of the performance table,
-    including columns for model, device, OS,
+    including columns for model, device, OS, average WER, QoI, speed, and tokens per second.
     """
     return [
         [
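The updated docstring lists the columns the initial performance table starts with. Purely as a guess at the shape, here is a hypothetical version of create_initial_performance_column_dict(); the real function in utils.py may use different field names, ordering, and per-entry structure, and only "model", "device", and "os" appear in the COL_NAMES mapping above.

```python
# Hypothetical sketch -- the real create_initial_performance_column_dict() in
# utils.py may differ. Each entry is guessed as [field, label, visible];
# field names other than "model", "device", and "os" are assumptions.

def create_initial_performance_column_dict():
    """Return one [field, label, visible] entry per performance-table column."""
    return [
        ["model", "Model", True],
        ["device", "Device", True],
        ["os", "OS", True],
        ["average_wer", "Average WER", True],    # assumed field name
        ["qoi", "QoI", True],                    # assumed field name
        ["speed", "Speed", True],                # assumed field name
        ["tokens_per_second", "Tok/s", True],    # assumed field name
    ]
```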