Spaces:
Running
Running
mj-new
committed on
Commit
•
bba6ca7
1
Parent(s):
c1b8f16
Improved box plot - colors and hatching
Browse files- app.py +20 -8
- constants.py +9 -1
- utils.py +118 -1
app.py
CHANGED
@@ -2,7 +2,7 @@ import os
|
|
2 |
import streamlit as st
|
3 |
import pandas as pd
|
4 |
from constants import BIGOS_INFO, PELCRA_INFO, ANALYSIS_INFO, ABOUT_INFO, INSPECTION_INFO, COMPARISON_INFO
|
5 |
-
from utils import read_latest_results, basic_stats_per_dimension, retrieve_asr_systems_meta_from_the_catalog, box_plot_per_dimension, get_total_audio_duration, check_impact_of_normalization, calculate_wer_per_meta_category, calculate_wer_per_audio_feature
|
6 |
from app_utils import calculate_height_to_display, filter_dataframe
|
7 |
import matplotlib.pyplot as plt
|
8 |
import numpy as np
|
@@ -253,7 +253,18 @@ with lead_bigos:
|
|
253 |
no_of_unique_speakers = len(df_per_sample["speaker_id"].unique())
|
254 |
|
255 |
df_per_dataset_with_asr_systems_meta = pd.merge(df_per_dataset, df_evaluated_systems, how="left", left_on="system", right_on="Shortname")
|
256 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
257 |
########### EVALUATION PARAMETERS PRESENTATION ################
|
258 |
st.title("Leaderboard for {} {}".format(dataset_short_name, dataset_version))
|
259 |
st.markdown(BIGOS_INFO, unsafe_allow_html=True)
|
@@ -290,9 +301,6 @@ with lead_bigos:
|
|
290 |
h_df_per_system_per_dataset = calculate_height_to_display(df_wer_per_system_from_per_dataset)
|
291 |
st.dataframe(df_wer_per_system_from_per_dataset, height = h_df_per_system_per_dataset )
|
292 |
|
293 |
-
st.subheader("Boxplot showing {} per {} sorted by median values".format(metric, analysis_dim))
|
294 |
-
fig = box_plot_per_dimension(df_per_dataset, metric, analysis_dim, "{} per {}".format(metric, analysis_dim), analysis_dim, metric + "[%]")
|
295 |
-
st.pyplot(fig, clear_figure=True, use_container_width=True)
|
296 |
|
297 |
##################### PER SUBSET ANALYSIS #########################
|
298 |
analysis_dim = "subset"
|
@@ -386,6 +394,13 @@ with lead_pelcra:
|
|
386 |
no_of_unique_speakers = len(df_per_sample["speaker_id"].unique())
|
387 |
|
388 |
df_per_dataset_with_asr_systems_meta = pd.merge(df_per_dataset, df_evaluated_systems, how="left", left_on="system", right_on="Shortname")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
389 |
|
390 |
########### EVALUATION PARAMETERS PRESENTATION ################
|
391 |
st.title("Leaderboard for {} {}".format(dataset_short_name, dataset_version))
|
@@ -422,9 +437,6 @@ with lead_pelcra:
|
|
422 |
h_df_per_system_per_dataset = calculate_height_to_display(df_wer_per_system_from_per_dataset)
|
423 |
st.dataframe(df_wer_per_system_from_per_dataset, height = h_df_per_system_per_dataset )
|
424 |
|
425 |
-
st.subheader("Boxplot showing {} per {} sorted by median values".format(metric, analysis_dim))
|
426 |
-
fig = box_plot_per_dimension(df_per_dataset, metric, analysis_dim, "{} per {}".format(metric, analysis_dim), analysis_dim, metric + "[%]")
|
427 |
-
st.pyplot(fig, clear_figure=True, use_container_width=True)
|
428 |
|
429 |
##################### PER SUBSET ANALYSIS #########################
|
430 |
analysis_dim = "subset"
|
|
|
2 |
import streamlit as st
|
3 |
import pandas as pd
|
4 |
from constants import BIGOS_INFO, PELCRA_INFO, ANALYSIS_INFO, ABOUT_INFO, INSPECTION_INFO, COMPARISON_INFO
|
5 |
+
from utils import read_latest_results, basic_stats_per_dimension, retrieve_asr_systems_meta_from_the_catalog, box_plot_per_dimension, box_plot_per_dimension_with_colors, get_total_audio_duration, check_impact_of_normalization, calculate_wer_per_meta_category, calculate_wer_per_audio_feature
|
6 |
from app_utils import calculate_height_to_display, filter_dataframe
|
7 |
import matplotlib.pyplot as plt
|
8 |
import numpy as np
|
|
|
253 |
no_of_unique_speakers = len(df_per_sample["speaker_id"].unique())
|
254 |
|
255 |
df_per_dataset_with_asr_systems_meta = pd.merge(df_per_dataset, df_evaluated_systems, how="left", left_on="system", right_on="Shortname")
|
256 |
+
print(df_per_dataset_with_asr_systems_meta.sample(5))
|
257 |
+
# save sample to tsv
|
258 |
+
df_per_dataset_with_asr_systems_meta.sample(5).to_csv("sample.tsv", sep="\t", index=False)
|
259 |
+
|
260 |
+
# MOST IMPORTANT RESULTS
|
261 |
+
analysis_dim = "system"
|
262 |
+
metric = "WER"
|
263 |
+
st.subheader("Boxplot showing {} per {} sorted by median values".format(metric, analysis_dim))
|
264 |
+
fig = box_plot_per_dimension_with_colors(df_per_dataset_with_asr_systems_meta, metric, analysis_dim, "{} per {}".format(metric, analysis_dim), analysis_dim, metric + "[%]","System", "Type")
|
265 |
+
st.pyplot(fig, clear_figure=True, use_container_width=True)
|
266 |
+
|
267 |
+
|
268 |
########### EVALUATION PARAMETERS PRESENTATION ################
|
269 |
st.title("Leaderboard for {} {}".format(dataset_short_name, dataset_version))
|
270 |
st.markdown(BIGOS_INFO, unsafe_allow_html=True)
|
|
|
301 |
h_df_per_system_per_dataset = calculate_height_to_display(df_wer_per_system_from_per_dataset)
|
302 |
st.dataframe(df_wer_per_system_from_per_dataset, height = h_df_per_system_per_dataset )
|
303 |
|
|
|
|
|
|
|
304 |
|
305 |
##################### PER SUBSET ANALYSIS #########################
|
306 |
analysis_dim = "subset"
|
|
|
394 |
no_of_unique_speakers = len(df_per_sample["speaker_id"].unique())
|
395 |
|
396 |
df_per_dataset_with_asr_systems_meta = pd.merge(df_per_dataset, df_evaluated_systems, how="left", left_on="system", right_on="Shortname")
|
397 |
+
|
398 |
+
# MOST IMPORTANT RESULTS
|
399 |
+
analysis_dim = "system"
|
400 |
+
metric = "WER"
|
401 |
+
st.subheader("Boxplot showing {} per {} sorted by median values".format(metric, analysis_dim))
|
402 |
+
fig = box_plot_per_dimension_with_colors(df_per_dataset_with_asr_systems_meta, metric, analysis_dim, "{} per {}".format(metric, analysis_dim), analysis_dim, metric + "[%]","System", "Type")
|
403 |
+
st.pyplot(fig, clear_figure=True, use_container_width=True)
|
404 |
|
405 |
########### EVALUATION PARAMETERS PRESENTATION ################
|
406 |
st.title("Leaderboard for {} {}".format(dataset_short_name, dataset_version))
|
|
|
437 |
h_df_per_system_per_dataset = calculate_height_to_display(df_wer_per_system_from_per_dataset)
|
438 |
st.dataframe(df_wer_per_system_from_per_dataset, height = h_df_per_system_per_dataset )
|
439 |
|
|
|
|
|
|
|
440 |
|
441 |
##################### PER SUBSET ANALYSIS #########################
|
442 |
analysis_dim = "subset"
|
constants.py
CHANGED
@@ -1,6 +1,14 @@
|
|
1 |
ABOUT_INFO = "Polish ASR leaderboard by [AMU-CAI team](https://huggingface.co/amu-cai) aims to provide comprehensive overview of performance of ASR/STT systems for Polish. <br>\
|
2 |
The leaderboard currently supports [BIGOS V2](https://huggingface.co/datasets/amu-cai/pl-asr-bigos-v2) and [PELCRA for BIGOS](https://huggingface.co/datasets/pelcra/pl-asr-pelcra-for-bigos) datasets.<br>\
|
3 |
-
To learn more please read blog post [here](https://huggingface.co/blog/michaljunczyk/introducing-polish-asr-leaderboard)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
|
5 |
BIGOS_INFO = "BIGOS (Benchmark Intended Grouping of Open Speech) is the collection of freely available speech datasets curated by the [AMU-CAI team](https://huggingface.co/amu-cai). \
|
6 |
Learn more [here](https://huggingface.co/datasets/amu-cai/pl-asr-bigos-v2)"
|
|
|
1 |
# "About" text for the leaderboard: markdown links plus <br> line breaks,
# followed by a BibTeX citation entry wrapped in a markdown code span.
# Each trailing backslash is a line continuation, so the value is a single
# line with no embedded newlines.
# The \url macro needs an escaped backslash ("\\url"): a bare "\u" would be
# parsed as a unicode escape, and dropping the backslash entirely (as before)
# yields an invalid BibTeX field.
ABOUT_INFO = "Polish ASR leaderboard by [AMU-CAI team](https://huggingface.co/amu-cai) aims to provide comprehensive overview of performance of ASR/STT systems for Polish. <br>\
The leaderboard currently supports [BIGOS V2](https://huggingface.co/datasets/amu-cai/pl-asr-bigos-v2) and [PELCRA for BIGOS](https://huggingface.co/datasets/pelcra/pl-asr-pelcra-for-bigos) datasets.<br>\
To learn more please read blog post [here](https://huggingface.co/blog/michaljunczyk/introducing-polish-asr-leaderboard).<br> \
If you use this work, please use the citation below: <br> \
```@misc{amu_cai_pl_asr_leaderboard, \
author = {Michał Junczyk}, \
title = {{AMU Polish ASR Leaderboard}}, \
year = {2024}, \
howpublished = {\\url{https://huggingface.co/spaces/amu-cai/pl-asr-leaderboard}}, \
publisher = {Hugging Face} \
}```"
|
12 |
|
13 |
BIGOS_INFO = "BIGOS (Benchmark Intended Grouping of Open Speech) is the collection of freely available speech datasets curated by the [AMU-CAI team](https://huggingface.co/amu-cai). \
|
14 |
Learn more [here](https://huggingface.co/datasets/amu-cai/pl-asr-bigos-v2)"
|
utils.py
CHANGED
@@ -7,6 +7,23 @@ import requests
|
|
7 |
import numpy as np
|
8 |
from datasets import Dataset
|
9 |
from huggingface_hub import hf_hub_download
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
def download_tsv_from_google_sheet(sheet_url):
|
12 |
# Modify the Google Sheet URL to export it as TSV
|
@@ -164,7 +181,7 @@ def filter_bottom_outliers(df_input, metric, min_threshold):
|
|
164 |
|
165 |
def box_plot_per_dimension(df_input, metric, dimension, title, xlabel, ylabel):
|
166 |
# Box plot for WER per dataset
|
167 |
-
plt.
|
168 |
|
169 |
# generate box plot without outliers
|
170 |
sns.boxplot(x=dimension, y=metric, data=df_input, order=df_input.groupby(dimension)[metric].median().sort_values().index, showfliers=False)
|
@@ -176,7 +193,107 @@ def box_plot_per_dimension(df_input, metric, dimension, title, xlabel, ylabel):
|
|
176 |
#return figure
|
177 |
return plt
|
178 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
179 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
180 |
def check_impact_of_normalization(data_in, ref_type='orig'):
|
181 |
|
182 |
# Filter the data to include only the specific reference type
|
|
|
7 |
import numpy as np
|
8 |
from datasets import Dataset
|
9 |
from huggingface_hub import hf_hub_download
|
10 |
+
import matplotlib.patches as mpatches
|
11 |
+
import matplotlib as mpl
|
12 |
+
|
13 |
+
|
14 |
+
# Fixed hex colors for known ASR system names, used to color box plots.
# Systems not listed here get a palette color assigned at plot time.
# Add or override other systems and their colors as needed.
asr_systems_colors_mapping = dict(
    azure='#1f77b4',          # Blue
    google='#2ca02c',         # Green
    wav2vec2='#d62728',       # Red
    nemo='#9467bd',           # Purple
    assemblyai='#8c564b',     # Brown
    mms='#e377c2',            # Pink
    google_v2='#7f7f7f',      # Gray
    whisper_cloud='#bcbd22',  # Olive
    whisper_local='#ff7f0e',  # Orange
)
|
27 |
|
28 |
def download_tsv_from_google_sheet(sheet_url):
|
29 |
# Modify the Google Sheet URL to export it as TSV
|
|
|
181 |
|
182 |
def box_plot_per_dimension(df_input, metric, dimension, title, xlabel, ylabel):
|
183 |
# Box plot for WER per dataset
|
184 |
+
fig, ax = plt.subplots(figsize=(20, 10))
|
185 |
|
186 |
# generate box plot without outliers
|
187 |
sns.boxplot(x=dimension, y=metric, data=df_input, order=df_input.groupby(dimension)[metric].median().sort_values().index, showfliers=False)
|
|
|
193 |
#return figure
|
194 |
return plt
|
195 |
|
196 |
+
def box_plot_per_dimension_with_colors(df_input, metric, dimension, title, xlabel, ylabel, system_col, type_col):
    """Draw a median-sorted box plot colored by system and hatched by type.

    One box is drawn per distinct value of ``dimension``; boxes are ordered
    by the median of ``metric`` (ascending) and outliers are hidden. Each
    box is filled with the color of its ``system_col`` value and hatched
    according to its ``type_col`` value. Two legends (systems, types) are
    added to the axes.

    Args:
        df_input: DataFrame holding ``metric``, ``dimension``, ``system_col``
            and ``type_col`` columns.
        metric: Name of the numeric column plotted on the y-axis.
        dimension: Name of the categorical column plotted on the x-axis.
        title: Plot title.
        xlabel: Label for the x-axis.
        ylabel: Label for the y-axis.
        system_col: Column whose value selects the box fill color.
        type_col: Column whose value selects the box hatching pattern.

    Returns:
        The matplotlib figure containing the plot.
    """
    fig, ax = plt.subplots(figsize=(12, 8))

    # Order the categories by the median of the metric.
    order = df_input.groupby(dimension)[metric].median().sort_values().index.tolist()

    # Missing values (e.g. rows without catalog metadata) are excluded from
    # the legends and rendered with the neutral fallbacks below.
    fallback_color = 'lightgray'
    fallback_hatch = ''

    # Work on a copy: updating the module-level mapping in place would leak
    # palette colors assigned for one dataset into every later call.
    unique_systems = df_input[system_col].dropna().unique()
    system_color_mapping = dict(asr_systems_colors_mapping)
    remaining_systems = [s for s in unique_systems if s not in system_color_mapping]
    palette = sns.color_palette("tab10", len(remaining_systems))
    system_color_mapping.update(zip(remaining_systems, palette))

    # Hatching patterns per type; unknown types cycle through the defaults.
    unique_types = df_input[type_col].dropna().unique()
    type_hatch_mapping = {
        'free': '',           # No hatching
        'commercial': '///',  # Diagonal hatching
    }
    default_hatches = ['', '///', '\\\\', 'xx', '++', '--', '...']
    for idx, t in enumerate(unique_types):
        if t not in type_hatch_mapping:
            type_hatch_mapping[t] = default_hatches[idx % len(default_hatches)]

    # The first row seen for each dimension value supplies its system and
    # type; reindexing aligns the lookup with the plotting order.
    per_dimension = df_input.drop_duplicates(subset=dimension).set_index(dimension).reindex(order)
    colors = [system_color_mapping.get(s, fallback_color) for s in per_dimension[system_col]]
    hatches = [type_hatch_mapping.get(t, fallback_hatch) for t in per_dimension[type_col]]

    # Draw plain white boxes first; they are recolored individually below.
    sns.boxplot(
        x=dimension,
        y=metric,
        data=df_input,
        order=order,
        ax=ax,
        showfliers=False,
        linewidth=1.5,
        boxprops=dict(facecolor='white'),
    )

    # Depending on the seaborn/matplotlib version the boxes are exposed via
    # ax.artists or ax.patches, so try both.
    box_patches = [patch for patch in ax.artists if isinstance(patch, mpatches.PathPatch)]
    if not box_patches:
        box_patches = [patch for patch in ax.patches if isinstance(patch, mpatches.PathPatch)]

    for patch, color, hatch in zip(box_patches, colors, hatches):
        patch.set_facecolor(color)
        patch.set_edgecolor('black')
        patch.set_linewidth(1.5)
        patch.set_hatch(hatch)

    # Legend entries for systems (colors) and types (hatching patterns).
    system_handles = [
        mpatches.Patch(facecolor=system_color_mapping[system], edgecolor='black', label=system)
        for system in unique_systems
    ]
    type_handles = [
        mpatches.Patch(facecolor='white', edgecolor='black', hatch=type_hatch_mapping[typ], label=typ)
        for typ in unique_types
    ]

    # The second ax.legend() call replaces the first legend, so the first
    # one is re-attached explicitly as an extra artist.
    legend1 = ax.legend(handles=system_handles, title='System', bbox_to_anchor=(0.01, 1), loc='upper left')
    ax.legend(handles=type_handles, title='Type', bbox_to_anchor=(0.01, 0.6), loc='upper left')
    ax.add_artist(legend1)

    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)

    # Improve readability of the x-axis labels: smaller font, rotated by
    # 55 degrees and right-aligned so each label ends under its box.
    ax.tick_params(axis='x', labelsize=8)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=55, ha='right')

    # Finer y-axis granularity: allow up to 20 major tick intervals.
    ax.yaxis.set_major_locator(plt.MaxNLocator(20))

    fig.tight_layout()

    return fig
|
295 |
+
|
296 |
+
|
297 |
def check_impact_of_normalization(data_in, ref_type='orig'):
|
298 |
|
299 |
# Filter the data to include only the specific reference type
|