mj-new committed on
Commit
bba6ca7
1 Parent(s): c1b8f16

Improved box plot - colors and hatching

Browse files
Files changed (3) hide show
  1. app.py +20 -8
  2. constants.py +9 -1
  3. utils.py +118 -1
app.py CHANGED
@@ -2,7 +2,7 @@ import os
2
  import streamlit as st
3
  import pandas as pd
4
  from constants import BIGOS_INFO, PELCRA_INFO, ANALYSIS_INFO, ABOUT_INFO, INSPECTION_INFO, COMPARISON_INFO
5
- from utils import read_latest_results, basic_stats_per_dimension, retrieve_asr_systems_meta_from_the_catalog, box_plot_per_dimension, get_total_audio_duration, check_impact_of_normalization, calculate_wer_per_meta_category, calculate_wer_per_audio_feature
6
  from app_utils import calculate_height_to_display, filter_dataframe
7
  import matplotlib.pyplot as plt
8
  import numpy as np
@@ -253,7 +253,18 @@ with lead_bigos:
253
  no_of_unique_speakers = len(df_per_sample["speaker_id"].unique())
254
 
255
  df_per_dataset_with_asr_systems_meta = pd.merge(df_per_dataset, df_evaluated_systems, how="left", left_on="system", right_on="Shortname")
256
-
 
 
 
 
 
 
 
 
 
 
 
257
  ########### EVALUATION PARAMETERS PRESENTATION ################
258
  st.title("Leaderboard for {} {}".format(dataset_short_name, dataset_version))
259
  st.markdown(BIGOS_INFO, unsafe_allow_html=True)
@@ -290,9 +301,6 @@ with lead_bigos:
290
  h_df_per_system_per_dataset = calculate_height_to_display(df_wer_per_system_from_per_dataset)
291
  st.dataframe(df_wer_per_system_from_per_dataset, height = h_df_per_system_per_dataset )
292
 
293
- st.subheader("Boxplot showing {} per {} sorted by median values".format(metric, analysis_dim))
294
- fig = box_plot_per_dimension(df_per_dataset, metric, analysis_dim, "{} per {}".format(metric, analysis_dim), analysis_dim, metric + "[%]")
295
- st.pyplot(fig, clear_figure=True, use_container_width=True)
296
 
297
  ##################### PER SUBSET ANALYSIS #########################
298
  analysis_dim = "subset"
@@ -386,6 +394,13 @@ with lead_pelcra:
386
  no_of_unique_speakers = len(df_per_sample["speaker_id"].unique())
387
 
388
  df_per_dataset_with_asr_systems_meta = pd.merge(df_per_dataset, df_evaluated_systems, how="left", left_on="system", right_on="Shortname")
 
 
 
 
 
 
 
389
 
390
  ########### EVALUATION PARAMETERS PRESENTATION ################
391
  st.title("Leaderboard for {} {}".format(dataset_short_name, dataset_version))
@@ -422,9 +437,6 @@ with lead_pelcra:
422
  h_df_per_system_per_dataset = calculate_height_to_display(df_wer_per_system_from_per_dataset)
423
  st.dataframe(df_wer_per_system_from_per_dataset, height = h_df_per_system_per_dataset )
424
 
425
- st.subheader("Boxplot showing {} per {} sorted by median values".format(metric, analysis_dim))
426
- fig = box_plot_per_dimension(df_per_dataset, metric, analysis_dim, "{} per {}".format(metric, analysis_dim), analysis_dim, metric + "[%]")
427
- st.pyplot(fig, clear_figure=True, use_container_width=True)
428
 
429
  ##################### PER SUBSET ANALYSIS #########################
430
  analysis_dim = "subset"
 
2
  import streamlit as st
3
  import pandas as pd
4
  from constants import BIGOS_INFO, PELCRA_INFO, ANALYSIS_INFO, ABOUT_INFO, INSPECTION_INFO, COMPARISON_INFO
5
+ from utils import read_latest_results, basic_stats_per_dimension, retrieve_asr_systems_meta_from_the_catalog, box_plot_per_dimension, box_plot_per_dimension_with_colors, get_total_audio_duration, check_impact_of_normalization, calculate_wer_per_meta_category, calculate_wer_per_audio_feature
6
  from app_utils import calculate_height_to_display, filter_dataframe
7
  import matplotlib.pyplot as plt
8
  import numpy as np
 
253
  no_of_unique_speakers = len(df_per_sample["speaker_id"].unique())
254
 
255
  df_per_dataset_with_asr_systems_meta = pd.merge(df_per_dataset, df_evaluated_systems, how="left", left_on="system", right_on="Shortname")
256
+ print(df_per_dataset_with_asr_systems_meta.sample(5))
257
+ # save sample to tsv
258
+ df_per_dataset_with_asr_systems_meta.sample(5).to_csv("sample.tsv", sep="\t", index=False)
259
+
260
+ # MOST IMPORTANT RESULTS
261
+ analysis_dim = "system"
262
+ metric = "WER"
263
+ st.subheader("Boxplot showing {} per {} sorted by median values".format(metric, analysis_dim))
264
+ fig = box_plot_per_dimension_with_colors(df_per_dataset_with_asr_systems_meta, metric, analysis_dim, "{} per {}".format(metric, analysis_dim), analysis_dim, metric + "[%]","System", "Type")
265
+ st.pyplot(fig, clear_figure=True, use_container_width=True)
266
+
267
+
268
  ########### EVALUATION PARAMETERS PRESENTATION ################
269
  st.title("Leaderboard for {} {}".format(dataset_short_name, dataset_version))
270
  st.markdown(BIGOS_INFO, unsafe_allow_html=True)
 
301
  h_df_per_system_per_dataset = calculate_height_to_display(df_wer_per_system_from_per_dataset)
302
  st.dataframe(df_wer_per_system_from_per_dataset, height = h_df_per_system_per_dataset )
303
 
 
 
 
304
 
305
  ##################### PER SUBSET ANALYSIS #########################
306
  analysis_dim = "subset"
 
394
  no_of_unique_speakers = len(df_per_sample["speaker_id"].unique())
395
 
396
  df_per_dataset_with_asr_systems_meta = pd.merge(df_per_dataset, df_evaluated_systems, how="left", left_on="system", right_on="Shortname")
397
+
398
+ # MOST IMPORTANT RESULTS
399
+ analysis_dim = "system"
400
+ metric = "WER"
401
+ st.subheader("Boxplot showing {} per {} sorted by median values".format(metric, analysis_dim))
402
+ fig = box_plot_per_dimension_with_colors(df_per_dataset_with_asr_systems_meta, metric, analysis_dim, "{} per {}".format(metric, analysis_dim), analysis_dim, metric + "[%]","System", "Type")
403
+ st.pyplot(fig, clear_figure=True, use_container_width=True)
404
 
405
  ########### EVALUATION PARAMETERS PRESENTATION ################
406
  st.title("Leaderboard for {} {}".format(dataset_short_name, dataset_version))
 
437
  h_df_per_system_per_dataset = calculate_height_to_display(df_wer_per_system_from_per_dataset)
438
  st.dataframe(df_wer_per_system_from_per_dataset, height = h_df_per_system_per_dataset )
439
 
 
 
 
440
 
441
  ##################### PER SUBSET ANALYSIS #########################
442
  analysis_dim = "subset"
constants.py CHANGED
@@ -1,6 +1,14 @@
1
  ABOUT_INFO = "Polish ASR leaderboard by [AMU-CAI team](https://huggingface.co/amu-cai) aims to provide comprehensive overview of performance of ASR/STT systems for Polish. <br>\
2
  The leaderboard currently supports [BIGOS V2](https://huggingface.co/datasets/amu-cai/pl-asr-bigos-v2) and [PELCRA for BIGOS](https://huggingface.co/datasets/pelcra/pl-asr-pelcra-for-bigos) datasets.<br>\
3
- To learn more please read blog post [here](https://huggingface.co/blog/michaljunczyk/introducing-polish-asr-leaderboard)."
 
 
 
 
 
 
 
 
4
 
5
  BIGOS_INFO = "BIGOS (Benchmark Intended Grouping of Open Speech) is the collection of freely available speech datasets curated by the [AMU-CAI team](https://huggingface.co/amu-cai). \
6
  Learn more [here](https://huggingface.co/datasets/amu-cai/pl-asr-bigos-v2)"
 
1
  ABOUT_INFO = "Polish ASR leaderboard by [AMU-CAI team](https://huggingface.co/amu-cai) aims to provide comprehensive overview of performance of ASR/STT systems for Polish. <br>\
2
  The leaderboard currently supports [BIGOS V2](https://huggingface.co/datasets/amu-cai/pl-asr-bigos-v2) and [PELCRA for BIGOS](https://huggingface.co/datasets/pelcra/pl-asr-pelcra-for-bigos) datasets.<br>\
3
+ To learn more please read blog post [here](https://huggingface.co/blog/michaljunczyk/introducing-polish-asr-leaderboard).<br> \
4
+ If you use this work, please use the citation below: <br> \
5
+ ```@misc{amu_cai_pl_asr_leaderboard, \
6
+ author = {Michał Junczyk}, \
7
+ title = {{AMU Polish ASR Leaderboard}}, \
8
+ year = {2024}, \
9
+ howpublished = {url{https://huggingface.co/spaces/amu-cai/pl-asr-leaderboard}}, \
10
+ publisher = {Hugging Face} \
11
+ }```"
12
 
13
  BIGOS_INFO = "BIGOS (Benchmark Intended Grouping of Open Speech) is the collection of freely available speech datasets curated by the [AMU-CAI team](https://huggingface.co/amu-cai). \
14
  Learn more [here](https://huggingface.co/datasets/amu-cai/pl-asr-bigos-v2)"
utils.py CHANGED
@@ -7,6 +7,23 @@ import requests
7
  import numpy as np
8
  from datasets import Dataset
9
  from huggingface_hub import hf_hub_download
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  def download_tsv_from_google_sheet(sheet_url):
12
  # Modify the Google Sheet URL to export it as TSV
@@ -164,7 +181,7 @@ def filter_bottom_outliers(df_input, metric, min_threshold):
164
 
165
  def box_plot_per_dimension(df_input, metric, dimension, title, xlabel, ylabel):
166
  # Box plot for WER per dataset
167
- plt.figure(figsize=(20, 10))
168
 
169
  # generate box plot without outliers
170
  sns.boxplot(x=dimension, y=metric, data=df_input, order=df_input.groupby(dimension)[metric].median().sort_values().index, showfliers=False)
@@ -176,7 +193,107 @@ def box_plot_per_dimension(df_input, metric, dimension, title, xlabel, ylabel):
176
  #return figure
177
  return plt
178
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
  def check_impact_of_normalization(data_in, ref_type='orig'):
181
 
182
  # Filter the data to include only the specific reference type
 
7
  import numpy as np
8
  from datasets import Dataset
9
  from huggingface_hub import hf_hub_download
10
+ import matplotlib.patches as mpatches
11
+ import matplotlib as mpl
12
+
13
+
14
# Fixed box colors for the known ASR system families, keyed by system name.
# Extend or override entries here to pin the color of additional systems.
asr_systems_colors_mapping = {
    "azure": "#1f77b4",  # blue
    "google": "#2ca02c",  # green
    "wav2vec2": "#d62728",  # red
    "nemo": "#9467bd",  # purple
    "assemblyai": "#8c564b",  # brown
    "mms": "#e377c2",  # pink
    "google_v2": "#7f7f7f",  # gray
    "whisper_cloud": "#bcbd22",  # olive
    "whisper_local": "#ff7f0e",  # orange
}
27
 
28
  def download_tsv_from_google_sheet(sheet_url):
29
  # Modify the Google Sheet URL to export it as TSV
 
181
 
182
  def box_plot_per_dimension(df_input, metric, dimension, title, xlabel, ylabel):
183
  # Box plot for WER per dataset
184
+ fig, ax = plt.subplots(figsize=(20, 10))
185
 
186
  # generate box plot without outliers
187
  sns.boxplot(x=dimension, y=metric, data=df_input, order=df_input.groupby(dimension)[metric].median().sort_values().index, showfliers=False)
 
193
  #return figure
194
  return plt
195
 
196
def _system_color_map(systems):
    """Return a {system: color} dict covering every entry of *systems*."""
    # Work on a copy: updating the module-level mapping in place would leak
    # palette colors chosen for one data set into every later call.
    color_map = dict(asr_systems_colors_mapping)
    unmapped = [system for system in systems if system not in color_map]
    # NOTE(review): the predefined hex values look like they come from the same
    # "tab10" palette, so an unmapped system may share a color with a
    # predefined one - confirm whether a different fallback palette is wanted.
    color_map.update(zip(unmapped, sns.color_palette("tab10", len(unmapped))))
    return color_map


def _type_hatch_map(types):
    """Return a {type: hatch pattern} dict covering every entry of *types*."""
    hatch_map = {
        'free': '',           # No hatching
        'commercial': '///',  # Diagonal hatching
    }
    candidate_hatches = ['', '///', '\\\\', 'xx', '++', '--', '...']
    # Only hand out patterns not already reserved above, so an unknown type
    # can never be confused with 'free' or 'commercial' in the legend.
    spare_hatches = [h for h in candidate_hatches if h not in hatch_map.values()]
    unmapped = [t for t in types if t not in hatch_map]
    for idx, t in enumerate(unmapped):
        hatch_map[t] = spare_hatches[idx % len(spare_hatches)]
    return hatch_map


def box_plot_per_dimension_with_colors(df_input, metric, dimension, title, xlabel, ylabel, system_col, type_col):
    """Draw a box plot of *metric* per *dimension*, styled by system and type.

    Boxes are ordered by ascending median of *metric*. Each box is filled with
    the color of its system (*system_col*) and hatched according to its type
    (*type_col*); both are taken from the first row found for that dimension
    value. Outliers are not drawn.

    Args:
        df_input: DataFrame holding the columns named by *metric*,
            *dimension*, *system_col* and *type_col*.
        metric: Name of the numeric column plotted on the y axis.
        dimension: Name of the categorical column plotted on the x axis.
        title: Plot title.
        xlabel: Label of the x axis.
        ylabel: Label of the y axis.
        system_col: Column whose values select the box fill color.
        type_col: Column whose values select the box hatching pattern.

    Returns:
        The matplotlib Figure containing the plot.
    """
    fig, ax = plt.subplots(figsize=(12, 8))

    # Boxes are sorted by the median of the metric.
    order = df_input.groupby(dimension)[metric].median().sort_values().index.tolist()

    # Missing metadata (e.g. systems without a catalog entry) is excluded from
    # the mappings and legends; the affected boxes get neutral styling below.
    unique_systems = df_input[system_col].dropna().unique()
    unique_types = df_input[type_col].dropna().unique()
    system_colors = _system_color_map(unique_systems)
    type_hatches = _type_hatch_map(unique_types)

    # One metadata row per dimension value, aligned with the plotting order.
    first_rows = df_input.drop_duplicates(subset=dimension).set_index(dimension)
    box_systems = first_rows[system_col].reindex(order)
    box_types = first_rows[type_col].reindex(order)

    missing_color = 'lightgray'
    colors = [
        missing_color if is_missing else system_colors[system]
        for system, is_missing in zip(box_systems, box_systems.isna())
    ]
    hatches = [
        '' if is_missing else type_hatches[typ]
        for typ, is_missing in zip(box_types, box_types.isna())
    ]

    # Draw plain white boxes first; they are restyled one by one afterwards.
    sns.boxplot(
        x=dimension,
        y=metric,
        data=df_input,
        order=order,
        ax=ax,
        showfliers=False,
        linewidth=1.5,
        boxprops=dict(facecolor='white'),
    )

    # The box artists are looked up in ax.artists first and in ax.patches as a
    # fallback, since the container holding them is not the same everywhere.
    box_patches = [patch for patch in ax.artists if isinstance(patch, mpatches.PathPatch)]
    if not box_patches:
        box_patches = [patch for patch in ax.patches if isinstance(patch, mpatches.PathPatch)]

    for patch, color, hatch in zip(box_patches, colors, hatches):
        patch.set_facecolor(color)
        patch.set_edgecolor('black')
        patch.set_linewidth(1.5)
        patch.set_hatch(hatch)

    system_handles = [
        mpatches.Patch(facecolor=system_colors[system], edgecolor='black', label=system)
        for system in unique_systems
    ]
    type_handles = [
        mpatches.Patch(facecolor='white', edgecolor='black', hatch=type_hatches[typ], label=typ)
        for typ in unique_types
    ]

    # ax.legend() keeps only the most recent legend, so the first one is
    # registered as a plain artist before the second one is created.
    system_legend = ax.legend(handles=system_handles, title='System', bbox_to_anchor=(0.01, 1), loc='upper left')
    ax.add_artist(system_legend)
    ax.legend(handles=type_handles, title='Type', bbox_to_anchor=(0.01, 0.6), loc='upper left')

    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)

    # Improve readability of the x-axis labels: smaller font, rotated by
    # 55 degrees and right-aligned so each label ends under its box.
    ax.tick_params(axis='x', labelsize=8)
    plt.setp(ax.get_xticklabels(), rotation=55, ha='right')

    # Finer y-axis granularity: allow up to 20 major ticks.
    ax.yaxis.set_major_locator(plt.MaxNLocator(20))

    fig.tight_layout()

    return fig
295
+
296
+
297
  def check_impact_of_normalization(data_in, ref_type='orig'):
298
 
299
  # Filter the data to include only the specific reference type