Spaces:

amu-cai
/

pl-asr-leaderboard

Running

App Files Files Community

mj-new committed on Oct 28, 2024

Commit

bba6ca7

1 Parent(s): c1b8f16

Improved box plot - colors and hatching

Browse files

Files changed (3) hide show

app.py +20 -8
constants.py +9 -1
utils.py +118 -1

app.py CHANGED Viewed

@@ -2,7 +2,7 @@ import os
 import streamlit as st
 import pandas as pd
 from constants import BIGOS_INFO, PELCRA_INFO, ANALYSIS_INFO, ABOUT_INFO, INSPECTION_INFO, COMPARISON_INFO
-from utils import read_latest_results, basic_stats_per_dimension, retrieve_asr_systems_meta_from_the_catalog, box_plot_per_dimension, get_total_audio_duration, check_impact_of_normalization, calculate_wer_per_meta_category, calculate_wer_per_audio_feature
 from app_utils import calculate_height_to_display, filter_dataframe
 import matplotlib.pyplot as plt
 import numpy as np
@@ -253,7 +253,18 @@ with lead_bigos:
     no_of_unique_speakers = len(df_per_sample["speaker_id"].unique())
     df_per_dataset_with_asr_systems_meta = pd.merge(df_per_dataset, df_evaluated_systems, how="left", left_on="system", right_on="Shortname")
     ########### EVALUATION PARAMETERS PRESENTATION ################
     st.title("Leaderboard for {} {}".format(dataset_short_name, dataset_version))
     st.markdown(BIGOS_INFO, unsafe_allow_html=True)
@@ -290,9 +301,6 @@ with lead_bigos:
     h_df_per_system_per_dataset = calculate_height_to_display(df_wer_per_system_from_per_dataset)
     st.dataframe(df_wer_per_system_from_per_dataset, height = h_df_per_system_per_dataset )
-    st.subheader("Boxplot showing {} per {} sorted by median values".format(metric, analysis_dim))
-    fig = box_plot_per_dimension(df_per_dataset, metric, analysis_dim, "{} per {}".format(metric, analysis_dim), analysis_dim, metric + "[%]")
-    st.pyplot(fig, clear_figure=True, use_container_width=True)
     ##################### PER SUBSET ANALYSIS #########################
     analysis_dim = "subset"
@@ -386,6 +394,13 @@ with lead_pelcra:
     no_of_unique_speakers = len(df_per_sample["speaker_id"].unique())
     df_per_dataset_with_asr_systems_meta = pd.merge(df_per_dataset, df_evaluated_systems, how="left", left_on="system", right_on="Shortname")
     ########### EVALUATION PARAMETERS PRESENTATION ################
     st.title("Leaderboard for {} {}".format(dataset_short_name, dataset_version))
@@ -422,9 +437,6 @@ with lead_pelcra:
     h_df_per_system_per_dataset = calculate_height_to_display(df_wer_per_system_from_per_dataset)
     st.dataframe(df_wer_per_system_from_per_dataset, height = h_df_per_system_per_dataset )
-    st.subheader("Boxplot showing {} per {} sorted by median values".format(metric, analysis_dim))
-    fig = box_plot_per_dimension(df_per_dataset, metric, analysis_dim, "{} per {}".format(metric, analysis_dim), analysis_dim, metric + "[%]")
-    st.pyplot(fig, clear_figure=True, use_container_width=True)
     ##################### PER SUBSET ANALYSIS #########################
     analysis_dim = "subset"

 import streamlit as st
 import pandas as pd
 from constants import BIGOS_INFO, PELCRA_INFO, ANALYSIS_INFO, ABOUT_INFO, INSPECTION_INFO, COMPARISON_INFO
+from utils import read_latest_results, basic_stats_per_dimension, retrieve_asr_systems_meta_from_the_catalog, box_plot_per_dimension, box_plot_per_dimension_with_colors, get_total_audio_duration, check_impact_of_normalization, calculate_wer_per_meta_category, calculate_wer_per_audio_feature
 from app_utils import calculate_height_to_display, filter_dataframe
 import matplotlib.pyplot as plt
 import numpy as np
     no_of_unique_speakers = len(df_per_sample["speaker_id"].unique())
     df_per_dataset_with_asr_systems_meta = pd.merge(df_per_dataset, df_evaluated_systems, how="left", left_on="system", right_on="Shortname")
+    print(df_per_dataset_with_asr_systems_meta.sample(5))
+    # save sample to tsv
+    df_per_dataset_with_asr_systems_meta.sample(5).to_csv("sample.tsv", sep="\t", index=False)
+    # MOST IMPORTANT RESULTS
+    analysis_dim = "system"
+    metric = "WER"
+    st.subheader("Boxplot showing {} per {} sorted by median values".format(metric, analysis_dim))
+    fig = box_plot_per_dimension_with_colors(df_per_dataset_with_asr_systems_meta, metric, analysis_dim, "{} per {}".format(metric, analysis_dim), analysis_dim, metric + "[%]","System", "Type")
+    st.pyplot(fig, clear_figure=True, use_container_width=True)
     ########### EVALUATION PARAMETERS PRESENTATION ################
     st.title("Leaderboard for {} {}".format(dataset_short_name, dataset_version))
     st.markdown(BIGOS_INFO, unsafe_allow_html=True)
     h_df_per_system_per_dataset = calculate_height_to_display(df_wer_per_system_from_per_dataset)
     st.dataframe(df_wer_per_system_from_per_dataset, height = h_df_per_system_per_dataset )
     ##################### PER SUBSET ANALYSIS #########################
     analysis_dim = "subset"
     no_of_unique_speakers = len(df_per_sample["speaker_id"].unique())
     df_per_dataset_with_asr_systems_meta = pd.merge(df_per_dataset, df_evaluated_systems, how="left", left_on="system", right_on="Shortname")
+    # MOST IMPORTANT RESULTS
+    analysis_dim = "system"
+    metric = "WER"
+    st.subheader("Boxplot showing {} per {} sorted by median values".format(metric, analysis_dim))
+    fig = box_plot_per_dimension_with_colors(df_per_dataset_with_asr_systems_meta, metric, analysis_dim, "{} per {}".format(metric, analysis_dim), analysis_dim, metric + "[%]","System", "Type")
+    st.pyplot(fig, clear_figure=True, use_container_width=True)
     ########### EVALUATION PARAMETERS PRESENTATION ################
     st.title("Leaderboard for {} {}".format(dataset_short_name, dataset_version))
     h_df_per_system_per_dataset = calculate_height_to_display(df_wer_per_system_from_per_dataset)
     st.dataframe(df_wer_per_system_from_per_dataset, height = h_df_per_system_per_dataset )
     ##################### PER SUBSET ANALYSIS #########################
     analysis_dim = "subset"

constants.py CHANGED Viewed

@@ -1,6 +1,14 @@
 ABOUT_INFO = "Polish ASR leaderboard by [AMU-CAI team](https://huggingface.co/amu-cai) aims to provide comprehensive overview of performance of ASR/STT systems for Polish. <br>\
 The leaderboard currently supports [BIGOS V2](https://huggingface.co/datasets/amu-cai/pl-asr-bigos-v2) and [PELCRA for BIGOS](https://huggingface.co/datasets/pelcra/pl-asr-pelcra-for-bigos) datasets.<br>\
-To learn more please read blog post [here](https://huggingface.co/blog/michaljunczyk/introducing-polish-asr-leaderboard)."
 BIGOS_INFO = "BIGOS (Benchmark Intended Grouping of Open Speech) is the collection of freely available speech datasets curated by the [AMU-CAI team](https://huggingface.co/amu-cai). \
 Learn more [here](https://huggingface.co/datasets/amu-cai/pl-asr-bigos-v2)"

 ABOUT_INFO = "Polish ASR leaderboard by [AMU-CAI team](https://huggingface.co/amu-cai) aims to provide comprehensive overview of performance of ASR/STT systems for Polish. <br>\
 The leaderboard currently supports [BIGOS V2](https://huggingface.co/datasets/amu-cai/pl-asr-bigos-v2) and [PELCRA for BIGOS](https://huggingface.co/datasets/pelcra/pl-asr-pelcra-for-bigos) datasets.<br>\
+To learn more please read blog post [here](https://huggingface.co/blog/michaljunczyk/introducing-polish-asr-leaderboard).<br> \
+If you use this work, please use the citation below: <br> \
+```@misc{amu_cai_pl_asr_leaderboard, \
+  author       = {Michał Junczyk}, \
+  title        = {{AMU Polish ASR Leaderboard}}, \
+  year         = {2024}, \
+  howpublished = {url{https://huggingface.co/spaces/amu-cai/pl-asr-leaderboard}}, \
+  publisher    = {Hugging Face} \
+}```"
 BIGOS_INFO = "BIGOS (Benchmark Intended Grouping of Open Speech) is the collection of freely available speech datasets curated by the [AMU-CAI team](https://huggingface.co/amu-cai). \
 Learn more [here](https://huggingface.co/datasets/amu-cai/pl-asr-bigos-v2)"

utils.py CHANGED Viewed

@@ -7,6 +7,23 @@ import requests
 import numpy as np
 from datasets import Dataset
 from huggingface_hub import hf_hub_download
 def download_tsv_from_google_sheet(sheet_url):
     # Modify the Google Sheet URL to export it as TSV
@@ -164,7 +181,7 @@ def filter_bottom_outliers(df_input, metric, min_threshold):
 def box_plot_per_dimension(df_input, metric, dimension, title, xlabel, ylabel):
     # Box plot for WER per dataset
-    plt.figure(figsize=(20, 10))
     # generate box plot without outliers
     sns.boxplot(x=dimension, y=metric, data=df_input, order=df_input.groupby(dimension)[metric].median().sort_values().index, showfliers=False)
@@ -176,7 +193,107 @@ def box_plot_per_dimension(df_input, metric, dimension, title, xlabel, ylabel):
     #return figure
     return plt
 def check_impact_of_normalization(data_in, ref_type='orig'):
     # Filter the data to include only the specific reference type

 import numpy as np
 from datasets import Dataset
 from huggingface_hub import hf_hub_download
+import matplotlib.patches as mpatches
+import matplotlib as mpl
# Fixed hex colours per ASR system name, used to colour boxes and legend
# entries in the coloured box plot. Systems missing from this table receive a
# colour at plot time; add or override entries here to pin a colour.
asr_systems_colors_mapping = dict(
    azure='#1f77b4',          # blue
    google='#2ca02c',         # green
    wav2vec2='#d62728',       # red
    nemo='#9467bd',           # purple
    assemblyai='#8c564b',     # brown
    mms='#e377c2',            # pink
    google_v2='#7f7f7f',      # gray
    whisper_cloud='#bcbd22',  # olive
    whisper_local='#ff7f0e',  # orange
)
 def download_tsv_from_google_sheet(sheet_url):
     # Modify the Google Sheet URL to export it as TSV
 def box_plot_per_dimension(df_input, metric, dimension, title, xlabel, ylabel):
     # Box plot for WER per dataset
+    fig, ax = plt.subplots(figsize=(20, 10))
     # generate box plot without outliers
     sns.boxplot(x=dimension, y=metric, data=df_input, order=df_input.groupby(dimension)[metric].median().sort_values().index, showfliers=False)
     #return figure
     return plt
def box_plot_per_dimension_with_colors(df_input, metric, dimension, title, xlabel, ylabel, system_col, type_col):
    """Draw a box plot of ``metric`` per ``dimension``, coloured by system and hatched by type.

    Boxes are ordered by ascending median of ``metric``. Each box takes its
    face colour from the ``system_col`` value and its hatch pattern from the
    ``type_col`` value of the first row found for that ``dimension`` value.

    Args:
        df_input: DataFrame holding the ``metric``, ``dimension``,
            ``system_col`` and ``type_col`` columns.
        metric: Name of the numeric column plotted on the y axis.
        dimension: Name of the categorical column plotted on the x axis.
        title: Plot title.
        xlabel: Label of the x axis.
        ylabel: Label of the y axis.
        system_col: Column whose values select the box colour.
        type_col: Column whose values select the box hatch pattern.

    Returns:
        The matplotlib ``Figure`` containing the plot.
    """
    fig, ax = plt.subplots(figsize=(12, 8))

    # Categories sorted by the median of the metric (lowest first).
    order = df_input.groupby(dimension)[metric].median().sort_values().index.tolist()

    # Missing values (e.g. rows left unmatched by an upstream merge) must not
    # become legend entries or colour/hatch keys.
    unique_systems = df_input[system_col].dropna().unique()
    unique_types = df_input[type_col].dropna().unique()

    # Work on a copy: updating the module-level table in place would leak the
    # fallback colours of one call into every later call.
    system_color_mapping = dict(asr_systems_colors_mapping)
    remaining_systems = [s for s in unique_systems if s not in system_color_mapping]
    if remaining_systems:
        # The predefined colours are themselves "tab10" colours, so drawing the
        # fallback from "tab10" would hand an unknown system the exact colour
        # of a known one. "husl" yields evenly spaced hues instead.
        palette = sns.color_palette("husl", len(remaining_systems))
        system_color_mapping.update(zip(remaining_systems, palette))

    type_hatch_mapping = {
        'free': '',             # No hatching
        'commercial': '///',    # Diagonal hatching
    }
    # Types without a predefined pattern cycle through the patterns that are
    # not already taken, so they stay distinguishable from the predefined ones.
    default_hatches = ['', '///', '\\\\', 'xx', '++', '--', '...']
    spare_hatches = [h for h in default_hatches if h not in type_hatch_mapping.values()]
    unknown_types = [t for t in unique_types if t not in type_hatch_mapping]
    for idx, t in enumerate(unknown_types):
        type_hatch_mapping[t] = spare_hatches[idx % len(spare_hatches)]

    # The first row seen for each dimension value decides that box's system and type.
    first_rows = df_input.drop_duplicates(subset=dimension).set_index(dimension)
    box_systems = first_rows[system_col].reindex(order)
    box_types = first_rows[type_col].reindex(order)
    # Boxes with a missing system/type stay white and unhatched.
    colors = [system_color_mapping.get(s, 'white') for s in box_systems]
    hatches = [type_hatch_mapping.get(t, '') for t in box_types]

    # Draw plain white boxes without outliers; they are restyled one by one below.
    sns.boxplot(
        x=dimension,
        y=metric,
        data=df_input,
        order=order,
        ax=ax,
        showfliers=False,
        linewidth=1.5,
        boxprops=dict(facecolor='white'),
    )

    # Look for the box artists in ax.artists first and fall back to ax.patches
    # when none are found there.
    box_patches = [p for p in ax.artists if isinstance(p, mpatches.PathPatch)]
    if not box_patches:
        box_patches = [p for p in ax.patches if isinstance(p, mpatches.PathPatch)]

    for patch, color, hatch in zip(box_patches, colors, hatches):
        patch.set_facecolor(color)
        patch.set_edgecolor('black')
        patch.set_linewidth(1.5)
        patch.set_hatch(hatch)

    # Legend entries: one colour swatch per system, one hatch swatch per type.
    system_handles = [
        mpatches.Patch(facecolor=system_color_mapping[system], edgecolor='black', label=system)
        for system in unique_systems
    ]
    type_handles = [
        mpatches.Patch(facecolor='white', edgecolor='black', hatch=type_hatch_mapping[typ], label=typ)
        for typ in unique_types
    ]

    legend1 = ax.legend(handles=system_handles, title='System', bbox_to_anchor=(0.01, 1), loc='upper left')
    ax.legend(handles=type_handles, title='Type', bbox_to_anchor=(0.01, 0.6), loc='upper left')
    # The second ax.legend() call replaces the first legend; re-attach it.
    ax.add_artist(legend1)

    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)

    # Improve readability of the x-axis labels: smaller font, rotated by
    # 55 degrees and right-aligned so each label ends under its box.
    ax.tick_params(axis='x', labelsize=8)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=55, ha='right')

    # Finer y-axis granularity: allow up to 20 major intervals.
    ax.yaxis.set_major_locator(plt.MaxNLocator(20))

    fig.tight_layout()
    return fig
 def check_impact_of_normalization(data_in, ref_type='orig'):
     # Filter the data to include only the specific reference type