Spaces:

MERaLiON
/

AudioBench-Leaderboard

Running

App Files Community

zhuohan-7 committed on Oct 15, 2024

Commit

f3cadf1

verified ·

1 Parent(s): 9d76dc2

Upload folder using huggingface_hub

Browse files

Files changed (4) hide show

app/content.py +1 -1
app/draw_diagram.py +78 -52
app/pages.py +1 -1
app/show_examples.py +69 -51

app/content.py CHANGED Viewed

@@ -15,7 +15,7 @@ sqa_datasets = {'CN-College-Listen-MCQ-Test': 'Chinese College English Listening
                 'DREAM-TTS-MCQ-Test': 'DREAM dataset for spoken question-answering, derived from textual data and synthesized speech.',
                 'SLUE-P2-SQA5-Test': 'Spoken Language Understanding Evaluation (SLUE) dataset, part 2, focused on QA tasks.',
                 'Public-SG-Speech-QA-Test': 'Public dataset for speech-based question answering, gathered from Singapore.',
-                'Spoken-Squad-v1': 'Spoken SQuAD dataset, based on the textual SQuAD dataset, converted into audio.'
                 }
 si_datasets = {'OpenHermes-Audio-Test': 'Test set for spoken instructions. Synthesized from the OpenHermes dataset.',

                 'DREAM-TTS-MCQ-Test': 'DREAM dataset for spoken question-answering, derived from textual data and synthesized speech.',
                 'SLUE-P2-SQA5-Test': 'Spoken Language Understanding Evaluation (SLUE) dataset, part 2, focused on QA tasks.',
                 'Public-SG-Speech-QA-Test': 'Public dataset for speech-based question answering, gathered from Singapore.',
+                'Spoken-Squad-Test': 'Spoken SQuAD dataset, based on the textual SQuAD dataset, converted into audio.'
                 }
 si_datasets = {'OpenHermes-Audio-Test': 'Test set for spoken instructions. Synthesized from the OpenHermes dataset.',

app/draw_diagram.py CHANGED Viewed

@@ -51,21 +51,24 @@ def draw(folder_name, category_name, dataset_name, metrics):
     # remap model names
     display_model_names = {key.strip() :val.strip() for key, val in zip(info_df['AudioBench'], info_df['Proper Display Name'])}
-    chart_data['Model'] = chart_data['Model'].map(display_model_names)
     models = st.multiselect("Please choose the model",
-                            sorted(chart_data['Model'].tolist()),
-                            default =  sorted(chart_data['Model'].tolist()))
-    chart_data = chart_data[chart_data['Model'].isin(models)]
     chart_data = chart_data.sort_values(by=[new_dataset_name], ascending=True).dropna(axis=0)
     if len(chart_data) == 0:
         return
-    min_value = round(chart_data.iloc[:, 1::].min().min() - 0.1*chart_data.iloc[:, 1::].min().min(), 1)
-    max_value = round(chart_data.iloc[:, 1::].max().max() + 0.1*chart_data.iloc[:, 1::].max().max(), 1)
     options = {
         "title": {"text": f"{display_names[folder_name.upper()]}"},
@@ -82,7 +85,7 @@ def draw(folder_name, category_name, dataset_name, metrics):
                 "type": "category",
                 "boundaryGap": True,
                 "triggerEvent": True,
-                "data":  chart_data['Model'].tolist(),
             }
         ],
         "yAxis": [{"type": "value",
@@ -118,57 +121,80 @@ def draw(folder_name, category_name, dataset_name, metrics):
     with st.container():
         # st.write("")
         st.markdown('##### TABLE')
-        custom_css = """
-                    """
-        st.markdown(custom_css, unsafe_allow_html=True)
         model_link = {key.strip(): val for key, val in zip(info_df['Proper Display Name'], info_df['Link'])}
-        s = ''
-        for model in models:
-            try:
-                # <td align="center"><input type="checkbox" name="select"></td>
-                s += f"""<tr>
-                    <td><a href={model_link[model]}>{model}</a></td>
-                    <td>{chart_data[chart_data['Model'] == model][new_dataset_name].tolist()[0]}</td>
-                </tr>"""
-            except:
-                # print(f"{model} is not in {dataset_name}")
-                continue
-        # select all function
-        select_all_function = """<script>
-            function toggle(source) {
-                var checkboxes = document.querySelectorAll('input[type="checkbox"]');
-                for (var i = 0; i < checkboxes.length; i++) {
-                    if (checkboxes[i] != source)
-                        checkboxes[i].checked = source.checked;
-                }
-            }
-        </script>"""
-        st.markdown(f"""
-                    <div class="select_all">{select_all_function}</div>
-                    """, unsafe_allow_html=True)
-        info_body_details = f"""
-            <table style="width:80%">
-                <thead>
-                    <tr style="text-align: center;">
-                        <th style="width:45%">MODEL</th>
-                        <th style="width:45%">{dataset_name}</th>
-                    </tr>
-                    {s}
-                </thead>
-            </table>
-        """
-        #<th style="width:10%"><input type="checkbox" onclick="toggle(this);"></th>
-        # html_code = custom_css + select_all_function + info_body_details
-        # html(html_code, height = 300)
-        st.markdown(f"""
-                    <div class="my-data-table">{info_body_details}</div>
-                    """, unsafe_allow_html=True)
     # st.dataframe(chart_data,

     # remap model names
     display_model_names = {key.strip() :val.strip() for key, val in zip(info_df['AudioBench'], info_df['Proper Display Name'])}
+    chart_data['model_show'] = chart_data['Model'].map(display_model_names)
     models = st.multiselect("Please choose the model",
+                            sorted(chart_data['model_show'].tolist()),
+                            default = sorted(chart_data['model_show'].tolist()))
+    chart_data = chart_data[chart_data['model_show'].isin(models)]
     chart_data = chart_data.sort_values(by=[new_dataset_name], ascending=True).dropna(axis=0)
+    # import pdb
+    # pdb.set_trace()
     if len(chart_data) == 0:
         return
+    min_value = round(min(chart_data.iloc[:, 1]) - 0.1*min(chart_data.iloc[:, 1]), 1)
+    max_value = round(max(chart_data.iloc[:, 1]) + 0.1*max(chart_data.iloc[:, 1]), 1)
     options = {
         "title": {"text": f"{display_names[folder_name.upper()]}"},
                 "type": "category",
                 "boundaryGap": True,
                 "triggerEvent": True,
+                "data":  chart_data['model_show'].tolist(),
             }
         ],
         "yAxis": [{"type": "value",
     with st.container():
         # st.write("")
         st.markdown('##### TABLE')
+        # custom_css = """
+        #             """
+        # st.markdown(custom_css, unsafe_allow_html=True)
         model_link = {key.strip(): val for key, val in zip(info_df['Proper Display Name'], info_df['Link'])}
+        chart_data['model_link'] = chart_data['model_show'].map(model_link)
+        # import pdb
+        # pdb.set_trace()
+        st.dataframe(
+                chart_data,
+                column_config={
+                    'model_show': "Model",
+                    chart_data.columns[1]: {'alignment': 'center'},
+                    "model_link": st.column_config.LinkColumn(
+                        "Model Link",
+                        # # # help="",
+                        # validate=r"^https://(.*?)$",
+                        # # max_chars=100,
+                        # display_text=r"\[(.*?)\]"
+                    ),
+                },
+                hide_index=True,
+                use_container_width=True
+            )
+        # s = ''
+        # for model in models:
+        #     try:
+        #         # <td align="center"><input type="checkbox" name="select"></td>
+        #         s += f"""<tr>
+        #             <td><a href={model_link[model]}>{model}</a></td>
+        #             <td>{chart_data[chart_data['Model'] == model][new_dataset_name].tolist()[0]}</td>
+        #         </tr>"""
+        #     except:
+        #         # print(f"{model} is not in {dataset_name}")
+        #         continue
+        # # select all function
+        # select_all_function = """<script>
+        #     function toggle(source) {
+        #         var checkboxes = document.querySelectorAll('input[type="checkbox"]');
+        #         for (var i = 0; i < checkboxes.length; i++) {
+        #             if (checkboxes[i] != source)
+        #                 checkboxes[i].checked = source.checked;
+        #         }
+        #     }
+        # </script>"""
+        # st.markdown(f"""
+        #             <div class="select_all">{select_all_function}</div>
+        #             """, unsafe_allow_html=True)
+        # info_body_details = f"""
+        #     <table style="width:80%">
+        #         <thead>
+        #             <tr style="text-align: center;">
+        #                 <th style="width:45%">MODEL</th>
+        #                 <th style="width:45%">{dataset_name}</th>
+        #             </tr>
+        #             {s}
+        #         </thead>
+        #     </table>
+        # """
+        # #<th style="width:10%"><input type="checkbox" onclick="toggle(this);"></th>
+        # # html_code = custom_css + select_all_function + info_body_details
+        # # html(html_code, height = 300)
+        # st.markdown(f"""
+        #             <div class="my-data-table">{info_body_details}</div>
+        #             """, unsafe_allow_html=True)
     # st.dataframe(chart_data,

app/pages.py CHANGED Viewed

@@ -143,7 +143,7 @@ def sqa():
     rest = ['SLUE-P2-SQA5-Test',
             'Public-SG-Speech-QA-Test',
-            'Spoken-Squad-v1']
     filters_levelone = binary + rest

     rest = ['SLUE-P2-SQA5-Test',
             'Public-SG-Speech-QA-Test',
+            'Spoken-Squad-Test']
     filters_levelone = binary + rest

app/show_examples.py CHANGED Viewed

@@ -13,78 +13,94 @@ def show_examples(category_name, dataset_name, model_lists):
             st.markdown(f'##### EXAMPLE {index+1}')
             col1, col2 = st.columns([0.3, 0.7], vertical_alignment="center")
-            with col1:
-                st.audio(f'{sample_folder}/sample_{index}.wav', format="audio/wav")
-            with col2:
-                with st.container():
-                    custom_css = """
-                                <style>
-                                .my-container-question {
-                                background-color: #F5EEF8;
-                                padding: 10px;
-                                border-radius: 10px;
-                                height: auto;
-                                }
-                                </style>
-                                """
-                    st.markdown(custom_css, unsafe_allow_html=True)
-                    if dataset_name in ['CN-College-Listen-MCQ-Test', 'DREAM-TTS-MCQ-Test']:
-                        choices = dataset[index]['other_attributes']['choices']
-                        if isinstance(choices, str):
-                            choices_text = choices
-                        elif isinstance(choices, list):
-                            choices_text = ' '.join(i for i in choices)
-                        question_text = f"""<div class="my-container-question">
-                                            <p>QUESTION: {dataset[index]['instruction']['text']}</p>
-                                            <p>CHOICES: {choices_text}</p>
-                                            </div>
-                                            """
-                    else:
-                        question_text = f"""<div class="my-container-question">
-                                        <p>QUESTION: {dataset[index]['instruction']['text']}</p>
-                                        </div>"""
-                    st.markdown(question_text, unsafe_allow_html=True)
-                with st.container():
-                    custom_css = """
-                                <style>
-                                .my-container-answer {
-                                background-color: #F9EBEA;
-                                padding: 10px;
-                                border-radius: 10px;
-                                height: auto;
-                                }
-                                </style>
-                                """
-                    st.markdown(custom_css, unsafe_allow_html=True)
-                    st.markdown(f"""<div class="my-container-answer">
-                                <p>CORRECT ANSWER: {dataset[index]['answer']['text']}</p>
-                                </div>""", unsafe_allow_html=True)
             # st.divider()
             with st.container():
                 custom_css = """
                             <style>
                             .my-container-table {
-                            background-color: #F2F3F4;
                             padding: 10px;
                             border-radius: 5px;
                             # height: 50px;
                             }
                             </style>
                             """
                 st.markdown(custom_css, unsafe_allow_html=True)
                 model_lists.sort()
-                s = ''
                 if dataset_name in ['CN-College-Listen-MCQ-Test', 'DREAM-TTS-MCQ-Test']:
                     for model in model_lists:
                         try:
@@ -107,8 +123,10 @@ def show_examples(category_name, dataset_name, model_lists):
                         except:
                             print(f"{model} is not in {dataset_name}")
                             continue
-                body_details = f"""<table style="width:100%">
                 <thead>
                     <tr style="text-align: center;">
                         <th style="width:20%">MODEL</th>

             st.markdown(f'##### EXAMPLE {index+1}')
             col1, col2 = st.columns([0.3, 0.7], vertical_alignment="center")
+            # with col1:
+            st.audio(f'{sample_folder}/sample_{index}.wav', format="audio/wav")
+            # with col2:
+            #     with st.container():
+            #         custom_css = """
+            #                     <style>
+            #                     .my-container-question {
+            #                     background-color: #F5EEF8;
+            #                     padding: 10px;
+            #                     border-radius: 10px;
+            #                     height: auto;
+            #                     }
+            #                     </style>
+            #                     """
+            #         st.markdown(custom_css, unsafe_allow_html=True)
+            #         if dataset_name in ['CN-College-Listen-MCQ-Test', 'DREAM-TTS-MCQ-Test']:
+            #             choices = dataset[index]['other_attributes']['choices']
+            #             if isinstance(choices, str):
+            #                 choices_text = choices
+            #             elif isinstance(choices, list):
+            #                 choices_text = ' '.join(i for i in choices)
+            #             question_text = f"""<div class="my-container-question">
+            #                                 <p>QUESTION: {dataset[index]['instruction']['text']}</p>
+            #                                 <p>CHOICES: {choices_text}</p>
+            #                                 </div>
+            #                                 """
+            #         else:
+            #             question_text = f"""<div class="my-container-question">
+            #                             <p>QUESTION: {dataset[index]['instruction']['text']}</p>
+            #                             </div>"""
+            #         st.markdown(question_text, unsafe_allow_html=True)
+                # with st.container():
+                #     custom_css = """
+                #                 <style>
+                #                 .my-container-answer {
+                #                 background-color: #F9EBEA;
+                #                 padding: 10px;
+                #                 border-radius: 10px;
+                #                 height: auto;
+                #                 }
+                #                 </style>
+                #                 """
+                #     st.markdown(custom_css, unsafe_allow_html=True)
+                #     st.markdown(f"""<div class="my-container-answer">
+                #                 <p>CORRECT ANSWER: {dataset[index]['answer']['text']}</p>
+                #                 </div>""", unsafe_allow_html=True)
+            if dataset_name in ['CN-College-Listen-MCQ-Test', 'DREAM-TTS-MCQ-Test']:
+                choices = dataset[index]['other_attributes']['choices']
+                if isinstance(choices, str):
+                    choices_text = choices
+                elif isinstance(choices, list):
+                    choices_text = ' '.join(i for i in choices)
+                question_text = f"""{dataset[index]['instruction']['text']} {choices_text}"""
+            else:
+                question_text = f"""{dataset[index]['instruction']['text']}"""
             # st.divider()
             with st.container():
                 custom_css = """
                             <style>
                             .my-container-table {
+                            background-color: #cad8e7;
                             padding: 10px;
                             border-radius: 5px;
                             # height: 50px;
+                            word-wrap: break-word
                             }
                             </style>
                             """
                 st.markdown(custom_css, unsafe_allow_html=True)
                 model_lists.sort()
+                s = f"""<tr>
+                       <td>Reference</td>
+                       <td>{question_text}</td>
+                       <td>{dataset[index]['answer']['text']}</td>
+                </tr>"""
                 if dataset_name in ['CN-College-Listen-MCQ-Test', 'DREAM-TTS-MCQ-Test']:
                     for model in model_lists:
                         try:
                         except:
                             print(f"{model} is not in {dataset_name}")
                             continue
+                # import pdb
+                # pdb.set_trace()
+                body_details = f"""<table style="table-layout: fixed; width:100%">
                 <thead>
                     <tr style="text-align: center;">
                         <th style="width:20%">MODEL</th>