zhuohan-7 committed on
Commit
2e7bc8b
1 Parent(s): 7d4aeea

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. app/draw_diagram.py +65 -16
  2. app/show_examples.py +19 -13
app/draw_diagram.py CHANGED
@@ -55,23 +55,39 @@ def draw(folder_name, category_name, dataset_name, metrics, cus_sort=True):
55
 
56
  models = st.multiselect("Please choose the model",
57
  sorted(chart_data['model_show'].tolist()),
58
- default = sorted(chart_data['model_show'].tolist()))
 
59
 
60
  chart_data = chart_data[chart_data['model_show'].isin(models)]
61
-
62
  chart_data = chart_data.sort_values(by=[new_dataset_name], ascending=cus_sort).dropna(axis=0)
63
 
64
- # import pdb
65
- # pdb.set_trace()
66
-
67
  if len(chart_data) == 0:
68
  return
 
 
 
69
 
70
- min_value = round(min(chart_data.iloc[:, 1]) - 0.1*min(chart_data.iloc[:, 1]), 1)
71
- max_value = round(max(chart_data.iloc[:, 1]) + 0.1*max(chart_data.iloc[:, 1]), 1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
  options = {
74
- "title": {"text": f"{display_names[folder_name.upper()]}"},
 
75
  "tooltip": {
76
  "trigger": "axis",
77
  "axisPointer": {"type": "cross", "label": {"backgroundColor": "#6a7985"}},
@@ -120,7 +136,7 @@ def draw(folder_name, category_name, dataset_name, metrics, cus_sort=True):
120
  # st.divider()
121
  with st.container():
122
  # st.write("")
123
- st.markdown('##### TABLE')
124
  # custom_css = """
125
 
126
  # """
@@ -130,16 +146,46 @@ def draw(folder_name, category_name, dataset_name, metrics, cus_sort=True):
130
 
131
  chart_data['model_link'] = chart_data['model_show'].map(model_link)
132
 
133
- # import pdb
134
- # pdb.set_trace()
135
-
136
  chart_data_table = chart_data[['model_show', chart_data.columns[1], chart_data.columns[3]]]
137
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  st.dataframe(
139
- chart_data_table,
140
  column_config={
141
  'model_show': 'Model',
142
- chart_data_table.columns[1]: {'alignment': 'center'},
143
  "model_link": st.column_config.LinkColumn(
144
  "Model Link",
145
  # # # help="",
@@ -151,6 +197,9 @@ def draw(folder_name, category_name, dataset_name, metrics, cus_sort=True):
151
  hide_index=True,
152
  use_container_width=True
153
  )
 
 
 
154
 
155
 
156
  # s = ''
@@ -210,8 +259,8 @@ def draw(folder_name, category_name, dataset_name, metrics, cus_sort=True):
210
  '''
211
  show samples
212
  '''
213
- if dataset_name in ['Earnings21-Test', 'Earnings22-Test', 'Tedlium3-Long-form-Test']:
214
  pass
215
  else:
216
- show_examples(category_name, dataset_name, chart_data['Model'].tolist())
217
 
 
55
 
56
  models = st.multiselect("Please choose the model",
57
  sorted(chart_data['model_show'].tolist()),
58
+ default = sorted(chart_data['model_show'].tolist()),
59
+ )
60
 
61
  chart_data = chart_data[chart_data['model_show'].isin(models)]
 
62
  chart_data = chart_data.sort_values(by=[new_dataset_name], ascending=cus_sort).dropna(axis=0)
63
 
 
 
 
64
  if len(chart_data) == 0:
65
  return
66
+
67
+ # Get Values
68
+ data_values = chart_data.iloc[:, 1]
69
 
70
+ # Calculate Q1 and Q3
71
+ q1 = data_values.quantile(0.25)
72
+ q3 = data_values.quantile(0.75)
73
+
74
+ # Calculate IQR
75
+ iqr = q3 - q1
76
+
77
+ # Define lower and upper bounds (1.5*IQR is a common threshold)
78
+ lower_bound = q1 - 1.5 * iqr
79
+ upper_bound = q3 + 1.5 * iqr
80
+
81
+ # Filter data within the bounds
82
+ filtered_data = data_values[(data_values >= lower_bound) & (data_values <= upper_bound)]
83
+
84
+ # Calculate min and max values after outlier handling
85
+ min_value = round(filtered_data.min() - 0.1 * filtered_data.min(), 3)
86
+ max_value = round(filtered_data.max() + 0.1 * filtered_data.max(), 3)
87
 
88
  options = {
89
+ #"title": {"text": f"{display_names[folder_name.upper()]}"},
90
+ "title": {"text": f"{dataset_name}"},
91
  "tooltip": {
92
  "trigger": "axis",
93
  "axisPointer": {"type": "cross", "label": {"backgroundColor": "#6a7985"}},
 
136
  # st.divider()
137
  with st.container():
138
  # st.write("")
139
+ st.markdown('##### Results')
140
  # custom_css = """
141
 
142
  # """
 
146
 
147
  chart_data['model_link'] = chart_data['model_show'].map(model_link)
148
 
 
 
 
149
  chart_data_table = chart_data[['model_show', chart_data.columns[1], chart_data.columns[3]]]
150
 
151
+ cur_dataset_name = chart_data_table.columns[1]
152
+ print(cur_dataset_name)
153
+
154
+ if cur_dataset_name in [
155
+ 'librispeech_test_clean',
156
+ 'librispeech_test_other',
157
+ 'common_voice_15_en_test',
158
+ 'peoples_speech_test',
159
+ 'gigaspeech_test',
160
+ 'earnings21_test',
161
+ 'earnings22_test',
162
+ 'tedlium3_test',
163
+ 'tedlium3_long_form_test',
164
+ 'imda_part1_asr_test',
165
+ 'imda_part2_asr_test',
166
+
167
+ 'aishell_asr_zh_test',
168
+ ]:
169
+
170
+ styled_df = chart_data_table.style.highlight_min(
171
+ subset=[chart_data_table.columns[1]], color='yellow'
172
+ )
173
+ else:
174
+
175
+ chart_data_table = chart_data_table.sort_values(
176
+ by=chart_data_table.columns[1],
177
+ ascending=False
178
+ ).reset_index(drop=True)
179
+
180
+ styled_df = chart_data_table.style.highlight_max(
181
+ subset=[chart_data_table.columns[1]], color='yellow'
182
+ )
183
+
184
  st.dataframe(
185
+ styled_df,
186
  column_config={
187
  'model_show': 'Model',
188
+ chart_data_table.columns[1]: {'alignment': 'left'},
189
  "model_link": st.column_config.LinkColumn(
190
  "Model Link",
191
  # # # help="",
 
197
  hide_index=True,
198
  use_container_width=True
199
  )
200
+
201
+
202
+
203
 
204
 
205
  # s = ''
 
259
  '''
260
  show samples
261
  '''
262
+ if dataset_name in ['Earnings21-Test', 'Earnings22-Test', 'Tedlium3-Test', 'Tedlium3-Long-form-Test']:
263
  pass
264
  else:
265
+ show_examples(category_name, dataset_name, chart_data['Model'].tolist(), display_model_names)
266
 
app/show_examples.py CHANGED
@@ -2,7 +2,7 @@ import streamlit as st
2
  import datasets
3
  import numpy as np
4
 
5
- def show_examples(category_name, dataset_name, model_lists):
6
  st.divider()
7
  sample_folder = f"./examples/{category_name}/{dataset_name}"
8
 
@@ -10,7 +10,7 @@ def show_examples(category_name, dataset_name, model_lists):
10
 
11
  for index in range(len(dataset)):
12
  with st.container():
13
- st.markdown(f'##### EXAMPLE {index+1}')
14
  col1, col2 = st.columns([0.3, 0.7], vertical_alignment="center")
15
 
16
  # with col1:
@@ -83,10 +83,11 @@ def show_examples(category_name, dataset_name, model_lists):
83
  with st.container():
84
  custom_css = """
85
  <style>
86
- .my-container-table {
87
- background-color: #cad8e7;
88
  padding: 10px;
89
  border-radius: 5px;
 
90
  # height: 50px;
91
  word-wrap: break-word
92
  }
@@ -97,16 +98,22 @@ def show_examples(category_name, dataset_name, model_lists):
97
  model_lists.sort()
98
 
99
  s = f"""<tr>
100
- <td>Reference</td>
101
- <td>{question_text}</td>
102
- <td>{dataset[index]['answer']['text']}</td>
103
- </tr>"""
 
 
 
104
  if dataset_name in ['CN-College-Listen-MCQ-Test', 'DREAM-TTS-MCQ-Test']:
105
  for model in model_lists:
106
  try:
107
  s += f"""<tr>
108
- <td>{model}</td>
109
- <td><p>{dataset[index][model]['text']}</p> <p>{choices_text}</p></td>
 
 
 
110
  <td>{dataset[index][model]['model_prediction']}</td>
111
  </tr>"""
112
  except:
@@ -116,15 +123,14 @@ def show_examples(category_name, dataset_name, model_lists):
116
  for model in model_lists:
117
  try:
118
  s += f"""<tr>
119
- <td>{model}</td>
120
  <td>{dataset[index][model]['text']}</td>
121
  <td>{dataset[index][model]['model_prediction']}</td>
122
  </tr>"""
123
  except:
124
  print(f"{model} is not in {dataset_name}")
125
  continue
126
- # import pdb
127
- # pdb.set_trace()
128
 
129
  body_details = f"""<table style="table-layout: fixed; width:100%">
130
  <thead>
 
2
  import datasets
3
  import numpy as np
4
 
5
+ def show_examples(category_name, dataset_name, model_lists, display_model_names):
6
  st.divider()
7
  sample_folder = f"./examples/{category_name}/{dataset_name}"
8
 
 
10
 
11
  for index in range(len(dataset)):
12
  with st.container():
13
+ st.markdown(f'##### Example-{index+1}')
14
  col1, col2 = st.columns([0.3, 0.7], vertical_alignment="center")
15
 
16
  # with col1:
 
83
  with st.container():
84
  custom_css = """
85
  <style>
86
+ .my-container-table, p.my-container-text {
87
+ background-color: #fcf8dc;
88
  padding: 10px;
89
  border-radius: 5px;
90
+ font-size: 13px;
91
  # height: 50px;
92
  word-wrap: break-word
93
  }
 
98
  model_lists.sort()
99
 
100
  s = f"""<tr>
101
+ <td><b>REFERENCE</td>
102
+ <td><b>{question_text.replace('(A)', '<br>(A)').replace('(B)', '<br>(B)').replace('(C)', '<br>(C)')}
103
+ </td>
104
+ <td><b>{dataset[index]['answer']['text']}
105
+ </td>
106
+ </tr>
107
+ """
108
  if dataset_name in ['CN-College-Listen-MCQ-Test', 'DREAM-TTS-MCQ-Test']:
109
  for model in model_lists:
110
  try:
111
  s += f"""<tr>
112
+ <td>{display_model_names[model]}</td>
113
+ <td>
114
+ {dataset[index][model]['text'].replace('Choices:', '<br>Choices:').replace('(A)', '<br>(A)').replace('(B)', '<br>(B)').replace('(C)', '<br>(C)')
115
+ }
116
+ </td>
117
  <td>{dataset[index][model]['model_prediction']}</td>
118
  </tr>"""
119
  except:
 
123
  for model in model_lists:
124
  try:
125
  s += f"""<tr>
126
+ <td>{display_model_names[model]}</td>
127
  <td>{dataset[index][model]['text']}</td>
128
  <td>{dataset[index][model]['model_prediction']}</td>
129
  </tr>"""
130
  except:
131
  print(f"{model} is not in {dataset_name}")
132
  continue
133
+
 
134
 
135
  body_details = f"""<table style="table-layout: fixed; width:100%">
136
  <thead>