KlaudiaTH committed on
Commit
a200cc8
•
1 Parent(s): 8fcff38

Reformatted

Browse files
Files changed (3) hide show
  1. app.py +6 -13
  2. core.py +9 -4
  3. style.py +2 -6
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import gradio as gr
2
 
3
  import core as core
4
- from style import CSS, T_SYMBOLS, TITLE, LANG_SYMBOLS
5
 
6
  demo = gr.Blocks(css=CSS)
7
  with demo:
@@ -38,7 +38,7 @@ with demo:
38
  )
39
  with gr.Row():
40
  langs_bar = gr.CheckboxGroup(
41
- choices=[(LANG_SYMBOLS.get(l,l),l) for l in core.languages_list],
42
  value=core.languages_list,
43
  label="Select languages to average over",
44
  elem_id="column-select",
@@ -52,9 +52,7 @@ with demo:
52
  size="sm",
53
  scale=1,
54
  )
55
- select = gr.Button(
56
- value="Select all languages", size="sm", scale=1
57
- )
58
 
59
  def update_bar(selected_tab):
60
  if selected_tab in [0, 1]:
@@ -88,14 +86,10 @@ with demo:
88
  label="Select evaluation type",
89
  scale=29,
90
  )
91
- clear = gr.ClearButton(
92
- shown_tasks, value="Deselect all tasks", size="sm", scale=21
93
- )
94
 
95
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
96
- with gr.TabItem(
97
- "🏅 LLM accuracy benchmark", elem_id="llm-benchmark-tab-table-acc", id=0
98
- ) as acc:
99
  leaderboard_table = gr.Dataframe()
100
  with gr.TabItem(
101
  "๐ŸŒ LLM translation benchmark",
@@ -106,7 +100,7 @@ with demo:
106
 
107
  demo.load(
108
  core.update_task_groups_and_fewshot,
109
- [gr.State(value=0), model_types, langs_bar,fewshot],
110
  [shown_tasks, fewshot, selected_tab, model_types, langs_bar],
111
  )
112
  fewshot.change(
@@ -142,7 +136,6 @@ with demo:
142
  leaderboard_table_misc,
143
  )
144
 
145
-
146
  gr.Blocks.load(
147
  block=demo,
148
  fn=core.update_df,
 
1
  import gradio as gr
2
 
3
  import core as core
4
+ from style import CSS, LANG_SYMBOLS, T_SYMBOLS, TITLE
5
 
6
  demo = gr.Blocks(css=CSS)
7
  with demo:
 
38
  )
39
  with gr.Row():
40
  langs_bar = gr.CheckboxGroup(
41
+ choices=[(LANG_SYMBOLS.get(l, l), l) for l in core.languages_list],
42
  value=core.languages_list,
43
  label="Select languages to average over",
44
  elem_id="column-select",
 
52
  size="sm",
53
  scale=1,
54
  )
55
+ select = gr.Button(value="Select all languages", size="sm", scale=1)
 
 
56
 
57
  def update_bar(selected_tab):
58
  if selected_tab in [0, 1]:
 
86
  label="Select evaluation type",
87
  scale=29,
88
  )
89
+ clear = gr.ClearButton(shown_tasks, value="Deselect all tasks", size="sm", scale=21)
 
 
90
 
91
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
92
+ with gr.TabItem("🏅 LLM accuracy benchmark", elem_id="llm-benchmark-tab-table-acc", id=0) as acc:
 
 
93
  leaderboard_table = gr.Dataframe()
94
  with gr.TabItem(
95
  "๐ŸŒ LLM translation benchmark",
 
100
 
101
  demo.load(
102
  core.update_task_groups_and_fewshot,
103
+ [gr.State(value=0), model_types, langs_bar, fewshot],
104
  [shown_tasks, fewshot, selected_tab, model_types, langs_bar],
105
  )
106
  fewshot.change(
 
136
  leaderboard_table_misc,
137
  )
138
 
 
139
  gr.Blocks.load(
140
  block=demo,
141
  fn=core.update_df,
core.py CHANGED
@@ -7,7 +7,7 @@ import pandas as pd
7
  from datasets import load_dataset
8
 
9
  import style
10
- from style import T_SYMBOLS, LANG_SYMBOLS
11
 
12
  ZERO_SHOT_ONLY = ["BELEBELE"]
13
  FEW_SHOT_ONLY = ["GSM8K", "TruthfulQA"]
@@ -115,7 +115,7 @@ def update_df(
115
  # aggregate results over languages per task
116
  df = aggregate_langs(df, tasks, langs)
117
 
118
- df = df.sort_values(by='Average', ascending=False)
119
 
120
  # filter models by search bar and model type
121
  df = search_model(df, model_query)
@@ -127,7 +127,12 @@ def update_df(
127
  return sort_cols(df, fewshot)
128
 
129
 
130
- def update_task_groups_and_fewshot(current_selected_tab: int, model_types, langs_bar, is_fewshot_current: bool = False, ):
 
 
 
 
 
131
  selected_task_type = get_selected_task_type(current_selected_tab)
132
  available_tasks = get_available_task_groups(selected_task_type, is_fewshot_current)
133
  new_selected_tasks = available_tasks.copy()
@@ -159,7 +164,7 @@ def update_task_groups_and_fewshot(current_selected_tab: int, model_types, langs
159
  (f"Chat {T_SYMBOLS['chat']}", T_SYMBOLS["chat"]),
160
  ],
161
  value=list(T_SYMBOLS.values()),
162
- interactive=True
163
  )
164
  langs_bar = gr.CheckboxGroup(
165
  choices=[(LANG_SYMBOLS.get(l, l), l) for l in languages_list],
 
7
  from datasets import load_dataset
8
 
9
  import style
10
+ from style import LANG_SYMBOLS, T_SYMBOLS
11
 
12
  ZERO_SHOT_ONLY = ["BELEBELE"]
13
  FEW_SHOT_ONLY = ["GSM8K", "TruthfulQA"]
 
115
  # aggregate results over languages per task
116
  df = aggregate_langs(df, tasks, langs)
117
 
118
+ df = df.sort_values(by="Average", ascending=False)
119
 
120
  # filter models by search bar and model type
121
  df = search_model(df, model_query)
 
127
  return sort_cols(df, fewshot)
128
 
129
 
130
+ def update_task_groups_and_fewshot(
131
+ current_selected_tab: int,
132
+ model_types,
133
+ langs_bar,
134
+ is_fewshot_current: bool = False,
135
+ ):
136
  selected_task_type = get_selected_task_type(current_selected_tab)
137
  available_tasks = get_available_task_groups(selected_task_type, is_fewshot_current)
138
  new_selected_tasks = available_tasks.copy()
 
164
  (f"Chat {T_SYMBOLS['chat']}", T_SYMBOLS["chat"]),
165
  ],
166
  value=list(T_SYMBOLS.values()),
167
+ interactive=True,
168
  )
169
  langs_bar = gr.CheckboxGroup(
170
  choices=[(LANG_SYMBOLS.get(l, l), l) for l in languages_list],
style.py CHANGED
@@ -11,10 +11,7 @@ CSS = """
11
  }
12
  """
13
 
14
- T_SYMBOLS = {
15
- "pretrained": "🟢",
16
- "chat": "💬"
17
- }
18
 
19
  LANG_SYMBOLS = {
20
  "BG": "🇧🇬 BG",
@@ -37,6 +34,5 @@ LANG_SYMBOLS = {
37
  "RO": "🇷🇴 RO",
38
  "SK": "🇸🇰 SK",
39
  "SL": "🇸🇮 SL",
40
- "SV": "🇸🇪 SV"
41
  }
42
-
 
11
  }
12
  """
13
 
14
+ T_SYMBOLS = {"pretrained": "🟢", "chat": "💬"}
 
 
 
15
 
16
  LANG_SYMBOLS = {
17
  "BG": "🇧🇬 BG",
 
34
  "RO": "🇷🇴 RO",
35
  "SK": "🇸🇰 SK",
36
  "SL": "🇸🇮 SL",
37
+ "SV": "🇸🇪 SV",
38
  }