abhinav-joshi committed on
Commit
a92fba7
1 Parent(s): 679c7e6
Files changed (2) hide show
  1. app.py +27 -19
  2. src/about.py +11 -2
app.py CHANGED
@@ -24,7 +24,7 @@ from src.display.utils import (
24
  ModelType,
25
  fields,
26
  WeightType,
27
- Precision
28
  )
29
  from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
30
  from src.populate import get_evaluation_queue_df, get_leaderboard_df
@@ -34,17 +34,28 @@ from src.submission.submit import add_new_eval
34
  def restart_space():
35
  API.restart_space(repo_id=REPO_ID)
36
 
 
37
  try:
38
  print(EVAL_REQUESTS_PATH)
39
  snapshot_download(
40
- repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
 
 
 
 
 
41
  )
42
  except Exception:
43
  restart_space()
44
  try:
45
  print(EVAL_RESULTS_PATH)
46
  snapshot_download(
47
- repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
 
 
 
 
 
48
  )
49
  except Exception:
50
  restart_space()
@@ -86,9 +97,7 @@ def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:
86
  AutoEvalColumn.model.name,
87
  ]
88
  # We use COLS to maintain sorting
89
- filtered_df = df[
90
- always_here_cols + [c for c in COLS if c in df.columns and c in columns]
91
- ]
92
  return filtered_df
93
 
94
 
@@ -138,7 +147,7 @@ with demo:
138
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
139
 
140
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
141
- with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
142
  with gr.Row():
143
  with gr.Column():
144
  with gr.Row():
@@ -149,11 +158,7 @@ with demo:
149
  )
150
  with gr.Row():
151
  shown_columns = gr.CheckboxGroup(
152
- choices=[
153
- c.name
154
- for c in fields(AutoEvalColumn)
155
- if not c.hidden and not c.never_hidden
156
- ],
157
  value=[
158
  c.name
159
  for c in fields(AutoEvalColumn)
@@ -168,7 +173,7 @@ with demo:
168
  value=False, label="Show gated/private/deleted models", interactive=True
169
  )
170
  with gr.Column(min_width=320):
171
- #with gr.Box(elem_id="box-filter"):
172
  filter_columns_type = gr.CheckboxGroup(
173
  label="Model types",
174
  choices=[t.to_str() for t in ModelType],
@@ -192,10 +197,7 @@ with demo:
192
  )
193
 
194
  leaderboard_table = gr.components.Dataframe(
195
- value=leaderboard_df[
196
- [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
197
- + shown_columns.value
198
- ],
199
  headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value,
200
  datatype=TYPES,
201
  elem_id="leaderboard-table",
@@ -223,7 +225,13 @@ with demo:
223
  ],
224
  leaderboard_table,
225
  )
226
- for selector in [shown_columns, filter_columns_type, filter_columns_precision, filter_columns_size, deleted_models_visibility]:
 
 
 
 
 
 
227
  selector.change(
228
  update_table,
229
  [
@@ -342,4 +350,4 @@ with demo:
342
  scheduler = BackgroundScheduler()
343
  scheduler.add_job(restart_space, "interval", seconds=1800)
344
  scheduler.start()
345
- demo.queue(default_concurrency_limit=40).launch()
 
24
  ModelType,
25
  fields,
26
  WeightType,
27
+ Precision,
28
  )
29
  from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
30
  from src.populate import get_evaluation_queue_df, get_leaderboard_df
 
34
  def restart_space():
35
  API.restart_space(repo_id=REPO_ID)
36
 
37
+
38
  try:
39
  print(EVAL_REQUESTS_PATH)
40
  snapshot_download(
41
+ repo_id=QUEUE_REPO,
42
+ local_dir=EVAL_REQUESTS_PATH,
43
+ repo_type="dataset",
44
+ tqdm_class=None,
45
+ etag_timeout=30,
46
+ token=TOKEN,
47
  )
48
  except Exception:
49
  restart_space()
50
  try:
51
  print(EVAL_RESULTS_PATH)
52
  snapshot_download(
53
+ repo_id=RESULTS_REPO,
54
+ local_dir=EVAL_RESULTS_PATH,
55
+ repo_type="dataset",
56
+ tqdm_class=None,
57
+ etag_timeout=30,
58
+ token=TOKEN,
59
  )
60
  except Exception:
61
  restart_space()
 
97
  AutoEvalColumn.model.name,
98
  ]
99
  # We use COLS to maintain sorting
100
+ filtered_df = df[always_here_cols + [c for c in COLS if c in df.columns and c in columns]]
 
 
101
  return filtered_df
102
 
103
 
 
147
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
148
 
149
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
150
+ with gr.TabItem("🏅 IL-TUR Benchmark", elem_id="llm-benchmark-tab-table", id=0):
151
  with gr.Row():
152
  with gr.Column():
153
  with gr.Row():
 
158
  )
159
  with gr.Row():
160
  shown_columns = gr.CheckboxGroup(
161
+ choices=[c.name for c in fields(AutoEvalColumn) if not c.hidden and not c.never_hidden],
 
 
 
 
162
  value=[
163
  c.name
164
  for c in fields(AutoEvalColumn)
 
173
  value=False, label="Show gated/private/deleted models", interactive=True
174
  )
175
  with gr.Column(min_width=320):
176
+ # with gr.Box(elem_id="box-filter"):
177
  filter_columns_type = gr.CheckboxGroup(
178
  label="Model types",
179
  choices=[t.to_str() for t in ModelType],
 
197
  )
198
 
199
  leaderboard_table = gr.components.Dataframe(
200
+ value=leaderboard_df[[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value],
 
 
 
201
  headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value,
202
  datatype=TYPES,
203
  elem_id="leaderboard-table",
 
225
  ],
226
  leaderboard_table,
227
  )
228
+ for selector in [
229
+ shown_columns,
230
+ filter_columns_type,
231
+ filter_columns_precision,
232
+ filter_columns_size,
233
+ deleted_models_visibility,
234
+ ]:
235
  selector.change(
236
  update_table,
237
  [
 
350
  scheduler = BackgroundScheduler()
351
  scheduler.add_job(restart_space, "interval", seconds=1800)
352
  scheduler.start()
353
+ demo.queue(default_concurrency_limit=40).launch()
src/about.py CHANGED
@@ -1,6 +1,7 @@
1
  from dataclasses import dataclass
2
  from enum import Enum
3
 
 
4
  @dataclass
5
  class Task:
6
  benchmark: str
@@ -11,14 +12,22 @@ class Task:
11
  # Select your tasks here
12
  # ---------------------------------------------------
13
  class Tasks(Enum):
14
- # task_key in the json file, metric_key in the json file, name to display in the leaderboard
15
  task0 = Task("anli_r1", "acc", "Legal Named Entity Recognition (L-NER)")
16
  task1 = Task("logiqa", "acc_norm", "Rhetorical Role Prediction (RR)")
 
 
 
 
 
 
17
 
18
- NUM_FEWSHOT = 0 # Change with your few shot
19
  # ---------------------------------------------------
20
 
21
 
 
 
 
22
 
23
  # Your leaderboard name
24
  TITLE = """<h1 align="center" id="space-title">IL-TUR leaderboard</h1>"""
 
1
  from dataclasses import dataclass
2
  from enum import Enum
3
 
4
+
5
  @dataclass
6
  class Task:
7
  benchmark: str
 
12
  # Select your tasks here
13
  # ---------------------------------------------------
14
  class Tasks(Enum):
15
+ # task_key in the json file, metric_key in the json file, name to display in the leaderboard
16
  task0 = Task("anli_r1", "acc", "Legal Named Entity Recognition (L-NER)")
17
  task1 = Task("logiqa", "acc_norm", "Rhetorical Role Prediction (RR)")
18
+ task2 = Task("logiqa", "acc_norm", "Court Judgment Prediction and Explanation (CJPE)")
19
+ task3 = Task("logiqa", "acc_norm", "Bail Prediction (BAIL)")
20
+ task4 = Task("logiqa", "acc_norm", "Legal Statute Identification (LSI)")
21
+ task5 = Task("logiqa", "acc_norm", "Prior Case Retrieval (PCR)")
22
+ task6 = Task("logiqa", "acc_norm", "Summarization (SUMM)")
23
+
24
 
 
25
  # ---------------------------------------------------
26
 
27
 
28
+ NUM_FEWSHOT = 0 # Change with your few shot
29
+ # ---------------------------------------------------
30
+
31
 
32
  # Your leaderboard name
33
  TITLE = """<h1 align="center" id="space-title">IL-TUR leaderboard</h1>"""