Alina Lozovskaia committed
Commit 86c3dd5 • Parent: 50c352c

apply code style and quality checks to app.py

Files changed (1):
  1. app.py +26 -27
app.py CHANGED
@@ -1,5 +1,4 @@
 import os
-import pandas as pd
 import logging
 import time
 import gradio as gr
@@ -23,8 +22,6 @@ from src.display.utils import (
     COLS,
     EVAL_COLS,
     EVAL_TYPES,
-    NUMERIC_INTERVALS,
-    TYPES,
     AutoEvalColumn,
     ModelType,
     Precision,
@@ -51,11 +48,12 @@ from src.tools.collections import update_collections
 from src.tools.plots import create_metric_plot_obj, create_plot_df, create_scores_df

 # Configure logging
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

 # Start ephemeral Spaces on PRs (see config in README.md)
 enable_space_ci()

+
 def restart_space():
     API.restart_space(repo_id=REPO_ID, token=H4_TOKEN)

@@ -68,6 +66,7 @@ def time_diff_wrapper(func):
         diff = end_time - start_time
         logging.info(f"Time taken for {func.__name__}: {diff} seconds")
         return result
+
     return wrapper

@@ -89,12 +88,13 @@ def download_dataset(repo_id, local_dir, repo_type="dataset", max_attempts=3, ba
             logging.info("Download successful")
             return
         except Exception as e:
-            wait_time = backoff_factor ** attempt
+            wait_time = backoff_factor**attempt
             logging.error(f"Error downloading {repo_id}: {e}, retrying in {wait_time}s")
             time.sleep(wait_time)
             attempt += 1
     raise Exception(f"Failed to download {repo_id} after {max_attempts} attempts")

+
 def init_space(full_init: bool = True):
     """Initializes the application space, loading only necessary data."""
     if full_init:
@@ -120,12 +120,13 @@ def init_space(full_init: bool = True):
         update_collections(original_df)

     leaderboard_df = original_df.copy()
-
+
     # Evaluation queue DataFrame retrieval is independent of initialization detail level
     eval_queue_dfs = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)

     return leaderboard_df, raw_data, original_df, eval_queue_dfs

+
 # Convert the environment variable "LEADERBOARD_FULL_INIT" to a boolean value, defaulting to True if the variable is not set.
 # This controls whether a full initialization should be performed.
 do_full_init = os.getenv("LEADERBOARD_FULL_INIT", "True") == "True"
@@ -153,36 +154,34 @@ with demo:
                 value=leaderboard_df,
                 datatype=[c.type for c in fields(AutoEvalColumn)],
                 select_columns=SelectColumns(
-                    default_selection=[
-                        c.name
-                        for c in fields(AutoEvalColumn)
-                        if c.displayed_by_default
-                    ],
+                    default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
                     cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden or c.dummy],
                     label="Select Columns to Display:",
                 ),
-                search_columns=[
-                    AutoEvalColumn.model.name,
-                    AutoEvalColumn.fullname.name,
-                    AutoEvalColumn.license.name
-                ],
-                hide_columns=[
-                    c.name
-                    for c in fields(AutoEvalColumn)
-                    if c.hidden
-                ],
+                search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.fullname.name, AutoEvalColumn.license.name],
+                hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
                 filter_columns=[
                     ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
                     ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
-                    ColumnFilter(AutoEvalColumn.params.name, type="slider", min=0, max=150, label="Select the number of parameters (B)"),
-                    ColumnFilter(AutoEvalColumn.still_on_hub.name, type="boolean", label="Private or deleted", default=True),
-                    ColumnFilter(AutoEvalColumn.merged.name, type="boolean", label="Contains a merge/moerge", default=True),
+                    ColumnFilter(
+                        AutoEvalColumn.params.name,
+                        type="slider",
+                        min=0,
+                        max=150,
+                        label="Select the number of parameters (B)",
+                    ),
+                    ColumnFilter(
+                        AutoEvalColumn.still_on_hub.name, type="boolean", label="Private or deleted", default=True
+                    ),
+                    ColumnFilter(
+                        AutoEvalColumn.merged.name, type="boolean", label="Contains a merge/moerge", default=True
+                    ),
                     ColumnFilter(AutoEvalColumn.moe.name, type="boolean", label="MoE", default=False),
                     ColumnFilter(AutoEvalColumn.not_flagged.name, type="boolean", label="Flagged", default=True),
                 ],
-                bool_checkboxgroup_label="Hide models"
+                bool_checkboxgroup_label="Hide models",
             )
-
+
         with gr.TabItem("📈 Metrics through time", elem_id="llm-benchmark-tab-table", id=2):
             with gr.Row():
                 with gr.Column():
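Behaviorally, each boolean ColumnFilter in the "Hide models" group keeps only rows where the column is truthy while its box is checked (default=True checks it on load). A pandas sketch of that filtering logic (illustrative only; not the component's implementation):

import pandas as pd

df = pd.DataFrame(
    {
        "model": ["m1", "m2", "m3"],
        "still_on_hub": [True, False, True],
        "not_flagged": [True, True, False],
    }
)

# With both boxes checked (default=True), private/deleted and flagged models are hidden:
visible = df[df["still_on_hub"] & df["not_flagged"]]
print(visible["model"].tolist())  # ["m1"]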
@@ -313,4 +312,4 @@ scheduler.add_job(restart_space, "interval", hours=3)  # restarted every 3h
 scheduler.add_job(update_dynamic_files, "interval", hours=2)  # launched every 2 hour
 scheduler.start()

-demo.queue(default_concurrency_limit=40).launch()
+demo.queue(default_concurrency_limit=40).launch()
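The scheduler at the bottom of the file is presumably APScheduler's BackgroundScheduler (only add_job and start are visible in the diff). A minimal standalone version of the same run-on-interval pattern:

from apscheduler.schedulers.background import BackgroundScheduler


def restart_space():
    print("restarting space...")  # stand-in for API.restart_space(repo_id=..., token=...)


scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", hours=3)  # restarted every 3h, as in app.py
scheduler.add_job(lambda: print("updating files..."), "interval", hours=2)  # launched every 2 hours
scheduler.start()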
 