Clémentine committed on
Commit
8618a2a
1 Parent(s): 4fc3864

added collections back to main

Browse files
Files changed (2) hide show
  1. app.py +3 -1
  2. src/tools/collections.py +76 -0
app.py CHANGED
@@ -3,7 +3,7 @@ import logging
3
  import time
4
  import gradio as gr
5
  import datasets
6
- from apscheduler.schedulers.background import BackgroundScheduler
7
  from huggingface_hub import snapshot_download
8
  from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
9
  from gradio_space_ci import enable_space_ci
@@ -105,6 +105,8 @@ def init_space(full_init: bool = True):
105
  cols=COLS,
106
  benchmark_cols=BENCHMARK_COLS,
107
  )
 
 
108
 
109
  # Evaluation queue DataFrame retrieval is independent of initialization detail level
110
  eval_queue_dfs = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
 
3
  import time
4
  import gradio as gr
5
  import datasets
6
+ from src.tools.collections import update_collections
7
  from huggingface_hub import snapshot_download
8
  from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
9
  from gradio_space_ci import enable_space_ci
 
105
  cols=COLS,
106
  benchmark_cols=BENCHMARK_COLS,
107
  )
108
+ if full_init:
109
+ update_collections(leaderboard_df)
110
 
111
  # Evaluation queue DataFrame retrieval is independent of initialization detail level
112
  eval_queue_dfs = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
src/tools/collections.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from huggingface_hub import add_collection_item, delete_collection_item, get_collection, update_collection_item
3
+ from huggingface_hub.utils._errors import HfHubHTTPError
4
+ from pandas import DataFrame
5
+
6
+ from src.display.utils import AutoEvalColumn, ModelType
7
+ from src.envs import HF_TOKEN, PATH_TO_COLLECTION
8
+
9
# Size buckets for the collections, keyed by the label used in the item note.
# Values are parameter counts (presumably in billions, given the "1B".."65B"
# labels -- confirm against the params column). Only the "1B" bucket includes
# its upper bound; every other bucket excludes both endpoints.
intervals = {
    label: pd.Interval(lower, upper, closed=closed)
    for label, lower, upper, closed in (
        ("1B", 0, 1.5, "right"),
        ("3B", 2.5, 3.5, "neither"),
        ("7B", 6, 8, "neither"),
        ("13B", 10, 14, "neither"),
        ("30B", 25, 35, "neither"),
        ("65B", 60, 70, "neither"),
    )
}
18
+
19
+
20
def _filter_by_type_and_size(df, model_type, size_interval):
    """Return the rows of ``df`` matching a model type and a size interval.

    Args:
        df: DataFrame holding the columns named by
            ``AutoEvalColumn.model_type_symbol`` and ``AutoEvalColumn.params``.
        model_type: ``ModelType`` member; the first character of its
            ``value.symbol`` is compared against the type-symbol column.
        size_interval: ``pd.Interval`` that the numeric params value must
            fall into.

    Returns:
        The subset of ``df`` satisfying both conditions, in original row order.
    """
    type_emoji = model_type.value.symbol[0]
    type_mask = df[AutoEvalColumn.model_type_symbol.name] == type_emoji

    # Values that cannot be parsed as numbers become NaN, and NaN is never
    # contained in an interval, so such rows are dropped.
    params_column = pd.to_numeric(df[AutoEvalColumn.params.name], errors="coerce")
    size_mask = pd.Series(
        [value in size_interval for value in params_column],
        index=df.index,
        dtype=bool,  # keeps a boolean dtype even when ``df`` is empty
    )

    # Both masks are built from ``df`` and share its index, so the selection
    # does not depend on pandas re-aligning a mask taken from a different
    # (larger) frame than the one being indexed.
    return df.loc[type_mask & size_mask]
27
+
28
+
29
def _add_models_to_collection(collection, models, model_type, size):
    """Add the best available model of a category to the Hub collection.

    Candidates are tried in order; the loop stops after the first
    ``add_collection_item`` call that does not raise, so at most one model
    per call is added. A candidate whose request fails with
    ``HfHubHTTPError`` is skipped and the next one is tried.

    Args:
        collection: Current collection object; only ``collection.items`` is
            read, to detect whether the add call created a new entry.
        models: Candidate model ids, best first.
        model_type: ``ModelType`` member, used to build the item note.
        size: Size label (key of ``intervals``), used in the item note.

    Returns:
        The collection as returned by the last successful add call, or the
        ``collection`` argument unchanged if nothing could be added. The
        original code returned ``None``; existing callers that ignore the
        result are unaffected.
    """
    cur_len_collection = len(collection.items)
    for ix, model in enumerate(models, start=1):
        try:
            collection = add_collection_item(
                PATH_TO_COLLECTION,
                item_id=model,
                item_type="model",
                exists_ok=True,
                note=f"Best {model_type.to_str(' ')} model of around {size} on the leaderboard today!",
                token=HF_TOKEN,
            )
            # With exists_ok=True the call also succeeds when the model was
            # already present; only reposition when an entry was really added.
            if len(collection.items) > cur_len_collection:
                item_object_id = collection.items[-1].item_object_id
                update_collection_item(
                    collection_slug=PATH_TO_COLLECTION,
                    item_object_id=item_object_id,
                    position=ix,
                    # Authenticate like every other Hub call in this module.
                    token=HF_TOKEN,
                )
            break  # assuming we only add the top model
        except HfHubHTTPError:
            continue
    return collection
50
+
51
+
52
def update_collections(df: DataFrame):
    """Synchronise the Hub collection with the current best models.

    For every named ``ModelType`` and every size bucket in ``intervals``,
    the (up to) ten best models by average score are computed and handed to
    ``_add_models_to_collection``. Afterwards, model entries of the
    collection that are not among any of those best-model lists are removed.

    Args:
        df: Leaderboard DataFrame with the columns named by
            ``AutoEvalColumn`` (type symbol, params, average, fullname).
    """
    # Snapshot taken before any addition. Anything added below is by
    # construction in ``cur_best_models``, so the snapshot is sufficient
    # for deciding what to clean up.
    collection = get_collection(collection_slug=PATH_TO_COLLECTION, token=HF_TOKEN)
    cur_best_models = []

    for model_type in ModelType:
        # Skip model types that have no display name.
        if not model_type.value.name:
            continue
        for size, interval in intervals.items():
            filtered_df = _filter_by_type_and_size(df, model_type, interval)
            ranked = filtered_df.sort_values(AutoEvalColumn.average.name, ascending=False)
            best_models = list(ranked[AutoEvalColumn.fullname.name][:10])
            print(model_type.value.symbol, size, best_models)
            _add_models_to_collection(collection, best_models, model_type, size)
            cur_best_models.extend(best_models)

    # Cleanup
    best_model_ids = set(cur_best_models)
    for item in collection.items:
        # Only model entries are managed here; leave any other item type
        # (datasets, spaces, papers, ...) untouched.
        if item.item_type != "model" or item.item_id in best_model_ids:
            continue
        try:
            # The Hub expects the collection-internal ``item_object_id``,
            # not the repo id stored in ``item_id``.
            delete_collection_item(
                collection_slug=PATH_TO_COLLECTION,
                item_object_id=item.item_object_id,
                token=HF_TOKEN,
            )
        except HfHubHTTPError:
            # Best effort: a failed removal must not abort the whole update.
            continue