chaeeunlee commited on
Commit
0c86d49
·
1 Parent(s): a0fc18f

first commit, migrated from chaeeun's local

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .DS_Store +0 -0
  2. README.md +5 -5
  3. app.py +343 -0
  4. app_empty.py +7 -0
  5. backend-cli.py +167 -0
  6. eval-queue-bk/.DS_Store +0 -0
  7. eval-queue-bk/.gitattributes +55 -0
  8. eval-queue-bk/EleutherAI/pythia-160m_eval_request_False_float32_Original.json +1 -0
  9. eval-queue-bk/EleutherAI/pythia-410m_eval_request_False_float32_Original.json +1 -0
  10. eval-queue-bk/EleutherAI/pythia-70m_eval_request_False_float32_Original.json +1 -0
  11. eval-queue-bk/README.md +3 -0
  12. eval-queue/.DS_Store +0 -0
  13. eval-queue/.gitattributes +55 -0
  14. eval-queue/EleutherAI/pythia-160m_eval_request_False_float32_Original.json +1 -0
  15. eval-queue/EleutherAI/pythia-410m_eval_request_False_float32_Original.json +1 -0
  16. eval-queue/EleutherAI/pythia-70m_eval_request_False_float32_Original.json +1 -0
  17. eval-queue/README.md +3 -0
  18. eval-results-bk/.DS_Store +0 -0
  19. eval-results-bk/.gitattributes +55 -0
  20. eval-results-bk/EleutherAI/.DS_Store +0 -0
  21. eval-results-bk/EleutherAI/pythia-160m/results_2024-01-31 13:35:29.978568.json +0 -0
  22. eval-results-bk/EleutherAI/pythia-160m/results_2024-01-31 13:35:30.399383.json +0 -0
  23. eval-results-bk/EleutherAI/pythia-160m/results_2024-01-31 13:48:48.327864.json +0 -0
  24. eval-results-bk/EleutherAI/pythia-160m/results_2024-01-31 23:02:53.510226.json +0 -0
  25. eval-results-bk/EleutherAI/pythia-160m/results_2024-01-31 23:09:15.254321.json +0 -0
  26. eval-results-bk/EleutherAI/pythia-160m/results_hellaswag_2024-02-01 00:42:54.031633.json +0 -0
  27. eval-results-bk/EleutherAI/pythia-160m/results_hellaswag_2024-02-01 00:42:54.349884.json +0 -0
  28. eval-results-bk/EleutherAI/pythia-160m/results_pubmedqa_2024-02-01 00:28:27.147005.json +0 -0
  29. eval-results-bk/EleutherAI/pythia-160m/results_pubmedqa_2024-02-01 00:28:27.422530.json +0 -0
  30. eval-results-bk/EleutherAI/pythia-410m/results_hellaswag_2024-02-01 03:33:27.647868.json +0 -0
  31. eval-results-bk/EleutherAI/pythia-410m/results_pubmedqa_2024-02-01 03:05:20.335717.json +0 -0
  32. eval-results-bk/EleutherAI/pythia-410m/results_pubmedqa_2024-02-01 03:06:30.820158.json +0 -0
  33. eval-results-bk/EleutherAI/pythia-70m/results_hellaswag_2024-02-01 02:50:41.186578.json +0 -0
  34. eval-results-bk/EleutherAI/pythia-70m/results_hellaswag_2024-02-01 02:50:41.837997.json +0 -0
  35. eval-results-bk/README.md +3 -0
  36. eval-results/.DS_Store +0 -0
  37. eval-results/.gitattributes +55 -0
  38. eval-results/EleutherAI/.DS_Store +0 -0
  39. eval-results/EleutherAI/pythia-160m/results_hellaswag_2024-02-01 00:42:54.349884.json +0 -0
  40. eval-results/EleutherAI/pythia-160m/results_pubmedqa_2024-02-01 00:28:27.422530.json +0 -0
  41. eval-results/EleutherAI/pythia-410m/results_hellaswag_2024-02-01 03:33:28.471084.json +0 -0
  42. eval-results/EleutherAI/pythia-410m/results_pubmedqa_2024-02-01 03:06:30.820158.json +0 -0
  43. eval-results/EleutherAI/pythia-70m/results_hellaswag_2024-02-01 02:50:41.837997.json +0 -0
  44. eval-results/README.md +3 -0
  45. manage_repos.ipynb +176 -0
  46. requirements.txt +31 -0
  47. src/.DS_Store +0 -0
  48. src/__pycache__/envs.cpython-310.pyc +0 -0
  49. src/__pycache__/populate.cpython-310.pyc +0 -0
  50. src/__pycache__/utils.cpython-310.pyc +0 -0
.DS_Store ADDED
Binary file (8.2 kB). View file
 
README.md CHANGED
@@ -1,13 +1,13 @@
1
  ---
2
- title: Biomed Probing Leaderboard
3
- emoji: 🏆
4
  colorFrom: pink
5
- colorTo: yellow
6
  sdk: gradio
7
- sdk_version: 4.16.0
8
  app_file: app.py
9
  pinned: false
10
- license: mit
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Test Leaderboard
3
+ emoji: 🐢
4
  colorFrom: pink
5
+ colorTo: red
6
  sdk: gradio
7
+ sdk_version: 4.15.0
8
  app_file: app.py
9
  pinned: false
10
+ license: apache-2.0
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,343 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+
4
+ from src.display.css_html_js import custom_css
5
+
6
+ from src.display.about import (
7
+ CITATION_BUTTON_LABEL,
8
+ CITATION_BUTTON_TEXT,
9
+ EVALUATION_QUEUE_TEXT,
10
+ INTRODUCTION_TEXT,
11
+ LLM_BENCHMARKS_TEXT,
12
+ LLM_BENCHMARKS_DETAILS,
13
+ FAQ_TEXT,
14
+ TITLE,
15
+ )
16
+
17
+ from src.display.utils import (
18
+ BENCHMARK_COLS,
19
+ COLS,
20
+ EVAL_COLS,
21
+ EVAL_TYPES,
22
+ NUMERIC_INTERVALS,
23
+ TYPES,
24
+ AutoEvalColumn,
25
+ ModelType,
26
+ fields,
27
+ WeightType,
28
+ Precision
29
+ )
30
+
31
+ from src.populate import get_evaluation_queue_df, get_leaderboard_df
32
+
33
+ from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, H4_TOKEN, IS_PUBLIC, QUEUE_REPO, REPO_ID, RESULTS_REPO
34
+ from src.submission.submit import add_new_eval
35
+
36
+ from src.display.utils import Tasks
37
+
38
+ from huggingface_hub import snapshot_download
39
+
40
+ ## ------- ## ------- ## ------- ## ------- ## ------- ## ------- ## ------- ## ------- ## ------- ## ------- ## ------- ## -------
41
+
def restart_space():
    """Restart the Hugging Face Space that hosts this leaderboard app."""
    # Token is implicit in the API client; H4_TOKEN intentionally not passed here.
    API.restart_space(repo_id=REPO_ID)
+
def ui_snapshot_download(repo_id, local_dir, repo_type, tqdm_class, etag_timeout):
    """Mirror *repo_id* into *local_dir*, restarting the Space on any failure.

    Thin wrapper around ``huggingface_hub.snapshot_download``; a download failure
    is treated as unrecoverable for the UI, so the Space is restarted.

    Args:
        repo_id: hub repository to download.
        local_dir: local destination directory.
        repo_type: hub repo type (e.g. "dataset").
        tqdm_class: progress-bar class forwarded to snapshot_download (or None).
        etag_timeout: etag request timeout in seconds.
    """
    try:
        print(f"local_dir for snapshot download = {local_dir}")
        snapshot_download(repo_id=repo_id, local_dir=local_dir, repo_type=repo_type,
                          tqdm_class=tqdm_class, etag_timeout=etag_timeout)
    except Exception as e:
        # Fix: the original printed a placeholder-less f-string and discarded the
        # exception, making failures undiagnosable. Log the cause before restarting.
        print(f"ui_snapshot_download failed ({e!r}). restarting space...")
        restart_space()
+
# Searching and filtering
def update_table(hidden_df: pd.DataFrame, columns: list, type_query: list, precision_query: list, size_query: list, query: str):
    """Recompute the visible leaderboard: filter by model facets, then search, then project columns."""
    print(f"hidden_df = {hidden_df}")
    # Deleted models are always shown in this UI (show_deleted fixed to True).
    narrowed = filter_models(hidden_df, type_query, size_query, precision_query, True)
    print(f"filtered_df = {narrowed}")
    narrowed = filter_queries(query, narrowed)
    result = select_columns(narrowed, columns)
    print(f"df = {result}")
    return result
+
def search_table(df: pd.DataFrame, query: str) -> pd.DataFrame:
    """Return the rows whose dummy (full model name) column contains *query*, case-insensitively."""
    name_matches = df[AutoEvalColumn.dummy.name].str.contains(query, case=False)
    return df[name_matches]
+
def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:
    """Project *df* onto the user-selected columns, keeping the always-visible ones.

    COLS fixes the column ordering; the dummy column is retained because the
    search box matches against it.
    """
    pinned = [
        AutoEvalColumn.model_type_symbol.name,
        AutoEvalColumn.model.name,
    ]
    chosen = [c for c in COLS if c in df.columns and c in columns]
    return df[pinned + chosen + [AutoEvalColumn.dummy.name]]
+
def filter_queries(query: str, filtered_df: pd.DataFrame):
    """Keep rows matching any ';'-separated sub-query; de-duplicate the union.

    An empty query — or one with no matches at all — leaves *filtered_df* unchanged.
    """
    if query == "":
        return filtered_df

    partial_results = []
    for raw in query.split(";"):
        sub_query = raw.strip()
        if not sub_query:
            continue
        hits = search_table(filtered_df, sub_query)
        if len(hits) > 0:
            partial_results.append(hits)

    if partial_results:
        combined = pd.concat(partial_results)
        # The same model can match several sub-queries; keep one row per
        # (model, precision, revision) triple.
        filtered_df = combined.drop_duplicates(
            subset=[AutoEvalColumn.model.name, AutoEvalColumn.precision.name, AutoEvalColumn.revision.name]
        )

    return filtered_df
+
def filter_models(df: pd.DataFrame, type_query: list, size_query: list, precision_query: list, show_deleted: bool) -> pd.DataFrame:
    """Filter the leaderboard by model type, precision, and parameter-count bucket.

    Args:
        df: full leaderboard frame.
        type_query: selected model-type strings (first char is the emoji symbol).
        size_query: selected NUMERIC_INTERVALS bucket labels.
        precision_query: selected precision names ("None" is always allowed through).
        show_deleted: when False, keep only models still on the hub.

    Returns:
        The filtered frame.
    """
    print(f"filter_models()'s df: {df}\n")
    # Show all models
    if show_deleted:
        filtered_df = df
    else:  # Show only still-on-the-hub models
        # BUG FIX: the original used `df[df[col] is True]`; `Series is True` is an
        # identity test that always evaluates to False, so the frame was indexed
        # with the scalar False (a KeyError), never an actual row filter.
        filtered_df = df[df[AutoEvalColumn.still_on_hub.name] == True]  # noqa: E712

    type_emoji = [t[0] for t in type_query]
    # Build the masks from `filtered_df` (the original used `df`) so the boolean
    # index stays aligned after the show_deleted filter above drops rows.
    filtered_df = filtered_df.loc[filtered_df[AutoEvalColumn.model_type_symbol.name].isin(type_emoji)]
    filtered_df = filtered_df.loc[filtered_df[AutoEvalColumn.precision.name].isin(precision_query + ["None"])]

    numeric_interval = pd.IntervalIndex(sorted([NUMERIC_INTERVALS[s] for s in size_query]))
    # Non-numeric params become NaN and are dropped by the interval test below.
    params_column = pd.to_numeric(filtered_df[AutoEvalColumn.params.name], errors="coerce")
    mask = params_column.apply(lambda x: any(numeric_interval.contains(x)))
    filtered_df = filtered_df.loc[mask]

    return filtered_df
+
119
+
## ------- ## ------- ## ------- ## ------- ## ------- ## ------- ## ------- ## ------- ## ------- ## ------- ## ------- ## -------

# Mirror the request queue and the results repos locally before building the UI.
ui_snapshot_download(repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30)
ui_snapshot_download(repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30)


# NOTE: results may only be saved in the *_bk dirs — verify which repo actually
# holds the result files before relying on this path.
raw_data, original_df = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
leaderboard_df = original_df.copy()
################################################################################################################################
# Gradio UI: one tab renders the leaderboard, one tab accepts new submissions.
demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:

        # toggle break 1: this tab just RENDERS existing result files on remote repo.
        with gr.TabItem("Benchmarks", elem_id="llm-benchmark-tab-table", id=0):

            with gr.Row():
                with gr.Column():
                    with gr.Row():
                        search_bar = gr.Textbox(placeholder=" 🔍 Model search (separate multiple queries with `;`)", show_label=False, elem_id="search-bar",)
                    with gr.Row():
                        # Columns the user may toggle; hidden/never-hidden/dummy are excluded.
                        shown_columns = gr.CheckboxGroup(
                            choices=[
                                c.name
                                for c in fields(AutoEvalColumn)
                                if not c.hidden and not c.never_hidden and not c.dummy
                            ],
                            value=[
                                c.name
                                for c in fields(AutoEvalColumn)
                                if c.displayed_by_default and not c.hidden and not c.never_hidden
                            ],
                            label="Select columns to show",
                            elem_id="column-select",
                            interactive=True,
                        )

                with gr.Column(min_width=320):
                    filter_columns_type = gr.CheckboxGroup(
                        label="Model types",
                        choices=[t.to_str() for t in ModelType],
                        value=[t.to_str() for t in ModelType],
                        interactive=True,
                        elem_id="filter-columns-type",
                    )
                    filter_columns_precision = gr.CheckboxGroup(
                        label="Precision",
                        choices=[i.value.name for i in Precision],
                        value=[i.value.name for i in Precision],
                        interactive=True,
                        elem_id="filter-columns-precision",
                    )
                    filter_columns_size = gr.CheckboxGroup(
                        label="Model sizes (in billions of parameters)",
                        choices=list(NUMERIC_INTERVALS.keys()),
                        value=list(NUMERIC_INTERVALS.keys()),
                        interactive=True,
                        elem_id="filter-columns-size",
                    )

            # leaderboard_table = gr.components.Dataframe(
            #     value=leaderboard_df[
            #         [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
            #         + shown_columns.value
            #         + [AutoEvalColumn.dummy.name]
            #     ] if leaderboard_df.empty is False else leaderboard_df,
            #     headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value,
            #     datatype=TYPES,
            #     elem_id="leaderboard-table",
            #     interactive=False,
            #     visible=True,
            #     column_widths=["2%", "20%"]
            # )
            # The rendered leaderboard; guarded against an empty frame (no results yet).
            leaderboard_table = gr.components.Dataframe(
                # value=leaderboard_df,
                value=leaderboard_df[
                    [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
                    + shown_columns.value
                    + [AutoEvalColumn.dummy.name]
                ] if leaderboard_df.empty is False else leaderboard_df,
                headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value,
                datatype=TYPES,
                elem_id="leaderboard-table",
                interactive=False,
                visible=True,
                # column_widths=["2%", "20%"]
            )
            # Dummy leaderboard for handling the case when the user uses backspace key
            hidden_leaderboard_table_for_search = gr.components.Dataframe(
                value=original_df[COLS] if original_df.empty is False else original_df,
                headers=COLS,
                datatype=TYPES,
                visible=False
            )
            # Any facet change re-runs update_table against the full hidden frame.
            for selector in [shown_columns, filter_columns_type, filter_columns_precision, filter_columns_size]:
                selector.change(
                    update_table,
                    [
                        hidden_leaderboard_table_for_search,
                        shown_columns,
                        filter_columns_type,
                        filter_columns_precision,
                        filter_columns_size,
                        search_bar,
                    ],
                    leaderboard_table,
                    queue=True,
                )

        # toggle break 2: Submission -> runs add_new_eval() (actual evaluation is done on backend when backend-cli.py is run.)
        with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
            # with gr.Column():
            #     with gr.Row():
            #         gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
            #
            #     with gr.Column():
            #         with gr.Accordion(
            #             f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
            #             open=False,
            #         ):
            #             with gr.Row():
            #                 finished_eval_table = gr.components.Dataframe(
            #                     value=finished_eval_queue_df,
            #                     headers=EVAL_COLS,
            #                     datatype=EVAL_TYPES,
            #                     row_count=5
            #                 )
            #         with gr.Accordion(
            #             f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
            #             open=False,
            #         ):
            #             with gr.Row():
            #                 running_eval_table = gr.components.Dataframe(
            #                     value=running_eval_queue_df,
            #                     headers=EVAL_COLS,
            #                     datatype=EVAL_TYPES,
            #                     row_count=5
            #                 )
            #
            #         with gr.Accordion(
            #             f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
            #             open=False,
            #         ):
            #             with gr.Row():
            #                 pending_eval_table = gr.components.Dataframe(
            #                     value=pending_eval_queue_df,
            #                     headers=EVAL_COLS,
            #                     datatype=EVAL_TYPES,
            #                     row_count=5
            #                 )
            with gr.Row():
                gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")

            with gr.Row():
                with gr.Column():
                    model_name_textbox = gr.Textbox(label="Model name")
                    # You can use the revision parameter to point to the specific commit hash when downloading.
                    revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
                    private = gr.Checkbox(False, label="Private", visible=not IS_PUBLIC)
                    model_type = gr.Dropdown(
                        choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
                        label="Model type",
                        multiselect=False,
                        value=None,
                        interactive=True,
                    )

                with gr.Column():
                    precision = gr.Dropdown(
                        choices=[i.value.name for i in Precision if i != Precision.Unknown],
                        label="Precision",
                        multiselect=False,
                        value="float32",
                        interactive=True,
                    )
                    weight_type = gr.Dropdown(
                        choices=[i.value.name for i in WeightType],
                        label="Weights type",
                        multiselect=False,
                        value="Original",
                        interactive=True,
                    )

            # NOTE(review): choices here are (label, value) pairs where value is a
            # Task enum member — confirm Gradio serializes these as intended.
            requested_tasks = gr.CheckboxGroup(
                choices=[ (i.value.col_name, i.value) for i in Tasks],

                label="Select tasks",
                elem_id="task-select",
                interactive=True,
            )

            base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")

            submit_button = gr.Button("Submit Eval")
            submission_result = gr.Markdown()

            # we need to add task specification argument here as well.
            submit_button.click(
                add_new_eval,
                [
                    model_name_textbox,

                    requested_tasks,  # is this a list of str or class Task? i think it's Task.

                    base_model_name_textbox,
                    revision_name_textbox,
                    precision,
                    private,
                    weight_type,
                    model_type,
                ],
                submission_result,
            )


# demo.queue(default_concurrency_limit=40).launch()
demo.launch()
app_empty.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
def greet(name):
    """Return the demo greeting for *name* (e.g. 'Hello Ada!!')."""
    return "".join(("Hello ", name, "!!"))
+
6
+ # iface = gr.Interface(fn=greet, inputs="text", outputs="text")
7
+ # iface.launch()
backend-cli.py ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+
3
+ import os
4
+ import json
5
+
6
+ import random
7
+ from datetime import datetime
8
+
9
+ from src.backend.run_eval_suite import run_evaluation
10
+ from src.backend.manage_requests import check_completed_evals, get_eval_requests, set_eval_request
11
+ from src.backend.sort_queue import sort_models_by_priority
12
+
13
+
14
+ from src.backend.envs import EVAL_REQUESTS_PATH_BACKEND, EVAL_RESULTS_PATH_BACKEND, DEVICE, LIMIT, Tasks, Task, num_fewshots
15
+
16
+ from src.backend.manage_requests import EvalRequest
17
+ from src.leaderboard.read_evals import EvalResult
18
+
19
+ from src.envs import QUEUE_REPO, RESULTS_REPO, API
20
+ from src.utils import my_snapshot_download
21
+
22
+ import time
23
+
24
+ import logging
25
+ import pprint
26
+
27
+
28
def my_set_eval_request(api, eval_request, set_to_status, hf_repo, local_dir):
    """Update an eval request's status on the hub, retrying transient failures.

    Tries up to 10 times with a 60-second pause after each failure; gives up
    silently if all attempts fail.
    """
    attempts_left = 10
    while attempts_left > 0:
        attempts_left -= 1
        try:
            set_eval_request(api=api, eval_request=eval_request, set_to_status=set_to_status, hf_repo=hf_repo, local_dir=local_dir)
            return
        except Exception:
            # Assume a transient hub error; back off before the next attempt.
            time.sleep(60)
    return
+
37
+
38
+ logging.getLogger("openai").setLevel(logging.WARNING)
39
+
40
+ logging.basicConfig(level=logging.ERROR)
41
+ pp = pprint.PrettyPrinter(width=80)
42
+
43
+ PENDING_STATUS = "PENDING"
44
+ RUNNING_STATUS = "RUNNING"
45
+ FINISHED_STATUS = "FINISHED"
46
+ FAILED_STATUS = "FAILED"
47
+
48
+ TASKS_HARNESS = [task.value for task in Tasks]
49
+
50
+ # starts by downloading results and requests. makes sense since we want to be able to use different backend servers!
51
+ my_snapshot_download(repo_id=RESULTS_REPO, revision="main", local_dir=EVAL_RESULTS_PATH_BACKEND, repo_type="dataset", max_workers=60)
52
+ my_snapshot_download(repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH_BACKEND, repo_type="dataset", max_workers=60)
53
+
54
+
55
def sanity_checks():
    """Report the compute device and reconcile request statuses with finished results."""
    print(f'Device: {DEVICE}')

    # Refresh the local mirror of the request queue, then flip RUNNING requests
    # whose results already exist to FINISHED (or FAILED).
    my_snapshot_download(repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH_BACKEND, repo_type="dataset", max_workers=60)
    check_completed_evals(
        api=API,
        checked_status=RUNNING_STATUS,
        completed_status=FINISHED_STATUS,
        failed_status=FAILED_STATUS,
        hf_repo=QUEUE_REPO,
        local_dir=EVAL_REQUESTS_PATH_BACKEND,
        hf_repo_results=RESULTS_REPO,
        local_dir_results=EVAL_RESULTS_PATH_BACKEND,
    )
65
+
66
+
67
def request_to_result_name(request: EvalRequest) -> str:
    """Derive the result-file stem for a request: '[org_]model_precision'."""
    org, slash, model = request.model.partition("/")
    if not slash:
        # Model id has no org component.
        return f"{org}_{request.precision}"
    return f"{org}_{model}_{request.precision}"
78
+
79
# doesn't make distinctions for tasks since the original code runs eval on ALL tasks.
def process_evaluation(task_name: str, eval_request: EvalRequest) -> dict:
    """Run one task's evaluation for *eval_request*, save the results locally,
    and upload them to the results repo.

    Args:
        task_name: harness task to run (key into ``num_fewshots``).
        eval_request: the request being evaluated (provides the model id).

    Returns:
        The raw results dict produced by ``run_evaluation``.
    """
    # batch_size = 1
    batch_size = "auto"  # let the harness choose the batch size

    print(f"task_name parameter in process_evaluation() = {task_name}")

    num_fewshot = num_fewshots[task_name]

    results = run_evaluation(eval_request=eval_request, task_names=task_name, num_fewshot=num_fewshot,
                             batch_size=batch_size, device=DEVICE, use_cache=None, limit=LIMIT)

    print('RESULTS', results)

    # Non-serializable values are stringified rather than crashing the dump.
    dumped = json.dumps(results, indent=2, default=lambda o: '<not serializable>')
    print(dumped)

    # BUG FIX: the original called datetime.now() twice (once for the local path,
    # once for path_in_repo), so the local and uploaded filenames never matched.
    # Take a single timestamp and reuse it.
    result_file_name = f"results_{task_name}_{datetime.now()}.json"
    output_path = os.path.join(EVAL_RESULTS_PATH_BACKEND, *eval_request.model.split("/"), result_file_name)
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, "w") as f:
        f.write(dumped)

    # Sync the results mirror first, then push the new file to the hub.
    my_snapshot_download(repo_id=RESULTS_REPO, revision="main", local_dir=EVAL_RESULTS_PATH_BACKEND, repo_type="dataset", max_workers=60)
    API.upload_file(path_or_fileobj=output_path, path_in_repo=f"{eval_request.model}/{result_file_name}",
                    repo_id=RESULTS_REPO, repo_type="dataset")
    return results
106
+
107
+
108
# the rendering is done with files in local repo.
def process_pending_requests() -> bool:
    """Pick one PENDING request, run its requested tasks, and mark it FINISHED.

    Returns:
        False when no pending request exists, True after processing one.
    """
    sanity_checks()

    wanted_statuses = [PENDING_STATUS]

    # Requests are fetched FROM THE HUB, not the local dir. To run other evals,
    # change the status list above.
    pending = get_eval_requests(job_status=wanted_statuses, hf_repo=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH_BACKEND)
    # Sort the evals by priority (first submitted first run)
    pending = sort_models_by_priority(api=API, models=pending)

    # NOTE(review): shuffling here defeats the priority sort above — presumably to
    # spread work across backend workers; confirm before removing.
    random.shuffle(pending)

    print(f"Found {len(pending)} {','.join(wanted_statuses)} eval requests")

    if not pending:
        return False

    eval_request = pending[0]
    pp.pprint(eval_request)

    # Claim the request: refresh the mirror, then flip it to RUNNING on the hub.
    my_snapshot_download(repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH_BACKEND, repo_type="dataset", max_workers=60)
    my_set_eval_request(api=API, eval_request=eval_request, set_to_status=RUNNING_STATUS, hf_repo=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH_BACKEND)

    # Only the user-requested tasks are run (not all of TASKS_HARNESS).
    task_lst = eval_request.get_user_requested_task_names()
    random.shuffle(task_lst)
    print(f"task_lst in process_pending_requests(): {task_lst}")

    for task_name in task_lst:
        results = process_evaluation(task_name, eval_request)

    my_snapshot_download(repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH_BACKEND, repo_type="dataset", max_workers=60)
    my_set_eval_request(api=API, eval_request=eval_request, set_to_status=FINISHED_STATUS, hf_repo=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH_BACKEND)

    return True
147
+
148
+
149
if __name__ == "__main__":
    # wait = True

    # import socket
    # if socket.gethostname() in {'hamburg'} or os.path.isdir("/home/pminervi"):
    #     wait = False

    # if wait:
    #     time.sleep(60 * random.randint(2, 5))
    #     pass

    # res = False
    # Process a single pending request per invocation; an external scheduler
    # (or repeated launches) drives the queue forward.
    res = process_pending_requests()

    # if res is False:
    #     res = process_finished_requests(100)

    # if res is False:
    #     res = process_finished_requests(0)
eval-queue-bk/.DS_Store ADDED
Binary file (6.15 kB). View file
 
eval-queue-bk/.gitattributes ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.lz4 filter=lfs diff=lfs merge=lfs -text
12
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
13
+ *.model filter=lfs diff=lfs merge=lfs -text
14
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
15
+ *.npy filter=lfs diff=lfs merge=lfs -text
16
+ *.npz filter=lfs diff=lfs merge=lfs -text
17
+ *.onnx filter=lfs diff=lfs merge=lfs -text
18
+ *.ot filter=lfs diff=lfs merge=lfs -text
19
+ *.parquet filter=lfs diff=lfs merge=lfs -text
20
+ *.pb filter=lfs diff=lfs merge=lfs -text
21
+ *.pickle filter=lfs diff=lfs merge=lfs -text
22
+ *.pkl filter=lfs diff=lfs merge=lfs -text
23
+ *.pt filter=lfs diff=lfs merge=lfs -text
24
+ *.pth filter=lfs diff=lfs merge=lfs -text
25
+ *.rar filter=lfs diff=lfs merge=lfs -text
26
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
27
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
29
+ *.tar filter=lfs diff=lfs merge=lfs -text
30
+ *.tflite filter=lfs diff=lfs merge=lfs -text
31
+ *.tgz filter=lfs diff=lfs merge=lfs -text
32
+ *.wasm filter=lfs diff=lfs merge=lfs -text
33
+ *.xz filter=lfs diff=lfs merge=lfs -text
34
+ *.zip filter=lfs diff=lfs merge=lfs -text
35
+ *.zst filter=lfs diff=lfs merge=lfs -text
36
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
37
+ # Audio files - uncompressed
38
+ *.pcm filter=lfs diff=lfs merge=lfs -text
39
+ *.sam filter=lfs diff=lfs merge=lfs -text
40
+ *.raw filter=lfs diff=lfs merge=lfs -text
41
+ # Audio files - compressed
42
+ *.aac filter=lfs diff=lfs merge=lfs -text
43
+ *.flac filter=lfs diff=lfs merge=lfs -text
44
+ *.mp3 filter=lfs diff=lfs merge=lfs -text
45
+ *.ogg filter=lfs diff=lfs merge=lfs -text
46
+ *.wav filter=lfs diff=lfs merge=lfs -text
47
+ # Image files - uncompressed
48
+ *.bmp filter=lfs diff=lfs merge=lfs -text
49
+ *.gif filter=lfs diff=lfs merge=lfs -text
50
+ *.png filter=lfs diff=lfs merge=lfs -text
51
+ *.tiff filter=lfs diff=lfs merge=lfs -text
52
+ # Image files - compressed
53
+ *.jpg filter=lfs diff=lfs merge=lfs -text
54
+ *.jpeg filter=lfs diff=lfs merge=lfs -text
55
+ *.webp filter=lfs diff=lfs merge=lfs -text
eval-queue-bk/EleutherAI/pythia-160m_eval_request_False_float32_Original.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model": "EleutherAI/pythia-160m", "requested_tasks": [{"benchmark": "hellaswag", "metric": "acc_norm", "col_name": "HellaSwag"}], "base_model": "", "revision": "main", "private": false, "precision": "float32", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2024-02-01T00:29:51Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 17, "params": 0.213, "license": "apache-2.0"}
eval-queue-bk/EleutherAI/pythia-410m_eval_request_False_float32_Original.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model": "EleutherAI/pythia-410m", "requested_tasks": [{"benchmark": "hellaswag", "metric": "acc_norm", "col_name": "HellaSwag"}, {"benchmark": "pubmedqa", "metric": "acc", "col_name": "PubMedQA"}], "base_model": "", "revision": "main", "private": false, "precision": "float32", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2024-02-01T02:54:14Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 16, "params": 0.506, "license": "apache-2.0"}
eval-queue-bk/EleutherAI/pythia-70m_eval_request_False_float32_Original.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model": "EleutherAI/pythia-70m", "requested_tasks": [{"benchmark": "hellaswag", "metric": "acc_norm", "col_name": "HellaSwag"}], "base_model": "", "revision": "main", "private": false, "precision": "float32", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2024-02-01T02:42:51Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 32, "params": 0.096, "license": "apache-2.0"}
eval-queue-bk/README.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ ---
2
+ license: mit
3
+ ---
eval-queue/.DS_Store ADDED
Binary file (6.15 kB). View file
 
eval-queue/.gitattributes ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.lz4 filter=lfs diff=lfs merge=lfs -text
12
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
13
+ *.model filter=lfs diff=lfs merge=lfs -text
14
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
15
+ *.npy filter=lfs diff=lfs merge=lfs -text
16
+ *.npz filter=lfs diff=lfs merge=lfs -text
17
+ *.onnx filter=lfs diff=lfs merge=lfs -text
18
+ *.ot filter=lfs diff=lfs merge=lfs -text
19
+ *.parquet filter=lfs diff=lfs merge=lfs -text
20
+ *.pb filter=lfs diff=lfs merge=lfs -text
21
+ *.pickle filter=lfs diff=lfs merge=lfs -text
22
+ *.pkl filter=lfs diff=lfs merge=lfs -text
23
+ *.pt filter=lfs diff=lfs merge=lfs -text
24
+ *.pth filter=lfs diff=lfs merge=lfs -text
25
+ *.rar filter=lfs diff=lfs merge=lfs -text
26
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
27
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
29
+ *.tar filter=lfs diff=lfs merge=lfs -text
30
+ *.tflite filter=lfs diff=lfs merge=lfs -text
31
+ *.tgz filter=lfs diff=lfs merge=lfs -text
32
+ *.wasm filter=lfs diff=lfs merge=lfs -text
33
+ *.xz filter=lfs diff=lfs merge=lfs -text
34
+ *.zip filter=lfs diff=lfs merge=lfs -text
35
+ *.zst filter=lfs diff=lfs merge=lfs -text
36
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
37
+ # Audio files - uncompressed
38
+ *.pcm filter=lfs diff=lfs merge=lfs -text
39
+ *.sam filter=lfs diff=lfs merge=lfs -text
40
+ *.raw filter=lfs diff=lfs merge=lfs -text
41
+ # Audio files - compressed
42
+ *.aac filter=lfs diff=lfs merge=lfs -text
43
+ *.flac filter=lfs diff=lfs merge=lfs -text
44
+ *.mp3 filter=lfs diff=lfs merge=lfs -text
45
+ *.ogg filter=lfs diff=lfs merge=lfs -text
46
+ *.wav filter=lfs diff=lfs merge=lfs -text
47
+ # Image files - uncompressed
48
+ *.bmp filter=lfs diff=lfs merge=lfs -text
49
+ *.gif filter=lfs diff=lfs merge=lfs -text
50
+ *.png filter=lfs diff=lfs merge=lfs -text
51
+ *.tiff filter=lfs diff=lfs merge=lfs -text
52
+ # Image files - compressed
53
+ *.jpg filter=lfs diff=lfs merge=lfs -text
54
+ *.jpeg filter=lfs diff=lfs merge=lfs -text
55
+ *.webp filter=lfs diff=lfs merge=lfs -text
eval-queue/EleutherAI/pythia-160m_eval_request_False_float32_Original.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model": "EleutherAI/pythia-160m", "requested_tasks": [{"benchmark": "hellaswag", "metric": "acc_norm", "col_name": "HellaSwag"}], "base_model": "", "revision": "main", "private": false, "precision": "float32", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2024-02-01T00:29:51Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 17, "params": 0.213, "license": "apache-2.0"}
eval-queue/EleutherAI/pythia-410m_eval_request_False_float32_Original.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model": "EleutherAI/pythia-410m", "requested_tasks": [{"benchmark": "hellaswag", "metric": "acc_norm", "col_name": "HellaSwag"}, {"benchmark": "pubmedqa", "metric": "acc", "col_name": "PubMedQA"}], "base_model": "", "revision": "main", "private": false, "precision": "float32", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2024-02-01T02:54:14Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 16, "params": 0.506, "license": "apache-2.0"}
eval-queue/EleutherAI/pythia-70m_eval_request_False_float32_Original.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model": "EleutherAI/pythia-70m", "requested_tasks": [{"benchmark": "hellaswag", "metric": "acc_norm", "col_name": "HellaSwag"}], "base_model": "", "revision": "main", "private": false, "precision": "float32", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2024-02-01T02:42:51Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 32, "params": 0.096, "license": "apache-2.0"}
eval-queue/README.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ ---
2
+ license: mit
3
+ ---
eval-results-bk/.DS_Store ADDED
Binary file (6.15 kB). View file
 
eval-results-bk/.gitattributes ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.lz4 filter=lfs diff=lfs merge=lfs -text
12
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
13
+ *.model filter=lfs diff=lfs merge=lfs -text
14
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
15
+ *.npy filter=lfs diff=lfs merge=lfs -text
16
+ *.npz filter=lfs diff=lfs merge=lfs -text
17
+ *.onnx filter=lfs diff=lfs merge=lfs -text
18
+ *.ot filter=lfs diff=lfs merge=lfs -text
19
+ *.parquet filter=lfs diff=lfs merge=lfs -text
20
+ *.pb filter=lfs diff=lfs merge=lfs -text
21
+ *.pickle filter=lfs diff=lfs merge=lfs -text
22
+ *.pkl filter=lfs diff=lfs merge=lfs -text
23
+ *.pt filter=lfs diff=lfs merge=lfs -text
24
+ *.pth filter=lfs diff=lfs merge=lfs -text
25
+ *.rar filter=lfs diff=lfs merge=lfs -text
26
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
27
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
29
+ *.tar filter=lfs diff=lfs merge=lfs -text
30
+ *.tflite filter=lfs diff=lfs merge=lfs -text
31
+ *.tgz filter=lfs diff=lfs merge=lfs -text
32
+ *.wasm filter=lfs diff=lfs merge=lfs -text
33
+ *.xz filter=lfs diff=lfs merge=lfs -text
34
+ *.zip filter=lfs diff=lfs merge=lfs -text
35
+ *.zst filter=lfs diff=lfs merge=lfs -text
36
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
37
+ # Audio files - uncompressed
38
+ *.pcm filter=lfs diff=lfs merge=lfs -text
39
+ *.sam filter=lfs diff=lfs merge=lfs -text
40
+ *.raw filter=lfs diff=lfs merge=lfs -text
41
+ # Audio files - compressed
42
+ *.aac filter=lfs diff=lfs merge=lfs -text
43
+ *.flac filter=lfs diff=lfs merge=lfs -text
44
+ *.mp3 filter=lfs diff=lfs merge=lfs -text
45
+ *.ogg filter=lfs diff=lfs merge=lfs -text
46
+ *.wav filter=lfs diff=lfs merge=lfs -text
47
+ # Image files - uncompressed
48
+ *.bmp filter=lfs diff=lfs merge=lfs -text
49
+ *.gif filter=lfs diff=lfs merge=lfs -text
50
+ *.png filter=lfs diff=lfs merge=lfs -text
51
+ *.tiff filter=lfs diff=lfs merge=lfs -text
52
+ # Image files - compressed
53
+ *.jpg filter=lfs diff=lfs merge=lfs -text
54
+ *.jpeg filter=lfs diff=lfs merge=lfs -text
55
+ *.webp filter=lfs diff=lfs merge=lfs -text
eval-results-bk/EleutherAI/.DS_Store ADDED
Binary file (6.15 kB). View file
 
eval-results-bk/EleutherAI/pythia-160m/results_2024-01-31 13:35:29.978568.json ADDED
The diff for this file is too large to render. See raw diff
 
eval-results-bk/EleutherAI/pythia-160m/results_2024-01-31 13:35:30.399383.json ADDED
The diff for this file is too large to render. See raw diff
 
eval-results-bk/EleutherAI/pythia-160m/results_2024-01-31 13:48:48.327864.json ADDED
The diff for this file is too large to render. See raw diff
 
eval-results-bk/EleutherAI/pythia-160m/results_2024-01-31 23:02:53.510226.json ADDED
The diff for this file is too large to render. See raw diff
 
eval-results-bk/EleutherAI/pythia-160m/results_2024-01-31 23:09:15.254321.json ADDED
The diff for this file is too large to render. See raw diff
 
eval-results-bk/EleutherAI/pythia-160m/results_hellaswag_2024-02-01 00:42:54.031633.json ADDED
The diff for this file is too large to render. See raw diff
 
eval-results-bk/EleutherAI/pythia-160m/results_hellaswag_2024-02-01 00:42:54.349884.json ADDED
The diff for this file is too large to render. See raw diff
 
eval-results-bk/EleutherAI/pythia-160m/results_pubmedqa_2024-02-01 00:28:27.147005.json ADDED
The diff for this file is too large to render. See raw diff
 
eval-results-bk/EleutherAI/pythia-160m/results_pubmedqa_2024-02-01 00:28:27.422530.json ADDED
The diff for this file is too large to render. See raw diff
 
eval-results-bk/EleutherAI/pythia-410m/results_hellaswag_2024-02-01 03:33:27.647868.json ADDED
The diff for this file is too large to render. See raw diff
 
eval-results-bk/EleutherAI/pythia-410m/results_pubmedqa_2024-02-01 03:05:20.335717.json ADDED
The diff for this file is too large to render. See raw diff
 
eval-results-bk/EleutherAI/pythia-410m/results_pubmedqa_2024-02-01 03:06:30.820158.json ADDED
The diff for this file is too large to render. See raw diff
 
eval-results-bk/EleutherAI/pythia-70m/results_hellaswag_2024-02-01 02:50:41.186578.json ADDED
The diff for this file is too large to render. See raw diff
 
eval-results-bk/EleutherAI/pythia-70m/results_hellaswag_2024-02-01 02:50:41.837997.json ADDED
The diff for this file is too large to render. See raw diff
 
eval-results-bk/README.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ ---
2
+ license: mit
3
+ ---
eval-results/.DS_Store ADDED
Binary file (6.15 kB). View file
 
eval-results/.gitattributes ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.lz4 filter=lfs diff=lfs merge=lfs -text
12
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
13
+ *.model filter=lfs diff=lfs merge=lfs -text
14
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
15
+ *.npy filter=lfs diff=lfs merge=lfs -text
16
+ *.npz filter=lfs diff=lfs merge=lfs -text
17
+ *.onnx filter=lfs diff=lfs merge=lfs -text
18
+ *.ot filter=lfs diff=lfs merge=lfs -text
19
+ *.parquet filter=lfs diff=lfs merge=lfs -text
20
+ *.pb filter=lfs diff=lfs merge=lfs -text
21
+ *.pickle filter=lfs diff=lfs merge=lfs -text
22
+ *.pkl filter=lfs diff=lfs merge=lfs -text
23
+ *.pt filter=lfs diff=lfs merge=lfs -text
24
+ *.pth filter=lfs diff=lfs merge=lfs -text
25
+ *.rar filter=lfs diff=lfs merge=lfs -text
26
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
27
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
29
+ *.tar filter=lfs diff=lfs merge=lfs -text
30
+ *.tflite filter=lfs diff=lfs merge=lfs -text
31
+ *.tgz filter=lfs diff=lfs merge=lfs -text
32
+ *.wasm filter=lfs diff=lfs merge=lfs -text
33
+ *.xz filter=lfs diff=lfs merge=lfs -text
34
+ *.zip filter=lfs diff=lfs merge=lfs -text
35
+ *.zst filter=lfs diff=lfs merge=lfs -text
36
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
37
+ # Audio files - uncompressed
38
+ *.pcm filter=lfs diff=lfs merge=lfs -text
39
+ *.sam filter=lfs diff=lfs merge=lfs -text
40
+ *.raw filter=lfs diff=lfs merge=lfs -text
41
+ # Audio files - compressed
42
+ *.aac filter=lfs diff=lfs merge=lfs -text
43
+ *.flac filter=lfs diff=lfs merge=lfs -text
44
+ *.mp3 filter=lfs diff=lfs merge=lfs -text
45
+ *.ogg filter=lfs diff=lfs merge=lfs -text
46
+ *.wav filter=lfs diff=lfs merge=lfs -text
47
+ # Image files - uncompressed
48
+ *.bmp filter=lfs diff=lfs merge=lfs -text
49
+ *.gif filter=lfs diff=lfs merge=lfs -text
50
+ *.png filter=lfs diff=lfs merge=lfs -text
51
+ *.tiff filter=lfs diff=lfs merge=lfs -text
52
+ # Image files - compressed
53
+ *.jpg filter=lfs diff=lfs merge=lfs -text
54
+ *.jpeg filter=lfs diff=lfs merge=lfs -text
55
+ *.webp filter=lfs diff=lfs merge=lfs -text
eval-results/EleutherAI/.DS_Store ADDED
Binary file (6.15 kB). View file
 
eval-results/EleutherAI/pythia-160m/results_hellaswag_2024-02-01 00:42:54.349884.json ADDED
The diff for this file is too large to render. See raw diff
 
eval-results/EleutherAI/pythia-160m/results_pubmedqa_2024-02-01 00:28:27.422530.json ADDED
The diff for this file is too large to render. See raw diff
 
eval-results/EleutherAI/pythia-410m/results_hellaswag_2024-02-01 03:33:28.471084.json ADDED
The diff for this file is too large to render. See raw diff
 
eval-results/EleutherAI/pythia-410m/results_pubmedqa_2024-02-01 03:06:30.820158.json ADDED
The diff for this file is too large to render. See raw diff
 
eval-results/EleutherAI/pythia-70m/results_hellaswag_2024-02-01 02:50:41.837997.json ADDED
The diff for this file is too large to render. See raw diff
 
eval-results/README.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ ---
2
+ license: mit
3
+ ---
manage_repos.ipynb ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "https://huggingface.co/datasets/chaeeunlee/test_requests\n",
8
+ "\n",
9
+ "https://huggingface.co/datasets/chaeeunlee/test_results"
10
+ ]
11
+ },
12
+ {
13
+ "cell_type": "code",
14
+ "execution_count": 18,
15
+ "metadata": {},
16
+ "outputs": [
17
+ {
18
+ "data": {
19
+ "text/plain": [
20
+ "'\\n( path_in_repo: str\\nrepo_id: str\\ntoken: typing.Optional[str] = None\\nrepo_type: typing.Optional[str] = Nonerevision: typing.Optional[str] = Nonecommit_message: typing.Optional[str] = Nonecommit_description: typing.Optional[str] = Nonecreate_pr: typing.Optional[bool] = Noneparent_commit: typing.Optional[str] = None )\\n'"
21
+ ]
22
+ },
23
+ "execution_count": 18,
24
+ "metadata": {},
25
+ "output_type": "execute_result"
26
+ }
27
+ ],
28
+ "source": [
29
+ "from src.envs import H4_TOKEN, API, QUEUE_REPO, RESULTS_REPO\n",
30
+ "\n",
31
+ "from huggingface_hub import HfApi\n",
32
+ "\n",
33
+ "'''\n",
34
+ "( path_in_repo: str\n",
35
+ "repo_id: str\n",
36
+ "token: typing.Optional[str] = None\n",
37
+ "repo_type: typing.Optional[str] = Nonerevision: typing.Optional[str] = Nonecommit_message: typing.Optional[str] = Nonecommit_description: typing.Optional[str] = Nonecreate_pr: typing.Optional[bool] = Noneparent_commit: typing.Optional[str] = None )\n",
38
+ "'''\n",
39
+ "\n"
40
+ ]
41
+ },
42
+ {
43
+ "cell_type": "code",
44
+ "execution_count": 38,
45
+ "metadata": {},
46
+ "outputs": [],
47
+ "source": [
48
+ "res = API.delete_folder(path_in_repo='EleutherAI/', repo_id=RESULTS_REPO, repo_type='dataset')"
49
+ ]
50
+ },
51
+ {
52
+ "cell_type": "code",
53
+ "execution_count": 37,
54
+ "metadata": {},
55
+ "outputs": [],
56
+ "source": [
57
+ "res = API.delete_folder(path_in_repo='EleutherAI/', repo_id=QUEUE_REPO, repo_type='dataset')"
58
+ ]
59
+ },
60
+ {
61
+ "cell_type": "code",
62
+ "execution_count": null,
63
+ "metadata": {},
64
+ "outputs": [],
65
+ "source": [
66
+ "# import os\n",
67
+ "\n",
68
+ "# for root, _, files in os.walk(results_path):\n",
69
+ "# # We should only have json files in model results\n",
70
+ "# if len(files) == 0 or any([not f.endswith(\".json\") for f in files]):\n",
71
+ "# continue\n",
72
+ "\n",
73
+ "# # Sort the files by date\n",
74
+ "# try:\n",
75
+ "# files.sort(key=lambda x: x.removesuffix(\".json\").removeprefix(\"results_\")[:-7])\n",
76
+ "# except dateutil.parser._parser.ParserError:\n",
77
+ "# files = [files[-1]]\n",
78
+ "\n",
79
+ "\n",
80
+ "# print(f\"files = {files}\")\n",
81
+ "\n",
82
+ "# for file in files:\n",
83
+ "# model_result_filepaths.append(os.path.join(root, file))"
84
+ ]
85
+ },
86
+ {
87
+ "cell_type": "code",
88
+ "execution_count": null,
89
+ "metadata": {},
90
+ "outputs": [
91
+ {
92
+ "name": "stdout",
93
+ "output_type": "stream",
94
+ "text": [
95
+ "DatasetInfo(id='chaeeunlee/test_requests', author='chaeeunlee', sha='c7f4d0c0b1207cc773dcd0b1df49cd6a883e02be', created_at=datetime.datetime(2024, 1, 31, 11, 19, 22, tzinfo=datetime.timezone.utc), last_modified=datetime.datetime(2024, 1, 31, 19, 55, 30, tzinfo=datetime.timezone.utc), private=False, gated=False, disabled=False, downloads=0, likes=0, paperswithcode_id=None, tags=['license:mit', 'region:us'], card_data={'annotations_creators': None, 'language_creators': None, 'language': None, 'license': 'mit', 'multilinguality': None, 'size_categories': None, 'source_datasets': None, 'task_categories': None, 'task_ids': None, 'paperswithcode_id': None, 'pretty_name': None, 'config_names': None, 'train_eval_index': None}, siblings=[RepoSibling(rfilename='.gitattributes', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='EleutherAI/pythia-160m_eval_request_False_float32_Original.json', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='README.md', size=None, blob_id=None, lfs=None)])\n"
96
+ ]
97
+ }
98
+ ],
99
+ "source": [
100
+ "info = API.dataset_info(repo_id=QUEUE_REPO)\n",
101
+ "print(info)"
102
+ ]
103
+ },
104
+ {
105
+ "cell_type": "code",
106
+ "execution_count": 21,
107
+ "metadata": {},
108
+ "outputs": [],
109
+ "source": [
110
+ "from huggingface_hub import HfApi\n",
111
+ "\n",
112
+ "def print_repo_directory_structure(api, repo_id, is_dataset=True):\n",
113
+ " \"\"\"\n",
114
+ " Print the directory structure of a Hugging Face repository.\n",
115
+ "\n",
116
+ " Parameters:\n",
117
+ " - repo_id (str): Repository ID in the format \"username/reponame\".\n",
118
+ " \"\"\"\n",
119
+ " # api = HfApi()\n",
120
+ " if is_dataset:\n",
121
+ " repo_files = api.list_repo_files(repo_id=repo_id, repo_type='dataset')\n",
122
+ " else:\n",
123
+ " repo_files = api.list_repo_files(repo_id=repo_id)\n",
124
+ "\n",
125
+ "\n",
126
+ " print(f\"Directory structure of {repo_id}:\")\n",
127
+ " print()\n",
128
+ " for file_path in repo_files:\n",
129
+ " print(file_path)\n",
130
+ "\n"
131
+ ]
132
+ },
133
+ {
134
+ "cell_type": "code",
135
+ "execution_count": 35,
136
+ "metadata": {},
137
+ "outputs": [
138
+ {
139
+ "name": "stdout",
140
+ "output_type": "stream",
141
+ "text": [
142
+ "Directory structure of chaeeunlee/test_requests:\n",
143
+ "\n",
144
+ ".gitattributes\n",
145
+ "README.md\n"
146
+ ]
147
+ }
148
+ ],
149
+ "source": [
150
+ "repo_id = QUEUE_REPO # Replace with the target repository ID\n",
151
+ "print_repo_directory_structure(API, repo_id)"
152
+ ]
153
+ }
154
+ ],
155
+ "metadata": {
156
+ "kernelspec": {
157
+ "display_name": "lb",
158
+ "language": "python",
159
+ "name": "python3"
160
+ },
161
+ "language_info": {
162
+ "codemirror_mode": {
163
+ "name": "ipython",
164
+ "version": 3
165
+ },
166
+ "file_extension": ".py",
167
+ "mimetype": "text/x-python",
168
+ "name": "python",
169
+ "nbconvert_exporter": "python",
170
+ "pygments_lexer": "ipython3",
171
+ "version": "3.10.13"
172
+ }
173
+ },
174
+ "nbformat": 4,
175
+ "nbformat_minor": 2
176
+ }
requirements.txt ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ torch
2
+ colorama
3
+ APScheduler
4
+ black
5
+ click
6
+ datasets
7
+ gradio
8
+ gradio_client
9
+ huggingface-hub
10
+ matplotlib
11
+ numpy
12
+ pandas
13
+ plotly
14
+ python-dateutil
15
+ requests
16
+ semantic-version
17
+ tqdm
18
+ transformers>=4.36.0,<4.37.0
19
+ tokenizers>=0.15.0
20
+ lm_eval[ifeval] @ git+https://github.com/EleutherAI/lm-evaluation-harness.git
21
+ accelerate
22
+ sentencepiece
23
+ langdetect
24
+ sacrebleu
25
+ cchardet
26
+ rouge_score
27
+ bert-score
28
+ evaluate
29
+ spacy
30
+ selfcheckgpt
31
+ immutabledict
src/.DS_Store ADDED
Binary file (6.15 kB). View file
 
src/__pycache__/envs.cpython-310.pyc ADDED
Binary file (999 Bytes). View file
 
src/__pycache__/populate.cpython-310.pyc ADDED
Binary file (2.79 kB). View file
 
src/__pycache__/utils.cpython-310.pyc ADDED
Binary file (1.34 kB). View file