Spaces:
Runtime error
Runtime error
Merge pull request #28 from huggingface/flush-jobs
Browse files
- app.py +25 -3
- notebooks/flush-prediction-repos.ipynb +177 -0
app.py
CHANGED
@@ -4,12 +4,14 @@ from pathlib import Path
|
|
4 |
|
5 |
import pandas as pd
|
6 |
import streamlit as st
|
|
|
7 |
from datasets import get_dataset_config_names
|
8 |
from dotenv import load_dotenv
|
9 |
from huggingface_hub import list_datasets
|
10 |
|
11 |
from evaluation import filter_evaluated_models
|
12 |
from utils import (
|
|
|
13 |
commit_evaluation_log,
|
14 |
format_col_mapping,
|
15 |
get_compatible_models,
|
@@ -146,9 +148,8 @@ selected_dataset = st.selectbox(
|
|
146 |
"Select a dataset",
|
147 |
all_datasets,
|
148 |
index=all_datasets.index(default_dataset),
|
149 |
-
help="""Datasets with metadata can be evaluated with 1-click.
|
150 |
-
|
151 |
-
evaluation metadata to a dataset.""",
|
152 |
)
|
153 |
st.experimental_set_query_params(**{"dataset": [selected_dataset]})
|
154 |
|
@@ -495,6 +496,18 @@ with st.form(key="form"):
|
|
495 |
).json()
|
496 |
print(f"INFO -- AutoTrain job response: {train_json_resp}")
|
497 |
if train_json_resp["success"]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
498 |
st.success("✅ Successfully submitted evaluation job!")
|
499 |
st.markdown(
|
500 |
f"""
|
@@ -506,6 +519,15 @@ with st.form(key="form"):
|
|
506 |
Check your email for notifications.
|
507 |
* 📊 Click [here](https://hf.co/spaces/autoevaluate/leaderboards?dataset={selected_dataset}) \
|
508 |
to view the results from your submission once the Hub pull request is merged.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
509 |
"""
|
510 |
)
|
511 |
print("INFO -- Pushing evaluation job logs to the Hub")
|
|
|
4 |
|
5 |
import pandas as pd
|
6 |
import streamlit as st
|
7 |
+
import yaml
|
8 |
from datasets import get_dataset_config_names
|
9 |
from dotenv import load_dotenv
|
10 |
from huggingface_hub import list_datasets
|
11 |
|
12 |
from evaluation import filter_evaluated_models
|
13 |
from utils import (
|
14 |
+
AUTOTRAIN_TASK_TO_HUB_TASK,
|
15 |
commit_evaluation_log,
|
16 |
format_col_mapping,
|
17 |
get_compatible_models,
|
|
|
148 |
"Select a dataset",
|
149 |
all_datasets,
|
150 |
index=all_datasets.index(default_dataset),
|
151 |
+
help="""Datasets with metadata can be evaluated with 1-click. Configure an evaluation job to add \
|
152 |
+
new metadata to a dataset card.""",
|
|
|
153 |
)
|
154 |
st.experimental_set_query_params(**{"dataset": [selected_dataset]})
|
155 |
|
|
|
496 |
).json()
|
497 |
print(f"INFO -- AutoTrain job response: {train_json_resp}")
|
498 |
if train_json_resp["success"]:
|
499 |
+
train_eval_index = {
|
500 |
+
"train-eval-index": [
|
501 |
+
{
|
502 |
+
"config": selected_config,
|
503 |
+
"task": AUTOTRAIN_TASK_TO_HUB_TASK[selected_task],
|
504 |
+
"task_id": selected_task,
|
505 |
+
"splits": {"eval_split": selected_split},
|
506 |
+
"col_mapping": col_mapping,
|
507 |
+
}
|
508 |
+
]
|
509 |
+
}
|
510 |
+
selected_metadata = yaml.dump(train_eval_index, sort_keys=False)
|
511 |
st.success("✅ Successfully submitted evaluation job!")
|
512 |
st.markdown(
|
513 |
f"""
|
|
|
519 |
Check your email for notifications.
|
520 |
* 📊 Click [here](https://hf.co/spaces/autoevaluate/leaderboards?dataset={selected_dataset}) \
|
521 |
to view the results from your submission once the Hub pull request is merged.
|
522 |
+
* Add the following metadata to the \
|
523 |
+
[dataset card](https://huggingface.co/datasets/{selected_dataset}/blob/main/README.md) \
|
524 |
+
to enable 1-click evaluations:
|
525 |
+
"""
|
526 |
+
)
|
527 |
+
st.markdown(
|
528 |
+
f"""
|
529 |
+
```yaml
|
530 |
+
{selected_metadata}
|
531 |
"""
|
532 |
)
|
533 |
print("INFO -- Pushing evaluation job logs to the Hub")
|
notebooks/flush-prediction-repos.ipynb
ADDED
@@ -0,0 +1,177 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "markdown",
|
5 |
+
"id": "c8093b9e-ca6a-423d-96c3-5fe21f7109a1",
|
6 |
+
"metadata": {},
|
7 |
+
"source": [
|
8 |
+
"## Imports"
|
9 |
+
]
|
10 |
+
},
|
11 |
+
{
|
12 |
+
"cell_type": "code",
|
13 |
+
"execution_count": 1,
|
14 |
+
"id": "efe8cda7-a687-4867-b1f0-8efbcd428681",
|
15 |
+
"metadata": {},
|
16 |
+
"outputs": [],
|
17 |
+
"source": [
|
18 |
+
"import os\n",
|
19 |
+
"from pathlib import Path\n",
|
20 |
+
"\n",
|
21 |
+
"from dotenv import load_dotenv\n",
|
22 |
+
"from huggingface_hub import DatasetFilter, delete_repo, list_datasets\n",
|
23 |
+
"from tqdm.auto import tqdm\n",
|
24 |
+
"\n",
|
25 |
+
"if Path(\".env\").is_file():\n",
|
26 |
+
" load_dotenv(\".env\")\n",
|
27 |
+
"\n",
|
28 |
+
"HF_TOKEN = os.getenv(\"HF_TOKEN\")"
|
29 |
+
]
|
30 |
+
},
|
31 |
+
{
|
32 |
+
"cell_type": "markdown",
|
33 |
+
"id": "8f6e01f0-b658-451f-999c-e08d9f4bbbd3",
|
34 |
+
"metadata": {},
|
35 |
+
"source": [
|
36 |
+
"## Get all prediction repos from autoevaluate org"
|
37 |
+
]
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"cell_type": "code",
|
41 |
+
"execution_count": 2,
|
42 |
+
"id": "2e369478-66d3-498d-a8fd-95bc9180f362",
|
43 |
+
"metadata": {},
|
44 |
+
"outputs": [],
|
45 |
+
"source": [
|
46 |
+
"def get_prediction_repos():\n",
|
47 |
+
" all_repos = list_datasets(author=\"autoevaluate\")\n",
|
48 |
+
" prediction_repos = [\n",
|
49 |
+
" repo for repo in all_repos if repo.id.split(\"/\")[1].startswith(\"autoeval-\")\n",
|
50 |
+
" ]\n",
|
51 |
+
" return prediction_repos"
|
52 |
+
]
|
53 |
+
},
|
54 |
+
{
|
55 |
+
"cell_type": "code",
|
56 |
+
"execution_count": 3,
|
57 |
+
"id": "542db019-d01f-42f5-bef4-888dae8eeadb",
|
58 |
+
"metadata": {},
|
59 |
+
"outputs": [
|
60 |
+
{
|
61 |
+
"data": {
|
62 |
+
"text/plain": [
|
63 |
+
"66"
|
64 |
+
]
|
65 |
+
},
|
66 |
+
"execution_count": 3,
|
67 |
+
"metadata": {},
|
68 |
+
"output_type": "execute_result"
|
69 |
+
}
|
70 |
+
],
|
71 |
+
"source": [
|
72 |
+
"prediction_repos = get_prediction_repos()\n",
|
73 |
+
"len(prediction_repos)"
|
74 |
+
]
|
75 |
+
},
|
76 |
+
{
|
77 |
+
"cell_type": "code",
|
78 |
+
"execution_count": 4,
|
79 |
+
"id": "331cfabf-4b73-490f-8d6a-86b5bc162666",
|
80 |
+
"metadata": {},
|
81 |
+
"outputs": [
|
82 |
+
{
|
83 |
+
"data": {
|
84 |
+
"text/plain": [
|
85 |
+
"DatasetInfo: {\n",
|
86 |
+
"\tid: autoevaluate/autoeval-staging-eval-project-9dcc51b5-6464670\n",
|
87 |
+
"\tsha: d3bb02be592d167f7a217ac9341d187142d9a90a\n",
|
88 |
+
"\tlastModified: 2022-06-13T14:54:34.000Z\n",
|
89 |
+
"\ttags: ['type:predictions', 'tags:autotrain', 'tags:evaluation', 'datasets:glue']\n",
|
90 |
+
"\tprivate: False\n",
|
91 |
+
"\tauthor: autoevaluate\n",
|
92 |
+
"\tdescription: None\n",
|
93 |
+
"\tcitation: None\n",
|
94 |
+
"\tcardData: None\n",
|
95 |
+
"\tsiblings: None\n",
|
96 |
+
"\tgated: False\n",
|
97 |
+
"\tdownloads: 12\n",
|
98 |
+
"}"
|
99 |
+
]
|
100 |
+
},
|
101 |
+
"execution_count": 4,
|
102 |
+
"metadata": {},
|
103 |
+
"output_type": "execute_result"
|
104 |
+
}
|
105 |
+
],
|
106 |
+
"source": [
|
107 |
+
"prediction_repos[0]"
|
108 |
+
]
|
109 |
+
},
|
110 |
+
{
|
111 |
+
"cell_type": "markdown",
|
112 |
+
"id": "57a86b69-ffe8-4035-8f3d-5c917d8ce7bf",
|
113 |
+
"metadata": {},
|
114 |
+
"source": [
|
115 |
+
"## Delete all prediction repos"
|
116 |
+
]
|
117 |
+
},
|
118 |
+
{
|
119 |
+
"cell_type": "code",
|
120 |
+
"execution_count": 5,
|
121 |
+
"id": "6c8e23e7-2a6d-437b-9742-17f37684d9eb",
|
122 |
+
"metadata": {},
|
123 |
+
"outputs": [
|
124 |
+
{
|
125 |
+
"data": {
|
126 |
+
"application/vnd.jupyter.widget-view+json": {
|
127 |
+
"model_id": "06fa304dcc6d44e39205b20a5e488052",
|
128 |
+
"version_major": 2,
|
129 |
+
"version_minor": 0
|
130 |
+
},
|
131 |
+
"text/plain": [
|
132 |
+
" 0%| | 0/66 [00:00<?, ?it/s]"
|
133 |
+
]
|
134 |
+
},
|
135 |
+
"metadata": {},
|
136 |
+
"output_type": "display_data"
|
137 |
+
}
|
138 |
+
],
|
139 |
+
"source": [
|
140 |
+
"for repo in tqdm(prediction_repos):\n",
|
141 |
+
" delete_repo(\n",
|
142 |
+
" repo_id=repo.id,\n",
|
143 |
+
" repo_type=\"dataset\",\n",
|
144 |
+
" )"
|
145 |
+
]
|
146 |
+
},
|
147 |
+
{
|
148 |
+
"cell_type": "code",
|
149 |
+
"execution_count": null,
|
150 |
+
"id": "7d64b0aa-d05f-4497-9bd2-eb2fc0d8bd7a",
|
151 |
+
"metadata": {},
|
152 |
+
"outputs": [],
|
153 |
+
"source": []
|
154 |
+
}
|
155 |
+
],
|
156 |
+
"metadata": {
|
157 |
+
"kernelspec": {
|
158 |
+
"display_name": "autoevaluate",
|
159 |
+
"language": "python",
|
160 |
+
"name": "autoevaluate"
|
161 |
+
},
|
162 |
+
"language_info": {
|
163 |
+
"codemirror_mode": {
|
164 |
+
"name": "ipython",
|
165 |
+
"version": 3
|
166 |
+
},
|
167 |
+
"file_extension": ".py",
|
168 |
+
"mimetype": "text/x-python",
|
169 |
+
"name": "python",
|
170 |
+
"nbconvert_exporter": "python",
|
171 |
+
"pygments_lexer": "ipython3",
|
172 |
+
"version": "3.8.13"
|
173 |
+
}
|
174 |
+
},
|
175 |
+
"nbformat": 4,
|
176 |
+
"nbformat_minor": 5
|
177 |
+
}
|