lewtun HF staff committed on
Commit
7ec3a4a
·
unverified ·
2 Parent(s): dcc9d39 0b46237

Merge pull request #28 from huggingface/flush-jobs

Browse files
Files changed (2) hide show
  1. app.py +25 -3
  2. notebooks/flush-prediction-repos.ipynb +177 -0
app.py CHANGED
@@ -4,12 +4,14 @@ from pathlib import Path
4
 
5
  import pandas as pd
6
  import streamlit as st
 
7
  from datasets import get_dataset_config_names
8
  from dotenv import load_dotenv
9
  from huggingface_hub import list_datasets
10
 
11
  from evaluation import filter_evaluated_models
12
  from utils import (
 
13
  commit_evaluation_log,
14
  format_col_mapping,
15
  get_compatible_models,
@@ -146,9 +148,8 @@ selected_dataset = st.selectbox(
146
  "Select a dataset",
147
  all_datasets,
148
  index=all_datasets.index(default_dataset),
149
- help="""Datasets with metadata can be evaluated with 1-click. Check out the \
150
- [documentation](https://huggingface.co/docs/hub/datasets-cards) to add \
151
- evaluation metadata to a dataset.""",
152
  )
153
  st.experimental_set_query_params(**{"dataset": [selected_dataset]})
154
 
@@ -495,6 +496,18 @@ with st.form(key="form"):
495
  ).json()
496
  print(f"INFO -- AutoTrain job response: {train_json_resp}")
497
  if train_json_resp["success"]:
 
 
 
 
 
 
 
 
 
 
 
 
498
  st.success("✅ Successfully submitted evaluation job!")
499
  st.markdown(
500
  f"""
@@ -506,6 +519,15 @@ with st.form(key="form"):
506
  Check your email for notifications.
507
  * 📊 Click [here](https://hf.co/spaces/autoevaluate/leaderboards?dataset={selected_dataset}) \
508
  to view the results from your submission once the Hub pull request is merged.
 
 
 
 
 
 
 
 
 
509
  """
510
  )
511
  print("INFO -- Pushing evaluation job logs to the Hub")
 
4
 
5
  import pandas as pd
6
  import streamlit as st
7
+ import yaml
8
  from datasets import get_dataset_config_names
9
  from dotenv import load_dotenv
10
  from huggingface_hub import list_datasets
11
 
12
  from evaluation import filter_evaluated_models
13
  from utils import (
14
+ AUTOTRAIN_TASK_TO_HUB_TASK,
15
  commit_evaluation_log,
16
  format_col_mapping,
17
  get_compatible_models,
 
148
  "Select a dataset",
149
  all_datasets,
150
  index=all_datasets.index(default_dataset),
151
+ help="""Datasets with metadata can be evaluated with 1-click. Configure an evaluation job to add \
152
+ new metadata to a dataset card.""",
 
153
  )
154
  st.experimental_set_query_params(**{"dataset": [selected_dataset]})
155
 
 
496
  ).json()
497
  print(f"INFO -- AutoTrain job response: {train_json_resp}")
498
  if train_json_resp["success"]:
499
+ train_eval_index = {
500
+ "train-eval-index": [
501
+ {
502
+ "config": selected_config,
503
+ "task": AUTOTRAIN_TASK_TO_HUB_TASK[selected_task],
504
+ "task_id": selected_task,
505
+ "splits": {"eval_split": selected_split},
506
+ "col_mapping": col_mapping,
507
+ }
508
+ ]
509
+ }
510
+ selected_metadata = yaml.dump(train_eval_index, sort_keys=False)
511
  st.success("✅ Successfully submitted evaluation job!")
512
  st.markdown(
513
  f"""
 
519
  Check your email for notifications.
520
  * 📊 Click [here](https://hf.co/spaces/autoevaluate/leaderboards?dataset={selected_dataset}) \
521
  to view the results from your submission once the Hub pull request is merged.
522
+ * Add the following metadata to the \
523
+ [dataset card](https://huggingface.co/datasets/{selected_dataset}/blob/main/README.md) \
524
+ to enable 1-click evaluations:
525
+ """
526
+ )
527
+ st.markdown(
528
+ f"""
529
+ ```yaml
530
+ {selected_metadata}
531
  """
532
  )
533
  print("INFO -- Pushing evaluation job logs to the Hub")
notebooks/flush-prediction-repos.ipynb ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "c8093b9e-ca6a-423d-96c3-5fe21f7109a1",
6
+ "metadata": {},
7
+ "source": [
8
+ "## Imports"
9
+ ]
10
+ },
11
+ {
12
+ "cell_type": "code",
13
+ "execution_count": 1,
14
+ "id": "efe8cda7-a687-4867-b1f0-8efbcd428681",
15
+ "metadata": {},
16
+ "outputs": [],
17
+ "source": [
18
+ "import os\n",
19
+ "from pathlib import Path\n",
20
+ "\n",
21
+ "from dotenv import load_dotenv\n",
22
+ "from huggingface_hub import DatasetFilter, delete_repo, list_datasets\n",
23
+ "from tqdm.auto import tqdm\n",
24
+ "\n",
25
+ "if Path(\".env\").is_file():\n",
26
+ " load_dotenv(\".env\")\n",
27
+ "\n",
28
+ "HF_TOKEN = os.getenv(\"HF_TOKEN\")"
29
+ ]
30
+ },
31
+ {
32
+ "cell_type": "markdown",
33
+ "id": "8f6e01f0-b658-451f-999c-e08d9f4bbbd3",
34
+ "metadata": {},
35
+ "source": [
36
+ "## Get all prediction repos from autoevaluate org"
37
+ ]
38
+ },
39
+ {
40
+ "cell_type": "code",
41
+ "execution_count": 2,
42
+ "id": "2e369478-66d3-498d-a8fd-95bc9180f362",
43
+ "metadata": {},
44
+ "outputs": [],
45
+ "source": [
46
+ "def get_prediction_repos():\n",
47
+ " all_repos = list_datasets(author=\"autoevaluate\")\n",
48
+ " prediction_repos = [\n",
49
+ " repo for repo in all_repos if repo.id.split(\"/\")[1].startswith(\"autoeval-\")\n",
50
+ " ]\n",
51
+ " return prediction_repos"
52
+ ]
53
+ },
54
+ {
55
+ "cell_type": "code",
56
+ "execution_count": 3,
57
+ "id": "542db019-d01f-42f5-bef4-888dae8eeadb",
58
+ "metadata": {},
59
+ "outputs": [
60
+ {
61
+ "data": {
62
+ "text/plain": [
63
+ "66"
64
+ ]
65
+ },
66
+ "execution_count": 3,
67
+ "metadata": {},
68
+ "output_type": "execute_result"
69
+ }
70
+ ],
71
+ "source": [
72
+ "prediction_repos = get_prediction_repos()\n",
73
+ "len(prediction_repos)"
74
+ ]
75
+ },
76
+ {
77
+ "cell_type": "code",
78
+ "execution_count": 4,
79
+ "id": "331cfabf-4b73-490f-8d6a-86b5bc162666",
80
+ "metadata": {},
81
+ "outputs": [
82
+ {
83
+ "data": {
84
+ "text/plain": [
85
+ "DatasetInfo: {\n",
86
+ "\tid: autoevaluate/autoeval-staging-eval-project-9dcc51b5-6464670\n",
87
+ "\tsha: d3bb02be592d167f7a217ac9341d187142d9a90a\n",
88
+ "\tlastModified: 2022-06-13T14:54:34.000Z\n",
89
+ "\ttags: ['type:predictions', 'tags:autotrain', 'tags:evaluation', 'datasets:glue']\n",
90
+ "\tprivate: False\n",
91
+ "\tauthor: autoevaluate\n",
92
+ "\tdescription: None\n",
93
+ "\tcitation: None\n",
94
+ "\tcardData: None\n",
95
+ "\tsiblings: None\n",
96
+ "\tgated: False\n",
97
+ "\tdownloads: 12\n",
98
+ "}"
99
+ ]
100
+ },
101
+ "execution_count": 4,
102
+ "metadata": {},
103
+ "output_type": "execute_result"
104
+ }
105
+ ],
106
+ "source": [
107
+ "prediction_repos[0]"
108
+ ]
109
+ },
110
+ {
111
+ "cell_type": "markdown",
112
+ "id": "57a86b69-ffe8-4035-8f3d-5c917d8ce7bf",
113
+ "metadata": {},
114
+ "source": [
115
+ "## Delete all prediction repos"
116
+ ]
117
+ },
118
+ {
119
+ "cell_type": "code",
120
+ "execution_count": 5,
121
+ "id": "6c8e23e7-2a6d-437b-9742-17f37684d9eb",
122
+ "metadata": {},
123
+ "outputs": [
124
+ {
125
+ "data": {
126
+ "application/vnd.jupyter.widget-view+json": {
127
+ "model_id": "06fa304dcc6d44e39205b20a5e488052",
128
+ "version_major": 2,
129
+ "version_minor": 0
130
+ },
131
+ "text/plain": [
132
+ " 0%| | 0/66 [00:00<?, ?it/s]"
133
+ ]
134
+ },
135
+ "metadata": {},
136
+ "output_type": "display_data"
137
+ }
138
+ ],
139
+ "source": [
140
+ "for repo in tqdm(prediction_repos):\n",
141
+ " delete_repo(\n",
142
+ " repo_id=repo.id,\n",
143
+ " repo_type=\"dataset\",\n",
144
+ " )"
145
+ ]
146
+ },
147
+ {
148
+ "cell_type": "code",
149
+ "execution_count": null,
150
+ "id": "7d64b0aa-d05f-4497-9bd2-eb2fc0d8bd7a",
151
+ "metadata": {},
152
+ "outputs": [],
153
+ "source": []
154
+ }
155
+ ],
156
+ "metadata": {
157
+ "kernelspec": {
158
+ "display_name": "autoevaluate",
159
+ "language": "python",
160
+ "name": "autoevaluate"
161
+ },
162
+ "language_info": {
163
+ "codemirror_mode": {
164
+ "name": "ipython",
165
+ "version": 3
166
+ },
167
+ "file_extension": ".py",
168
+ "mimetype": "text/x-python",
169
+ "name": "python",
170
+ "nbconvert_exporter": "python",
171
+ "pygments_lexer": "ipython3",
172
+ "version": "3.8.13"
173
+ }
174
+ },
175
+ "nbformat": 4,
176
+ "nbformat_minor": 5
177
+ }