Files changed (1)
  1. app.py +0 -693
app.py DELETED
@@ -1,693 +0,0 @@
- import os
- import time
- from pathlib import Path
-
- import pandas as pd
- import streamlit as st
- import yaml
- from datasets import get_dataset_config_names
- from dotenv import load_dotenv
- from huggingface_hub import list_datasets
-
- from evaluation import filter_evaluated_models
- from utils import (
-     AUTOTRAIN_TASK_TO_HUB_TASK,
-     commit_evaluation_log,
-     create_autotrain_project_name,
-     format_col_mapping,
-     get_compatible_models,
-     get_config_metadata,
-     get_dataset_card_url,
-     get_key,
-     get_metadata,
-     http_get,
-     http_post,
- )
-
- if Path(".env").is_file():
-     load_dotenv(".env")
-
- HF_TOKEN = os.getenv("HF_TOKEN")
- AUTOTRAIN_USERNAME = os.getenv("AUTOTRAIN_USERNAME")
- AUTOTRAIN_BACKEND_API = os.getenv("AUTOTRAIN_BACKEND_API")
- DATASETS_PREVIEW_API = os.getenv("DATASETS_PREVIEW_API")
-
- # Put image tasks on top
- TASK_TO_ID = {
-     "image_binary_classification": 17,
-     "image_multi_class_classification": 18,
-     "binary_classification": 1,
-     "multi_class_classification": 2,
-     "natural_language_inference": 22,
-     "entity_extraction": 4,
-     "extractive_question_answering": 5,
-     "translation": 6,
-     "summarization": 8,
-     "text_zero_shot_classification": 23,
- }
-
- TASK_TO_DEFAULT_METRICS = {
-     "binary_classification": ["f1", "precision", "recall", "auc", "accuracy"],
-     "multi_class_classification": [
-         "f1",
-         "precision",
-         "recall",
-         "accuracy",
-     ],
-     "natural_language_inference": ["f1", "precision", "recall", "auc", "accuracy"],
-     "entity_extraction": ["precision", "recall", "f1", "accuracy"],
-     "extractive_question_answering": ["f1", "exact_match"],
-     "translation": ["sacrebleu"],
-     "summarization": ["rouge1", "rouge2", "rougeL", "rougeLsum"],
-     "image_binary_classification": ["f1", "precision", "recall", "auc", "accuracy"],
-     "image_multi_class_classification": [
-         "f1",
-         "precision",
-         "recall",
-         "accuracy",
-     ],
-     "text_zero_shot_classification": ["accuracy", "loss"],
- }
-
- AUTOTRAIN_TASK_TO_LANG = {
-     "translation": "en2de",
-     "image_binary_classification": "unk",
-     "image_multi_class_classification": "unk",
- }
-
- AUTOTRAIN_MACHINE = {"text_zero_shot_classification": "r5.16x"}
-
-
- SUPPORTED_TASKS = list(TASK_TO_ID.keys())
-
- # Extracted from utils.get_supported_metrics
- # Hardcoded for now due to speed / caching constraints
- SUPPORTED_METRICS = [
-     "accuracy",
-     "bertscore",
-     "bleu",
-     "cer",
-     "chrf",
-     "code_eval",
-     "comet",
-     "competition_math",
-     "coval",
-     "cuad",
-     "exact_match",
-     "f1",
-     "frugalscore",
-     "google_bleu",
-     "mae",
-     "mahalanobis",
-     "matthews_correlation",
-     "mean_iou",
-     "meteor",
-     "mse",
-     "pearsonr",
-     "perplexity",
-     "precision",
-     "recall",
-     "roc_auc",
-     "rouge",
-     "sacrebleu",
-     "sari",
-     "seqeval",
-     "spearmanr",
-     "squad",
-     "squad_v2",
-     "ter",
-     "trec_eval",
-     "wer",
-     "wiki_split",
-     "xnli",
-     "angelina-wang/directional_bias_amplification",
-     "jordyvl/ece",
-     "lvwerra/ai4code",
-     "lvwerra/amex",
- ]
-
-
- #######
- # APP #
- #######
- st.title("Evaluation on the Hub")
- st.markdown(
-     """
-     Welcome to Hugging Face's automatic model evaluator 👋!
-
-     This application allows you to evaluate 🤗 Transformers
-     [models](https://huggingface.co/models?library=transformers&sort=downloads)
-     across a wide variety of [datasets](https://huggingface.co/datasets) on the
-     Hub. Please select the dataset and configuration below. The results of your
-     evaluation will be displayed on the [public
-     leaderboards](https://huggingface.co/spaces/autoevaluate/leaderboards). For
-     more details, check out our [blog
-     post](https://huggingface.co/blog/eval-on-the-hub).
-     """
- )
-
- all_datasets = [d.id for d in list_datasets()]
- query_params = st.experimental_get_query_params()
- if "first_query_params" not in st.session_state:
-     st.session_state.first_query_params = query_params
- first_query_params = st.session_state.first_query_params
- default_dataset = all_datasets[0]
- if "dataset" in first_query_params:
-     if len(first_query_params["dataset"]) > 0 and first_query_params["dataset"][0] in all_datasets:
-         default_dataset = first_query_params["dataset"][0]
-
- selected_dataset = st.selectbox(
-     "Select a dataset",
-     all_datasets,
-     index=all_datasets.index(default_dataset),
-     help="""Datasets with metadata can be evaluated with 1-click. Configure an evaluation job to add \
-     new metadata to a dataset card.""",
- )
- st.experimental_set_query_params(**{"dataset": [selected_dataset]})
-
- # Check if selected dataset can be streamed
- is_valid_dataset = http_get(
-     path="/is-valid",
-     domain=DATASETS_PREVIEW_API,
-     params={"dataset": selected_dataset},
- ).json()
- if is_valid_dataset["viewer"] is False and is_valid_dataset["preview"] is False:
-     st.error(
-         """The dataset you selected is not currently supported. Open a \
-         [discussion](https://huggingface.co/spaces/autoevaluate/model-evaluator/discussions) for support."""
-     )
-
- metadata = get_metadata(selected_dataset, token=HF_TOKEN)
- print(f"INFO -- Dataset metadata: {metadata}")
- if metadata is None:
-     st.warning("No evaluation metadata found. Please configure the evaluation job below.")
-
- with st.expander("Advanced configuration"):
-     # Select task
-     selected_task = st.selectbox(
-         "Select a task",
-         SUPPORTED_TASKS,
-         index=SUPPORTED_TASKS.index(metadata[0]["task_id"]) if metadata is not None else 0,
-         help="""Don't see your favourite task here? Open a \
-         [discussion](https://huggingface.co/spaces/autoevaluate/model-evaluator/discussions) to request it!""",
-     )
-     # Select config
-     configs = get_dataset_config_names(selected_dataset)
-     selected_config = st.selectbox(
-         "Select a config",
-         configs,
-         help="""Some datasets contain several sub-datasets, known as _configurations_. \
-         Select one to evaluate your models on. \
-         See the [docs](https://huggingface.co/docs/datasets/master/en/load_hub#configurations) for more details.
-         """,
-     )
-     # Some datasets have multiple metadata (one per config), so we grab the one associated with the selected config
-     config_metadata = get_config_metadata(selected_config, metadata)
-     print(f"INFO -- Config metadata: {config_metadata}")
-
-     # Select splits
-     splits_resp = http_get(
-         path="/splits",
-         domain=DATASETS_PREVIEW_API,
-         params={"dataset": selected_dataset},
-     )
-     if splits_resp.status_code == 200:
-         split_names = []
-         all_splits = splits_resp.json()
-         for split in all_splits["splits"]:
-             if split["config"] == selected_config:
-                 split_names.append(split["split"])
-
-         if config_metadata is not None:
-             eval_split = config_metadata["splits"].get("eval_split", None)
-         else:
-             eval_split = None
-         selected_split = st.selectbox(
-             "Select a split",
-             split_names,
-             index=split_names.index(eval_split) if eval_split is not None else 0,
-             help="Be wary when evaluating models on the `train` split.",
-         )
-
-     # Select columns
-     rows_resp = http_get(
-         path="/first-rows",
-         domain=DATASETS_PREVIEW_API,
-         params={
-             "dataset": selected_dataset,
-             "config": selected_config,
-             "split": selected_split,
-         },
-     ).json()
-     col_names = list(pd.json_normalize(rows_resp["rows"][0]["row"]).columns)
-
-     st.markdown("**Map your dataset columns**")
-     st.markdown(
-         """The model evaluator uses a standardised set of column names for the input examples and labels. \
-         Please define the mapping between your dataset columns (right) and the standardised column names (left)."""
-     )
-     col1, col2 = st.columns(2)
-
-     # TODO: find a better way to layout these items
-     # TODO: need graceful way of handling dataset <--> task mismatch for datasets with metadata
-     col_mapping = {}
-     if selected_task in ["binary_classification", "multi_class_classification"]:
-         with col1:
-             st.markdown("`text` column")
-             st.text("")
-             st.text("")
-             st.text("")
-             st.text("")
-             st.markdown("`target` column")
-         with col2:
-             text_col = st.selectbox(
-                 "This column should contain the text to be classified",
-                 col_names,
-                 index=col_names.index(get_key(config_metadata["col_mapping"], "text"))
-                 if config_metadata is not None
-                 else 0,
-             )
-             target_col = st.selectbox(
-                 "This column should contain the labels associated with the text",
-                 col_names,
-                 index=col_names.index(get_key(config_metadata["col_mapping"], "target"))
-                 if config_metadata is not None
-                 else 0,
-             )
-             col_mapping[text_col] = "text"
-             col_mapping[target_col] = "target"
-
-     elif selected_task == "text_zero_shot_classification":
-         with col1:
-             st.markdown("`text` column")
-             st.text("")
-             st.text("")
-             st.text("")
-             st.text("")
-             st.markdown("`classes` column")
-             st.text("")
-             st.text("")
-             st.text("")
-             st.text("")
-             st.markdown("`target` column")
-         with col2:
-             text_col = st.selectbox(
-                 "This column should contain the text to be classified",
-                 col_names,
-                 index=col_names.index(get_key(config_metadata["col_mapping"], "text"))
-                 if config_metadata is not None
-                 else 0,
-             )
-             classes_col = st.selectbox(
-                 "This column should contain the classes associated with the text",
-                 col_names,
-                 index=col_names.index(get_key(config_metadata["col_mapping"], "classes"))
-                 if config_metadata is not None
-                 else 0,
-             )
-             target_col = st.selectbox(
-                 "This column should contain the index of the correct class",
-                 col_names,
-                 index=col_names.index(get_key(config_metadata["col_mapping"], "target"))
-                 if config_metadata is not None
-                 else 0,
-             )
-             col_mapping[text_col] = "text"
-             col_mapping[classes_col] = "classes"
-             col_mapping[target_col] = "target"
-
-     if selected_task in ["natural_language_inference"]:
-         config_metadata = get_config_metadata(selected_config, metadata)
-         with col1:
-             st.markdown("`text1` column")
-             st.text("")
-             st.text("")
-             st.text("")
-             st.text("")
-             st.text("")
-             st.markdown("`text2` column")
-             st.text("")
-             st.text("")
-             st.text("")
-             st.text("")
-             st.text("")
-             st.markdown("`target` column")
-         with col2:
-             text1_col = st.selectbox(
-                 "This column should contain the first text passage to be classified",
-                 col_names,
-                 index=col_names.index(get_key(config_metadata["col_mapping"], "text1"))
-                 if config_metadata is not None
-                 else 0,
-             )
-             text2_col = st.selectbox(
-                 "This column should contain the second text passage to be classified",
-                 col_names,
-                 index=col_names.index(get_key(config_metadata["col_mapping"], "text2"))
-                 if config_metadata is not None
-                 else 0,
-             )
-             target_col = st.selectbox(
-                 "This column should contain the labels associated with the text",
-                 col_names,
-                 index=col_names.index(get_key(config_metadata["col_mapping"], "target"))
-                 if config_metadata is not None
-                 else 0,
-             )
-             col_mapping[text1_col] = "text1"
-             col_mapping[text2_col] = "text2"
-             col_mapping[target_col] = "target"
-
-     elif selected_task == "entity_extraction":
-         with col1:
-             st.markdown("`tokens` column")
-             st.text("")
-             st.text("")
-             st.text("")
-             st.text("")
-             st.markdown("`tags` column")
-         with col2:
-             tokens_col = st.selectbox(
-                 "This column should contain the array of tokens to be classified",
-                 col_names,
-                 index=col_names.index(get_key(config_metadata["col_mapping"], "tokens"))
-                 if config_metadata is not None
-                 else 0,
-             )
-             tags_col = st.selectbox(
-                 "This column should contain the labels associated with each part of the text",
-                 col_names,
-                 index=col_names.index(get_key(config_metadata["col_mapping"], "tags"))
-                 if config_metadata is not None
-                 else 0,
-             )
-             col_mapping[tokens_col] = "tokens"
-             col_mapping[tags_col] = "tags"
-
-     elif selected_task == "translation":
-         with col1:
-             st.markdown("`source` column")
-             st.text("")
-             st.text("")
-             st.text("")
-             st.text("")
-             st.markdown("`target` column")
-         with col2:
-             text_col = st.selectbox(
-                 "This column should contain the text to be translated",
-                 col_names,
-                 index=col_names.index(get_key(config_metadata["col_mapping"], "source"))
-                 if config_metadata is not None
-                 else 0,
-             )
-             target_col = st.selectbox(
-                 "This column should contain the target translation",
-                 col_names,
-                 index=col_names.index(get_key(config_metadata["col_mapping"], "target"))
-                 if config_metadata is not None
-                 else 0,
-             )
-             col_mapping[text_col] = "source"
-             col_mapping[target_col] = "target"
-
-     elif selected_task == "summarization":
-         with col1:
-             st.markdown("`text` column")
-             st.text("")
-             st.text("")
-             st.text("")
-             st.text("")
-             st.markdown("`target` column")
-         with col2:
-             text_col = st.selectbox(
-                 "This column should contain the text to be summarized",
-                 col_names,
-                 index=col_names.index(get_key(config_metadata["col_mapping"], "text"))
-                 if config_metadata is not None
-                 else 0,
-             )
-             target_col = st.selectbox(
-                 "This column should contain the target summary",
-                 col_names,
-                 index=col_names.index(get_key(config_metadata["col_mapping"], "target"))
-                 if config_metadata is not None
-                 else 0,
-             )
-             col_mapping[text_col] = "text"
-             col_mapping[target_col] = "target"
-
-     elif selected_task == "extractive_question_answering":
-         if config_metadata is not None:
-             col_mapping = config_metadata["col_mapping"]
-             # Hub YAML parser converts periods to hyphens, so we remap them here
-             col_mapping = format_col_mapping(col_mapping)
-         with col1:
-             st.markdown("`context` column")
-             st.text("")
-             st.text("")
-             st.text("")
-             st.text("")
-             st.markdown("`question` column")
-             st.text("")
-             st.text("")
-             st.text("")
-             st.text("")
-             st.markdown("`answers.text` column")
-             st.text("")
-             st.text("")
-             st.text("")
-             st.text("")
-             st.markdown("`answers.answer_start` column")
-         with col2:
-             context_col = st.selectbox(
-                 "This column should contain the question's context",
-                 col_names,
-                 index=col_names.index(get_key(col_mapping, "context")) if config_metadata is not None else 0,
-             )
-             question_col = st.selectbox(
-                 "This column should contain the question to be answered, given the context",
-                 col_names,
-                 index=col_names.index(get_key(col_mapping, "question")) if config_metadata is not None else 0,
-             )
-             answers_text_col = st.selectbox(
-                 "This column should contain example answers to the question, extracted from the context",
-                 col_names,
-                 index=col_names.index(get_key(col_mapping, "answers.text")) if config_metadata is not None else 0,
-             )
-             answers_start_col = st.selectbox(
-                 "This column should contain the indices in the context of the first character of each `answers.text`",
-                 col_names,
-                 index=col_names.index(get_key(col_mapping, "answers.answer_start"))
-                 if config_metadata is not None
-                 else 0,
-             )
-             col_mapping[context_col] = "context"
-             col_mapping[question_col] = "question"
-             col_mapping[answers_text_col] = "answers.text"
-             col_mapping[answers_start_col] = "answers.answer_start"
-     elif selected_task in ["image_binary_classification", "image_multi_class_classification"]:
-         with col1:
-             st.markdown("`image` column")
-             st.text("")
-             st.text("")
-             st.text("")
-             st.text("")
-             st.markdown("`target` column")
-         with col2:
-             image_col = st.selectbox(
-                 "This column should contain the images to be classified",
-                 col_names,
-                 index=col_names.index(get_key(config_metadata["col_mapping"], "image"))
-                 if config_metadata is not None
-                 else 0,
-             )
-             target_col = st.selectbox(
-                 "This column should contain the labels associated with the images",
-                 col_names,
-                 index=col_names.index(get_key(config_metadata["col_mapping"], "target"))
-                 if config_metadata is not None
-                 else 0,
-             )
-             col_mapping[image_col] = "image"
-             col_mapping[target_col] = "target"
-
-     # Select metrics
-     st.markdown("**Select metrics**")
-     st.markdown("The following metrics will be computed")
-     html_string = " ".join(
-         [
-             '<div style="padding-right:5px;padding-left:5px;padding-top:5px;padding-bottom:5px;float:left">'
-             + '<div style="background-color:#D3D3D3;border-radius:5px;display:inline-block;padding-right:5px;'
-             + 'padding-left:5px;color:white">'
-             + metric
-             + "</div></div>"
-             for metric in TASK_TO_DEFAULT_METRICS[selected_task]
-         ]
-     )
-     st.markdown(html_string, unsafe_allow_html=True)
-     selected_metrics = st.multiselect(
-         "(Optional) Select additional metrics",
-         sorted(list(set(SUPPORTED_METRICS) - set(TASK_TO_DEFAULT_METRICS[selected_task]))),
-         help="""User-selected metrics will be computed with their default arguments. \
-         For example, `f1` will report results for binary labels. \
-         Check out the [available metrics](https://huggingface.co/metrics) for more details.""",
-     )
-
- with st.form(key="form"):
-     compatible_models = get_compatible_models(selected_task, [selected_dataset])
-     selected_models = st.multiselect(
-         "Select the models you wish to evaluate",
-         compatible_models,
-         help="""Don't see your favourite model in this list? Add the dataset and task it was trained on to the \
-         [model card metadata.](https://huggingface.co/docs/hub/models-cards#model-card-metadata)""",
-     )
-     print("INFO -- Selected models before filter:", selected_models)
-
-     hf_username = st.text_input("Enter your 🤗 Hub username to be notified when the evaluation is finished")
-
-     submit_button = st.form_submit_button("Evaluate models 🚀")
-
-     if submit_button:
-         if len(hf_username) == 0:
-             st.warning("No 🤗 Hub username provided! Please enter your username and try again.")
-         elif len(selected_models) == 0:
-             st.warning("⚠️ No models were selected for evaluation! Please select at least one model and try again.")
-         elif len(selected_models) > 10:
-             st.warning("Only 10 models can be evaluated at once. Please select fewer models and try again.")
-         else:
-             # Filter out previously evaluated models
-             selected_models = filter_evaluated_models(
-                 selected_models,
-                 selected_task,
-                 selected_dataset,
-                 selected_config,
-                 selected_split,
-                 selected_metrics,
-             )
-             print("INFO -- Selected models after filter:", selected_models)
-             if len(selected_models) > 0:
-                 project_payload = {
-                     "username": AUTOTRAIN_USERNAME,
-                     "proj_name": create_autotrain_project_name(selected_dataset, selected_config),
-                     "task": TASK_TO_ID[selected_task],
-                     "config": {
-                         "language": AUTOTRAIN_TASK_TO_LANG[selected_task]
-                         if selected_task in AUTOTRAIN_TASK_TO_LANG
-                         else "en",
-                         "max_models": 5,
-                         "instance": {
-                             "provider": "sagemaker" if selected_task in AUTOTRAIN_MACHINE.keys() else "ovh",
-                             "instance_type": AUTOTRAIN_MACHINE[selected_task]
-                             if selected_task in AUTOTRAIN_MACHINE.keys()
-                             else "p3",
-                             "max_runtime_seconds": 172800,
-                             "num_instances": 1,
-                             "disk_size_gb": 200,
-                         },
-                         "evaluation": {
-                             "metrics": selected_metrics,
-                             "models": selected_models,
-                             "hf_username": hf_username,
-                         },
-                     },
-                 }
-                 print(f"INFO -- Payload: {project_payload}")
-                 project_json_resp = http_post(
-                     path="/projects/create",
-                     payload=project_payload,
-                     token=HF_TOKEN,
-                     domain=AUTOTRAIN_BACKEND_API,
-                 ).json()
-                 print(f"INFO -- Project creation response: {project_json_resp}")
-
-                 if project_json_resp["created"]:
-                     data_payload = {
-                         "split": 4,  # use "auto" split choice in AutoTrain
-                         "col_mapping": col_mapping,
-                         "load_config": {"max_size_bytes": 0, "shuffle": False},
-                         "dataset_id": selected_dataset,
-                         "dataset_config": selected_config,
-                         "dataset_split": selected_split,
-                     }
-                     data_json_resp = http_post(
-                         path=f"/projects/{project_json_resp['id']}/data/dataset",
-                         payload=data_payload,
-                         token=HF_TOKEN,
-                         domain=AUTOTRAIN_BACKEND_API,
-                     ).json()
-                     print(f"INFO -- Dataset creation response: {data_json_resp}")
-                     if data_json_resp["download_status"] == 1:
-                         train_json_resp = http_post(
-                             path=f"/projects/{project_json_resp['id']}/data/start_processing",
-                             token=HF_TOKEN,
-                             domain=AUTOTRAIN_BACKEND_API,
-                         ).json()
-                         # For local development we process and approve projects on-the-fly
-                         if "localhost" in AUTOTRAIN_BACKEND_API:
-                             with st.spinner("⏳ Waiting for data processing to complete ..."):
-                                 is_data_processing_success = False
-                                 while is_data_processing_success is not True:
-                                     project_status = http_get(
-                                         path=f"/projects/{project_json_resp['id']}",
-                                         token=HF_TOKEN,
-                                         domain=AUTOTRAIN_BACKEND_API,
-                                     ).json()
-                                     if project_status["status"] == 3:
-                                         is_data_processing_success = True
-                                     time.sleep(10)
-
-                             # Approve training job
-                             train_job_resp = http_post(
-                                 path=f"/projects/{project_json_resp['id']}/start_training",
-                                 token=HF_TOKEN,
-                                 domain=AUTOTRAIN_BACKEND_API,
-                             ).json()
-                             st.success("✅ Data processing and project approval complete - go forth and evaluate!")
-                         else:
-                             # Prod/staging submissions are evaluated in a cron job via run_evaluation_jobs.py
-                             print(f"INFO -- AutoTrain job response: {train_json_resp}")
-                         if train_json_resp["success"]:
-                             train_eval_index = {
-                                 "train-eval-index": [
-                                     {
-                                         "config": selected_config,
-                                         "task": AUTOTRAIN_TASK_TO_HUB_TASK[selected_task],
-                                         "task_id": selected_task,
-                                         "splits": {"eval_split": selected_split},
-                                         "col_mapping": col_mapping,
-                                     }
-                                 ]
-                             }
-                             selected_metadata = yaml.dump(train_eval_index, sort_keys=False)
-                             dataset_card_url = get_dataset_card_url(selected_dataset)
-                             st.success("✅ Successfully submitted evaluation job!")
-                             st.markdown(
-                                 f"""
-                             Evaluation can take up to 1 hour to complete, so grab a ☕️ or 🍵 while you wait:
-
-                             * 🔔 A [Hub pull request](https://huggingface.co/docs/hub/repositories-pull-requests-discussions) with the evaluation results will be opened for each model you selected. Check your email for notifications.
-                             * 📊 Click [here](https://hf.co/spaces/autoevaluate/leaderboards?dataset={selected_dataset}) to view the results from your submission once the Hub pull request is merged.
-                             * 🥱 Tired of configuring evaluations? Add the following metadata to the [dataset card]({dataset_card_url}) to enable 1-click evaluations:
-                             """  # noqa
-                             )
-                             st.markdown(
-                                 f"""
- ```yaml
- {selected_metadata}
- """
-                             )
-                             print("INFO -- Pushing evaluation job logs to the Hub")
-                             evaluation_log = {}
-                             evaluation_log["project_id"] = project_json_resp["id"]
-                             evaluation_log["autotrain_env"] = (
-                                 "staging" if "staging" in AUTOTRAIN_BACKEND_API else "prod"
-                             )
-                             evaluation_log["payload"] = project_payload
-                             evaluation_log["project_creation_response"] = project_json_resp
-                             evaluation_log["dataset_creation_response"] = data_json_resp
-                             evaluation_log["autotrain_job_response"] = train_json_resp
-                             commit_evaluation_log(evaluation_log, hf_access_token=HF_TOKEN)
-                         else:
-                             st.error("🙈 Oh no, there was an error submitting your evaluation job!")
-             else:
-                 st.warning("⚠️ No models left to evaluate! Please select other models and try again.")