lewtun HF staff committed on
Commit
2022859
·
1 Parent(s): 82c33f8

Tidy up README

Browse files
{{cookiecutter.repo_name}}/README.md CHANGED
@@ -1,30 +1,29 @@
1
  ---
2
  benchmark: superb
3
  type: model-upload
4
- submission_nme: none
5
  ---
6
 
7
- # SUPERB submissions for {{cookiecutter.repo_name}}
8
 
9
  ## Submitting to the leaderboard
10
 
11
  To make a submission to the [leaderboard](https://huggingface.co/spaces/superb/superb-leaderboard), there are three main steps:
12
 
13
- 1. Generate predictions on the unlabeled test set of each task
14
- 2. Validate the predictions are compatible with the evaluation framework
15
- 3. Push the predictions to the Hub!
16
 
17
  See the instructions below for more details.
18
 
19
  ### Rules
20
 
21
- 1. To prevent overfitting to the public leaderboard, we only evaluate **one submission per week**. You can push predictions to the Hub as many times as you wish, but we will only evaluate the most recent commit in a given week.
22
- 2. Transfer or meta-learning using other datasets, including further pre-training on other corpora, is allowed.
23
- 3. Use of unlabeled test data is allowed, as it is always available in the applied setting. For example, further pre-training using the unlabeled data for a task would be permitted.
24
- 4. Systems may be augmented with information retrieved from the internet, e.g. via automated web searches.
25
 
26
  ### Submission file format
27
 
 
 
28
  ### Validate your submission
29
 
30
  To ensure that your submission files are correctly formatted, run the following command from the root of the repository:
@@ -55,4 +54,4 @@ Submission successful! 🎉 🥳 🎉
55
  Your submission will be fine-tuned and evaluated on Sunday 05 September 2021 ⏳
56
  ```
57
 
58
- where the evaluation is run every Sunday and your results will be visible on the leaderboard.
 
1
  ---
2
  benchmark: superb
3
  type: model-upload
4
+ submission_name: none
5
  ---
6
 
7
+ # SUPERB submissions for test-submission-with-weights
8
 
9
  ## Submitting to the leaderboard
10
 
11
  To make a submission to the [leaderboard](https://huggingface.co/spaces/superb/superb-leaderboard), there are three main steps:
12
 
13
+ 1. Pretrain a model and store the weights in this repository as a `model.pt` file
14
+ 2. Validate the model is compatible with the fine-tuning and evaluation framework
15
+ 3. Push the model to the Hub!
16
 
17
  See the instructions below for more details.
18
 
19
  ### Rules
20
 
21
+ 1. To prevent overfitting to the public leaderboard, we only evaluate **one submission per week**. You can push upstream models to the Hub as many times as you wish, but we will only evaluate the most recent commit in a given week.
 
 
 
22
 
23
  ### Submission file format
24
 
25
+ TODO
26
+
27
  ### Validate your submission
28
 
29
  To ensure that your submission files are correctly formatted, run the following command from the root of the repository:
 
54
  Your submission will be fine-tuned and evaluated on Sunday 05 September 2021 ⏳
55
  ```
56
 
57
+ and your results will be visible on the leaderboard once the model is fine-tuned and evaluated.
{{cookiecutter.repo_name}}/cli.py CHANGED
@@ -5,25 +5,11 @@ from pathlib import Path
5
 
6
  import pandas as pd
7
  import typer
8
- from datasets import get_dataset_config_names, load_dataset
9
 
10
- CSV_SCHEMA = {
11
- "banking_77": (5000, 2),
12
- "overruling": (2350, 2),
13
- "semiconductor_org_types": (449, 2),
14
- "ade_corpus_v2": (5000, 2),
15
- "twitter_complaints": (3399, 2),
16
- "neurips_impact_statement_risks": (150, 2),
17
- "systematic_review_inclusion": (2244, 2),
18
- "terms_of_service": (5000, 2),
19
- "tai_safety_research": (1639, 2),
20
- "one_stop_english": (518, 2),
21
- "tweet_eval_hate": (2966, 2),
22
- }
23
 
24
  app = typer.Typer()
25
 
26
-
27
  def _update_submission_name(submission_name: str):
28
  replacement = ""
29
  with open("README.md", "r") as f:
@@ -42,54 +28,20 @@ def _update_submission_name(submission_name: str):
42
 
43
  @app.command()
44
  def validate():
45
- # TODO(lewtun): Consider using great_expectations for the data validation
46
- tasks = get_dataset_config_names("ought/raft")
47
-
48
  # Check that all the expected files exist
49
- prediction_files = list(Path("data").rglob("predictions.csv"))
50
- mismatched_files = set(tasks).symmetric_difference(set([f.parent.name for f in prediction_files]))
51
- if mismatched_files:
52
- raise ValueError(f"Incorrect number of files! Expected {len(tasks)} files, but got {len(prediction_files)}.")
53
-
54
- # Check all files have the expected shape (number of rows, number of columns)
55
- # TODO(lewtun): Add a check for the specific IDs per file
56
- shape_errors = []
57
- column_errors = []
58
- for prediction_file in prediction_files:
59
- df = pd.read_csv(prediction_file)
60
- incorrect_shape = df.shape != CSV_SCHEMA[prediction_file.parent.name]
61
- if incorrect_shape:
62
- shape_errors.append(prediction_file)
63
- incorrect_columns = sorted(df.columns) != ["ID", "Label"]
64
- if incorrect_columns:
65
- column_errors.append(prediction_file)
66
-
67
- if shape_errors:
68
- raise ValueError(f"Incorrect CSV shapes in files: {shape_errors}")
69
-
70
- if column_errors:
71
- raise ValueError(f"Incorrect CSV columns in files: {column_errors}")
72
-
73
- # Check we can load the dataset for each task
74
- load_errors = []
75
- for task in tasks:
76
- try:
77
- _ = load_dataset("../{{cookiecutter.repo_name}}", task)
78
- except Exception as e:
79
- load_errors.append(e)
80
-
81
- if load_errors:
82
- raise ValueError(f"Could not load predictions! Errors: {load_errors}")
83
 
84
  typer.echo("All submission files validated! ✨ 🚀 ✨")
85
  typer.echo("Now you can make a submission 🤗")
86
 
87
 
88
  @app.command()
89
- def submit(submission_name: str = typer.Option(..., prompt="Please provide a name for your submission, e.g. GPT-4 😁")):
90
  subprocess.call("git pull origin main".split())
91
  _update_submission_name(submission_name)
92
- subprocess.call(["git", "add", "data/*predictions.csv", "README.md"])
93
  subprocess.call(["git", "commit", "-m", f"Submission: {submission_name} "])
94
  subprocess.call(["git", "push"])
95
 
 
5
 
6
  import pandas as pd
7
  import typer
 
8
 
9
+ SUBMISSION_FILES = ["README.md", "expert.py", "model.pt"]
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  app = typer.Typer()
12
 
 
13
  def _update_submission_name(submission_name: str):
14
  replacement = ""
15
  with open("README.md", "r") as f:
 
28
 
29
  @app.command()
30
  def validate():
 
 
 
31
  # Check that all the expected files exist
32
+ for file in SUBMISSION_FILES:
33
+ if not Path(file).is_file():
34
+ raise ValueError(f"File {file} not found! Please include {file} in your submission")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
  typer.echo("All submission files validated! ✨ 🚀 ✨")
37
  typer.echo("Now you can make a submission 🤗")
38
 
39
 
40
  @app.command()
41
+ def submit(submission_name: str = typer.Option(..., prompt="Please provide a name for your submission, e.g. HuBERT 😁")):
42
  subprocess.call("git pull origin main".split())
43
  _update_submission_name(submission_name)
44
+ subprocess.call(["git", "add", "model.pt", "README.md"])
45
  subprocess.call(["git", "commit", "-m", f"Submission: {submission_name} "])
46
  subprocess.call(["git", "push"])
47