leo19941227 committed on
Commit
fa98f1c
•
1 Parent(s): b7f22bb

Submission: update-template

Browse files
{{cookiecutter.repo_name}}/cli.py CHANGED
@@ -1,31 +1,16 @@
1
- import datetime
2
- import re
3
  import subprocess
4
  from pathlib import Path
5
 
6
- import pandas as pd
7
- import typer
8
 
9
  SUBMISSION_FILES = ["README.md", "expert.py", "model.pt"]
 
 
10
 
11
  app = typer.Typer()
12
 
13
- def _update_submission_name(submission_name: str):
14
- replacement = ""
15
- with open("README.md", "r") as f:
16
- lines = f.readlines()
17
-
18
- for line in lines:
19
- if line.startswith("submission_name:"):
20
- changes = re.sub(r"submission_name:.+", f"submission_name: {submission_name}", line)
21
- replacement += changes
22
- else:
23
- replacement += line
24
-
25
- with open("README.md", "w") as f:
26
- f.write(replacement)
27
-
28
-
29
  @app.command()
30
  def validate():
31
  # Check that all the expected files exist
@@ -33,24 +18,42 @@ def validate():
33
  if not Path(file).is_file():
34
  raise ValueError(f"File {file} not found! Please include {file} in your submission")
35
 
36
- typer.echo("All submission files validated! ✨ 🚀 ✨")
37
- typer.echo("Now you can make a submission 🤗")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
 
40
  @app.command()
41
- def submit(submission_name: str = typer.Option(..., prompt="Please provide a name for your submission, e.g. HuBERT 😁")):
42
  subprocess.call("git pull origin main".split())
43
- _update_submission_name(submission_name)
44
- subprocess.call(["git", "add", "model.pt", "README.md"])
45
  subprocess.call(["git", "commit", "-m", f"Submission: {submission_name} "])
46
  subprocess.call(["git", "push"])
47
-
48
- today = datetime.date.today()
49
- # MON = 0, SUN = 6 -> SUN = 0 .. SAT = 6
50
- idx = (today.weekday() + 1) % 7
51
- sun = today + datetime.timedelta(7 - idx)
52
- typer.echo("Submission successful! 🎉 🥳 🎉")
53
- typer.echo(f"Your submission will be evaulated on {sun:%A %d %B %Y} ⏳")
54
 
55
 
56
  if __name__ == "__main__":
1
+ import typer
2
+ import torch
3
  import subprocess
4
  from pathlib import Path
5
 
6
+ from expert import UpstreamExpert
 
7
 
8
  SUBMISSION_FILES = ["README.md", "expert.py", "model.pt"]
9
+ SAMPLE_RATE = 16000
10
+ SECONDS = [2, 1.8, 3.7]
11
 
12
  app = typer.Typer()
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  @app.command()
15
  def validate():
16
  # Check that all the expected files exist
18
  if not Path(file).is_file():
19
  raise ValueError(f"File {file} not found! Please include {file} in your submission")
20
 
21
+ try:
22
+ upstream = UpstreamExpert(ckpt="model.pt")
23
+ wavs = [torch.rand(round(SAMPLE_RATE * sec)) for sec in SECONDS]
24
+ results = upstream(wavs)
25
+
26
+ assert isinstance(results, dict)
27
+ tasks = ["PR", "SID", "ER", "ASR", "ASV", "SD", "QbE", "ST", "SS", "SE", "secret"]
28
+ for task in tasks:
29
+ hidden_states = results.get(task, "hidden_states")
30
+ assert isinstance(hidden_states, list)
31
+
32
+ for state in hidden_states:
33
+ assert isinstance(state, torch.Tensor)
34
+ assert state.dim() == 3, "(batch_size, max_sequence_length_of_batch, hidden_size)"
35
+ assert state.shape == hidden_states[0].shape
36
+
37
+ for task in tasks:
38
+ downsample_rate = upstream.get_downsample_rates(task)
39
+ assert isinstance(downsample_rate, int)
40
+ print(f"The upstream's representation for {task}"
41
+ f" has the downsample rate of {downsample_rate}.")
42
+ except:
43
+ print("Please check the Upstream Specification on https://superbbenchmark.org/challenge")
44
+ raise
45
+
46
+ typer.echo("All submission files validated!")
47
+ typer.echo("Now you can make a submission.")
48
 
49
 
50
  @app.command()
51
+ def submit(submission_name: str):
52
  subprocess.call("git pull origin main".split())
53
+ subprocess.call(["git", "add", "."])
 
54
  subprocess.call(["git", "commit", "-m", f"Submission: {submission_name} "])
55
  subprocess.call(["git", "push"])
56
+ typer.echo("Submission successful!")
 
 
 
 
 
 
57
 
58
 
59
  if __name__ == "__main__":
{{cookiecutter.repo_name}}/expert.py CHANGED
@@ -1,50 +1,54 @@
1
  from collections import OrderedDict
2
  from typing import List, Union, Dict
3
 
 
4
  import torch.nn as nn
5
  from torch import Tensor
6
  from torch.nn.utils.rnn import pad_sequence
7
 
8
  HIDDEN_DIM = 8
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  class UpstreamExpert(nn.Module):
12
- def __init__(self, ckpt: str = None, model_config: str = None, **kwargs):
13
  """
14
  Args:
15
  ckpt:
16
  The checkpoint path for loading your pretrained weights.
17
- Can be assigned by the -k option in run_downstream.py
18
-
19
- model_config:
20
- The config path for constructing your model.
21
- Might not needed if you also save that in your checkpoint file.
22
- Can be assigned by the -g option in run_downstream.py
23
  """
24
  super().__init__()
25
  self.name = "[Example UpstreamExpert]"
26
 
27
- print(
28
- f"{self.name} - You can use model_config to construct your customized model: {model_config}"
29
- )
30
  print(f"{self.name} - You can use ckpt to load your pretrained weights: {ckpt}")
31
- print(
32
- f"{self.name} - If you store the pretrained weights and model config in a single file, "
33
- "you can just choose one argument (ckpt or model_config) to pass. It's up to you!"
34
- )
35
-
36
- # The model needs to be a nn.Module for finetuning, not required for representation extraction
37
- self.model1 = nn.Linear(1, HIDDEN_DIM)
38
- self.model2 = nn.Linear(HIDDEN_DIM, HIDDEN_DIM)
39
 
40
  def get_downsample_rates(self, key: str) -> int:
41
  """
42
  Since we do not do any downsampling in this example upstream
43
  All keys' corresponding representations have downsample rate of 1
 
44
  """
45
  return 1
46
 
47
- def forward(self, wavs: List[Tensor]) -> Dict[str, Union[Tensor, List[Tensor]]]:
48
  """
49
  When the returning Dict contains the List with more than one Tensor,
50
  those Tensors should be in the same shape to train a weighted-sum on them.
@@ -53,25 +57,21 @@ class UpstreamExpert(nn.Module):
53
  wavs = pad_sequence(wavs, batch_first=True).unsqueeze(-1)
54
  # wavs: (batch_size, max_len, 1)
55
 
56
- hidden = self.model1(wavs)
57
- # hidden: (batch_size, max_len, hidden_dim)
58
-
59
- feature = self.model2(hidden)
60
- # feature: (batch_size, max_len, hidden_dim)
61
 
62
  # The "hidden_states" key will be used as default in many cases
63
  # Others keys in this example are presented for SUPERB Challenge
64
  return {
65
- "hidden_states": [hidden, feature],
66
- "PR": [hidden, feature],
67
- "ASR": [hidden, feature],
68
- "QbE": [hidden, feature],
69
- "SID": [hidden, feature],
70
- "ASV": [hidden, feature],
71
- "SD": [hidden, feature],
72
- "ER": [hidden, feature],
73
- "SF": [hidden, feature],
74
- "SE": [hidden, feature],
75
- "SS": [hidden, feature],
76
- "secret": [hidden, feature],
77
- }
1
  from collections import OrderedDict
2
  from typing import List, Union, Dict
3
 
4
+ import torch
5
  import torch.nn as nn
6
  from torch import Tensor
7
  from torch.nn.utils.rnn import pad_sequence
8
 
9
  HIDDEN_DIM = 8
10
 
11
+ class Model(nn.Module):
12
+ def __init__(self):
13
+ super().__init__()
14
+ # The model needs to be a nn.Module for finetuning, not required for representation extraction
15
+ self.model1 = nn.Linear(1, HIDDEN_DIM)
16
+ self.model2 = nn.Linear(HIDDEN_DIM, HIDDEN_DIM)
17
+
18
+ def forward(self, wavs):
19
+ hidden = self.model1(wavs)
20
+ # hidden: (batch_size, max_len, hidden_dim)
21
+
22
+ feature = self.model2(hidden)
23
+ # feature: (batch_size, max_len, hidden_dim)
24
+
25
+ return [hidden, feature]
26
 
27
  class UpstreamExpert(nn.Module):
28
+ def __init__(self, ckpt: str = "model.pt", **kwargs):
29
  """
30
  Args:
31
  ckpt:
32
  The checkpoint path for loading your pretrained weights.
33
+ Should be fixed as model.pt for SUPERB Challenge.
 
 
 
 
 
34
  """
35
  super().__init__()
36
  self.name = "[Example UpstreamExpert]"
37
 
 
 
 
38
  print(f"{self.name} - You can use ckpt to load your pretrained weights: {ckpt}")
39
+ ckpt = torch.load(ckpt, map_location="cpu")
40
+ self.model = Model()
41
+ self.model.load_state_dict(ckpt)
 
 
 
 
 
42
 
43
  def get_downsample_rates(self, key: str) -> int:
44
  """
45
  Since we do not do any downsampling in this example upstream
46
  All keys' corresponding representations have downsample rate of 1
47
+ Eg. 10ms stride representation has the downsample rate 160 (input wavs are all in 16kHz)
48
  """
49
  return 1
50
 
51
+ def forward(self, wavs: List[Tensor]) -> Dict[str, List[Tensor]]:
52
  """
53
  When the returning Dict contains the List with more than one Tensor,
54
  those Tensors should be in the same shape to train a weighted-sum on them.
57
  wavs = pad_sequence(wavs, batch_first=True).unsqueeze(-1)
58
  # wavs: (batch_size, max_len, 1)
59
 
60
+ hidden_states = self.model(wavs)
 
 
 
 
61
 
62
  # The "hidden_states" key will be used as default in many cases
63
  # Others keys in this example are presented for SUPERB Challenge
64
  return {
65
+ "hidden_states": hidden_states,
66
+ "PR": hidden_states,
67
+ "SID": hidden_states,
68
+ "ER": hidden_states,
69
+ "ASR": hidden_states,
70
+ "QbE": hidden_states,
71
+ "ASV": hidden_states,
72
+ "SD": hidden_states,
73
+ "ST": hidden_states,
74
+ "SE": hidden_states,
75
+ "SS": hidden_states,
76
+ "secret": hidden_states,
77
+ }
{{cookiecutter.repo_name}}/requirements.txt CHANGED
@@ -1 +1,2 @@
1
- typer
 
1
+ typer
2
+ torch