superb
/

superb-submission

Model card Files Files and versions Community

leo19941227 committed on Oct 15, 2021

Commit

fa98f1c

•

1 Parent(s): b7f22bb

Submission: update-template

Browse files

Files changed (3) hide show

{{cookiecutter.repo_name}}/cli.py +35 -32
{{cookiecutter.repo_name}}/expert.py +37 -37
{{cookiecutter.repo_name}}/requirements.txt +2 -1

{{cookiecutter.repo_name}}/cli.py CHANGED Viewed

@@ -1,31 +1,16 @@
-import datetime
-import re
 import subprocess
 from pathlib import Path
-import pandas as pd
-import typer
 SUBMISSION_FILES = ["README.md", "expert.py", "model.pt"]
 app = typer.Typer()
-def _update_submission_name(submission_name: str):
-    replacement = ""
-    with open("README.md", "r") as f:
-        lines = f.readlines()
-    for line in lines:
-        if line.startswith("submission_name:"):
-            changes = re.sub(r"submission_name:.+", f"submission_name: {submission_name}", line)
-            replacement += changes
-        else:
-            replacement += line
-    with open("README.md", "w") as f:
-        f.write(replacement)
 @app.command()
 def validate():
     # Check that all the expected files exist
@@ -33,24 +18,42 @@ def validate():
         if not Path(file).is_file():
             raise ValueError(f"File {file} not found! Please include {file} in your submission")
-    typer.echo("All submission files validated! ✨ 🚀 ✨")
-    typer.echo("Now you can make a submission 🤗")
 @app.command()
-def submit(submission_name: str = typer.Option(..., prompt="Please provide a name for your submission, e.g. HuBERT 😁")):
     subprocess.call("git pull origin main".split())
-    _update_submission_name(submission_name)
-    subprocess.call(["git", "add", "model.pt", "README.md"])
     subprocess.call(["git", "commit", "-m", f"Submission: {submission_name} "])
     subprocess.call(["git", "push"])
-    today = datetime.date.today()
-    # MON = 0, SUN = 6 -> SUN = 0 .. SAT = 6
-    idx = (today.weekday() + 1) % 7
-    sun = today + datetime.timedelta(7 - idx)
-    typer.echo("Submission successful! 🎉 🥳 🎉")
-    typer.echo(f"Your submission will be evaulated on {sun:%A %d %B %Y} ⏳")
 if __name__ == "__main__":

+import typer
+import torch
 import subprocess
 from pathlib import Path
+from expert import UpstreamExpert
 SUBMISSION_FILES = ["README.md", "expert.py", "model.pt"]
+SAMPLE_RATE = 16000
+SECONDS = [2, 1.8, 3.7]
 app = typer.Typer()
 @app.command()
 def validate():
     # Check that all the expected files exist
         if not Path(file).is_file():
             raise ValueError(f"File {file} not found! Please include {file} in your submission")
+    try:
+        upstream = UpstreamExpert(ckpt="model.pt")
+        wavs = [torch.rand(round(SAMPLE_RATE * sec)) for sec in SECONDS]
+        results = upstream(wavs)
+        assert isinstance(results, dict)
+        tasks = ["PR", "SID", "ER", "ASR", "ASV", "SD", "QbE", "ST", "SS", "SE", "secret"]
+        for task in tasks:
+            hidden_states = results.get(task, "hidden_states")
+            assert isinstance(hidden_states, list)
+            for state in hidden_states:
+                assert isinstance(state, torch.Tensor)
+                assert state.dim() == 3, "(batch_size, max_sequence_length_of_batch, hidden_size)"
+                assert state.shape == hidden_states[0].shape
+        for task in tasks:
+            downsample_rate = upstream.get_downsample_rates(task)
+            assert isinstance(downsample_rate, int)
+            print(f"The upstream's representation for {task}"
+                f" has the downsample rate of {downsample_rate}.")
+    except:
+        print("Please check the Upstream Specification on https://superbbenchmark.org/challenge")
+        raise
+    typer.echo("All submission files validated!")
+    typer.echo("Now you can make a submission.")
 @app.command()
+def submit(submission_name: str):
     subprocess.call("git pull origin main".split())
+    subprocess.call(["git", "add", "."])
     subprocess.call(["git", "commit", "-m", f"Submission: {submission_name} "])
     subprocess.call(["git", "push"])
+    typer.echo("Submission successful!")
 if __name__ == "__main__":

{{cookiecutter.repo_name}}/expert.py CHANGED Viewed

@@ -1,50 +1,54 @@
 from collections import OrderedDict
 from typing import List, Union, Dict
 import torch.nn as nn
 from torch import Tensor
 from torch.nn.utils.rnn import pad_sequence
 HIDDEN_DIM = 8
 class UpstreamExpert(nn.Module):
-    def __init__(self, ckpt: str = None, model_config: str = None, **kwargs):
         """
         Args:
             ckpt:
                 The checkpoint path for loading your pretrained weights.
-                Can be assigned by the -k option in run_downstream.py
-            model_config:
-                The config path for constructing your model.
-                Might not needed if you also save that in your checkpoint file.
-                Can be assigned by the -g option in run_downstream.py
         """
         super().__init__()
         self.name = "[Example UpstreamExpert]"
-        print(
-            f"{self.name} - You can use model_config to construct your customized model: {model_config}"
-        )
         print(f"{self.name} - You can use ckpt to load your pretrained weights: {ckpt}")
-        print(
-            f"{self.name} - If you store the pretrained weights and model config in a single file, "
-            "you can just choose one argument (ckpt or model_config) to pass. It's up to you!"
-        )
-        # The model needs to be a nn.Module for finetuning, not required for representation extraction
-        self.model1 = nn.Linear(1, HIDDEN_DIM)
-        self.model2 = nn.Linear(HIDDEN_DIM, HIDDEN_DIM)
     def get_downsample_rates(self, key: str) -> int:
         """
         Since we do not do any downsampling in this example upstream
         All keys' corresponding representations have downsample rate of 1
         """
         return 1
-    def forward(self, wavs: List[Tensor]) -> Dict[str, Union[Tensor, List[Tensor]]]:
         """
         When the returning Dict contains the List with more than one Tensor,
         those Tensors should be in the same shape to train a weighted-sum on them.
@@ -53,25 +57,21 @@ class UpstreamExpert(nn.Module):
         wavs = pad_sequence(wavs, batch_first=True).unsqueeze(-1)
         # wavs: (batch_size, max_len, 1)
-        hidden = self.model1(wavs)
-        # hidden: (batch_size, max_len, hidden_dim)
-        feature = self.model2(hidden)
-        # feature: (batch_size, max_len, hidden_dim)
         # The "hidden_states" key will be used as default in many cases
         # Other keys in this example are provided for the SUPERB Challenge
         return {
-            "hidden_states": [hidden, feature],
-            "PR": [hidden, feature],
-            "ASR": [hidden, feature],
-            "QbE": [hidden, feature],
-            "SID": [hidden, feature],
-            "ASV": [hidden, feature],
-            "SD": [hidden, feature],
-            "ER": [hidden, feature],
-            "SF": [hidden, feature],
-            "SE": [hidden, feature],
-            "SS": [hidden, feature],
-            "secret": [hidden, feature],
-        }

 from collections import OrderedDict
 from typing import List, Union, Dict
+import torch
 import torch.nn as nn
 from torch import Tensor
 from torch.nn.utils.rnn import pad_sequence
 HIDDEN_DIM = 8
+class Model(nn.Module):
+    def __init__(self):
+        super().__init__()
+        # The model needs to be a nn.Module for finetuning, not required for representation extraction
+        self.model1 = nn.Linear(1, HIDDEN_DIM)
+        self.model2 = nn.Linear(HIDDEN_DIM, HIDDEN_DIM)
+    def forward(self, wavs):
+        hidden = self.model1(wavs)
+        # hidden: (batch_size, max_len, hidden_dim)
+        feature = self.model2(hidden)
+        # feature: (batch_size, max_len, hidden_dim)
+        return [hidden, feature]
 class UpstreamExpert(nn.Module):
+    def __init__(self, ckpt: str = "model.pt", **kwargs):
         """
         Args:
             ckpt:
                 The checkpoint path for loading your pretrained weights.
+                Should be fixed as model.pt for SUPERB Challenge.
         """
         super().__init__()
         self.name = "[Example UpstreamExpert]"
         print(f"{self.name} - You can use ckpt to load your pretrained weights: {ckpt}")
+        ckpt = torch.load(ckpt, map_location="cpu")
+        self.model = Model()
+        self.model.load_state_dict(ckpt)
     def get_downsample_rates(self, key: str) -> int:
         """
         Since we do not do any downsampling in this example upstream
         All keys' corresponding representations have downsample rate of 1
+        E.g., a representation with a 10 ms stride has a downsample rate of 160 (all input wavs are sampled at 16 kHz)
         """
         return 1
+    def forward(self, wavs: List[Tensor]) -> Dict[str, List[Tensor]]:
         """
         When the returning Dict contains the List with more than one Tensor,
         those Tensors should be in the same shape to train a weighted-sum on them.
         wavs = pad_sequence(wavs, batch_first=True).unsqueeze(-1)
         # wavs: (batch_size, max_len, 1)
+        hidden_states = self.model(wavs)
         # The "hidden_states" key will be used as default in many cases
         # Other keys in this example are provided for the SUPERB Challenge
         return {
+            "hidden_states": hidden_states,
+            "PR": hidden_states,
+            "SID": hidden_states,
+            "ER": hidden_states,
+            "ASR": hidden_states,
+            "QbE": hidden_states,
+            "ASV": hidden_states,
+            "SD": hidden_states,
+            "ST": hidden_states,
+            "SE": hidden_states,
+            "SS": hidden_states,
+            "secret": hidden_states,
+        }

{{cookiecutter.repo_name}}/requirements.txt CHANGED Viewed

	@@ -1 +1,2 @@
1	- typer


1	+ typer
2	+ torch