Commit da332f1
Duplicate from LightChen2333/OpenSLU
Co-authored-by: Qiguang Chen <LightChen2333@users.noreply.huggingface.co>
This view is limited to 50 files because it contains too many changes.
- .gitattributes +34 -0
- .gitignore +136 -0
- README.md +14 -0
- __init__.py +1 -0
- accelerate/config-old.yaml +16 -0
- accelerate/config.yaml +22 -0
- app.py +63 -0
- common/__init__.py +1 -0
- common/config.py +192 -0
- common/global_pool.py +26 -0
- common/loader.py +332 -0
- common/logger.py +237 -0
- common/metric.py +346 -0
- common/model_manager.py +419 -0
- common/saver.py +80 -0
- common/tokenizer.py +323 -0
- common/utils.py +499 -0
- config/README.md +348 -0
- config/app.yaml +6 -0
- config/decoder/interaction/stack-propagation.yaml +1 -0
- config/examples/README.md +38 -0
- config/examples/from_pretrained.yaml +53 -0
- config/examples/from_pretrained_multi.yaml +55 -0
- config/examples/normal.yaml +70 -0
- config/examples/reload_to_train.yaml +71 -0
- config/reproduction/atis/bi-model.yaml +106 -0
- config/reproduction/atis/dca-net.yaml +88 -0
- config/reproduction/atis/deberta.yaml +67 -0
- config/reproduction/atis/electra.yaml +67 -0
- config/reproduction/atis/joint-bert.yaml +70 -0
- config/reproduction/atis/roberta.yaml +70 -0
- config/reproduction/atis/slot-gated.yaml +87 -0
- config/reproduction/atis/stack-propagation.yaml +109 -0
- config/reproduction/mix-atis/agif.yaml +133 -0
- config/reproduction/mix-atis/gl-gin.yaml +128 -0
- config/reproduction/mix-atis/vanilla.yaml +95 -0
- config/reproduction/mix-snips/agif.yaml +131 -0
- config/reproduction/mix-snips/gl-gin.yaml +131 -0
- config/reproduction/mix-snips/vanilla.yaml +95 -0
- config/reproduction/snips/bi-model.yaml +104 -0
- config/reproduction/snips/dca_net.yaml +88 -0
- config/reproduction/snips/deberta.yaml +70 -0
- config/reproduction/snips/electra.yaml +69 -0
- config/reproduction/snips/joint-bert.yaml +75 -0
- config/reproduction/snips/roberta.yaml +70 -0
- config/reproduction/snips/slot-gated.yaml +87 -0
- config/reproduction/snips/stack-propagation.yaml +105 -0
- config/visual.yaml +6 -0
- model/__init__.py +3 -0
- model/decoder/__init__.py +5 -0
.gitattributes
ADDED
@@ -0,0 +1,34 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore
ADDED
@@ -0,0 +1,136 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+.idea/
+wandb/*
+save/*
+!save/.gitkeep
+logs/*
+!logs/.gitkeep
+test
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+.vscode/
README.md
ADDED
@@ -0,0 +1,14 @@
+---
+license: mit
+title: OpenSLU
+sdk: gradio
+sdk_version: 3.18.0
+app_file: app.py
+emoji: 🚀
+colorFrom: blue
+colorTo: purple
+pinned: false
+tags:
+- making-demos
+duplicated_from: LightChen2333/OpenSLU
+---
__init__.py
ADDED
@@ -0,0 +1 @@
+
accelerate/config-old.yaml
ADDED
@@ -0,0 +1,16 @@
+compute_environment: LOCAL_MACHINE
+deepspeed_config: {}
+distributed_type: MULTI_GPU
+downcast_bf16: 'no'
+fsdp_config: {}
+gpu_ids: all
+machine_rank: 0
+main_process_ip: null
+main_process_port: 9001
+main_training_function: main
+mixed_precision: 'no'
+num_machines: 0
+num_processes: 2
+rdzv_backend: static
+same_network: true
+use_cpu: false
accelerate/config.yaml
ADDED
@@ -0,0 +1,22 @@
+command_file: null
+commands: null
+compute_environment: LOCAL_MACHINE
+deepspeed_config: {}
+distributed_type: 'NO'
+downcast_bf16: 'no'
+dynamo_backend: 'NO'
+fsdp_config: {}
+gpu_ids: all
+machine_rank: 0
+main_process_ip: null
+main_process_port: null
+main_training_function: main
+megatron_lm_config: {}
+mixed_precision: 'no'
+num_machines: 1
+num_processes: 2
+rdzv_backend: static
+same_network: true
+tpu_name: null
+tpu_zone: null
+use_cpu: false
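
Note: config-old.yaml above targets multi-GPU training (distributed_type: MULTI_GPU), while the current config.yaml disables distribution (distributed_type: 'NO') but keeps num_processes: 2. Assuming the standard Hugging Face Accelerate CLI, either file can be selected at launch time with accelerate launch --config_file accelerate/config.yaml <entry script>; the training entry script itself falls outside this 50-file view.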
app.py
ADDED
@@ -0,0 +1,63 @@
+'''
+Author: Qiguang Chen
+LastEditors: Qiguang Chen
+Date: 2023-02-07 15:42:32
+LastEditTime: 2023-02-19 21:04:03
+Description:
+
+'''
+import argparse
+import gradio as gr
+
+from common.config import Config
+from common.model_manager import ModelManager
+from common.utils import str2bool
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--config_path', '-cp', type=str, default="config/examples/from_pretrained.yaml")
+parser.add_argument('--push_to_public', '-p', type=str2bool, nargs='?',
+                    const=True, default=False,
+                    help="Push to public network.")
+args = parser.parse_args()
+config = Config.load_from_yaml(args.config_path)
+config.base["train"] = False
+config.base["test"] = False
+
+model_manager = ModelManager(config)
+model_manager.init_model()
+
+
+def text_analysis(text):
+    print(text)
+    data = model_manager.predict(text)
+    html = """<link href="https://cdn.staticfile.org/twitter-bootstrap/5.1.1/css/bootstrap.min.css" rel="stylesheet">
+    <script src="https://cdn.staticfile.org/twitter-bootstrap/5.1.1/js/bootstrap.bundle.min.js"></script>"""
+    html += """<div style="background: white; padding: 16px;"><b>Intent:</b>"""
+
+    for intent in data["intent"]:
+        html += """<button type="button" class="btn btn-white">
+        <span class="badge text-dark btn-light">""" + intent + """</span> </button>"""
+    html += """<br /> <b>Slot:</b>"""
+    for t, slot in zip(data["text"], data["slot"]):
+        html += """<button type="button" class="btn btn-white">"""+t+"""<span class="badge text-dark" style="background-color: rgb(255, 255, 255);
+    color: rgb(62 62 62);
+    box-shadow: 2px 2px 7px 1px rgba(210, 210, 210, 0.42);">"""+slot+\
+            """</span>
+        </button>"""
+    html += "</div>"
+    return html
+
+
+demo = gr.Interface(
+    text_analysis,
+    gr.Textbox(placeholder="Enter sentence here..."),
+    ["html"],
+    examples=[
+        ["i would like to find a flight from charlotte to las vegas that makes a stop in st louis"],
+    ],
+)
+if args.push_to_public:
+    demo.launch(share=True)
+else:
+    demo.launch()
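
The text_analysis function above assumes model_manager.predict returns a dict with parallel "text"/"slot" lists and an "intent" list. A minimal sketch of that assumed contract, with invented values for illustration:

# Hypothetical shape of the dict text_analysis expects from
# model_manager.predict; the concrete values here are made up.
data = {
    "text": ["show", "flights", "to", "denver"],   # tokens echoed back
    "slot": ["O", "O", "O", "B-toloc.city_name"],  # one slot tag per token
    "intent": ["atis_flight"],                     # one or more intent labels
}
assert len(data["text"]) == len(data["slot"])      # zipped pairwise in the loop above

Per the argparse definitions, the demo runs locally with python app.py --config_path config/examples/from_pretrained.yaml, and passing --push_to_public (or -p) switches demo.launch to share=True.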
common/__init__.py
ADDED
@@ -0,0 +1 @@
+
common/config.py
ADDED
@@ -0,0 +1,192 @@
+'''
+Author: Qiguang Chen
+Date: 2023-01-11 10:39:26
+LastEditors: Qiguang Chen
+LastEditTime: 2023-02-15 17:58:53
+Description: Configuration class to manage all process in OpenSLU like model construction, learning processing and so on.
+
+'''
+import re
+
+from ruamel import yaml
+import datetime
+
+class Config(dict):
+    def __init__(self, *args, **kwargs):
+        """ init with dict as args
+        """
+        dict.__init__(self, *args, **kwargs)
+        self.__dict__ = self
+        self.start_time = datetime.datetime.now().strftime('%Y%m%d%H%M%S%f')
+        if not self.model.get("_from_pretrained_"):
+            self.__autowired()
+
+    @staticmethod
+    def load_from_yaml(file_path: str) -> "Config":
+        """load config files with path
+
+        Args:
+            file_path (str): yaml configuration file path.
+
+        Returns:
+            Config: config object.
+        """
+        with open(file_path) as stream:
+            try:
+                return Config(yaml.safe_load(stream))
+            except yaml.YAMLError as exc:
+                print(exc)
+
+    @staticmethod
+    def load_from_args(args) -> "Config":
+        """ load args to replace item value in config files assigned with '--config_path' or '--model'
+
+        Args:
+            args (Any): args with command line.
+
+        Returns:
+            Config: config object.
+        """
+        if args.model is not None and args.dataset is not None:
+            args.config_path = f"config/reproduction/{args.dataset}/{args.model}.yaml"
+        config = Config.load_from_yaml(args.config_path)
+        if args.dataset is not None:
+            config.__update_dataset(args.dataset)
+        if args.device is not None:
+            config["base"]["device"] = args.device
+        if args.learning_rate is not None:
+            config["optimizer"]["lr"] = args.learning_rate
+        if args.epoch_num is not None:
+            config["base"]["epoch_num"] = args.epoch_num
+        return config
+
+    def autoload_template(self):
+        """ search '{*}' template to execute as python code, support replace variable as any configure item
+        """
+        self.__autoload_template(self.__dict__)
+
+    def __get_autoload_value(self, matched):
+        keys = matched.group()[1:-1].split(".")
+        temp = self.__dict__
+        for k in keys:
+            temp = temp[k]
+        return str(temp)
+
+    def __autoload_template(self, config: dict):
+        for k in config:
+            if isinstance(config, dict):
+                sub_config = config[k]
+            elif isinstance(config, list):
+                sub_config = k
+            else:
+                continue
+            if isinstance(sub_config, dict) or isinstance(sub_config, list):
+                self.__autoload_template(sub_config)
+            if isinstance(sub_config, str) and "{" in sub_config and "}" in sub_config:
+                res = re.sub(r'{.*?}', self.__get_autoload_value, config[k])
+                res_dict = {"res": None}
+                exec("res=" + res, res_dict)
+                config[k] = res_dict["res"]
+
+    def __update_dataset(self, dataset_name):
+        if dataset_name is not None and isinstance(dataset_name, str):
+            self.__dict__["dataset"]["dataset_name"] = dataset_name
+
+    def get_model_config(self):
+        return self.__dict__["model"]
+
+    def __autowired(self):
+        # Set encoder
+        encoder_config = self.__dict__["model"]["encoder"]
+        encoder_type = encoder_config["_model_target_"].split(".")[-1]
+
+        def get_output_dim(encoder_config):
+            encoder_type = encoder_config["_model_target_"].split(".")[-1]
+            if (encoder_type == "AutoEncoder" and encoder_config["encoder_name"] in ["lstm", "self-attention-lstm",
+                                                                                     "bi-encoder"]) or encoder_type == "NoPretrainedEncoder":
+                output_dim = 0
+                if encoder_config.get("lstm"):
+                    output_dim += encoder_config["lstm"]["output_dim"]
+                if encoder_config.get("attention"):
+                    output_dim += encoder_config["attention"]["output_dim"]
+                return output_dim
+            else:
+                return encoder_config["output_dim"]
+
+        if encoder_type == "BiEncoder":
+            output_dim = get_output_dim(encoder_config["intent_encoder"]) + \
+                         get_output_dim(encoder_config["slot_encoder"])
+        else:
+            output_dim = get_output_dim(encoder_config)
+        self.__dict__["model"]["encoder"]["output_dim"] = output_dim
+
+        # Set interaction
+        if "interaction" in self.__dict__["model"]["decoder"] and self.__dict__["model"]["decoder"]["interaction"].get(
+                "input_dim") is None:
+            self.__dict__["model"]["decoder"]["interaction"]["input_dim"] = output_dim
+            interaction_type = self.__dict__["model"]["decoder"]["interaction"]["_model_target_"].split(".")[-1]
+            if not ((encoder_type == "AutoEncoder" and encoder_config[
+                    "encoder_name"] == "self-attention-lstm") or encoder_type == "SelfAttentionLSTMEncoder") and interaction_type != "BiModelWithoutDecoderInteraction":
+                output_dim = self.__dict__["model"]["decoder"]["interaction"]["output_dim"]
+
+        # Set classifier
+        if "slot_classifier" in self.__dict__["model"]["decoder"]:
+            if self.__dict__["model"]["decoder"]["slot_classifier"].get("input_dim") is None:
+                self.__dict__["model"]["decoder"]["slot_classifier"]["input_dim"] = output_dim
+            self.__dict__["model"]["decoder"]["slot_classifier"]["use_slot"] = True
+        if "intent_classifier" in self.__dict__["model"]["decoder"]:
+            if self.__dict__["model"]["decoder"]["intent_classifier"].get("input_dim") is None:
+                self.__dict__["model"]["decoder"]["intent_classifier"]["input_dim"] = output_dim
+            self.__dict__["model"]["decoder"]["intent_classifier"]["use_intent"] = True
+
+    def get_intent_label_num(self):
+        """ get the number of intent labels.
+        """
+        classifier_conf = self.__dict__["model"]["decoder"]["intent_classifier"]
+        return classifier_conf["intent_label_num"] if "intent_label_num" in classifier_conf else 0
+
+    def get_slot_label_num(self):
+        """ get the number of slot labels.
+        """
+        classifier_conf = self.__dict__["model"]["decoder"]["slot_classifier"]
+        return classifier_conf["slot_label_num"] if "slot_label_num" in classifier_conf else 0
+
+    def set_intent_label_num(self, intent_label_num):
+        """ set the number of intent labels.
+
+        Args:
+            intent_label_num (int): the number of intent labels
+        """
+        self.__dict__["base"]["intent_label_num"] = intent_label_num
+        self.__dict__["model"]["decoder"]["intent_classifier"]["intent_label_num"] = intent_label_num
+        if "interaction" in self.__dict__["model"]["decoder"]:
+
+            self.__dict__["model"]["decoder"]["interaction"]["intent_label_num"] = intent_label_num
+            if self.__dict__["model"]["decoder"]["interaction"]["_model_target_"].split(".")[
+                    -1] == "StackInteraction":
+                self.__dict__["model"]["decoder"]["slot_classifier"]["input_dim"] += intent_label_num
+
+
+    def set_slot_label_num(self, slot_label_num: int) -> None:
+        """set the number of slot labels
+
+        Args:
+            slot_label_num (int): the number of slot labels
+        """
+        self.__dict__["base"]["slot_label_num"] = slot_label_num
+        self.__dict__["model"]["decoder"]["slot_classifier"]["slot_label_num"] = slot_label_num
+        if "interaction" in self.__dict__["model"]["decoder"]:
+            self.__dict__["model"]["decoder"]["interaction"]["slot_label_num"] = slot_label_num
+
+    def set_vocab_size(self, vocab_size):
+        """set the size of vocabulary in non-pretrained tokenizer
+        Args:
+            vocab_size (int): the size of vocabulary
+        """
+        encoder_type = self.__dict__["model"]["encoder"]["_model_target_"].split(".")[-1]
+        encoder_name = self.__dict__["model"]["encoder"].get("encoder_name")
+        if encoder_type == "BiEncoder" or (encoder_type == "AutoEncoder" and encoder_name == "bi-encoder"):
+            self.__dict__["model"]["encoder"]["intent_encoder"]["embedding"]["vocab_size"] = vocab_size
+            self.__dict__["model"]["encoder"]["slot_encoder"]["embedding"]["vocab_size"] = vocab_size
+        elif self.__dict__["model"]["encoder"].get("embedding"):
+            self.__dict__["model"]["encoder"]["embedding"]["vocab_size"] = vocab_size
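
For clarity, autoload_template substitutes every '{a.b.c}' placeholder with the stringified config value at that path and then executes the resulting string as Python, so string values in the YAML can hold arithmetic over other config items. A minimal sketch of that behavior; the decoder hidden_dim key is invented for illustration:

from common.config import Config

cfg = Config({
    "base": {},
    "model": {
        "_from_pretrained_": "stub",  # truthy, so __autowired is skipped in this sketch
        "encoder": {"output_dim": 256},
        # the placeholder resolves to "256 // 2", which exec evaluates to 128
        "decoder": {"hidden_dim": "{model.encoder.output_dim} // 2"},
    },
})
cfg.autoload_template()
print(cfg["model"]["decoder"]["hidden_dim"])  # -> 128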
common/global_pool.py
ADDED
@@ -0,0 +1,26 @@
+'''
+Author: Qiguang Chen
+LastEditors: Qiguang Chen
+Date: 2023-02-12 14:35:37
+LastEditTime: 2023-02-12 14:37:40
+Description:
+
+'''
+def _init():
+    global _global_dict
+    _global_dict = {}
+
+
+def set_value(key, value):
+    # set global value to object pool
+    _global_dict[key] = value
+
+
+def get_value(key):
+    # get global value from object pool
+    try:
+        return _global_dict[key]
+    except KeyError:
+        print('Failed to read ' + key + '\r\n')
+
+
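
global_pool is a tiny cross-module object pool: _init() creates the module-level dict once at startup, and any module can then share values by key. A minimal usage sketch (the key name is made up):

from common import global_pool

global_pool._init()                        # must run once before set/get
global_pool.set_value("run_name", "demo")  # stash a shared value
print(global_pool.get_value("run_name"))   # -> demo, from anywhere in the process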
common/loader.py
ADDED
@@ -0,0 +1,332 @@
+'''
+Author: Qiguang Chen
+Date: 2023-01-11 10:39:26
+LastEditors: Qiguang Chen
+LastEditTime: 2023-02-19 15:39:48
+Description: all class for load data.
+
+'''
+import os
+import torch
+import json
+from datasets import load_dataset, Dataset
+from torch.utils.data import DataLoader
+
+from common.utils import InputData
+
+ABS_PATH = os.path.join(os.path.abspath(os.path.dirname(__file__)), "../")
+
+class DataFactory(object):
+    def __init__(self, tokenizer, use_multi_intent=False, to_lower_case=True):
+        """_summary_
+
+        Args:
+            tokenizer (Tokenizer): _description_
+            use_multi_intent (bool, optional): _description_. Defaults to False.
+        """
+        self.tokenizer = tokenizer
+        self.slot_label_list = []
+        self.intent_label_list = []
+        self.use_multi = use_multi_intent
+        self.to_lower_case = to_lower_case
+        self.slot_label_dict = None
+        self.intent_label_dict = None
+
+    def __is_supported_datasets(self, dataset_name: str) -> bool:
+        return dataset_name.lower() in ["atis", "snips", "mix-atis", "mix-snips"]
+
+    def load_dataset(self, dataset_config, split="train"):
+        dataset_name = None
+        if split not in dataset_config:
+            dataset_name = dataset_config.get("dataset_name")
+        elif self.__is_supported_datasets(dataset_config[split]):
+            dataset_name = dataset_config[split].lower()
+        if dataset_name is not None:
+            return load_dataset("LightChen2333/OpenSLU", dataset_name, split=split)
+        else:
+            data_file = dataset_config[split]
+            data_dict = {"text": [], "slot": [], "intent": []}
+            with open(data_file, encoding="utf-8") as f:
+                for line in f:
+                    row = json.loads(line)
+                    data_dict["text"].append(row["text"])
+                    data_dict["slot"].append(row["slot"])
+                    data_dict["intent"].append(row["intent"])
+            return Dataset.from_dict(data_dict)
+
+    def update_label_names(self, dataset):
+        for intent_labels in dataset["intent"]:
+            if self.use_multi:
+                intent_label = intent_labels.split("#")
+            else:
+                intent_label = [intent_labels]
+            for x in intent_label:
+                if x not in self.intent_label_list:
+                    self.intent_label_list.append(x)
+        for slot_label in dataset["slot"]:
+            for x in slot_label:
+                if x not in self.slot_label_list:
+                    self.slot_label_list.append(x)
+        self.intent_label_dict = {key: index for index,
+                                  key in enumerate(self.intent_label_list)}
+        self.slot_label_dict = {key: index for index,
+                                key in enumerate(self.slot_label_list)}
+
+    def update_vocabulary(self, dataset):
+        if self.tokenizer.name_or_path in ["word_tokenizer"]:
+            for data in dataset:
+                self.tokenizer.add_instance(data["text"])
+
+    @staticmethod
+    def fast_align_data(text, padding_side="right"):
+        for i in range(len(text.input_ids)):
+            desired_output = []
+            for word_id in text.word_ids(i):
+                if word_id is not None:
+                    start, end = text.word_to_tokens(
+                        i, word_id, sequence_index=0 if padding_side == "right" else 1)
+                    if start == end - 1:
+                        tokens = [start]
+                    else:
+                        tokens = [start, end - 1]
+                    if len(desired_output) == 0 or desired_output[-1] != tokens:
+                        desired_output.append(tokens)
+            yield desired_output
+
+    def fast_align(self,
+                   batch,
+                   ignore_index=-100,
+                   device="cuda",
+                   config=None,
+                   enable_label=True,
+                   label2tensor=True):
+        if self.to_lower_case:
+            input_list = [[t.lower() for t in x["text"]] for x in batch]
+        else:
+            input_list = [x["text"] for x in batch]
+        text = self.tokenizer(input_list,
+                              return_tensors="pt",
+                              padding=True,
+                              is_split_into_words=True,
+                              truncation=True,
+                              **config).to(device)
+        if enable_label:
+            if label2tensor:
+
+                slot_mask = torch.ones_like(text.input_ids) * ignore_index
+                for i, offsets in enumerate(
+                        DataFactory.fast_align_data(text, padding_side=self.tokenizer.padding_side)):
+                    num = 0
+                    assert len(offsets) == len(batch[i]["text"])
+                    assert len(offsets) == len(batch[i]["slot"])
+                    for off in offsets:
+                        slot_mask[i][off[0]
+                                     ] = self.slot_label_dict[batch[i]["slot"][num]]
+                        num += 1
+                slot = slot_mask.clone()
+                attention_id = 0 if self.tokenizer.padding_side == "right" else 1
+                for i, slot_batch in enumerate(slot):
+                    for j, x in enumerate(slot_batch):
+                        if x == ignore_index and text.attention_mask[i][j] == attention_id and (text.input_ids[i][
+                                j] not in self.tokenizer.all_special_ids or text.input_ids[i][j] == self.tokenizer.unk_token_id):
+                            slot[i][j] = slot[i][j - 1]
+                slot = slot.to(device)
+                if not self.use_multi:
+                    intent = torch.tensor(
+                        [self.intent_label_dict[x["intent"]] for x in batch]).to(device)
+                else:
+                    one_hot = torch.zeros(
+                        (len(batch), len(self.intent_label_list)), dtype=torch.float)
+                    for index, b in enumerate(batch):
+                        for x in b["intent"].split("#"):
+                            one_hot[index][self.intent_label_dict[x]] = 1.
+                    intent = one_hot.to(device)
+            else:
+                slot_mask = None
+                slot = [['#' for _ in range(text.input_ids.shape[1])]
+                        for _ in range(text.input_ids.shape[0])]
+                for i, offsets in enumerate(DataFactory.fast_align_data(text)):
+                    num = 0
+                    for off in offsets:
+                        slot[i][off[0]] = batch[i]["slot"][num]
+                        num += 1
+                if not self.use_multi:
+                    intent = [x["intent"] for x in batch]
+                else:
+                    intent = [
+                        [x for x in b["intent"].split("#")] for b in batch]
+            return InputData((text, slot, intent))
+        else:
+            return InputData((text, None, None))
+
+    def general_align_data(self, split_text_list, raw_text_list, encoded_text):
+        for i in range(len(split_text_list)):
+            desired_output = []
+            jdx = 0
+            offset = encoded_text.offset_mapping[i].tolist()
+            split_texts = split_text_list[i]
+            raw_text = raw_text_list[i]
+            last = 0
+            temp_offset = []
+            for off in offset:
+                s, e = off
+                if len(temp_offset) > 0 and (e != 0 and last == s):
+                    len_1 = off[1] - off[0]
+                    len_2 = temp_offset[-1][1] - temp_offset[-1][0]
+                    if len_1 > len_2:
+                        temp_offset.pop(-1)
+                        temp_offset.append([0, 0])
+                        temp_offset.append(off)
+                    continue
+                temp_offset.append(off)
+                last = s
+            offset = temp_offset
+            for split_text in split_texts:
+                while jdx < len(offset) and offset[jdx][0] == 0 and offset[jdx][1] == 0:
+                    jdx += 1
+                if jdx == len(offset):
+                    continue
+                start_, end_ = offset[jdx]
+                tokens = None
+                if split_text == raw_text[start_:end_].strip():
+                    tokens = [jdx]
+                else:
+                    # Compute "xxx" -> "xx" "#x"
+                    temp_jdx = jdx
+                    last_str = raw_text[start_:end_].strip()
+                    while last_str != split_text and temp_jdx < len(offset) - 1:
+                        temp_jdx += 1
+                        last_str += raw_text[offset[temp_jdx]
+                                             [0]:offset[temp_jdx][1]].strip()
+
+                    if temp_jdx == jdx:
+                        raise ValueError("Illegal Input data")
+                    elif last_str == split_text:
+                        tokens = [jdx, temp_jdx]
+                        jdx = temp_jdx
+                    else:
+                        jdx -= 1
+                jdx += 1
+                if tokens is not None:
+                    desired_output.append(tokens)
+            yield desired_output
+
+    def general_align(self,
+                      batch,
+                      ignore_index=-100,
+                      device="cuda",
+                      config=None,
+                      enable_label=True,
+                      label2tensor=True,
+                      locale="en-US"):
+        if self.to_lower_case:
+            raw_data = [" ".join(x["text"]).lower() if locale not in ['ja-JP', 'zh-CN', 'zh-TW'] else "".join(x["text"]) for x in
+                        batch]
+            input_list = [[t.lower() for t in x["text"]] for x in batch]
+        else:
+            input_list = [x["text"] for x in batch]
+            raw_data = [" ".join(x["text"]) if locale not in ['ja-JP', 'zh-CN', 'zh-TW'] else "".join(x["text"]) for x in
+                        batch]
+        text = self.tokenizer(raw_data,
+                              return_tensors="pt",
+                              padding=True,
+                              truncation=True,
+                              return_offsets_mapping=True,
+                              **config).to(device)
+        if enable_label:
+            if label2tensor:
+                slot_mask = torch.ones_like(text.input_ids) * ignore_index
+                for i, offsets in enumerate(
+                        self.general_align_data(input_list, raw_data, encoded_text=text)):
+                    num = 0
+                    # if len(offsets) != len(batch[i]["text"]) or len(offsets) != len(batch[i]["slot"]):
+                    #     if
+                    for off in offsets:
+                        slot_mask[i][off[0]
+                                     ] = self.slot_label_dict[batch[i]["slot"][num]]
+                        num += 1
+                # slot = slot_mask.clone()
+                # attention_id = 0 if self.tokenizer.padding_side == "right" else 1
+                # for i, slot_batch in enumerate(slot):
+                #     for j, x in enumerate(slot_batch):
+                #         if x == ignore_index and text.attention_mask[i][j] == attention_id and text.input_ids[i][
+                #                 j] not in self.tokenizer.all_special_ids:
+                #             slot[i][j] = slot[i][j - 1]
+                slot = slot_mask.to(device)
+                if not self.use_multi:
+                    intent = torch.tensor(
+                        [self.intent_label_dict[x["intent"]] for x in batch]).to(device)
+                else:
+                    one_hot = torch.zeros(
+                        (len(batch), len(self.intent_label_list)), dtype=torch.float)
+                    for index, b in enumerate(batch):
+                        for x in b["intent"].split("#"):
+                            one_hot[index][self.intent_label_dict[x]] = 1.
+                    intent = one_hot.to(device)
+            else:
+                slot_mask = None
+                slot = [['#' for _ in range(text.input_ids.shape[1])]
+                        for _ in range(text.input_ids.shape[0])]
+                for i, offsets in enumerate(self.general_align_data(input_list, raw_data, encoded_text=text)):
+                    num = 0
+                    for off in offsets:
+                        slot[i][off[0]] = batch[i]["slot"][num]
+                        num += 1
+                if not self.use_multi:
+                    intent = [x["intent"] for x in batch]
+                else:
+                    intent = [
+                        [x for x in b["intent"].split("#")] for b in batch]
+            return InputData((text, slot, intent))
+        else:
+            return InputData((text, None, None))
+
+    def batch_fn(self,
+                 batch,
+                 ignore_index=-100,
+                 device="cuda",
+                 config=None,
+                 align_mode="fast",
+                 enable_label=True,
+                 label2tensor=True):
+        if align_mode == "fast":
+            # try:
+            return self.fast_align(batch,
+                                   ignore_index=ignore_index,
+                                   device=device,
+                                   config=config,
+                                   enable_label=enable_label,
+                                   label2tensor=label2tensor)
+            # except:
+            #     return self.general_align(batch,
+            #                               ignore_index=ignore_index,
+            #                               device=device,
+            #                               config=config,
+            #                               enable_label=enable_label,
+            #                               label2tensor=label2tensor)
+        else:
+            return self.general_align(batch,
+                                      ignore_index=ignore_index,
+                                      device=device,
+                                      config=config,
+                                      enable_label=enable_label,
+                                      label2tensor=label2tensor)
+
+    def get_data_loader(self,
+                        dataset,
+                        batch_size,
+                        shuffle=False,
+                        device="cuda",
+                        enable_label=True,
+                        align_mode="fast",
+                        label2tensor=True, **config):
+        data_loader = DataLoader(dataset,
+                                 shuffle=shuffle,
+                                 batch_size=batch_size,
+                                 collate_fn=lambda x: self.batch_fn(x,
+                                                                    device=device,
+                                                                    config=config,
+                                                                    enable_label=enable_label,
+                                                                    align_mode=align_mode,
+                                                                    label2tensor=label2tensor))
+        return data_loader
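
End to end, DataFactory loads a split, accumulates the label vocabularies, then wraps everything in a DataLoader whose collate_fn tokenizes and aligns each batch. A minimal sketch assuming a pretrained Hugging Face tokenizer and the built-in "atis" split; max_length stands in for whatever tokenizer options the YAML config would forward:

from transformers import AutoTokenizer
from common.loader import DataFactory

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
factory = DataFactory(tokenizer, use_multi_intent=False)

dataset = factory.load_dataset({"train": "atis"}, split="train")
factory.update_label_names(dataset)   # build intent/slot label -> id dicts
factory.update_vocabulary(dataset)    # no-op for pretrained tokenizers

loader = factory.get_data_loader(dataset, batch_size=16, shuffle=True,
                                 device="cpu", max_length=512)
batch = next(iter(loader))            # an InputData of tokenized text, slot, intent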
common/logger.py
ADDED
@@ -0,0 +1,237 @@
+'''
+Author: Qiguang Chen
+Date: 2023-01-11 10:39:26
+LastEditors: Qiguang Chen
+LastEditTime: 2023-02-19 22:05:49
+Description: log manager
+
+'''
+import datetime
+import json
+import os
+import time
+from common.config import Config
+import logging
+import colorlog
+
+def mkdirs(dir_names):
+    for dir_name in dir_names:
+        if not os.path.exists(dir_name):
+            os.mkdir(dir_name)
+
+
+
+class Logger():
+    """ logging information by [wandb, fitlog, local file]
+    """
+    def __init__(self,
+                 logger_type: str,
+                 logger_name: str,
+                 logging_level="INFO",
+                 start_time='',
+                 accelerator=None):
+        """ create logger
+
+        Args:
+            logger_type (str): support type = ["wandb", "fitlog", "local"]
+            logger_name (str): logger name, means project name in wandb, and logging file name
+            logging_level (str, optional): logging level. Defaults to "INFO".
+            start_time (str, optional): start time string. Defaults to ''.
+        """
+        self.logger_type = logger_type
+        times = time.localtime()
+        self.output_dir = "logs/" + logger_name + "/" + start_time
+        self.accelerator = accelerator
+        self.logger_name = logger_name
+        if accelerator is not None:
+            from accelerate.logging import get_logger
+            self.logging = get_logger(logger_name)
+        else:
+            if self.logger_type == "wandb":
+                import wandb
+                self.logger = wandb
+                mkdirs(["logs", "logs/" + logger_name, self.output_dir])
+                self.logger.init(project=logger_name)
+            elif self.logger_type == "fitlog":
+                import fitlog
+                self.logger = fitlog
+                mkdirs(["logs", "logs/" + logger_name, self.output_dir])
+                self.logger.set_log_dir("logs/" + logger_name)
+            else:
+                mkdirs(["logs", "logs/" + logger_name, self.output_dir])
+                self.config_file = os.path.join(self.output_dir, "config.jsonl")
+                with open(self.config_file, "w", encoding="utf8") as f:
+                    print(f"Config will be written to {self.config_file}")
+
+                self.loss_file = os.path.join(self.output_dir, "loss.jsonl")
+                with open(self.loss_file, "w", encoding="utf8") as f:
+                    print(f"Loss Result will be written to {self.loss_file}")
+
+                self.metric_file = os.path.join(self.output_dir, "metric.jsonl")
+                with open(self.metric_file, "w", encoding="utf8") as f:
+                    print(f"Metric Result will be written to {self.metric_file}")
+
+                self.other_log_file = os.path.join(self.output_dir, "other_log.jsonl")
+                with open(self.other_log_file, "w", encoding="utf8") as f:
+                    print(f"Other Log Result will be written to {self.other_log_file}")
+
+            LOGGING_LEVEL_MAP = {
+                "CRITICAL": logging.CRITICAL,
+                "FATAL": logging.FATAL,
+                "ERROR": logging.ERROR,
+                "WARNING": logging.WARNING,
+                "WARN": logging.WARN,
+                "INFO": logging.INFO,
+                "DEBUG": logging.DEBUG,
+                "NOTSET": logging.NOTSET,
+            }
+            # logging.basicConfig(format='[%(levelname)s - %(asctime)s]\t%(message)s', datefmt='%m/%d/%Y %I:%M:%S %p',
+            #                     filename=os.path.join(self.output_dir, "log.log"), level=LOGGING_LEVEL_MAP[logging_level])
+
+            # logger = logging.getLogger()
+            # KZT = logging.StreamHandler()
+            # KZT.setLevel(logging.DEBUG)
+            # logger.addHandler(KZT)
+
+            self.logging = self._get_logging_logger(logging_level)
+
+    def _get_logging_logger(self, level="INFO"):
+        log_colors_config = {
+            'DEBUG': 'cyan',
+            'INFO': 'blue',
+            'WARNING': 'yellow',
+            'ERROR': 'red',
+            'CRITICAL': 'red,bg_white',
+        }
+
+        logger = logging.getLogger()
+        logger.setLevel(level)
+
+        log_path = os.path.join(self.output_dir, "log.log")
+
+        if not logger.handlers:
+            sh = logging.StreamHandler()
+            fh = logging.FileHandler(filename=log_path, mode='a', encoding="utf-8")
+            fmt = logging.Formatter(
+                fmt='[%(levelname)s - %(asctime)s]\t%(message)s',
+                datefmt='%m/%d/%Y %I:%M:%S %p')
+
+            sh_fmt = colorlog.ColoredFormatter(
+                fmt='%(log_color)s[%(levelname)s - %(asctime)s]\t%(message)s',
+                datefmt='%m/%d/%Y %I:%M:%S %p',
+                log_colors=log_colors_config)
+            sh.setFormatter(fmt=sh_fmt)
+            fh.setFormatter(fmt=fmt)
+            logger.addHandler(sh)
+            logger.addHandler(fh)
+        return logger
+
+    def set_config(self, config: Config):
+        """save config
+
+        Args:
+            config (Config): configuration object to save
+        """
+        if self.accelerator is not None:
+            self.accelerator.init_trackers(self.logger_name, config=config)
+        elif self.logger_type == "wandb":
+            self.logger.config.update(config)
+        elif self.logger_type == "fitlog":
+            self.logger.add_hyper(config)
+        else:
+            with open(self.config_file, "a", encoding="utf8") as f:
+                f.write(json.dumps(config) + "\n")
+
+    def log(self, data, step=0):
+        """log data and step
+
+        Args:
+            data (Any): data to log
+            step (int, optional): step num. Defaults to 0.
+        """
+        if self.accelerator is not None:
+            self.accelerator.log(data, step=step)
+        elif self.logger_type == "wandb":
+            self.logger.log(data, step=step)
+        elif self.logger_type == "fitlog":
+            self.logger.add_other({"data": data, "step": step})
+        else:
+            with open(self.other_log_file, "a", encoding="utf8") as f:
+                f.write(json.dumps({"data": data, "step": step}) + "\n")
+
+    def log_metric(self, metric, metric_split="dev", step=0):
+        """log metric
+
+        Args:
+            metric (Any): metric
+            metric_split (str, optional): dataset split. Defaults to 'dev'.
+            step (int, optional): step num. Defaults to 0.
+        """
+        if self.accelerator is not None:
+            self.accelerator.log({metric_split: metric}, step=step)
+        elif self.logger_type == "wandb":
+            self.logger.log({metric_split: metric}, step=step)
+        elif self.logger_type == "fitlog":
+            self.logger.add_metric({metric_split: metric}, step=step)
+        else:
+            with open(self.metric_file, "a", encoding="utf8") as f:
+                f.write(json.dumps({metric_split: metric, "step": step}) + "\n")
+
+    def log_loss(self, loss, loss_name="Loss", step=0):
+        """log loss
+
+        Args:
+            loss (Any): loss
+            loss_name (str, optional): loss description. Defaults to 'Loss'.
+            step (int, optional): step num. Defaults to 0.
+        """
+        if self.accelerator is not None:
+            self.accelerator.log({loss_name: loss}, step=step)
+        elif self.logger_type == "wandb":
+            self.logger.log({loss_name: loss}, step=step)
+        elif self.logger_type == "fitlog":
+            self.logger.add_loss(loss, name=loss_name, step=step)
+        else:
+            with open(self.loss_file, "a", encoding="utf8") as f:
+                f.write(json.dumps({loss_name: loss, "step": step}) + "\n")
+
+    def finish(self):
+        """finish logging
+        """
+        if self.logger_type == "fitlog":
+            self.logger.finish()
+
+    def info(self, message: str):
+        """ Log a message with severity 'INFO' in local file / console.
+
+        Args:
+            message (str): message to log
+        """
+        self.logging.info(message)
+
+    def warning(self, message):
+        """ Log a message with severity 'WARNING' in local file / console.
+
+        Args:
+            message (str): message to log
+        """
+        self.logging.warning(message)
+
+    def error(self, message):
+        """ Log a message with severity 'ERROR' in local file / console.
+
+        Args:
+            message (str): message to log
+        """
+        self.logging.error(message)
+
+    def debug(self, message):
+        """ Log a message with severity 'DEBUG' in local file / console.
+
+        Args:
+            message (str): message to log
+        """
+        self.logging.debug(message)
+
+    def critical(self, message):
+        self.logging.critical(message)
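
A minimal sketch of the local-file logging path above (logger_name and the logged values are illustrative):

from common.logger import Logger

logger = Logger(logger_type="local",         # jsonl files under logs/<name>/<start_time>
                logger_name="demo-run",
                start_time="20230219000000")
logger.set_config({"lr": 1e-3})              # appended to config.jsonl
logger.log_loss(0.42, loss_name="Loss", step=1)
logger.log_metric({"slot_f1": 0.9}, metric_split="dev", step=1)
logger.info("finished step 1")               # colored console + log.log file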
common/metric.py
ADDED
@@ -0,0 +1,346 @@
+'''
+Author: Qiguang Chen
+Date: 2023-01-11 10:39:26
+LastEditors: Qiguang Chen
+LastEditTime: 2023-02-17 19:39:22
+Description: Metric calculation class
+
+'''
+from collections import Counter
+from typing import List, Dict
+
+import numpy as np
+from sklearn.metrics import f1_score
+
+from common.utils import InputData, OutputData
+
+
+class Evaluator(object):
+    """Evaluation metric functions library class
+    supported metric:
+    - slot_f1
+    - intent_acc
+    - exactly_match_accuracy
+    - intent_f1 (default "macro_intent_f1")
+    - macro_intent_f1
+    - micro_intent_f1
+    """
+    @staticmethod
+    def exactly_match_accuracy(pred_slot: List[List[str or int]],
+                               real_slot: List[List[str or int]],
+                               pred_intent: List[List[str or int] or str or int],
+                               real_intent: List[List[str or int] or str or int]) -> float:
+        """Compute the accuracy based on the whole predictions of given sentence, including slot and intent.
+        (both support str or int index as the representation of slot and intent)
+        Args:
+            pred_slot (List[List[str or int]]): predicted sequence of slot list
+            real_slot (List[List[str or int]]): golden sequence of slot list.
+            pred_intent (List[List[str or int] or str or int]): predicted intent list / predicted multi intent list.
+            real_intent (List[List[str or int] or str or int]): golden intent list / golden multi intent list.
+
+        Returns:
+            float: exactly match accuracy score
+        """
+        total_count, correct_count = 0.0, 0.0
+        for p_slot, r_slot, p_intent, r_intent in zip(pred_slot, real_slot, pred_intent, real_intent):
+            if isinstance(p_intent, list):
+                p_intent, r_intent = set(p_intent), set(r_intent)
+            if p_slot == r_slot and p_intent == r_intent:
+                correct_count += 1.0
+            total_count += 1.0
+
+        return 1.0 * correct_count / total_count
+
+
+    @staticmethod
+    def intent_accuracy(pred_list: List, real_list: List) -> float:
+        """Get intent accuracy measured by predictions and ground truths. Support both multi intent and single intent.
+
+        Args:
+            pred_list (List): predicted intent list
+            real_list (List): golden intent list
+
+        Returns:
+            float: intent accuracy score
+        """
+        total_count, correct_count = 0.0, 0.0
+        for p_intent, r_intent in zip(pred_list, real_list):
+            if isinstance(p_intent, list):
+                p_intent, r_intent = set(p_intent), set(r_intent)
+            if p_intent == r_intent:
+                correct_count += 1.0
+            total_count += 1.0
+
+        return 1.0 * correct_count / total_count
+
+    @staticmethod
+    def intent_f1(pred_list: List[List[int]], real_list: List[List[int]], num_intent: int, average='macro') -> float:
+        """Get intent F1 score measured by predictions and ground truths. Support both multi intent and single intent.
+        (Only support multi intent now, but you can use [[intent1], [intent2], ...] to compute intent f1 in single intent)
+        Args:
+            pred_list (List[List[int]]): predicted multi intent list.
+            real_list (List[List[int]]): golden multi intent list.
+            num_intent (int): the number of intent labels
+            average (str): support "micro" and "macro"
+
+        Returns:
+            float: intent F1 score
+        """
+        return f1_score(Evaluator.__instance2onehot(num_intent, real_list),
+                        Evaluator.__instance2onehot(num_intent, pred_list),
+                        average=average,
+                        zero_division=0)
+
+    @staticmethod
+    def __multilabel2one_hot(labels, nums):
+        res = [0.] * nums
+        if len(labels) == 0:
+            return res
+        if isinstance(labels[0], list):
+            for label in labels[0]:
+                res[label] = 1.
+            return res
+        for label in labels:
+            res[label] = 1.
+        return res
+
+    @staticmethod
+    def __instance2onehot(num_intent, data):
+        res = []
+        for intents in data:
+            res.append(Evaluator.__multilabel2one_hot(intents, num_intent))
+        return np.array(res)
+
+    @staticmethod
+    def __startOfChunk(prevTag, tag, prevTagType, tagType, chunkStart=False):
+        if prevTag == 'B' and tag == 'B':
+            chunkStart = True
+        if prevTag == 'I' and tag == 'B':
+            chunkStart = True
+        if prevTag == 'O' and tag == 'B':
+            chunkStart = True
+        if prevTag == 'O' and tag == 'I':
+            chunkStart = True
+
+        if prevTag == 'E' and tag == 'E':
+            chunkStart = True
+        if prevTag == 'E' and tag == 'I':
+            chunkStart = True
+        if prevTag == 'O' and tag == 'E':
+            chunkStart = True
+        if prevTag == 'O' and tag == 'I':
+            chunkStart = True
+
+        if tag != 'O' and tag != '.' and prevTagType != tagType:
+            chunkStart = True
+        return chunkStart
+
+    @staticmethod
+    def __endOfChunk(prevTag, tag, prevTagType, tagType, chunkEnd=False):
+        if prevTag == 'B' and tag == 'B':
+            chunkEnd = True
+        if prevTag == 'B' and tag == 'O':
+            chunkEnd = True
+        if prevTag == 'I' and tag == 'B':
+            chunkEnd = True
+        if prevTag == 'I' and tag == 'O':
+            chunkEnd = True
+
+        if prevTag == 'E' and tag == 'E':
+            chunkEnd = True
+        if prevTag == 'E' and tag == 'I':
+            chunkEnd = True
+        if prevTag == 'E' and tag == 'O':
+            chunkEnd = True
+        if prevTag == 'I' and tag == 'O':
+            chunkEnd = True
+
+        if prevTag != 'O' and prevTag != '.' and prevTagType != tagType:
+            chunkEnd = True
+        return chunkEnd
+
+    @staticmethod
+    def __splitTagType(tag):
+        s = tag.split('-')
+        if len(s) > 2 or len(s) == 0:
+            raise ValueError('tag format wrong. it must be B-xxx.xxx')
+        if len(s) == 1:
+            tag = s[0]
+            tagType = ""
+        else:
+            tag = s[0]
+            tagType = s[1]
+        return tag, tagType
+
+    @staticmethod
+    def computeF1Score(correct_slots: List[List[str]], pred_slots: List[List[str]]) -> float:
+        """compute f1 score is modified from conlleval.pl
+
+        Args:
+            correct_slots (List[List[str]]): golden slot string list
+            pred_slots (List[List[str]]): predicted slot string list
+
+        Returns:
+            float: slot f1 score
+        """
+        correctChunk = {}
+        correctChunkCnt = 0.0
+        foundCorrect = {}
+        foundCorrectCnt = 0.0
+        foundPred = {}
+        foundPredCnt = 0.0
+        correctTags = 0.0
+        tokenCount = 0.0
+        for correct_slot, pred_slot in zip(correct_slots, pred_slots):
+            inCorrect = False
+            lastCorrectTag = 'O'
+            lastCorrectType = ''
+            lastPredTag = 'O'
+            lastPredType = ''
+            for c, p in zip(correct_slot, pred_slot):
+                c = str(c)
+                p = str(p)
+                correctTag, correctType = Evaluator.__splitTagType(c)
+                predTag, predType = Evaluator.__splitTagType(p)
+
+                if inCorrect == True:
+                    if Evaluator.__endOfChunk(lastCorrectTag, correctTag, lastCorrectType, correctType) == True and \
+                            Evaluator.__endOfChunk(lastPredTag, predTag, lastPredType, predType) == True and \
+                            (lastCorrectType == lastPredType):
+                        inCorrect = False
+                        correctChunkCnt += 1.0
+                        if lastCorrectType in correctChunk:
+                            correctChunk[lastCorrectType] += 1.0
+                        else:
+                            correctChunk[lastCorrectType] = 1.0
+                    elif Evaluator.__endOfChunk(lastCorrectTag, correctTag, lastCorrectType, correctType) != \
+                            Evaluator.__endOfChunk(lastPredTag, predTag, lastPredType, predType) or \
+                            (correctType != predType):
+                        inCorrect = False
+
+                if Evaluator.__startOfChunk(lastCorrectTag, correctTag, lastCorrectType, correctType) == True and \
+                        Evaluator.__startOfChunk(lastPredTag, predTag, lastPredType, predType) == True and \
+                        (correctType == predType):
+                    inCorrect = True
+
+                if Evaluator.__startOfChunk(lastCorrectTag, correctTag, lastCorrectType, correctType) == True:
+                    foundCorrectCnt += 1
+                    if correctType in foundCorrect:
+                        foundCorrect[correctType] += 1.0
+                    else:
+                        foundCorrect[correctType] = 1.0
+
+                if Evaluator.__startOfChunk(lastPredTag, predTag, lastPredType, predType) == True:
+                    foundPredCnt += 1.0
+                    if predType in foundPred:
+                        foundPred[predType] += 1.0
+                    else:
+                        foundPred[predType] = 1.0
+
+                if correctTag == predTag and correctType == predType:
+                    correctTags += 1.0
+
+                tokenCount += 1.0
+
+                lastCorrectTag = correctTag
+                lastCorrectType = correctType
+                lastPredTag = predTag
+                lastPredType = predType
+
+            if inCorrect == True:
+                correctChunkCnt += 1.0
+                if lastCorrectType in correctChunk:
+                    correctChunk[lastCorrectType] += 1.0
+                else:
+                    correctChunk[lastCorrectType] = 1.0
+
+        if foundPredCnt > 0:
+            precision = 1.0 * correctChunkCnt / foundPredCnt
+        else:
+            precision = 0
+
+        if foundCorrectCnt > 0:
+            recall = 1.0 * correctChunkCnt / foundCorrectCnt
+        else:
+            recall = 0
+
+        if (precision + recall) > 0:
+            f1 = (2.0 * precision * recall) / (precision + recall)
+        else:
+            f1 = 0
+
+        return f1
+
+    @staticmethod
+    def max_freq_predict(sample):
+        """Max frequency prediction.
+        """
+        predict = []
+        for items in sample:
+            predict.append(Counter(items).most_common(1)[0][0])
+        return predict
+
+    @staticmethod
+    def __token_map(indexes, token_label_map):
+        return [[token_label_map[idx] if idx in token_label_map else -1 for idx in index] for index in indexes]
+
+    @staticmethod
+    def compute_all_metric(inps: InputData,
+                           output: OutputData,
+                           intent_label_map: dict = None,
+                           metric_list: List = None) -> Dict:
+        """Auto compute all metric mentioned in 'metric_list'
+
+        Args:
+            inps (InputData): input golden slot and intent labels
+            output (OutputData): output predicted slot and intent labels
+            intent_label_map (dict, Optional): dict like {"intent1": 0, "intent2": 1, ...}, which aims to map intent string to index
+            metric_list (List): support metrics in ["slot_f1", "intent_acc", "intent_f1", "macro_intent_f1", "micro_intent_f1", "EMA"]
+
+        Returns:
+            Dict: all metric mentioned in 'metric_list', like {'EMA': 0.7, ...}
+
+
+        Example:
+            if compute slot metric:
+
+            inps.slot = [["slot1", "slot2", ...], ...]; output.slot_ids = [["slot1", "slot2", ...], ...];
+
+            if compute intent metric:
+
+            [Multi Intent] inps.intent = [["intent1", "intent2", ...], ...]; output.intent_ids = [["intent1", "intent2", ...], ...]
+
+            [Single Intent] inps.intent = ["intent1", ...]; [Single Intent] output.intent_ids = ["intent1", ...]
+        """
+        if not metric_list:
+            metric_list = ["slot_f1", "intent_acc", "EMA"]
+        res_dict = {}
+        use_slot = output.slot_ids is not None and len(output.slot_ids) > 0
+        use_intent = output.intent_ids is not None and len(
+            output.intent_ids) > 0
+        if use_slot and "slot_f1" in metric_list:
+
+            res_dict["slot_f1"] = Evaluator.computeF1Score(
+                output.slot_ids, inps.slot)
+        if use_intent and "intent_acc" in metric_list:
+            res_dict["intent_acc"] = Evaluator.intent_accuracy(
+                output.intent_ids, inps.intent)
+        if isinstance(output.intent_ids[0], list):
|
329 |
+
if "intent_f1" in metric_list:
|
330 |
+
res_dict["intent_f1"] = Evaluator.intent_f1(Evaluator.__token_map(output.intent_ids, intent_label_map),
|
331 |
+
Evaluator.__token_map(
|
332 |
+
inps.intent, intent_label_map),
|
333 |
+
len(intent_label_map.keys()))
|
334 |
+
elif "macro_intent_f1" in metric_list:
|
335 |
+
res_dict["macro_intent_f1"] = Evaluator.intent_f1(Evaluator.__token_map(output.intent_ids, intent_label_map),
|
336 |
+
Evaluator.__token_map(inps.intent, intent_label_map),
|
337 |
+
len(intent_label_map.keys()), average="macro")
|
338 |
+
if "micro_intent_f1" in metric_list:
|
339 |
+
res_dict["micro_intent_f1"] = Evaluator.intent_f1(Evaluator.__token_map(output.intent_ids, intent_label_map),
|
340 |
+
Evaluator.__token_map(inps.intent, intent_label_map),
|
341 |
+
len(intent_label_map.keys()), average="micro")
|
342 |
+
|
343 |
+
if use_slot and use_intent and "EMA" in metric_list:
|
344 |
+
res_dict["EMA"] = Evaluator.exactly_match_accuracy(output.slot_ids, inps.slot, output.intent_ids,
|
345 |
+
inps.intent)
|
346 |
+
return res_dict
|
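For orientation, a minimal usage sketch of the Evaluator API added above (toy BIO tags; this snippet is illustrative only, not part of the committed files, and assumes the repository root is on PYTHONPATH):

from common.metric import Evaluator

golden = [["B-city", "I-city", "O"], ["O", "B-date"]]
pred = [["B-city", "I-city", "O"], ["O", "O"]]

# One of the two golden chunks is predicted exactly, and the single
# predicted chunk is correct: precision = 1.0, recall = 0.5, F1 ~= 0.667.
print(Evaluator.computeF1Score(golden, pred))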
common/model_manager.py
ADDED
@@ -0,0 +1,419 @@
'''
Author: Qiguang Chen
Date: 2023-01-11 10:39:26
LastEditors: Qiguang Chen
LastEditTime: 2023-02-19 18:50:11
Description: manage all processes of model training and prediction.

'''
import math
import os
import queue
import random

import numpy as np
import torch
from tqdm import tqdm


from common import utils
from common.loader import DataFactory
from common.logger import Logger
from common.metric import Evaluator
from common.saver import Saver
from common.tokenizer import get_tokenizer, get_tokenizer_class, load_embedding
from common.utils import InputData, instantiate
from common.utils import OutputData
from common.config import Config
import dill
from common import global_pool
from tools.load_from_hugging_face import PreTrainedTokenizerForSLU, PretrainedModelForSLU
# from tools.hugging_face_parser import load_model, load_tokenizer


class ModelManager(object):
    def __init__(self, config: Config):
        """create model manager by config

        Args:
            config (Config): configuration to manage all processes in OpenSLU
        """
        # init config
        global_pool._init()
        self.config = config
        self.__set_seed(self.config.base.get("seed"))
        self.device = self.config.base.get("device")
        self.load_dir = self.config.model_manager.get("load_dir")
        if self.config.get("logger") and self.config["logger"].get("logger_type"):
            logger_type = self.config["logger"].get("logger_type")
        else:
            logger_type = "wandb"
        # enable accelerator
        if "accelerator" in self.config and self.config["accelerator"].get("use_accelerator"):
            from accelerate import Accelerator
            self.accelerator = Accelerator(log_with=logger_type)
        else:
            self.accelerator = None
        self.tokenizer = None
        self.saver = Saver(self.config.model_manager, start_time=self.config.start_time)
        if self.config.base.get("train"):
            self.model = None
            self.optimizer = None
            self.total_step = None
            self.lr_scheduler = None
            self.init_step = 0
            self.best_metric = 0
        self.logger = Logger(logger_type=logger_type,
                             logger_name=self.config.base["name"],
                             start_time=self.config.start_time,
                             accelerator=self.accelerator)
        global_pool.set_value("logger", self.logger)

    def init_model(self):
        """init model, optimizer and lr_scheduler according to the configuration.
        """
        self.prepared = False
        if self.load_dir is not None:
            self.load()
            self.config.set_vocab_size(self.tokenizer.vocab_size)
            self.init_data()
            if self.config.base.get("train") and self.config.model_manager.get("load_train_state"):
                train_state = torch.load(os.path.join(
                    self.load_dir, "train_state.pkl"), pickle_module=dill)
                self.optimizer = instantiate(
                    self.config["optimizer"])(self.model.parameters())
                self.lr_scheduler = instantiate(self.config["scheduler"])(
                    optimizer=self.optimizer,
                    num_training_steps=self.total_step
                )
                self.optimizer.load_state_dict(train_state["optimizer"])
                self.optimizer.zero_grad()
                self.lr_scheduler.load_state_dict(train_state["lr_scheduler"])
                self.init_step = train_state["step"]
                self.best_metric = train_state["best_metric"]
        elif self.config.model.get("_from_pretrained_") and self.config.tokenizer.get("_from_pretrained_"):
            self.from_pretrained()
            self.config.set_vocab_size(self.tokenizer.vocab_size)
            self.init_data()
        else:
            self.tokenizer = get_tokenizer(
                self.config.tokenizer.get("_tokenizer_name_"))
            self.init_data()
            self.model = instantiate(self.config.model)
            self.model.to(self.device)
            if self.config.base.get("train"):
                self.optimizer = instantiate(
                    self.config["optimizer"])(self.model.parameters())
                self.lr_scheduler = instantiate(self.config["scheduler"])(
                    optimizer=self.optimizer,
                    num_training_steps=self.total_step
                )

    def init_data(self):
        self.data_factory = DataFactory(tokenizer=self.tokenizer,
                                        use_multi_intent=self.config.base.get("multi_intent"),
                                        to_lower_case=self.config.tokenizer.get("_to_lower_case_"))
        batch_size = self.config.base["batch_size"]
        # init tokenizer config and dataloaders
        tokenizer_config = {key: self.config.tokenizer[key]
                            for key in self.config.tokenizer if key[0] != "_" and key[-1] != "_"}

        if self.config.base.get("train"):
            # init dataloader & load data
            train_dataset = self.data_factory.load_dataset(self.config.dataset, split="train")

            # update label and vocabulary (ONLY SUPPORT FOR "word_tokenizer")
            self.data_factory.update_label_names(train_dataset)
            self.data_factory.update_vocabulary(train_dataset)

            self.train_dataloader = self.data_factory.get_data_loader(train_dataset,
                                                                      batch_size,
                                                                      shuffle=True,
                                                                      device=self.device,
                                                                      enable_label=True,
                                                                      align_mode=self.config.tokenizer.get("_align_mode_"),
                                                                      label2tensor=True,
                                                                      **tokenizer_config)
            self.total_step = int(self.config.base.get("epoch_num")) * len(self.train_dataloader)
            dev_dataset = self.data_factory.load_dataset(self.config.dataset, split="validation")
            self.dev_dataloader = self.data_factory.get_data_loader(dev_dataset,
                                                                    batch_size,
                                                                    shuffle=False,
                                                                    device=self.device,
                                                                    enable_label=True,
                                                                    align_mode=self.config.tokenizer.get("_align_mode_"),
                                                                    label2tensor=False,
                                                                    **tokenizer_config)
            self.data_factory.update_vocabulary(dev_dataset)
            self.intent_list = None
            self.intent_dict = None
            self.slot_list = None
            self.slot_dict = None
            # add intent label num and slot label num to config
            if self.config.model["decoder"].get("intent_classifier") and int(self.config.get_intent_label_num()) == 0:
                self.intent_list = self.data_factory.intent_label_list
                self.intent_dict = self.data_factory.intent_label_dict
                self.config.set_intent_label_num(len(self.intent_list))
            if self.config.model["decoder"].get("slot_classifier") and int(self.config.get_slot_label_num()) == 0:
                self.slot_list = self.data_factory.slot_label_list
                self.slot_dict = self.data_factory.slot_label_dict
                self.config.set_slot_label_num(len(self.slot_list))

            # autoload embedding for non-pretrained encoder
            if self.config["model"]["encoder"].get("embedding") and self.config["model"]["encoder"]["embedding"].get(
                    "load_embedding_name"):
                self.config["model"]["encoder"]["embedding"]["embedding_matrix"] = load_embedding(
                    self.tokenizer,
                    self.config["model"]["encoder"]["embedding"].get("load_embedding_name"))
            # fill template in config
            self.config.autoload_template()
            # save config
            self.logger.set_config(self.config)
            self.saver.save_tokenizer(self.tokenizer)
            self.saver.save_label(self.intent_list, self.slot_list)
            self.config.set_vocab_size(self.tokenizer.vocab_size)

        if self.config.base.get("test"):
            self.test_dataset = self.data_factory.load_dataset(self.config.dataset, split="test")
            self.test_dataloader = self.data_factory.get_data_loader(self.test_dataset,
                                                                     batch_size,
                                                                     shuffle=False,
                                                                     device=self.device,
                                                                     enable_label=True,
                                                                     align_mode=self.config.tokenizer.get("_align_mode_"),
                                                                     label2tensor=False,
                                                                     **tokenizer_config)

    def eval(self, step: int, best_metric: float) -> float:
        """ evaluate models.

        Args:
            step (int): which step the model has trained to
            best_metric (float): last best metric value, used to judge whether to test or save the model

        Returns:
            float: updated best metric value
        """
        # TODO: save dev
        _, res = self.__evaluate(self.model, self.dev_dataloader, mode="dev")
        self.logger.log_metric(res, metric_split="dev", step=step)
        if res[self.config.evaluator.get("best_key")] > best_metric:
            best_metric = res[self.config.evaluator.get("best_key")]
            train_state = {
                "step": step,
                "best_metric": best_metric,
                "optimizer": self.optimizer.state_dict(),
                "lr_scheduler": self.lr_scheduler.state_dict()
            }
            self.saver.save_model(self.model, train_state, self.accelerator)
            if self.config.base.get("test"):
                outputs, test_res = self.__evaluate(self.model, self.test_dataloader, mode="test")
                self.saver.save_output(outputs, self.test_dataset)
                self.logger.log_metric(test_res, metric_split="test", step=step)
        return best_metric

    def train(self) -> float:
        """ train models.

        Returns:
            float: updated best metric value
        """
        self.model.train()
        if self.accelerator is not None:
            self.total_step = math.ceil(self.total_step / self.accelerator.num_processes)
        if self.optimizer is None:
            self.optimizer = instantiate(self.config["optimizer"])(self.model.parameters())
        if self.lr_scheduler is None:
            self.lr_scheduler = instantiate(self.config["scheduler"])(
                optimizer=self.optimizer,
                num_training_steps=self.total_step
            )
        if not self.prepared and self.accelerator is not None:
            self.model, self.optimizer, self.train_dataloader, self.lr_scheduler = self.accelerator.prepare(
                self.model, self.optimizer, self.train_dataloader, self.lr_scheduler)
        step = self.init_step
        progress_bar = tqdm(range(self.total_step))
        progress_bar.update(self.init_step)
        self.optimizer.zero_grad()
        for _ in range(int(self.config.base.get("epoch_num"))):
            for data in self.train_dataloader:
                if step == 0:
                    self.logger.info(data.get_item(
                        0, tokenizer=self.tokenizer, intent_map=self.intent_list, slot_map=self.slot_list))
                output = self.model(data)
                if self.accelerator is not None and hasattr(self.model, "module"):
                    loss, intent_loss, slot_loss = self.model.module.compute_loss(
                        pred=output, target=data)
                else:
                    loss, intent_loss, slot_loss = self.model.compute_loss(
                        pred=output, target=data)
                self.logger.log_loss(loss, "Loss", step=step)
                self.logger.log_loss(intent_loss, "Intent Loss", step=step)
                self.logger.log_loss(slot_loss, "Slot Loss", step=step)
                self.optimizer.zero_grad()

                if self.accelerator is not None:
                    self.accelerator.backward(loss)
                else:
                    loss.backward()
                self.optimizer.step()
                self.lr_scheduler.step()
                train_state = {
                    "step": step,
                    "best_metric": self.best_metric,
                    "optimizer": self.optimizer.state_dict(),
                    "lr_scheduler": self.lr_scheduler.state_dict()
                }
                if not self.saver.auto_save_step(self.model, train_state, self.accelerator):
                    if not self.config.evaluator.get("eval_by_epoch") and step % self.config.evaluator.get(
                            "eval_step") == 0 and step != 0:
                        self.best_metric = self.eval(step, self.best_metric)
                step += 1
                progress_bar.update(1)
            if self.config.evaluator.get("eval_by_epoch"):
                self.best_metric = self.eval(step, self.best_metric)
        self.logger.finish()
        return self.best_metric

    def test(self):
        return self.__evaluate(self.model, self.test_dataloader, mode="test")

    def __set_seed(self, seed_value: int):
        """Manually set random seeds.

        Args:
            seed_value (int): random seed
        """
        random.seed(seed_value)
        np.random.seed(seed_value)
        torch.manual_seed(seed_value)
        torch.random.manual_seed(seed_value)
        os.environ['PYTHONHASHSEED'] = str(seed_value)
        if torch.cuda.is_available():
            torch.cuda.manual_seed(seed_value)
            torch.cuda.manual_seed_all(seed_value)
            torch.backends.cudnn.deterministic = True
            # fixed: benchmark must be off for deterministic cuDNN kernels (was True)
            torch.backends.cudnn.benchmark = False
        return

    def __evaluate(self, model, dataloader, mode="dev"):
        model.eval()
        inps = InputData()
        outputs = OutputData()
        for data in dataloader:
            torch.cuda.empty_cache()
            output = model(data)
            if self.accelerator is not None and hasattr(self.model, "module"):
                decode_output = model.module.decode(output, data)
            else:
                decode_output = model.decode(output, data)

            decode_output.map_output(slot_map=self.slot_list,
                                     intent_map=self.intent_list)
            if self.config.model["decoder"].get("slot_classifier"):
                data, decode_output = utils.remove_slot_ignore_index(
                    data, decode_output, ignore_index="#")

            inps.merge_input_data(data)
            outputs.merge_output_data(decode_output)
        if "metric" in self.config.evaluator:
            res = Evaluator.compute_all_metric(
                inps, outputs, intent_label_map=self.intent_dict, metric_list=self.config.evaluator["metric"])
        else:
            res = Evaluator.compute_all_metric(
                inps, outputs, intent_label_map=self.intent_dict)
        self.logger.info(f"Best {mode} metric: " + str(res))
        model.train()
        return outputs, res

    def load(self):

        if self.tokenizer is None:
            with open(os.path.join(self.load_dir, "tokenizer.pkl"), 'rb') as f:
                self.tokenizer = dill.load(f)
        label = utils.load_json(os.path.join(self.load_dir, "label.json"))
        if label["intent"] is None:
            self.intent_list = None
            self.intent_dict = None
        else:
            self.intent_list = label["intent"]
            self.intent_dict = {x: i for i, x in enumerate(label["intent"])}
            self.config.set_intent_label_num(len(self.intent_list))
        if label["slot"] is None:
            self.slot_list = None
            self.slot_dict = None
        else:
            self.slot_list = label["slot"]
            self.slot_dict = {x: i for i, x in enumerate(label["slot"])}
            self.config.set_slot_label_num(len(self.slot_list))
        self.config.set_vocab_size(self.tokenizer.vocab_size)
        if self.accelerator is not None and self.load_dir is not None:
            self.model = torch.load(os.path.join(self.load_dir, "model.pkl"), map_location=torch.device(self.device))
            self.prepared = True
            self.accelerator.load_state(self.load_dir)
            self.accelerator.prepare_model(self.model)
        else:
            self.model = torch.load(os.path.join(
                self.load_dir, "model.pkl"), map_location=torch.device(self.device))
            # if self.config.tokenizer["_tokenizer_name_"] == "word_tokenizer":
            #     self.tokenizer = get_tokenizer_class(self.config.tokenizer["_tokenizer_name_"]).from_file(os.path.join(self.load_dir, "tokenizer.json"))
            # else:
            #     self.tokenizer = get_tokenizer(self.config.tokenizer["_tokenizer_name_"])
        self.model.to(self.device)

    def from_pretrained(self):
        self.config.autoload_template()
        model = PretrainedModelForSLU.from_pretrained(self.config.model["_from_pretrained_"])
        # model = load_model(self.config.model["_from_pretrained_"])
        self.model = model.model
        if self.tokenizer is None:
            self.tokenizer = PreTrainedTokenizerForSLU.from_pretrained(
                self.config.tokenizer["_from_pretrained_"])
            self.config.tokenizer = model.config.tokenizer
            # self.tokenizer = load_tokenizer(self.config.tokenizer["_from_pretrained_"])

        self.model.to(self.device)
        label = model.config._id2label
        self.config.model = model.config.model
        self.intent_list = label["intent"]
        self.slot_list = label["slot"]
        self.intent_dict = {x: i for i, x in enumerate(label["intent"])}
        self.slot_dict = {x: i for i, x in enumerate(label["slot"])}

    def predict(self, text_data):
        self.model.eval()
        tokenizer_config = {key: self.config.tokenizer[key]
                            for key in self.config.tokenizer if key[0] != "_" and key[-1] != "_"}
        align_mode = self.config.tokenizer.get("_align_mode_")
        inputs = self.data_factory.batch_fn(batch=[{"text": text_data.split(" ")}],
                                            device=self.device,
                                            config=tokenizer_config,
                                            enable_label=False,
                                            align_mode=align_mode if align_mode is not None else "general",
                                            label2tensor=False)
        output = self.model(inputs)
        decode_output = self.model.decode(output, inputs)
        decode_output.map_output(slot_map=self.slot_list,
                                 intent_map=self.intent_list)
        if self.config.base.get("multi_intent"):
            intent = decode_output.intent_ids[0]
        else:
            intent = [decode_output.intent_ids[0]]
        input_ids = inputs.input_ids[0].tolist()
        tokens = [self.tokenizer.decode(ids) for ids in input_ids]
        slots = decode_output.slot_ids[0]
        return {"intent": intent, "slot": slots, "text": tokens}
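A rough end-to-end sketch of how ModelManager is intended to be driven. The Config loading call below is a placeholder: the concrete constructor lives in common/config.py (added earlier in this commit), so treat Config.load_from_yaml as a hypothetical name.

from common.config import Config
from common.model_manager import ModelManager

config = Config.load_from_yaml("config/reproduction/atis/joint-bert.yaml")  # hypothetical loader name
manager = ModelManager(config)
manager.init_model()   # builds tokenizer, dataloaders, model, optimizer and scheduler
if config.base.get("train"):
    manager.train()    # evaluates and checkpoints via Saver as configured
print(manager.predict("list flights from denver to boston"))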
common/saver.py
ADDED
@@ -0,0 +1,80 @@
'''
Author: Qiguang Chen
LastEditors: Qiguang Chen
Date: 2023-02-12 22:23:58
LastEditTime: 2023-02-19 14:14:56
Description:

'''
import json
import os
import queue
import shutil
import torch
import dill
from common import utils


class Saver():
    def __init__(self, config, start_time=None) -> None:
        self.config = config
        if self.config.get("save_dir"):
            self.model_save_dir = self.config["save_dir"]
        else:
            if not os.path.exists("save/"):
                os.mkdir("save/")
            self.model_save_dir = "save/" + start_time
        if not os.path.exists(self.model_save_dir):
            os.mkdir(self.model_save_dir)
        save_mode = config.get("save_mode")
        self.save_mode = save_mode if save_mode is not None else "save-by-eval"

        max_save_num = self.config.get("max_save_num")
        self.max_save_num = max_save_num if max_save_num is not None else 1
        # fixed: was maxsize=max_save_num, which may be None when unset
        self.save_pool = queue.Queue(maxsize=self.max_save_num)

    def save_tokenizer(self, tokenizer):
        with open(os.path.join(self.model_save_dir, "tokenizer.pkl"), 'wb') as f:
            dill.dump(tokenizer, f)

    def save_label(self, intent_list, slot_list):
        utils.save_json(os.path.join(self.model_save_dir, "label.json"), {"intent": intent_list, "slot": slot_list})

    def save_model(self, model, train_state, accelerator=None):
        step = train_state["step"]
        if self.max_save_num != 1:
            model_save_dir = os.path.join(self.model_save_dir, str(step))
            if self.save_pool.full():
                delete_dir = self.save_pool.get()
                shutil.rmtree(delete_dir)
            self.save_pool.put(model_save_dir)
            if not os.path.exists(model_save_dir):
                os.mkdir(model_save_dir)
        else:
            model_save_dir = self.model_save_dir
            if not os.path.exists(model_save_dir):
                os.mkdir(model_save_dir)
        if accelerator is None:
            torch.save(model, os.path.join(model_save_dir, "model.pkl"))
            torch.save(train_state, os.path.join(model_save_dir, "train_state.pkl"), pickle_module=dill)
        else:
            accelerator.wait_for_everyone()
            unwrapped_model = accelerator.unwrap_model(model)
            accelerator.save(unwrapped_model, os.path.join(model_save_dir, "model.pkl"))
            accelerator.save_state(output_dir=model_save_dir)

    def auto_save_step(self, model, train_state, accelerator=None):
        step = train_state["step"]
        if self.save_mode == "save-by-step" and step % self.config.get("save_step") == 0 and step != 0:
            self.save_model(model, train_state, accelerator)
            return True
        else:
            return False

    def save_output(self, outputs, dataset):
        outputs.save(self.model_save_dir, dataset)
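A small sketch of the Saver rotation semantics under "save-by-step" mode. In real runs the dict comes from the model_manager section of a YAML config; the values here are illustrative.

from common.saver import Saver

saver = Saver({"save_dir": "save", "save_mode": "save-by-step",
               "save_step": 1000, "max_save_num": 3})
# auto_save_step() fires every 1000 training steps; with max_save_num=3,
# each checkpoint lands in save/<step>/ and the oldest directory is
# removed once a fourth checkpoint arrives.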
common/tokenizer.py
ADDED
@@ -0,0 +1,323 @@
import json
import os
from collections import Counter
from collections import OrderedDict
from typing import List, Optional

import torch
from ordered_set import OrderedSet
from transformers import AutoTokenizer

from common.utils import download, unzip_file


def get_tokenizer(tokenizer_name: str):
    """auto get tokenizer

    Args:
        tokenizer_name (str): supports "word_tokenizer" and any pretrained tokenizer on Hugging Face.

    Returns:
        Any: Tokenizer Object
    """
    if tokenizer_name == "word_tokenizer":
        return WordTokenizer(tokenizer_name)
    else:
        return AutoTokenizer.from_pretrained(tokenizer_name)

def get_tokenizer_class(tokenizer_name: str):
    """auto get tokenizer class

    Args:
        tokenizer_name (str): supports "word_tokenizer" and any pretrained tokenizer on Hugging Face.

    Returns:
        Any: Tokenizer Class
    """
    if tokenizer_name == "word_tokenizer":
        return WordTokenizer
    else:
        return AutoTokenizer.from_pretrained

BATCH_STATE = 1
INSTANCE_STATE = 2


class WordTokenizer(object):

    def __init__(self, name):
        self.__name = name
        self.index2instance = OrderedSet()
        self.instance2index = OrderedDict()
        # Counter object recording how often each element
        # occurs in the raw text.
        self.counter = Counter()

        self.__sign_pad = "[PAD]"
        self.add_instance(self.__sign_pad)
        self.__sign_unk = "[UNK]"
        self.add_instance(self.__sign_unk)

    @property
    def padding_side(self):
        return "right"

    @property
    def all_special_ids(self):
        return [self.unk_token_id, self.pad_token_id]

    @property
    def name_or_path(self):
        return self.__name

    @property
    def vocab_size(self):
        return len(self.instance2index)

    @property
    def pad_token_id(self):
        return self.instance2index[self.__sign_pad]

    @property
    def unk_token_id(self):
        return self.instance2index[self.__sign_unk]

    def add_instance(self, instance):
        """ Add instances to alphabet.

        1, We support any iterative data structure which
        contains elements of str type.

        2, We will count added instances, which influences
        the serialization of unknown instances.

        Args:
            instance: is given instance or a list of it.
        """

        if isinstance(instance, (list, tuple)):
            for element in instance:
                self.add_instance(element)
            return

        # We only support elements of str type.
        assert isinstance(instance, str)

        # count the frequency of instances.
        # self.counter[instance] += 1

        if instance not in self.index2instance:
            self.instance2index[instance] = len(self.index2instance)
            self.index2instance.append(instance)

    def __call__(self, instance,
                 return_tensors="pt",
                 is_split_into_words=True,
                 padding=True,
                 add_special_tokens=False,
                 truncation=True,
                 max_length=512,
                 **config):
        if isinstance(instance, (list, tuple)) and isinstance(instance[0], str) and is_split_into_words:
            res = self.get_index(instance)
            state = INSTANCE_STATE
        elif isinstance(instance, str) and not is_split_into_words:
            res = self.get_index(instance.split(" "))
            state = INSTANCE_STATE
        elif not is_split_into_words and isinstance(instance, (list, tuple)):
            res = [self.get_index(ins.split(" ")) for ins in instance]
            state = BATCH_STATE
        else:
            res = [self.get_index(ins) for ins in instance]
            state = BATCH_STATE
        # fixed: truncate per sequence only in batch state; a single instance
        # is a flat id list and is truncated directly
        if state == BATCH_STATE:
            res = [r[:max_length] if len(r) >= max_length else r for r in res]
        else:
            res = res[:max_length]
        pad_id = self.get_index(self.__sign_pad)
        if padding and state == BATCH_STATE:
            # fixed: pad to the longest *truncated* sequence, not the raw input
            max_len = max(len(x) for x in res)

            for i in range(len(res)):
                res[i] = res[i] + [pad_id] * (max_len - len(res[i]))
        if return_tensors == "pt":
            input_ids = torch.Tensor(res).long()
            attention_mask = (input_ids != pad_id).long()
        elif state == BATCH_STATE:
            input_ids = res
            # fixed: keep the batch dimension instead of flattening
            attention_mask = [[1 if r != pad_id else 0 for r in batch] for batch in res]
        else:
            input_ids = res
            attention_mask = [1 if r != pad_id else 0 for r in res]
        return TokenizedData(input_ids, token_type_ids=attention_mask, attention_mask=attention_mask)

    def get_index(self, instance):
        """ Serialize given instance and return.

        Unknown words are mapped to the index of "[UNK]".

        Args:
            instance (Any): is given instance or a list of it.
        Return:
            Any: the serialization of query instance.
        """

        if isinstance(instance, (list, tuple)):
            return [self.get_index(elem) for elem in instance]

        assert isinstance(instance, str)

        try:
            return self.instance2index[instance]
        except KeyError:
            return self.instance2index[self.__sign_unk]

    def decode(self, index):
        """ Get corresponding instance of query index.

        If index is invalid, an exception is thrown.

        Args:
            index (int): is query index, possibly iterable.
        Returns:
            is corresponding instance.
        """

        if isinstance(index, list):
            return [self.decode(elem) for elem in index]
        if isinstance(index, torch.Tensor):
            index = index.tolist()
            return self.decode(index)
        return self.index2instance[index]

    def decode_batch(self, index, **kargs):
        """ Get corresponding instances of a batch of query indexes.

        If an index is invalid, an exception is thrown.

        Args:
            index (int): is query index, possibly iterable.
        Returns:
            is corresponding instance.
        """
        return self.decode(index)

    def save(self, path):
        """ Save the alphabet to a JSON file holding the
        tokenizer name and the token-to-index map.

        Args:
            path (str): is the path to save object.
        """

        with open(path, 'w', encoding="utf8") as fw:
            fw.write(json.dumps({"name": self.__name, "token_map": self.instance2index}))

    @staticmethod
    def from_file(path):
        with open(path, 'r', encoding="utf8") as fw:
            obj = json.load(fw)
        tokenizer = WordTokenizer(obj["name"])
        tokenizer.instance2index = OrderedDict(obj["token_map"])
        # tokenizer.counter = len(tokenizer.instance2index)
        tokenizer.index2instance = OrderedSet(tokenizer.instance2index.keys())
        return tokenizer

    def __len__(self):
        return len(self.index2instance)

    def __str__(self):
        return 'Alphabet {} contains about {} words: \n\t{}'.format(self.name_or_path, len(self), self.index2instance)

    def convert_tokens_to_ids(self, tokens):
        """convert token sequence to input ids sequence

        Args:
            tokens (Any): token sequence

        Returns:
            Any: input ids sequence
        """
        try:
            if isinstance(tokens, (list, tuple)):
                return [self.instance2index[x] for x in tokens]
            return self.instance2index[tokens]

        except KeyError:
            return self.instance2index[self.__sign_unk]


class TokenizedData():
    """tokenized output data with input_ids, token_type_ids, attention_mask
    """
    def __init__(self, input_ids, token_type_ids, attention_mask):
        self.input_ids = input_ids
        self.token_type_ids = token_type_ids
        self.attention_mask = attention_mask

    def word_ids(self, index: int) -> List[Optional[int]]:
        """ get word id list

        Args:
            index (int): word index in sequence

        Returns:
            List[Optional[int]]: word id list
        """
        return [j if self.attention_mask[index][j] != 0 else None for j, x in enumerate(self.input_ids[index])]

    def word_to_tokens(self, index, word_id, **kwargs):
        """map word and tokens

        Args:
            index (int): unused
            word_id (int): word index in sequence
        """
        return (word_id, word_id + 1)

    def to(self, device):
        """set device

        Args:
            device (str): support ["cpu", "cuda"]
        """
        self.input_ids = self.input_ids.to(device)
        self.token_type_ids = self.token_type_ids.to(device)
        self.attention_mask = self.attention_mask.to(device)
        return self


def load_embedding(tokenizer: WordTokenizer, glove_name: str):
    """ load embedding from the Stanford NLP server or a local cache.

    Args:
        tokenizer (WordTokenizer): non-pretrained tokenizer
        glove_name (str): GloVe embedding file name, e.g. "glove.6B.300d.txt"

    Returns:
        Any: word embedding
    """
    save_path = "save/" + glove_name + ".zip"
    if not os.path.exists(save_path):
        download("http://downloads.cs.stanford.edu/nlp/data/glove.6B.zip#" + glove_name, save_path)
        unzip_file(save_path, "save/" + glove_name)
    dim = int(glove_name.split(".")[-2][:-1])
    embedding_list = torch.rand((tokenizer.vocab_size, dim))
    embedding_list[tokenizer.pad_token_id] = torch.zeros((1, dim))
    with open("save/" + glove_name + "/" + glove_name, "r", encoding="utf8") as f:
        for line in f.readlines():
            datas = line.split(" ")
            word = datas[0]
            embedding = torch.Tensor([float(datas[i + 1]) for i in range(len(datas) - 1)])
            tokenized = tokenizer.convert_tokens_to_ids(word)
            if isinstance(tokenized, int) and tokenized != tokenizer.unk_token_id:
                embedding_list[tokenized] = embedding

    return embedding_list
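A minimal round trip through the WordTokenizer defined above (toy sentences; the vocabulary is built on the fly, and this snippet is illustrative rather than part of the committed files):

from common.tokenizer import WordTokenizer

tok = WordTokenizer("word_tokenizer")
tok.add_instance("list flights from denver".split(" "))
batch = tok(["list flights", "from denver"], is_split_into_words=False)
print(batch.input_ids)                  # padded LongTensor of vocabulary indexes
print(tok.decode(batch.input_ids[0]))   # back to the token strings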
common/utils.py
ADDED
@@ -0,0 +1,499 @@
1 |
+
import functools
|
2 |
+
import importlib
|
3 |
+
import json
|
4 |
+
import os
|
5 |
+
import tarfile
|
6 |
+
from typing import List, Tuple
|
7 |
+
import zipfile
|
8 |
+
from collections import Callable
|
9 |
+
from ruamel import yaml
|
10 |
+
import requests
|
11 |
+
import torch
|
12 |
+
from torch.nn.utils.rnn import pad_sequence
|
13 |
+
from tqdm import tqdm
|
14 |
+
from torch import Tensor
|
15 |
+
import argparse
|
16 |
+
class InputData():
|
17 |
+
"""input datas class
|
18 |
+
"""
|
19 |
+
def __init__(self, inputs: List =None):
|
20 |
+
"""init input datas class
|
21 |
+
|
22 |
+
if inputs is None:
|
23 |
+
this class can be used to save all InputData in the history by 'merge_input_data(X:InputData)'
|
24 |
+
else:
|
25 |
+
this class can be used for model input.
|
26 |
+
|
27 |
+
Args:
|
28 |
+
inputs (List, optional): inputs with [tokenized_data, slot, intent]. Defaults to None.
|
29 |
+
"""
|
30 |
+
if inputs == None:
|
31 |
+
self.slot = []
|
32 |
+
self.intent = []
|
33 |
+
self.input_ids = None
|
34 |
+
self.token_type_ids = None
|
35 |
+
self.attention_mask = None
|
36 |
+
self.seq_lens = None
|
37 |
+
else:
|
38 |
+
self.input_ids = inputs[0].input_ids
|
39 |
+
self.token_type_ids = None
|
40 |
+
if hasattr(inputs[0], "token_type_ids"):
|
41 |
+
self.token_type_ids = inputs[0].token_type_ids
|
42 |
+
self.attention_mask = inputs[0].attention_mask
|
43 |
+
if len(inputs)>=2:
|
44 |
+
self.slot = inputs[1]
|
45 |
+
if len(inputs)>=3:
|
46 |
+
self.intent = inputs[2]
|
47 |
+
self.seq_lens = self.attention_mask.sum(-1)
|
48 |
+
|
49 |
+
def get_inputs(self):
|
50 |
+
""" get tokenized_data
|
51 |
+
|
52 |
+
Returns:
|
53 |
+
dict: tokenized data
|
54 |
+
"""
|
55 |
+
res = {
|
56 |
+
"input_ids": self.input_ids,
|
57 |
+
"attention_mask": self.attention_mask
|
58 |
+
}
|
59 |
+
if self.token_type_ids is not None:
|
60 |
+
res["token_type_ids"] = self.token_type_ids
|
61 |
+
return res
|
62 |
+
|
63 |
+
def merge_input_data(self, inp: "InputData"):
|
64 |
+
"""merge another InputData object with slot and intent
|
65 |
+
|
66 |
+
Args:
|
67 |
+
inp (InputData): another InputData object
|
68 |
+
"""
|
69 |
+
self.slot += inp.slot
|
70 |
+
self.intent += inp.intent
|
71 |
+
|
72 |
+
def get_slot_mask(self, ignore_index:int)->Tensor:
|
73 |
+
"""get slot mask
|
74 |
+
|
75 |
+
Args:
|
76 |
+
ignore_index (int): ignore index used in slot padding
|
77 |
+
|
78 |
+
Returns:
|
79 |
+
Tensor: mask tensor
|
80 |
+
"""
|
81 |
+
mask = self.slot != ignore_index
|
82 |
+
mask[:, 0] = torch.ones_like(mask[:, 0]).to(self.slot.device)
|
83 |
+
return mask
|
84 |
+
|
85 |
+
def get_item(self, index, tokenizer=None, intent_map=None, slot_map=None, ignore_index = -100):
|
86 |
+
res = {"input_ids": self.input_ids[index]}
|
87 |
+
if tokenizer is not None:
|
88 |
+
res["tokens"] = [tokenizer.decode(x) for x in self.input_ids[index]]
|
89 |
+
if intent_map is not None:
|
90 |
+
intents = self.intent.tolist()
|
91 |
+
if isinstance(intents[index], list):
|
92 |
+
res["intent"] = [intent_map[int(x)] for x in intents[index]]
|
93 |
+
else:
|
94 |
+
res["intent"] = intent_map[intents[index]]
|
95 |
+
if slot_map is not None:
|
96 |
+
res["slot"] = [slot_map[x] if x != ignore_index else "#" for x in self.slot.tolist()[index]]
|
97 |
+
return res
|
98 |
+
|
99 |
+
class OutputData():
|
100 |
+
"""output data class
|
101 |
+
"""
|
102 |
+
def __init__(self, intent_ids=None, slot_ids=None):
|
103 |
+
"""init output data class
|
104 |
+
|
105 |
+
if intent_ids is None and slot_ids is None:
|
106 |
+
this class can be used to save all OutputData in the history by 'merge_output_data(X:OutputData)'
|
107 |
+
else:
|
108 |
+
this class can be used to model output management.
|
109 |
+
|
110 |
+
Args:
|
111 |
+
intent_ids (Any, optional): list(Tensor) of intent ids / logits / strings. Defaults to None.
|
112 |
+
slot_ids (Any, optional): list(Tensor) of slot ids / ids / strings. Defaults to None.
|
113 |
+
"""
|
114 |
+
if intent_ids is None and slot_ids is None:
|
115 |
+
self.intent_ids = []
|
116 |
+
self.slot_ids = []
|
117 |
+
else:
|
118 |
+
if isinstance(intent_ids, ClassifierOutputData):
|
119 |
+
self.intent_ids = intent_ids.classifier_output
|
120 |
+
else:
|
121 |
+
self.intent_ids = intent_ids
|
122 |
+
if isinstance(slot_ids, ClassifierOutputData):
|
123 |
+
self.slot_ids = slot_ids.classifier_output
|
124 |
+
else:
|
125 |
+
self.slot_ids = slot_ids
|
126 |
+
|
127 |
+
def map_output(self, slot_map=None, intent_map=None):
|
128 |
+
""" map intent or slot ids to intent or slot string.
|
129 |
+
|
130 |
+
Args:
|
131 |
+
slot_map (dict, optional): slot id-to-string map. Defaults to None.
|
132 |
+
intent_map (dict, optional): intent id-to-string map. Defaults to None.
|
133 |
+
"""
|
134 |
+
if self.slot_ids is not None:
|
135 |
+
if slot_map:
|
136 |
+
self.slot_ids = [[slot_map[x] if x >= 0 else "#" for x in sid] for sid in self.slot_ids]
|
137 |
+
if self.intent_ids is not None:
|
138 |
+
if intent_map:
|
139 |
+
self.intent_ids = [[intent_map[x] for x in sid] if isinstance(sid, list) else intent_map[sid] for sid in
|
140 |
+
self.intent_ids]
|
141 |
+
|
142 |
+
def merge_output_data(self, output:"OutputData"):
|
143 |
+
"""merge another OutData object with slot and intent
|
144 |
+
|
145 |
+
Args:
|
146 |
+
output (OutputData): another OutputData object
|
147 |
+
"""
|
148 |
+
if output.slot_ids is not None:
|
149 |
+
self.slot_ids += output.slot_ids
|
150 |
+
if output.intent_ids is not None:
|
151 |
+
self.intent_ids += output.intent_ids
|
152 |
+
|
153 |
+
def save(self, path:str, original_dataset=None):
|
154 |
+
""" save all OutputData in the history
|
155 |
+
|
156 |
+
Args:
|
157 |
+
path (str): save dir path
|
158 |
+
original_dataset(Iterable): original dataset
|
159 |
+
"""
|
160 |
+
# with open(f"{path}/intent.jsonl", "w") as f:
|
161 |
+
# for x in self.intent_ids:
|
162 |
+
# f.write(json.dumps(x) + "\n")
|
163 |
+
with open(f"{path}/outputs.jsonl", "w") as f:
|
164 |
+
if original_dataset is not None:
|
165 |
+
for i, s, d in zip(self.intent_ids, self.slot_ids, original_dataset):
|
166 |
+
f.write(json.dumps({"pred_intent": i, "pred_slot": s, "text": d["text"], "golden_intent":d["intent"], "golden_slot":d["slot"]}) + "\n")
|
167 |
+
else:
|
168 |
+
for i, s in zip(self.intent_ids, self.slot_ids):
|
169 |
+
f.write(json.dumps({"pred_intent": i, "pred_slot": s}) + "\n")
|
170 |
+
|
171 |
+
|
172 |
+
class HiddenData():
|
173 |
+
"""Interactive data structure for all model components
|
174 |
+
"""
|
175 |
+
def __init__(self, intent_hidden, slot_hidden):
|
176 |
+
"""init hidden data structure
|
177 |
+
|
178 |
+
Args:
|
179 |
+
intent_hidden (Any): sentence-level or intent hidden state
|
180 |
+
slot_hidden (Any): token-level or slot hidden state
|
181 |
+
"""
|
182 |
+
self.intent_hidden = intent_hidden
|
183 |
+
self.slot_hidden = slot_hidden
|
184 |
+
self.inputs = None
|
185 |
+
self.embedding = None
|
186 |
+
|
187 |
+
def get_intent_hidden_state(self):
|
188 |
+
"""get intent hidden state
|
189 |
+
|
190 |
+
Returns:
|
191 |
+
Any: intent hidden state
|
192 |
+
"""
|
193 |
+
return self.intent_hidden
|
194 |
+
|
195 |
+
def get_slot_hidden_state(self):
|
196 |
+
"""get slot hidden state
|
197 |
+
|
198 |
+
Returns:
|
199 |
+
Any: slot hidden state
|
200 |
+
"""
|
201 |
+
return self.slot_hidden
|
202 |
+
|
203 |
+
def update_slot_hidden_state(self, hidden_state):
|
204 |
+
"""update slot hidden state
|
205 |
+
|
206 |
+
Args:
|
207 |
+
hidden_state (Any): slot hidden state to update
|
208 |
+
"""
|
209 |
+
self.slot_hidden = hidden_state
|
210 |
+
|
211 |
+
def update_intent_hidden_state(self, hidden_state):
|
212 |
+
"""update intent hidden state
|
213 |
+
|
214 |
+
Args:
|
215 |
+
hidden_state (Any): intent hidden state to update
|
216 |
+
"""
|
217 |
+
self.intent_hidden = hidden_state
|
218 |
+
|
219 |
+
def add_input(self, inputs: InputData or "HiddenData"):
|
220 |
+
"""add last model component input information to next model component
|
221 |
+
|
222 |
+
Args:
|
223 |
+
inputs (InputDataor or HiddenData): last model component input
|
224 |
+
"""
|
225 |
+
self.inputs = inputs
|
226 |
+
|
227 |
+
def add_embedding(self, embedding):
|
228 |
+
self.embedding = embedding
|
229 |
+
|
230 |
+
|
231 |
+
class ClassifierOutputData():
|
232 |
+
"""Classifier output data structure of all classifier components
|
233 |
+
"""
|
234 |
+
def __init__(self, classifier_output):
|
235 |
+
self.classifier_output = classifier_output
|
236 |
+
self.output_embedding = None
|
237 |
+
|
238 |
+
def remove_slot_ignore_index(inputs:InputData, outputs:OutputData, ignore_index=-100):
|
239 |
+
""" remove padding or extra token in input id and output id
|
240 |
+
|
241 |
+
Args:
|
242 |
+
inputs (InputData): input data with input id
|
243 |
+
outputs (OutputData): output data with decoded output id
|
244 |
+
ignore_index (int, optional): ignore_index in input_ids. Defaults to -100.
|
245 |
+
|
246 |
+
Returns:
|
247 |
+
InputData: input data removed padding or extra token
|
248 |
+
OutputData: output data removed padding or extra token
|
249 |
+
"""
|
250 |
+
for index, (inp_ss, out_ss) in enumerate(zip(inputs.slot, outputs.slot_ids)):
|
251 |
+
temp_inp = []
|
252 |
+
temp_out = []
|
253 |
+
for inp_s, out_s in zip(list(inp_ss), list(out_ss)):
|
254 |
+
if inp_s != ignore_index:
|
255 |
+
temp_inp.append(inp_s)
|
256 |
+
temp_out.append(out_s)
|
257 |
+
|
258 |
+
inputs.slot[index] = temp_inp
|
259 |
+
outputs.slot_ids[index] = temp_out
|
260 |
+
return inputs, outputs
|
261 |
+
|
262 |
+
|
263 |
+
def pack_sequence(inputs:Tensor, seq_len:Tensor or List) -> Tensor:
|
264 |
+
"""pack sequence data to packed data without padding.
|
265 |
+
|
266 |
+
Args:
|
267 |
+
inputs (Tensor): list(Tensor) of packed sequence inputs
|
268 |
+
seq_len (Tensor or List): list(Tensor) of sequence length
|
269 |
+
|
270 |
+
Returns:
|
271 |
+
Tensor: packed inputs
|
272 |
+
|
273 |
+
Examples:
|
274 |
+
inputs = [[x, y, z, PAD, PAD], [x, y, PAD, PAD, PAD]]
|
275 |
+
|
276 |
+
seq_len = [3,2]
|
277 |
+
|
278 |
+
return -> [x, y, z, x, y]
|
279 |
+
"""
|
280 |
+
output = []
|
281 |
+
for index, batch in enumerate(inputs):
|
282 |
+
output.append(batch[:seq_len[index]])
|
283 |
+
return torch.cat(output, dim=0)
|
284 |
+
|
285 |
+
|
286 |
+
def unpack_sequence(inputs:Tensor, seq_lens:Tensor or List, padding_value=0) -> Tensor:
|
287 |
+
"""unpack sequence data.
|
288 |
+
|
289 |
+
Args:
|
290 |
+
inputs (Tensor): list(Tensor) of packed sequence inputs
|
291 |
+
seq_lens (Tensor or List): list(Tensor) of sequence length
|
292 |
+
padding_value (int, optional): padding value. Defaults to 0.
|
293 |
+
|
294 |
+
Returns:
|
295 |
+
Tensor: unpacked inputs
|
296 |
+
|
297 |
+
Examples:
|
298 |
+
inputs = [x, y, z, x, y]
|
299 |
+
|
300 |
+
seq_len = [3,2]
|
301 |
+
|
302 |
+
return -> [[x, y, z, PAD, PAD], [x, y, PAD, PAD, PAD]]
|
303 |
+
"""
|
304 |
+
last_idx = 0
|
305 |
+
output = []
|
306 |
+
for _, seq_len in enumerate(seq_lens):
|
307 |
+
output.append(inputs[last_idx:last_idx + seq_len])
|
308 |
+
last_idx = last_idx + seq_len
|
309 |
+
return pad_sequence(output, batch_first=True, padding_value=padding_value)
|
310 |
+
|
311 |
+
|
312 |
+
def get_dict_with_key_prefix(input_dict: dict, prefix=""):
|
313 |
+
res = {}
|
314 |
+
for t in input_dict:
|
315 |
+
res[t + prefix] = input_dict[t]
|
316 |
+
return res
|
317 |
+
|
318 |
+
|
319 |
+
def download(url: str, fname: str):
|
320 |
+
"""download file from url to fname
|
321 |
+
|
322 |
+
Args:
|
323 |
+
url (str): remote server url path
|
324 |
+
fname (str): local path to save
|
325 |
+
"""
|
326 |
+
resp = requests.get(url, stream=True)
|
327 |
+
total = int(resp.headers.get('content-length', 0))
|
328 |
+
with open(fname, 'wb') as file, tqdm(
|
329 |
+
desc=fname,
|
330 |
+
total=total,
|
331 |
+
unit='iB',
|
332 |
+
unit_scale=True,
|
333 |
+
unit_divisor=1024,
|
334 |
+
) as bar:
|
335 |
+
for data in resp.iter_content(chunk_size=1024):
|
336 |
+
size = file.write(data)
|
337 |
+
bar.update(size)
|
338 |
+
|
339 |
+
|
340 |
+
def tar_gz_data(file_name: str):
    """use the "tar.gz" format to compress data

    Args:
        file_name (str): file path to tar
    """
    t = tarfile.open(f"{file_name}.tar.gz", "w:gz")

    for root, dirs, files in os.walk(f"{file_name}"):
        for file in files:
            fullpath = os.path.join(root, file)
            t.add(fullpath)
    t.close()


def untar(fname: str, dirs: str):
    """uncompress a "tar.gz" file

    Args:
        fname (str): file path to untar
        dirs (str): target dir path
    """
    t = tarfile.open(fname)
    t.extractall(path=dirs)


def unzip_file(zip_src: str, dst_dir: str):
    """uncompress a "zip" file

    Args:
        zip_src (str): file path to unzip
        dst_dir (str): target dir path
    """
    if zipfile.is_zipfile(zip_src):
        if not os.path.exists(dst_dir):
            os.mkdir(dst_dir)
        fz = zipfile.ZipFile(zip_src, 'r')
        for file in fz.namelist():
            fz.extract(file, dst_dir)
    else:
        print('This is not a zip file')


def find_callable(target: str) -> Callable:
    """find a callable function / class to instantiate

    Args:
        target (str): class/module path, e.g. "torch.optim.Adam"

    Raises:
        e: can not import module

    Returns:
        Callable: the resolved function / class
    """
    target_module_path, target_callable_path = target.rsplit(".", 1)
    target_callable_paths = [target_callable_path]

    target_module = None
    while len(target_module_path):
        try:
            target_module = importlib.import_module(target_module_path)
            break
        except Exception as e:
            # the tail of the path may be an attribute rather than a module:
            # move it onto the attribute path and retry with the parent module
            if "." not in target_module_path:
                raise e
            target_module_path, tail = target_module_path.rsplit(".", 1)
            target_callable_paths.append(tail)
    target_callable = target_module
    for attr in reversed(target_callable_paths):
        target_callable = getattr(target_callable, attr)

    return target_callable


def instantiate(config, target="_model_target_", partial="_model_partial_"):
    """instantiate an object from its configuration.

    Modified from https://github.com/HIT-SCIR/ltp/blob/main/python/core/ltp_core/models/utils/instantiate.py.

    Args:
        config (Any): configuration
        target (str, optional): key holding the class to be instantiated. Defaults to "_model_target_".
        partial (str, optional): key deciding whether the object should be instantiated partially. Defaults to "_model_partial_".

    Returns:
        Any: instantiated object
    """
    if isinstance(config, dict) and target in config:
        target_path = config.get(target)
        target_callable = find_callable(target_path)

        is_partial = config.get(partial, False)
        target_args = {
            key: instantiate(value)
            for key, value in config.items()
            if key not in [target, partial]
        }

        if is_partial:
            return functools.partial(target_callable, **target_args)
        else:
            return target_callable(**target_args)
    elif isinstance(config, dict):
        return {key: instantiate(value) for key, value in config.items()}
    else:
        return config

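
# Illustrative usage of instantiate (a sketch; the config values are made up):
#   opt_factory = instantiate({
#       "_model_target_": "torch.optim.Adam",
#       "_model_partial_": True,
#       "lr": 1e-3,
#   })
#   optimizer = opt_factory(model.parameters())  # the partial is completed here
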
def load_yaml(file):
    """load data from a yaml file.

    Args:
        file (str): yaml file path.

    Returns:
        Any: data
    """
    with open(file, encoding="utf-8") as stream:
        try:
            return yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            raise exc


def from_configured(configure_name_or_file: str, model_class: Callable, config_prefix="./config/", **input_config):
    """load a module from pre-configured data

    Args:
        configure_name_or_file (str): config path -> {config_prefix}/{configure_name_or_file}.yaml
        model_class (Callable): module class
        config_prefix (str, optional): configuration root path. Defaults to "./config/".

    Returns:
        Any: instantiated object.
    """
    if os.path.exists(configure_name_or_file):
        configure_file = configure_name_or_file
    else:
        configure_file = os.path.join(config_prefix, configure_name_or_file + ".yaml")
    config = load_yaml(configure_file)
    config.update(input_config)
    return model_class(**config)

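
# Illustrative usage of from_configured (the config name and class are
# assumptions for the example):
#   model = from_configured("examples/normal", model_class=SomeModelClass)
#   # loads ./config/examples/normal.yaml and calls SomeModelClass(**config)
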
def save_json(file_path, obj):
    with open(file_path, 'w', encoding="utf8") as fw:
        fw.write(json.dumps(obj))


def load_json(file_path):
    with open(file_path, 'r', encoding="utf8") as fw:
        res = json.load(fw)
    return res


def str2bool(v):
    if isinstance(v, bool):
        return v
    if v.lower() in ('yes', 'true', 't', 'y', '1'):
        return True
    elif v.lower() in ('no', 'false', 'f', 'n', '0'):
        return False
    else:
        raise argparse.ArgumentTypeError('Boolean value expected.')
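
# Illustrative usage of str2bool with argparse (the flag name is an assumption):
#   parser.add_argument("--use_accelerator", type=str2bool, default=False)
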
config/README.md
ADDED
@@ -0,0 +1,348 @@
# Configuration

## 1. Introduction

Configuration is divided into fine-grained reusable modules:

- `base`: basic configuration
- `logger`: logger setting
- `model_manager`: loading and saving model parameters
- `accelerator`: whether to enable multi-GPU training
- `dataset`: dataset management
- `evaluator`: evaluation and metric setting
- `tokenizer`: tokenizer initiation and tokenizing setting
- `optimizer`: optimizer initiation setting
- `scheduler`: scheduler initiation setting
- `model`: model construction setting

From Sec. 2 to Sec. 11, we describe each configuration module in detail, or you can see [Examples](examples/README.md) for a quick start.

NOTE: `_*_` config items are reserved fields in OpenSLU.

## Configuration Item Script
In OpenSLU configuration, we support a simple calculation script in each configuration item. For example, we can fetch `dataset_name` with `{dataset.dataset_name}` and fill its value into the Python expression `'LightChen2333/agif-slu-' + '*'`. (Without the surrounding quotes, the `{dataset.dataset_name}` value is treated as a variable rather than a string.)

NOTE: each item containing `{}` is treated as a Python script.
```yaml
tokenizer:
  _from_pretrained_: "'LightChen2333/agif-slu-' + '{dataset.dataset_name}'" # supports simple calculation scripts

```
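
For intuition, the script resolution can be pictured roughly as below. This is a simplified illustration only, not the actual OpenSLU implementation (the real logic lives in `common/config.py`); `resolve_item` and the inline config are made up for the example:

```python
import re

def resolve_item(value: str, config: dict):
    # Replace each {a.b.c} reference with the value found in the nested config,
    # then evaluate what remains as a Python expression.
    def lookup(path: str):
        node = config
        for key in path.split("."):
            node = node[key]
        return node

    filled = re.sub(r"\{([\w.]+)\}", lambda m: str(lookup(m.group(1))), value)
    return eval(filled)

config = {"dataset": {"dataset_name": "atis"}}
print(resolve_item("'LightChen2333/agif-slu-' + '{dataset.dataset_name}'", config))
# -> LightChen2333/agif-slu-atis
```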

## `base` Config
```yaml
# `start_time` is generated automatically when any config script starts; it does not need to be assigned.
# start_time: xxxxxxxx
base:
  name: "OpenSLU" # project/logger name
  multi_intent: false # whether to enable the multi-intent setting
  train: True # enable training; otherwise run zero-shot
  test: True # enable test during training
  device: cuda # device: cuda/cpu
  seed: 42 # random seed
  best_key: EMA # save the model by this metric [intent_acc/slot_f1/EMA]
  tokenizer_name: word_tokenizer # word_tokenizer when no pretrained model is used; otherwise an [AutoTokenizer] tokenizer name
  add_special_tokens: false # whether to add [CLS]/[SEP] special tokens
  epoch_num: 300 # number of training epochs
  # eval_step: 280 # if eval_by_epoch = false and eval_step > 0, the model is evaluated every {eval_step} steps
  eval_by_epoch: true # evaluate the model every epoch
  batch_size: 16 # batch size
```
## `logger` Config
```yaml
logger:
  # `wandb` is supported in both single- and multi-GPU settings,
  # `tensorboard` is only supported in multi-GPU,
  # and `fitlog` is only supported in single-GPU
  logger_type: wandb
```
## `model_manager` Config
```yaml
model_manager:
  # if load_dir != `null`, OpenSLU will try to load the checkpoint and continue training;
  # if load_dir == `null`, OpenSLU will restart training.
  load_dir: null
  # The dir path to save the model and training state.
  # if save_dir == `null`, the model is saved to `save/{start_time}`
  save_dir: save/stack
  # save_mode can be selected from [save-by-step, save-by-eval]
  # `save-by-step` saves the model every {save_step} steps without evaluation.
  # `save-by-eval` saves the model with the best validation performance.
  save_mode: save-by-eval
  # save_step: 100 # only enabled when save_mode == `save-by-step`
  max_save_num: 1 # the number of best models to keep
```
## `accelerator` Config
```yaml
accelerator:
  use_accelerator: false # `accelerator` is enabled if use_accelerator is `true`
```
## `dataset` Config
```yaml
dataset:
  # supports loading from Hugging Face.
  # dataset_name can be selected from [atis, snips, mix-atis, mix-snips]
  dataset_name: atis
  # any one of the dataset paths can be assigned; the other splits stay the same as in `dataset_name`
  # train: atis # supports loading from Hugging Face or an assigned local data path.
  # validation: {root}/ATIS/dev.jsonl
  # test: {root}/ATIS/test.jsonl
```
## `evaluator` Config
```yaml
evaluator:
  best_key: EMA # the metric used to select the best model
  eval_by_epoch: true # evaluate after every epoch if `true`.
  # Evaluate every {eval_step} steps if eval_by_epoch == `false`.
  # eval_step: 1800
  # the supported metrics are:
  # - intent_acc
  # - slot_f1
  # - EMA
  # - intent_f1
  # - macro_intent_f1
  # - micro_intent_f1
  # NOTE: [intent_f1, macro_intent_f1, micro_intent_f1] are only supported in the multi-intent setting; intent_f1 and macro_intent_f1 are the same metric.
  metric:
    - intent_acc
    - slot_f1
    - EMA
```
## `tokenizer` Config
```yaml
tokenizer:
  # Init tokenizer. Supports `word_tokenizer` and other tokenizers from Hugging Face.
  _tokenizer_name_: word_tokenizer
  # if `_tokenizer_name_` is not assigned, you can load a pretrained tokenizer from Hugging Face.
  # _from_pretrained_: LightChen2333/stack-propagation-slu-atis
  _padding_side_: right # the padding side of the tokenizer, supports [left/right]
  # Align mode between text and slots, supports [fast/general];
  # `general` is supported by most tokenizers, `fast` only by a small portion of them.
  _align_mode_: fast
  _to_lower_case_: true
  add_special_tokens: false # other tokenizer args; you can pass any other args to tokenizer initialization except `_*_` format args
  max_length: 512

```
## `optimizer` Config
```yaml
optimizer:
  _model_target_: torch.optim.Adam # optimizer class / function returning an Optimizer object
  _model_partial_: true # partial instantiation: model.parameters() is added later to complete the optimizer arguments
  lr: 0.001 # learning rate
  weight_decay: 1e-6 # weight decay
```
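
Because `_model_partial_` is true, `instantiate` (see `common/utils.py`) returns a `functools.partial` rather than a finished optimizer; the trainer later completes it with the model parameters. Conceptually (a sketch only; the `Linear` stand-in model is made up):

```python
import functools
import torch

# What instantiate(...) builds from the config above, roughly:
optimizer_factory = functools.partial(torch.optim.Adam, lr=0.001, weight_decay=1e-6)

# ...and how the trainer completes it:
model = torch.nn.Linear(4, 2)  # stand-in model for illustration
optimizer = optimizer_factory(model.parameters())
```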
## `scheduler` Config
```yaml
scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true # partial instantiation: optimizer and num_training_steps are added later to complete the scheduler arguments
  name: "linear"
  num_warmup_steps: 0
```
## `model` Config
```yaml
model:
  # _from_pretrained_: LightChen2333/stack-propagation-slu-atis # load the model from Hugging Face; none of the parameters below need to be assigned.
  _model_target_: model.OpenSLUModel # the general model class; it can automatically build the model from the configuration.

  encoder:
    _model_target_: model.encoder.AutoEncoder # auto-encoder to autoload the chosen encoder model
    encoder_name: self-attention-lstm # supports [lstm/self-attention-lstm] and any pretrained model that Hugging Face supports

    embedding: # word embedding layer
      # load_embedding_name: glove.6B.300d.txt # supports autoloading GloVe embeddings.
      embedding_dim: 256 # embedding dim
      dropout_rate: 0.5 # dropout ratio after embedding

    lstm:
      layer_num: 1 # lstm configuration
      bidirectional: true
      output_dim: 256 # a module should set output_dim so that input_dim can be autoloaded in the next module. You can also set input_dim manually.
      dropout_rate: 0.5

    attention: # self-attention configuration
      hidden_dim: 1024
      output_dim: 128
      dropout_rate: 0.5

    return_with_input: true # add input information, like attention_mask, to the decoder module.
    return_sentence_level_hidden: false # whether to return the sentence representation to the decoder module

  decoder:
    _model_target_: model.decoder.StackPropagationDecoder # decoder name
    interaction:
      _model_target_: model.decoder.interaction.StackInteraction # interaction module name
      differentiable: false # interaction module config

    intent_classifier:
      _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier # intent classifier module name
      layer_num: 1
      bidirectional: false
      hidden_dim: 64
      force_ratio: 0.9 # teacher-forcing ratio
      embedding_dim: 8 # intent embedding dim
      ignore_index: -100 # index ignored when computing loss and metrics
      dropout_rate: 0.5
      mode: "token-level-intent" # decode mode, supports [token-level-intent, intent, slot]
      use_multi: "{base.multi_intent}"
      return_sentence_level: true # whether to return the sentence-level prediction as decoded input

    slot_classifier:
      _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
      layer_num: 1
      bidirectional: false
      force_ratio: 0.9
      hidden_dim: 64
      embedding_dim: 32
      ignore_index: -100
      dropout_rate: 0.5
      mode: "slot"
      use_multi: false
      return_sentence_level: false
```

## Implementing a New Model

### 1. Interaction Re-Implementation
Here we take `DCA-Net` as an example:

In most cases, you only need to rewrite the `Interaction` module:

```python
from common.utils import HiddenData
from model.decoder.interaction import BaseInteraction
class DCANetInteraction(BaseInteraction):
    def __init__(self, **config):
        super().__init__(**config)
        self.T_block1 = I_S_Block(self.config["output_dim"], self.config["attention_dropout"], self.config["num_attention_heads"])
        ...

    def forward(self, encode_hidden: HiddenData, **kwargs):
        ...
```

and then configure your module:
```yaml
base:
  ...

optimizer:
  ...

scheduler:
  ...

model:
  _model_target_: model.OpenSLUModel
  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: lstm

    embedding:
      load_embedding_name: glove.6B.300d.txt
      embedding_dim: 300
      dropout_rate: 0.5

    lstm:
      dropout_rate: 0.5
      output_dim: 128
      layer_num: 2
      bidirectional: true
    output_dim: "{model.encoder.lstm.output_dim}"
    return_with_input: true
    return_sentence_level_hidden: false

  decoder:
    _model_target_: model.decoder.DCANetDecoder
    interaction:
      _model_target_: model.decoder.interaction.DCANetInteraction
      output_dim: "{model.encoder.output_dim}"
      attention_dropout: 0.5
      num_attention_heads: 8

    intent_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "intent"
      input_dim: "{model.encoder.output_dim}"
      ignore_index: -100

    slot_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "slot"
      input_dim: "{model.encoder.output_dim}"
      ignore_index: -100
```

That is all of the model construction. You can run the following script to train the model:
```shell
python run.py -cp config/dca_net.yaml [-ds atis]
```
### 2. Decoder Re-Implementation
Sometimes the `interaction then classification` order cannot meet your needs. In that case, simply rewrite the decoder for a flexible interaction order:

Here, we take `stack-propagation` as an example:
1. We rewrite the interaction module for `stack-propagation`:
```python
from common.utils import ClassifierOutputData, HiddenData
from model.decoder.interaction.base_interaction import BaseInteraction
class StackInteraction(BaseInteraction):
    def __init__(self, **config):
        super().__init__(**config)
        ...

    def forward(self, intent_output: ClassifierOutputData, encode_hidden: HiddenData):
        ...
```
2. We rewrite `StackPropagationDecoder` for the stack-propagation interaction order:
```python
from common.utils import HiddenData, OutputData
from model.decoder.base_decoder import BaseDecoder
class StackPropagationDecoder(BaseDecoder):

    def forward(self, hidden: HiddenData):
        pred_intent = self.intent_classifier(hidden)
        hidden = self.interaction(pred_intent, hidden)
        pred_slot = self.slot_classifier(hidden)
        return OutputData(pred_intent, pred_slot)
```

3. Then we can easily assemble the general model via the `config/stack-propagation.yaml` configuration file:
```yaml
base:
  ...

...

model:
  _model_target_: model.OpenSLUModel

  encoder:
    ...

  decoder:
    _model_target_: model.decoder.StackPropagationDecoder
    interaction:
      _model_target_: model.decoder.interaction.StackInteraction
      differentiable: false

    intent_classifier:
      _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
      ... # parameters needed by __init__(*)
      mode: "token-level-intent"
      use_multi: false
      return_sentence_level: true

    slot_classifier:
      _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
      ... # parameters needed by __init__(*)
      mode: "slot"
      use_multi: false
      return_sentence_level: false
```
4. You can run the following script to train the model:
```shell
python run.py -cp config/stack-propagation.yaml
```

config/app.yaml
ADDED
@@ -0,0 +1,6 @@
host: 127.0.0.1
port: 7860

is_push_to_public: false
save-path: save/stack/outputs.jsonl
page-size: 2
config/decoder/interaction/stack-propagation.yaml
ADDED
@@ -0,0 +1 @@
differentiable: false
config/examples/README.md
ADDED
@@ -0,0 +1,38 @@
# Examples

Here we introduce some usage of our framework through configuration.

## Reload to train

First, run this script to train a `joint-bert` model:
```shell
python run.py -cp config/examples/normal.yaml
```

You can use `kill` or `Ctrl+C` to stop the training process.

Then, to reload the model and continue training, run `reload_to_train.yaml`, which reloads the checkpoint and the training state.
```shell
python run.py -cp config/examples/reload_to_train.yaml
```

The main difference in `reload_to_train.yaml` is the `model_manager` configuration item:
```yaml
...
model_manager:
  load_train_state: True # set to True
  load_dir: save/joint_bert # not null
  ...
...
```

## Load from a Pre-finetuned Model
We upload all models to [LightChen2333](https://huggingface.co/LightChen2333). You can load those models with a simple configuration.
In `from_pretrained.yaml` and `from_pretrained_multi.yaml`, we show two example scripts that load from Hugging Face in the single- and multi-intent settings, respectively. The key configuration items are as below:
```yaml
tokenizer:
  _from_pretrained_: "'LightChen2333/agif-slu-' + '{dataset.dataset_name}'" # supports simple calculation scripts

model:
  _from_pretrained_: "'LightChen2333/agif-slu-' + '{dataset.dataset_name}'"
```
config/examples/from_pretrained.yaml
ADDED
@@ -0,0 +1,53 @@
device: "NVIDIA GeForce RTX 2080 Ti"

base:
  name: "OpenSLUv1"
  train: false
  test: true
  device: cpu
  seed: 42
  epoch_num: 300
  batch_size: 16

logger:
  logger_type: local # wandb is supported in both single- and multi-GPU settings, tensorboard is only supported in multi-GPU, and fitlog is only supported in single-GPU

model_manager:
  load_dir: null
  save_dir: save/joint_bert
  save_mode: save-by-eval # save-by-step
  # save_step: 100
  max_save_num: 1

accelerator:
  use_accelerator: false

dataset:
  dataset_name: atis

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - slot_f1
    - EMA

tokenizer:
  _from_pretrained_: "'LightChen2333/joint-bert-slu-' + '{dataset.dataset_name}'"

optimizer:
  _model_target_: torch.optim.Adam
  _model_partial_: true
  lr: 0.001
  weight_decay: 1e-6

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name: "linear"
  num_warmup_steps: 0

model:
  _from_pretrained_: "'LightChen2333/joint-bert-slu-' + '{dataset.dataset_name}'"
config/examples/from_pretrained_multi.yaml
ADDED
@@ -0,0 +1,55 @@
device: "NVIDIA GeForce RTX 2080 Ti"

base:
  name: "OpenSLUv1"
  multi_intent: true
  train: false
  test: true
  device: cuda
  seed: 42
  epoch_num: 300
  batch_size: 16

logger:
  logger_type: wandb # wandb is supported in both single- and multi-GPU settings, tensorboard is only supported in multi-GPU, and fitlog is only supported in single-GPU

model_manager:
  load_dir: null
  save_dir: save/joint_bert
  save_mode: save-by-eval # save-by-step
  # save_step: 100
  max_save_num: 1

accelerator:
  use_accelerator: false

dataset:
  dataset_name: atis

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - slot_f1
    - EMA

tokenizer:
  _from_pretrained_: "'LightChen2333/agif-slu-' + '{dataset.dataset_name}'"

optimizer:
  _model_target_: torch.optim.Adam
  _model_partial_: true
  lr: 0.001
  weight_decay: 1e-6

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name: "linear"
  num_warmup_steps: 0

model:
  _from_pretrained_: "'LightChen2333/agif-slu-' + '{dataset.dataset_name}'"
config/examples/normal.yaml
ADDED
@@ -0,0 +1,70 @@
device: "Tesla V100-SXM2-16GB"

base:
  name: "OpenSLU-test"
  train: True
  test: True
  device: cuda
  seed: 42
  epoch_num: 300
  batch_size: 128

model_manager:
  load_dir: null
  save_dir: save/joint_bert

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - slot_f1
    - EMA

accelerator:
  use_accelerator: false

dataset:
  dataset_name: atis

tokenizer:
  _tokenizer_name_: bert-base-uncased
  _padding_side_: right
  _align_mode_: general
  add_special_tokens: true

optimizer:
  _model_target_: torch.optim.AdamW
  _model_partial_: true
  lr: 4e-6
  weight_decay: 1e-8

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name: "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.open_slu_model.OpenSLUModel
  ignore_index: -100
  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: bert-base-uncased
    output_dim: 768
    return_with_input: true
    return_sentence_level_hidden: true

  decoder:
    _model_target_: model.decoder.base_decoder.BaseDecoder
    intent_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "intent"
      ignore_index: -100

    slot_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "slot"
      ignore_index: -100
config/examples/reload_to_train.yaml
ADDED
@@ -0,0 +1,71 @@
device: "Tesla V100-SXM2-16GB"

base:
  name: "OpenSLU-test"
  train: True
  test: True
  device: cuda
  seed: 42
  epoch_num: 300
  batch_size: 128

model_manager:
  load_train_state: True
  load_dir: save/joint_bert
  save_dir: save/joint_bert

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - slot_f1
    - EMA

accelerator:
  use_accelerator: false

dataset:
  dataset_name: atis

tokenizer:
  _tokenizer_name_: bert-base-uncased
  _padding_side_: right
  _align_mode_: general
  add_special_tokens: true

optimizer:
  _model_target_: torch.optim.AdamW
  _model_partial_: true
  lr: 4e-6
  weight_decay: 1e-8

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name: "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.open_slu_model.OpenSLUModel
  ignore_index: -100
  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: bert-base-uncased
    output_dim: 768
    return_with_input: true
    return_sentence_level_hidden: true

  decoder:
    _model_target_: model.decoder.base_decoder.BaseDecoder
    intent_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "intent"
      ignore_index: -100

    slot_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "slot"
      ignore_index: -100
config/reproduction/atis/bi-model.yaml
ADDED
@@ -0,0 +1,106 @@
device: "NVIDIA GeForce RTX 2080 Ti"

base:
  name: "OpenSLUv1"
  train: true
  test: true
  device: cuda
  seed: 42
  epoch_num: 300
  batch_size: 16

model_manager:
  load_dir: null
  save_dir: save/bi-model-atis

accelerator:
  use_accelerator: false

dataset:
  dataset_name: atis

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - slot_f1
    - EMA


tokenizer:
  _tokenizer_name_: word_tokenizer
  _padding_side_: right
  _align_mode_: fast
  add_special_tokens: false
  max_length: 512

optimizer:
  _model_target_: torch.optim.Adam
  _model_partial_: true
  lr: 0.001
  weight_decay: 1e-6

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name: "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.OpenSLUModel

  encoder:
    _model_target_: model.encoder.BiEncoder
    intent_encoder:
      _model_target_: model.encoder.AutoEncoder
      encoder_name: lstm

      embedding:
        embedding_dim: 256
        dropout_rate: 0.4

      lstm:
        dropout_rate: 0.5
        output_dim: 256
        layer_num: 2
        bidirectional: true

      return_with_input: true
      return_sentence_level_hidden: false

    slot_encoder:
      _model_target_: model.encoder.AutoEncoder
      encoder_name: lstm

      embedding:
        embedding_dim: 256
        dropout_rate: 0.4

      lstm:
        dropout_rate: 0.5
        output_dim: 256
        layer_num: 2
        bidirectional: true

      return_with_input: true
      return_sentence_level_hidden: false

  decoder:
    _model_target_: model.decoder.BaseDecoder
    # teacher_forcing: true
    interaction:
      _model_target_: model.decoder.interaction.BiModelInteraction
      output_dim: 256
      dropout_rate: 0.4

    intent_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "intent"
      ignore_index: -100

    slot_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "slot"
      ignore_index: -100
config/reproduction/atis/dca-net.yaml
ADDED
@@ -0,0 +1,88 @@
device: "Tesla P100-PCIE-16GB"

base:
  name: "OpenSLUv1"
  train: true
  test: true
  device: cuda
  seed: 42
  epoch_num: 300
  batch_size: 16

model_manager:
  load_dir: null
  save_dir: save/dca-net-atis

accelerator:
  use_accelerator: false

dataset:
  dataset_name: atis

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - slot_f1
    - EMA

tokenizer:
  _tokenizer_name_: word_tokenizer
  _padding_side_: right
  _align_mode_: fast
  add_special_tokens: false
  max_length: 512

optimizer:
  _model_target_: torch.optim.Adam
  _model_partial_: true
  lr: 0.001
  weight_decay: 1e-6

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name: "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.OpenSLUModel
  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: lstm

    embedding:
      load_embedding_name: glove.6B.300d.txt
      embedding_dim: 300
      dropout_rate: 0.5

    lstm:
      dropout_rate: 0.5
      output_dim: 128
      layer_num: 2
      bidirectional: true
    output_dim: "{model.encoder.lstm.output_dim}"
    return_with_input: true
    return_sentence_level_hidden: false

  decoder:
    _model_target_: model.decoder.DCANetDecoder
    interaction:
      _model_target_: model.decoder.interaction.DCANetInteraction
      output_dim: "{model.encoder.output_dim}"
      attention_dropout: 0.5
      num_attention_heads: 8

    intent_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "intent"
      input_dim: "{model.encoder.output_dim}"
      ignore_index: -100

    slot_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "slot"
      input_dim: "{model.encoder.output_dim}"
      ignore_index: -100
config/reproduction/atis/deberta.yaml
ADDED
@@ -0,0 +1,67 @@
device: "Tesla V100-SXM2-16GB"

base:
  name: "OpenSLUv1"
  train: true
  test: true
  device: cuda
  seed: 42
  epoch_num: 300
  batch_size: 32

model_manager:
  load_dir: null
  save_dir: save/deberta-atis

dataset:
  dataset_name: atis

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - slot_f1
    - EMA

tokenizer:
  _tokenizer_name_: microsoft/deberta-v3-base
  _padding_side_: right
  add_special_tokens: true
  max_length: 512

optimizer:
  _model_target_: torch.optim.AdamW
  _model_partial_: true
  lr: 2e-5
  weight_decay: 1e-8

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name: "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.open_slu_model.OpenSLUModel
  ignore_index: -100
  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: microsoft/deberta-v3-base
    output_dim: 768
    return_with_input: true
    return_sentence_level_hidden: true

  decoder:
    _model_target_: model.decoder.base_decoder.BaseDecoder
    intent_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "intent"
      ignore_index: -100

    slot_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "slot"
      ignore_index: -100
config/reproduction/atis/electra.yaml
ADDED
@@ -0,0 +1,67 @@
device: "Tesla V100-SXM2-16GB"

base:
  name: "OpenSLUv1"
  train: True
  test: True
  device: cuda
  seed: 42
  epoch_num: 300
  batch_size: 32

model_manager:
  load_dir: null
  save_dir: save/electra-atis

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - slot_f1
    - EMA

dataset:
  dataset_name: atis

tokenizer:
  _tokenizer_name_: google/electra-small-discriminator
  _padding_side_: right
  add_special_tokens: true
  max_length: 512

optimizer:
  _model_target_: torch.optim.AdamW
  _model_partial_: true
  lr: 2e-5
  weight_decay: 1e-8

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name: "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.open_slu_model.OpenSLUModel
  ignore_index: -100
  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: google/electra-small-discriminator
    output_dim: 256
    return_with_input: true
    return_sentence_level_hidden: true

  decoder:
    _model_target_: model.decoder.base_decoder.BaseDecoder
    intent_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "intent"
      ignore_index: -100

    slot_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "slot"
      ignore_index: -100
config/reproduction/atis/joint-bert.yaml
ADDED
@@ -0,0 +1,70 @@
device: "Tesla V100-SXM2-16GB"

base:
  name: "OpenSLUv1"
  train: True
  test: True
  device: cuda
  seed: 42
  epoch_num: 300
  batch_size: 128

model_manager:
  load_dir: null
  save_dir: save/joint-bert-atis

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - slot_f1
    - EMA

accelerator:
  use_accelerator: false

dataset:
  dataset_name: atis

tokenizer:
  _tokenizer_name_: bert-base-uncased
  _padding_side_: right
  _align_mode_: general
  add_special_tokens: true

optimizer:
  _model_target_: torch.optim.AdamW
  _model_partial_: true
  lr: 4e-6
  weight_decay: 1e-8

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name: "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.open_slu_model.OpenSLUModel
  ignore_index: -100
  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: bert-base-uncased
    output_dim: 768
    return_with_input: true
    return_sentence_level_hidden: true

  decoder:
    _model_target_: model.decoder.base_decoder.BaseDecoder
    intent_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "intent"
      ignore_index: -100

    slot_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "slot"
      ignore_index: -100
config/reproduction/atis/roberta.yaml
ADDED
@@ -0,0 +1,70 @@
device: "Tesla V100-SXM2-16GB" # Useless info

base:
  name: "OpenSLUv1"
  train: True
  test: True
  device: cuda
  seed: 42
  epoch_num: 300
  batch_size: 32

model_manager:
  load_dir: null
  save_dir: save/roberta-atis

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - slot_f1
    - EMA

accelerator:
  use_accelerator: false

dataset:
  dataset_name: atis

tokenizer:
  _tokenizer_name_: roberta-base
  _padding_side_: right
  add_special_tokens: true
  max_length: 512

optimizer:
  _model_target_: torch.optim.AdamW
  _model_partial_: true
  lr: 2e-5
  weight_decay: 1e-8

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name: "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.open_slu_model.OpenSLUModel
  ignore_index: -100
  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: roberta-base
    output_dim: 768
    return_with_input: true
    return_sentence_level_hidden: true

  decoder:
    _model_target_: model.decoder.base_decoder.BaseDecoder
    intent_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "intent"
      ignore_index: -100

    slot_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "slot"
      ignore_index: -100
config/reproduction/atis/slot-gated.yaml
ADDED
@@ -0,0 +1,87 @@
device: "NVIDIA GeForce RTX 2080 Ti"

base:
  name: "OpenSLUv1"
  train: true
  test: true
  device: cuda
  seed: 42
  epoch_num: 300
  batch_size: 16

model_manager:
  load_dir: null
  save_dir: save/slot-gated-atis

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - slot_f1
    - EMA

accelerator:
  use_accelerator: false

dataset:
  dataset_name: atis

tokenizer:
  _tokenizer_name_: word_tokenizer
  _padding_side_: right
  _align_mode_: fast
  add_special_tokens: false
  max_length: 512

optimizer:
  _model_target_: torch.optim.Adam
  _model_partial_: true
  lr: 0.001
  weight_decay: 1e-6

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name: "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.OpenSLUModel
  ignore_index: -100
  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: lstm

    embedding:
      embedding_dim: 256
      dropout_rate: 0.4

    lstm:
      dropout_rate: 0.5
      output_dim: 256
      layer_num: 2
      bidirectional: true

    return_with_input: true
    return_sentence_level_hidden: false

  decoder:
    _model_target_: model.decoder.BaseDecoder

    interaction:
      _model_target_: model.decoder.interaction.SlotGatedInteraction
      remove_slot_attn: false
      output_dim: 256
      dropout_rate: 0.4

    intent_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "intent"
      ignore_index: -100

    slot_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "slot"
      ignore_index: -100
config/reproduction/atis/stack-propagation.yaml
ADDED
@@ -0,0 +1,109 @@
device: "NVIDIA GeForce RTX 2080 Ti"

base:
  name: "OpenSLUv1"
  train: true
  test: true
  device: cuda
  seed: 42
  epoch_num: 300
  batch_size: 16

model_manager:
  load_dir: null
  save_dir: save/stack-propagation-atis
  save_mode: save-by-eval # save-by-step
  # save_step: 100
  max_save_num: 1

accelerator:
  use_accelerator: false

dataset:
  dataset_name: atis

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - slot_f1
    - EMA

tokenizer:
  _tokenizer_name_: word_tokenizer
  _padding_side_: right
  _align_mode_: fast
  _to_lower_case_: true
  add_special_tokens: false
  max_length: 512

optimizer:
  _model_target_: torch.optim.Adam
  _model_partial_: true
  lr: 0.001
  weight_decay: 1e-6

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name: "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.OpenSLUModel

  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: self-attention-lstm

    embedding:
      embedding_dim: 256
      dropout_rate: 0.55

    lstm:
      layer_num: 1
      bidirectional: true
      output_dim: 256
      dropout_rate: 0.5

    attention:
      hidden_dim: 1024
      output_dim: 128
      dropout_rate: 0.6

    return_with_input: true
    return_sentence_level_hidden: false

  decoder:
    _model_target_: model.decoder.StackPropagationDecoder
    interaction:
      _model_target_: model.decoder.interaction.StackInteraction
      differentiable: false

    intent_classifier:
      _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
      layer_num: 1
      bidirectional: false
      force_ratio: 0.9
      hidden_dim: 64
      embedding_dim: 8
      ignore_index: -100
      dropout_rate: 0.5
      mode: "token-level-intent"
      use_multi: false
      return_sentence_level: true

    slot_classifier:
      _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
      layer_num: 1
      bidirectional: false
      force_ratio: 0.9
      hidden_dim: 64
      embedding_dim: 32
      ignore_index: -100
      dropout_rate: 0.55
      mode: "slot"
      use_multi: false
      return_sentence_level: false
config/reproduction/mix-atis/agif.yaml
ADDED
@@ -0,0 +1,133 @@
device: "NVIDIA GeForce RTX 3080"

base:
  name: "OpenSLUv1"
  multi_intent: true
  train: true
  test: true
  device: cuda
  seed: 42
  epoch_num: 100
  batch_size: 32
  ignore_index: -100

model_manager:
  load_dir: null
  save_dir: save/agif-mix-atis

accelerator:
  use_accelerator: false

dataset:
  dataset_name: mix-atis

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - intent_f1
    - slot_f1
    - EMA

tokenizer:
  _tokenizer_name_: word_tokenizer
  _padding_side_: right
  _align_mode_: fast
  add_special_tokens: false
  max_length: 512

optimizer:
  _model_target_: torch.optim.Adam
  _model_partial_: true
  lr: 0.001
  weight_decay: 1e-6

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name: "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.OpenSLUModel

  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: self-attention-lstm

    embedding:
      embedding_dim: 128
      dropout_rate: 0.4

    lstm:
      layer_num: 1
      bidirectional: true
      output_dim: 256
      dropout_rate: 0.4

    attention:
      hidden_dim: 1024
      output_dim: 128
      dropout_rate: 0.4

    unflat_attention:
      dropout_rate: 0.4
    output_dim: "{model.encoder.lstm.output_dim} + {model.encoder.attention.output_dim}"
    return_with_input: true
    return_sentence_level_hidden: true

  decoder:
    _model_target_: model.decoder.AGIFDecoder
    # teacher_forcing: true
    interaction:
      _model_target_: model.decoder.interaction.AGIFInteraction
      intent_embedding_dim: 128
      input_dim: "{model.encoder.output_dim}"
      hidden_dim: 128
      output_dim: "{model.decoder.interaction.intent_embedding_dim}"
      dropout_rate: 0.4
      alpha: 0.2
      num_heads: 4
      num_layers: 2
      row_normalized: true

    intent_classifier:
      _model_target_: model.decoder.classifier.MLPClassifier
      mode: "intent"
      mlp:
        - _model_target_: torch.nn.Linear
          in_features: "{model.encoder.output_dim}"
          out_features: 256
        - _model_target_: torch.nn.LeakyReLU
          negative_slope: 0.2
        - _model_target_: torch.nn.Linear
          in_features: 256
          out_features: "{base.intent_label_num}"
      dropout_rate: 0.4
      loss_fn:
        _model_target_: torch.nn.BCEWithLogitsLoss
      use_multi: "{base.multi_intent}"
      multi_threshold: 0.5
      return_sentence_level: true
      ignore_index: -100
      weight: 0.3

    slot_classifier:
      _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
      mode: "slot"
      input_dim: "{model.encoder.output_dim}"
      layer_num: 1
      bidirectional: false
      force_ratio: 0.9
      hidden_dim: "{model.decoder.interaction.intent_embedding_dim}"
      embedding_dim: 128
      # loss_fn:
      #   _model_target_: torch.nn.NLLLoss
      ignore_index: -100
      dropout_rate: 0.4
      use_multi: false
      multi_threshold: 0.5
      return_sentence_level: false
      weight: 0.7
config/reproduction/mix-atis/gl-gin.yaml
ADDED
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
+device: "Tesla V100-SXM2-16GB"
+
+base:
+  name: "OpenSLUv1"
+  multi_intent: true
+  train: true
+  test: true
+  device: cuda
+  seed: 42
+  epoch_num: 300
+  batch_size: 32
+  ignore_index: -100
+
+model_manager:
+  load_dir: null
+  save_dir: save/gl-gin-mix-atis
+
+evaluator:
+  best_key: EMA
+  eval_by_epoch: true
+  # eval_step: 1800
+  metric:
+    - intent_acc
+    - intent_f1
+    - slot_f1
+    - EMA
+
+dataset:
+  dataset_name: mix-atis
+
+tokenizer:
+  _tokenizer_name_: word_tokenizer
+  _padding_side_: right
+  _align_mode_: fast
+  add_special_tokens: false
+  max_length: 512
+
+optimizer:
+  _model_target_: torch.optim.Adam
+  _model_partial_: true
+  lr: 0.001
+  weight_decay: 1e-6
+
+scheduler:
+  _model_target_: transformers.get_scheduler
+  _model_partial_: true
+  name: "linear"
+  num_warmup_steps: 0
+
+model:
+  _model_target_: model.OpenSLUModel
+
+  encoder:
+    _model_target_: model.encoder.AutoEncoder
+    encoder_name: self-attention-lstm
+
+    embedding:
+      embedding_dim: 128
+      dropout_rate: 0.4
+
+    lstm:
+      layer_num: 1
+      bidirectional: true
+      output_dim: 256
+      dropout_rate: 0.4
+
+    attention:
+      hidden_dim: 1024
+      output_dim: 128
+      dropout_rate: 0.4
+    output_dim: "{model.encoder.lstm.output_dim} + {model.encoder.attention.output_dim}"
+    return_with_input: true
+    return_sentence_level_hidden: false
+
+  decoder:
+    _model_target_: model.decoder.GLGINDecoder
+    dropout_rate: 0.4
+    interaction:
+      _model_target_: model.decoder.interaction.GLGINInteraction
+      intent_embedding_dim: 64
+      input_dim: "{model.encoder.output_dim}"
+      hidden_dim: 256
+      output_dim: "{model.decoder.interaction.intent_embedding_dim}"
+      dropout_rate: 0.4
+      alpha: 0.2
+      num_heads: 8
+      num_layers: 2
+      row_normalized: true
+      slot_graph_window: 1
+      intent_label_num: "{base.intent_label_num}"
+
+    intent_classifier:
+      _model_target_: model.decoder.classifier.MLPClassifier
+      mode: "token-level-intent"
+      mlp:
+        - _model_target_: torch.nn.Linear
+          in_features: "{model.encoder.output_dim}"
+          out_features: 256
+        - _model_target_: torch.nn.LeakyReLU
+          negative_slope: 0.2
+        - _model_target_: torch.nn.Linear
+          in_features: 256
+          out_features: "{base.intent_label_num}"
+      loss_fn:
+        _model_target_: torch.nn.BCEWithLogitsLoss
+      dropout_rate: 0.4
+      use_multi: "{base.multi_intent}"
+      multi_threshold: 0.5
+      return_sentence_level: true
+      ignore_index: "{base.ignore_index}"
+
+    slot_classifier:
+      _model_target_: model.decoder.classifier.MLPClassifier
+      mode: "slot"
+      mlp:
+        - _model_target_: torch.nn.Linear
+          in_features: "{model.decoder.interaction.output_dim}"
+          out_features: "{model.decoder.interaction.output_dim}"
+        - _model_target_: torch.nn.LeakyReLU
+          negative_slope: 0.2
+        - _model_target_: torch.nn.Linear
+          in_features: "{model.decoder.interaction.output_dim}"
+          out_features: "{base.slot_label_num}"
+      ignore_index: "{base.ignore_index}"
+      dropout_rate: 0.4
+      use_multi: false
+      multi_threshold: 0.5
+      return_sentence_level: false
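The quoted values such as "{model.encoder.lstm.output_dim} + {model.encoder.attention.output_dim}" are references into this same config tree; here the two dimensions are summed, so the encoder output is 256 + 128 = 384. A hedged sketch of one way such references can be resolved (OpenSLU's actual resolver may differ):

import re

def resolve(template: str, cfg: dict):
    """Replace '{a.b.c}' references with values from a nested dict, then
    evaluate simple arithmetic such as '256 + 128'."""
    def lookup(path):
        node = cfg
        for key in path.split("."):
            node = node[key]
        return node

    filled = re.sub(r"\{([^}]+)\}", lambda m: str(lookup(m.group(1))), template)
    if re.fullmatch(r"[\d\s+\-*/()]+", filled):   # evaluate pure arithmetic only
        return eval(filled)
    return filled

cfg = {"model": {"encoder": {"lstm": {"output_dim": 256},
                             "attention": {"output_dim": 128}}}}
print(resolve("{model.encoder.lstm.output_dim} + {model.encoder.attention.output_dim}", cfg))  # 384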
config/reproduction/mix-atis/vanilla.yaml
ADDED
@@ -0,0 +1,95 @@
+base:
+  name: "OpenSLUv1"
+  multi_intent: true
+  train: true
+  test: true
+  device: cuda
+  seed: 42
+  epoch_num: 100
+  batch_size: 16
+  ignore_index: -100
+
+model_manager:
+  load_dir: null
+  save_dir: save/vanilla-mix-atis
+
+evaluator:
+  best_key: EMA
+  eval_by_epoch: true
+  # eval_step: 1800
+  metric:
+    - intent_acc
+    - intent_f1
+    - slot_f1
+    - EMA
+
+dataset:
+  dataset_name: atis
+
+tokenizer:
+  _tokenizer_name_: word_tokenizer
+  _padding_side_: right
+  _align_mode_: fast
+  add_special_tokens: false
+  max_length: 512
+
+optimizer:
+  _model_target_: torch.optim.Adam
+  _model_partial_: true
+  lr: 0.001
+  weight_decay: 1e-6
+
+scheduler:
+  _model_target_: transformers.get_scheduler
+  _model_partial_: true
+  name: "linear"
+  num_warmup_steps: 0
+
+model:
+  _model_target_: model.OpenSLUModel
+
+  encoder:
+    _model_target_: model.encoder.AutoEncoder
+    encoder_name: self-attention-lstm
+
+    embedding:
+      embedding_dim: 128
+      dropout_rate: 0.4
+
+    lstm:
+      layer_num: 1
+      bidirectional: true
+      output_dim: 256
+      dropout_rate: 0.4
+
+    attention:
+      hidden_dim: 1024
+      output_dim: 128
+      dropout_rate: 0.4
+    output_dim: "{model.encoder.lstm.output_dim} + {model.encoder.attention.output_dim}"
+    return_with_input: true
+    return_sentence_level_hidden: true
+
+  decoder:
+    _model_target_: model.decoder.BaseDecoder
+
+    intent_classifier:
+      _model_target_: model.decoder.classifier.LinearClassifier
+      mode: "intent"
+      input_dim: "{model.encoder.output_dim}"
+      loss_fn:
+        _model_target_: torch.nn.BCEWithLogitsLoss
+      use_multi: "{base.multi_intent}"
+      multi_threshold: 0.5
+      return_sentence_level: true
+      ignore_index: "{base.ignore_index}"
+
+
+    slot_classifier:
+      _model_target_: model.decoder.classifier.LinearClassifier
+      mode: "slot"
+      input_dim: "{model.encoder.output_dim}"
+      use_multi: false
+      multi_threshold: 0.5
+      ignore_index: "{base.ignore_index}"
+      return_sentence_level: false
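Throughout these configs, _model_target_ names the class or function to build from the remaining keys, and _model_partial_: true defers the call, since an optimizer still needs model parameters (and a scheduler its step budget) that only exist later. A sketch of the idea under that assumption; the instantiate helper is ours, not OpenSLU's:

import functools
import importlib

import torch

def instantiate(cfg: dict):
    """Build the object named by _model_target_ from the remaining keys;
    with _model_partial_: true, return a functools.partial so the caller
    can supply the still-missing arguments later."""
    cfg = dict(cfg)
    module_path, _, attr = cfg.pop("_model_target_").rpartition(".")
    target = getattr(importlib.import_module(module_path), attr)
    if cfg.pop("_model_partial_", False):
        return functools.partial(target, **cfg)
    return target(**cfg)

make_optimizer = instantiate({"_model_target_": "torch.optim.Adam",
                              "_model_partial_": True,
                              "lr": 0.001, "weight_decay": 1e-6})
optimizer = make_optimizer(torch.nn.Linear(4, 2).parameters())  # parameters arrive later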
config/reproduction/mix-snips/agif.yaml
ADDED
@@ -0,0 +1,131 @@
+device: "Tesla P100-PCIE-16GB"
+
+base:
+  name: "OpenSLUv1"
+  multi_intent: true
+  train: true
+  test: true
+  device: cuda
+  seed: 42
+  epoch_num: 50
+  batch_size: 64
+  ignore_index: -100
+
+model_manager:
+  load_dir: null
+  save_dir: save/agif-mix-snips
+
+evaluator:
+  best_key: EMA
+  eval_by_epoch: true
+  # eval_step: 1800
+  metric:
+    - intent_acc
+    - intent_f1
+    - slot_f1
+    - EMA
+
+accelerator:
+  use_accelerator: false
+
+dataset:
+  dataset_name: mix-snips
+
+tokenizer:
+  _tokenizer_name_: word_tokenizer
+  _padding_side_: right
+  _align_mode_: fast
+  add_special_tokens: false
+  max_length: 512
+
+optimizer:
+  _model_target_: torch.optim.Adam
+  _model_partial_: true
+  lr: 0.001
+  weight_decay: 1e-6
+
+scheduler:
+  _model_target_: transformers.get_scheduler
+  _model_partial_: true
+  name: "linear"
+  num_warmup_steps: 0
+
+model:
+  _model_target_: model.OpenSLUModel
+
+  encoder:
+    _model_target_: model.encoder.AutoEncoder
+    encoder_name: self-attention-lstm
+
+    embedding:
+      embedding_dim: 128
+      dropout_rate: 0.4
+
+    lstm:
+      layer_num: 1
+      bidirectional: true
+      output_dim: 256
+      dropout_rate: 0.4
+
+    attention:
+      hidden_dim: 1024
+      output_dim: 128
+      dropout_rate: 0.4
+
+    unflat_attention:
+      dropout_rate: 0.4
+    output_dim: "{model.encoder.lstm.output_dim} + {model.encoder.attention.output_dim}"
+    return_with_input: true
+    return_sentence_level_hidden: true
+
+  decoder:
+    _model_target_: model.decoder.AGIFDecoder
+    # teacher_forcing: true
+    interaction:
+      _model_target_: model.decoder.interaction.AGIFInteraction
+      intent_embedding_dim: 128
+      input_dim: "{model.encoder.output_dim}"
+      hidden_dim: 128
+      output_dim: "{model.decoder.interaction.intent_embedding_dim}"
+      dropout_rate: 0.4
+      alpha: 0.2
+      num_heads: 4
+      num_layers: 2
+      row_normalized: true
+
+    intent_classifier:
+      _model_target_: model.decoder.classifier.MLPClassifier
+      mode: "intent"
+      mlp:
+        - _model_target_: torch.nn.Linear
+          in_features: "{model.encoder.output_dim}"
+          out_features: 256
+        - _model_target_: torch.nn.LeakyReLU
+          negative_slope: 0.2
+        - _model_target_: torch.nn.Linear
+          in_features: 256
+          out_features: "{base.intent_label_num}"
+      dropout_rate: 0.4
+      loss_fn:
+        _model_target_: torch.nn.BCEWithLogitsLoss
+      use_multi: "{base.multi_intent}"
+      multi_threshold: 0.5
+      return_sentence_level: true
+      ignore_index: -100
+      weight: 0.3
+
+    slot_classifier:
+      _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
+      mode: "slot"
+      input_dim: "{model.encoder.output_dim}"
+      layer_num: 1
+      bidirectional: false
+      force_ratio: 0.9
+      hidden_dim: "{model.decoder.interaction.intent_embedding_dim}"
+      embedding_dim: 128
+      ignore_index: -100
+      dropout_rate: 0.4
+      use_multi: false
+      multi_threshold: 0.5
+      return_sentence_level: false
+      weight: 0.7
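The weight: 0.3 / weight: 0.7 keys on the two classifiers suggest a weighted joint objective, biased here toward slot filling. A minimal sketch of how such task losses would combine:

import torch

def joint_loss(intent_loss: torch.Tensor, slot_loss: torch.Tensor,
               intent_weight: float = 0.3, slot_weight: float = 0.7) -> torch.Tensor:
    """Weighted sum of the two task losses; a single backward pass then
    trains both heads and the shared encoder."""
    return intent_weight * intent_loss + slot_weight * slot_loss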
config/reproduction/mix-snips/gl-gin.yaml
ADDED
@@ -0,0 +1,131 @@
+device: "NVIDIA GeForce RTX 2080 Ti"
+
+base:
+  name: "OpenSLUv1"
+  multi_intent: true
+  train: true
+  test: true
+  device: cuda
+  seed: 42
+  epoch_num: 50
+  batch_size: 32
+  ignore_index: -100
+
+
+model_manager:
+  load_dir: null
+  save_dir: save/gl-gin-mix-snips
+
+evaluator:
+  best_key: EMA
+  eval_by_epoch: false
+  eval_step: 1800
+  metric:
+    - intent_acc
+    - intent_f1
+    - slot_f1
+    - EMA
+
+dataset:
+  dataset_name: mix-snips
+
+tokenizer:
+  _tokenizer_name_: word_tokenizer
+  _padding_side_: right
+  _align_mode_: fast
+  add_special_tokens: false
+  max_length: 512
+
+optimizer:
+  _model_target_: torch.optim.Adam
+  _model_partial_: true
+  lr: 0.001
+  weight_decay: 1e-6
+
+scheduler:
+  _model_target_: transformers.get_scheduler
+  _model_partial_: true
+  name: "linear"
+  num_warmup_steps: 0
+
+model:
+  _model_target_: model.OpenSLUModel
+
+  encoder:
+    _model_target_: model.encoder.AutoEncoder
+    encoder_name: self-attention-lstm
+
+    embedding:
+      embedding_dim: 128
+      dropout_rate: 0.4
+
+    lstm:
+      layer_num: 2
+      bidirectional: true
+      output_dim: 256
+      dropout_rate: 0.4
+
+    attention:
+      hidden_dim: 1024
+      output_dim: 128
+      dropout_rate: 0.4
+    output_dim: "{model.encoder.lstm.output_dim} + {model.encoder.attention.output_dim}"
+    return_with_input: true
+    return_sentence_level_hidden: false
+
+  decoder:
+    _model_target_: model.decoder.GLGINDecoder
+    dropout_rate: 0.4
+    interaction:
+      _model_target_: model.decoder.interaction.GLGINInteraction
+      intent_embedding_dim: 256
+      input_dim: "{model.encoder.output_dim}"
+      hidden_dim: 256
+      output_dim: "{model.decoder.interaction.intent_embedding_dim}"
+      dropout_rate: 0.4
+      alpha: 0.2
+      num_heads: 4
+      num_layers: 2
+      row_normalized: true
+      slot_graph_window: 1
+      intent_label_num: "{base.intent_label_num}"
+
+    intent_classifier:
+      _model_target_: model.decoder.classifier.MLPClassifier
+      mode: "token-level-intent"
+      mlp:
+        - _model_target_: torch.nn.Linear
+          in_features: "{model.encoder.output_dim}"
+          out_features: 256
+        - _model_target_: torch.nn.LeakyReLU
+          negative_slope: 0.2
+        - _model_target_: torch.nn.Linear
+          in_features: 256
+          out_features: "{base.intent_label_num}"
+      loss_fn:
+        _model_target_: torch.nn.BCEWithLogitsLoss
+      dropout_rate: 0.4
+      use_multi: "{base.multi_intent}"
+      multi_threshold: 0.5
+      return_sentence_level: true
+      ignore_index: "{base.ignore_index}"
+      weight: 0.2
+
+    slot_classifier:
+      _model_target_: model.decoder.classifier.MLPClassifier
+      mode: "slot"
+      mlp:
+        - _model_target_: torch.nn.Linear
+          in_features: "{model.decoder.interaction.output_dim}"
+          out_features: "{model.decoder.interaction.output_dim}"
+        - _model_target_: torch.nn.LeakyReLU
+          negative_slope: 0.2
+        - _model_target_: torch.nn.Linear
+          in_features: "{model.decoder.interaction.output_dim}"
+          out_features: "{base.slot_label_num}"
+      ignore_index: "{base.ignore_index}"
+      dropout_rate: 0.4
+      use_multi: false
+      multi_threshold: 0.5
+      weight: 0.8
+      return_sentence_level: false
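With mode: "token-level-intent", the intent head scores every token; GL-GIN-style models then derive utterance-level intents by token voting. A hedged sketch of that aggregation (the voting rule follows the GL-GIN paper; OpenSLU's implementation may differ in detail):

import torch

def vote_token_intents(token_logits: torch.Tensor, seq_len: int,
                       threshold: float = 0.5) -> list:
    """Aggregate token-level multi-intent predictions for one utterance.

    token_logits: (max_len, intent_label_num) raw scores; an intent is kept
    when it fires (sigmoid > threshold) on more than half of the real tokens.
    """
    active = torch.sigmoid(token_logits[:seq_len]) > threshold  # (seq_len, L) bool
    votes = active.sum(dim=0)                                   # tokens voting per intent
    return (votes > seq_len // 2).nonzero(as_tuple=True)[0].tolist()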
config/reproduction/mix-snips/vanilla.yaml
ADDED
@@ -0,0 +1,95 @@
+base:
+  name: "OpenSLUv1"
+  multi_intent: true
+  train: true
+  test: true
+  device: cuda
+  seed: 42
+  epoch_num: 100
+  batch_size: 16
+  ignore_index: -100
+
+model_manager:
+  load_dir: null
+  save_dir: save/vanilla-mix-snips
+
+evaluator:
+  best_key: EMA
+  eval_by_epoch: true
+  # eval_step: 1800
+  metric:
+    - intent_acc
+    - intent_f1
+    - slot_f1
+    - EMA
+
+dataset:
+  dataset_name: atis
+
+tokenizer:
+  _tokenizer_name_: word_tokenizer
+  _padding_side_: right
+  _align_mode_: fast
+  add_special_tokens: false
+  max_length: 512
+
+optimizer:
+  _model_target_: torch.optim.Adam
+  _model_partial_: true
+  lr: 0.001
+  weight_decay: 1e-6
+
+scheduler:
+  _model_target_: transformers.get_scheduler
+  _model_partial_: true
+  name: "linear"
+  num_warmup_steps: 0
+
+model:
+  _model_target_: model.OpenSLUModel
+
+  encoder:
+    _model_target_: model.encoder.AutoEncoder
+    encoder_name: self-attention-lstm
+
+    embedding:
+      embedding_dim: 128
+      dropout_rate: 0.4
+
+    lstm:
+      layer_num: 1
+      bidirectional: true
+      output_dim: 256
+      dropout_rate: 0.4
+
+    attention:
+      hidden_dim: 1024
+      output_dim: 128
+      dropout_rate: 0.4
+    output_dim: "{model.encoder.lstm.output_dim} + {model.encoder.attention.output_dim}"
+    return_with_input: true
+    return_sentence_level_hidden: true
+
+  decoder:
+    _model_target_: model.decoder.BaseDecoder
+
+    intent_classifier:
+      _model_target_: model.decoder.classifier.LinearClassifier
+      mode: "intent"
+      input_dim: "{model.encoder.output_dim}"
+      loss_fn:
+        _model_target_: torch.nn.BCEWithLogitsLoss
+      use_multi: "{base.multi_intent}"
+      multi_threshold: 0.5
+      return_sentence_level: true
+      ignore_index: "{base.ignore_index}"
+
+
+    slot_classifier:
+      _model_target_: model.decoder.classifier.LinearClassifier
+      mode: "slot"
+      input_dim: "{model.encoder.output_dim}"
+      use_multi: false
+      multi_threshold: 0.5
+      ignore_index: "{base.ignore_index}"
+      return_sentence_level: false
config/reproduction/snips/bi-model.yaml
ADDED
@@ -0,0 +1,104 @@
+device: "Tesla V100-SXM2-16GB"
+
+base:
+  name: "OpenSLUv1"
+  train: true
+  test: true
+  device: cuda
+  seed: 42
+  epoch_num: 300
+  batch_size: 16
+
+model_manager:
+  load_dir: null
+  save_dir: save/bi-model-snips
+
+evaluator:
+  best_key: EMA
+  eval_by_epoch: true
+  # eval_step: 1800
+  metric:
+    - intent_acc
+    - slot_f1
+    - EMA
+
+accelerator:
+  use_accelerator: false
+
+dataset:
+  dataset_name: snips
+
+tokenizer:
+  _tokenizer_name_: word_tokenizer
+  _padding_side_: right
+  _align_mode_: fast
+  add_special_tokens: false
+  max_length: 512
+
+optimizer:
+  _model_target_: torch.optim.Adam
+  _model_partial_: true
+  lr: 0.001
+  weight_decay: 1e-6
+
+scheduler:
+  _model_target_: transformers.get_scheduler
+  _model_partial_: true
+  name: "linear"
+  num_warmup_steps: 0
+
+model:
+  _model_target_: model.OpenSLUModel
+
+  encoder:
+    _model_target_: model.encoder.BiEncoder
+    intent_encoder:
+      _model_target_: model.encoder.AutoEncoder
+      encoder_name: lstm
+
+      embedding:
+        embedding_dim: 256
+        dropout_rate: 0.5
+
+      lstm:
+        dropout_rate: 0.5
+        output_dim: 256
+        layer_num: 2
+        bidirectional: true
+
+      return_with_input: true
+      return_sentence_level_hidden: false
+
+    slot_encoder:
+      _model_target_: model.encoder.AutoEncoder
+      encoder_name: lstm
+
+      embedding:
+        embedding_dim: 256
+        dropout_rate: 0.5
+
+      lstm:
+        dropout_rate: 0.5
+        output_dim: 256
+        layer_num: 2
+        bidirectional: true
+
+      return_with_input: true
+      return_sentence_level_hidden: false
+
+  decoder:
+    _model_target_: model.decoder.BaseDecoder
+    interaction:
+      _model_target_: model.decoder.interaction.BiModelInteraction
+      output_dim: 256
+      dropout_rate: 0.5
+
+    intent_classifier:
+      _model_target_: model.decoder.classifier.LinearClassifier
+      mode: "intent"
+      ignore_index: -100
+
+    slot_classifier:
+      _model_target_: model.decoder.classifier.LinearClassifier
+      mode: "slot"
+      ignore_index: -100
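The bi-model encoder wires two fully independent LSTM channels, one per task; only the decoder's BiModelInteraction lets them exchange information. A hedged sketch of the wrapper concept (class name and interface are illustrative, not model.encoder.BiEncoder's actual signature):

import torch.nn as nn

class BiEncoderSketch(nn.Module):
    """Two task-specific encoders over the same input."""
    def __init__(self, intent_encoder: nn.Module, slot_encoder: nn.Module):
        super().__init__()
        self.intent_encoder = intent_encoder
        self.slot_encoder = slot_encoder

    def forward(self, inputs):
        # Each channel reads the same tokens through its own parameters;
        # the decoder-side interaction later mixes the two hidden states.
        return self.intent_encoder(inputs), self.slot_encoder(inputs)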
config/reproduction/snips/dca_net.yaml
ADDED
@@ -0,0 +1,88 @@
+device: "NVIDIA GeForce RTX 2080 Ti"
+
+base:
+  name: "OpenSLUv1"
+  train: true
+  test: true
+  device: cuda
+  seed: 42
+  epoch_num: 300
+  batch_size: 16
+
+model_manager:
+  load_dir: null
+  save_dir: save/dca-net-snips
+
+evaluator:
+  best_key: EMA
+  eval_by_epoch: true
+  # eval_step: 1800
+  metric:
+    - intent_acc
+    - slot_f1
+    - EMA
+
+accelerator:
+  use_accelerator: false
+
+dataset:
+  dataset_name: snips
+
+tokenizer:
+  _tokenizer_name_: word_tokenizer
+  _padding_side_: right
+  _align_mode_: fast
+  add_special_tokens: false
+  max_length: 512
+
+optimizer:
+  _model_target_: torch.optim.Adam
+  _model_partial_: true
+  lr: 0.001
+  weight_decay: 1e-6
+
+scheduler:
+  _model_target_: transformers.get_scheduler
+  _model_partial_: true
+  name: "linear"
+  num_warmup_steps: 0
+
+model:
+  _model_target_: model.OpenSLUModel
+  encoder:
+    _model_target_: model.encoder.AutoEncoder
+    encoder_name: lstm
+
+    embedding:
+      load_embedding_name: glove.6B.300d.txt
+      embedding_dim: 300
+      dropout_rate: 0.4
+
+    lstm:
+      dropout_rate: 0.4
+      output_dim: 128
+      layer_num: 2
+      bidirectional: true
+    output_dim: "{model.encoder.lstm.output_dim}"
+    return_with_input: true
+    return_sentence_level_hidden: false
+
+  decoder:
+    _model_target_: model.decoder.DCANetDecoder
+    interaction:
+      _model_target_: model.decoder.interaction.DCANetInteraction
+      output_dim: "{model.encoder.output_dim}"
+      attention_dropout: 0.4
+      num_attention_heads: 8
+
+    intent_classifier:
+      _model_target_: model.decoder.classifier.LinearClassifier
+      mode: "intent"
+      input_dim: "{model.encoder.output_dim}"
+      ignore_index: -100
+
+    slot_classifier:
+      _model_target_: model.decoder.classifier.LinearClassifier
+      mode: "slot"
+      input_dim: "{model.encoder.output_dim}"
+      ignore_index: -100
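load_embedding_name: glove.6B.300d.txt requests pretrained GloVe vectors for the 300-dimensional embedding layer. A sketch of initializing an embedding table from such a file (the load_glove helper is illustrative, not OpenSLU's loader):

import numpy as np
import torch
import torch.nn as nn

def load_glove(path: str, vocab: dict, dim: int = 300) -> nn.Embedding:
    """Fill an embedding table from a GloVe text file; tokens absent from
    GloVe keep their random initialization. vocab maps token -> row index."""
    weight = np.random.normal(scale=0.1, size=(len(vocab), dim)).astype("float32")
    with open(path, encoding="utf-8") as f:
        for line in f:
            parts = line.rstrip().split(" ")
            token, values = parts[0], parts[1:]
            if token in vocab and len(values) == dim:
                weight[vocab[token]] = np.asarray(values, dtype="float32")
    return nn.Embedding.from_pretrained(torch.from_numpy(weight), freeze=False)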
config/reproduction/snips/deberta.yaml
ADDED
@@ -0,0 +1,70 @@
+device: "Tesla V100-SXM2-16GB"
+
+base:
+  name: "OpenSLUv1"
+  train: true
+  test: true
+  device: cuda
+  seed: 42
+  epoch_num: 300
+  batch_size: 32
+
+model_manager:
+  load_dir: null
+  save_dir: save/deberta-snips
+
+evaluator:
+  best_key: EMA
+  eval_by_epoch: true
+  # eval_step: 1800
+  metric:
+    - intent_acc
+    - slot_f1
+    - EMA
+
+accelerator:
+  use_accelerator: false
+
+dataset:
+  dataset_name: snips
+
+tokenizer:
+  _tokenizer_name_: microsoft/deberta-v3-base
+  _padding_side_: right
+  add_special_tokens: true
+  max_length: 512
+
+optimizer:
+  _model_target_: torch.optim.AdamW
+  _model_partial_: true
+  lr: 2e-5
+  weight_decay: 1e-8
+
+scheduler:
+  _model_target_: transformers.get_scheduler
+  _model_partial_: true
+  name: "linear"
+  num_warmup_steps: 0
+
+model:
+  _model_target_: model.open_slu_model.OpenSLUModel
+  ignore_index: -100
+  encoder:
+    _model_target_: model.encoder.AutoEncoder
+    encoder_name: microsoft/deberta-v3-base
+    output_dim: 768
+    return_with_input: true
+    return_sentence_level_hidden: true
+
+  decoder:
+    _model_target_: model.decoder.base_decoder.BaseDecoder
+    intent_classifier:
+      _model_target_: model.decoder.classifier.LinearClassifier
+      mode: "intent"
+      ignore_index: -100
+
+
+    slot_classifier:
+      _model_target_: model.decoder.classifier.LinearClassifier
+      mode: "slot"
+      ignore_index: -100
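The deberta, electra, and roberta configs share one layout: a pretrained encoder with a linear head per task, so switching backbones mostly means changing _tokenizer_name_/encoder_name and output_dim (768 for deberta-v3-base, 256 for electra-small). A sketch with the Hugging Face transformers API; the label counts below are illustrative:

import torch.nn as nn
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("microsoft/deberta-v3-base")
backbone = AutoModel.from_pretrained("microsoft/deberta-v3-base")

batch = tokenizer(["play a song by queen"], return_tensors="pt")
hidden = backbone(**batch).last_hidden_state      # (1, seq_len, 768)

intent_head = nn.Linear(768, 7)      # e.g. 7 intents (illustrative count)
slot_head = nn.Linear(768, 72)       # e.g. 72 slot labels (illustrative count)
intent_logits = intent_head(hidden[:, 0])  # sentence level via the first token
slot_logits = slot_head(hidden)            # one prediction per subword token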
config/reproduction/snips/electra.yaml
ADDED
@@ -0,0 +1,69 @@
+device: "Tesla V100-SXM2-16GB"
+base:
+  name: "OpenSLUv1"
+  train: true
+  test: true
+  device: cuda
+  seed: 42
+  epoch_num: 300
+  batch_size: 32
+
+model_manager:
+  load_dir: null
+  save_dir: save/electra-snips
+
+evaluator:
+  best_key: EMA
+  eval_by_epoch: true
+  # eval_step: 1800
+  metric:
+    - intent_acc
+    - slot_f1
+    - EMA
+
+accelerator:
+  use_accelerator: false
+
+dataset:
+  dataset_name: snips
+
+tokenizer:
+  _tokenizer_name_: google/electra-small-discriminator
+  _padding_side_: right
+  add_special_tokens: true
+  max_length: 512
+
+optimizer:
+  _model_target_: torch.optim.AdamW
+  _model_partial_: true
+  lr: 2e-5
+  weight_decay: 1e-8
+
+scheduler:
+  _model_target_: transformers.get_scheduler
+  _model_partial_: true
+  name: "linear"
+  num_warmup_steps: 0
+
+model:
+  _model_target_: model.open_slu_model.OpenSLUModel
+  ignore_index: -100
+  encoder:
+    _model_target_: model.encoder.AutoEncoder
+    encoder_name: google/electra-small-discriminator
+    output_dim: 256
+    return_with_input: true
+    return_sentence_level_hidden: true
+
+  decoder:
+    _model_target_: model.decoder.base_decoder.BaseDecoder
+    intent_classifier:
+      _model_target_: model.decoder.classifier.LinearClassifier
+      mode: "intent"
+      ignore_index: -100
+
+
+    slot_classifier:
+      _model_target_: model.decoder.classifier.LinearClassifier
+      mode: "slot"
+      ignore_index: -100
config/reproduction/snips/joint-bert.yaml
ADDED
@@ -0,0 +1,75 @@
+device: "Tesla V100-SXM2-16GB"
+
+base:
+  name: "OpenSLUv1"
+  train: true
+  test: true
+  device: cuda
+  seed: 42
+  epoch_num: 300
+  batch_size: 128
+
+model_manager:
+  load_dir: null
+  save_dir: save/joint-bert-snips
+
+evaluator:
+  best_key: EMA
+  eval_by_epoch: true
+  # eval_step: 1800
+  metric:
+    - intent_acc
+    - slot_f1
+    - EMA
+
+accelerator:
+  use_accelerator: false
+
+dataset:
+  dataset_name: snips
+
+metric:
+  - intent_acc
+  - slot_f1
+  - EMA
+
+tokenizer:
+  _tokenizer_name_: bert-base-uncased
+  _padding_side_: right
+  _align_mode_: general
+  add_special_tokens: true
+
+optimizer:
+  _model_target_: torch.optim.AdamW
+  _model_partial_: true
+  lr: 4e-6
+  weight_decay: 1e-8
+
+scheduler:
+  _model_target_: transformers.get_scheduler
+  _model_partial_: true
+  name: "linear"
+  num_warmup_steps: 0
+
+model:
+  _model_target_: model.open_slu_model.OpenSLUModel
+  ignore_index: -100
+  encoder:
+    _model_target_: model.encoder.AutoEncoder
+    encoder_name: bert-base-uncased
+    output_dim: 768
+    return_with_input: true
+    return_sentence_level_hidden: true
+
+  decoder:
+    _model_target_: model.decoder.base_decoder.BaseDecoder
+    intent_classifier:
+      _model_target_: model.decoder.classifier.LinearClassifier
+      mode: "intent"
+      ignore_index: -100
+
+
+    slot_classifier:
+      _model_target_: model.decoder.classifier.LinearClassifier
+      mode: "slot"
+      ignore_index: -100
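Because the scheduler block carries _model_partial_: true, transformers.get_scheduler is presumably invoked later, once the optimizer and the step budget exist. A usage sketch of that call (the num_training_steps value is illustrative):

import torch
from transformers import get_scheduler

model = torch.nn.Linear(768, 7)
optimizer = torch.optim.AdamW(model.parameters(), lr=4e-6, weight_decay=1e-8)

num_training_steps = 300 * 100   # epoch_num x batches per epoch (illustrative)
scheduler = get_scheduler("linear", optimizer=optimizer,
                          num_warmup_steps=0,
                          num_training_steps=num_training_steps)

optimizer.step()   # one scheduler step per optimizer step
scheduler.step()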
config/reproduction/snips/roberta.yaml
ADDED
@@ -0,0 +1,70 @@
+device: "Tesla V100-SXM2-16GB"
+
+base:
+  name: "OpenSLUv1"
+  train: true
+  test: true
+  device: cuda
+  seed: 42
+  epoch_num: 300
+  batch_size: 32
+
+model_manager:
+  load_dir: null
+  save_dir: save/roberta-snips
+
+evaluator:
+  best_key: EMA
+  eval_by_epoch: true
+  # eval_step: 1800
+  metric:
+    - intent_acc
+    - slot_f1
+    - EMA
+
+accelerator:
+  use_accelerator: false
+
+dataset:
+  dataset_name: snips
+
+tokenizer:
+  _tokenizer_name_: roberta-base
+  _padding_side_: right
+  add_special_tokens: true
+  max_length: 512
+
+optimizer:
+  _model_target_: torch.optim.AdamW
+  _model_partial_: true
+  lr: 2e-5
+  weight_decay: 1e-8
+
+scheduler:
+  _model_target_: transformers.get_scheduler
+  _model_partial_: true
+  name: "linear"
+  num_warmup_steps: 0
+
+model:
+  _model_target_: model.open_slu_model.OpenSLUModel
+  ignore_index: -100
+  encoder:
+    _model_target_: model.encoder.AutoEncoder
+    encoder_name: roberta-base
+    output_dim: 768
+    return_with_input: true
+    return_sentence_level_hidden: true
+
+  decoder:
+    _model_target_: model.decoder.base_decoder.BaseDecoder
+    intent_classifier:
+      _model_target_: model.decoder.classifier.LinearClassifier
+      mode: "intent"
+      ignore_index: -100
+
+
+    slot_classifier:
+      _model_target_: model.decoder.classifier.LinearClassifier
+      mode: "slot"
+      ignore_index: -100
config/reproduction/snips/slot-gated.yaml
ADDED
@@ -0,0 +1,87 @@
+device: "NVIDIA GeForce RTX 2080 Ti"
+
+base:
+  name: "OpenSLUv1"
+  train: true
+  test: true
+  device: cuda
+  seed: 42
+  epoch_num: 300
+  batch_size: 16
+
+model_manager:
+  load_dir: null
+  save_dir: save/slot-gated-snips
+
+evaluator:
+  best_key: EMA
+  eval_by_epoch: true
+  # eval_step: 1800
+  metric:
+    - intent_acc
+    - slot_f1
+    - EMA
+
+accelerator:
+  use_accelerator: false
+
+dataset:
+  dataset_name: snips
+
+tokenizer:
+  _tokenizer_name_: word_tokenizer
+  _padding_side_: right
+  _align_mode_: fast
+  add_special_tokens: false
+  max_length: 512
+
+optimizer:
+  _model_target_: torch.optim.Adam
+  _model_partial_: true
+  lr: 0.001
+  weight_decay: 1e-6
+
+scheduler:
+  _model_target_: transformers.get_scheduler
+  _model_partial_: true
+  name: "linear"
+  num_warmup_steps: 0
+
+model:
+  _model_target_: model.OpenSLUModel
+  ignore_index: -100
+  encoder:
+    _model_target_: model.encoder.AutoEncoder
+    encoder_name: lstm
+
+    embedding:
+      embedding_dim: 256
+      dropout_rate: 0.4
+
+    lstm:
+      dropout_rate: 0.5
+      output_dim: 256
+      layer_num: 2
+      bidirectional: true
+
+    return_with_input: true
+    return_sentence_level_hidden: false
+
+  decoder:
+    _model_target_: model.decoder.BaseDecoder
+
+    interaction:
+      _model_target_: model.decoder.interaction.SlotGatedInteraction
+      remove_slot_attn: false
+      output_dim: 256
+      dropout_rate: 0.4
+
+    intent_classifier:
+      _model_target_: model.decoder.classifier.LinearClassifier
+      mode: "intent"
+      ignore_index: -100
+
+    slot_classifier:
+      _model_target_: model.decoder.classifier.LinearClassifier
+      mode: "slot"
+      ignore_index: -100
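remove_slot_attn: false keeps the slot-attention branch of the slot-gated model, whose core is a gate that injects intent context into each slot feature before classification. A hedged sketch in the spirit of Goo et al. (2018); names and shapes are ours, not OpenSLU's:

import torch
import torch.nn as nn

class SlotGateSketch(nn.Module):
    """g_t = v . tanh(c_t^slot + W c^intent); the gate rescales slot features."""
    def __init__(self, dim: int):
        super().__init__()
        self.w_intent = nn.Linear(dim, dim, bias=False)
        self.v = nn.Parameter(torch.randn(dim))

    def forward(self, slot_context: torch.Tensor, intent_context: torch.Tensor):
        # slot_context: (batch, seq_len, dim); intent_context: (batch, dim)
        mixed = torch.tanh(slot_context + self.w_intent(intent_context).unsqueeze(1))
        gate = (self.v * mixed).sum(dim=-1)          # (batch, seq_len)
        return slot_context * gate.unsqueeze(-1)     # gated slot features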
config/reproduction/snips/stack-propagation.yaml
ADDED
@@ -0,0 +1,105 @@
+device: "Tesla V100-SXM2-16GB"
+
+base:
+  name: "OpenSLUv1"
+  train: true
+  test: true
+  device: cuda
+  seed: 42
+  epoch_num: 300
+  batch_size: 16
+
+model_manager:
+  load_dir: null
+  save_dir: save/stack-propagation-snips
+
+evaluator:
+  best_key: EMA
+  eval_by_epoch: true
+  # eval_step: 1800
+  metric:
+    - intent_acc
+    - slot_f1
+    - EMA
+
+accelerator:
+  use_accelerator: false
+
+dataset:
+  dataset_name: snips
+
+tokenizer:
+  _tokenizer_name_: word_tokenizer
+  _padding_side_: right
+  _align_mode_: fast
+  add_special_tokens: false
+  max_length: 512
+
+optimizer:
+  _model_target_: torch.optim.Adam
+  _model_partial_: true
+  lr: 0.001
+  weight_decay: 1e-6
+
+scheduler:
+  _model_target_: transformers.get_scheduler
+  _model_partial_: true
+  name: "linear"
+  num_warmup_steps: 0
+
+model:
+  _model_target_: model.OpenSLUModel
+
+  encoder:
+    _model_target_: model.encoder.AutoEncoder
+    encoder_name: self-attention-lstm
+
+    embedding:
+      embedding_dim: 256
+      dropout_rate: 0.4
+
+    lstm:
+      layer_num: 1
+      bidirectional: true
+      output_dim: 256
+      dropout_rate: 0.4
+
+    attention:
+      hidden_dim: 1024
+      output_dim: 128
+      dropout_rate: 0.4
+
+    return_with_input: true
+    return_sentence_level_hidden: false
+
+  decoder:
+    _model_target_: model.decoder.StackPropagationDecoder
+    interaction:
+      _model_target_: model.decoder.interaction.StackInteraction
+      differentiable: false
+
+    intent_classifier:
+      _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
+      layer_num: 1
+      bidirectional: false
+      force_ratio: 0.9
+      hidden_dim: 64
+      embedding_dim: 8
+      ignore_index: -100
+      dropout_rate: 0.4
+      mode: "token-level-intent"
+      use_multi: false
+      return_sentence_level: true
+
+    slot_classifier:
+      _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
+      layer_num: 1
+      bidirectional: false
+      force_ratio: 0.9
+      hidden_dim: 64
+      embedding_dim: 32
+      ignore_index: -100
+      dropout_rate: 0.4
+      mode: "slot"
+      use_multi: false
+      return_sentence_level: false
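force_ratio: 0.9 controls scheduled sampling inside the AutoregressiveLSTMClassifier: at each decoding step the LSTM consumes the gold previous label 90% of the time (teacher forcing) and its own last prediction otherwise, which narrows the train/inference mismatch. A minimal sketch of that choice:

import random
import torch

def choose_prev_label(gold: torch.Tensor, predicted: torch.Tensor,
                      force_ratio: float = 0.9) -> torch.Tensor:
    """Pick the label embedding fed to the next autoregressive step."""
    return gold if random.random() < force_ratio else predicted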
config/visual.yaml
ADDED
@@ -0,0 +1,6 @@
+host: 127.0.0.1
+port: 7861
+
+is_push_to_public: true
+output_path: save/stack/outputs.jsonl
+page-size: 2
model/__init__.py
ADDED
@@ -0,0 +1,3 @@
+from model.open_slu_model import OpenSLUModel
+
+__all__ = ["OpenSLUModel"]
model/decoder/__init__.py
ADDED
@@ -0,0 +1,5 @@
+from model.decoder.agif_decoder import AGIFDecoder
+from model.decoder.base_decoder import StackPropagationDecoder, BaseDecoder, DCANetDecoder
+from model.decoder.gl_gin_decoder import GLGINDecoder
+
+__all__ = ["StackPropagationDecoder", "BaseDecoder", "DCANetDecoder", "AGIFDecoder", "GLGINDecoder"]