Rams901 LightChen2333 committed on
Commit
da332f1
0 Parent(s):

Duplicate from LightChen2333/OpenSLU

Browse files

Co-authored-by: Qiguang Chen <LightChen2333@users.noreply.huggingface.co>

This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .gitattributes +34 -0
  2. .gitignore +136 -0
  3. README.md +14 -0
  4. __init__.py +1 -0
  5. accelerate/config-old.yaml +16 -0
  6. accelerate/config.yaml +22 -0
  7. app.py +63 -0
  8. common/__init__.py +1 -0
  9. common/config.py +192 -0
  10. common/global_pool.py +26 -0
  11. common/loader.py +332 -0
  12. common/logger.py +237 -0
  13. common/metric.py +346 -0
  14. common/model_manager.py +419 -0
  15. common/saver.py +80 -0
  16. common/tokenizer.py +323 -0
  17. common/utils.py +499 -0
  18. config/README.md +348 -0
  19. config/app.yaml +6 -0
  20. config/decoder/interaction/stack-propagation.yaml +1 -0
  21. config/examples/README.md +38 -0
  22. config/examples/from_pretrained.yaml +53 -0
  23. config/examples/from_pretrained_multi.yaml +55 -0
  24. config/examples/normal.yaml +70 -0
  25. config/examples/reload_to_train.yaml +71 -0
  26. config/reproduction/atis/bi-model.yaml +106 -0
  27. config/reproduction/atis/dca-net.yaml +88 -0
  28. config/reproduction/atis/deberta.yaml +67 -0
  29. config/reproduction/atis/electra.yaml +67 -0
  30. config/reproduction/atis/joint-bert.yaml +70 -0
  31. config/reproduction/atis/roberta.yaml +70 -0
  32. config/reproduction/atis/slot-gated.yaml +87 -0
  33. config/reproduction/atis/stack-propagation.yaml +109 -0
  34. config/reproduction/mix-atis/agif.yaml +133 -0
  35. config/reproduction/mix-atis/gl-gin.yaml +128 -0
  36. config/reproduction/mix-atis/vanilla.yaml +95 -0
  37. config/reproduction/mix-snips/agif.yaml +131 -0
  38. config/reproduction/mix-snips/gl-gin.yaml +131 -0
  39. config/reproduction/mix-snips/vanilla.yaml +95 -0
  40. config/reproduction/snips/bi-model.yaml +104 -0
  41. config/reproduction/snips/dca_net.yaml +88 -0
  42. config/reproduction/snips/deberta.yaml +70 -0
  43. config/reproduction/snips/electra.yaml +69 -0
  44. config/reproduction/snips/joint-bert.yaml +75 -0
  45. config/reproduction/snips/roberta.yaml +70 -0
  46. config/reproduction/snips/slot-gated.yaml +87 -0
  47. config/reproduction/snips/stack-propagation.yaml +105 -0
  48. config/visual.yaml +6 -0
  49. model/__init__.py +3 -0
  50. model/decoder/__init__.py +5 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ .idea/
6
+ wandb/*
7
+ save/*
8
+ !save/.gitkeep
9
+ logs/*
10
+ !logs/.gitkeep
11
+ test
12
+ # C extensions
13
+ *.so
14
+
15
+ # Distribution / packaging
16
+ .Python
17
+ build/
18
+ develop-eggs/
19
+ dist/
20
+ downloads/
21
+ eggs/
22
+ .eggs/
23
+ lib/
24
+ lib64/
25
+ parts/
26
+ sdist/
27
+ var/
28
+ wheels/
29
+ pip-wheel-metadata/
30
+ share/python-wheels/
31
+ *.egg-info/
32
+ .installed.cfg
33
+ *.egg
34
+ MANIFEST
35
+
36
+ # PyInstaller
37
+ # Usually these files are written by a python script from a template
38
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
39
+ *.manifest
40
+ *.spec
41
+
42
+ # Installer logs
43
+ pip-log.txt
44
+ pip-delete-this-directory.txt
45
+
46
+ # Unit test / coverage reports
47
+ htmlcov/
48
+ .tox/
49
+ .nox/
50
+ .coverage
51
+ .coverage.*
52
+ .cache
53
+ nosetests.xml
54
+ coverage.xml
55
+ *.cover
56
+ *.py,cover
57
+ .hypothesis/
58
+ .pytest_cache/
59
+
60
+ # Translations
61
+ *.mo
62
+ *.pot
63
+
64
+ # Django stuff:
65
+ *.log
66
+ local_settings.py
67
+ db.sqlite3
68
+ db.sqlite3-journal
69
+
70
+ # Flask stuff:
71
+ instance/
72
+ .webassets-cache
73
+
74
+ # Scrapy stuff:
75
+ .scrapy
76
+
77
+ # Sphinx documentation
78
+ docs/_build/
79
+
80
+ # PyBuilder
81
+ target/
82
+
83
+ # Jupyter Notebook
84
+ .ipynb_checkpoints
85
+
86
+ # IPython
87
+ profile_default/
88
+ ipython_config.py
89
+
90
+ # pyenv
91
+ .python-version
92
+
93
+ # pipenv
94
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
95
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
96
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
97
+ # install all needed dependencies.
98
+ #Pipfile.lock
99
+
100
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
101
+ __pypackages__/
102
+
103
+ # Celery stuff
104
+ celerybeat-schedule
105
+ celerybeat.pid
106
+
107
+ # SageMath parsed files
108
+ *.sage.py
109
+
110
+ # Environments
111
+ .env
112
+ .venv
113
+ env/
114
+ venv/
115
+ ENV/
116
+ env.bak/
117
+ venv.bak/
118
+
119
+ # Spyder project settings
120
+ .spyderproject
121
+ .spyproject
122
+
123
+ # Rope project settings
124
+ .ropeproject
125
+
126
+ # mkdocs documentation
127
+ /site
128
+
129
+ # mypy
130
+ .mypy_cache/
131
+ .dmypy.json
132
+ dmypy.json
133
+
134
+ # Pyre type checker
135
+ .pyre/
136
+ .vscode/
README.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ title: OpenSLU
4
+ sdk: gradio
5
+ sdk_version: 3.18.0
6
+ app_file: app.py
7
+ emoji: 🚀
8
+ colorFrom: blue
9
+ colorTo: purple
10
+ pinned: false
11
+ tags:
12
+ - making-demos
13
+ duplicated_from: LightChen2333/OpenSLU
14
+ ---
__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+
accelerate/config-old.yaml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ compute_environment: LOCAL_MACHINE
2
+ deepspeed_config: {}
3
+ distributed_type: MULTI_GPU
4
+ downcast_bf16: 'no'
5
+ fsdp_config: {}
6
+ gpu_ids: all
7
+ machine_rank: 0
8
+ main_process_ip: null
9
+ main_process_port: 9001
10
+ main_training_function: main
11
+ mixed_precision: 'no'
12
+ num_machines: 0
13
+ num_processes: 2
14
+ rdzv_backend: static
15
+ same_network: true
16
+ use_cpu: false
accelerate/config.yaml ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ command_file: null
2
+ commands: null
3
+ compute_environment: LOCAL_MACHINE
4
+ deepspeed_config: {}
5
+ distributed_type: 'NO'
6
+ downcast_bf16: 'no'
7
+ dynamo_backend: 'NO'
8
+ fsdp_config: {}
9
+ gpu_ids: all
10
+ machine_rank: 0
11
+ main_process_ip: null
12
+ main_process_port: null
13
+ main_training_function: main
14
+ megatron_lm_config: {}
15
+ mixed_precision: 'no'
16
+ num_machines: 1
17
+ num_processes: 2
18
+ rdzv_backend: static
19
+ same_network: true
20
+ tpu_name: null
21
+ tpu_zone: null
22
+ use_cpu: false
app.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ '''
2
+ Author: Qiguang Chen
3
+ LastEditors: Qiguang Chen
4
+ Date: 2023-02-07 15:42:32
5
+ LastEditTime: 2023-02-19 21:04:03
6
+ Description:
7
+
8
+ '''
9
+ import argparse
10
+ import gradio as gr
11
+
12
+ from common.config import Config
13
+ from common.model_manager import ModelManager
14
+ from common.utils import str2bool
15
+
16
+
17
+ parser = argparse.ArgumentParser()
18
+ parser.add_argument('--config_path', '-cp', type=str, default="config/examples/from_pretrained.yaml")
19
+ parser.add_argument('--push_to_public', '-p', type=str2bool, nargs='?',
20
+ const=True, default=False,
21
+ help="Push to public network.")
22
+ args = parser.parse_args()
23
+ config = Config.load_from_yaml(args.config_path)
24
+ config.base["train"] = False
25
+ config.base["test"] = False
26
+
27
+ model_manager = ModelManager(config)
28
+ model_manager.init_model()
29
+
30
+
31
+ def text_analysis(text):
32
+ print(text)
33
+ data = model_manager.predict(text)
34
+ html = """<link href="https://cdn.staticfile.org/twitter-bootstrap/5.1.1/css/bootstrap.min.css" rel="stylesheet">
35
+ <script src="https://cdn.staticfile.org/twitter-bootstrap/5.1.1/js/bootstrap.bundle.min.js"></script>"""
36
+ html += """<div style="background: white; padding: 16px;"><b>Intent:</b>"""
37
+
38
+ for intent in data["intent"]:
39
+ html += """<button type="button" class="btn btn-white">
40
+ <span class="badge text-dark btn-light">""" + intent + """</span> </button>"""
41
+ html += """<br /> <b>Slot:</b>"""
42
+ for t, slot in zip(data["text"], data["slot"]):
43
+ html += """<button type="button" class="btn btn-white">"""+t+"""<span class="badge text-dark" style="background-color: rgb(255, 255, 255);
44
+ color: rgb(62 62 62);
45
+ box-shadow: 2px 2px 7px 1px rgba(210, 210, 210, 0.42);">"""+slot+\
46
+ """</span>
47
+ </button>"""
48
+ html+="</div>"
49
+ return html
50
+
51
+
52
+ demo = gr.Interface(
53
+ text_analysis,
54
+ gr.Textbox(placeholder="Enter sentence here..."),
55
+ ["html"],
56
+ examples=[
57
+ ["i would like to find a flight from charlotte to las vegas that makes a stop in st louis"],
58
+ ],
59
+ )
60
+ if args.push_to_public:
61
+ demo.launch(share=True)
62
+ else:
63
+ demo.launch()
common/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+
common/config.py ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ '''
2
+ Author: Qiguang Chen
3
+ Date: 2023-01-11 10:39:26
4
+ LastEditors: Qiguang Chen
5
+ LastEditTime: 2023-02-15 17:58:53
6
+ Description: Configuration class to manage all process in OpenSLU like model construction, learning processing and so on.
7
+
8
+ '''
9
+ import re
10
+
11
+ from ruamel import yaml
12
+ import datetime
13
+
14
+ class Config(dict):
15
+ def __init__(self, *args, **kwargs):
16
+ """ init with dict as args
17
+ """
18
+ dict.__init__(self, *args, **kwargs)
19
+ self.__dict__ = self
20
+ self.start_time = datetime.datetime.now().strftime('%Y%m%d%H%M%S%f')
21
+ if not self.model.get("_from_pretrained_"):
22
+ self.__autowired()
23
+
24
+ @staticmethod
25
+ def load_from_yaml(file_path:str)->"Config":
26
+ """load config files with path
27
+
28
+ Args:
29
+ file_path (str): yaml configuration file path.
30
+
31
+ Returns:
32
+ Config: config object.
33
+ """
34
+ with open(file_path) as stream:
35
+ try:
36
+ return Config(yaml.safe_load(stream))
37
+ except yaml.YAMLError as exc:
38
+ print(exc)
39
+
40
+ @staticmethod
41
+ def load_from_args(args)->"Config":
42
+ """ load args to replace item value in config files assigned with '--config_path' or '--model'
43
+
44
+ Args:
45
+ args (Any): args with command line.
46
+
47
+ Returns:
48
+ Config: _description_
49
+ """
50
+ if args.model is not None and args.dataset is not None:
51
+ args.config_path = f"config/reproduction/{args.dataset}/{args.model}.yaml"
52
+ config = Config.load_from_yaml(args.config_path)
53
+ if args.dataset is not None:
54
+ config.__update_dataset(args.dataset)
55
+ if args.device is not None:
56
+ config["base"]["device"] = args.device
57
+ if args.learning_rate is not None:
58
+ config["optimizer"]["lr"] = args.learning_rate
59
+ if args.epoch_num is not None:
60
+ config["base"]["epoch_num"] = args.epoch_num
61
+ return config
62
+
63
+ def autoload_template(self):
64
+ """ search '{*}' template to excute as python code, support replace variable as any configure item
65
+ """
66
+ self.__autoload_template(self.__dict__)
67
+
68
+ def __get_autoload_value(self, matched):
69
+ keys = matched.group()[1:-1].split(".")
70
+ temp = self.__dict__
71
+ for k in keys:
72
+ temp = temp[k]
73
+ return str(temp)
74
+
75
+ def __autoload_template(self, config:dict):
76
+ for k in config:
77
+ if isinstance(config, dict):
78
+ sub_config = config[k]
79
+ elif isinstance(config, list):
80
+ sub_config = k
81
+ else:
82
+ continue
83
+ if isinstance(sub_config, dict) or isinstance(sub_config, list):
84
+ self.__autoload_template(sub_config)
85
+ if isinstance(sub_config, str) and "{" in sub_config and "}" in sub_config:
86
+ res = re.sub(r'{.*?}', self.__get_autoload_value, config[k])
87
+ res_dict= {"res": None}
88
+ exec("res=" + res, res_dict)
89
+ config[k] = res_dict["res"]
90
+
91
+ def __update_dataset(self, dataset_name):
92
+ if dataset_name is not None and isinstance(dataset_name, str):
93
+ self.__dict__["dataset"]["dataset_name"] = dataset_name
94
+
95
+ def get_model_config(self):
96
+ return self.__dict__["model"]
97
+
98
+ def __autowired(self):
99
+ # Set encoder
100
+ encoder_config = self.__dict__["model"]["encoder"]
101
+ encoder_type = encoder_config["_model_target_"].split(".")[-1]
102
+
103
+ def get_output_dim(encoder_config):
104
+ encoder_type = encoder_config["_model_target_"].split(".")[-1]
105
+ if (encoder_type == "AutoEncoder" and encoder_config["encoder_name"] in ["lstm", "self-attention-lstm",
106
+ "bi-encoder"]) or encoder_type == "NoPretrainedEncoder":
107
+ output_dim = 0
108
+ if encoder_config.get("lstm"):
109
+ output_dim += encoder_config["lstm"]["output_dim"]
110
+ if encoder_config.get("attention"):
111
+ output_dim += encoder_config["attention"]["output_dim"]
112
+ return output_dim
113
+ else:
114
+ return encoder_config["output_dim"]
115
+
116
+ if encoder_type == "BiEncoder":
117
+ output_dim = get_output_dim(encoder_config["intent_encoder"]) + \
118
+ get_output_dim(encoder_config["slot_encoder"])
119
+ else:
120
+ output_dim = get_output_dim(encoder_config)
121
+ self.__dict__["model"]["encoder"]["output_dim"] = output_dim
122
+
123
+ # Set interaction
124
+ if "interaction" in self.__dict__["model"]["decoder"] and self.__dict__["model"]["decoder"]["interaction"].get(
125
+ "input_dim") is None:
126
+ self.__dict__["model"]["decoder"]["interaction"]["input_dim"] = output_dim
127
+ interaction_type = self.__dict__["model"]["decoder"]["interaction"]["_model_target_"].split(".")[-1]
128
+ if not ((encoder_type == "AutoEncoder" and encoder_config[
129
+ "encoder_name"] == "self-attention-lstm") or encoder_type == "SelfAttentionLSTMEncoder") and interaction_type != "BiModelWithoutDecoderInteraction":
130
+ output_dim = self.__dict__["model"]["decoder"]["interaction"]["output_dim"]
131
+
132
+ # Set classifier
133
+ if "slot_classifier" in self.__dict__["model"]["decoder"]:
134
+ if self.__dict__["model"]["decoder"]["slot_classifier"].get("input_dim") is None:
135
+ self.__dict__["model"]["decoder"]["slot_classifier"]["input_dim"] = output_dim
136
+ self.__dict__["model"]["decoder"]["slot_classifier"]["use_slot"] = True
137
+ if "intent_classifier" in self.__dict__["model"]["decoder"]:
138
+ if self.__dict__["model"]["decoder"]["intent_classifier"].get("input_dim") is None:
139
+ self.__dict__["model"]["decoder"]["intent_classifier"]["input_dim"] = output_dim
140
+ self.__dict__["model"]["decoder"]["intent_classifier"]["use_intent"] = True
141
+
142
+ def get_intent_label_num(self):
143
+ """ get the number of intent labels.
144
+ """
145
+ classifier_conf = self.__dict__["model"]["decoder"]["intent_classifier"]
146
+ return classifier_conf["intent_label_num"] if "intent_label_num" in classifier_conf else 0
147
+
148
+ def get_slot_label_num(self):
149
+ """ get the number of slot labels.
150
+ """
151
+ classifier_conf = self.__dict__["model"]["decoder"]["slot_classifier"]
152
+ return classifier_conf["slot_label_num"] if "slot_label_num" in classifier_conf else 0
153
+
154
+ def set_intent_label_num(self, intent_label_num):
155
+ """ set the number of intent labels.
156
+
157
+ Args:
158
+ slot_label_num (int): the number of intent label
159
+ """
160
+ self.__dict__["base"]["intent_label_num"] = intent_label_num
161
+ self.__dict__["model"]["decoder"]["intent_classifier"]["intent_label_num"] = intent_label_num
162
+ if "interaction" in self.__dict__["model"]["decoder"]:
163
+
164
+ self.__dict__["model"]["decoder"]["interaction"]["intent_label_num"] = intent_label_num
165
+ if self.__dict__["model"]["decoder"]["interaction"]["_model_target_"].split(".")[
166
+ -1] == "StackInteraction":
167
+ self.__dict__["model"]["decoder"]["slot_classifier"]["input_dim"] += intent_label_num
168
+
169
+
170
+ def set_slot_label_num(self, slot_label_num:int)->None:
171
+ """set the number of slot label
172
+
173
+ Args:
174
+ slot_label_num (int): the number of slot label
175
+ """
176
+ self.__dict__["base"]["slot_label_num"] = slot_label_num
177
+ self.__dict__["model"]["decoder"]["slot_classifier"]["slot_label_num"] = slot_label_num
178
+ if "interaction" in self.__dict__["model"]["decoder"]:
179
+ self.__dict__["model"]["decoder"]["interaction"]["slot_label_num"] = slot_label_num
180
+
181
+ def set_vocab_size(self, vocab_size):
182
+ """set the size of vocabulary in non-pretrained tokenizer
183
+ Args:
184
+ slot_label_num (int): the number of slot label
185
+ """
186
+ encoder_type = self.__dict__["model"]["encoder"]["_model_target_"].split(".")[-1]
187
+ encoder_name = self.__dict__["model"]["encoder"].get("encoder_name")
188
+ if encoder_type == "BiEncoder" or (encoder_type == "AutoEncoder" and encoder_name == "bi-encoder"):
189
+ self.__dict__["model"]["encoder"]["intent_encoder"]["embedding"]["vocab_size"] = vocab_size
190
+ self.__dict__["model"]["encoder"]["slot_encoder"]["embedding"]["vocab_size"] = vocab_size
191
+ elif self.__dict__["model"]["encoder"].get("embedding"):
192
+ self.__dict__["model"]["encoder"]["embedding"]["vocab_size"] = vocab_size
common/global_pool.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ '''
2
+ Author: Qiguang Chen
3
+ LastEditors: Qiguang Chen
4
+ Date: 2023-02-12 14:35:37
5
+ LastEditTime: 2023-02-12 14:37:40
6
+ Description:
7
+
8
+ '''
9
+ def _init():
10
+ global _global_dict
11
+ _global_dict = {}
12
+
13
+
14
+ def set_value(key, value):
15
+ # set gobal value to object pool
16
+ _global_dict[key] = value
17
+
18
+
19
+ def get_value(key):
20
+ # get gobal value from object pool
21
+ try:
22
+ return _global_dict[key]
23
+ except:
24
+ print('读取' + key + '失败\r\n')
25
+
26
+
common/loader.py ADDED
@@ -0,0 +1,332 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
'''
Author: Qiguang Chen
Date: 2023-01-11 10:39:26
LastEditors: Qiguang Chen
LastEditTime: 2023-02-19 15:39:48
Description: all class for load data.

'''
import os
import torch
import json
from datasets import load_dataset, Dataset
from torch.utils.data import DataLoader

from common.utils import InputData

ABS_PATH=os.path.join(os.path.abspath(os.path.dirname(__file__)), "../")

class DataFactory(object):
    def __init__(self, tokenizer,use_multi_intent=False, to_lower_case=True):
        """Build a data factory bound to *tokenizer*.

        Args:
            tokenizer (Tokenizer): tokenizer used to encode raw text.
            use_multi_intent (bool, optional): split '#'-joined multi-intent
                labels into separate intents. Defaults to False.
            to_lower_case (bool, optional): lower-case tokens before encoding.
                Defaults to True.
        """
        self.tokenizer = tokenizer
        self.slot_label_list = []
        self.intent_label_list = []
        self.use_multi = use_multi_intent
        self.to_lower_case = to_lower_case
        self.slot_label_dict = None
        self.intent_label_dict = None

    def __is_supported_datasets(self, dataset_name: str) -> bool:
        """Return True if *dataset_name* is one of the hub-hosted OpenSLU datasets."""
        # BUG FIX: the original list read ["atis", "snips", "mix-atis", "mix-atis"]
        # — "mix-atis" was duplicated and "mix-snips" was missing, even though
        # this commit ships config/reproduction/mix-snips/*.yaml. With the typo,
        # "mix-snips" fell through to the local-file branch and crashed.
        return dataset_name.lower() in ["atis", "snips", "mix-atis", "mix-snips"]

    def load_dataset(self, dataset_config, split="train"):
        """Load one *split*, either from the HF hub (supported names) or a local jsonl file.

        Args:
            dataset_config (dict): dataset section of the config; either maps
                *split* to a supported dataset name / a jsonl path, or carries
                a global "dataset_name".
            split (str, optional): dataset split to load. Defaults to "train".

        Returns:
            datasets.Dataset: the loaded split with "text"/"slot"/"intent" columns.
        """
        dataset_name = None
        if split not in dataset_config:
            dataset_name = dataset_config.get("dataset_name")
        elif self.__is_supported_datasets(dataset_config[split]):
            dataset_name = dataset_config[split].lower()
        if dataset_name is not None:
            return load_dataset("LightChen2333/OpenSLU", dataset_name, split=split)
        # Local file: one JSON object per line with "text"/"slot"/"intent" keys.
        data_file = dataset_config[split]
        data_dict = {"text": [], "slot": [], "intent": []}
        with open(data_file, encoding="utf-8") as f:
            for line in f:
                row = json.loads(line)
                data_dict["text"].append(row["text"])
                data_dict["slot"].append(row["slot"])
                data_dict["intent"].append(row["intent"])
        return Dataset.from_dict(data_dict)
57
    def update_label_names(self, dataset):
        """Grow the intent/slot label inventories from *dataset* and rebuild
        the label->index dicts. Order of first appearance defines the index."""
        for intent_labels in dataset["intent"]:
            if self.use_multi:
                # Multi-intent labels are '#'-joined, e.g. "a#b".
                intent_label = intent_labels.split("#")
            else:
                intent_label = [intent_labels]
            for x in intent_label:
                if x not in self.intent_label_list:
                    self.intent_label_list.append(x)
        for slot_label in dataset["slot"]:
            for x in slot_label:
                if x not in self.slot_label_list:
                    self.slot_label_list.append(x)
        self.intent_label_dict = {key: index for index,
                                  key in enumerate(self.intent_label_list)}
        self.slot_label_dict = {key: index for index,
                                key in enumerate(self.slot_label_list)}

    def update_vocabulary(self, dataset):
        """Feed every sentence to a non-pretrained word tokenizer to build its vocab."""
        if self.tokenizer.name_or_path in ["word_tokenizer"]:
            for data in dataset:
                self.tokenizer.add_instance(data["text"])

    @staticmethod
    def fast_align_data(text, padding_side="right"):
        """Yield, per batch row, the [first, last] subword-token index pairs of
        each word, using the fast tokenizer's word_ids/word_to_tokens mapping.

        Args:
            text (BatchEncoding): output of a fast tokenizer call.
            padding_side (str, optional): tokenizer padding side. Defaults to "right".

        Yields:
            list[list[int]]: one entry per word; [start] when the word is a
            single token, else [start, last].
        """
        for i in range(len(text.input_ids)):
            desired_output = []
            for word_id in text.word_ids(i):
                if word_id is not None:
                    # NOTE(review): sequence_index=1 for left padding — assumes a
                    # second sequence exists in that case; confirm with the tokenizer.
                    start, end = text.word_to_tokens(
                        i, word_id, sequence_index=0 if padding_side == "right" else 1)
                    if start == end - 1:
                        tokens = [start]
                    else:
                        tokens = [start, end - 1]
                    # Each word spans several subwords; record its span only once.
                    if len(desired_output) == 0 or desired_output[-1] != tokens:
                        desired_output.append(tokens)
            yield desired_output

    def fast_align(self,
                   batch,
                   ignore_index=-100,
                   device="cuda",
                   config=None,
                   enable_label=True,
                   label2tensor=True):
        """Tokenize *batch* with a fast tokenizer and align slot labels to subwords.

        Args:
            batch (list[dict]): rows with "text" (tokens), "slot", "intent".
            ignore_index (int, optional): label id ignored by the loss. Defaults to -100.
            device (str, optional): device for the produced tensors. Defaults to "cuda".
            config (dict, optional): extra kwargs forwarded to the tokenizer.
            enable_label (bool, optional): also build label outputs. Defaults to True.
            label2tensor (bool, optional): labels as tensors (True) or strings. Defaults to True.

        Returns:
            InputData: (encoded text, slot labels, intent labels).
        """
        if self.to_lower_case:
            input_list = [[t.lower() for t in x["text"]] for x in batch]
        else:
            input_list = [x["text"] for x in batch]
        text = self.tokenizer(input_list,
                              return_tensors="pt",
                              padding=True,
                              is_split_into_words=True,
                              truncation=True,
                              **config).to(device)
        if enable_label:
            if label2tensor:
                # Start with every position ignored; fill the first subword of
                # each word with that word's slot label id.
                slot_mask = torch.ones_like(text.input_ids) * ignore_index
                for i, offsets in enumerate(
                        DataFactory.fast_align_data(text, padding_side=self.tokenizer.padding_side)):
                    num = 0
                    assert len(offsets) == len(batch[i]["text"])
                    assert len(offsets) == len(batch[i]["slot"])
                    for off in offsets:
                        slot_mask[i][off[0]
                                     ] = self.slot_label_dict[batch[i]["slot"][num]]
                        num += 1
                slot = slot_mask.clone()
                attentin_id = 0 if self.tokenizer.padding_side == "right" else 1
                # Propagate each word's label to its trailing subwords (left to
                # right), skipping special tokens except UNK.
                for i, slot_batch in enumerate(slot):
                    for j, x in enumerate(slot_batch):
                        if x == ignore_index and text.attention_mask[i][j] == attentin_id and (text.input_ids[i][
                            j] not in self.tokenizer.all_special_ids or text.input_ids[i][j] == self.tokenizer.unk_token_id):
                            slot[i][j] = slot[i][j - 1]
                slot = slot.to(device)
                if not self.use_multi:
                    intent = torch.tensor(
                        [self.intent_label_dict[x["intent"]] for x in batch]).to(device)
                else:
                    # Multi-intent: multi-hot vector over the intent inventory.
                    one_hot = torch.zeros(
                        (len(batch), len(self.intent_label_list)), dtype=torch.float)
                    for index, b in enumerate(batch):
                        for x in b["intent"].split("#"):
                            one_hot[index][self.intent_label_dict[x]] = 1.
                    intent = one_hot.to(device)
            else:
                # String labels: '#' marks positions with no aligned word.
                slot_mask = None
                slot = [['#' for _ in range(text.input_ids.shape[1])]
                        for _ in range(text.input_ids.shape[0])]
                for i, offsets in enumerate(DataFactory.fast_align_data(text)):
                    num = 0
                    for off in offsets:
                        slot[i][off[0]] = batch[i]["slot"][num]
                        num += 1
                if not self.use_multi:
                    intent = [x["intent"] for x in batch]
                else:
                    intent = [
                        [x for x in b["intent"].split("#")] for b in batch]
            return InputData((text, slot, intent))
        else:
            return InputData((text, None, None))

    def general_align_data(self, split_text_list, raw_text_list, encoded_text):
        """Yield word->token index spans for slow tokenizers via offset mappings.

        Args:
            split_text_list (list[list[str]]): per-row whitespace-split words.
            raw_text_list (list[str]): per-row joined raw text fed to the tokenizer.
            encoded_text (BatchEncoding): tokenizer output with offset_mapping.

        Yields:
            list[list[int]]: per word, [idx] or [first_idx, last_idx] into the
            token sequence.
        """
        for i in range(len(split_text_list)):
            desired_output = []
            jdx = 0
            offset = encoded_text.offset_mapping[i].tolist()
            split_texts = split_text_list[i]
            raw_text = raw_text_list[i]
            last = 0
            temp_offset = []
            # Deduplicate overlapping offsets: when two consecutive spans start
            # at the same char, keep the longer one and pad with a [0, 0] hole.
            for off in offset:
                s, e = off
                if len(temp_offset) > 0 and (e != 0 and last == s):
                    len_1 = off[1] - off[0]
                    len_2 = temp_offset[-1][1] - temp_offset[-1][0]
                    if len_1 > len_2:
                        temp_offset.pop(-1)
                        temp_offset.append([0, 0])
                        temp_offset.append(off)
                        continue
                temp_offset.append(off)
                last = s
            offset = temp_offset
            for split_text in split_texts:
                # Skip [0, 0] spans (special tokens / holes).
                while jdx < len(offset) and offset[jdx][0] == 0 and offset[jdx][1] == 0:
                    jdx += 1
                if jdx == len(offset):
                    continue
                start_, end_ = offset[jdx]
                tokens = None
                if split_text == raw_text[start_:end_].strip():
                    tokens = [jdx]
                else:
                    # Compute "xxx" -> "xx" "#x": greedily merge following
                    # tokens until they concatenate back to the word.
                    temp_jdx = jdx
                    last_str = raw_text[start_:end_].strip()
                    while last_str != split_text and temp_jdx < len(offset) - 1:
                        temp_jdx += 1
                        last_str += raw_text[offset[temp_jdx]
                                             [0]:offset[temp_jdx][1]].strip()

                    if temp_jdx == jdx:
                        raise ValueError("Illegal Input data")
                    elif last_str == split_text:
                        tokens = [jdx, temp_jdx]
                        jdx = temp_jdx
                    else:
                        jdx -= 1
                jdx += 1
                if tokens is not None:
                    desired_output.append(tokens)
            yield desired_output

    def general_align(self,
                      batch,
                      ignore_index=-100,
                      device="cuda",
                      config=None,
                      enable_label=True,
                      label2tensor=True,
                      locale="en-US"):
        """Slow-tokenizer counterpart of fast_align: joins tokens into raw text,
        tokenizes with offset mapping, and aligns labels via general_align_data.

        Args:
            batch (list[dict]): rows with "text" (tokens), "slot", "intent".
            ignore_index (int, optional): label id ignored by the loss. Defaults to -100.
            device (str, optional): device for the produced tensors. Defaults to "cuda".
            config (dict, optional): extra kwargs forwarded to the tokenizer.
            enable_label (bool, optional): also build label outputs. Defaults to True.
            label2tensor (bool, optional): labels as tensors (True) or strings. Defaults to True.
            locale (str, optional): CJK locales join without spaces. Defaults to "en-US".

        Returns:
            InputData: (encoded text, slot labels, intent labels).
        """
        if self.to_lower_case:
            # NOTE(review): the CJK branch ("".join) skips .lower() — presumably
            # intentional since case is irrelevant there; confirm.
            raw_data = [" ".join(x["text"]).lower() if locale not in ['ja-JP', 'zh-CN', 'zh-TW'] else "".join(x["text"]) for x in
                        batch]
            input_list = [[t.lower() for t in x["text"]] for x in batch]
        else:
            input_list = [x["text"] for x in batch]
            raw_data = [" ".join(x["text"]) if locale not in ['ja-JP', 'zh-CN', 'zh-TW'] else "".join(x["text"]) for x in
                        batch]
        text = self.tokenizer(raw_data,
                              return_tensors="pt",
                              padding=True,
                              truncation=True,
                              return_offsets_mapping=True,
                              **config).to(device)
        if enable_label:
            if label2tensor:
                # Label only the first token of each word; other positions stay ignored.
                slot_mask = torch.ones_like(text.input_ids) * ignore_index
                for i, offsets in enumerate(
                        self.general_align_data(input_list, raw_data, encoded_text=text)):
                    num = 0
                    # if len(offsets) != len(batch[i]["text"]) or len(offsets) != len(batch[i]["slot"]):
                    #     if
                    for off in offsets:
                        slot_mask[i][off[0]
                                     ] = self.slot_label_dict[batch[i]["slot"][num]]
                        num += 1
                # slot = slot_mask.clone()
                # attentin_id = 0 if self.tokenizer.padding_side == "right" else 1
                # for i, slot_batch in enumerate(slot):
                #     for j, x in enumerate(slot_batch):
                #         if x == ignore_index and text.attention_mask[i][j] == attentin_id and text.input_ids[i][
                #             j] not in self.tokenizer.all_special_ids:
                #             slot[i][j] = slot[i][j - 1]
                slot = slot_mask.to(device)
                if not self.use_multi:
                    intent = torch.tensor(
                        [self.intent_label_dict[x["intent"]] for x in batch]).to(device)
                else:
                    one_hot = torch.zeros(
                        (len(batch), len(self.intent_label_list)), dtype=torch.float)
                    for index, b in enumerate(batch):
                        for x in b["intent"].split("#"):
                            one_hot[index][self.intent_label_dict[x]] = 1.
                    intent = one_hot.to(device)
            else:
                slot_mask = None
                slot = [['#' for _ in range(text.input_ids.shape[1])]
                        for _ in range(text.input_ids.shape[0])]
                for i, offsets in enumerate(self.general_align_data(input_list, raw_data, encoded_text=text)):
                    num = 0
                    for off in offsets:
                        slot[i][off[0]] = batch[i]["slot"][num]
                        num += 1
                if not self.use_multi:
                    intent = [x["intent"] for x in batch]
                else:
                    intent = [
                        [x for x in b["intent"].split("#")] for b in batch]
            return InputData((text, slot, intent))
        else:
            return InputData((text, None, None))

    def batch_fn(self,
                 batch,
                 ignore_index=-100,
                 device="cuda",
                 config=None,
                 align_mode="fast",
                 enable_label=True,
                 label2tensor=True):
        """Collate *batch* via fast_align or general_align depending on *align_mode*."""
        if align_mode == "fast":
            # try:
            return self.fast_align(batch,
                                   ignore_index=ignore_index,
                                   device=device,
                                   config=config,
                                   enable_label=enable_label,
                                   label2tensor=label2tensor)
            # except:
            #     return self.general_align(batch,
            #                               ignore_index=ignore_index,
            #                               device=device,
            #                               config=config,
            #                               enable_label=enable_label,
            #                               label2tensor=label2tensor)
        else:
            return self.general_align(batch,
                                      ignore_index=ignore_index,
                                      device=device,
                                      config=config,
                                      enable_label=enable_label,
                                      label2tensor=label2tensor)

    def get_data_loader(self,
                        dataset,
                        batch_size,
                        shuffle=False,
                        device="cuda",
                        enable_label=True,
                        align_mode="fast",
                        label2tensor=True, **config):
        """Wrap *dataset* in a DataLoader whose collate_fn is batch_fn.

        Args:
            dataset (Dataset): dataset to iterate.
            batch_size (int): batch size.
            shuffle (bool, optional): shuffle each epoch. Defaults to False.
            device (str, optional): device for the produced tensors. Defaults to "cuda".
            enable_label (bool, optional): also build label outputs. Defaults to True.
            align_mode (str, optional): "fast" or anything else for general. Defaults to "fast".
            label2tensor (bool, optional): labels as tensors. Defaults to True.
            **config: extra kwargs forwarded to the tokenizer via batch_fn.

        Returns:
            DataLoader: loader yielding InputData batches.
        """
        data_loader = DataLoader(dataset,
                                 shuffle=shuffle,
                                 batch_size=batch_size,
                                 collate_fn=lambda x: self.batch_fn(x,
                                                                    device=device,
                                                                    config=config,
                                                                    enable_label=enable_label,
                                                                    align_mode=align_mode,
                                                                    label2tensor=label2tensor))
        return data_loader
common/logger.py ADDED
@@ -0,0 +1,237 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ '''
2
+ Author: Qiguang Chen
3
+ Date: 2023-01-11 10:39:26
4
+ LastEditors: Qiguang Chen
5
+ LastEditTime: 2023-02-19 22:05:49
6
+ Description: log manager
7
+
8
+ '''
9
+ import datetime
10
+ import json
11
+ import os
12
+ import time
13
+ from common.config import Config
14
+ import logging
15
+ import colorlog
16
+
17
def mkdirs(dir_names):
    """Ensure that every directory in *dir_names* exists.

    Uses ``os.makedirs(..., exist_ok=True)`` instead of the previous
    ``exists()``-then-``mkdir()`` pair, which (a) failed with
    FileNotFoundError when a parent directory was missing and (b) raced
    with concurrent processes creating the same directory (TOCTOU).

    Args:
        dir_names (Iterable[str]): directory paths to create if absent.
    """
    for dir_name in dir_names:
        # exist_ok makes the call idempotent and race-free
        os.makedirs(dir_name, exist_ok=True)
21
+
22
+
23
+
24
class Logger():
    """Log information via one backend: wandb, fitlog, or local jsonl files.

    When an ``accelerate`` Accelerator is supplied, tracking is delegated to
    it instead of the configured backend.
    """

    def __init__(self,
                 logger_type: str,
                 logger_name: str,
                 logging_level="INFO",
                 start_time='',
                 accelerator=None):
        """ create logger

        Args:
            logger_type (str): support type = ["wandb", "fitlog", "local"]
            logger_name (str): logger name, means project name in wandb, and logging file name
            logging_level (str, optional): logging level. Defaults to "INFO".
            start_time (str, optional): start time string. Defaults to ''.
            accelerator (optional): accelerate.Accelerator used for tracking.
                Defaults to None.
        """
        self.logger_type = logger_type
        self.output_dir = "logs/" + logger_name + "/" + start_time
        self.accelerator = accelerator
        self.logger_name = logger_name
        if accelerator is not None:
            # delegate console logging to accelerate's process-aware logger
            from accelerate.logging import get_logger
            self.logging = get_logger(logger_name)
        else:
            if self.logger_type == "wandb":
                import wandb
                self.logger = wandb
                mkdirs(["logs", "logs/" + logger_name, self.output_dir])
                self.logger.init(project=logger_name)
            elif self.logger_type == "fitlog":
                import fitlog
                self.logger = fitlog
                mkdirs(["logs", "logs/" + logger_name, self.output_dir])
                self.logger.set_log_dir("logs/" + logger_name)
            else:
                # local backend: one jsonl file per record type; opening with
                # mode "w" truncates leftovers of a previous identical run
                mkdirs(["logs", "logs/" + logger_name, self.output_dir])
                self.config_file = os.path.join(self.output_dir, "config.jsonl")
                with open(self.config_file, "w", encoding="utf8") as f:
                    print(f"Config will be written to {self.config_file}")

                self.loss_file = os.path.join(self.output_dir, "loss.jsonl")
                with open(self.loss_file, "w", encoding="utf8") as f:
                    print(f"Loss Result will be written to {self.loss_file}")

                self.metric_file = os.path.join(self.output_dir, "metric.jsonl")
                with open(self.metric_file, "w", encoding="utf8") as f:
                    print(f"Metric Result will be written to {self.metric_file}")

                self.other_log_file = os.path.join(self.output_dir, "other_log.jsonl")
                with open(self.other_log_file, "w", encoding="utf8") as f:
                    print(f"Other Log Result will be written to {self.other_log_file}")

            # console + file logger backing info()/warning()/error()/debug()
            self.logging = self._get_logging_logger(logging_level)

    def _get_logging_logger(self, level="INFO"):
        """Build (once) the root logging.Logger with a colored console handler
        and a plain file handler writing to ``<output_dir>/log.log``.

        Args:
            level (str, optional): logging level name. Defaults to "INFO".

        Returns:
            logging.Logger: the configured root logger.
        """
        log_colors_config = {
            'DEBUG': 'cyan',
            'INFO': 'blue',
            'WARNING': 'yellow',
            'ERROR': 'red',
            'CRITICAL': 'red,bg_white',
        }

        logger = logging.getLogger()
        logger.setLevel(level)

        log_path = os.path.join(self.output_dir, "log.log")

        # only attach handlers once; repeated Logger construction must not
        # duplicate console/file output
        if not logger.handlers:
            sh = logging.StreamHandler()
            fh = logging.FileHandler(filename=log_path, mode='a', encoding="utf-8")
            fmt = logging.Formatter(
                fmt='[%(levelname)s - %(asctime)s]\t%(message)s',
                datefmt='%m/%d/%Y %I:%M:%S %p')

            sh_fmt = colorlog.ColoredFormatter(
                fmt='%(log_color)s[%(levelname)s - %(asctime)s]\t%(message)s',
                datefmt='%m/%d/%Y %I:%M:%S %p',
                log_colors=log_colors_config)
            sh.setFormatter(fmt=sh_fmt)
            fh.setFormatter(fmt=fmt)
            logger.addHandler(sh)
            logger.addHandler(fh)
        return logger

    def set_config(self, config: Config):
        """save config

        Args:
            config (Config): configuration object to save
        """
        if self.accelerator is not None:
            self.accelerator.init_trackers(self.logger_name, config=config)
        elif self.logger_type == "wandb":
            self.logger.config.update(config)
        elif self.logger_type == "fitlog":
            self.logger.add_hyper(config)
        else:
            with open(self.config_file, "a", encoding="utf8") as f:
                f.write(json.dumps(config) + "\n")

    def log(self, data, step=0):
        """log data and step

        Args:
            data (Any): data to log
            step (int, optional): step num. Defaults to 0.
        """
        if self.accelerator is not None:
            # BUG FIX: previously hard-coded step=0, losing the caller's step
            self.accelerator.log(data, step=step)
        elif self.logger_type == "wandb":
            self.logger.log(data, step=step)
        elif self.logger_type == "fitlog":
            self.logger.add_other({"data": data, "step": step})
        else:
            with open(self.other_log_file, "a", encoding="utf8") as f:
                f.write(json.dumps({"data": data, "step": step}) + "\n")

    def log_metric(self, metric, metric_split="dev", step=0):
        """log metric

        Args:
            metric (Any): metric
            metric_split (str, optional): dataset split. Defaults to 'dev'.
            step (int, optional): step num. Defaults to 0.
        """
        if self.accelerator is not None:
            self.accelerator.log({metric_split: metric}, step=step)
        elif self.logger_type == "wandb":
            self.logger.log({metric_split: metric}, step=step)
        elif self.logger_type == "fitlog":
            self.logger.add_metric({metric_split: metric}, step=step)
        else:
            with open(self.metric_file, "a", encoding="utf8") as f:
                f.write(json.dumps({metric_split: metric, "step": step}) + "\n")

    def log_loss(self, loss, loss_name="Loss", step=0):
        """log loss

        Args:
            loss (Any): loss
            loss_name (str, optional): loss description. Defaults to 'Loss'.
            step (int, optional): step num. Defaults to 0.
        """
        if self.accelerator is not None:
            self.accelerator.log({loss_name: loss}, step=step)
        elif self.logger_type == "wandb":
            self.logger.log({loss_name: loss}, step=step)
        elif self.logger_type == "fitlog":
            self.logger.add_loss(loss, name=loss_name, step=step)
        else:
            with open(self.loss_file, "a", encoding="utf8") as f:
                f.write(json.dumps({loss_name: loss, "step": step}) + "\n")

    def finish(self):
        """finish logging

        Only fitlog requires an explicit shutdown; other backends are no-ops.
        """
        if self.logger_type == "fitlog":
            self.logger.finish()

    def info(self, message: str):
        """ Log a message with severity 'INFO' in local file / console.

        Args:
            message (str): message to log
        """
        self.logging.info(message)

    def warning(self, message):
        """ Log a message with severity 'WARNING' in local file / console.

        Args:
            message (str): message to log
        """
        self.logging.warning(message)

    def error(self, message):
        """ Log a message with severity 'ERROR' in local file / console.

        Args:
            message (str): message to log
        """
        self.logging.error(message)

    def debug(self, message):
        """ Log a message with severity 'DEBUG' in local file / console.

        Args:
            message (str): message to log
        """
        self.logging.debug(message)

    def critical(self, message):
        """ Log a message with severity 'CRITICAL' in local file / console.

        Args:
            message (str): message to log
        """
        self.logging.critical(message)
common/metric.py ADDED
@@ -0,0 +1,346 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ '''
2
+ Author: Qiguang Chen
3
+ Date: 2023-01-11 10:39:26
4
+ LastEditors: Qiguang Chen
5
+ LastEditTime: 2023-02-17 19:39:22
6
+ Description: Metric calculation class
7
+
8
+ '''
9
+ from collections import Counter
10
+ from typing import List, Dict
11
+
12
+ import numpy as np
13
+ from sklearn.metrics import f1_score
14
+
15
+ from common.utils import InputData, OutputData
16
+
17
+
18
class Evaluator(object):
    """Evaluation metric function library class
    supported metric:
        - slot_f1
        - intent_acc
        - exactly_match_accuracy
        - intent_f1 (default "macro_intent_f1")
        - macro_intent_f1
        - micro_intent_f1
    """
    @staticmethod
    def exactly_match_accuracy(pred_slot: List[List[str or int]],
                               real_slot: List[List[str or int]],
                               pred_intent: List[List[str or int] or str or int],
                               real_intent: List[List[str or int] or str or int]) -> float:
        """Compute the accuracy based on the whole predictions of given sentence, including slot and intent.
        (both support str or int index as the representation of slot and intent)
        Args:
            pred_slot (List[List[str or int]]): predicted sequence of slot list
            real_slot (List[List[str or int]]): golden sequence of slot list.
            pred_intent (List[List[str or int] or str or int]): golden intent list / golden multi intent list.
            real_intent (List[List[str or int] or str or int]): predicted intent list / predicted multi intent list.

        Returns:
            float: exactly match accuracy score (0.0 for empty input)
        """
        total_count, correct_count = 0.0, 0.0
        for p_slot, r_slot, p_intent, r_intent in zip(pred_slot, real_slot, pred_intent, real_intent):
            if isinstance(p_intent, list):
                # multi-intent: order-insensitive comparison
                p_intent, r_intent = set(p_intent), set(r_intent)
            if p_slot == r_slot and p_intent == r_intent:
                correct_count += 1.0
            total_count += 1.0
        # guard: empty input previously raised ZeroDivisionError
        if total_count == 0.0:
            return 0.0
        return 1.0 * correct_count / total_count

    @staticmethod
    def intent_accuracy(pred_list: List, real_list: List) -> float:
        """Get intent accuracy measured by predictions and ground-trues. Support both multi intent and single intent.

        Args:
            pred_list (List): predicted intent list
            real_list (List): golden intent list

        Returns:
            float: intent accuracy score (0.0 for empty input)
        """
        total_count, correct_count = 0.0, 0.0
        for p_intent, r_intent in zip(pred_list, real_list):
            if isinstance(p_intent, list):
                # multi-intent: order-insensitive comparison
                p_intent, r_intent = set(p_intent), set(r_intent)
            if p_intent == r_intent:
                correct_count += 1.0
            total_count += 1.0
        # guard: empty input previously raised ZeroDivisionError
        if total_count == 0.0:
            return 0.0
        return 1.0 * correct_count / total_count

    @staticmethod
    def intent_f1(pred_list: List[List[int]], real_list: List[List[int]], num_intent: int, average='macro') -> float:
        """Get intent f1 measured by predictions and ground-trues.
        (Only support multi intent now, but you can use [[intent1], [intent2], ...] to compute intent f1 in single intent)
        Args:
            pred_list (List[List[int]]): predicted multi intent list.
            real_list (List[List[int]]): golden multi intent list.
            num_intent (int): total number of intent labels
            average (str): support "micro" and "macro"

        Returns:
            float: intent f1 score
        """
        return f1_score(Evaluator.__instance2onehot(num_intent, real_list),
                        Evaluator.__instance2onehot(num_intent, pred_list),
                        average=average,
                        zero_division=0)

    @staticmethod
    def __multilabel2one_hot(labels, nums):
        """Convert one sample's label index list to a multi-hot vector of length ``nums``."""
        res = [0.] * nums
        if len(labels) == 0:
            return res
        if isinstance(labels[0], list):
            # accidentally nested input: use the inner list
            for label in labels[0]:
                res[label] = 1.
            return res
        for label in labels:
            res[label] = 1.
        return res

    @staticmethod
    def __instance2onehot(num_intent, data):
        """Convert a batch of label index lists to a (batch, num_intent) multi-hot array."""
        res = []
        for intents in data:
            res.append(Evaluator.__multilabel2one_hot(intents, num_intent))
        return np.array(res)

    @staticmethod
    def __startOfChunk(prevTag, tag, prevTagType, tagType, chunkStart=False):
        """conlleval-style test: does ``tag`` begin a new chunk after ``prevTag``?"""
        if prevTag == 'B' and tag == 'B':
            chunkStart = True
        if prevTag == 'I' and tag == 'B':
            chunkStart = True
        if prevTag == 'O' and tag == 'B':
            chunkStart = True
        if prevTag == 'O' and tag == 'I':
            chunkStart = True

        if prevTag == 'E' and tag == 'E':
            chunkStart = True
        if prevTag == 'E' and tag == 'I':
            chunkStart = True
        if prevTag == 'O' and tag == 'E':
            chunkStart = True
        if prevTag == 'O' and tag == 'I':
            chunkStart = True

        if tag != 'O' and tag != '.' and prevTagType != tagType:
            chunkStart = True
        return chunkStart

    @staticmethod
    def __endOfChunk(prevTag, tag, prevTagType, tagType, chunkEnd=False):
        """conlleval-style test: does ``prevTag`` end its chunk before ``tag``?"""
        if prevTag == 'B' and tag == 'B':
            chunkEnd = True
        if prevTag == 'B' and tag == 'O':
            chunkEnd = True
        if prevTag == 'I' and tag == 'B':
            chunkEnd = True
        if prevTag == 'I' and tag == 'O':
            chunkEnd = True

        if prevTag == 'E' and tag == 'E':
            chunkEnd = True
        if prevTag == 'E' and tag == 'I':
            chunkEnd = True
        if prevTag == 'E' and tag == 'O':
            chunkEnd = True
        if prevTag == 'I' and tag == 'O':
            chunkEnd = True

        if prevTag != 'O' and prevTag != '.' and prevTagType != tagType:
            chunkEnd = True
        return chunkEnd

    @staticmethod
    def __splitTagType(tag):
        """Split a BIO tag like 'B-xxx' into ('B', 'xxx'); plain tags get an empty type."""
        s = tag.split('-')
        if len(s) > 2 or len(s) == 0:
            raise ValueError('tag format wrong. it must be B-xxx.xxx')
        if len(s) == 1:
            tag = s[0]
            tagType = ""
        else:
            tag = s[0]
            tagType = s[1]
        return tag, tagType

    @staticmethod
    def computeF1Score(correct_slots: List[List[str]], pred_slots: List[List[str]]) -> float:
        """compute f1 score is modified from conlleval.pl

        Args:
            correct_slots (List[List[str]]): golden slot string list
            pred_slots (List[List[str]]): predicted slot string list

        Returns:
            float: slot f1 score
        """
        correctChunk = {}
        correctChunkCnt = 0.0
        foundCorrect = {}
        foundCorrectCnt = 0.0
        foundPred = {}
        foundPredCnt = 0.0
        correctTags = 0.0
        tokenCount = 0.0
        for correct_slot, pred_slot in zip(correct_slots, pred_slots):
            inCorrect = False
            lastCorrectTag = 'O'
            lastCorrectType = ''
            lastPredTag = 'O'
            lastPredType = ''
            for c, p in zip(correct_slot, pred_slot):
                c = str(c)
                p = str(p)
                correctTag, correctType = Evaluator.__splitTagType(c)
                predTag, predType = Evaluator.__splitTagType(p)

                if inCorrect == True:
                    # close a chunk only when both sequences end it at the
                    # same position with the same type
                    if Evaluator.__endOfChunk(lastCorrectTag, correctTag, lastCorrectType, correctType) == True and \
                            Evaluator.__endOfChunk(lastPredTag, predTag, lastPredType, predType) == True and \
                            (lastCorrectType == lastPredType):
                        inCorrect = False
                        correctChunkCnt += 1.0
                        if lastCorrectType in correctChunk:
                            correctChunk[lastCorrectType] += 1.0
                        else:
                            correctChunk[lastCorrectType] = 1.0
                    elif Evaluator.__endOfChunk(lastCorrectTag, correctTag, lastCorrectType, correctType) != \
                            Evaluator.__endOfChunk(lastPredTag, predTag, lastPredType, predType) or \
                            (correctType != predType):
                        inCorrect = False

                if Evaluator.__startOfChunk(lastCorrectTag, correctTag, lastCorrectType, correctType) == True and \
                        Evaluator.__startOfChunk(lastPredTag, predTag, lastPredType, predType) == True and \
                        (correctType == predType):
                    inCorrect = True

                if Evaluator.__startOfChunk(lastCorrectTag, correctTag, lastCorrectType, correctType) == True:
                    foundCorrectCnt += 1
                    if correctType in foundCorrect:
                        foundCorrect[correctType] += 1.0
                    else:
                        foundCorrect[correctType] = 1.0

                if Evaluator.__startOfChunk(lastPredTag, predTag, lastPredType, predType) == True:
                    foundPredCnt += 1.0
                    if predType in foundPred:
                        foundPred[predType] += 1.0
                    else:
                        foundPred[predType] = 1.0

                if correctTag == predTag and correctType == predType:
                    correctTags += 1.0

                tokenCount += 1.0

                lastCorrectTag = correctTag
                lastCorrectType = correctType
                lastPredTag = predTag
                lastPredType = predType

            # flush a chunk still open at sentence end
            if inCorrect == True:
                correctChunkCnt += 1.0
                if lastCorrectType in correctChunk:
                    correctChunk[lastCorrectType] += 1.0
                else:
                    correctChunk[lastCorrectType] = 1.0

        if foundPredCnt > 0:
            precision = 1.0 * correctChunkCnt / foundPredCnt
        else:
            precision = 0

        if foundCorrectCnt > 0:
            recall = 1.0 * correctChunkCnt / foundCorrectCnt
        else:
            recall = 0

        if (precision + recall) > 0:
            f1 = (2.0 * precision * recall) / (precision + recall)
        else:
            f1 = 0

        return f1

    @staticmethod
    def max_freq_predict(sample):
        """Max frequency prediction.

        For each token-level vote list, pick the most common item.
        """
        predict = []
        for items in sample:
            predict.append(Counter(items).most_common(1)[0][0])
        return predict

    @staticmethod
    def __token_map(indexes, token_label_map):
        """Map each label string to its index via ``token_label_map`` (-1 when unknown)."""
        return [[token_label_map[idx] if idx in token_label_map else -1 for idx in index] for index in indexes]

    @staticmethod
    def compute_all_metric(inps: "InputData",
                           output: "OutputData",
                           intent_label_map: dict = None,
                           metric_list: List = None) -> Dict:
        """Auto compute all metric mentioned in 'metric_list'

        Args:
            inps (InputData): input golden slot and intent labels
            output (OutputData): output predicted slot and intent labels
            intent_label_map (dict, Optional): dict like {"intent1": 0, "intent2": 1, ...},which aims to map intent string to index
            metric_list (List): support metrics in ["slot_f1", "intent_acc", "intent_f1", "macro_intent_f1", "micro_intent_f1", "EMA"]

        Returns:
            Dict: all metric mentioned in 'metric_list', like {'EMA': 0.7, ...}


        Example:
            if compute slot metric:

                inps.slot = [["slot1", "slot2", ...], ...]; output.slot_ids=[["slot1", "slot2", ...], ...];

            if compute intent metric:

                [Multi Intent] inps.intent = [["intent1", "intent2", ...], ...]; output.intent_ids = [["intent1", "intent2", ...], ...]

                [Single Intent] inps.intent = ["intent1", ...]; [Single Intent] output.intent_ids = ["intent1", ...]
        """
        if not metric_list:
            metric_list = ["slot_f1", "intent_acc", "EMA"]
        res_dict = {}
        use_slot = output.slot_ids is not None and len(output.slot_ids) > 0
        use_intent = output.intent_ids is not None and len(
            output.intent_ids) > 0
        if use_slot and "slot_f1" in metric_list:
            res_dict["slot_f1"] = Evaluator.computeF1Score(
                output.slot_ids, inps.slot)
        if use_intent and "intent_acc" in metric_list:
            res_dict["intent_acc"] = Evaluator.intent_accuracy(
                output.intent_ids, inps.intent)
            # f1 variants only make sense for multi-intent (list-valued) output
            if isinstance(output.intent_ids[0], list):
                if "intent_f1" in metric_list:
                    res_dict["intent_f1"] = Evaluator.intent_f1(Evaluator.__token_map(output.intent_ids, intent_label_map),
                                                                Evaluator.__token_map(
                                                                    inps.intent, intent_label_map),
                                                                len(intent_label_map.keys()))
                elif "macro_intent_f1" in metric_list:
                    res_dict["macro_intent_f1"] = Evaluator.intent_f1(Evaluator.__token_map(output.intent_ids, intent_label_map),
                                                                      Evaluator.__token_map(inps.intent, intent_label_map),
                                                                      len(intent_label_map.keys()), average="macro")
                if "micro_intent_f1" in metric_list:
                    res_dict["micro_intent_f1"] = Evaluator.intent_f1(Evaluator.__token_map(output.intent_ids, intent_label_map),
                                                                      Evaluator.__token_map(inps.intent, intent_label_map),
                                                                      len(intent_label_map.keys()), average="micro")

        if use_slot and use_intent and "EMA" in metric_list:
            res_dict["EMA"] = Evaluator.exactly_match_accuracy(output.slot_ids, inps.slot, output.intent_ids,
                                                               inps.intent)
        return res_dict
common/model_manager.py ADDED
@@ -0,0 +1,419 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ '''
2
+ Author: Qiguang Chen
3
+ Date: 2023-01-11 10:39:26
4
+ LastEditors: Qiguang Chen
5
+ LastEditTime: 2023-02-19 18:50:11
6
+ Description: manage all process of model training and prediction.
7
+
8
+ '''
9
+ import math
10
+ import os
11
+ import queue
12
+ import random
13
+
14
+ import numpy as np
15
+ import torch
16
+ from tqdm import tqdm
17
+
18
+
19
+ from common import utils
20
+ from common.loader import DataFactory
21
+ from common.logger import Logger
22
+ from common.metric import Evaluator
23
+ from common.saver import Saver
24
+ from common.tokenizer import get_tokenizer, get_tokenizer_class, load_embedding
25
+ from common.utils import InputData, instantiate
26
+ from common.utils import OutputData
27
+ from common.config import Config
28
+ import dill
29
+ from common import global_pool
30
+ from tools.load_from_hugging_face import PreTrainedTokenizerForSLU, PretrainedModelForSLU
31
+ # from tools.hugging_face_parser import load_model, load_tokenizer
32
+
33
+
34
+ class ModelManager(object):
35
def __init__(self, config: Config):
    """create model manager by config

    Args:
        config (Config): configuration to manage all process in OpenSLU
    """
    # init config
    global_pool._init()
    self.config = config
    self.__set_seed(self.config.base.get("seed"))
    self.device = self.config.base.get("device")
    # load_dir set -> init_model() later reloads a local checkpoint
    self.load_dir = self.config.model_manager.get("load_dir")
    # logger backend falls back to wandb when not configured
    if self.config.get("logger") and self.config["logger"].get("logger_type"):
        logger_type = self.config["logger"].get("logger_type")
    else:
        logger_type = "wandb"
    # enable accelerator
    if "accelerator" in self.config and self.config["accelerator"].get("use_accelerator"):
        from accelerate import Accelerator
        self.accelerator = Accelerator(log_with=logger_type)
    else:
        self.accelerator = None
    self.tokenizer = None
    self.saver = Saver(self.config.model_manager, start_time=self.config.start_time)
    # training-only state; filled in by init_model()/train()
    if self.config.base.get("train"):
        self.model = None
        self.optimizer = None
        self.total_step = None
        self.lr_scheduler = None
        self.init_step = 0
        self.best_metric = 0
    self.logger = Logger(logger_type=logger_type,
                         logger_name=self.config.base["name"],
                         start_time=self.config.start_time,
                         accelerator=self.accelerator)
    # expose the logger globally so model components can log without wiring
    global_pool.set_value("logger", self.logger)
71
+
72
def init_model(self):
    """init model, optimizer, lr_scheduler

    Three mutually exclusive initialisation paths:
      1. ``load_dir`` set -> reload a locally saved checkpoint (optionally
         restoring optimizer/scheduler/step state to resume training);
      2. ``_from_pretrained_`` set on both model and tokenizer -> load a
         published checkpoint;
      3. otherwise -> build tokenizer, data and model from scratch.
    """
    self.prepared = False
    if self.load_dir is not None:
        # path 1: resume from a local checkpoint
        self.load()
        self.config.set_vocab_size(self.tokenizer.vocab_size)
        self.init_data()
        if self.config.base.get("train") and self.config.model_manager.get("load_train_state"):
            # restore optimizer/scheduler/progress so training continues
            # exactly where it stopped (dill needed for the pickled state)
            train_state = torch.load(os.path.join(
                self.load_dir, "train_state.pkl"), pickle_module=dill)
            self.optimizer = instantiate(
                self.config["optimizer"])(self.model.parameters())
            self.lr_scheduler = instantiate(self.config["scheduler"])(
                optimizer=self.optimizer,
                num_training_steps=self.total_step
            )
            self.optimizer.load_state_dict(train_state["optimizer"])
            self.optimizer.zero_grad()
            self.lr_scheduler.load_state_dict(train_state["lr_scheduler"])
            self.init_step = train_state["step"]
            self.best_metric = train_state["best_metric"]
    elif self.config.model.get("_from_pretrained_") and self.config.tokenizer.get("_from_pretrained_"):
        # path 2: published pretrained checkpoint
        self.from_pretrained()
        self.config.set_vocab_size(self.tokenizer.vocab_size)
        self.init_data()
    else:
        # path 3: build everything from scratch; data must be initialised
        # before the model so label counts are in the config
        self.tokenizer = get_tokenizer(
            self.config.tokenizer.get("_tokenizer_name_"))
        self.init_data()
        self.model = instantiate(self.config.model)
        self.model.to(self.device)
        if self.config.base.get("train"):
            self.optimizer = instantiate(
                self.config["optimizer"])(self.model.parameters())
            self.lr_scheduler = instantiate(self.config["scheduler"])(
                optimizer=self.optimizer,
                num_training_steps=self.total_step
            )
114
+
115
+
116
def init_data(self):
    """Create the ``DataFactory`` and the dataloaders required by the run
    mode, and push data-derived sizes (vocab size, intent/slot label
    counts) back into the config.
    """
    self.data_factory = DataFactory(tokenizer=self.tokenizer,
                                    use_multi_intent=self.config.base.get("multi_intent"),
                                    to_lower_case=self.config.tokenizer.get("_to_lower_case_"))
    batch_size = self.config.base["batch_size"]
    # init tokenizer config and dataloaders
    # (keys wrapped in underscores like "_align_mode_" are OpenSLU-internal
    # and are not forwarded to the tokenizer)
    tokenizer_config = {key: self.config.tokenizer[key]
                        for key in self.config.tokenizer if key[0] != "_" and key[-1] != "_"}

    if self.config.base.get("train"):
        # init dataloader & load data
        train_dataset = self.data_factory.load_dataset(self.config.dataset, split="train")

        # update label and vocabulary (ONLY SUPPORT FOR "word_tokenizer")
        self.data_factory.update_label_names(train_dataset)
        self.data_factory.update_vocabulary(train_dataset)

        self.train_dataloader = self.data_factory.get_data_loader(train_dataset,
                                                                  batch_size,
                                                                  shuffle=True,
                                                                  device=self.device,
                                                                  enable_label=True,
                                                                  align_mode=self.config.tokenizer.get(
                                                                      "_align_mode_"),
                                                                  label2tensor=True,
                                                                  **tokenizer_config)
        # total optimisation steps drive the lr scheduler
        self.total_step = int(self.config.base.get("epoch_num")) * len(self.train_dataloader)
        dev_dataset = self.data_factory.load_dataset(self.config.dataset, split="validation")
        self.dev_dataloader = self.data_factory.get_data_loader(dev_dataset,
                                                                batch_size,
                                                                shuffle=False,
                                                                device=self.device,
                                                                enable_label=True,
                                                                align_mode=self.config.tokenizer.get(
                                                                    "_align_mode_"),
                                                                label2tensor=False,
                                                                **tokenizer_config)
        self.data_factory.update_vocabulary(dev_dataset)
        self.intent_list = None
        self.intent_dict = None
        self.slot_list = None
        self.slot_dict = None
        # add intent label num and slot label num to config
        # (skipped when already provided, i.e. label num != 0)
        if self.config.model["decoder"].get("intent_classifier") and int(self.config.get_intent_label_num()) == 0:
            self.intent_list = self.data_factory.intent_label_list
            self.intent_dict = self.data_factory.intent_label_dict
            self.config.set_intent_label_num(len(self.intent_list))
        if self.config.model["decoder"].get("slot_classifier") and int(self.config.get_slot_label_num()) == 0:
            self.slot_list = self.data_factory.slot_label_list
            self.slot_dict = self.data_factory.slot_label_dict
            self.config.set_slot_label_num(len(self.slot_list))

        # autoload embedding for non-pretrained encoder
        if self.config["model"]["encoder"].get("embedding") and self.config["model"]["encoder"]["embedding"].get(
                "load_embedding_name"):
            self.config["model"]["encoder"]["embedding"]["embedding_matrix"] = load_embedding(self.tokenizer,
                                                                                              self.config["model"][
                                                                                                  "encoder"][
                                                                                                  "embedding"].get(
                                                                                                  "load_embedding_name"))
        # fill template in config
        self.config.autoload_template()
        # save config
        self.logger.set_config(self.config)
        self.saver.save_tokenizer(self.tokenizer)
        self.saver.save_label(self.intent_list, self.slot_list)
        self.config.set_vocab_size(self.tokenizer.vocab_size)

    if self.config.base.get("test"):
        # test split keeps string labels (label2tensor=False) for metrics
        self.test_dataset = self.data_factory.load_dataset(self.config.dataset, split="test")
        self.test_dataloader = self.data_factory.get_data_loader(self.test_dataset,
                                                                 batch_size,
                                                                 shuffle=False,
                                                                 device=self.device,
                                                                 enable_label=True,
                                                                 align_mode=self.config.tokenizer.get(
                                                                     "_align_mode_"),
                                                                 label2tensor=False,
                                                                 **tokenizer_config)
200
+
201
def eval(self, step: int, best_metric: float) -> float:
    """ evaluation models.

    Runs the dev split; when the configured "best_key" metric improves,
    checkpoints the model + training state and, if testing is enabled,
    evaluates and logs the test split as well.

    Args:
        step (int): which step the model has trained in
        best_metric (float): last best metric value to judge whether to test or save model

    Returns:
        float: updated best metric value
    """
    # TODO: save dev
    _, res = self.__evaluate(self.model, self.dev_dataloader, mode="dev")
    self.logger.log_metric(res, metric_split="dev", step=step)
    if res[self.config.evaluator.get("best_key")] > best_metric:
        # dev improved: persist model + optimizer/scheduler state
        best_metric = res[self.config.evaluator.get("best_key")]
        train_state = {
            "step": step,
            "best_metric": best_metric,
            "optimizer": self.optimizer.state_dict(),
            "lr_scheduler": self.lr_scheduler.state_dict()
        }
        self.saver.save_model(self.model, train_state, self.accelerator)
        if self.config.base.get("test"):
            # test is only run (and its outputs saved) on new dev bests
            outputs, test_res = self.__evaluate(self.model, self.test_dataloader, mode="test")
            self.saver.save_output(outputs, self.test_dataset)
            self.logger.log_metric(test_res, metric_split="test", step=step)
    return best_metric
228
+
229
def train(self) -> float:
    """ train models.

    Main optimisation loop: iterates ``epoch_num`` epochs over the train
    dataloader, logging losses each step, periodically evaluating (by step
    or by epoch, per the evaluator config) and auto-saving checkpoints.

    Returns:
        float: updated best metric value
    """
    self.model.train()
    if self.accelerator is not None:
        # with accelerate, each process sees 1/num_processes of the batches
        self.total_step = math.ceil(self.total_step / self.accelerator.num_processes)
    if self.optimizer is None:
        self.optimizer = instantiate(self.config["optimizer"])(self.model.parameters())
    if self.lr_scheduler is None:
        self.lr_scheduler = instantiate(self.config["scheduler"])(
            optimizer=self.optimizer,
            num_training_steps=self.total_step
        )
    if not self.prepared and self.accelerator is not None:
        self.model, self.optimizer, self.train_dataloader, self.lr_scheduler = self.accelerator.prepare(
            self.model, self.optimizer, self.train_dataloader, self.lr_scheduler)
    # init_step > 0 when resuming from a saved train state
    step = self.init_step
    progress_bar = tqdm(range(self.total_step))
    progress_bar.update(self.init_step)
    self.optimizer.zero_grad()
    for _ in range(int(self.config.base.get("epoch_num"))):
        for data in self.train_dataloader:
            if step == 0:
                # log one decoded sample so tokenization/labels can be
                # sanity-checked at the start of the run
                self.logger.info(data.get_item(
                    0, tokenizer=self.tokenizer, intent_map=self.intent_list, slot_map=self.slot_list))
            output = self.model(data)
            # distributed wrappers expose the real model via .module;
            # compute_loss must be called on the unwrapped model
            if self.accelerator is not None and hasattr(self.model, "module"):
                loss, intent_loss, slot_loss = self.model.module.compute_loss(
                    pred=output, target=data)
            else:
                loss, intent_loss, slot_loss = self.model.compute_loss(
                    pred=output, target=data)
            self.logger.log_loss(loss, "Loss", step=step)
            self.logger.log_loss(intent_loss, "Intent Loss", step=step)
            self.logger.log_loss(slot_loss, "Slot Loss", step=step)
            self.optimizer.zero_grad()

            if self.accelerator is not None:
                self.accelerator.backward(loss)
            else:
                loss.backward()
            self.optimizer.step()
            self.lr_scheduler.step()
            train_state = {
                "step": step,
                "best_metric": self.best_metric,
                "optimizer": self.optimizer.state_dict(),
                "lr_scheduler": self.lr_scheduler.state_dict()
            }
            # auto_save_step returning True means a checkpoint was taken;
            # step-based evaluation is skipped on those steps
            if not self.saver.auto_save_step(self.model, train_state, self.accelerator):
                if not self.config.evaluator.get("eval_by_epoch") and step % self.config.evaluator.get("eval_step") == 0 and step != 0:
                    self.best_metric = self.eval(step, self.best_metric)
            step += 1
            progress_bar.update(1)
        if self.config.evaluator.get("eval_by_epoch"):
            self.best_metric = self.eval(step, self.best_metric)
    self.logger.finish()
    return self.best_metric
290
+
291
def test(self):
    """Run a one-off evaluation pass over the test split.

    Returns:
        tuple: (OutputData with all decoded predictions, metric dict)
    """
    result = self.__evaluate(self.model, self.test_dataloader, mode="test")
    return result
293
+
294
def __set_seed(self, seed_value: int):
    """Manually set random seeds across all RNG sources for reproducibility.

    Args:
        seed_value (int): random seed
    """
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    torch.random.manual_seed(seed_value)
    os.environ['PYTHONHASHSEED'] = str(seed_value)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed_value)
        torch.cuda.manual_seed_all(seed_value)
        torch.backends.cudnn.deterministic = True
        # BUGFIX: benchmark=True lets cuDNN auto-select algorithms
        # non-deterministically, defeating deterministic=True above.
        torch.backends.cudnn.benchmark = False
    return
311
+
312
def __evaluate(self, model, dataloader, mode="dev"):
    """Run a full pass over `dataloader`, decode predictions and compute metrics.

    Args:
        model: model to evaluate (possibly accelerator/DDP-wrapped)
        dataloader: dev or test dataloader
        mode (str): split name, used only for logging

    Returns:
        tuple: (OutputData accumulating all decoded predictions, metric dict)
    """
    model.eval()
    inps = InputData()
    outputs = OutputData()
    for data in dataloader:
        torch.cuda.empty_cache()
        output = model(data)
        # DDP wraps the model; reach through .module for the decode helper
        if self.accelerator is not None and hasattr(self.model, "module"):
            decode_output = model.module.decode(output, data)
        else:
            decode_output = model.decode(output, data)

        # map predicted ids to label strings
        decode_output.map_output(slot_map=self.slot_list,
                                 intent_map=self.intent_list)
        if self.config.model["decoder"].get("slot_classifier"):
            # drop padding / sub-token slot positions rendered as "#"
            data, decode_output = utils.remove_slot_ignore_index(
                data, decode_output, ignore_index="#")

        inps.merge_input_data(data)
        outputs.merge_output_data(decode_output)
    # use the configured metric list when given, otherwise evaluator defaults
    if "metric" in self.config.evaluator:
        res = Evaluator.compute_all_metric(
            inps, outputs, intent_label_map=self.intent_dict, metric_list=self.config.evaluator["metric"])
    else:
        res = Evaluator.compute_all_metric(
            inps, outputs, intent_label_map=self.intent_dict)
    self.logger.info(f"Best {mode} metric: "+str(res))
    # restore training mode for the caller
    model.train()
    return outputs, res
341
+
342
def load(self):
    """Restore tokenizer, label maps and model weights from `self.load_dir`."""
    if self.tokenizer is None:
        with open(os.path.join(self.load_dir, "tokenizer.pkl"), 'rb') as f:
            self.tokenizer = dill.load(f)
    label = utils.load_json(os.path.join(self.load_dir, "label.json"))
    intent_names = label["intent"]
    if intent_names is None:
        self.intent_list = None
        self.intent_dict = None
    else:
        self.intent_list = intent_names
        self.intent_dict = {name: idx for idx, name in enumerate(intent_names)}
        self.config.set_intent_label_num(len(self.intent_list))
    slot_names = label["slot"]
    if slot_names is None:
        self.slot_list = None
        self.slot_dict = None
    else:
        self.slot_list = slot_names
        self.slot_dict = {name: idx for idx, name in enumerate(slot_names)}
        self.config.set_slot_label_num(len(self.slot_list))
    self.config.set_vocab_size(self.tokenizer.vocab_size)
    # the pickled model is loaded the same way in both branches
    model_path = os.path.join(self.load_dir, "model.pkl")
    self.model = torch.load(model_path, map_location=torch.device(self.device))
    if self.accelerator is not None and self.load_dir is not None:
        self.prepared = True
        self.accelerator.load_state(self.load_dir)
        self.accelerator.prepare_model(self.model)
    else:
        self.model.to(self.device)
376
+
377
+
378
def from_pretrained(self):
    """Populate model, tokenizer and label maps from a pretrained OpenSLU checkpoint."""
    self.config.autoload_template()
    pretrained = PretrainedModelForSLU.from_pretrained(self.config.model["_from_pretrained_"])
    self.model = pretrained.model
    if self.tokenizer is None:
        self.tokenizer = PreTrainedTokenizerForSLU.from_pretrained(
            self.config.tokenizer["_from_pretrained_"])
        self.config.tokenizer = pretrained.config.tokenizer

    self.model.to(self.device)
    label = pretrained.config._id2label
    self.config.model = pretrained.config.model
    self.intent_list = label["intent"]
    self.slot_list = label["slot"]
    self.intent_dict = {name: idx for idx, name in enumerate(label["intent"])}
    self.slot_dict = {name: idx for idx, name in enumerate(label["slot"])}
396
+
397
def predict(self, text_data):
    """Run single-utterance inference.

    Args:
        text_data (str): space-separated utterance text

    Returns:
        dict: {"intent": predicted intent(s), "slot": slot labels, "text": tokens}
    """
    self.model.eval()
    # keep only public tokenizer options (names not wrapped in underscores)
    tokenizer_cfg = {name: self.config.tokenizer[name]
                     for name in self.config.tokenizer
                     if name[0] != "_" and name[-1] != "_"}
    align = self.config.tokenizer.get("_align_mode_")
    if align is None:
        align = "general"
    batch = self.data_factory.batch_fn(batch=[{"text": text_data.split(" ")}],
                                       device=self.device,
                                       config=tokenizer_cfg,
                                       enable_label=False,
                                       align_mode=align,
                                       label2tensor=False)
    raw_output = self.model(batch)
    decoded = self.model.decode(raw_output, batch)
    decoded.map_output(slot_map=self.slot_list, intent_map=self.intent_list)
    # multi-intent models already yield a list; wrap the single-intent case
    if self.config.base.get("multi_intent"):
        intent = decoded.intent_ids[0]
    else:
        intent = [decoded.intent_ids[0]]
    tokens = [self.tokenizer.decode(tid) for tid in batch.input_ids[0].tolist()]
    return {"intent": intent, "slot": decoded.slot_ids[0], "text": tokens}
common/saver.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ '''
2
+ Author: Qiguang Chen
3
+ LastEditors: Qiguang Chen
4
+ Date: 2023-02-12 22:23:58
5
+ LastEditTime: 2023-02-19 14:14:56
6
+ Description:
7
+
8
+ '''
9
+ import json
10
+ import os
11
+ import queue
12
+ import shutil
13
+ import torch
14
+ import dill
15
+ from common import utils
16
+
17
+
18
class Saver():
    """Handles checkpointing of model, tokenizer, labels and outputs.

    Checkpoints go under `config["save_dir"]` if given, otherwise under
    `save/<start_time>`. With `max_save_num > 1`, a bounded FIFO of
    per-step subdirectories is kept and the oldest is deleted on overflow.
    """

    def __init__(self, config, start_time=None) -> None:
        """
        Args:
            config: dict-like saver configuration (save_dir, save_mode,
                save_step, max_save_num)
            start_time (str, optional): run timestamp used when save_dir is absent
        """
        self.config = config
        if self.config.get("save_dir"):
            self.model_save_dir = self.config["save_dir"]
        else:
            if not os.path.exists("save/"):
                os.mkdir("save/")
            self.model_save_dir = "save/" + start_time
        if not os.path.exists(self.model_save_dir):
            os.mkdir(self.model_save_dir)
        save_mode = config.get("save_mode")
        self.save_mode = save_mode if save_mode is not None else "save-by-eval"

        max_save_num = self.config.get("max_save_num")
        self.max_save_num = max_save_num if max_save_num is not None else 1
        # BUGFIX: use the defaulted attribute here — queue.Queue(maxsize=None)
        # raises TypeError later on .full()/.put() when max_save_num is unset.
        self.save_pool = queue.Queue(maxsize=self.max_save_num)

    def save_tokenizer(self, tokenizer):
        """Pickle the tokenizer object to <save_dir>/tokenizer.pkl."""
        with open(os.path.join(self.model_save_dir, "tokenizer.pkl"), 'wb') as f:
            dill.dump(tokenizer, f)

    def save_label(self, intent_list, slot_list):
        """Persist intent/slot label lists to <save_dir>/label.json."""
        utils.save_json(os.path.join(self.model_save_dir, "label.json"),
                        {"intent": intent_list, "slot": slot_list})

    def save_model(self, model, train_state, accelerator=None):
        """Save model weights and training state.

        Args:
            model: model (or accelerator-wrapped model) to save
            train_state (dict): step / best_metric / optimizer / scheduler state
            accelerator: optional HuggingFace Accelerator for distributed saving
        """
        step = train_state["step"]
        if self.max_save_num != 1:
            # rotate per-step checkpoint directories, deleting the oldest
            model_save_dir = os.path.join(self.model_save_dir, str(step))
            if self.save_pool.full():
                shutil.rmtree(self.save_pool.get())
            self.save_pool.put(model_save_dir)
            if not os.path.exists(model_save_dir):
                os.mkdir(model_save_dir)
        else:
            model_save_dir = self.model_save_dir
            if not os.path.exists(model_save_dir):
                os.mkdir(model_save_dir)
        if accelerator is None:
            torch.save(model, os.path.join(model_save_dir, "model.pkl"))
            torch.save(train_state, os.path.join(model_save_dir, "train_state.pkl"), pickle_module=dill)
        else:
            accelerator.wait_for_everyone()
            unwrapped_model = accelerator.unwrap_model(model)
            accelerator.save(unwrapped_model, os.path.join(model_save_dir, "model.pkl"))
            accelerator.save_state(output_dir=model_save_dir)

    def auto_save_step(self, model, train_state, accelerator=None):
        """Save a checkpoint when in save-by-step mode at a save_step boundary.

        Returns:
            bool: True when a checkpoint was written, otherwise False
        """
        step = train_state["step"]
        if self.save_mode == "save-by-step" and step % self.config.get("save_step") == 0 and step != 0:
            self.save_model(model, train_state, accelerator)
            return True
        else:
            return False

    def save_output(self, outputs, dataset):
        """Write decoded predictions (paired with the dataset) to the save dir."""
        outputs.save(self.model_save_dir, dataset)
common/tokenizer.py ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ from collections import Counter
4
+ from collections import OrderedDict
5
+ from typing import List
6
+
7
+ import torch
8
+ from ordered_set import OrderedSet
9
+ from transformers import AutoTokenizer
10
+
11
+ from common.utils import download, unzip_file
12
+
13
+
14
def get_tokenizer(tokenizer_name: str):
    """auto get tokenizer

    Args:
        tokenizer_name (str): support "word_tokenizer" and other pretrained tokenizer in hugging face.

    Returns:
        Any: Tokenizer Object
    """
    if tokenizer_name != "word_tokenizer":
        # any other name is treated as a HuggingFace hub identifier
        return AutoTokenizer.from_pretrained(tokenizer_name)
    return WordTokenizer(tokenizer_name)
27
+
28
def get_tokenizer_class(tokenizer_name: str):
    """auto get tokenizer class

    Args:
        tokenizer_name (str): support "word_tokenizer" and other pretrained tokenizer in hugging face.

    Returns:
        Any: Tokenizer Class (or factory callable for pretrained tokenizers)
    """
    if tokenizer_name != "word_tokenizer":
        return AutoTokenizer.from_pretrained
    return WordTokenizer
41
+
42
# Internal state flags for WordTokenizer.__call__: whether the input was a
# whole batch of sequences (BATCH_STATE) or a single sequence (INSTANCE_STATE).
BATCH_STATE = 1
INSTANCE_STATE = 2
44
+
45
+
46
class WordTokenizer(object):
    """Word-level tokenizer with a dynamically-built vocabulary.

    Mimics the subset of the HuggingFace tokenizer interface used in this
    project: [PAD]/[UNK] special tokens, __call__ returning TokenizedData,
    decode(), convert_tokens_to_ids(), and a save()/from_file() round-trip.
    """

    def __init__(self, name):
        self.__name = name
        self.index2instance = OrderedSet()
        self.instance2index = OrderedDict()
        # Counter object recording element frequency in raw text.
        self.counter = Counter()

        self.__sign_pad = "[PAD]"
        self.add_instance(self.__sign_pad)
        self.__sign_unk = "[UNK]"
        self.add_instance(self.__sign_unk)

    @property
    def padding_side(self):
        # sequences are padded on the right, matching the HF convention used here
        return "right"

    @property
    def all_special_ids(self):
        return [self.unk_token_id, self.pad_token_id]

    @property
    def name_or_path(self):
        return self.__name

    @property
    def vocab_size(self):
        return len(self.instance2index)

    @property
    def pad_token_id(self):
        return self.instance2index[self.__sign_pad]

    @property
    def unk_token_id(self):
        return self.instance2index[self.__sign_unk]

    def add_instance(self, instance):
        """ Add instances to alphabet.

        1, We support any iterative data structure which
        contains elements of str type.

        2, We will count added instances that will influence
        the serialization of unknown instance.

        Args:
            instance: is given instance or a list of it.
        """
        if isinstance(instance, (list, tuple)):
            for element in instance:
                self.add_instance(element)
            return

        # We only support elements of str type.
        assert isinstance(instance, str)

        if instance not in self.index2instance:
            self.instance2index[instance] = len(self.index2instance)
            self.index2instance.append(instance)

    def __call__(self, instance,
                 return_tensors="pt",
                 is_split_into_words=True,
                 padding=True,
                 add_special_tokens=False,
                 truncation=True,
                 max_length=512,
                 **config):
        """Tokenize a single sequence or a batch, HF-tokenizer style.

        Returns:
            TokenizedData: input_ids / token_type_ids / attention_mask
        """
        if isinstance(instance, (list, tuple)) and isinstance(instance[0], (str)) and is_split_into_words:
            res = self.get_index(instance)
            state = INSTANCE_STATE
        elif isinstance(instance, str) and not is_split_into_words:
            res = self.get_index(instance.split(" "))
            state = INSTANCE_STATE
        elif not is_split_into_words and isinstance(instance, (list, tuple)):
            res = [self.get_index(ins.split(" ")) for ins in instance]
            state = BATCH_STATE
        else:
            res = [self.get_index(ins) for ins in instance]
            state = BATCH_STATE
        # truncate to max_length
        if state == BATCH_STATE:
            res = [r[:max_length] for r in res]
        else:
            # BUGFIX: the original iterated the flat id list and called len() on ints
            res = res[:max_length]
        pad_id = self.get_index(self.__sign_pad)
        if padding and state == BATCH_STATE:
            # BUGFIX: pad to the longest *encoded* sequence; the original used raw
            # input lengths, which is wrong after truncation and for string batches
            max_len = max(len(r) for r in res)
            for i in range(len(res)):
                res[i] = res[i] + [pad_id] * (max_len - len(res[i]))
        if return_tensors == "pt":
            input_ids = torch.Tensor(res).long()
            attention_mask = (input_ids != pad_id).long()
        elif state == BATCH_STATE:
            input_ids = res
            # BUGFIX: keep per-sequence nesting (the original flattened the batch)
            attention_mask = [[1 if tok != pad_id else 0 for tok in seq] for seq in res]
        else:
            input_ids = res
            attention_mask = [1 if r != pad_id else 0 for r in res]
        return TokenizedData(input_ids, token_type_ids=attention_mask, attention_mask=attention_mask)

    def get_index(self, instance):
        """ Serialize given instance and return.

        Unknown words map to the index of "[UNK]".

        Args:
            instance (Any): is given instance or a list of it.
        Return:
            Any: the serialization of query instance.
        """
        if isinstance(instance, (list, tuple)):
            return [self.get_index(elem) for elem in instance]

        assert isinstance(instance, str)

        try:
            return self.instance2index[instance]
        except KeyError:
            return self.instance2index[self.__sign_unk]

    def decode(self, index):
        """ Get corresponding instance of query index.

        if index is invalid, then throws exception.

        Args:
            index (int): is query index, possibly iterable.
        Returns:
            is corresponding instance.
        """
        if isinstance(index, list):
            return [self.decode(elem) for elem in index]
        if isinstance(index, torch.Tensor):
            # convert tensors to plain python values before lookup
            index = index.tolist()
            return self.decode(index)
        return self.index2instance[index]

    def decode_batch(self, index, **kargs):
        """ Get corresponding instances of a batch of query indices.

        Args:
            index: query indices, possibly nested.
        Returns:
            corresponding instances.
        """
        return self.decode(index)

    def save(self, path):
        """ Save the vocabulary to a JSON file.

        Args:
            path (str): is the path to save object.
        """
        with open(path, 'w', encoding="utf8") as fw:
            fw.write(json.dumps({"name": self.__name, "token_map": self.instance2index}))

    @staticmethod
    def from_file(path):
        """Reconstruct a WordTokenizer from a JSON file produced by save()."""
        with open(path, 'r', encoding="utf8") as fw:
            obj = json.load(fw)
            tokenizer = WordTokenizer(obj["name"])
            tokenizer.instance2index = OrderedDict(obj["token_map"])
            tokenizer.index2instance = OrderedSet(tokenizer.instance2index.keys())
            return tokenizer

    def __len__(self):
        return len(self.index2instance)

    def __str__(self):
        return 'Alphabet {} contains about {} words: \n\t{}'.format(self.name_or_path, len(self), self.index2instance)

    def convert_tokens_to_ids(self, tokens):
        """convert token sequence to input ids sequence

        Args:
            tokens (Any): token sequence

        Returns:
            Any: input ids sequence (unknown tokens map to the [UNK] id)
        """
        try:
            if isinstance(tokens, (list, tuple)):
                return [self.instance2index[x] for x in tokens]
            return self.instance2index[tokens]
        except KeyError:
            return self.instance2index[self.__sign_unk]
255
+
256
+
257
class TokenizedData():
    """tokenized output data with input_ids, token_type_ids, attention_mask"""

    def __init__(self, input_ids, token_type_ids, attention_mask):
        self.input_ids = input_ids
        self.token_type_ids = token_type_ids
        self.attention_mask = attention_mask

    def word_ids(self, index: int) -> List[int or None]:
        """ get word id list

        Args:
            index (int): word index in sequence

        Returns:
            List[int or None]: word id list (None at padded positions)
        """
        mask_row = self.attention_mask[index]
        return [pos if mask_row[pos] != 0 else None
                for pos, _ in enumerate(self.input_ids[index])]

    def word_to_tokens(self, index, word_id, **kwargs):
        """map word and tokens (1:1 for word-level tokenization)

        Args:
            index (int): unused
            word_id (int): word index in sequence
        """
        return (word_id, word_id + 1)

    def to(self, device):
        """set device

        Args:
            device (str): support ["cpu", "cuda"]
        """
        self.input_ids = self.input_ids.to(device)
        self.token_type_ids = self.token_type_ids.to(device)
        self.attention_mask = self.attention_mask.to(device)
        return self
295
+
296
+
297
def load_embedding(tokenizer: WordTokenizer, glove_name: str):
    """ load embedding from stanford server or local cache.

    Args:
        tokenizer (WordTokenizer): non-pretrained tokenizer
        glove_name (str): GloVe file name, e.g. "glove.6B.300d.txt"

    Returns:
        Any: word embedding
    """
    zip_path = "save/" + glove_name + ".zip"
    if not os.path.exists(zip_path):
        download("http://downloads.cs.stanford.edu/nlp/data/glove.6B.zip#" + glove_name, zip_path)
        unzip_file(zip_path, "save/" + glove_name)
    # dimension is encoded in the file name, e.g. "...300d..." -> 300
    dim = int(glove_name.split(".")[-2][:-1])
    # random init; pad row zeroed; rows of known words overwritten below
    embedding_list = torch.rand((tokenizer.vocab_size, dim))
    embedding_list[tokenizer.pad_token_id] = torch.zeros((1, dim))
    with open("save/" + glove_name + "/" + glove_name, "r", encoding="utf8") as f:
        for line in f.readlines():
            fields = line.split(" ")
            token_id = tokenizer.convert_tokens_to_ids(fields[0])
            if isinstance(token_id, int) and token_id != tokenizer.unk_token_id:
                embedding_list[token_id] = torch.Tensor([float(v) for v in fields[1:]])

    return embedding_list
common/utils.py ADDED
@@ -0,0 +1,499 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import argparse
import functools
import importlib
import json
import os
import tarfile
import zipfile
# Callable moved to collections.abc; the bare `collections` alias was removed in Python 3.10
from collections.abc import Callable
from typing import List, Tuple

import requests
import torch
from ruamel import yaml
from torch import Tensor
from torch.nn.utils.rnn import pad_sequence
from tqdm import tqdm
16
class InputData():
    """input datas class
    """
    def __init__(self, inputs: List = None):
        """init input datas class

        if inputs is None:
            this class can be used to save all InputData in the history by 'merge_input_data(X:InputData)'
        else:
            this class can be used for model input.

        Args:
            inputs (List, optional): inputs with [tokenized_data, slot, intent]. Defaults to None.
        """
        # IDIOM FIX: compare against None with `is`, not `==`
        if inputs is None:
            self.slot = []
            self.intent = []
            self.input_ids = None
            self.token_type_ids = None
            self.attention_mask = None
            self.seq_lens = None
        else:
            self.input_ids = inputs[0].input_ids
            self.token_type_ids = None
            if hasattr(inputs[0], "token_type_ids"):
                self.token_type_ids = inputs[0].token_type_ids
            self.attention_mask = inputs[0].attention_mask
            if len(inputs) >= 2:
                self.slot = inputs[1]
            if len(inputs) >= 3:
                self.intent = inputs[2]
            # non-padded length per sequence, derived from the attention mask
            self.seq_lens = self.attention_mask.sum(-1)

    def get_inputs(self):
        """ get tokenized_data

        Returns:
            dict: tokenized data
        """
        res = {
            "input_ids": self.input_ids,
            "attention_mask": self.attention_mask
        }
        if self.token_type_ids is not None:
            res["token_type_ids"] = self.token_type_ids
        return res

    def merge_input_data(self, inp: "InputData"):
        """merge another InputData object with slot and intent

        Args:
            inp (InputData): another InputData object
        """
        self.slot += inp.slot
        self.intent += inp.intent

    def get_slot_mask(self, ignore_index: int) -> Tensor:
        """get slot mask

        Args:
            ignore_index (int): ignore index used in slot padding

        Returns:
            Tensor: mask tensor
        """
        mask = self.slot != ignore_index
        # position 0 is always kept (e.g. sequence-start token)
        mask[:, 0] = torch.ones_like(mask[:, 0]).to(self.slot.device)
        return mask

    def get_item(self, index, tokenizer=None, intent_map=None, slot_map=None, ignore_index=-100):
        """Decode one example to a human-readable dict for logging.

        Args:
            index (int): batch index of the example
            tokenizer: optional tokenizer used to decode input ids to tokens
            intent_map (list, optional): id -> intent-name list
            slot_map (list, optional): id -> slot-name list
            ignore_index (int): slot positions with this id are rendered as "#"

        Returns:
            dict: readable representation of example `index`
        """
        res = {"input_ids": self.input_ids[index]}
        if tokenizer is not None:
            res["tokens"] = [tokenizer.decode(x) for x in self.input_ids[index]]
        if intent_map is not None:
            intents = self.intent.tolist()
            if isinstance(intents[index], list):
                # multi-intent example
                res["intent"] = [intent_map[int(x)] for x in intents[index]]
            else:
                res["intent"] = intent_map[intents[index]]
        if slot_map is not None:
            res["slot"] = [slot_map[x] if x != ignore_index else "#" for x in self.slot.tolist()[index]]
        return res
98
+
99
class OutputData():
    """output data class
    """
    def __init__(self, intent_ids=None, slot_ids=None):
        """init output data class

        if intent_ids is None and slot_ids is None:
            this class can be used to save all OutputData in the history by 'merge_output_data(X:OutputData)'
        else:
            this class can be used to model output management.

        Args:
            intent_ids (Any, optional): list(Tensor) of intent ids / logits / strings. Defaults to None.
            slot_ids (Any, optional): list(Tensor) of slot ids / ids / strings. Defaults to None.
        """
        if intent_ids is None and slot_ids is None:
            # history-accumulator mode
            self.intent_ids = []
            self.slot_ids = []
        else:
            # unwrap classifier outputs when given directly
            self.intent_ids = (intent_ids.classifier_output
                               if isinstance(intent_ids, ClassifierOutputData)
                               else intent_ids)
            self.slot_ids = (slot_ids.classifier_output
                             if isinstance(slot_ids, ClassifierOutputData)
                             else slot_ids)

    def map_output(self, slot_map=None, intent_map=None):
        """ map intent or slot ids to intent or slot string.

        Args:
            slot_map (dict, optional): slot id-to-string map. Defaults to None.
            intent_map (dict, optional): intent id-to-string map. Defaults to None.
        """
        if self.slot_ids is not None and slot_map:
            # negative ids mark ignored positions and render as "#"
            self.slot_ids = [[slot_map[sid] if sid >= 0 else "#" for sid in seq]
                             for seq in self.slot_ids]
        if self.intent_ids is not None and intent_map:
            mapped = []
            for item in self.intent_ids:
                if isinstance(item, list):
                    # multi-intent prediction
                    mapped.append([intent_map[i] for i in item])
                else:
                    mapped.append(intent_map[item])
            self.intent_ids = mapped

    def merge_output_data(self, output: "OutputData"):
        """merge another OutData object with slot and intent

        Args:
            output (OutputData): another OutputData object
        """
        if output.slot_ids is not None:
            self.slot_ids += output.slot_ids
        if output.intent_ids is not None:
            self.intent_ids += output.intent_ids

    def save(self, path: str, original_dataset=None):
        """ save all OutputData in the history

        Args:
            path (str): save dir path
            original_dataset(Iterable): original dataset
        """
        with open(f"{path}/outputs.jsonl", "w") as f:
            if original_dataset is not None:
                for i, s, d in zip(self.intent_ids, self.slot_ids, original_dataset):
                    f.write(json.dumps({"pred_intent": i, "pred_slot": s, "text": d["text"], "golden_intent":d["intent"], "golden_slot":d["slot"]}) + "\n")
            else:
                for i, s in zip(self.intent_ids, self.slot_ids):
                    f.write(json.dumps({"pred_intent": i, "pred_slot": s}) + "\n")
170
+
171
+
172
class HiddenData():
    """Interactive data structure for all model components
    """
    def __init__(self, intent_hidden, slot_hidden):
        """init hidden data structure

        Args:
            intent_hidden (Any): sentence-level or intent hidden state
            slot_hidden (Any): token-level or slot hidden state
        """
        self.intent_hidden, self.slot_hidden = intent_hidden, slot_hidden
        self.inputs = None
        self.embedding = None

    def get_intent_hidden_state(self):
        """Return the current intent (sentence-level) hidden state."""
        return self.intent_hidden

    def get_slot_hidden_state(self):
        """Return the current slot (token-level) hidden state."""
        return self.slot_hidden

    def update_slot_hidden_state(self, hidden_state):
        """Replace the slot hidden state.

        Args:
            hidden_state (Any): new token-level hidden state
        """
        self.slot_hidden = hidden_state

    def update_intent_hidden_state(self, hidden_state):
        """Replace the intent hidden state.

        Args:
            hidden_state (Any): new sentence-level hidden state
        """
        self.intent_hidden = hidden_state

    def add_input(self, inputs: InputData or "HiddenData"):
        """add last model component input information to next model component

        Args:
            inputs (InputData or HiddenData): last model component input
        """
        self.inputs = inputs

    def add_embedding(self, embedding):
        """Attach the raw embedding produced by the encoder."""
        self.embedding = embedding
229
+
230
+
231
class ClassifierOutputData():
    """Classifier output data structure of all classifier components
    """
    def __init__(self, classifier_output):
        # raw logits / decoded ids emitted by a classifier head
        self.classifier_output = classifier_output
        # optional embedding of the classifier output, attached later if needed
        self.output_embedding = None
237
+
238
def remove_slot_ignore_index(inputs:InputData, outputs:OutputData, ignore_index=-100):
    """ remove padding or extra token in input id and output id

    For each example, every position whose gold slot equals `ignore_index`
    is dropped from BOTH the gold slots and the predicted slots, in place.

    Args:
        inputs (InputData): input data with input id
        outputs (OutputData): output data with decoded output id
        ignore_index (int, optional): ignore_index in input_ids. Defaults to -100.

    Returns:
        InputData: input data removed padding or extra token
        OutputData: output data removed padding or extra token
    """
    for idx, (gold_seq, pred_seq) in enumerate(zip(inputs.slot, outputs.slot_ids)):
        kept = [(g, p) for g, p in zip(list(gold_seq), list(pred_seq)) if g != ignore_index]
        inputs.slot[idx] = [g for g, _ in kept]
        outputs.slot_ids[idx] = [p for _, p in kept]
    return inputs, outputs
261
+
262
+
263
def pack_sequence(inputs:Tensor, seq_len:Tensor or List) -> Tensor:
    """pack sequence data to packed data without padding.

    Args:
        inputs (Tensor): list(Tensor) of packed sequence inputs
        seq_len (Tensor or List): list(Tensor) of sequence length

    Returns:
        Tensor: packed inputs

    Examples:
        inputs = [[x, y, z, PAD, PAD], [x, y, PAD, PAD, PAD]]

        seq_len = [3,2]

        return -> [x, y, z, x, y]
    """
    # keep only the first seq_len[i] positions of each row, then concatenate
    pieces = [sequence[:length] for sequence, length in zip(inputs, seq_len)]
    return torch.cat(pieces, dim=0)
284
+
285
+
286
def unpack_sequence(inputs:Tensor, seq_lens:Tensor or List, padding_value=0) -> Tensor:
    """unpack sequence data.

    Args:
        inputs (Tensor): list(Tensor) of packed sequence inputs
        seq_lens (Tensor or List): list(Tensor) of sequence length
        padding_value (int, optional): padding value. Defaults to 0.

    Returns:
        Tensor: unpacked inputs

    Examples:
        inputs = [x, y, z, x, y]

        seq_len = [3,2]

        return -> [[x, y, z, PAD, PAD], [x, y, PAD, PAD, PAD]]
    """
    chunks = []
    offset = 0
    # walk the flat buffer, slicing one sequence per length
    for length in seq_lens:
        chunks.append(inputs[offset:offset + length])
        offset += length
    return pad_sequence(chunks, batch_first=True, padding_value=padding_value)
310
+
311
+
312
def get_dict_with_key_prefix(input_dict: dict, prefix=""):
    """Return a copy of input_dict with `prefix` appended to each key.

    NOTE(review): despite the parameter name, the string is appended as a
    SUFFIX (key + prefix); kept as-is for caller compatibility.
    """
    return {key + prefix: value for key, value in input_dict.items()}
317
+
318
+
319
def download(url: str, fname: str):
    """download file from url to fname, streaming 1 KiB chunks with a progress bar

    Args:
        url (str): remote server url path
        fname (str): local path to save
    """
    response = requests.get(url, stream=True)
    total_size = int(response.headers.get('content-length', 0))
    progress = tqdm(
        desc=fname,
        total=total_size,
        unit='iB',
        unit_scale=True,
        unit_divisor=1024,
    )
    with open(fname, 'wb') as out_file, progress as bar:
        for chunk in response.iter_content(chunk_size=1024):
            written = out_file.write(chunk)
            bar.update(written)
338
+
339
+
340
+ def tar_gz_data(file_name:str):
341
+ """use "tar.gz" format to compress data
342
+
343
+ Args:
344
+ file_name (str): file path to tar
345
+ """
346
+ t = tarfile.open(f"{file_name}.tar.gz", "w:gz")
347
+
348
+ for root, dir, files in os.walk(f"{file_name}"):
349
+ print(root, dir, files)
350
+ for file in files:
351
+ fullpath = os.path.join(root, file)
352
+ t.add(fullpath)
353
+ t.close()
354
+
355
+
356
+ def untar(fname:str, dirs:str):
357
+ """ uncompress "tar.gz" file
358
+
359
+ Args:
360
+ fname (str): file path to untar
361
+ dirs (str): target dir path
362
+ """
363
+ t = tarfile.open(fname)
364
+ t.extractall(path=dirs)
365
+
366
+
367
+ def unzip_file(zip_src:str, dst_dir:str):
368
+ """ uncompress "zip" file
369
+
370
+ Args:
371
+ fname (str): file path to unzip
372
+ dirs (str): target dir path
373
+ """
374
+ r = zipfile.is_zipfile(zip_src)
375
+ if r:
376
+ if not os.path.exists(dst_dir):
377
+ os.mkdir(dst_dir)
378
+ fz = zipfile.ZipFile(zip_src, 'r')
379
+ for file in fz.namelist():
380
+ fz.extract(file, dst_dir)
381
+ else:
382
+ print('This is not zip')
383
+
384
+
385
+ def find_callable(target: str) -> Callable:
386
+ """ find callable function / class to instantiate
387
+
388
+ Args:
389
+ target (str): class/module path
390
+
391
+ Raises:
392
+ e: can not import module
393
+
394
+ Returns:
395
+ Callable: return function / class
396
+ """
397
+ target_module_path, target_callable_path = target.rsplit(".", 1)
398
+ target_callable_paths = [target_callable_path]
399
+
400
+ target_module = None
401
+ while len(target_module_path):
402
+ try:
403
+ target_module = importlib.import_module(target_module_path)
404
+ break
405
+ except Exception as e:
406
+ raise e
407
+ target_callable = target_module
408
+ for attr in reversed(target_callable_paths):
409
+ target_callable = getattr(target_callable, attr)
410
+
411
+ return target_callable
412
+
413
+
414
+ def instantiate(config, target="_model_target_", partial="_model_partial_"):
415
+ """ instantiate object by config.
416
+
417
+ Modified from https://github.com/HIT-SCIR/ltp/blob/main/python/core/ltp_core/models/utils/instantiate.py.
418
+
419
+ Args:
420
+ config (Any): configuration
421
+ target (str, optional): key to assign the class to be instantiated. Defaults to "_model_target_".
422
+ partial (str, optional): key to judge object whether should be instantiated partially. Defaults to "_model_partial_".
423
+
424
+ Returns:
425
+ Any: instantiated object
426
+ """
427
+ if isinstance(config, dict) and target in config:
428
+ target_path = config.get(target)
429
+ target_callable = find_callable(target_path)
430
+
431
+ is_partial = config.get(partial, False)
432
+ target_args = {
433
+ key: instantiate(value)
434
+ for key, value in config.items()
435
+ if key not in [target, partial]
436
+ }
437
+
438
+ if is_partial:
439
+ return functools.partial(target_callable, **target_args)
440
+ else:
441
+ return target_callable(**target_args)
442
+ elif isinstance(config, dict):
443
+ return {key: instantiate(value) for key, value in config.items()}
444
+ else:
445
+ return config
446
+
447
+
448
+ def load_yaml(file):
449
+ """ load data from yaml files.
450
+
451
+ Args:
452
+ file (str): yaml file path.
453
+
454
+ Returns:
455
+ Any: data
456
+ """
457
+ with open(file, encoding="utf-8") as stream:
458
+ try:
459
+ return yaml.safe_load(stream)
460
+ except yaml.YAMLError as exc:
461
+ raise exc
462
+
463
+ def from_configured(configure_name_or_file:str, model_class:Callable, config_prefix="./config/", **input_config):
464
+ """load module from pre-configured data
465
+
466
+ Args:
467
+ configure_name_or_file (str): config path -> {config_prefix}/{configure_name_or_file}.yaml
468
+ model_class (Callable): module class
469
+ config_prefix (str, optional): configuration root path. Defaults to "./config/".
470
+
471
+ Returns:
472
+ Any: instantiated object.
473
+ """
474
+ if os.path.exists(configure_name_or_file):
475
+ configure_file=configure_name_or_file
476
+ else:
477
+ configure_file= os.path.join(config_prefix, configure_name_or_file+".yaml")
478
+ config = load_yaml(configure_file)
479
+ config.update(input_config)
480
+ return model_class(**config)
481
+
482
+ def save_json(file_path, obj):
483
+ with open(file_path, 'w', encoding="utf8") as fw:
484
+ fw.write(json.dumps(obj))
485
+
486
+ def load_json(file_path):
487
+ with open(file_path, 'r', encoding="utf8") as fw:
488
+ res =json.load(fw)
489
+ return res
490
+
491
+ def str2bool(v):
492
+ if isinstance(v, bool):
493
+ return v
494
+ if v.lower() in ('yes', 'true', 't', 'y', '1'):
495
+ return True
496
+ elif v.lower() in ('no', 'false', 'f', 'n', '0'):
497
+ return False
498
+ else:
499
+ raise argparse.ArgumentTypeError('Boolean value expected.')
config/README.md ADDED
@@ -0,0 +1,348 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Configuation
2
+
3
+ ## 1. Introduction
4
+
5
+ Configuration is divided into fine-grained reusable modules:
6
+
7
+ - `base`: basic configuration
8
+ - `logger`: logger setting
9
+ - `model_manager`: loading and saving model parameters
10
+ - `accelerator`: whether to enable multi-GPU
11
+ - `dataset`: dataset management
12
+ - `evaluator`: evaluation and metrics setting.
13
+ - `tokenizer`: Tokenizer initiation and tokenizing setting.
14
+ - `optimizer`: Optimizer initiation setting.
15
+ - `scheduler`: scheduler initiation setting.
16
+ - `model`: model construction setting.
17
+
18
+ From Sec. 2 to Sec. 11, we will describe the configuration in detail. Or you can see [Examples](examples/README.md) for Quick Start.
19
+
20
+ NOTE: `_*_` config are reserved fields in OpenSLU.
21
+
22
+ ## Configuration Item Script
23
+ In OpenSLU configuration, we support simple calculation script for each configuration item. For example, we can get `dataset_name` by using `{dataset.dataset_name}`, and fill its value into python script `'LightChen2333/agif-slu-' + '*'`.(Without '', `{dataset.dataset_name}` value will be treated as a variable).
24
+
25
+ NOTE: each item with `{}` will be treated as python script.
26
+ ```yaml
27
+ tokenizer:
28
+ _from_pretrained_: "'LightChen2333/agif-slu-' + '{dataset.dataset_name}'" # Support simple calculation script
29
+
30
+ ```
31
+
32
+ ## `base` Config
33
+ ```yaml
34
+ # `start_time` will generated automatically when start any config script, needless to be assigned.
35
+ # start_time: xxxxxxxx
36
+ base:
37
+ name: "OpenSLU" # project/logger name
38
+ multi_intent: false # whether to enable multi-intent setting
39
+ train: True # enable train else enable zero-shot
40
+ test: True # enable test during train.
41
+ device: cuda # device for cuda/cpu
42
+ seed: 42 # random seed
43
+ best_key: EMA # save model by which metric[intent_acc/slot_f1/EMA]
44
+ tokenizer_name: word_tokenizer # tokenizer: word_tokenizer for no pretrained model, else use [AutoTokenizer] tokenizer name
45
+ add_special_tokens: false # whether add [CLS], [SEP] special tokens
46
+ epoch_num: 300 # train epoch num
47
+ # eval_step: 280 # if eval_by_epoch = false and eval_step > 0, will evaluate model by steps
48
+ eval_by_epoch: true # evaluate model by epoch
49
+ batch_size: 16 # batch size
50
+ ```
51
+ ## `logger` Config
52
+ ```yaml
53
+ logger:
54
+ # `wandb` is supported both in single- multi-GPU,
55
+ # `tensorboard` is only supported in multi-GPU,
56
+ # and `fitlog` is only supported in single-GPU
57
+ logger_type: wandb
58
+ ```
59
+ ## `model_manager` Config
60
+ ```yaml
61
+ model_manager:
62
+ # if load_dir != `null`, OpenSLU will try to load checkpoint to continue training,
63
+ # if load_dir == `null`, OpenSLU will restart training.
64
+ load_dir: null
65
+ # The dir path to save model and training state.
66
+ # if save_dir == `null` model will be saved to `save/{start_time}`
67
+ save_dir: save/stack
68
+ # save_mode can be selected in [save-by-step, save-by-eval]
69
+ # `save-by-step` means save model only by {save_step} steps without evaluation.
70
+ # `save-by-eval` means save model by best validation performance
71
+ save_mode: save-by-eval
72
+ # save_step: 100 # only enabled when save_mode == `save-by-step`
73
+ max_save_num: 1 # The number of best models will be saved.
74
+ ```
75
+ ## `accelerator` Config
76
+ ```yaml
77
+ accelerator:
78
+ use_accelerator: false # will enable `accelerator` if use_accelerator is `true`
79
+ ```
80
+ ## `dataset` Config
81
+ ```yaml
82
+ dataset:
83
+ # support load model from hugging-face.
84
+ # dataset_name can be selected in [atis, snips, mix-atis, mix-snips]
85
+ dataset_name: atis
86
+ # support assign any one of dataset path and other dataset split is the same as split in `dataset_name`
87
+ # train: atis # support load model from hugging-face or assigned local data path.
88
+ # validation: {root}/ATIS/dev.jsonl
89
+ # test: {root}/ATIS/test.jsonl
90
+ ```
91
+ ## `evaluator` Config
92
+ ```yaml
93
+ evaluator:
94
+ best_key: EMA # the metric to judge the best model
95
+ eval_by_epoch: true # Evaluate after an epoch if `true`.
96
+ # Evaluate after {eval_step} steps if eval_by_epoch == `false`.
97
+ # eval_step: 1800
98
+ # metric is supported the metric as below:
99
+ # - intent_acc
100
+ # - slot_f1
101
+ # - EMA
102
+ # - intent_f1
103
+ # - macro_intent_f1
104
+ # - micro_intent_f1
105
+ # NOTE: [intent_f1, macro_intent_f1, micro_intent_f1] is only supported in multi-intent setting. intent_f1 and macro_intent_f1 is the same metric.
106
+ metric:
107
+ - intent_acc
108
+ - slot_f1
109
+ - EMA
110
+ ```
111
+ ## `tokenizer` Config
112
+ ```yaml
113
+ tokenizer:
114
+ # Init tokenizer. Support `word_tokenizer` and other tokenizers in huggingface.
115
+ _tokenizer_name_: word_tokenizer
116
+ # if `_tokenizer_name_` is not assigned, you can load pretrained tokenizer from hugging-face.
117
+ # _from_pretrained_: LightChen2333/stack-propagation-slu-atis
118
+ _padding_side_: right # the padding side of tokenizer, support [left/ right]
119
+ # Align mode between text and slot, support [fast/ general],
120
+ # `general` is supported in most tokenizer, `fast` is supported only in small portion of tokenizers.
121
+ _align_mode_: fast
122
+ _to_lower_case_: true
123
+ add_special_tokens: false # other tokenizer args, you can add other args to tokenizer initialization except `_*_` format args
124
+ max_length: 512
125
+
126
+ ```
127
+ ## `optimizer` Config
128
+ ```yaml
129
+ optimizer:
130
+ _model_target_: torch.optim.Adam # Optimizer class/ function return Optimizer object
131
+ _model_partial_: true # partial load configuration. Here will add model.parameters() to complete all Optimizer parameters
132
+ lr: 0.001 # learning rate
133
+ weight_decay: 1e-6 # weight decay
134
+ ```
135
+ ## `scheduler` Config
136
+ ```yaml
137
+ scheduler:
138
+ _model_target_: transformers.get_scheduler
139
+ _model_partial_: true # partial load configuration. Here will add optimizer, num_training_steps to complete all Optimizer parameters
140
+ name : "linear"
141
+ num_warmup_steps: 0
142
+ ```
143
+ ## `model` Config
144
+ ```yaml
145
+ model:
146
+ # _from_pretrained_: LightChen2333/stack-propagation-slu-atis # load model from hugging-face and is not need to assigned any parameters below.
147
+ _model_target_: model.OpenSLUModel # the general model class, can automatically build the model through configuration.
148
+
149
+ encoder:
150
+ _model_target_: model.encoder.AutoEncoder # auto-encoder to autoload provided encoder model
151
+ encoder_name: self-attention-lstm # support [lstm/ self-attention-lstm] and other pretrained models those hugging-face supported
152
+
153
+ embedding: # word embedding layer
154
+ # load_embedding_name: glove.6B.300d.txt # support autoload glove embedding.
155
+ embedding_dim: 256 # embedding dim
156
+ dropout_rate: 0.5 # dropout ratio after embedding
157
+
158
+ lstm:
159
+ layer_num: 1 # lstm configuration
160
+ bidirectional: true
161
+ output_dim: 256 # module should set output_dim for autoload input_dim in next module. You can also set input_dim manually.
162
+ dropout_rate: 0.5
163
+
164
+ attention: # self-attention configuration
165
+ hidden_dim: 1024
166
+ output_dim: 128
167
+ dropout_rate: 0.5
168
+
169
+ return_with_input: true # add inputs information, like attention_mask, to decoder module.
170
+ return_sentence_level_hidden: false # if return sentence representation to decoder module
171
+
172
+ decoder:
173
+ _model_target_: model.decoder.StackPropagationDecoder # decoder name
174
+ interaction:
175
+ _model_target_: model.decoder.interaction.StackInteraction # interaction module name
176
+ differentiable: false # interaction module config
177
+
178
+ intent_classifier:
179
+ _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier # intent classifier module name
180
+ layer_num: 1
181
+ bidirectional: false
182
+ hidden_dim: 64
183
+ force_ratio: 0.9 # teacher-force ratio
184
+ embedding_dim: 8 # intent embedding dim
185
+ ignore_index: -100 # ignore index to compute loss and metric
186
+ dropout_rate: 0.5
187
+ mode: "token-level-intent" # decode mode, support [token-level-intent, intent, slot]
188
+ use_multi: "{base.multi_intent}"
189
+ return_sentence_level: true # whether to return sentence level prediction as decoded input
190
+
191
+ slot_classifier:
192
+ _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
193
+ layer_num: 1
194
+ bidirectional: false
195
+ force_ratio: 0.9
196
+ hidden_dim: 64
197
+ embedding_dim: 32
198
+ ignore_index: -100
199
+ dropout_rate: 0.5
200
+ mode: "slot"
201
+ use_multi: false
202
+ return_sentence_level: false
203
+ ```
204
+
205
+ ## Implementing a New Model
206
+
207
+ ### 1. Interaction Re-Implement
208
+ Here we take `DCA-Net` as an example:
209
+
210
+ In most cases, you just need to rewrite `Interaction` module:
211
+
212
+ ```python
213
+ from common.utils import HiddenData
214
+ from model.decoder.interaction import BaseInteraction
215
+ class DCANetInteraction(BaseInteraction):
216
+ def __init__(self, **config):
217
+ super().__init__(**config)
218
+ self.T_block1 = I_S_Block(self.config["output_dim"], self.config["attention_dropout"], self.config["num_attention_heads"])
219
+ ...
220
+
221
+ def forward(self, encode_hidden: HiddenData, **kwargs):
222
+ ...
223
+ ```
224
+
225
+ and then you should configure your module:
226
+ ```yaml
227
+ base:
228
+ ...
229
+
230
+ optimizer:
231
+ ...
232
+
233
+ scheduler:
234
+ ...
235
+
236
+ model:
237
+ _model_target_: model.OpenSLUModel
238
+ encoder:
239
+ _model_target_: model.encoder.AutoEncoder
240
+ encoder_name: lstm
241
+
242
+ embedding:
243
+ load_embedding_name: glove.6B.300d.txt
244
+ embedding_dim: 300
245
+ dropout_rate: 0.5
246
+
247
+ lstm:
248
+ dropout_rate: 0.5
249
+ output_dim: 128
250
+ layer_num: 2
251
+ bidirectional: true
252
+ output_dim: "{model.encoder.lstm.output_dim}"
253
+ return_with_input: true
254
+ return_sentence_level_hidden: false
255
+
256
+ decoder:
257
+ _model_target_: model.decoder.DCANetDecoder
258
+ interaction:
259
+ _model_target_: model.decoder.interaction.DCANetInteraction
260
+ output_dim: "{model.encoder.output_dim}"
261
+ attention_dropout: 0.5
262
+ num_attention_heads: 8
263
+
264
+ intent_classifier:
265
+ _model_target_: model.decoder.classifier.LinearClassifier
266
+ mode: "intent"
267
+ input_dim: "{model.decoder.output_dim.output_dim}"
268
+ ignore_index: -100
269
+
270
+ slot_classifier:
271
+ _model_target_: model.decoder.classifier.LinearClassifier
272
+ mode: "slot"
273
+ input_dim: "{model.decoder.output_dim.output_dim}"
274
+ ignore_index: -100
275
+ ```
276
+
277
+ Oops, you finish all model construction. You can run script as follows to train model:
278
+ ```shell
279
+ python run.py -cp config/dca_net.yaml [-ds atis]
280
+ ```
281
+ ### 2. Decoder Re-Implement
282
+ Sometimes, `interaction then classification` order can not meet your needs. Therefore, you should simply rewrite decoder for flexible interaction order:
283
+
284
+ Here, we take `stack-propagation` as an example:
285
+ 1. We should rewrite interaction module for `stack-propagation`
286
+ ```python
287
+ from common.utils import ClassifierOutputData, HiddenData
288
+ from model.decoder.interaction.base_interaction import BaseInteraction
289
+ class StackInteraction(BaseInteraction):
290
+ def __init__(self, **config):
291
+ super().__init__(**config)
292
+ ...
293
+
294
+ def forward(self, intent_output: ClassifierOutputData, encode_hidden: HiddenData):
295
+ ...
296
+ ```
297
+ 2. We should rewrite `StackPropagationDecoder` for stack-propagation interaction order:
298
+ ```python
299
+ from common.utils import HiddenData, OutputData
300
+ class StackPropagationDecoder(BaseDecoder):
301
+
302
+ def forward(self, hidden: HiddenData):
303
+ pred_intent = self.intent_classifier(hidden)
304
+ hidden = self.interaction(pred_intent, hidden)
305
+ pred_slot = self.slot_classifier(hidden)
306
+ return OutputData(pred_intent, pred_slot)
307
+ ```
308
+
309
+ 3. Then we can easily combine general model by `config/stack-propagation.yaml` configuration file:
310
+ ```yaml
311
+ base:
312
+ ...
313
+
314
+ ...
315
+
316
+ model:
317
+ _model_target_: model.OpenSLUModel
318
+
319
+ encoder:
320
+ ...
321
+
322
+ decoder:
323
+ _model_target_: model.decoder.StackPropagationDecoder
324
+ interaction:
325
+ _model_target_: model.decoder.interaction.StackInteraction
326
+ differentiable: false
327
+
328
+ intent_classifier:
329
+ _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
330
+ ... # parameters needed __init__(*)
331
+ mode: "token-level-intent"
332
+ use_multi: false
333
+ return_sentence_level: true
334
+
335
+ slot_classifier:
336
+ _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
337
+ ... # parameters needed __init__(*)
338
+ mode: "slot"
339
+ use_multi: false
340
+ return_sentence_level: false
341
+ ```
342
+ 4. You can run script as follows to train model:
343
+ ```shell
344
+ python run.py -cp config/stack-propagation.yaml
345
+ ```
346
+
347
+
348
+
config/app.yaml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ host: 127.0.0.1
2
+ port: 7860
3
+
4
+ is_push_to_public: false
5
+ save-path: save/stack/outputs.jsonl
6
+ page-size: 2
config/decoder/interaction/stack-propagation.yaml ADDED
@@ -0,0 +1 @@
 
 
1
+ differentiable: false
config/examples/README.md ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Examples
2
+
3
+ Here we introduce some usage of our famework by configuration.
4
+
5
+ ## Reload to train
6
+
7
+ Firstly, you can run this script to train a `joint-bert` model:
8
+ ```shell
9
+ python run.py -cp config/examples/normal.yaml
10
+ ```
11
+
12
+ and you can use `kill` or `Ctrl+C` to kill the training process.
13
+
14
+ Then, to reload model and continue training, you can run `reload_to_train.yaml` to reload checkpoint and training state.
15
+ ```shell
16
+ python run.py -cp config/examples/reload_to_train.yaml
17
+ ```
18
+
19
+ The main difference in `reload_to_train.yaml` is the `model_manager` configuration item:
20
+ ```yaml
21
+ ...
22
+ model_manager:
23
+ load_train_state: True # set to True
24
+ load_dir: save/joint_bert # not null
25
+ ...
26
+ ...
27
+ ```
28
+
29
+ ## Load from Pre-finetuned model.
30
+ We upload all models to [LightChen2333](https://huggingface.co/LightChen2333). You can load those model by simple configuration.
31
+ In `from_pretrained.yaml` and `from_pretrained_multi.yaml`, we show two example scripts to load from hugging face in single- and multi-intent, respectively. The key configuration items are as below:
32
+ ```yaml
33
+ tokenizer:
34
+ _from_pretrained_: "'LightChen2333/agif-slu-' + '{dataset.dataset_name}'" # Support simple calculation script
35
+
36
+ model:
37
+ _from_pretrained_: "'LightChen2333/agif-slu-' + '{dataset.dataset_name}'"
38
+ ```
config/examples/from_pretrained.yaml ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "NVIDIA GeForce RTX 2080 Ti"
2
+
3
+ base:
4
+ name: "OpenSLUv1"
5
+ train: false
6
+ test: true
7
+ device: cpu
8
+ seed: 42
9
+ epoch_num: 300
10
+ batch_size: 16
11
+
12
+ logger:
13
+ logger_type: local # wandb is supported both in single- multi-GPU, tensorboard is only supported in multi-GPU, and fitlog is only supported in single-GPU
14
+
15
+ model_manager:
16
+ load_dir: null
17
+ save_dir: save/joint_bert
18
+ save_mode: save-by-eval # save-by-step
19
+ # save_step: 100
20
+ max_save_num: 1
21
+
22
+ accelerator:
23
+ use_accelerator: false
24
+
25
+ dataset:
26
+ dataset_name: atis
27
+
28
+ evaluator:
29
+ best_key: EMA
30
+ eval_by_epoch: true
31
+ # eval_step: 1800
32
+ metric:
33
+ - intent_acc
34
+ - slot_f1
35
+ - EMA
36
+
37
+ tokenizer:
38
+ _from_pretrained_: "'LightChen2333/joint-bert-slu-' + '{dataset.dataset_name}'"
39
+
40
+ optimizer:
41
+ _model_target_: torch.optim.Adam
42
+ _model_partial_: true
43
+ lr: 0.001
44
+ weight_decay: 1e-6
45
+
46
+ scheduler:
47
+ _model_target_: transformers.get_scheduler
48
+ _model_partial_: true
49
+ name : "linear"
50
+ num_warmup_steps: 0
51
+
52
+ model:
53
+ _from_pretrained_: "'LightChen2333/joint-bert-slu-' + '{dataset.dataset_name}'"
config/examples/from_pretrained_multi.yaml ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "NVIDIA GeForce RTX 2080 Ti"
2
+
3
+ base:
4
+ name: "OpenSLUv1"
5
+ multi_intent: true
6
+ train: false
7
+ test: true
8
+ device: cuda
9
+ seed: 42
10
+ epoch_num: 300
11
+ batch_size: 16
12
+
13
+
14
+ logger:
15
+ logger_type: wandb # wandb is supported both in single- multi-GPU, tensorboard is only supported in multi-GPU, and fitlog is only supported in single-GPU
16
+
17
+ model_manager:
18
+ load_dir: null
19
+ save_dir: save/joint_bert
20
+ save_mode: save-by-eval # save-by-step
21
+ # save_step: 100
22
+ max_save_num: 1
23
+
24
+ accelerator:
25
+ use_accelerator: false
26
+
27
+ dataset:
28
+ dataset_name: atis
29
+
30
+ evaluator:
31
+ best_key: EMA
32
+ eval_by_epoch: true
33
+ # eval_step: 1800
34
+ metric:
35
+ - intent_acc
36
+ - slot_f1
37
+ - EMA
38
+
39
+ tokenizer:
40
+ _from_pretrained_: "'LightChen2333/agif-slu-' + '{dataset.dataset_name}'"
41
+
42
+ optimizer:
43
+ _model_target_: torch.optim.Adam
44
+ _model_partial_: true
45
+ lr: 0.001
46
+ weight_decay: 1e-6
47
+
48
+ scheduler:
49
+ _model_target_: transformers.get_scheduler
50
+ _model_partial_: true
51
+ name : "linear"
52
+ num_warmup_steps: 0
53
+
54
+ model:
55
+ _from_pretrained_: "'LightChen2333/agif-slu-' + '{dataset.dataset_name}'"
config/examples/normal.yaml ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "Tesla V100-SXM2-16GB"
2
+
3
+ base:
4
+ name: "OpenSLU-test"
5
+ train: True
6
+ test: True
7
+ device: cuda
8
+ seed: 42
9
+ epoch_num: 300
10
+ batch_size: 128
11
+
12
+ model_manager:
13
+ load_dir: null
14
+ save_dir: save/joint_bert
15
+
16
+ evaluator:
17
+ best_key: EMA
18
+ eval_by_epoch: true
19
+ # eval_step: 1800
20
+ metric:
21
+ - intent_acc
22
+ - slot_f1
23
+ - EMA
24
+
25
+ accelerator:
26
+ use_accelerator: false
27
+
28
+ dataset:
29
+ dataset_name: atis
30
+
31
+ tokenizer:
32
+ _tokenizer_name_: bert-base-uncased
33
+ _padding_side_: right
34
+ _align_mode_: general
35
+ add_special_tokens: true
36
+
37
+ optimizer:
38
+ _model_target_: torch.optim.AdamW
39
+ _model_partial_: true
40
+ lr: 4e-6
41
+ weight_decay: 1e-8
42
+
43
+ scheduler:
44
+ _model_target_: transformers.get_scheduler
45
+ _model_partial_: true
46
+ name : "linear"
47
+ num_warmup_steps: 0
48
+
49
+ model:
50
+ _model_target_: model.open_slu_model.OpenSLUModel
51
+ ignore_index: -100
52
+ encoder:
53
+ _model_target_: model.encoder.AutoEncoder
54
+ encoder_name: bert-base-uncased
55
+ output_dim: 768
56
+ return_with_input: true
57
+ return_sentence_level_hidden: true
58
+
59
+ decoder:
60
+ _model_target_: model.decoder.base_decoder.BaseDecoder
61
+ intent_classifier:
62
+ _model_target_: model.decoder.classifier.LinearClassifier
63
+ mode: "intent"
64
+ ignore_index: -100
65
+
66
+
67
+ slot_classifier:
68
+ _model_target_: model.decoder.classifier.LinearClassifier
69
+ mode: "slot"
70
+ ignore_index: -100
config/examples/reload_to_train.yaml ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "Tesla V100-SXM2-16GB"
2
+
3
+ base:
4
+ name: "OpenSLU-test"
5
+ train: True
6
+ test: True
7
+ device: cuda
8
+ seed: 42
9
+ epoch_num: 300
10
+ batch_size: 128
11
+
12
+ model_manager:
13
+ load_train_state: True
14
+ load_dir: save/joint_bert
15
+ save_dir: save/joint_bert
16
+
17
+ evaluator:
18
+ best_key: EMA
19
+ eval_by_epoch: true
20
+ # eval_step: 1800
21
+ metric:
22
+ - intent_acc
23
+ - slot_f1
24
+ - EMA
25
+
26
+ accelerator:
27
+ use_accelerator: false
28
+
29
+ dataset:
30
+ dataset_name: atis
31
+
32
+ tokenizer:
33
+ _tokenizer_name_: bert-base-uncased
34
+ _padding_side_: right
35
+ _align_mode_: general
36
+ add_special_tokens: true
37
+
38
+ optimizer:
39
+ _model_target_: torch.optim.AdamW
40
+ _model_partial_: true
41
+ lr: 4e-6
42
+ weight_decay: 1e-8
43
+
44
+ scheduler:
45
+ _model_target_: transformers.get_scheduler
46
+ _model_partial_: true
47
+ name : "linear"
48
+ num_warmup_steps: 0
49
+
50
+ model:
51
+ _model_target_: model.open_slu_model.OpenSLUModel
52
+ ignore_index: -100
53
+ encoder:
54
+ _model_target_: model.encoder.AutoEncoder
55
+ encoder_name: bert-base-uncased
56
+ output_dim: 768
57
+ return_with_input: true
58
+ return_sentence_level_hidden: true
59
+
60
+ decoder:
61
+ _model_target_: model.decoder.base_decoder.BaseDecoder
62
+ intent_classifier:
63
+ _model_target_: model.decoder.classifier.LinearClassifier
64
+ mode: "intent"
65
+ ignore_index: -100
66
+
67
+
68
+ slot_classifier:
69
+ _model_target_: model.decoder.classifier.LinearClassifier
70
+ mode: "slot"
71
+ ignore_index: -100
config/reproduction/atis/bi-model.yaml ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "NVIDIA GeForce RTX 2080 Ti"
2
+
3
+ base:
4
+ name: "OpenSLUv1"
5
+ train: true
6
+ test: true
7
+ device: cuda
8
+ seed: 42
9
+ epoch_num: 300
10
+ batch_size: 16
11
+
12
+ model_manager:
13
+ load_dir: null
14
+ save_dir: save/bi-model-atis
15
+
16
+ accelerator:
17
+ use_accelerator: false
18
+
19
+ dataset:
20
+ dataset_name: atis
21
+
22
+ evaluator:
23
+ best_key: EMA
24
+ eval_by_epoch: true
25
+ # eval_step: 1800
26
+ metric:
27
+ - intent_acc
28
+ - slot_f1
29
+ - EMA
30
+
31
+
32
+ tokenizer:
33
+ _tokenizer_name_: word_tokenizer
34
+ _padding_side_: right
35
+ _align_mode_: fast
36
+ add_special_tokens: false
37
+ max_length: 512
38
+
39
+ optimizer:
40
+ _model_target_: torch.optim.Adam
41
+ _model_partial_: true
42
+ lr: 0.001
43
+ weight_decay: 1e-6
44
+
45
+ scheduler:
46
+ _model_target_: transformers.get_scheduler
47
+ _model_partial_: true
48
+ name : "linear"
49
+ num_warmup_steps: 0
50
+
51
+ model:
52
+ _model_target_: model.OpenSLUModel
53
+
54
+ encoder:
55
+ _model_target_: model.encoder.BiEncoder
56
+ intent_encoder:
57
+ _model_target_: model.encoder.AutoEncoder
58
+ encoder_name: lstm
59
+
60
+ embedding:
61
+ embedding_dim: 256
62
+ dropout_rate: 0.4
63
+
64
+ lstm:
65
+ dropout_rate: 0.5
66
+ output_dim: 256
67
+ layer_num: 2
68
+ bidirectional: true
69
+
70
+ return_with_input: true
71
+ return_sentence_level_hidden: false
72
+
73
+ slot_encoder:
74
+ _model_target_: model.encoder.AutoEncoder
75
+ encoder_name: lstm
76
+
77
+ embedding:
78
+ embedding_dim: 256
79
+ dropout_rate: 0.4
80
+
81
+ lstm:
82
+ dropout_rate: 0.5
83
+ output_dim: 256
84
+ layer_num: 2
85
+ bidirectional: true
86
+
87
+ return_with_input: true
88
+ return_sentence_level_hidden: false
89
+
90
+ decoder:
91
+ _model_target_: model.decoder.BaseDecoder
92
+ # teacher_forcing: true
93
+ interaction:
94
+ _model_target_: model.decoder.interaction.BiModelInteraction
95
+ output_dim: 256
96
+ dropout_rate: 0.4
97
+
98
+ intent_classifier:
99
+ _model_target_: model.decoder.classifier.LinearClassifier
100
+ mode: "intent"
101
+ ignore_index: -100
102
+
103
+ slot_classifier:
104
+ _model_target_: model.decoder.classifier.LinearClassifier
105
+ mode: "slot"
106
+ ignore_index: -100
config/reproduction/atis/dca-net.yaml ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "Tesla P100-PCIE-16GB"
2
+
3
+ base:
4
+ name: "OpenSLUv1"
5
+ train: true
6
+ test: true
7
+ device: cuda
8
+ seed: 42
9
+ epoch_num: 300
10
+ batch_size: 16
11
+
12
+ model_manager:
13
+ load_dir: null
14
+ save_dir: save/dca-net-atis
15
+
16
+ accelerator:
17
+ use_accelerator: false
18
+
19
+ dataset:
20
+ dataset_name: atis
21
+
22
+ evaluator:
23
+ best_key: EMA
24
+ eval_by_epoch: true
25
+ # eval_step: 1800
26
+ metric:
27
+ - intent_acc
28
+ - slot_f1
29
+ - EMA
30
+
31
+ tokenizer:
32
+ _tokenizer_name_: word_tokenizer
33
+ _padding_side_: right
34
+ _align_mode_: fast
35
+ add_special_tokens: false
36
+ max_length: 512
37
+
38
+ optimizer:
39
+ _model_target_: torch.optim.Adam
40
+ _model_partial_: true
41
+ lr: 0.001
42
+ weight_decay: 1e-6
43
+
44
+ scheduler:
45
+ _model_target_: transformers.get_scheduler
46
+ _model_partial_: true
47
+ name : "linear"
48
+ num_warmup_steps: 0
49
+
50
+ model:
51
+ _model_target_: model.OpenSLUModel
52
+ encoder:
53
+ _model_target_: model.encoder.AutoEncoder
54
+ encoder_name: lstm
55
+
56
+ embedding:
57
+ load_embedding_name: glove.6B.300d.txt
58
+ embedding_dim: 300
59
+ dropout_rate: 0.5
60
+
61
+ lstm:
62
+ dropout_rate: 0.5
63
+ output_dim: 128
64
+ layer_num: 2
65
+ bidirectional: true
66
+ output_dim: "{model.encoder.lstm.output_dim}"
67
+ return_with_input: true
68
+ return_sentence_level_hidden: false
69
+
70
+ decoder:
71
+ _model_target_: model.decoder.DCANetDecoder
72
+ interaction:
73
+ _model_target_: model.decoder.interaction.DCANetInteraction
74
+ output_dim: "{model.encoder.output_dim}"
75
+ attention_dropout: 0.5
76
+ num_attention_heads: 8
77
+
78
+ intent_classifier:
79
+ _model_target_: model.decoder.classifier.LinearClassifier
80
+ mode: "intent"
81
+ input_dim: "{model.encoder.output_dim}"
82
+ ignore_index: -100
83
+
84
+ slot_classifier:
85
+ _model_target_: model.decoder.classifier.LinearClassifier
86
+ mode: "slot"
87
+ input_dim: "{model.encoder.output_dim}"
88
+ ignore_index: -100
config/reproduction/atis/deberta.yaml ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "Tesla V100-SXM2-16GB"
2
+
3
+ base:
4
+ name: "OpenSLUv1"
5
+ train: true
6
+ test: true
7
+ device: cuda
8
+ seed: 42
9
+ epoch_num: 300
10
+ batch_size: 32
11
+
12
+ model_manager:
13
+ load_dir: null
14
+ save_dir: save/deberta-atis
15
+
16
+ dataset:
17
+ dataset_name: atis
18
+
19
+ evaluator:
20
+ best_key: EMA
21
+ eval_by_epoch: true
22
+ # eval_step: 1800
23
+ metric:
24
+ - intent_acc
25
+ - slot_f1
26
+ - EMA
27
+
28
+ tokenizer:
29
+ _tokenizer_name_: microsoft/deberta-v3-base
30
+ _padding_side_: right
31
+ add_special_tokens: true
32
+ max_length: 512
33
+
34
+ optimizer:
35
+ _model_target_: torch.optim.AdamW
36
+ _model_partial_: true
37
+ lr: 2e-5
38
+ weight_decay: 1e-8
39
+
40
+ scheduler:
41
+ _model_target_: transformers.get_scheduler
42
+ _model_partial_: true
43
+ name : "linear"
44
+ num_warmup_steps: 0
45
+
46
+ model:
47
+ _model_target_: model.open_slu_model.OpenSLUModel
48
+ ignore_index: -100
49
+ encoder:
50
+ _model_target_: model.encoder.AutoEncoder
51
+ encoder_name: microsoft/deberta-v3-base
52
+ output_dim: 768
53
+ return_with_input: true
54
+ return_sentence_level_hidden: true
55
+
56
+ decoder:
57
+ _model_target_: model.decoder.base_decoder.BaseDecoder
58
+ intent_classifier:
59
+ _model_target_: model.decoder.classifier.LinearClassifier
60
+ mode: "intent"
61
+ ignore_index: -100
62
+
63
+
64
+ slot_classifier:
65
+ _model_target_: model.decoder.classifier.LinearClassifier
66
+ mode: "slot"
67
+ ignore_index: -100
config/reproduction/atis/electra.yaml ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "Tesla V100-SXM2-16GB"
2
+
3
+ base:
4
+ name: "OpenSLUv1"
5
+ train: True
6
+ test: True
7
+ device: cuda
8
+ seed: 42
9
+ epoch_num: 300
10
+ batch_size: 32
11
+
12
+ model_manager:
13
+ load_dir: null
14
+ save_dir: save/electra-atis
15
+
16
+ evaluator:
17
+ best_key: EMA
18
+ eval_by_epoch: true
19
+ # eval_step: 1800
20
+ metric:
21
+ - intent_acc
22
+ - slot_f1
23
+ - EMA
24
+
25
+ dataset:
26
+ dataset_name: atis
27
+
28
+ tokenizer:
29
+ _tokenizer_name_: google/electra-small-discriminator
30
+ _padding_side_: right
31
+ add_special_tokens: true
32
+ max_length: 512
33
+
34
+ optimizer:
35
+ _model_target_: torch.optim.AdamW
36
+ _model_partial_: true
37
+ lr: 2e-5
38
+ weight_decay: 1e-8
39
+
40
+ scheduler:
41
+ _model_target_: transformers.get_scheduler
42
+ _model_partial_: true
43
+ name : "linear"
44
+ num_warmup_steps: 0
45
+
46
+ model:
47
+ _model_target_: model.open_slu_model.OpenSLUModel
48
+ ignore_index: -100
49
+ encoder:
50
+ _model_target_: model.encoder.AutoEncoder
51
+ encoder_name: google/electra-small-discriminator
52
+ output_dim: 256
53
+ return_with_input: true
54
+ return_sentence_level_hidden: true
55
+
56
+ decoder:
57
+ _model_target_: model.decoder.base_decoder.BaseDecoder
58
+ intent_classifier:
59
+ _model_target_: model.decoder.classifier.LinearClassifier
60
+ mode: "intent"
61
+ ignore_index: -100
62
+
63
+
64
+ slot_classifier:
65
+ _model_target_: model.decoder.classifier.LinearClassifier
66
+ mode: "slot"
67
+ ignore_index: -100
config/reproduction/atis/joint-bert.yaml ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "Tesla V100-SXM2-16GB"
2
+
3
+ base:
4
+ name: "OpenSLUv1"
5
+ train: True
6
+ test: True
7
+ device: cuda
8
+ seed: 42
9
+ epoch_num: 300
10
+ batch_size: 128
11
+
12
+ model_manager:
13
+ load_dir: null
14
+ save_dir: save/joint-bert-atis
15
+
16
+ evaluator:
17
+ best_key: EMA
18
+ eval_by_epoch: true
19
+ # eval_step: 1800
20
+ metric:
21
+ - intent_acc
22
+ - slot_f1
23
+ - EMA
24
+
25
+ accelerator:
26
+ use_accelerator: false
27
+
28
+ dataset:
29
+ dataset_name: atis
30
+
31
+ tokenizer:
32
+ _tokenizer_name_: bert-base-uncased
33
+ _padding_side_: right
34
+ _align_mode_: general
35
+ add_special_tokens: true
36
+
37
+ optimizer:
38
+ _model_target_: torch.optim.AdamW
39
+ _model_partial_: true
40
+ lr: 4e-6
41
+ weight_decay: 1e-8
42
+
43
+ scheduler:
44
+ _model_target_: transformers.get_scheduler
45
+ _model_partial_: true
46
+ name : "linear"
47
+ num_warmup_steps: 0
48
+
49
+ model:
50
+ _model_target_: model.open_slu_model.OpenSLUModel
51
+ ignore_index: -100
52
+ encoder:
53
+ _model_target_: model.encoder.AutoEncoder
54
+ encoder_name: bert-base-uncased
55
+ output_dim: 768
56
+ return_with_input: true
57
+ return_sentence_level_hidden: true
58
+
59
+ decoder:
60
+ _model_target_: model.decoder.base_decoder.BaseDecoder
61
+ intent_classifier:
62
+ _model_target_: model.decoder.classifier.LinearClassifier
63
+ mode: "intent"
64
+ ignore_index: -100
65
+
66
+
67
+ slot_classifier:
68
+ _model_target_: model.decoder.classifier.LinearClassifier
69
+ mode: "slot"
70
+ ignore_index: -100
config/reproduction/atis/roberta.yaml ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "Tesla V100-SXM2-16GB" #Useless info
2
+
3
+ base:
4
+ name: "OpenSLUv1"
5
+ train: True
6
+ test: True
7
+ device: cuda
8
+ seed: 42
9
+ epoch_num: 300
10
+ batch_size: 32
11
+
12
+ model_manager:
13
+ load_dir: null
14
+ save_dir: save/roberta-atis
15
+
16
+ evaluator:
17
+ best_key: EMA
18
+ eval_by_epoch: true
19
+ # eval_step: 1800
20
+ metric:
21
+ - intent_acc
22
+ - slot_f1
23
+ - EMA
24
+
25
+ accelerator:
26
+ use_accelerator: false
27
+
28
+ dataset:
29
+ dataset_name: atis
30
+
31
+ tokenizer:
32
+ _tokenizer_name_: roberta-base
33
+ _padding_side_: right
34
+ add_special_tokens: true
35
+ max_length: 512
36
+
37
+ optimizer:
38
+ _model_target_: torch.optim.AdamW
39
+ _model_partial_: true
40
+ lr: 2e-5
41
+ weight_decay: 1e-8
42
+
43
+ scheduler:
44
+ _model_target_: transformers.get_scheduler
45
+ _model_partial_: true
46
+ name : "linear"
47
+ num_warmup_steps: 0
48
+
49
+ model:
50
+ _model_target_: model.open_slu_model.OpenSLUModel
51
+ ignore_index: -100
52
+ encoder:
53
+ _model_target_: model.encoder.AutoEncoder
54
+ encoder_name: roberta-base
55
+ output_dim: 768
56
+ return_with_input: true
57
+ return_sentence_level_hidden: true
58
+
59
+ decoder:
60
+ _model_target_: model.decoder.base_decoder.BaseDecoder
61
+ intent_classifier:
62
+ _model_target_: model.decoder.classifier.LinearClassifier
63
+ mode: "intent"
64
+ ignore_index: -100
65
+
66
+
67
+ slot_classifier:
68
+ _model_target_: model.decoder.classifier.LinearClassifier
69
+ mode: "slot"
70
+ ignore_index: -100
config/reproduction/atis/slot-gated.yaml ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "NVIDIA GeForce RTX 2080 Ti"
2
+
3
+ base:
4
+ name: "OpenSLUv1"
5
+ train: true
6
+ test: true
7
+ device: cuda
8
+ seed: 42
9
+ epoch_num: 300
10
+ batch_size: 16
11
+
12
+ model_manager:
13
+ load_dir: null
14
+ save_dir: save/slot-gated-atis
15
+
16
+ evaluator:
17
+ best_key: EMA
18
+ eval_by_epoch: true
19
+ # eval_step: 1800
20
+ metric:
21
+ - intent_acc
22
+ - slot_f1
23
+ - EMA
24
+
25
+ accelerator:
26
+ use_accelerator: false
27
+
28
+ dataset:
29
+ dataset_name: atis
30
+
31
+ tokenizer:
32
+ _tokenizer_name_: word_tokenizer
33
+ _padding_side_: right
34
+ _align_mode_: fast
35
+ add_special_tokens: false
36
+ max_length: 512
37
+
38
+ optimizer:
39
+ _model_target_: torch.optim.Adam
40
+ _model_partial_: true
41
+ lr: 0.001
42
+ weight_decay: 1e-6
43
+
44
+ scheduler:
45
+ _model_target_: transformers.get_scheduler
46
+ _model_partial_: true
47
+ name : "linear"
48
+ num_warmup_steps: 0
49
+
50
+ model:
51
+ _model_target_: model.OpenSLUModel
52
+ ignore_index: -100
53
+ encoder:
54
+ _model_target_: model.encoder.AutoEncoder
55
+ encoder_name: lstm
56
+
57
+ embedding:
58
+ embedding_dim: 256
59
+ dropout_rate: 0.4
60
+
61
+ lstm:
62
+ dropout_rate: 0.5
63
+ output_dim: 256
64
+ layer_num: 2
65
+ bidirectional: true
66
+
67
+ return_with_input: true
68
+ return_sentence_level_hidden: false
69
+
70
+ decoder:
71
+ _model_target_: model.decoder.BaseDecoder
72
+
73
+ interaction:
74
+ _model_target_: model.decoder.interaction.SlotGatedInteraction
75
+ remove_slot_attn: false
76
+ output_dim: 256
77
+ dropout_rate: 0.4
78
+
79
+ intent_classifier:
80
+ _model_target_: model.decoder.classifier.LinearClassifier
81
+ mode: "intent"
82
+ ignore_index: -100
83
+
84
+ slot_classifier:
85
+ _model_target_: model.decoder.classifier.LinearClassifier
86
+ mode: "slot"
87
+ ignore_index: -100
config/reproduction/atis/stack-propagation.yaml ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "NVIDIA GeForce RTX 2080 Ti"
2
+
3
+ base:
4
+ name: "OpenSLUv1"
5
+ train: true
6
+ test: true
7
+ device: cuda
8
+ seed: 42
9
+ epoch_num: 300
10
+ batch_size: 16
11
+
12
+ model_manager:
13
+ load_dir: null
14
+ save_dir: save/stack-propagation-atis
15
+ save_mode: save-by-eval # save-by-step
16
+ # save_step: 100
17
+ max_save_num: 1
18
+
19
+ accelerator:
20
+ use_accelerator: false
21
+
22
+ dataset:
23
+ dataset_name: atis
24
+
25
+ evaluator:
26
+ best_key: EMA
27
+ eval_by_epoch: true
28
+ # eval_step: 1800
29
+ metric:
30
+ - intent_acc
31
+ - slot_f1
32
+ - EMA
33
+
34
+ tokenizer:
35
+ _tokenizer_name_: word_tokenizer
36
+ _padding_side_: right
37
+ _align_mode_: fast
38
+ _to_lower_case_: true
39
+ add_special_tokens: false
40
+ max_length: 512
41
+
42
+ optimizer:
43
+ _model_target_: torch.optim.Adam
44
+ _model_partial_: true
45
+ lr: 0.001
46
+ weight_decay: 1e-6
47
+
48
+ scheduler:
49
+ _model_target_: transformers.get_scheduler
50
+ _model_partial_: true
51
+ name : "linear"
52
+ num_warmup_steps: 0
53
+
54
+ model:
55
+ _model_target_: model.OpenSLUModel
56
+
57
+ encoder:
58
+ _model_target_: model.encoder.AutoEncoder
59
+ encoder_name: self-attention-lstm
60
+
61
+ embedding:
62
+ embedding_dim: 256
63
+ dropout_rate: 0.55
64
+
65
+ lstm:
66
+ layer_num: 1
67
+ bidirectional: true
68
+ output_dim: 256
69
+ dropout_rate: 0.5
70
+
71
+ attention:
72
+ hidden_dim: 1024
73
+ output_dim: 128
74
+ dropout_rate: 0.6
75
+
76
+ return_with_input: true
77
+ return_sentence_level_hidden: false
78
+
79
+ decoder:
80
+ _model_target_: model.decoder.StackPropagationDecoder
81
+ interaction:
82
+ _model_target_: model.decoder.interaction.StackInteraction
83
+ differentiable: false
84
+
85
+ intent_classifier:
86
+ _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
87
+ layer_num: 1
88
+ bidirectional: false
89
+ force_ratio: 0.9
90
+ hidden_dim: 64
91
+ embedding_dim: 8
92
+ ignore_index: -100
93
+ dropout_rate: 0.5
94
+ mode: "token-level-intent"
95
+ use_multi: false
96
+ return_sentence_level: true
97
+
98
+ slot_classifier:
99
+ _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
100
+ layer_num: 1
101
+ bidirectional: false
102
+ force_ratio: 0.9
103
+ hidden_dim: 64
104
+ embedding_dim: 32
105
+ ignore_index: -100
106
+ dropout_rate: 0.55
107
+ mode: "slot"
108
+ use_multi: false
109
+ return_sentence_level: false
config/reproduction/mix-atis/agif.yaml ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "NVIDIA GeForce RTX 3080"
2
+
3
+ base:
4
+ name: "OpenSLUv1"
5
+ multi_intent: true
6
+ train: true
7
+ test: true
8
+ device: cuda
9
+ seed: 42
10
+ epoch_num: 100
11
+ batch_size: 32
12
+ ignore_index: -100
13
+
14
+ model_manager:
15
+ load_dir: null
16
+ save_dir: save/agif-mix-atis
17
+
18
+ accelerator:
19
+ use_accelerator: false
20
+
21
+ dataset:
22
+ dataset_name: mix-atis
23
+
24
+ evaluator:
25
+ best_key: EMA
26
+ eval_by_epoch: true
27
+ # eval_step: 1800
28
+ metric:
29
+ - intent_acc
30
+ - intent_f1
31
+ - slot_f1
32
+ - EMA
33
+
34
+ tokenizer:
35
+ _tokenizer_name_: word_tokenizer
36
+ _padding_side_: right
37
+ _align_mode_: fast
38
+ add_special_tokens: false
39
+ max_length: 512
40
+
41
+ optimizer:
42
+ _model_target_: torch.optim.Adam
43
+ _model_partial_: true
44
+ lr: 0.001
45
+ weight_decay: 1e-6
46
+
47
+ scheduler:
48
+ _model_target_: transformers.get_scheduler
49
+ _model_partial_: true
50
+ name : "linear"
51
+ num_warmup_steps: 0
52
+
53
+ model:
54
+ _model_target_: model.OpenSLUModel
55
+
56
+ encoder:
57
+ _model_target_: model.encoder.AutoEncoder
58
+ encoder_name: self-attention-lstm
59
+
60
+ embedding:
61
+ embedding_dim: 128
62
+ dropout_rate: 0.4
63
+
64
+ lstm:
65
+ layer_num: 1
66
+ bidirectional: true
67
+ output_dim: 256
68
+ dropout_rate: 0.4
69
+
70
+ attention:
71
+ hidden_dim: 1024
72
+ output_dim: 128
73
+ dropout_rate: 0.4
74
+
75
+ unflat_attention:
76
+ dropout_rate: 0.4
77
+ output_dim: "{model.encoder.lstm.output_dim} + {model.encoder.attention.output_dim}"
78
+ return_with_input: true
79
+ return_sentence_level_hidden: true
80
+
81
+ decoder:
82
+ _model_target_: model.decoder.AGIFDecoder
83
+ # teacher_forcing: true
84
+ interaction:
85
+ _model_target_: model.decoder.interaction.AGIFInteraction
86
+ intent_embedding_dim: 128
87
+ input_dim: "{model.encoder.output_dim}"
88
+ hidden_dim: 128
89
+ output_dim: "{model.decoder.interaction.intent_embedding_dim}"
90
+ dropout_rate: 0.4
91
+ alpha: 0.2
92
+ num_heads: 4
93
+ num_layers: 2
94
+ row_normalized: true
95
+
96
+ intent_classifier:
97
+ _model_target_: model.decoder.classifier.MLPClassifier
98
+ mode: "intent"
99
+ mlp:
100
+ - _model_target_: torch.nn.Linear
101
+ in_features: "{model.encoder.output_dim}"
102
+ out_features: 256
103
+ - _model_target_: torch.nn.LeakyReLU
104
+ negative_slope: 0.2
105
+ - _model_target_: torch.nn.Linear
106
+ in_features: 256
107
+ out_features: "{base.intent_label_num}"
108
+ dropout_rate: 0.4
109
+ loss_fn:
110
+ _model_target_: torch.nn.BCEWithLogitsLoss
111
+ use_multi: "{base.multi_intent}"
112
+ multi_threshold: 0.5
113
+ return_sentence_level: true
114
+ ignore_index: -100
115
+ weight: 0.3
116
+
117
+ slot_classifier:
118
+ _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
119
+ mode: "slot"
120
+ input_dim: "{model.encoder.output_dim}"
121
+ layer_num: 1
122
+ bidirectional: false
123
+ force_ratio: 0.9
124
+ hidden_dim: "{model.decoder.interaction.intent_embedding_dim}"
125
+ embedding_dim: 128
126
+ # loss_fn:
127
+ # _model_target_: torch.nn.NLLLoss
128
+ ignore_index: -100
129
+ dropout_rate: 0.4
130
+ use_multi: false
131
+ multi_threshold: 0.5
132
+ return_sentence_level: false
133
+ weight: 0.7
config/reproduction/mix-atis/gl-gin.yaml ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "Tesla V100-SXM2-16GB"
2
+
3
+ base:
4
+ name: "OpenSLUv1"
5
+ multi_intent: true
6
+ train: true
7
+ test: true
8
+ device: cuda
9
+ seed: 42
10
+ epoch_num: 300
11
+ batch_size: 32
12
+ ignore_index: -100
13
+
14
+ model_manager:
15
+ load_dir: null
16
+ save_dir: save/gl-gin-mix-atis
17
+
18
+ evaluator:
19
+ best_key: EMA
20
+ eval_by_epoch: true
21
+ # eval_step: 1800
22
+ metric:
23
+ - intent_acc
24
+ - intent_f1
25
+ - slot_f1
26
+ - EMA
27
+
28
+ dataset:
29
+ dataset_name: mix-atis
30
+
31
+ tokenizer:
32
+ _tokenizer_name_: word_tokenizer
33
+ _padding_side_: right
34
+ _align_mode_: fast
35
+ add_special_tokens: false
36
+ max_length: 512
37
+
38
+ optimizer:
39
+ _model_target_: torch.optim.Adam
40
+ _model_partial_: true
41
+ lr: 0.001
42
+ weight_decay: 1e-6
43
+
44
+ scheduler:
45
+ _model_target_: transformers.get_scheduler
46
+ _model_partial_: true
47
+ name : "linear"
48
+ num_warmup_steps: 0
49
+
50
+ model:
51
+ _model_target_: model.OpenSLUModel
52
+
53
+ encoder:
54
+ _model_target_: model.encoder.AutoEncoder
55
+ encoder_name: self-attention-lstm
56
+
57
+ embedding:
58
+ embedding_dim: 128
59
+ dropout_rate: 0.4
60
+
61
+ lstm:
62
+ layer_num: 1
63
+ bidirectional: true
64
+ output_dim: 256
65
+ dropout_rate: 0.4
66
+
67
+ attention:
68
+ hidden_dim: 1024
69
+ output_dim: 128
70
+ dropout_rate: 0.4
71
+ output_dim: "{model.encoder.lstm.output_dim} + {model.encoder.attention.output_dim}"
72
+ return_with_input: true
73
+ return_sentence_level_hidden: false
74
+
75
+ decoder:
76
+ _model_target_: model.decoder.GLGINDecoder
77
+ dropout_rate: 0.4
78
+ interaction:
79
+ _model_target_: model.decoder.interaction.GLGINInteraction
80
+ intent_embedding_dim: 64
81
+ input_dim: "{model.encoder.output_dim}"
82
+ hidden_dim: 256
83
+ output_dim: "{model.decoder.interaction.intent_embedding_dim}"
84
+ dropout_rate: 0.4
85
+ alpha: 0.2
86
+ num_heads: 8
87
+ num_layers: 2
88
+ row_normalized: true
89
+ slot_graph_window: 1
90
+ intent_label_num: "{base.intent_label_num}"
91
+
92
+ intent_classifier:
93
+ _model_target_: model.decoder.classifier.MLPClassifier
94
+ mode: "token-level-intent"
95
+ mlp:
96
+ - _model_target_: torch.nn.Linear
97
+ in_features: "{model.encoder.output_dim}"
98
+ out_features: 256
99
+ - _model_target_: torch.nn.LeakyReLU
100
+ negative_slope: 0.2
101
+ - _model_target_: torch.nn.Linear
102
+ in_features: 256
103
+ out_features: "{base.intent_label_num}"
104
+ loss_fn:
105
+ _model_target_: torch.nn.BCEWithLogitsLoss
106
+ dropout_rate: 0.4
107
+ use_multi: "{base.multi_intent}"
108
+ multi_threshold: 0.5
109
+ return_sentence_level: true
110
+ ignore_index: "{base.ignore_index}"
111
+
112
+ slot_classifier:
113
+ _model_target_: model.decoder.classifier.MLPClassifier
114
+ mode: "slot"
115
+ mlp:
116
+ - _model_target_: torch.nn.Linear
117
+ in_features: "{model.decoder.interaction.output_dim}"
118
+ out_features: "{model.decoder.interaction.output_dim}"
119
+ - _model_target_: torch.nn.LeakyReLU
120
+ negative_slope: 0.2
121
+ - _model_target_: torch.nn.Linear
122
+ in_features: "{model.decoder.interaction.output_dim}"
123
+ out_features: "{base.slot_label_num}"
124
+ ignore_index: "{base.ignore_index}"
125
+ dropout_rate: 0.4
126
+ use_multi: false
127
+ multi_threshold: 0.5
128
+ return_sentence_level: false
config/reproduction/mix-atis/vanilla.yaml ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base:
2
+ name: "OpenSLUv1"
3
+ multi_intent: true
4
+ train: true
5
+ test: true
6
+ device: cuda
7
+ seed: 42
8
+ epoch_num: 100
9
+ batch_size: 16
10
+ ignore_index: -100
11
+
12
+ model_manager:
13
+ load_dir: null
14
+ save_dir: save/vanilla-mix-atis
15
+
16
+ evaluator:
17
+ best_key: EMA
18
+ eval_by_epoch: true
19
+ # eval_step: 1800
20
+ metric:
21
+ - intent_acc
22
+ - intent_f1
23
+ - slot_f1
24
+ - EMA
25
+
26
+ dataset:
27
+ dataset_name: atis
28
+
29
+ tokenizer:
30
+ _tokenizer_name_: word_tokenizer
31
+ _padding_side_: right
32
+ _align_mode_: fast
33
+ add_special_tokens: false
34
+ max_length: 512
35
+
36
+ optimizer:
37
+ _model_target_: torch.optim.Adam
38
+ _model_partial_: true
39
+ lr: 0.001
40
+ weight_decay: 1e-6
41
+
42
+ scheduler:
43
+ _model_target_: transformers.get_scheduler
44
+ _model_partial_: true
45
+ name : "linear"
46
+ num_warmup_steps: 0
47
+
48
+ model:
49
+ _model_target_: model.OpenSLUModel
50
+
51
+ encoder:
52
+ _model_target_: model.encoder.AutoEncoder
53
+ encoder_name: self-attention-lstm
54
+
55
+ embedding:
56
+ embedding_dim: 128
57
+ dropout_rate: 0.4
58
+
59
+ lstm:
60
+ layer_num: 1
61
+ bidirectional: true
62
+ output_dim: 256
63
+ dropout_rate: 0.4
64
+
65
+ attention:
66
+ hidden_dim: 1024
67
+ output_dim: 128
68
+ dropout_rate: 0.4
69
+ output_dim: "{model.encoder.lstm.output_dim} + {model.encoder.attention.output_dim}"
70
+ return_with_input: true
71
+ return_sentence_level_hidden: true
72
+
73
+ decoder:
74
+ _model_target_: model.decoder.BaseDecoder
75
+
76
+ intent_classifier:
77
+ _model_target_: model.decoder.classifier.LinearClassifier
78
+ mode: "intent"
79
+ input_dim: "{model.encoder.output_dim}"
80
+ loss_fn:
81
+ _model_target_: torch.nn.BCEWithLogitsLoss
82
+ use_multi: "{base.multi_intent}"
83
+ multi_threshold: 0.5
84
+ return_sentence_level: true
85
+ ignore_index: "{base.ignore_index}"
86
+
87
+
88
+ slot_classifier:
89
+ _model_target_: model.decoder.classifier.LinearClassifier
90
+ mode: "slot"
91
+ input_dim: "{model.encoder.output_dim}"
92
+ use_multi: false
93
+ multi_threshold: 0.5
94
+ ignore_index: "{base.ignore_index}"
95
+ return_sentence_level: false
config/reproduction/mix-snips/agif.yaml ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "Tesla P100-PCIE-16GB"
2
+
3
+ base:
4
+ name: "OpenSLUv1"
5
+ multi_intent: true
6
+ train: true
7
+ test: true
8
+ device: cuda
9
+ seed: 42
10
+ epoch_num: 50
11
+ batch_size: 64
12
+ ignore_index: -100
13
+
14
+ model_manager:
15
+ load_dir: null
16
+ save_dir: save/agif-mix-snips
17
+
18
+ evaluator:
19
+ best_key: EMA
20
+ eval_by_epoch: true
21
+ # eval_step: 1800
22
+ metric:
23
+ - intent_acc
24
+ - intent_f1
25
+ - slot_f1
26
+ - EMA
27
+
28
+ accelerator:
29
+ use_accelerator: false
30
+
31
+ dataset:
32
+ dataset_name: mix-snips
33
+
34
+ tokenizer:
35
+ _tokenizer_name_: word_tokenizer
36
+ _padding_side_: right
37
+ _align_mode_: fast
38
+ add_special_tokens: false
39
+ max_length: 512
40
+
41
+ optimizer:
42
+ _model_target_: torch.optim.Adam
43
+ _model_partial_: true
44
+ lr: 0.001
45
+ weight_decay: 1e-6
46
+
47
+ scheduler:
48
+ _model_target_: transformers.get_scheduler
49
+ _model_partial_: true
50
+ name : "linear"
51
+ num_warmup_steps: 0
52
+
53
+ model:
54
+ _model_target_: model.OpenSLUModel
55
+
56
+ encoder:
57
+ _model_target_: model.encoder.AutoEncoder
58
+ encoder_name: self-attention-lstm
59
+
60
+ embedding:
61
+ embedding_dim: 128
62
+ dropout_rate: 0.4
63
+
64
+ lstm:
65
+ layer_num: 1
66
+ bidirectional: true
67
+ output_dim: 256
68
+ dropout_rate: 0.4
69
+
70
+ attention:
71
+ hidden_dim: 1024
72
+ output_dim: 128
73
+ dropout_rate: 0.4
74
+
75
+ unflat_attention:
76
+ dropout_rate: 0.4
77
+ output_dim: "{model.encoder.lstm.output_dim} + {model.encoder.attention.output_dim}"
78
+ return_with_input: true
79
+ return_sentence_level_hidden: true
80
+
81
+ decoder:
82
+ _model_target_: model.decoder.AGIFDecoder
83
+ # teacher_forcing: true
84
+ interaction:
85
+ _model_target_: model.decoder.interaction.AGIFInteraction
86
+ intent_embedding_dim: 128
87
+ input_dim: "{model.encoder.output_dim}"
88
+ hidden_dim: 128
89
+ output_dim: "{model.decoder.interaction.intent_embedding_dim}"
90
+ dropout_rate: 0.4
91
+ alpha: 0.2
92
+ num_heads: 4
93
+ num_layers: 2
94
+ row_normalized: true
95
+
96
+ intent_classifier:
97
+ _model_target_: model.decoder.classifier.MLPClassifier
98
+ mode: "intent"
99
+ mlp:
100
+ - _model_target_: torch.nn.Linear
101
+ in_features: "{model.encoder.output_dim}"
102
+ out_features: 256
103
+ - _model_target_: torch.nn.LeakyReLU
104
+ negative_slope: 0.2
105
+ - _model_target_: torch.nn.Linear
106
+ in_features: 256
107
+ out_features: "{base.intent_label_num}"
108
+ dropout_rate: 0.4
109
+ loss_fn:
110
+ _model_target_: torch.nn.BCEWithLogitsLoss
111
+ use_multi: "{base.multi_intent}"
112
+ multi_threshold: 0.5
113
+ return_sentence_level: true
114
+ ignore_index: -100
115
+ weight: 0.3
116
+
117
+ slot_classifier:
118
+ _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
119
+ mode: "slot"
120
+ input_dim: "{model.encoder.output_dim}"
121
+ layer_num: 1
122
+ bidirectional: false
123
+ force_ratio: 0.9
124
+ hidden_dim: "{model.decoder.interaction.intent_embedding_dim}"
125
+ embedding_dim: 128
126
+ ignore_index: -100
127
+ dropout_rate: 0.4
128
+ use_multi: false
129
+ multi_threshold: 0.5
130
+ return_sentence_level: false
131
+ weight: 0.7
config/reproduction/mix-snips/gl-gin.yaml ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "NVIDIA GeForce RTX 2080 Ti"
2
+
3
+ base:
4
+ name: "OpenSLUv1"
5
+ multi_intent: true
6
+ train: true
7
+ test: true
8
+ device: cuda
9
+ seed: 42
10
+ epoch_num: 50
11
+ batch_size: 32
12
+ ignore_index: -100
13
+
14
+
15
+ model_manager:
16
+ load_dir: null
17
+ save_dir: save/gl-gin-mix-snips
18
+
19
+ evaluator:
20
+ best_key: EMA
21
+ eval_by_epoch: false
22
+ eval_step: 1800
23
+ metric:
24
+ - intent_acc
25
+ - intent_f1
26
+ - slot_f1
27
+ - EMA
28
+
29
+ dataset:
30
+ dataset_name: mix-snips
31
+
32
+ tokenizer:
33
+ _tokenizer_name_: word_tokenizer
34
+ _padding_side_: right
35
+ _align_mode_: fast
36
+ add_special_tokens: false
37
+ max_length: 512
38
+
39
+ optimizer:
40
+ _model_target_: torch.optim.Adam
41
+ _model_partial_: true
42
+ lr: 0.001
43
+ weight_decay: 1e-6
44
+
45
+ scheduler:
46
+ _model_target_: transformers.get_scheduler
47
+ _model_partial_: true
48
+ name : "linear"
49
+ num_warmup_steps: 0
50
+
51
+ model:
52
+ _model_target_: model.OpenSLUModel
53
+
54
+ encoder:
55
+ _model_target_: model.encoder.AutoEncoder
56
+ encoder_name: self-attention-lstm
57
+
58
+ embedding:
59
+ embedding_dim: 128
60
+ dropout_rate: 0.4
61
+
62
+ lstm:
63
+ layer_num: 2
64
+ bidirectional: true
65
+ output_dim: 256
66
+ dropout_rate: 0.4
67
+
68
+ attention:
69
+ hidden_dim: 1024
70
+ output_dim: 128
71
+ dropout_rate: 0.4
72
+ output_dim: "{model.encoder.lstm.output_dim} + {model.encoder.attention.output_dim}"
73
+ return_with_input: true
74
+ return_sentence_level_hidden: false
75
+
76
+ decoder:
77
+ _model_target_: model.decoder.GLGINDecoder
78
+ dropout_rate: 0.4
79
+ interaction:
80
+ _model_target_: model.decoder.interaction.GLGINInteraction
81
+ intent_embedding_dim: 256
82
+ input_dim: "{model.encoder.output_dim}"
83
+ hidden_dim: 256
84
+ output_dim: "{model.decoder.interaction.intent_embedding_dim}"
85
+ dropout_rate: 0.4
86
+ alpha: 0.2
87
+ num_heads: 4
88
+ num_layers: 2
89
+ row_normalized: true
90
+ slot_graph_window: 1
91
+ intent_label_num: "{base.intent_label_num}"
92
+
93
+ intent_classifier:
94
+ _model_target_: model.decoder.classifier.MLPClassifier
95
+ mode: "token-level-intent"
96
+ mlp:
97
+ - _model_target_: torch.nn.Linear
98
+ in_features: "{model.encoder.output_dim}"
99
+ out_features: 256
100
+ - _model_target_: torch.nn.LeakyReLU
101
+ negative_slope: 0.2
102
+ - _model_target_: torch.nn.Linear
103
+ in_features: 256
104
+ out_features: "{base.intent_label_num}"
105
+ loss_fn:
106
+ _model_target_: torch.nn.BCEWithLogitsLoss
107
+ dropout_rate: 0.4
108
+ use_multi: "{base.multi_intent}"
109
+ multi_threshold: 0.5
110
+ return_sentence_level: true
111
+ ignore_index: "{base.ignore_index}"
112
+ weight: 0.2
113
+
114
+ slot_classifier:
115
+ _model_target_: model.decoder.classifier.MLPClassifier
116
+ mode: "slot"
117
+ mlp:
118
+ - _model_target_: torch.nn.Linear
119
+ in_features: "{model.decoder.interaction.output_dim}"
120
+ out_features: "{model.decoder.interaction.output_dim}"
121
+ - _model_target_: torch.nn.LeakyReLU
122
+ negative_slope: 0.2
123
+ - _model_target_: torch.nn.Linear
124
+ in_features: "{model.decoder.interaction.output_dim}"
125
+ out_features: "{base.slot_label_num}"
126
+ ignore_index: "{base.ignore_index}"
127
+ dropout_rate: 0.4
128
+ use_multi: false
129
+ multi_threshold: 0.5
130
+ weight: 0.8
131
+ return_sentence_level: false
config/reproduction/mix-snips/vanilla.yaml ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base:
2
+ name: "OpenSLUv1"
3
+ multi_intent: true
4
+ train: true
5
+ test: true
6
+ device: cuda
7
+ seed: 42
8
+ epoch_num: 100
9
+ batch_size: 16
10
+ ignore_index: -100
11
+
12
+ model_manager:
13
+ load_dir: null
14
+ save_dir: save/vanilla-mix-snips
15
+
16
+ evaluator:
17
+ best_key: EMA
18
+ eval_by_epoch: true
19
+ # eval_step: 1800
20
+ metric:
21
+ - intent_acc
22
+ - intent_f1
23
+ - slot_f1
24
+ - EMA
25
+
26
+ dataset:
27
+ dataset_name: atis
28
+
29
+ tokenizer:
30
+ _tokenizer_name_: word_tokenizer
31
+ _padding_side_: right
32
+ _align_mode_: fast
33
+ add_special_tokens: false
34
+ max_length: 512
35
+
36
+ optimizer:
37
+ _model_target_: torch.optim.Adam
38
+ _model_partial_: true
39
+ lr: 0.001
40
+ weight_decay: 1e-6
41
+
42
+ scheduler:
43
+ _model_target_: transformers.get_scheduler
44
+ _model_partial_: true
45
+ name : "linear"
46
+ num_warmup_steps: 0
47
+
48
+ model:
49
+ _model_target_: model.OpenSLUModel
50
+
51
+ encoder:
52
+ _model_target_: model.encoder.AutoEncoder
53
+ encoder_name: self-attention-lstm
54
+
55
+ embedding:
56
+ embedding_dim: 128
57
+ dropout_rate: 0.4
58
+
59
+ lstm:
60
+ layer_num: 1
61
+ bidirectional: true
62
+ output_dim: 256
63
+ dropout_rate: 0.4
64
+
65
+ attention:
66
+ hidden_dim: 1024
67
+ output_dim: 128
68
+ dropout_rate: 0.4
69
+ output_dim: "{model.encoder.lstm.output_dim} + {model.encoder.attention.output_dim}"
70
+ return_with_input: true
71
+ return_sentence_level_hidden: true
72
+
73
+ decoder:
74
+ _model_target_: model.decoder.BaseDecoder
75
+
76
+ intent_classifier:
77
+ _model_target_: model.decoder.classifier.LinearClassifier
78
+ mode: "intent"
79
+ input_dim: "{model.encoder.output_dim}"
80
+ loss_fn:
81
+ _model_target_: torch.nn.BCEWithLogitsLoss
82
+ use_multi: "{base.multi_intent}"
83
+ multi_threshold: 0.5
84
+ return_sentence_level: true
85
+ ignore_index: "{base.ignore_index}"
86
+
87
+
88
+ slot_classifier:
89
+ _model_target_: model.decoder.classifier.LinearClassifier
90
+ mode: "slot"
91
+ input_dim: "{model.encoder.output_dim}"
92
+ use_multi: false
93
+ multi_threshold: 0.5
94
+ ignore_index: "{base.ignore_index}"
95
+ return_sentence_level: false
config/reproduction/snips/bi-model.yaml ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "Tesla V100-SXM2-16GB"
2
+
3
+ base:
4
+ name: "OpenSLUv1"
5
+ train: true
6
+ test: true
7
+ device: cuda
8
+ seed: 42
9
+ epoch_num: 300
10
+ batch_size: 16
11
+
12
+ model_manager:
13
+ load_dir: null
14
+ save_dir: save/bi-model-snips
15
+
16
+ evaluator:
17
+ best_key: EMA
18
+ eval_by_epoch: true
19
+ # eval_step: 1800
20
+ metric:
21
+ - intent_acc
22
+ - slot_f1
23
+ - EMA
24
+
25
+ accelerator:
26
+ use_accelerator: false
27
+
28
+ dataset:
29
+ dataset_name: snips
30
+
31
+ tokenizer:
32
+ _tokenizer_name_: word_tokenizer
33
+ _padding_side_: right
34
+ _align_mode_: fast
35
+ add_special_tokens: false
36
+ max_length: 512
37
+
38
+ optimizer:
39
+ _model_target_: torch.optim.Adam
40
+ _model_partial_: true
41
+ lr: 0.001
42
+ weight_decay: 1e-6
43
+
44
+ scheduler:
45
+ _model_target_: transformers.get_scheduler
46
+ _model_partial_: true
47
+ name : "linear"
48
+ num_warmup_steps: 0
49
+
50
+ model:
51
+ _model_target_: model.OpenSLUModel
52
+
53
+ encoder:
54
+ _model_target_: model.encoder.BiEncoder
55
+ intent_encoder:
56
+ _model_target_: model.encoder.AutoEncoder
57
+ encoder_name: lstm
58
+
59
+ embedding:
60
+ embedding_dim: 256
61
+ dropout_rate: 0.5
62
+
63
+ lstm:
64
+ dropout_rate: 0.5
65
+ output_dim: 256
66
+ layer_num: 2
67
+ bidirectional: true
68
+
69
+ return_with_input: true
70
+ return_sentence_level_hidden: false
71
+
72
+ slot_encoder:
73
+ _model_target_: model.encoder.AutoEncoder
74
+ encoder_name: lstm
75
+
76
+ embedding:
77
+ embedding_dim: 256
78
+ dropout_rate: 0.5
79
+
80
+ lstm:
81
+ dropout_rate: 0.5
82
+ output_dim: 256
83
+ layer_num: 2
84
+ bidirectional: true
85
+
86
+ return_with_input: true
87
+ return_sentence_level_hidden: false
88
+
89
+ decoder:
90
+ _model_target_: model.decoder.BaseDecoder
91
+ interaction:
92
+ _model_target_: model.decoder.interaction.BiModelInteraction
93
+ output_dim: 256
94
+ dropout_rate: 0.5
95
+
96
+ intent_classifier:
97
+ _model_target_: model.decoder.classifier.LinearClassifier
98
+ mode: "intent"
99
+ ignore_index: -100
100
+
101
+ slot_classifier:
102
+ _model_target_: model.decoder.classifier.LinearClassifier
103
+ mode: "slot"
104
+ ignore_index: -100
config/reproduction/snips/dca_net.yaml ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "NVIDIA GeForce RTX 2080 Ti"
2
+
3
+ base:
4
+ name: "OpenSLUv1"
5
+ train: true
6
+ test: true
7
+ device: cuda
8
+ seed: 42
9
+ epoch_num: 300
10
+ batch_size: 16
11
+
12
+ model_manager:
13
+ load_dir: null
14
+ save_dir: save/dca-net-snips
15
+
16
+ evaluator:
17
+ best_key: EMA
18
+ eval_by_epoch: true
19
+ # eval_step: 1800
20
+ metric:
21
+ - intent_acc
22
+ - slot_f1
23
+ - EMA
24
+
25
+ accelerator:
26
+ use_accelerator: false
27
+
28
+ dataset:
29
+ dataset_name: snips
30
+
31
+ tokenizer:
32
+ _tokenizer_name_: word_tokenizer
33
+ _padding_side_: right
34
+ _align_mode_: fast
35
+ add_special_tokens: false
36
+ max_length: 512
37
+
38
+ optimizer:
39
+ _model_target_: torch.optim.Adam
40
+ _model_partial_: true
41
+ lr: 0.001
42
+ weight_decay: 1e-6
43
+
44
+ scheduler:
45
+ _model_target_: transformers.get_scheduler
46
+ _model_partial_: true
47
+ name : "linear"
48
+ num_warmup_steps: 0
49
+
50
+ model:
51
+ _model_target_: model.OpenSLUModel
52
+ encoder:
53
+ _model_target_: model.encoder.AutoEncoder
54
+ encoder_name: lstm
55
+
56
+ embedding:
57
+ load_embedding_name: glove.6B.300d.txt
58
+ embedding_dim: 300
59
+ dropout_rate: 0.4
60
+
61
+ lstm:
62
+ dropout_rate: 0.4
63
+ output_dim: 128
64
+ layer_num: 2
65
+ bidirectional: true
66
+ output_dim: "{model.encoder.lstm.output_dim}"
67
+ return_with_input: true
68
+ return_sentence_level_hidden: false
69
+
70
+ decoder:
71
+ _model_target_: model.decoder.DCANetDecoder
72
+ interaction:
73
+ _model_target_: model.decoder.interaction.DCANetInteraction
74
+ output_dim: "{model.encoder.output_dim}"
75
+ attention_dropout: 0.4
76
+ num_attention_heads: 8
77
+
78
+ intent_classifier:
79
+ _model_target_: model.decoder.classifier.LinearClassifier
80
+ mode: "intent"
81
+ input_dim: "{model.encoder.output_dim}"
82
+ ignore_index: -100
83
+
84
+ slot_classifier:
85
+ _model_target_: model.decoder.classifier.LinearClassifier
86
+ mode: "slot"
87
+ input_dim: "{model.encoder.output_dim}"
88
+ ignore_index: -100
config/reproduction/snips/deberta.yaml ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "Tesla V100-SXM2-16GB"
2
+
3
+ base:
4
+ name: "OpenSLUv1"
5
+ train: true
6
+ test: true
7
+ device: cuda
8
+ seed: 42
9
+ epoch_num: 300
10
+ batch_size: 32
11
+
12
+ model_manager:
13
+ load_dir: null
14
+ save_dir: save/deberta-snips
15
+
16
+ evaluator:
17
+ best_key: EMA
18
+ eval_by_epoch: true
19
+ # eval_step: 1800
20
+ metric:
21
+ - intent_acc
22
+ - slot_f1
23
+ - EMA
24
+
25
+ accelerator:
26
+ use_accelerator: false
27
+
28
+ dataset:
29
+ dataset_name: snips
30
+
31
+ tokenizer:
32
+ _tokenizer_name_: microsoft/deberta-v3-base
33
+ _padding_side_: right
34
+ add_special_tokens: true
35
+ max_length: 512
36
+
37
+ optimizer:
38
+ _model_target_: torch.optim.AdamW
39
+ _model_partial_: true
40
+ lr: 2e-5
41
+ weight_decay: 1e-8
42
+
43
+ scheduler:
44
+ _model_target_: transformers.get_scheduler
45
+ _model_partial_: true
46
+ name : "linear"
47
+ num_warmup_steps: 0
48
+
49
+ model:
50
+ _model_target_: model.open_slu_model.OpenSLUModel
51
+ ignore_index: -100
52
+ encoder:
53
+ _model_target_: model.encoder.AutoEncoder
54
+ encoder_name: microsoft/deberta-v3-base
55
+ output_dim: 768
56
+ return_with_input: true
57
+ return_sentence_level_hidden: true
58
+
59
+ decoder:
60
+ _model_target_: model.decoder.base_decoder.BaseDecoder
61
+ intent_classifier:
62
+ _model_target_: model.decoder.classifier.LinearClassifier
63
+ mode: "intent"
64
+ ignore_index: -100
65
+
66
+
67
+ slot_classifier:
68
+ _model_target_: model.decoder.classifier.LinearClassifier
69
+ mode: "slot"
70
+ ignore_index: -100
config/reproduction/snips/electra.yaml ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "Tesla V100-SXM2-16GB"
2
+ base:
3
+ name: "OpenSLUv1"
4
+ train: true
5
+ test: true
6
+ device: cuda
7
+ seed: 42
8
+ epoch_num: 300
9
+ batch_size: 32
10
+
11
+ model_manager:
12
+ load_dir: null
13
+ save_dir: save/electra-snips
14
+
15
+ evaluator:
16
+ best_key: EMA
17
+ eval_by_epoch: true
18
+ # eval_step: 1800
19
+ metric:
20
+ - intent_acc
21
+ - slot_f1
22
+ - EMA
23
+
24
+ accelerator:
25
+ use_accelerator: false
26
+
27
+ dataset:
28
+ dataset_name: snips
29
+
30
+ tokenizer:
31
+ _tokenizer_name_: google/electra-small-discriminator
32
+ _padding_side_: right
33
+ add_special_tokens: true
34
+ max_length: 512
35
+
36
+ optimizer:
37
+ _model_target_: torch.optim.AdamW
38
+ _model_partial_: true
39
+ lr: 2e-5
40
+ weight_decay: 1e-8
41
+
42
+ scheduler:
43
+ _model_target_: transformers.get_scheduler
44
+ _model_partial_: true
45
+ name : "linear"
46
+ num_warmup_steps: 0
47
+
48
+ model:
49
+ _model_target_: model.open_slu_model.OpenSLUModel
50
+ ignore_index: -100
51
+ encoder:
52
+ _model_target_: model.encoder.AutoEncoder
53
+ encoder_name: google/electra-small-discriminator
54
+ output_dim: 256
55
+ return_with_input: true
56
+ return_sentence_level_hidden: true
57
+
58
+ decoder:
59
+ _model_target_: model.decoder.base_decoder.BaseDecoder
60
+ intent_classifier:
61
+ _model_target_: model.decoder.classifier.LinearClassifier
62
+ mode: "intent"
63
+ ignore_index: -100
64
+
65
+
66
+ slot_classifier:
67
+ _model_target_: model.decoder.classifier.LinearClassifier
68
+ mode: "slot"
69
+ ignore_index: -100
config/reproduction/snips/joint-bert.yaml ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "Tesla V100-SXM2-16GB"
2
+
3
+ base:
4
+ name: "OpenSLUv1"
5
+ train: true
6
+ test: true
7
+ device: cuda
8
+ seed: 42
9
+ epoch_num: 300
10
+ batch_size: 128
11
+
12
+ model_manager:
13
+ load_dir: null
14
+ save_dir: save/joint-bert-snips
15
+
16
+ evaluator:
17
+ best_key: EMA
18
+ eval_by_epoch: true
19
+ # eval_step: 1800
20
+ metric:
21
+ - intent_acc
22
+ - slot_f1
23
+ - EMA
24
+
25
+ accelerator:
26
+ use_accelerator: false
27
+
28
+ dataset:
29
+ dataset_name: snips
30
+
31
+ metric:
32
+ - intent_acc
33
+ - slot_f1
34
+ - EMA
35
+
36
+ tokenizer:
37
+ _tokenizer_name_: bert-base-uncased
38
+ _padding_side_: right
39
+ _align_mode_: general
40
+ add_special_tokens: true
41
+
42
+ optimizer:
43
+ _model_target_: torch.optim.AdamW
44
+ _model_partial_: true
45
+ lr: 4e-6
46
+ weight_decay: 1e-8
47
+
48
+ scheduler:
49
+ _model_target_: transformers.get_scheduler
50
+ _model_partial_: true
51
+ name : "linear"
52
+ num_warmup_steps: 0
53
+
54
+ model:
55
+ _model_target_: model.open_slu_model.OpenSLUModel
56
+ ignore_index: -100
57
+ encoder:
58
+ _model_target_: model.encoder.AutoEncoder
59
+ encoder_name: bert-base-uncased
60
+ output_dim: 768
61
+ return_with_input: true
62
+ return_sentence_level_hidden: true
63
+
64
+ decoder:
65
+ _model_target_: model.decoder.base_decoder.BaseDecoder
66
+ intent_classifier:
67
+ _model_target_: model.decoder.classifier.LinearClassifier
68
+ mode: "intent"
69
+ ignore_index: -100
70
+
71
+
72
+ slot_classifier:
73
+ _model_target_: model.decoder.classifier.LinearClassifier
74
+ mode: "slot"
75
+ ignore_index: -100
config/reproduction/snips/roberta.yaml ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "Tesla V100-SXM2-16GB"
2
+
3
+ base:
4
+ name: "OpenSLUv1"
5
+ train: true
6
+ test: true
7
+ device: cuda
8
+ seed: 42
9
+ epoch_num: 300
10
+ batch_size: 32
11
+
12
+ model_manager:
13
+ load_dir: null
14
+ save_dir: save/roberta-snips
15
+
16
+ evaluator:
17
+ best_key: EMA
18
+ eval_by_epoch: true
19
+ # eval_step: 1800
20
+ metric:
21
+ - intent_acc
22
+ - slot_f1
23
+ - EMA
24
+
25
+ accelerator:
26
+ use_accelerator: false
27
+
28
+ dataset:
29
+ dataset_name: snips
30
+
31
+ tokenizer:
32
+ _tokenizer_name_: roberta-base
33
+ _padding_side_: right
34
+ add_special_tokens: true
35
+ max_length: 512
36
+
37
+ optimizer:
38
+ _model_target_: torch.optim.AdamW
39
+ _model_partial_: true
40
+ lr: 2e-5
41
+ weight_decay: 1e-8
42
+
43
+ scheduler:
44
+ _model_target_: transformers.get_scheduler
45
+ _model_partial_: true
46
+ name : "linear"
47
+ num_warmup_steps: 0
48
+
49
+ model:
50
+ _model_target_: model.open_slu_model.OpenSLUModel
51
+ ignore_index: -100
52
+ encoder:
53
+ _model_target_: model.encoder.AutoEncoder
54
+ encoder_name: roberta-base
55
+ output_dim: 768
56
+ return_with_input: true
57
+ return_sentence_level_hidden: true
58
+
59
+ decoder:
60
+ _model_target_: model.decoder.base_decoder.BaseDecoder
61
+ intent_classifier:
62
+ _model_target_: model.decoder.classifier.LinearClassifier
63
+ mode: "intent"
64
+ ignore_index: -100
65
+
66
+
67
+ slot_classifier:
68
+ _model_target_: model.decoder.classifier.LinearClassifier
69
+ mode: "slot"
70
+ ignore_index: -100
config/reproduction/snips/slot-gated.yaml ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "NVIDIA GeForce RTX 2080 Ti"
2
+
3
+ base:
4
+ name: "OpenSLUv1"
5
+ train: true
6
+ test: true
7
+ device: cuda
8
+ seed: 42
9
+ epoch_num: 300
10
+ batch_size: 16
11
+
12
+ model_manager:
13
+ load_dir: null
14
+ save_dir: save/slot-gated-snips
15
+
16
+ evaluator:
17
+ best_key: EMA
18
+ eval_by_epoch: true
19
+ # eval_step: 1800
20
+ metric:
21
+ - intent_acc
22
+ - slot_f1
23
+ - EMA
24
+
25
+ accelerator:
26
+ use_accelerator: false
27
+
28
+ dataset:
29
+ dataset_name: snips
30
+
31
+ tokenizer:
32
+ _tokenizer_name_: word_tokenizer
33
+ _padding_side_: right
34
+ _align_mode_: fast
35
+ add_special_tokens: false
36
+ max_length: 512
37
+
38
+ optimizer:
39
+ _model_target_: torch.optim.Adam
40
+ _model_partial_: true
41
+ lr: 0.001
42
+ weight_decay: 1e-6
43
+
44
+ scheduler:
45
+ _model_target_: transformers.get_scheduler
46
+ _model_partial_: true
47
+ name : "linear"
48
+ num_warmup_steps: 0
49
+
50
+ model:
51
+ _model_target_: model.OpenSLUModel
52
+ ignore_index: -100
53
+ encoder:
54
+ _model_target_: model.encoder.AutoEncoder
55
+ encoder_name: lstm
56
+
57
+ embedding:
58
+ embedding_dim: 256
59
+ dropout_rate: 0.4
60
+
61
+ lstm:
62
+ dropout_rate: 0.5
63
+ output_dim: 256
64
+ layer_num: 2
65
+ bidirectional: true
66
+
67
+ return_with_input: true
68
+ return_sentence_level_hidden: false
69
+
70
+ decoder:
71
+ _model_target_: model.decoder.BaseDecoder
72
+
73
+ interaction:
74
+ _model_target_: model.decoder.interaction.SlotGatedInteraction
75
+ remove_slot_attn: false
76
+ output_dim: 256
77
+ dropout_rate: 0.4
78
+
79
+ intent_classifier:
80
+ _model_target_: model.decoder.classifier.LinearClassifier
81
+ mode: "intent"
82
+ ignore_index: -100
83
+
84
+ slot_classifier:
85
+ _model_target_: model.decoder.classifier.LinearClassifier
86
+ mode: "slot"
87
+ ignore_index: -100
config/reproduction/snips/stack-propagation.yaml ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ device: "Tesla V100-SXM2-16GB"
2
+
3
+ base:
4
+ name: "OpenSLUv1"
5
+ train: true
6
+ test: true
7
+ device: cuda
8
+ seed: 42
9
+ epoch_num: 300
10
+ batch_size: 16
11
+
12
+ model_manager:
13
+ load_dir: null
14
+ save_dir: save/stack-propagation-snips
15
+
16
+ evaluator:
17
+ best_key: EMA
18
+ eval_by_epoch: true
19
+ # eval_step: 1800
20
+ metric:
21
+ - intent_acc
22
+ - slot_f1
23
+ - EMA
24
+
25
+ accelerator:
26
+ use_accelerator: false
27
+
28
+ dataset:
29
+ dataset_name: snips
30
+
31
+ tokenizer:
32
+ _tokenizer_name_: word_tokenizer
33
+ _padding_side_: right
34
+ _align_mode_: fast
35
+ add_special_tokens: false
36
+ max_length: 512
37
+
38
+ optimizer:
39
+ _model_target_: torch.optim.Adam
40
+ _model_partial_: true
41
+ lr: 0.001
42
+ weight_decay: 1e-6
43
+
44
+ scheduler:
45
+ _model_target_: transformers.get_scheduler
46
+ _model_partial_: true
47
+ name : "linear"
48
+ num_warmup_steps: 0
49
+
50
+ model:
51
+ _model_target_: model.OpenSLUModel
52
+
53
+ encoder:
54
+ _model_target_: model.encoder.AutoEncoder
55
+ encoder_name: self-attention-lstm
56
+
57
+ embedding:
58
+ embedding_dim: 256
59
+ dropout_rate: 0.4
60
+
61
+ lstm:
62
+ layer_num: 1
63
+ bidirectional: true
64
+ output_dim: 256
65
+ dropout_rate: 0.4
66
+
67
+ attention:
68
+ hidden_dim: 1024
69
+ output_dim: 128
70
+ dropout_rate: 0.4
71
+
72
+ return_with_input: true
73
+ return_sentence_level_hidden: false
74
+
75
+ decoder:
76
+ _model_target_: model.decoder.StackPropagationDecoder
77
+ interaction:
78
+ _model_target_: model.decoder.interaction.StackInteraction
79
+ differentiable: false
80
+
81
+ intent_classifier:
82
+ _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
83
+ layer_num: 1
84
+ bidirectional: false
85
+ force_ratio: 0.9
86
+ hidden_dim: 64
87
+ embedding_dim: 8
88
+ ignore_index: -100
89
+ dropout_rate: 0.4
90
+ mode: "token-level-intent"
91
+ use_multi: false
92
+ return_sentence_level: true
93
+
94
+ slot_classifier:
95
+ _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
96
+ layer_num: 1
97
+ bidirectional: false
98
+ force_ratio: 0.9
99
+ hidden_dim: 64
100
+ embedding_dim: 32
101
+ ignore_index: -100
102
+ dropout_rate: 0.4
103
+ mode: "slot"
104
+ use_multi: false
105
+ return_sentence_level: false
config/visual.yaml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ host: 127.0.0.1
2
+ port: 7861
3
+
4
+ is_push_to_public: true
5
+ output_path: save/stack/outputs.jsonl
6
+ page-size: 2
model/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from model.open_slu_model import OpenSLUModel
2
+
3
+ __all__ = ["OpenSLUModel"]
model/decoder/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ from model.decoder.agif_decoder import AGIFDecoder
2
+ from model.decoder.base_decoder import StackPropagationDecoder, BaseDecoder, DCANetDecoder
3
+ from model.decoder.gl_gin_decoder import GLGINDecoder
4
+
5
+ __all__ = ["StackPropagationDecoder", "BaseDecoder", "DCANetDecoder", "AGIFDecoder", "GLGINDecoder"]