Spaces · LightChen2333 committed
Commit 223340a · Parent(s): ae0617b
Upload 78 files
This view is limited to 50 files because it contains too many changes. See raw diff.
- .gitignore +136 -0
- README.md +259 -10
- __init__.py +1 -0
- accelerate/config-old.yaml +16 -0
- accelerate/config.yaml +22 -0
- app.py +28 -7
- common/config.py +5 -4
- common/global_pool.py +26 -0
- common/loader.py +2 -3
- common/logger.py +46 -6
- common/metric.py +4 -1
- common/model_manager.py +221 -125
- common/saver.py +80 -0
- common/tokenizer.py +14 -2
- common/utils.py +12 -2
- config/README.md +348 -0
- config/app.yaml +1 -104
- config/decoder/interaction/stack-propagation.yaml +1 -0
- config/examples/README.md +38 -0
- config/examples/from_pretrained.yaml +53 -0
- config/examples/from_pretrained_multi.yaml +55 -0
- config/examples/normal.yaml +70 -0
- config/examples/reload_to_train.yaml +71 -0
- config/reproduction/atis/bi-model.yaml +106 -0
- config/reproduction/atis/dca-net.yaml +88 -0
- config/reproduction/atis/deberta.yaml +67 -0
- config/reproduction/atis/electra.yaml +67 -0
- config/reproduction/atis/joint-bert.yaml +70 -0
- config/reproduction/atis/roberta.yaml +70 -0
- config/reproduction/atis/slot-gated.yaml +87 -0
- config/reproduction/atis/stack-propagation.yaml +109 -0
- config/reproduction/mix-atis/agif.yaml +133 -0
- config/reproduction/mix-atis/gl-gin.yaml +128 -0
- config/reproduction/mix-atis/vanilla.yaml +95 -0
- config/reproduction/mix-snips/agif.yaml +131 -0
- config/reproduction/mix-snips/gl-gin.yaml +131 -0
- config/reproduction/mix-snips/vanilla.yaml +95 -0
- config/reproduction/snips/bi-model.yaml +104 -0
- config/reproduction/snips/dca_net.yaml +88 -0
- config/reproduction/snips/deberta.yaml +70 -0
- config/reproduction/snips/electra.yaml +69 -0
- config/reproduction/snips/joint-bert.yaml +75 -0
- config/reproduction/snips/roberta.yaml +70 -0
- config/reproduction/snips/slot-gated.yaml +87 -0
- config/reproduction/snips/stack-propagation.yaml +105 -0
- config/visual.yaml +6 -0
- model/decoder/base_decoder.py +24 -11
- model/encoder/auto_encoder.py +1 -1
- model/encoder/non_pretrained_encoder.py +2 -2
- model/encoder/pretrained_encoder.py +8 -3
.gitignore
ADDED
@@ -0,0 +1,136 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
.idea/
wandb/*
save/*
!save/.gitkeep
logs/*
!logs/.gitkeep
test
# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
#   However, in case of collaboration, if having platform-specific dependencies or dependencies
#   having no cross-platform support, pipenv may install dependencies that don't work, or not
#   install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/
.vscode/
README.md
CHANGED
@@ -1,13 +1,262 @@
(Removed: old lines 2-10 and 13, whose content is collapsed in this view; the `---` delimiters on lines 1 and 11 and the blank line 12 are unchanged.)

New README content:

<img src="https://cdn.jsdelivr.net/gh/LightChen233/blog-img/OpenSLU.jpg" alt=""/>

---

<p align="center">
  <a >
    <img alt="version" src="https://img.shields.io/badge/version-v0.1.0-blue?color=FF8000?color=009922" />
  </a>
  <a >
    <img alt="Status-building" src="https://img.shields.io/badge/Status-building-blue" />
  </a>
  <a href=""><img src="https://img.shields.io/badge/python-3.6.2+-orange.svg"></a>
  <a >
    <img alt="PRs-Welcome" src="https://img.shields.io/badge/PRs-Welcome-red" />
  </a>
  <a>
    <img alt="stars" src="https://img.shields.io/github/stars/LightChen233/OpenSLU" />
  </a>
  <a href="https://github.com/LightChen233/OpenSLU/network/members">
    <img alt="FORK" src="https://img.shields.io/github/forks/LightChen233/OpenSLU?color=FF8000" />
  </a>
  <a href="https://github.com/LightChen233/OpenSLU/issues">
    <img alt="Issues" src="https://img.shields.io/github/issues/LightChen233/OpenSLU?color=0088ff"/>
  </a>
  <br />
</p>

## <img src="https://cdn.jsdelivr.net/gh/LightChen233/blog-img/motivation.png" width="25" /> Motivation

Spoken Language Understanding (SLU) is one of the core components of a task-oriented dialogue system, which aims to extract the semantic meaning of user queries (e.g., intents and slots).

In this work, we introduce __OpenSLU__, an open-source toolkit that provides a unified, modularized, and extensible framework for spoken language understanding. Specifically, OpenSLU unifies 10 SLU baselines for both single-intent and multi-intent scenarios, supporting non-pretrained and pretrained models simultaneously. Additionally, OpenSLU is highly modularized and extensible: the model architecture, inference, and learning process are decomposed into reusable modules, which allows researchers to quickly set up SLU experiments with highly flexible configurations. We hope OpenSLU can help researchers quickly initiate experiments and spur more breakthroughs in SLU.

## <img src="https://cdn.jsdelivr.net/gh/LightChen233/blog-img/notes.png" width="25" /> Changelog
- 2023-02-09
  - We built the first version and released it.

## <img src="https://cdn.jsdelivr.net/gh/LightChen233/blog-img/resource.png" width="25" /> Installation
### System requirements
OpenSLU requires `Python>=3.8` and `torch>=1.12.0`.
### Install from git
```bash
git clone https://github.com/LightChen2333/OpenSLU.git && cd OpenSLU/
pip install -r requirements.txt
```

## File Structure

```yaml
root
├── common
│   ├── config.py           # load configuration and auto-preprocess ignored config
│   ├── loader.py           # load data from Hugging Face
│   ├── logger.py           # log prediction results; supports [fitlog], [wandb], [local logging]
│   ├── metric.py           # evaluation metrics; supports [intent acc], [slot F1], [EMA]
│   ├── model_manager.py    # helps prepare data and prebuild the training process
│   ├── tokenizer.py        # tokenizer; also supports non-pretrained models via a word tokenizer
│   └── utils.py            # canonical model communication data structures and other common tool functions
├── config
│   ├── reproduction        # configurations for reproduced SLU models
│   └── **.yaml             # configurations for SLU models
├── logs                    # local log storage dir path
├── model
│   ├── encoder
│   │   ├── base_encoder.py            # base encoder model; all implemented encoder models need to inherit the BaseEncoder class
│   │   ├── auto_encoder.py            # auto-encoder to automatically load the configured encoder model
│   │   ├── non_pretrained_encoder.py  # commonly used non-pretrained encoders, e.g. LSTM and LSTM + self-attention
│   │   └── pretrained_encoder.py      # commonly used pretrained encoders, implemented via the Hugging Face [AutoModel]
│   ├── decoder
│   │   ├── interaction
│   │   │   ├── base_interaction.py    # base interaction model; all implemented interaction models need to inherit the BaseInteraction class
│   │   │   └── *_interaction.py       # SOTA SLU interaction modules; you can easily reuse or rewrite them to implement your own idea
│   │   ├── base_decoder.py            # decoder class; [BaseDecoder] supports classification after interaction, and you can rewrite it for your own interaction order
│   │   └── classifier.py              # classifier class; supports linear and LSTM classification, as well as token-level intent
│   └── open_slu_model.py   # the general model class, which can automatically build the model from configuration
├── save                    # model checkpoint storage dir path; also used to automatically save GloVe embeddings
└── run.py                  # run script for all functions
```

## <img src="https://cdn.jsdelivr.net/gh/LightChen233/blog-img/catalogue.png" width="27" /> Quick Start

### 1. Reproducing Existing Models
Example for reproducing the `slot-gated` model:

```bash
python run.py --dataset atis --model slot-gated
```

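Under the hood, the `--dataset`/`--model` pair is resolved to a reproduction configuration (see `common/config.py` in this commit), so the command above is equivalent to `python run.py -cp config/reproduction/atis/slot-gated.yaml`.
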
### 2. Customizable Combination of Existing Components
1. First, you can freely combine and build your own model through config files. For details, see [Configuration](config/README.md).
2. Then, you can assign the configuration path to train your own model.

Example for `stack-propagation` fine-tuning:

```bash
python run.py -cp config/stack-propagation.yaml
```

Example for multi-GPU fine-tuning:

```bash
accelerate config
accelerate launch run.py -cp config/stack-propagation.yaml
```

Or you can pass an `accelerate` yaml configuration directly:

```bash
accelerate launch [--config_file ./accelerate/config.yaml] run.py -cp config/stack-propagation.yaml
```

### 3. Implementing a New SLU Model
In OpenSLU, you only need to rewrite the required components and assign them in the configuration, instead of rewriting all components.

In most cases, rewriting the Interaction module is enough to build a new SLU model.
This module accepts [HiddenData](./common/utils.py) as input and returns `HiddenData`, which contains the `hidden_states` for `intent` and `slot`, and other helpful information. An example is as follows:
```python
class NewInteraction(BaseInteraction):
    def __init__(self, **config):
        self.config = config
        ...

    def forward(self, hiddens: HiddenData):
        ...
        intent, slot = self.func(hiddens)
        hiddens.update_slot_hidden_state(slot)
        hiddens.update_intent_hidden_state(intent)
        return hiddens
```

To further meet the needs of complex exploration, we provide the [BaseDecoder](./model/decoder/base_decoder.py) class; the user can simply override its `forward()` function, which accepts `HiddenData` as input and returns `OutputData`. An example is as follows:
```python
class NewDecoder(BaseDecoder):
    def __init__(self,
                 intent_classifier,
                 slot_classifier,
                 interaction=None):
        ...
        self.int_cls = intent_classifier
        self.slot_cls = slot_classifier
        self.interaction = interaction

    def forward(self, hiddens: HiddenData):
        ...
        interact = self.interaction(hiddens)
        slot = self.slot_cls(interact.slot)
        intent = self.int_cls(interact.intent)
        return OutputData(intent, slot)
```

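Once the new component is written, it is wired in through the YAML configuration rather than through code (see [Configuration](config/README.md)); OpenSLU then builds it with the generic `instantiate()` helper. A minimal sketch of that mechanism, mirroring how `ModelManager` constructs models elsewhere in this commit (the config path is only an example):

```python
from common.config import Config
from common.utils import instantiate

# Load a config whose decoder/interaction section points at the new module
config = Config.load_from_yaml("config/examples/normal.yaml")

# ModelManager.init_model() builds the whole model tree the same way
model = instantiate(config.model)
```
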
## Modules

### 1. Encoder Modules

- **Non-Pretrained Encoder**
  - GloVe Embedding
  - BiLSTM Encoder
  - BiLSTM + Self-Attention Encoder
  - Bi-Encoder (supports two encoders, for intent and slot respectively)
- **Pretrained Encoder**
  - `bert-base-uncased`
  - `roberta-base`
  - `microsoft/deberta-v3-base`
  - other Hugging Face-supported encoder models...

### 2. Decoder Modules

#### 2.1 Interaction Modules

- DCA Net Interaction
- Stack Propagation Interaction
- Bi-Model Interaction (with/without decoder)
- Slot Gated Interaction

#### 2.2 Classification Modules
All classifiers support `Token-level Intent` and `Sentence-level Intent`. What's more, our decode function supports both `Single-Intent` and `Multi-Intent`.
- LinearClassifier
- AutoregressiveLSTMClassifier
- MLPClassifier

### 3. Supported Models
We implement 10 commonly used SLU baselines:

---
**Single-Intent Model**
- Bi-Model \[ [Wang et al., 2018](https://aclanthology.org/N18-2050/) \] :
  - `bi-model.yaml`
- Slot-Gated \[ [Goo et al., 2018](https://www.csie.ntu.edu.tw/~yvchen/doc/NAACL18_SlotGated.pdf) \] :
  - `slot-gated.yaml`
- Stack-Propagation \[ [Qin et al., 2019](https://www.aclweb.org/anthology/D19-1214/) \] :
  - `stack-propagation.yaml`
- Joint BERT \[ [Chen et al., 2019](https://arxiv.org/abs/1902.10909) \] :
  - `joint-bert.yaml`
- RoBERTa \[ [Liu et al., 2019](https://arxiv.org/abs/1907.11692) \] :
  - `roberta.yaml`
- ELECTRA \[ [Clark et al., 2020](https://arxiv.org/abs/2003.10555) \] :
  - `electra.yaml`
- DCA-Net \[ [Qin et al., 2021](https://arxiv.org/abs/2010.03880) \] :
  - `dca_net.yaml`
- DeBERTa \[ [He et al., 2021](https://arxiv.org/abs/2111.09543) \] :
  - `deberta.yaml`

---
**Multi-Intent Model**
- AGIF \[ [Qin et al., 2020](https://arxiv.org/pdf/2004.10087.pdf) \] :
  - `agif.yaml`
- GL-GIN \[ [Qin et al., 2021](https://arxiv.org/abs/2106.01925) \] :
  - `gl-gin.yaml`


## Application
### 1. Visualization Tools
Model metric tests alone no longer adequately reflect the model's performance. To help researchers further improve their models, we provide a tool for visual error analysis.

We provide an analysis interface with three main parts:
- (a) error distribution analysis;
- (b) label transfer analysis;
- (c) instance analysis.

<img src="https://cdn.jsdelivr.net/gh/LightChen233/blog-img/visual_analysis.png" />

```bash
python tools/visualization.py \
       --config_path config/visual.yaml \
       --output_path {ckpt_dir}/outputs.jsonl
```
The visualization configuration can be set as below:
```yaml
host: 127.0.0.1
port: 7861
is_push_to_public: true                # whether to push to the Gradio platform (public network)
output_path: save/stack/outputs.jsonl  # output prediction file path
page-size: 2                           # the number of instances on each page in instance analysis
```
### 2. Deployment

We provide a script to deploy your model automatically. You only need to run the command below to deploy your own model:

```bash
python app.py --config_path config/reproduction/atis/bi-model.yaml
```

<img src="https://cdn.jsdelivr.net/gh/LightChen233/blog-img/app.png" />

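The deployment script also accepts a `--push_to_public` (`-p`) flag; when it is set, the Gradio demo is launched with `share=True` so that it is reachable from the public network (see `app.py` in this commit).
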
### 3. Publish your model to Hugging Face

We also offer a script to automatically convert models trained with OpenSLU to the Hugging Face format, so that you can upload the model to your `Model` space.

```shell
python tools/parse_to_hugging_face.py -cp config/reproduction/atis/bi-model.yaml -op save/temp
```

It will generate 5 files, and you should only need to upload `config.json`, `pytorch_model.bin` and `tokenizer.pkl`.
After that, others can reproduce your model just by adjusting the `_from_pretrained_` parameters in the configuration.

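Downstream users can then point the `_from_pretrained_` fields of their configuration at the uploaded repository (see `config/examples/from_pretrained.yaml` in this commit). Internally this path goes through the helpers used by `ModelManager.from_pretrained()`; a rough sketch, with `<user>/<model-repo>` standing in for the uploaded Model space:

```python
from tools.load_from_hugging_face import PretrainedModelForSLU, PreTrainedTokenizerForSLU

# "<user>/<model-repo>" is a placeholder for the repository you uploaded to
model = PretrainedModelForSLU.from_pretrained("<user>/<model-repo>")
tokenizer = PreTrainedTokenizerForSLU.from_pretrained("<user>/<model-repo>")
```
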
## <img src="https://cdn.jsdelivr.net/gh/LightChen233/blog-img/intro.png" width="25" /> Contact

Please create GitHub issues here or email [Libo Qin](mailto:lbqin@ir.hit.edu.cn) or [Qiguang Chen](mailto:charleschen2333@gmail.com) if you have any questions or suggestions.
__init__.py
ADDED
@@ -0,0 +1 @@
+
accelerate/config-old.yaml
ADDED
@@ -0,0 +1,16 @@
compute_environment: LOCAL_MACHINE
deepspeed_config: {}
distributed_type: MULTI_GPU
downcast_bf16: 'no'
fsdp_config: {}
gpu_ids: all
machine_rank: 0
main_process_ip: null
main_process_port: 9001
main_training_function: main
mixed_precision: 'no'
num_machines: 0
num_processes: 2
rdzv_backend: static
same_network: true
use_cpu: false
accelerate/config.yaml
ADDED
@@ -0,0 +1,22 @@
command_file: null
commands: null
compute_environment: LOCAL_MACHINE
deepspeed_config: {}
distributed_type: 'NO'
downcast_bf16: 'no'
dynamo_backend: 'NO'
fsdp_config: {}
gpu_ids: all
machine_rank: 0
main_process_ip: null
main_process_port: null
main_training_function: main
megatron_lm_config: {}
mixed_precision: 'no'
num_machines: 1
num_processes: 2
rdzv_backend: static
same_network: true
tpu_name: null
tpu_zone: null
use_cpu: false
app.py
CHANGED
@@ -1,11 +1,31 @@
+'''
+Author: Qiguang Chen
+LastEditors: Qiguang Chen
+Date: 2023-02-07 15:42:32
+LastEditTime: 2023-02-19 21:04:03
+Description:
+
+'''
+import argparse
 import gradio as gr

 from common.config import Config
 from common.model_manager import ModelManager
+from common.utils import str2bool
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--config_path', '-cp', type=str, default="config/examples/from_pretrained.yaml")
+parser.add_argument('--push_to_public', '-p', type=str2bool, nargs='?',
+                    const=True, default=False,
+                    help="Push to public network.")
+args = parser.parse_args()
-config = Config.load_from_yaml("config/app.yaml")
+config = Config.load_from_yaml(args.config_path)
+config.base["train"] = False
+config.base["test"] = False

 model_manager = ModelManager(config)
-model_manager.
+model_manager.init_model()


 def text_analysis(text):
@@ -34,9 +54,10 @@ demo = gr.Interface(
     gr.Textbox(placeholder="Enter sentence here..."),
     ["html"],
     examples=[
-        ["
-        ["It was the best of times, it was the worst of times."],
+        ["i would like to find a flight from charlotte to las vegas that makes a stop in st louis"],
     ],
 )
-
-demo.launch()
+if args.push_to_public:
+    demo.launch(share=True)
+else:
+    demo.launch()
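`str2bool` is imported from `common/utils.py`, which is not part of this view; a typical argparse helper of that name looks roughly like the sketch below (an illustration, not necessarily the exact implementation in `common/utils.py`):

```python
import argparse

def str2bool(v):
    # Interpret common textual spellings of booleans passed on the command line
    if isinstance(v, bool):
        return v
    if v.lower() in ("yes", "true", "t", "y", "1"):
        return True
    if v.lower() in ("no", "false", "f", "n", "0"):
        return False
    raise argparse.ArgumentTypeError("Boolean value expected.")
```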
common/config.py
CHANGED
@@ -2,7 +2,7 @@
 Author: Qiguang Chen
 Date: 2023-01-11 10:39:26
 LastEditors: Qiguang Chen
-LastEditTime: 2023-
+LastEditTime: 2023-02-15 17:58:53
 Description: Configuration class to manage all process in OpenSLU like model construction, learning processing and so on.

 '''
@@ -18,7 +18,8 @@ class Config(dict):
         dict.__init__(self, *args, **kwargs)
         self.__dict__ = self
         self.start_time = datetime.datetime.now().strftime('%Y%m%d%H%M%S%f')
-        self.
+        if not self.model.get("_from_pretrained_"):
+            self.__autowired()

     @staticmethod
     def load_from_yaml(file_path:str)->"Config":
@@ -46,8 +47,8 @@ class Config(dict):
         Returns:
             Config: _description_
         """
-        if args.model is not None:
-            args.config_path = "config/
+        if args.model is not None and args.dataset is not None:
+            args.config_path = f"config/reproduction/{args.dataset}/{args.model}.yaml"
         config = Config.load_from_yaml(args.config_path)
         if args.dataset is not None:
             config.__update_dataset(args.dataset)
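For orientation, a small sketch of how the two code paths touched here are exercised elsewhere in this commit (`app.py` loads a YAML directly, while the `--dataset`/`--model` shortcut now resolves to a reproduction config):

```python
from common.config import Config

# Direct load, as done in app.py
config = Config.load_from_yaml("config/reproduction/atis/slot-gated.yaml")

# With `--dataset atis --model slot-gated`, load_from_args() resolves the same path:
# config/reproduction/{dataset}/{model}.yaml
```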
common/global_pool.py
ADDED
@@ -0,0 +1,26 @@
'''
Author: Qiguang Chen
LastEditors: Qiguang Chen
Date: 2023-02-12 14:35:37
LastEditTime: 2023-02-12 14:37:40
Description:

'''
def _init():
    global _global_dict
    _global_dict = {}


def set_value(key, value):
    # set global value to object pool
    _global_dict[key] = value


def get_value(key):
    # get global value from object pool
    try:
        return _global_dict[key]
    except:
        print('Failed to read ' + key + '\r\n')
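A short usage sketch of this object pool, following how `ModelManager` uses it later in this commit (it calls `_init()` once at start-up and then shares the logger object):

```python
from common import global_pool

global_pool._init()                      # create the shared dictionary once at start-up
global_pool.set_value("logger", "any shared object, e.g. a Logger instance")

# ... in any other module, without passing the object through constructors ...
shared_logger = global_pool.get_value("logger")
print(shared_logger)
```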
common/loader.py
CHANGED
@@ -2,7 +2,7 @@
 Author: Qiguang Chen
 Date: 2023-01-11 10:39:26
 LastEditors: Qiguang Chen
-LastEditTime: 2023-02-
+LastEditTime: 2023-02-19 15:39:48
 Description: all class for load data.

 '''
@@ -36,14 +36,13 @@ class DataFactory(object):
         return dataset_name.lower() in ["atis", "snips", "mix-atis", "mix-snips"]

     def load_dataset(self, dataset_config, split="train"):
-        # TODO: disable use_auth_token
         dataset_name = None
         if split not in dataset_config:
             dataset_name = dataset_config.get("dataset_name")
         elif self.__is_supported_datasets(dataset_config[split]):
             dataset_name = dataset_config[split].lower()
         if dataset_name is not None:
-            return load_dataset("LightChen2333/OpenSLU", dataset_name, split=split
+            return load_dataset("LightChen2333/OpenSLU", dataset_name, split=split)
         else:
             data_file = dataset_config[split]
             data_dict = {"text": [], "slot": [], "intent":[]}
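The supported dataset names map to configurations of the `LightChen2333/OpenSLU` dataset repository on the Hugging Face Hub; a minimal sketch of the call that `DataFactory.load_dataset()` ends up making for them:

```python
from datasets import load_dataset

# Equivalent to DataFactory.load_dataset() for a supported dataset name such as "atis"
train_split = load_dataset("LightChen2333/OpenSLU", "atis", split="train")
print(train_split[0])  # roughly {"text": [...], "slot": [...], "intent": ...}
```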
common/logger.py
CHANGED
@@ -2,14 +2,17 @@
 Author: Qiguang Chen
 Date: 2023-01-11 10:39:26
 LastEditors: Qiguang Chen
-LastEditTime: 2023-02-
+LastEditTime: 2023-02-17 20:38:38
 Description: log manager

 '''
+import datetime
 import json
 import os
 import time
 from common.config import Config
+import logging
+import colorlog

 def mkdirs(dir_names):
     for dir_name in dir_names:
@@ -71,7 +74,7 @@ class Logger():
         self.other_log_file = os.path.join(self.output_dir, "/other_log.jsonl")
         with open(self.other_log_file, "w", encoding="utf8") as f:
             print(f"Other Log Result will be written to {self.other_log_file}")

         LOGGING_LEVEL_MAP = {
             "CRITICAL": logging.CRITICAL,
             "FATAL": logging.FATAL,
@@ -82,10 +85,47 @@ class Logger():
             "DEBUG": logging.DEBUG,
             "NOTSET": logging.NOTSET,
         }
-        logging.basicConfig(format='[%(levelname)s - %(asctime)s]\t%(message)s', datefmt='%m/%d/%Y %I:%M:%S %p',
+        # logging.basicConfig(format='[%(levelname)s - %(asctime)s]\t%(message)s', datefmt='%m/%d/%Y %I:%M:%S %p',
+        #                     filename=os.path.join(self.output_dir, "log.log"), level=LOGGING_LEVEL_MAP[logging_level])
+
+        # logger = logging.getLogger()
+        # KZT = logging.StreamHandler()
+        # KZT.setLevel(logging.DEBUG)
+        # logger.addHandler(KZT)
+
+        self.logging = self._get_logging_logger(logging_level)
+
+    def _get_logging_logger(self, level="INFO"):
+        log_colors_config = {
+            'DEBUG': 'cyan',
+            'INFO': 'blue',
+            'WARNING': 'yellow',
+            'ERROR': 'red',
+            'CRITICAL': 'red,bg_white',
+        }
+
+        logger = logging.getLogger()
+        logger.setLevel(level)
+
+        log_path = os.path.join(self.output_dir, "log.log")
+
+        if not logger.handlers:
+            sh = logging.StreamHandler()
+            fh = logging.FileHandler(filename=log_path, mode='a', encoding="utf-8")
+            fmt = logging.Formatter(
+                fmt='[%(levelname)s - %(asctime)s]\t%(message)s',
+                datefmt='%m/%d/%Y %I:%M:%S %p')
+
+            sh_fmt = colorlog.ColoredFormatter(
+                fmt='%(log_color)s[%(levelname)s - %(asctime)s]\t%(message)s',
+                datefmt='%m/%d/%Y %I:%M:%S %p',
+                log_colors=log_colors_config)
+            sh.setFormatter(fmt=sh_fmt)
+            fh.setFormatter(fmt=fmt)
+            logger.addHandler(sh)
+            logger.addHandler(fh)
+        return logger
+
     def set_config(self, config: Config):
         """save config
common/metric.py
CHANGED
@@ -2,7 +2,7 @@
 Author: Qiguang Chen
 Date: 2023-01-11 10:39:26
 LastEditors: Qiguang Chen
-LastEditTime: 2023-
+LastEditTime: 2023-02-17 19:39:22
 Description: Metric calculation class

 '''
@@ -198,6 +198,8 @@ class Evaluator(object):
         lastPredTag = 'O'
         lastPredType = ''
         for c, p in zip(correct_slot, pred_slot):
+            c = str(c)
+            p = str(p)
             correctTag, correctType = Evaluator.__splitTagType(c)
             predTag, predType = Evaluator.__splitTagType(p)

@@ -317,6 +319,7 @@ class Evaluator(object):
         use_intent = output.intent_ids is not None and len(
             output.intent_ids) > 0
         if use_slot and "slot_f1" in metric_list:
+
             res_dict["slot_f1"] = Evaluator.computeF1Score(
                 output.slot_ids, inps.slot)
         if use_intent and "intent_acc" in metric_list:
common/model_manager.py
CHANGED
@@ -2,11 +2,13 @@
 Author: Qiguang Chen
 Date: 2023-01-11 10:39:26
 LastEditors: Qiguang Chen
-LastEditTime: 2023-02-
+LastEditTime: 2023-02-19 18:50:11
 Description: manage all process of model training and prediction.

 '''
+import math
 import os
+import queue
 import random

 import numpy as np
@@ -18,11 +20,15 @@ from common import utils
 from common.loader import DataFactory
 from common.logger import Logger
 from common.metric import Evaluator
+from common.saver import Saver
 from common.tokenizer import get_tokenizer, get_tokenizer_class, load_embedding
 from common.utils import InputData, instantiate
 from common.utils import OutputData
 from common.config import Config
 import dill
+from common import global_pool
+from tools.load_from_hugging_face import PreTrainedTokenizerForSLU, PretrainedModelForSLU
+# from tools.hugging_face_parser import load_model, load_tokenizer


 class ModelManager(object):
@@ -33,45 +39,101 @@ class ModelManager(object):
             config (Config): configuration to manage all process in OpenSLU
         """
         # init config
+        global_pool._init()
         self.config = config
         self.__set_seed(self.config.base.get("seed"))
         self.device = self.config.base.get("device")
+        self.load_dir = self.config.model_manager.get("load_dir")
+        if self.config.get("logger") and self.config["logger"].get("logger_type"):
+            logger_type = self.config["logger"].get("logger_type")
+        else:
+            logger_type = "wandb"
         # enable accelerator
         if "accelerator" in self.config and self.config["accelerator"].get("use_accelerator"):
             from accelerate import Accelerator
-            self.accelerator = Accelerator(log_with=
+            self.accelerator = Accelerator(log_with=logger_type)
         else:
             self.accelerator = None
+        self.tokenizer = None
+        self.saver = Saver(self.config.model_manager, start_time=self.config.start_time)
         if self.config.base.get("train"):
+            self.model = None
+            self.optimizer = None
+            self.total_step = None
+            self.lr_scheduler = None
+            self.init_step = 0
+            self.best_metric = 0
+        self.logger = Logger(logger_type=logger_type,
+                             logger_name=self.config.base["name"],
+                             start_time=self.config.start_time,
+                             accelerator=self.accelerator)
+        global_pool.set_value("logger", self.logger)
+
+    def init_model(self):
+        """init model, optimizer, lr_scheduler
+
+        Args:
+            model (Any): pytorch model
+        """
+        self.prepared = False
+        if self.load_dir is not None:
+            self.load()
+            self.config.set_vocab_size(self.tokenizer.vocab_size)
+            self.init_data()
+            if self.config.base.get("train") and self.config.model_manager.get("load_train_state"):
+                train_state = torch.load(os.path.join(
+                    self.load_dir, "train_state.pkl"), pickle_module=dill)
+                self.optimizer = instantiate(
+                    self.config["optimizer"])(self.model.parameters())
+                self.lr_scheduler = instantiate(self.config["scheduler"])(
+                    optimizer=self.optimizer,
+                    num_training_steps=self.total_step
+                )
+                self.optimizer.load_state_dict(train_state["optimizer"])
+                self.optimizer.zero_grad()
+                self.lr_scheduler.load_state_dict(train_state["lr_scheduler"])
+                self.init_step = train_state["step"]
+                self.best_metric = train_state["best_metric"]
+        elif self.config.model.get("_from_pretrained_") and self.config.tokenizer.get("_from_pretrained_"):
+            self.from_pretrained()
+            self.config.set_vocab_size(self.tokenizer.vocab_size)
+            self.init_data()
+        else:
             self.tokenizer = get_tokenizer(
                 self.config.tokenizer.get("_tokenizer_name_"))
-            self.
+            self.init_data()
+            self.model = instantiate(self.config.model)
+            self.model.to(self.device)
+            if self.config.base.get("train"):
+                self.optimizer = instantiate(
+                    self.config["optimizer"])(self.model.parameters())
+                self.lr_scheduler = instantiate(self.config["scheduler"])(
+                    optimizer=self.optimizer,
+                    num_training_steps=self.total_step
+                )

+    def init_data(self):
+        self.data_factory = DataFactory(tokenizer=self.tokenizer,
+                                        use_multi_intent=self.config.base.get("multi_intent"),
+                                        to_lower_case=self.config.tokenizer.get("_to_lower_case_"))
+        batch_size = self.config.base["batch_size"]
+        # init tokenizer config and dataloaders
+        tokenizer_config = {key: self.config.tokenizer[key]
+                            for key in self.config.tokenizer if key[0] != "_" and key[-1] != "_"}
+
+        if self.config.base.get("train"):
             # init dataloader & load data
-            if not os.path.exists("save/"):
-                os.mkdir("save/")
-            self.model_save_dir = "save/" + config.start_time
-            if not os.path.exists(self.model_save_dir):
-                os.mkdir(self.model_save_dir)
-            batch_size = self.config.base["batch_size"]
-            df = DataFactory(tokenizer=self.tokenizer,
-                             use_multi_intent=self.config.base.get("multi_intent"),
-                             to_lower_case=self.config.base.get("_to_lower_case_"))
-            train_dataset = df.load_dataset(self.config.dataset, split="train")
+            train_dataset = self.data_factory.load_dataset(self.config.dataset, split="train")

-            # update label and vocabulary
-                                for key in self.config.tokenizer if key[0] != "_" and key[-1] != "_"}
-            self.train_dataloader = df.get_data_loader(train_dataset,
+            # update label and vocabulary (ONLY SUPPORT FOR "word_tokenizer")
+            self.data_factory.update_label_names(train_dataset)
+            self.data_factory.update_vocabulary(train_dataset)
+
+            self.train_dataloader = self.data_factory.get_data_loader(train_dataset,
                                                        batch_size,
                                                        shuffle=True,
                                                        device=self.device,
@@ -80,9 +142,9 @@ class ModelManager(object):
                                                        "_align_mode_"),
                                                        label2tensor=True,
                                                        **tokenizer_config)
-            self.dev_dataloader =
+            self.total_step = int(self.config.base.get("epoch_num")) * len(self.train_dataloader)
+            dev_dataset = self.data_factory.load_dataset(self.config.dataset, split="validation")
+            self.dev_dataloader = self.data_factory.get_data_loader(dev_dataset,
                                                        batch_size,
                                                        shuffle=False,
                                                        device=self.device,
@@ -91,16 +153,22 @@ class ModelManager(object):
                                                        "_align_mode_"),
                                                        label2tensor=False,
                                                        **tokenizer_config)
+            self.data_factory.update_vocabulary(dev_dataset)
+        self.intent_list = None
+        self.intent_dict = None
+        self.slot_list = None
+        self.slot_dict = None
         # add intent label num and slot label num to config
-        if
-            self.intent_list =
-            self.intent_dict =
+        if self.config.model["decoder"].get("intent_classifier") and int(self.config.get_intent_label_num()) == 0:
+            self.intent_list = self.data_factory.intent_label_list
+            self.intent_dict = self.data_factory.intent_label_dict
             self.config.set_intent_label_num(len(self.intent_list))
-            self.
+        if self.config.model["decoder"].get("slot_classifier") and int(self.config.get_slot_label_num()) == 0:
+            self.slot_list = self.data_factory.slot_label_list
+            self.slot_dict = self.data_factory.slot_label_dict
             self.config.set_slot_label_num(len(self.slot_list))

         # autoload embedding for non-pretrained encoder
         if self.config["model"]["encoder"].get("embedding") and self.config["model"]["encoder"]["embedding"].get(
@@ -114,19 +182,13 @@ class ModelManager(object):
         self.config.autoload_template()
         # save config
         self.logger.set_config(self.config)
-        self.
-        self.
-        self.lr_scheduler = None
-        if self.config.tokenizer.get("_tokenizer_name_") == "word_tokenizer":
-            self.tokenizer.save(os.path.join(self.model_save_dir, "tokenizer.json"))
-        utils.save_json(os.path.join(
-            self.model_save_dir, "label.json"), {"intent": self.intent_list,"slot": self.slot_list})
+        self.saver.save_tokenizer(self.tokenizer)
+        self.saver.save_label(self.intent_list, self.slot_list)
+        self.config.set_vocab_size(self.tokenizer.vocab_size)
+
         if self.config.base.get("test"):
-            self.test_dataset =
-            self.test_dataloader = df.get_data_loader(self.test_dataset,
+            self.test_dataset = self.data_factory.load_dataset(self.config.dataset, split="test")
+            self.test_dataloader = self.data_factory.get_data_loader(self.test_dataset,
                                                        batch_size,
                                                        shuffle=False,
                                                        device=self.device,
@@ -136,30 +198,6 @@ class ModelManager(object):
                                                        label2tensor=False,
                                                        **tokenizer_config)

-    def init_model(self, model):
-        """init model, optimizer, lr_scheduler
-
-        Args:
-            model (Any): pytorch model
-        """
-        self.model = model
-        self.model.to(self.device)
-        if self.config.base.get("train"):
-            self.optimizer = instantiate(
-                self.config["optimizer"])(self.model.parameters())
-            self.total_step = int(self.config.base.get(
-                "epoch_num")) * len(self.train_dataloader)
-            self.lr_scheduler = instantiate(self.config["scheduler"])(
-                optimizer=self.optimizer,
-                num_training_steps=self.total_step
-            )
-            if self.accelerator is not None:
-                self.model, self.optimizer, self.train_dataloader, self.lr_scheduler = self.accelerator.prepare(
-                    self.model, self.optimizer, self.train_dataloader, self.lr_scheduler)
-                if self.config.base.get("load_dir_path"):
-                    self.accelerator.load_state(self.config.base.get("load_dir_path"))
-            # self.dev_dataloader = self.accelerator.prepare(self.dev_dataloader)
-
     def eval(self, step: int, best_metric: float) -> float:
         """ evaluation models.
@@ -171,31 +209,21 @@ class ModelManager(object):
             float: updated best metric value
         """
         # TODO: save dev
-        _, res = self.__evaluate(self.model, self.dev_dataloader)
+        _, res = self.__evaluate(self.model, self.dev_dataloader, mode="dev")
         self.logger.log_metric(res, metric_split="dev", step=step)
-        if res[self.config.
-            best_metric = res[self.config.
-            torch.save(step, os.path.join(
-                self.model_save_dir, "step.pkl"))
-        else:
-            self.accelerator.wait_for_everyone()
-            unwrapped_model = self.accelerator.unwrap_model(self.model)
-            self.accelerator.save(unwrapped_model.state_dict(
-            ), os.path.join(self.model_save_dir, "model.pkl"))
-            self.accelerator.save_state(output_dir=self.model_save_dir)
-        outputs.save(self.model_save_dir, self.test_dataset)
-        self.logger.log_metric(test_res, metric_split="test", step=step)
+        if res[self.config.evaluator.get("best_key")] > best_metric:
+            best_metric = res[self.config.evaluator.get("best_key")]
+            train_state = {
+                "step": step,
+                "best_metric": best_metric,
+                "optimizer": self.optimizer.state_dict(),
+                "lr_scheduler": self.lr_scheduler.state_dict()
+            }
+            self.saver.save_model(self.model, train_state, self.accelerator)
+            if self.config.base.get("test"):
+                outputs, test_res = self.__evaluate(self.model, self.test_dataloader, mode="test")
+                self.saver.save_output(outputs, self.test_dataset)
+                self.logger.log_metric(test_res, metric_split="test", step=step)
         return best_metric

     def train(self) -> float:
@@ -204,9 +232,23 @@ class ModelManager(object):
         Returns:
             float: updated best metric value
         """
+        self.model.train()
+        if self.accelerator is not None:
+            self.total_step = math.ceil(self.total_step / self.accelerator.num_processes)
+        if self.optimizer is None:
+            self.optimizer = instantiate(self.config["optimizer"])(self.model.parameters())
+        if self.lr_scheduler is None:
+            self.lr_scheduler = instantiate(self.config["scheduler"])(
+                optimizer=self.optimizer,
+                num_training_steps=self.total_step
+            )
+        if not self.prepared and self.accelerator is not None:
+            self.model, self.optimizer, self.train_dataloader, self.lr_scheduler = self.accelerator.prepare(
+                self.model, self.optimizer, self.train_dataloader, self.lr_scheduler)
+        step = self.init_step
         progress_bar = tqdm(range(self.total_step))
+        progress_bar.update(self.init_step)
+        self.optimizer.zero_grad()
         for _ in range(int(self.config.base.get("epoch_num"))):
             for data in self.train_dataloader:
                 if step == 0:
@@ -230,16 +272,25 @@ class ModelManager(object):
                 loss.backward()
                 self.optimizer.step()
                 self.lr_scheduler.step()
-                    best_metric
+                train_state = {
+                    "step": step,
+                    "best_metric": self.best_metric,
+                    "optimizer": self.optimizer.state_dict(),
+                    "lr_scheduler": self.lr_scheduler.state_dict()
+                }
+                if not self.saver.auto_save_step(self.model, train_state, self.accelerator):
+                    if not self.config.evaluator.get("eval_by_epoch") and step % self.config.evaluator.get("eval_step") == 0 and step != 0:
+                        self.best_metric = self.eval(step, self.best_metric)
                 step += 1
                 progress_bar.update(1)
-            if self.config.
-                best_metric = self.eval(step, best_metric)
+            if self.config.evaluator.get("eval_by_epoch"):
+                self.best_metric = self.eval(step, self.best_metric)
         self.logger.finish()
-        return best_metric
+        return self.best_metric

+    def test(self):
+        return self.__evaluate(self.model, self.test_dataloader, mode="test")
+
     def __set_seed(self, seed_value: int):
         """Manually set random seeds.
@@ -258,7 +309,7 @@ class ModelManager(object):
         torch.backends.cudnn.benchmark = True
         return

-    def __evaluate(self, model, dataloader):
+    def __evaluate(self, model, dataloader, mode="dev"):
         model.eval()
         inps = InputData()
         outputs = OutputData()
@@ -272,52 +323,97 @@ class ModelManager(object):
             decode_output.map_output(slot_map=self.slot_list,
                                      intent_map=self.intent_list)
-            data, decode_output
+            if self.config.model["decoder"].get("slot_classifier"):
+                data, decode_output = utils.remove_slot_ignore_index(
+                    data, decode_output, ignore_index="#")

             inps.merge_input_data(data)
             outputs.merge_output_data(decode_output)
-        if "metric" in self.config:
+        if "metric" in self.config.evaluator:
             res = Evaluator.compute_all_metric(
-                inps, outputs, intent_label_map=self.intent_dict, metric_list=self.config.metric)
+                inps, outputs, intent_label_map=self.intent_dict, metric_list=self.config.evaluator["metric"])
         else:
             res = Evaluator.compute_all_metric(
                 inps, outputs, intent_label_map=self.intent_dict)
+        self.logger.info(f"Best {mode} metric: "+str(res))
         model.train()
         return outputs, res

     def load(self):
-        if self.
+        if self.tokenizer is None:
+            with open(os.path.join(self.load_dir, "tokenizer.pkl"), 'rb') as f:
+                self.tokenizer = dill.load(f)
+        label = utils.load_json(os.path.join(self.load_dir, "label.json"))
+        if label["intent"] is None:
+            self.intent_list = None
+            self.intent_dict = None
+        else:
+            self.intent_list = label["intent"]
+            self.intent_dict = {x: i for i, x in enumerate(label["intent"])}
+            self.config.set_intent_label_num(len(self.intent_list))
+        if label["slot"] is None:
+            self.slot_list = None
+            self.slot_dict = None
+        else:
+            self.slot_list = label["slot"]
+            self.slot_dict = {x: i for i, x in enumerate(label["slot"])}
+            self.config.set_slot_label_num(len(self.slot_list))
+        self.config.set_vocab_size(self.tokenizer.vocab_size)
+        if self.accelerator is not None and self.load_dir is not None:
+            self.model = torch.load(os.path.join(self.load_dir, "model.pkl"), map_location=torch.device(self.device))
+            self.prepared = True
+            self.accelerator.load_state(self.load_dir)
+            self.accelerator.prepare_model(self.model)
         else:
-            self.
+            self.model = torch.load(os.path.join(
+                self.load_dir, "model.pkl"), map_location=torch.device(self.device))
+        # if self.config.tokenizer["_tokenizer_name_"] == "word_tokenizer":
+        #     self.tokenizer = get_tokenizer_class(self.config.tokenizer["_tokenizer_name_"]).from_file(os.path.join(self.load_dir, "tokenizer.json"))
+        # else:
+        #     self.tokenizer = get_tokenizer(self.config.tokenizer["_tokenizer_name_"])
+        self.model.to(self.device)
-        label =
-        self.
-                                to_lower_case=self.config.tokenizer.get("_to_lower_case_"))
+
+    def from_pretrained(self):
+        self.config.autoload_template()
+        model = PretrainedModelForSLU.from_pretrained(self.config.model["_from_pretrained_"])
+        # model = load_model(self.config.model["_from_pretrained_"])
+        self.model = model.model
+        if self.tokenizer is None:
+            self.tokenizer = PreTrainedTokenizerForSLU.from_pretrained(
+                self.config.tokenizer["_from_pretrained_"])
+            self.config.tokenizer = model.config.tokenizer
+        # self.tokenizer = load_tokenizer(self.config.tokenizer["_from_pretrained_"])
+
         self.model.to(self.device)
+        label = model.config._id2label
+        self.config.model = model.config.model
         self.intent_list = label["intent"]
         self.slot_list = label["slot"]
+        self.intent_dict = {x: i for i, x in enumerate(label["intent"])}
+        self.slot_dict = {x: i for i, x in enumerate(label["slot"])}

     def predict(self, text_data):
         self.model.eval()
         tokenizer_config = {key: self.config.tokenizer[key]
+                            for key in self.config.tokenizer if key[0] != "_" and key[-1] != "_"}
         align_mode = self.config.tokenizer.get("_align_mode_")
         inputs = self.data_factory.batch_fn(batch=[{"text": text_data.split(" ")}],
+                                            device=self.device,
+                                            config=tokenizer_config,
+                                            enable_label=False,
+                                            align_mode=align_mode if align_mode is not None else "general",
+                                            label2tensor=False)
         output = self.model(inputs)
         decode_output = self.model.decode(output, inputs)
         decode_output.map_output(slot_map=self.slot_list,
+                                 intent_map=self.intent_list)
         if self.config.base.get("multi_intent"):
             intent = decode_output.intent_ids[0]
         else:
             intent = [decode_output.intent_ids[0]]
+        input_ids = inputs.input_ids[0].tolist()
+        tokens = [self.tokenizer.decode(ids) for ids in input_ids]
+        slots = decode_output.slot_ids[0]
+        return {"intent": intent, "slot": slots, "text": tokens}
common/saver.py
ADDED
@@ -0,0 +1,80 @@
|
1 |
+
'''
|
2 |
+
Author: Qiguang Chen
|
3 |
+
LastEditors: Qiguang Chen
|
4 |
+
Date: 2023-02-12 22:23:58
|
5 |
+
LastEditTime: 2023-02-19 14:14:56
|
6 |
+
Description:
|
7 |
+
|
8 |
+
'''
|
9 |
+
import json
|
10 |
+
import os
|
11 |
+
import queue
|
12 |
+
import shutil
|
13 |
+
import torch
|
14 |
+
import dill
|
15 |
+
from common import utils
|
16 |
+
|
17 |
+
|
18 |
+
class Saver():
|
19 |
+
def __init__(self, config, start_time=None) -> None:
|
20 |
+
self.config = config
|
21 |
+
if self.config.get("save_dir"):
|
22 |
+
self.model_save_dir = self.config["save_dir"]
|
23 |
+
else:
|
24 |
+
if not os.path.exists("save/"):
|
25 |
+
os.mkdir("save/")
|
26 |
+
self.model_save_dir = "save/" + start_time
|
27 |
+
if not os.path.exists(self.model_save_dir):
|
28 |
+
os.mkdir(self.model_save_dir)
|
29 |
+
save_mode = config.get("save_mode")
|
30 |
+
self.save_mode = save_mode if save_mode is not None else "save-by-eval"
|
31 |
+
|
32 |
+
max_save_num = self.config.get("max_save_num")
|
33 |
+
self.max_save_num = max_save_num if max_save_num is not None else 1
|
34 |
+
self.save_pool = queue.Queue(maxsize=max_save_num)
|
35 |
+
|
36 |
+
def save_tokenizer(self, tokenizer):
|
37 |
+
with open(os.path.join(self.model_save_dir, "tokenizer.pkl"), 'wb') as f:
|
38 |
+
dill.dump(tokenizer, f)
|
39 |
+
|
40 |
+
def save_label(self, intent_list, slot_list):
|
41 |
+
utils.save_json(os.path.join(self.model_save_dir, "label.json"), {"intent": intent_list, "slot": slot_list})
|
42 |
+
|
43 |
+
|
44 |
+
def save_model(self, model, train_state, accelerator=None):
|
45 |
+
step = train_state["step"]
|
46 |
+
if self.max_save_num != 1:
|
47 |
+
|
48 |
+
model_save_dir =os.path.join(self.model_save_dir, str(step))
|
49 |
+
if self.save_pool.full():
|
50 |
+
delete_dir = self.save_pool.get()
|
51 |
+
shutil.rmtree(delete_dir)
|
52 |
+
self.save_pool.put(model_save_dir)
|
53 |
+
else:
|
54 |
+
self.save_pool.put(model_save_dir)
|
55 |
+
if not os.path.exists(model_save_dir):
|
56 |
+
os.mkdir(model_save_dir)
|
57 |
+
else:
|
58 |
+
model_save_dir = self.model_save_dir
|
59 |
+
if not os.path.exists(model_save_dir):
|
60 |
+
os.mkdir(model_save_dir)
|
61 |
+
if accelerator is None:
|
62 |
+
torch.save(model, os.path.join(model_save_dir, "model.pkl"))
|
63 |
+
torch.save(train_state, os.path.join(model_save_dir, "train_state.pkl"), pickle_module=dill)
|
64 |
+
else:
|
65 |
+
accelerator.wait_for_everyone()
|
66 |
+
unwrapped_model = accelerator.unwrap_model(model)
|
67 |
+
accelerator.save(unwrapped_model, os.path.join(model_save_dir, "model.pkl"))
|
68 |
+
accelerator.save_state(output_dir=model_save_dir)
|
69 |
+
|
70 |
+
def auto_save_step(self, model, train_state, accelerator=None):
|
71 |
+
step = train_state["step"]
|
72 |
+
if self.save_mode == "save-by-step" and step % self.config.get("save_step")==0 and step != 0:
|
73 |
+
self.save_model(model, train_state, accelerator)
|
74 |
+
return True
|
75 |
+
else:
|
76 |
+
return False
|
77 |
+
|
78 |
+
|
79 |
+
def save_output(self, outputs, dataset):
|
80 |
+
outputs.save(self.model_save_dir, dataset)
|
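A minimal sketch of how this `Saver` could be driven from a training loop (the config dict, stand-in model, and label lists below are placeholders for illustration, not the actual OpenSLU objects):

```python
import os
import torch
from common.saver import Saver

# stand-in objects purely for illustration; in OpenSLU these come from ModelManager
model = torch.nn.Linear(4, 2)
os.makedirs("save", exist_ok=True)

saver = Saver({"save_dir": "save/example", "save_mode": "save-by-step",
               "save_step": 100, "max_save_num": 1})
saver.save_label(["atis_flight"], ["O", "B-fromloc.city_name"])  # illustrative label lists

for step in range(1, 201):
    # ... forward / backward / optimizer.step() would go here ...
    saver.auto_save_step(model, {"step": step})  # writes model.pkl + train_state.pkl at steps 100 and 200
```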
common/tokenizer.py
CHANGED
@@ -103,7 +103,7 @@ class WordTokenizer(object):
|
|
103 |
assert isinstance(instance, str)
|
104 |
|
105 |
# count the frequency of instances.
|
106 |
-
self.counter[instance] += 1
|
107 |
|
108 |
if instance not in self.index2instance:
|
109 |
self.instance2index[instance] = len(self.index2instance)
|
@@ -190,6 +190,18 @@ class WordTokenizer(object):
|
|
190 |
index = index.tolist()
|
191 |
return self.decode(index)
|
192 |
return self.index2instance[index]
|
|
193 |
|
194 |
def save(self, path):
|
195 |
""" Save the content of alphabet to files.
|
@@ -214,7 +226,7 @@ class WordTokenizer(object):
|
|
214 |
obj = json.load(fw)
|
215 |
tokenizer = WordTokenizer(obj["name"])
|
216 |
tokenizer.instance2index = OrderedDict(obj["token_map"])
|
217 |
-
tokenizer.counter = len(tokenizer.instance2index)
|
218 |
tokenizer.index2instance = OrderedSet(tokenizer.instance2index.keys())
|
219 |
return tokenizer
|
220 |
|
|
|
103 |
assert isinstance(instance, str)
|
104 |
|
105 |
# count the frequency of instances.
|
106 |
+
# self.counter[instance] += 1
|
107 |
|
108 |
if instance not in self.index2instance:
|
109 |
self.instance2index[instance] = len(self.index2instance)
|
|
|
190 |
index = index.tolist()
|
191 |
return self.decode(index)
|
192 |
return self.index2instance[index]
|
193 |
+
|
194 |
+
def decode_batch(self, index, **kargs):
|
195 |
+
""" Get corresponding instance of query index.
|
196 |
+
|
197 |
+
Throws an exception if any index is invalid.
|
198 |
+
|
199 |
+
Args:
|
200 |
+
index (int or Iterable): query index or batch of query indexes.
|
201 |
+
Returns:
|
202 |
+
the corresponding instance(s).
|
203 |
+
"""
|
204 |
+
return self.decode(index)
|
205 |
|
206 |
def save(self, path):
|
207 |
""" Save the content of alphabet to files.
|
|
|
226 |
obj = json.load(fw)
|
227 |
tokenizer = WordTokenizer(obj["name"])
|
228 |
tokenizer.instance2index = OrderedDict(obj["token_map"])
|
229 |
+
# tokenizer.counter = len(tokenizer.instance2index)
|
230 |
tokenizer.index2instance = OrderedSet(tokenizer.instance2index.keys())
|
231 |
return tokenizer
|
232 |
|
common/utils.py
CHANGED
@@ -12,7 +12,7 @@ import torch
|
|
12 |
from torch.nn.utils.rnn import pad_sequence
|
13 |
from tqdm import tqdm
|
14 |
from torch import Tensor
|
15 |
-
|
16 |
class InputData():
|
17 |
"""input datas class
|
18 |
"""
|
@@ -486,4 +486,14 @@ def save_json(file_path, obj):
|
|
486 |
def load_json(file_path):
|
487 |
with open(file_path, 'r', encoding="utf8") as fw:
|
488 |
res =json.load(fw)
|
489 |
-
return res
|
|
12 |
from torch.nn.utils.rnn import pad_sequence
|
13 |
from tqdm import tqdm
|
14 |
from torch import Tensor
|
15 |
+
import argparse
|
16 |
class InputData():
|
17 |
"""input datas class
|
18 |
"""
|
|
|
486 |
def load_json(file_path):
|
487 |
with open(file_path, 'r', encoding="utf8") as fw:
|
488 |
res =json.load(fw)
|
489 |
+
return res
|
490 |
+
|
491 |
+
def str2bool(v):
|
492 |
+
if isinstance(v, bool):
|
493 |
+
return v
|
494 |
+
if v.lower() in ('yes', 'true', 't', 'y', '1'):
|
495 |
+
return True
|
496 |
+
elif v.lower() in ('no', 'false', 'f', 'n', '0'):
|
497 |
+
return False
|
498 |
+
else:
|
499 |
+
raise argparse.ArgumentTypeError('Boolean value expected.')
|
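A quick illustration of how `str2bool` can be used as an `argparse` type (the flag name is made up for the example and assumes the script is run from the project root):

```python
import argparse
from common.utils import str2bool

parser = argparse.ArgumentParser()
# accepts yes/no, true/false, t/f, y/n, 1/0 on the command line
parser.add_argument("--multi_intent", type=str2bool, default=False)
args = parser.parse_args(["--multi_intent", "yes"])
print(args.multi_intent)  # True
```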
config/README.md
ADDED
@@ -0,0 +1,348 @@
|
1 |
+
# Configuration
|
2 |
+
|
3 |
+
## 1. Introduction
|
4 |
+
|
5 |
+
Configuration is divided into fine-grained reusable modules:
|
6 |
+
|
7 |
+
- `base`: basic configuration
|
8 |
+
- `logger`: logger setting
|
9 |
+
- `model_manager`: loading and saving model parameters
|
10 |
+
- `accelerator`: whether to enable multi-GPU
|
11 |
+
- `dataset`: dataset management
|
12 |
+
- `evaluator`: evaluation and metrics setting.
|
13 |
+
- `tokenizer`: tokenizer initialization and tokenizing settings.
|
14 |
+
- `optimizer`: optimizer initialization settings.
|
15 |
+
- `scheduler`: scheduler initialization settings.
|
16 |
+
- `model`: model construction setting.
|
17 |
+
|
18 |
+
Sections 2 to 11 describe the configuration in detail, or you can see [Examples](examples/README.md) for a quick start.
|
19 |
+
|
20 |
+
NOTE: `_*_` config items are reserved fields in OpenSLU.
|
21 |
+
|
22 |
+
## Configuration Item Script
|
23 |
+
In OpenSLU configuration, every configuration item supports a simple calculation script. For example, we can get `dataset_name` with `{dataset.dataset_name}` and fill its value into the Python expression `'LightChen2333/agif-slu-' + '*'`. (Without the quotes, the `{dataset.dataset_name}` value is treated as a variable.)
|
24 |
+
|
25 |
+
NOTE: every item containing `{}` is treated as a Python script.
|
26 |
+
```yaml
|
27 |
+
tokenizer:
|
28 |
+
_from_pretrained_: "'LightChen2333/agif-slu-' + '{dataset.dataset_name}'" # Support simple calculation script
|
29 |
+
|
30 |
+
```
|
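For instance, if `dataset.dataset_name` is `atis`, the item above resolves to the equivalent of:

```yaml
tokenizer:
  _from_pretrained_: LightChen2333/agif-slu-atis
```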
31 |
+
|
32 |
+
## `base` Config
|
33 |
+
```yaml
|
34 |
+
# `start_time` is generated automatically when any config script starts; it does not need to be assigned.
|
35 |
+
# start_time: xxxxxxxx
|
36 |
+
base:
|
37 |
+
name: "OpenSLU" # project/logger name
|
38 |
+
multi_intent: false # whether to enable multi-intent setting
|
39 |
+
train: True # enable train else enable zero-shot
|
40 |
+
test: True # enable test during train.
|
41 |
+
device: cuda # device for cuda/cpu
|
42 |
+
seed: 42 # random seed
|
43 |
+
best_key: EMA # save model by which metric[intent_acc/slot_f1/EMA]
|
44 |
+
tokenizer_name: word_tokenizer # use word_tokenizer when no pretrained model is used, otherwise any [AutoTokenizer] tokenizer name
|
45 |
+
add_special_tokens: false # whether to add [CLS], [SEP] special tokens
|
46 |
+
epoch_num: 300 # train epoch num
|
47 |
+
# eval_step: 280 # if eval_by_epoch = false and eval_step > 0, will evaluate model by steps
|
48 |
+
eval_by_epoch: true # evaluate model by epoch
|
49 |
+
batch_size: 16 # batch size
|
50 |
+
```
|
51 |
+
## `logger` Config
|
52 |
+
```yaml
|
53 |
+
logger:
|
54 |
+
# `wandb` is supported in both single- and multi-GPU settings,
|
55 |
+
# `tensorboard` is only supported in multi-GPU,
|
56 |
+
# and `fitlog` is only supported in single-GPU
|
57 |
+
logger_type: wandb
|
58 |
+
```
|
59 |
+
## `model_manager` Config
|
60 |
+
```yaml
|
61 |
+
model_manager:
|
62 |
+
# if load_dir != `null`, OpenSLU will try to load checkpoint to continue training,
|
63 |
+
# if load_dir == `null`, OpenSLU will restart training.
|
64 |
+
load_dir: null
|
65 |
+
# The dir path to save model and training state.
|
66 |
+
# if save_dir == `null` model will be saved to `save/{start_time}`
|
67 |
+
save_dir: save/stack
|
68 |
+
# save_mode can be selected in [save-by-step, save-by-eval]
|
69 |
+
# `save-by-step` means save model only by {save_step} steps without evaluation.
|
70 |
+
# `save-by-eval` means save model by best validation performance
|
71 |
+
save_mode: save-by-eval
|
72 |
+
# save_step: 100 # only enabled when save_mode == `save-by-step`
|
73 |
+
max_save_num: 1 # the number of best models to keep
|
74 |
+
```
|
75 |
+
## `accelerator` Config
|
76 |
+
```yaml
|
77 |
+
accelerator:
|
78 |
+
use_accelerator: false # will enable `accelerator` if use_accelerator is `true`
|
79 |
+
```
|
80 |
+
## `dataset` Config
|
81 |
+
```yaml
|
82 |
+
dataset:
|
83 |
+
# support loading datasets from Hugging Face.
|
84 |
+
# dataset_name can be selected in [atis, snips, mix-atis, mix-snips]
|
85 |
+
dataset_name: atis
|
86 |
+
# any single dataset split can be assigned its own path; the remaining splits fall back to the splits of `dataset_name`
|
87 |
+
# train: atis # support load model from hugging-face or assigned local data path.
|
88 |
+
# validation: {root}/ATIS/dev.jsonl
|
89 |
+
# test: {root}/ATIS/test.jsonl
|
90 |
+
```
|
91 |
+
## `evaluator` Config
|
92 |
+
```yaml
|
93 |
+
evaluator:
|
94 |
+
best_key: EMA # the metric to judge the best model
|
95 |
+
eval_by_epoch: true # Evaluate after an epoch if `true`.
|
96 |
+
# Evaluate after {eval_step} steps if eval_by_epoch == `false`.
|
97 |
+
# eval_step: 1800
|
98 |
+
# the supported metrics are listed below:
|
99 |
+
# - intent_acc
|
100 |
+
# - slot_f1
|
101 |
+
# - EMA
|
102 |
+
# - intent_f1
|
103 |
+
# - macro_intent_f1
|
104 |
+
# - micro_intent_f1
|
105 |
+
# NOTE: [intent_f1, macro_intent_f1, micro_intent_f1] are only supported in the multi-intent setting. intent_f1 and macro_intent_f1 are the same metric.
|
106 |
+
metric:
|
107 |
+
- intent_acc
|
108 |
+
- slot_f1
|
109 |
+
- EMA
|
110 |
+
```
|
111 |
+
## `tokenizer` Config
|
112 |
+
```yaml
|
113 |
+
tokenizer:
|
114 |
+
# Init tokenizer. Supports `word_tokenizer` and other Hugging Face tokenizers.
|
115 |
+
_tokenizer_name_: word_tokenizer
|
116 |
+
# if `_tokenizer_name_` is not assigned, you can load a pretrained tokenizer from Hugging Face.
|
117 |
+
# _from_pretrained_: LightChen2333/stack-propagation-slu-atis
|
118 |
+
_padding_side_: right # the padding side of tokenizer, support [left/ right]
|
119 |
+
# Align mode between text and slot, support [fast/ general],
|
120 |
+
# `general` is supported by most tokenizers, `fast` only by a small portion of them.
|
121 |
+
_align_mode_: fast
|
122 |
+
_to_lower_case_: true
|
123 |
+
add_special_tokens: false # other tokenizer args; any argument except those in `_*_` format is passed to tokenizer initialization
|
124 |
+
max_length: 512
|
125 |
+
|
126 |
+
```
|
127 |
+
## `optimizer` Config
|
128 |
+
```yaml
|
129 |
+
optimizer:
|
130 |
+
_model_target_: torch.optim.Adam # Optimizer class/ function return Optimizer object
|
131 |
+
_model_partial_: true # partial-load configuration; model.parameters() is added later to complete the optimizer arguments
|
132 |
+
lr: 0.001 # learning rate
|
133 |
+
weight_decay: 1e-6 # weight decay
|
134 |
+
```
|
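Conceptually, `_model_partial_: true` means the target is first instantiated as a partially applied callable and completed later, once the model exists; the same mechanism applies to the scheduler below. A minimal sketch of this behaviour (illustrative only, not the exact OpenSLU loader code):

```python
from functools import partial
import torch

# partially configure the optimizer from the YAML items above
optimizer_fn = partial(torch.optim.Adam, lr=0.001, weight_decay=1e-6)

# later, once the model has been built, the remaining argument is supplied
model = torch.nn.Linear(4, 2)  # stand-in model for illustration
optimizer = optimizer_fn(model.parameters())
```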
135 |
+
## `scheduler` Config
|
136 |
+
```yaml
|
137 |
+
scheduler:
|
138 |
+
_model_target_: transformers.get_scheduler
|
139 |
+
_model_partial_: true # partial-load configuration; optimizer and num_training_steps are added later to complete the scheduler arguments
|
140 |
+
name : "linear"
|
141 |
+
num_warmup_steps: 0
|
142 |
+
```
|
143 |
+
## `model` Config
|
144 |
+
```yaml
|
145 |
+
model:
|
146 |
+
# _from_pretrained_: LightChen2333/stack-propagation-slu-atis # load the model from Hugging Face; none of the parameters below need to be assigned.
|
147 |
+
_model_target_: model.OpenSLUModel # the general model class, can automatically build the model through configuration.
|
148 |
+
|
149 |
+
encoder:
|
150 |
+
_model_target_: model.encoder.AutoEncoder # auto-encoder to autoload provided encoder model
|
151 |
+
encoder_name: self-attention-lstm # support [lstm/ self-attention-lstm] and any pretrained model supported by Hugging Face
|
152 |
+
|
153 |
+
embedding: # word embedding layer
|
154 |
+
# load_embedding_name: glove.6B.300d.txt # support autoload glove embedding.
|
155 |
+
embedding_dim: 256 # embedding dim
|
156 |
+
dropout_rate: 0.5 # dropout ratio after embedding
|
157 |
+
|
158 |
+
lstm:
|
159 |
+
layer_num: 1 # lstm configuration
|
160 |
+
bidirectional: true
|
161 |
+
output_dim: 256 # each module should set output_dim so the next module can autoload its input_dim; you can also set input_dim manually
|
162 |
+
dropout_rate: 0.5
|
163 |
+
|
164 |
+
attention: # self-attention configuration
|
165 |
+
hidden_dim: 1024
|
166 |
+
output_dim: 128
|
167 |
+
dropout_rate: 0.5
|
168 |
+
|
169 |
+
return_with_input: true # pass input information, such as attention_mask, to the decoder module
|
170 |
+
return_sentence_level_hidden: false # whether to return the sentence representation to the decoder module
|
171 |
+
|
172 |
+
decoder:
|
173 |
+
_model_target_: model.decoder.StackPropagationDecoder # decoder name
|
174 |
+
interaction:
|
175 |
+
_model_target_: model.decoder.interaction.StackInteraction # interaction module name
|
176 |
+
differentiable: false # interaction module config
|
177 |
+
|
178 |
+
intent_classifier:
|
179 |
+
_model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier # intent classifier module name
|
180 |
+
layer_num: 1
|
181 |
+
bidirectional: false
|
182 |
+
hidden_dim: 64
|
183 |
+
force_ratio: 0.9 # teacher-force ratio
|
184 |
+
embedding_dim: 8 # intent embedding dim
|
185 |
+
ignore_index: -100 # ignore index to compute loss and metric
|
186 |
+
dropout_rate: 0.5
|
187 |
+
mode: "token-level-intent" # decode mode, support [token-level-intent, intent, slot]
|
188 |
+
use_multi: "{base.multi_intent}"
|
189 |
+
return_sentence_level: true # whether to return sentence level prediction as decoded input
|
190 |
+
|
191 |
+
slot_classifier:
|
192 |
+
_model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
|
193 |
+
layer_num: 1
|
194 |
+
bidirectional: false
|
195 |
+
force_ratio: 0.9
|
196 |
+
hidden_dim: 64
|
197 |
+
embedding_dim: 32
|
198 |
+
ignore_index: -100
|
199 |
+
dropout_rate: 0.5
|
200 |
+
mode: "slot"
|
201 |
+
use_multi: false
|
202 |
+
return_sentence_level: false
|
203 |
+
```
|
204 |
+
|
205 |
+
## Implementing a New Model
|
206 |
+
|
207 |
+
### 1. Interaction Re-Implement
|
208 |
+
Here we take `DCA-Net` as an example:
|
209 |
+
|
210 |
+
In most cases, you just need to rewrite the `Interaction` module:
|
211 |
+
|
212 |
+
```python
|
213 |
+
from common.utils import HiddenData
|
214 |
+
from model.decoder.interaction import BaseInteraction
|
215 |
+
class DCANetInteraction(BaseInteraction):
|
216 |
+
def __init__(self, **config):
|
217 |
+
super().__init__(**config)
|
218 |
+
self.T_block1 = I_S_Block(self.config["output_dim"], self.config["attention_dropout"], self.config["num_attention_heads"])
|
219 |
+
...
|
220 |
+
|
221 |
+
def forward(self, encode_hidden: HiddenData, **kwargs):
|
222 |
+
...
|
223 |
+
```
|
224 |
+
|
225 |
+
Then you should configure your module:
|
226 |
+
```yaml
|
227 |
+
base:
|
228 |
+
...
|
229 |
+
|
230 |
+
optimizer:
|
231 |
+
...
|
232 |
+
|
233 |
+
scheduler:
|
234 |
+
...
|
235 |
+
|
236 |
+
model:
|
237 |
+
_model_target_: model.OpenSLUModel
|
238 |
+
encoder:
|
239 |
+
_model_target_: model.encoder.AutoEncoder
|
240 |
+
encoder_name: lstm
|
241 |
+
|
242 |
+
embedding:
|
243 |
+
load_embedding_name: glove.6B.300d.txt
|
244 |
+
embedding_dim: 300
|
245 |
+
dropout_rate: 0.5
|
246 |
+
|
247 |
+
lstm:
|
248 |
+
dropout_rate: 0.5
|
249 |
+
output_dim: 128
|
250 |
+
layer_num: 2
|
251 |
+
bidirectional: true
|
252 |
+
output_dim: "{model.encoder.lstm.output_dim}"
|
253 |
+
return_with_input: true
|
254 |
+
return_sentence_level_hidden: false
|
255 |
+
|
256 |
+
decoder:
|
257 |
+
_model_target_: model.decoder.DCANetDecoder
|
258 |
+
interaction:
|
259 |
+
_model_target_: model.decoder.interaction.DCANetInteraction
|
260 |
+
output_dim: "{model.encoder.output_dim}"
|
261 |
+
attention_dropout: 0.5
|
262 |
+
num_attention_heads: 8
|
263 |
+
|
264 |
+
intent_classifier:
|
265 |
+
_model_target_: model.decoder.classifier.LinearClassifier
|
266 |
+
mode: "intent"
|
267 |
+
input_dim: "{model.decoder.output_dim.output_dim}"
|
268 |
+
ignore_index: -100
|
269 |
+
|
270 |
+
slot_classifier:
|
271 |
+
_model_target_: model.decoder.classifier.LinearClassifier
|
272 |
+
mode: "slot"
|
273 |
+
input_dim: "{model.decoder.output_dim.output_dim}"
|
274 |
+
ignore_index: -100
|
275 |
+
```
|
276 |
+
|
277 |
+
That is all the model construction needed. You can run the following script to train the model:
|
278 |
+
```shell
|
279 |
+
python run.py -cp config/dca_net.yaml [-ds atis]
|
280 |
+
```
|
281 |
+
### 2. Decoder Re-Implement
|
282 |
+
Sometimes the `interaction then classification` order cannot meet your needs. In that case, simply rewrite the decoder to get a flexible interaction order:
|
283 |
+
|
284 |
+
Here, we take `stack-propagation` as an example:
|
285 |
+
1. First, rewrite the interaction module for `stack-propagation`:
|
286 |
+
```python
|
287 |
+
from common.utils import ClassifierOutputData, HiddenData
|
288 |
+
from model.decoder.interaction.base_interaction import BaseInteraction
|
289 |
+
class StackInteraction(BaseInteraction):
|
290 |
+
def __init__(self, **config):
|
291 |
+
super().__init__(**config)
|
292 |
+
...
|
293 |
+
|
294 |
+
def forward(self, intent_output: ClassifierOutputData, encode_hidden: HiddenData):
|
295 |
+
...
|
296 |
+
```
|
297 |
+
2. Then rewrite `StackPropagationDecoder` to implement the stack-propagation interaction order:
|
298 |
+
```python
|
299 |
+
from common.utils import HiddenData, OutputData
|
300 |
+
class StackPropagationDecoder(BaseDecoder):
|
301 |
+
|
302 |
+
def forward(self, hidden: HiddenData):
|
303 |
+
pred_intent = self.intent_classifier(hidden)
|
304 |
+
hidden = self.interaction(pred_intent, hidden)
|
305 |
+
pred_slot = self.slot_classifier(hidden)
|
306 |
+
return OutputData(pred_intent, pred_slot)
|
307 |
+
```
|
308 |
+
|
309 |
+
3. Then the general model can be assembled through the `config/stack-propagation.yaml` configuration file:
|
310 |
+
```yaml
|
311 |
+
base:
|
312 |
+
...
|
313 |
+
|
314 |
+
...
|
315 |
+
|
316 |
+
model:
|
317 |
+
_model_target_: model.OpenSLUModel
|
318 |
+
|
319 |
+
encoder:
|
320 |
+
...
|
321 |
+
|
322 |
+
decoder:
|
323 |
+
_model_target_: model.decoder.StackPropagationDecoder
|
324 |
+
interaction:
|
325 |
+
_model_target_: model.decoder.interaction.StackInteraction
|
326 |
+
differentiable: false
|
327 |
+
|
328 |
+
intent_classifier:
|
329 |
+
_model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
|
330 |
+
... # parameters needed __init__(*)
|
331 |
+
mode: "token-level-intent"
|
332 |
+
use_multi: false
|
333 |
+
return_sentence_level: true
|
334 |
+
|
335 |
+
slot_classifier:
|
336 |
+
_model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
|
337 |
+
... # parameters needed __init__(*)
|
338 |
+
mode: "slot"
|
339 |
+
use_multi: false
|
340 |
+
return_sentence_level: false
|
341 |
+
```
|
342 |
+
4. Finally, run the following script to train the model:
|
343 |
+
```shell
|
344 |
+
python run.py -cp config/stack-propagation.yaml
|
345 |
+
```
|
346 |
+
|
347 |
+
|
348 |
+
|
config/app.yaml
CHANGED
@@ -1,109 +1,6 @@
|
|
1 |
-
device: "NVIDIA GeForce RTX 2080 Ti"
|
2 |
-
|
3 |
host: 127.0.0.1
|
4 |
port: 7860
|
5 |
|
6 |
is_push_to_public: false
|
7 |
save-path: save/stack/outputs.jsonl
|
8 |
-
page-size: 2
|
9 |
-
|
10 |
-
base:
|
11 |
-
name: "OpenSLUv1"
|
12 |
-
train: false
|
13 |
-
test: false
|
14 |
-
device: cpu
|
15 |
-
ckpt_path: null
|
16 |
-
seed: 42
|
17 |
-
best_key: EMA
|
18 |
-
epoch_num: 300
|
19 |
-
batch_size: 16
|
20 |
-
eval_by_epoch: true
|
21 |
-
model_dir: save/stack
|
22 |
-
template: application.html
|
23 |
-
accelerator:
|
24 |
-
use_accelerator: false
|
25 |
-
|
26 |
-
dataset:
|
27 |
-
dataset_name: atis
|
28 |
-
|
29 |
-
metric:
|
30 |
-
- intent_acc
|
31 |
-
- slot_f1
|
32 |
-
- EMA
|
33 |
-
|
34 |
-
tokenizer:
|
35 |
-
_tokenizer_name_: word_tokenizer
|
36 |
-
_padding_side_: right
|
37 |
-
_align_mode_: fast
|
38 |
-
_to_lower_case_: true
|
39 |
-
add_special_tokens: false
|
40 |
-
max_length: 512
|
41 |
-
|
42 |
-
optimizer:
|
43 |
-
_model_target_: torch.optim.Adam
|
44 |
-
_model_partial_: true
|
45 |
-
lr: 0.001
|
46 |
-
weight_decay: 1e-6
|
47 |
-
|
48 |
-
scheduler:
|
49 |
-
_model_target_: transformers.get_scheduler
|
50 |
-
_model_partial_: true
|
51 |
-
name : "linear"
|
52 |
-
num_warmup_steps: 0
|
53 |
-
|
54 |
-
model:
|
55 |
-
_model_target_: model.OpenSLUModel
|
56 |
-
|
57 |
-
encoder:
|
58 |
-
_model_target_: model.encoder.AutoEncoder
|
59 |
-
encoder_name: self-attention-lstm
|
60 |
-
|
61 |
-
embedding:
|
62 |
-
embedding_dim: 256
|
63 |
-
dropout_rate: 0.55
|
64 |
-
|
65 |
-
lstm:
|
66 |
-
layer_num: 1
|
67 |
-
bidirectional: true
|
68 |
-
output_dim: 256
|
69 |
-
dropout_rate: 0.5
|
70 |
-
|
71 |
-
attention:
|
72 |
-
hidden_dim: 1024
|
73 |
-
output_dim: 128
|
74 |
-
dropout_rate: 0.6
|
75 |
-
|
76 |
-
return_with_input: true
|
77 |
-
return_sentence_level_hidden: false
|
78 |
-
|
79 |
-
decoder:
|
80 |
-
_model_target_: model.decoder.StackPropagationDecoder
|
81 |
-
interaction:
|
82 |
-
_model_target_: model.decoder.interaction.StackInteraction
|
83 |
-
differentiable: false
|
84 |
-
|
85 |
-
intent_classifier:
|
86 |
-
_model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
|
87 |
-
layer_num: 1
|
88 |
-
bidirectional: false
|
89 |
-
force_ratio: 0.9
|
90 |
-
hidden_dim: 64
|
91 |
-
embedding_dim: 8
|
92 |
-
ignore_index: -100
|
93 |
-
dropout_rate: 0.5
|
94 |
-
mode: "token-level-intent"
|
95 |
-
use_multi: false
|
96 |
-
return_sentence_level: true
|
97 |
-
|
98 |
-
slot_classifier:
|
99 |
-
_model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
|
100 |
-
layer_num: 1
|
101 |
-
bidirectional: false
|
102 |
-
force_ratio: 0.9
|
103 |
-
hidden_dim: 64
|
104 |
-
embedding_dim: 32
|
105 |
-
ignore_index: -100
|
106 |
-
dropout_rate: 0.55
|
107 |
-
mode: "slot"
|
108 |
-
use_multi: false
|
109 |
-
return_sentence_level: false
|
|
1 |
host: 127.0.0.1
|
2 |
port: 7860
|
3 |
|
4 |
is_push_to_public: false
|
5 |
save-path: save/stack/outputs.jsonl
|
6 |
+
page-size: 2
|
config/decoder/interaction/stack-propagation.yaml
ADDED
@@ -0,0 +1 @@
|
1 |
+
differentiable: false
|
config/examples/README.md
ADDED
@@ -0,0 +1,38 @@
|
1 |
+
# Examples
|
2 |
+
|
3 |
+
Here we introduce some usages of our framework through configuration.
|
4 |
+
|
5 |
+
## Reload to train
|
6 |
+
|
7 |
+
Firstly, you can run this script to train a `joint-bert` model:
|
8 |
+
```shell
|
9 |
+
python run.py -cp config/examples/normal.yaml
|
10 |
+
```
|
11 |
+
|
12 |
+
and you can use `kill` or `Ctrl+C` to stop the training process.
|
13 |
+
|
14 |
+
Then, to reload the model and continue training, run `reload_to_train.yaml` to restore the checkpoint and training state.
|
15 |
+
```shell
|
16 |
+
python run.py -cp config/examples/reload_to_train.yaml
|
17 |
+
```
|
18 |
+
|
19 |
+
The main difference in `reload_to_train.yaml` is the `model_manager` configuration item:
|
20 |
+
```yaml
|
21 |
+
...
|
22 |
+
model_manager:
|
23 |
+
load_train_state: True # set to True
|
24 |
+
load_dir: save/joint_bert # not null
|
25 |
+
...
|
26 |
+
...
|
27 |
+
```
|
28 |
+
|
29 |
+
## Load from a Pre-finetuned Model
|
30 |
+
We upload all models to [LightChen2333](https://huggingface.co/LightChen2333). You can load those models with a simple configuration.
|
31 |
+
In `from_pretrained.yaml` and `from_pretrained_multi.yaml`, we show two example scripts that load from Hugging Face in the single- and multi-intent settings, respectively. The key configuration items are as below:
|
32 |
+
```yaml
|
33 |
+
tokenizer:
|
34 |
+
_from_pretrained_: "'LightChen2333/agif-slu-' + '{dataset.dataset_name}'" # Support simple calculation script
|
35 |
+
|
36 |
+
model:
|
37 |
+
_from_pretrained_: "'LightChen2333/agif-slu-' + '{dataset.dataset_name}'"
|
38 |
+
```
|
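Both example configs can then be launched like any other configuration file, using the same `run.py` entry point shown in the main config README:

```shell
python run.py -cp config/examples/from_pretrained.yaml
python run.py -cp config/examples/from_pretrained_multi.yaml
```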
config/examples/from_pretrained.yaml
ADDED
@@ -0,0 +1,53 @@
|
1 |
+
device: "NVIDIA GeForce RTX 2080 Ti"
|
2 |
+
|
3 |
+
base:
|
4 |
+
name: "OpenSLUv1"
|
5 |
+
train: false
|
6 |
+
test: true
|
7 |
+
device: cuda
|
8 |
+
seed: 42
|
9 |
+
epoch_num: 300
|
10 |
+
batch_size: 16
|
11 |
+
|
12 |
+
logger:
|
13 |
+
logger_type: wandb # wandb is supported in both single- and multi-GPU settings, tensorboard only in multi-GPU, and fitlog only in single-GPU
|
14 |
+
|
15 |
+
model_manager:
|
16 |
+
load_dir: null
|
17 |
+
save_dir: save/joint_bert
|
18 |
+
save_mode: save-by-eval # save-by-step
|
19 |
+
# save_step: 100
|
20 |
+
max_save_num: 1
|
21 |
+
|
22 |
+
accelerator:
|
23 |
+
use_accelerator: false
|
24 |
+
|
25 |
+
dataset:
|
26 |
+
dataset_name: atis
|
27 |
+
|
28 |
+
evaluator:
|
29 |
+
best_key: EMA
|
30 |
+
eval_by_epoch: true
|
31 |
+
# eval_step: 1800
|
32 |
+
metric:
|
33 |
+
- intent_acc
|
34 |
+
- slot_f1
|
35 |
+
- EMA
|
36 |
+
|
37 |
+
tokenizer:
|
38 |
+
_from_pretrained_: "'LightChen2333/joint-bert-slu-' + '{dataset.dataset_name}'"
|
39 |
+
|
40 |
+
optimizer:
|
41 |
+
_model_target_: torch.optim.Adam
|
42 |
+
_model_partial_: true
|
43 |
+
lr: 0.001
|
44 |
+
weight_decay: 1e-6
|
45 |
+
|
46 |
+
scheduler:
|
47 |
+
_model_target_: transformers.get_scheduler
|
48 |
+
_model_partial_: true
|
49 |
+
name : "linear"
|
50 |
+
num_warmup_steps: 0
|
51 |
+
|
52 |
+
model:
|
53 |
+
_from_pretrained_: "'LightChen2333/joint-bert-slu-' + '{dataset.dataset_name}'"
|
config/examples/from_pretrained_multi.yaml
ADDED
@@ -0,0 +1,55 @@
|
1 |
+
device: "NVIDIA GeForce RTX 2080 Ti"
|
2 |
+
|
3 |
+
base:
|
4 |
+
name: "OpenSLUv1"
|
5 |
+
multi_intent: true
|
6 |
+
train: false
|
7 |
+
test: true
|
8 |
+
device: cuda
|
9 |
+
seed: 42
|
10 |
+
epoch_num: 300
|
11 |
+
batch_size: 16
|
12 |
+
|
13 |
+
|
14 |
+
logger:
|
15 |
+
logger_type: wandb # wandb is supported in both single- and multi-GPU settings, tensorboard only in multi-GPU, and fitlog only in single-GPU
|
16 |
+
|
17 |
+
model_manager:
|
18 |
+
load_dir: null
|
19 |
+
save_dir: save/joint_bert
|
20 |
+
save_mode: save-by-eval # save-by-step
|
21 |
+
# save_step: 100
|
22 |
+
max_save_num: 1
|
23 |
+
|
24 |
+
accelerator:
|
25 |
+
use_accelerator: false
|
26 |
+
|
27 |
+
dataset:
|
28 |
+
dataset_name: atis
|
29 |
+
|
30 |
+
evaluator:
|
31 |
+
best_key: EMA
|
32 |
+
eval_by_epoch: true
|
33 |
+
# eval_step: 1800
|
34 |
+
metric:
|
35 |
+
- intent_acc
|
36 |
+
- slot_f1
|
37 |
+
- EMA
|
38 |
+
|
39 |
+
tokenizer:
|
40 |
+
_from_pretrained_: "'LightChen2333/agif-slu-' + '{dataset.dataset_name}'"
|
41 |
+
|
42 |
+
optimizer:
|
43 |
+
_model_target_: torch.optim.Adam
|
44 |
+
_model_partial_: true
|
45 |
+
lr: 0.001
|
46 |
+
weight_decay: 1e-6
|
47 |
+
|
48 |
+
scheduler:
|
49 |
+
_model_target_: transformers.get_scheduler
|
50 |
+
_model_partial_: true
|
51 |
+
name : "linear"
|
52 |
+
num_warmup_steps: 0
|
53 |
+
|
54 |
+
model:
|
55 |
+
_from_pretrained_: "'LightChen2333/agif-slu-' + '{dataset.dataset_name}'"
|
config/examples/normal.yaml
ADDED
@@ -0,0 +1,70 @@
|
1 |
+
device: "Tesla V100-SXM2-16GB"
|
2 |
+
|
3 |
+
base:
|
4 |
+
name: "OpenSLU-test"
|
5 |
+
train: True
|
6 |
+
test: True
|
7 |
+
device: cuda
|
8 |
+
seed: 42
|
9 |
+
epoch_num: 300
|
10 |
+
batch_size: 128
|
11 |
+
|
12 |
+
model_manager:
|
13 |
+
load_dir: null
|
14 |
+
save_dir: save/joint_bert
|
15 |
+
|
16 |
+
evaluator:
|
17 |
+
best_key: EMA
|
18 |
+
eval_by_epoch: true
|
19 |
+
# eval_step: 1800
|
20 |
+
metric:
|
21 |
+
- intent_acc
|
22 |
+
- slot_f1
|
23 |
+
- EMA
|
24 |
+
|
25 |
+
accelerator:
|
26 |
+
use_accelerator: false
|
27 |
+
|
28 |
+
dataset:
|
29 |
+
dataset_name: atis
|
30 |
+
|
31 |
+
tokenizer:
|
32 |
+
_tokenizer_name_: bert-base-uncased
|
33 |
+
_padding_side_: right
|
34 |
+
_align_mode_: general
|
35 |
+
add_special_tokens: true
|
36 |
+
|
37 |
+
optimizer:
|
38 |
+
_model_target_: torch.optim.AdamW
|
39 |
+
_model_partial_: true
|
40 |
+
lr: 4e-6
|
41 |
+
weight_decay: 1e-8
|
42 |
+
|
43 |
+
scheduler:
|
44 |
+
_model_target_: transformers.get_scheduler
|
45 |
+
_model_partial_: true
|
46 |
+
name : "linear"
|
47 |
+
num_warmup_steps: 0
|
48 |
+
|
49 |
+
model:
|
50 |
+
_model_target_: model.open_slu_model.OpenSLUModel
|
51 |
+
ignore_index: -100
|
52 |
+
encoder:
|
53 |
+
_model_target_: model.encoder.AutoEncoder
|
54 |
+
encoder_name: bert-base-uncased
|
55 |
+
output_dim: 768
|
56 |
+
return_with_input: true
|
57 |
+
return_sentence_level_hidden: true
|
58 |
+
|
59 |
+
decoder:
|
60 |
+
_model_target_: model.decoder.base_decoder.BaseDecoder
|
61 |
+
intent_classifier:
|
62 |
+
_model_target_: model.decoder.classifier.LinearClassifier
|
63 |
+
mode: "intent"
|
64 |
+
ignore_index: -100
|
65 |
+
|
66 |
+
|
67 |
+
slot_classifier:
|
68 |
+
_model_target_: model.decoder.classifier.LinearClassifier
|
69 |
+
mode: "slot"
|
70 |
+
ignore_index: -100
|
config/examples/reload_to_train.yaml
ADDED
@@ -0,0 +1,71 @@
|
1 |
+
device: "Tesla V100-SXM2-16GB"
|
2 |
+
|
3 |
+
base:
|
4 |
+
name: "OpenSLU-test"
|
5 |
+
train: True
|
6 |
+
test: True
|
7 |
+
device: cuda
|
8 |
+
seed: 42
|
9 |
+
epoch_num: 300
|
10 |
+
batch_size: 128
|
11 |
+
|
12 |
+
model_manager:
|
13 |
+
load_train_state: True
|
14 |
+
load_dir: save/joint_bert
|
15 |
+
save_dir: save/joint_bert
|
16 |
+
|
17 |
+
evaluator:
|
18 |
+
best_key: EMA
|
19 |
+
eval_by_epoch: true
|
20 |
+
# eval_step: 1800
|
21 |
+
metric:
|
22 |
+
- intent_acc
|
23 |
+
- slot_f1
|
24 |
+
- EMA
|
25 |
+
|
26 |
+
accelerator:
|
27 |
+
use_accelerator: false
|
28 |
+
|
29 |
+
dataset:
|
30 |
+
dataset_name: atis
|
31 |
+
|
32 |
+
tokenizer:
|
33 |
+
_tokenizer_name_: bert-base-uncased
|
34 |
+
_padding_side_: right
|
35 |
+
_align_mode_: general
|
36 |
+
add_special_tokens: true
|
37 |
+
|
38 |
+
optimizer:
|
39 |
+
_model_target_: torch.optim.AdamW
|
40 |
+
_model_partial_: true
|
41 |
+
lr: 4e-6
|
42 |
+
weight_decay: 1e-8
|
43 |
+
|
44 |
+
scheduler:
|
45 |
+
_model_target_: transformers.get_scheduler
|
46 |
+
_model_partial_: true
|
47 |
+
name : "linear"
|
48 |
+
num_warmup_steps: 0
|
49 |
+
|
50 |
+
model:
|
51 |
+
_model_target_: model.open_slu_model.OpenSLUModel
|
52 |
+
ignore_index: -100
|
53 |
+
encoder:
|
54 |
+
_model_target_: model.encoder.AutoEncoder
|
55 |
+
encoder_name: bert-base-uncased
|
56 |
+
output_dim: 768
|
57 |
+
return_with_input: true
|
58 |
+
return_sentence_level_hidden: true
|
59 |
+
|
60 |
+
decoder:
|
61 |
+
_model_target_: model.decoder.base_decoder.BaseDecoder
|
62 |
+
intent_classifier:
|
63 |
+
_model_target_: model.decoder.classifier.LinearClassifier
|
64 |
+
mode: "intent"
|
65 |
+
ignore_index: -100
|
66 |
+
|
67 |
+
|
68 |
+
slot_classifier:
|
69 |
+
_model_target_: model.decoder.classifier.LinearClassifier
|
70 |
+
mode: "slot"
|
71 |
+
ignore_index: -100
|
config/reproduction/atis/bi-model.yaml
ADDED
@@ -0,0 +1,106 @@
|
1 |
+
device: "NVIDIA GeForce RTX 2080 Ti"
|
2 |
+
|
3 |
+
base:
|
4 |
+
name: "OpenSLUv1"
|
5 |
+
train: true
|
6 |
+
test: true
|
7 |
+
device: cuda
|
8 |
+
seed: 42
|
9 |
+
epoch_num: 300
|
10 |
+
batch_size: 16
|
11 |
+
|
12 |
+
model_manager:
|
13 |
+
load_dir: null
|
14 |
+
save_dir: save/bi-model-atis
|
15 |
+
|
16 |
+
accelerator:
|
17 |
+
use_accelerator: false
|
18 |
+
|
19 |
+
dataset:
|
20 |
+
dataset_name: atis
|
21 |
+
|
22 |
+
evaluator:
|
23 |
+
best_key: EMA
|
24 |
+
eval_by_epoch: true
|
25 |
+
# eval_step: 1800
|
26 |
+
metric:
|
27 |
+
- intent_acc
|
28 |
+
- slot_f1
|
29 |
+
- EMA
|
30 |
+
|
31 |
+
|
32 |
+
tokenizer:
|
33 |
+
_tokenizer_name_: word_tokenizer
|
34 |
+
_padding_side_: right
|
35 |
+
_align_mode_: fast
|
36 |
+
add_special_tokens: false
|
37 |
+
max_length: 512
|
38 |
+
|
39 |
+
optimizer:
|
40 |
+
_model_target_: torch.optim.Adam
|
41 |
+
_model_partial_: true
|
42 |
+
lr: 0.001
|
43 |
+
weight_decay: 1e-6
|
44 |
+
|
45 |
+
scheduler:
|
46 |
+
_model_target_: transformers.get_scheduler
|
47 |
+
_model_partial_: true
|
48 |
+
name : "linear"
|
49 |
+
num_warmup_steps: 0
|
50 |
+
|
51 |
+
model:
|
52 |
+
_model_target_: model.OpenSLUModel
|
53 |
+
|
54 |
+
encoder:
|
55 |
+
_model_target_: model.encoder.BiEncoder
|
56 |
+
intent_encoder:
|
57 |
+
_model_target_: model.encoder.AutoEncoder
|
58 |
+
encoder_name: lstm
|
59 |
+
|
60 |
+
embedding:
|
61 |
+
embedding_dim: 256
|
62 |
+
dropout_rate: 0.4
|
63 |
+
|
64 |
+
lstm:
|
65 |
+
dropout_rate: 0.5
|
66 |
+
output_dim: 256
|
67 |
+
layer_num: 2
|
68 |
+
bidirectional: true
|
69 |
+
|
70 |
+
return_with_input: true
|
71 |
+
return_sentence_level_hidden: false
|
72 |
+
|
73 |
+
slot_encoder:
|
74 |
+
_model_target_: model.encoder.AutoEncoder
|
75 |
+
encoder_name: lstm
|
76 |
+
|
77 |
+
embedding:
|
78 |
+
embedding_dim: 256
|
79 |
+
dropout_rate: 0.4
|
80 |
+
|
81 |
+
lstm:
|
82 |
+
dropout_rate: 0.5
|
83 |
+
output_dim: 256
|
84 |
+
layer_num: 2
|
85 |
+
bidirectional: true
|
86 |
+
|
87 |
+
return_with_input: true
|
88 |
+
return_sentence_level_hidden: false
|
89 |
+
|
90 |
+
decoder:
|
91 |
+
_model_target_: model.decoder.BaseDecoder
|
92 |
+
# teacher_forcing: true
|
93 |
+
interaction:
|
94 |
+
_model_target_: model.decoder.interaction.BiModelInteraction
|
95 |
+
output_dim: 256
|
96 |
+
dropout_rate: 0.4
|
97 |
+
|
98 |
+
intent_classifier:
|
99 |
+
_model_target_: model.decoder.classifier.LinearClassifier
|
100 |
+
mode: "intent"
|
101 |
+
ignore_index: -100
|
102 |
+
|
103 |
+
slot_classifier:
|
104 |
+
_model_target_: model.decoder.classifier.LinearClassifier
|
105 |
+
mode: "slot"
|
106 |
+
ignore_index: -100
|
config/reproduction/atis/dca-net.yaml
ADDED
@@ -0,0 +1,88 @@
|
1 |
+
device: "Tesla P100-PCIE-16GB"
|
2 |
+
|
3 |
+
base:
|
4 |
+
name: "OpenSLUv1"
|
5 |
+
train: true
|
6 |
+
test: true
|
7 |
+
device: cuda
|
8 |
+
seed: 42
|
9 |
+
epoch_num: 300
|
10 |
+
batch_size: 16
|
11 |
+
|
12 |
+
model_manager:
|
13 |
+
load_dir: null
|
14 |
+
save_dir: save/dca-net-atis
|
15 |
+
|
16 |
+
accelerator:
|
17 |
+
use_accelerator: false
|
18 |
+
|
19 |
+
dataset:
|
20 |
+
dataset_name: atis
|
21 |
+
|
22 |
+
evaluator:
|
23 |
+
best_key: EMA
|
24 |
+
eval_by_epoch: true
|
25 |
+
# eval_step: 1800
|
26 |
+
metric:
|
27 |
+
- intent_acc
|
28 |
+
- slot_f1
|
29 |
+
- EMA
|
30 |
+
|
31 |
+
tokenizer:
|
32 |
+
_tokenizer_name_: word_tokenizer
|
33 |
+
_padding_side_: right
|
34 |
+
_align_mode_: fast
|
35 |
+
add_special_tokens: false
|
36 |
+
max_length: 512
|
37 |
+
|
38 |
+
optimizer:
|
39 |
+
_model_target_: torch.optim.Adam
|
40 |
+
_model_partial_: true
|
41 |
+
lr: 0.001
|
42 |
+
weight_decay: 1e-6
|
43 |
+
|
44 |
+
scheduler:
|
45 |
+
_model_target_: transformers.get_scheduler
|
46 |
+
_model_partial_: true
|
47 |
+
name : "linear"
|
48 |
+
num_warmup_steps: 0
|
49 |
+
|
50 |
+
model:
|
51 |
+
_model_target_: model.OpenSLUModel
|
52 |
+
encoder:
|
53 |
+
_model_target_: model.encoder.AutoEncoder
|
54 |
+
encoder_name: lstm
|
55 |
+
|
56 |
+
embedding:
|
57 |
+
load_embedding_name: glove.6B.300d.txt
|
58 |
+
embedding_dim: 300
|
59 |
+
dropout_rate: 0.5
|
60 |
+
|
61 |
+
lstm:
|
62 |
+
dropout_rate: 0.5
|
63 |
+
output_dim: 128
|
64 |
+
layer_num: 2
|
65 |
+
bidirectional: true
|
66 |
+
output_dim: "{model.encoder.lstm.output_dim}"
|
67 |
+
return_with_input: true
|
68 |
+
return_sentence_level_hidden: false
|
69 |
+
|
70 |
+
decoder:
|
71 |
+
_model_target_: model.decoder.DCANetDecoder
|
72 |
+
interaction:
|
73 |
+
_model_target_: model.decoder.interaction.DCANetInteraction
|
74 |
+
output_dim: "{model.encoder.output_dim}"
|
75 |
+
attention_dropout: 0.5
|
76 |
+
num_attention_heads: 8
|
77 |
+
|
78 |
+
intent_classifier:
|
79 |
+
_model_target_: model.decoder.classifier.LinearClassifier
|
80 |
+
mode: "intent"
|
81 |
+
input_dim: "{model.encoder.output_dim}"
|
82 |
+
ignore_index: -100
|
83 |
+
|
84 |
+
slot_classifier:
|
85 |
+
_model_target_: model.decoder.classifier.LinearClassifier
|
86 |
+
mode: "slot"
|
87 |
+
input_dim: "{model.encoder.output_dim}"
|
88 |
+
ignore_index: -100
|
config/reproduction/atis/deberta.yaml
ADDED
@@ -0,0 +1,67 @@
|
1 |
+
device: "Tesla V100-SXM2-16GB"
|
2 |
+
|
3 |
+
base:
|
4 |
+
name: "OpenSLUv1"
|
5 |
+
train: true
|
6 |
+
test: true
|
7 |
+
device: cuda
|
8 |
+
seed: 42
|
9 |
+
epoch_num: 300
|
10 |
+
batch_size: 32
|
11 |
+
|
12 |
+
model_manager:
|
13 |
+
load_dir: null
|
14 |
+
save_dir: save/deberta-atis
|
15 |
+
|
16 |
+
dataset:
|
17 |
+
dataset_name: atis
|
18 |
+
|
19 |
+
evaluator:
|
20 |
+
best_key: EMA
|
21 |
+
eval_by_epoch: true
|
22 |
+
# eval_step: 1800
|
23 |
+
metric:
|
24 |
+
- intent_acc
|
25 |
+
- slot_f1
|
26 |
+
- EMA
|
27 |
+
|
28 |
+
tokenizer:
|
29 |
+
_tokenizer_name_: microsoft/deberta-v3-base
|
30 |
+
_padding_side_: right
|
31 |
+
add_special_tokens: true
|
32 |
+
max_length: 512
|
33 |
+
|
34 |
+
optimizer:
|
35 |
+
_model_target_: torch.optim.AdamW
|
36 |
+
_model_partial_: true
|
37 |
+
lr: 2e-5
|
38 |
+
weight_decay: 1e-8
|
39 |
+
|
40 |
+
scheduler:
|
41 |
+
_model_target_: transformers.get_scheduler
|
42 |
+
_model_partial_: true
|
43 |
+
name : "linear"
|
44 |
+
num_warmup_steps: 0
|
45 |
+
|
46 |
+
model:
|
47 |
+
_model_target_: model.open_slu_model.OpenSLUModel
|
48 |
+
ignore_index: -100
|
49 |
+
encoder:
|
50 |
+
_model_target_: model.encoder.AutoEncoder
|
51 |
+
encoder_name: microsoft/deberta-v3-base
|
52 |
+
output_dim: 768
|
53 |
+
return_with_input: true
|
54 |
+
return_sentence_level_hidden: true
|
55 |
+
|
56 |
+
decoder:
|
57 |
+
_model_target_: model.decoder.base_decoder.BaseDecoder
|
58 |
+
intent_classifier:
|
59 |
+
_model_target_: model.decoder.classifier.LinearClassifier
|
60 |
+
mode: "intent"
|
61 |
+
ignore_index: -100
|
62 |
+
|
63 |
+
|
64 |
+
slot_classifier:
|
65 |
+
_model_target_: model.decoder.classifier.LinearClassifier
|
66 |
+
mode: "slot"
|
67 |
+
ignore_index: -100
|
config/reproduction/atis/electra.yaml
ADDED
@@ -0,0 +1,67 @@
|
1 |
+
device: "Tesla V100-SXM2-16GB"
|
2 |
+
|
3 |
+
base:
|
4 |
+
name: "OpenSLUv1"
|
5 |
+
train: True
|
6 |
+
test: True
|
7 |
+
device: cuda
|
8 |
+
seed: 42
|
9 |
+
epoch_num: 300
|
10 |
+
batch_size: 32
|
11 |
+
|
12 |
+
model_manager:
|
13 |
+
load_dir: null
|
14 |
+
save_dir: save/electra-atis
|
15 |
+
|
16 |
+
evaluator:
|
17 |
+
best_key: EMA
|
18 |
+
eval_by_epoch: true
|
19 |
+
# eval_step: 1800
|
20 |
+
metric:
|
21 |
+
- intent_acc
|
22 |
+
- slot_f1
|
23 |
+
- EMA
|
24 |
+
|
25 |
+
dataset:
|
26 |
+
dataset_name: atis
|
27 |
+
|
28 |
+
tokenizer:
|
29 |
+
_tokenizer_name_: google/electra-small-discriminator
|
30 |
+
_padding_side_: right
|
31 |
+
add_special_tokens: true
|
32 |
+
max_length: 512
|
33 |
+
|
34 |
+
optimizer:
|
35 |
+
_model_target_: torch.optim.AdamW
|
36 |
+
_model_partial_: true
|
37 |
+
lr: 2e-5
|
38 |
+
weight_decay: 1e-8
|
39 |
+
|
40 |
+
scheduler:
|
41 |
+
_model_target_: transformers.get_scheduler
|
42 |
+
_model_partial_: true
|
43 |
+
name : "linear"
|
44 |
+
num_warmup_steps: 0
|
45 |
+
|
46 |
+
model:
|
47 |
+
_model_target_: model.open_slu_model.OpenSLUModel
|
48 |
+
ignore_index: -100
|
49 |
+
encoder:
|
50 |
+
_model_target_: model.encoder.AutoEncoder
|
51 |
+
encoder_name: google/electra-small-discriminator
|
52 |
+
output_dim: 256
|
53 |
+
return_with_input: true
|
54 |
+
return_sentence_level_hidden: true
|
55 |
+
|
56 |
+
decoder:
|
57 |
+
_model_target_: model.decoder.base_decoder.BaseDecoder
|
58 |
+
intent_classifier:
|
59 |
+
_model_target_: model.decoder.classifier.LinearClassifier
|
60 |
+
mode: "intent"
|
61 |
+
ignore_index: -100
|
62 |
+
|
63 |
+
|
64 |
+
slot_classifier:
|
65 |
+
_model_target_: model.decoder.classifier.LinearClassifier
|
66 |
+
mode: "slot"
|
67 |
+
ignore_index: -100
|
config/reproduction/atis/joint-bert.yaml
ADDED
@@ -0,0 +1,70 @@
|
1 |
+
device: "Tesla V100-SXM2-16GB"
|
2 |
+
|
3 |
+
base:
|
4 |
+
name: "OpenSLUv1"
|
5 |
+
train: True
|
6 |
+
test: True
|
7 |
+
device: cuda
|
8 |
+
seed: 42
|
9 |
+
epoch_num: 300
|
10 |
+
batch_size: 128
|
11 |
+
|
12 |
+
model_manager:
|
13 |
+
load_dir: null
|
14 |
+
save_dir: save/joint-bert-atis
|
15 |
+
|
16 |
+
evaluator:
|
17 |
+
best_key: EMA
|
18 |
+
eval_by_epoch: true
|
19 |
+
# eval_step: 1800
|
20 |
+
metric:
|
21 |
+
- intent_acc
|
22 |
+
- slot_f1
|
23 |
+
- EMA
|
24 |
+
|
25 |
+
accelerator:
|
26 |
+
use_accelerator: false
|
27 |
+
|
28 |
+
dataset:
|
29 |
+
dataset_name: atis
|
30 |
+
|
31 |
+
tokenizer:
|
32 |
+
_tokenizer_name_: bert-base-uncased
|
33 |
+
_padding_side_: right
|
34 |
+
_align_mode_: general
|
35 |
+
add_special_tokens: true
|
36 |
+
|
37 |
+
optimizer:
|
38 |
+
_model_target_: torch.optim.AdamW
|
39 |
+
_model_partial_: true
|
40 |
+
lr: 4e-6
|
41 |
+
weight_decay: 1e-8
|
42 |
+
|
43 |
+
scheduler:
|
44 |
+
_model_target_: transformers.get_scheduler
|
45 |
+
_model_partial_: true
|
46 |
+
name : "linear"
|
47 |
+
num_warmup_steps: 0
|
48 |
+
|
49 |
+
model:
|
50 |
+
_model_target_: model.open_slu_model.OpenSLUModel
|
51 |
+
ignore_index: -100
|
52 |
+
encoder:
|
53 |
+
_model_target_: model.encoder.AutoEncoder
|
54 |
+
encoder_name: bert-base-uncased
|
55 |
+
output_dim: 768
|
56 |
+
return_with_input: true
|
57 |
+
return_sentence_level_hidden: true
|
58 |
+
|
59 |
+
decoder:
|
60 |
+
_model_target_: model.decoder.base_decoder.BaseDecoder
|
61 |
+
intent_classifier:
|
62 |
+
_model_target_: model.decoder.classifier.LinearClassifier
|
63 |
+
mode: "intent"
|
64 |
+
ignore_index: -100
|
65 |
+
|
66 |
+
|
67 |
+
slot_classifier:
|
68 |
+
_model_target_: model.decoder.classifier.LinearClassifier
|
69 |
+
mode: "slot"
|
70 |
+
ignore_index: -100
|
config/reproduction/atis/roberta.yaml
ADDED
@@ -0,0 +1,70 @@
|
1 |
+
device: "Tesla V100-SXM2-16GB" #Useless info
|
2 |
+
|
3 |
+
base:
|
4 |
+
name: "OpenSLUv1"
|
5 |
+
train: True
|
6 |
+
test: True
|
7 |
+
device: cuda
|
8 |
+
seed: 42
|
9 |
+
epoch_num: 300
|
10 |
+
batch_size: 32
|
11 |
+
|
12 |
+
model_manager:
|
13 |
+
load_dir: null
|
14 |
+
save_dir: save/roberta-atis
|
15 |
+
|
16 |
+
evaluator:
|
17 |
+
best_key: EMA
|
18 |
+
eval_by_epoch: true
|
19 |
+
# eval_step: 1800
|
20 |
+
metric:
|
21 |
+
- intent_acc
|
22 |
+
- slot_f1
|
23 |
+
- EMA
|
24 |
+
|
25 |
+
accelerator:
|
26 |
+
use_accelerator: false
|
27 |
+
|
28 |
+
dataset:
|
29 |
+
dataset_name: atis
|
30 |
+
|
31 |
+
tokenizer:
|
32 |
+
_tokenizer_name_: roberta-base
|
33 |
+
_padding_side_: right
|
34 |
+
add_special_tokens: true
|
35 |
+
max_length: 512
|
36 |
+
|
37 |
+
optimizer:
|
38 |
+
_model_target_: torch.optim.AdamW
|
39 |
+
_model_partial_: true
|
40 |
+
lr: 2e-5
|
41 |
+
weight_decay: 1e-8
|
42 |
+
|
43 |
+
scheduler:
|
44 |
+
_model_target_: transformers.get_scheduler
|
45 |
+
_model_partial_: true
|
46 |
+
name : "linear"
|
47 |
+
num_warmup_steps: 0
|
48 |
+
|
49 |
+
model:
|
50 |
+
_model_target_: model.open_slu_model.OpenSLUModel
|
51 |
+
ignore_index: -100
|
52 |
+
encoder:
|
53 |
+
_model_target_: model.encoder.AutoEncoder
|
54 |
+
encoder_name: roberta-base
|
55 |
+
output_dim: 768
|
56 |
+
return_with_input: true
|
57 |
+
return_sentence_level_hidden: true
|
58 |
+
|
59 |
+
decoder:
|
60 |
+
_model_target_: model.decoder.base_decoder.BaseDecoder
|
61 |
+
intent_classifier:
|
62 |
+
_model_target_: model.decoder.classifier.LinearClassifier
|
63 |
+
mode: "intent"
|
64 |
+
ignore_index: -100
|
65 |
+
|
66 |
+
|
67 |
+
slot_classifier:
|
68 |
+
_model_target_: model.decoder.classifier.LinearClassifier
|
69 |
+
mode: "slot"
|
70 |
+
ignore_index: -100
|
config/reproduction/atis/slot-gated.yaml
ADDED
@@ -0,0 +1,87 @@
|
1 |
+
device: "NVIDIA GeForce RTX 2080 Ti"
|
2 |
+
|
3 |
+
base:
|
4 |
+
name: "OpenSLUv1"
|
5 |
+
train: true
|
6 |
+
test: true
|
7 |
+
device: cuda
|
8 |
+
seed: 42
|
9 |
+
epoch_num: 300
|
10 |
+
batch_size: 16
|
11 |
+
|
12 |
+
model_manager:
|
13 |
+
load_dir: null
|
14 |
+
save_dir: save/slot-gated-atis
|
15 |
+
|
16 |
+
evaluator:
|
17 |
+
best_key: EMA
|
18 |
+
eval_by_epoch: true
|
19 |
+
# eval_step: 1800
|
20 |
+
metric:
|
21 |
+
- intent_acc
|
22 |
+
- slot_f1
|
23 |
+
- EMA
|
24 |
+
|
25 |
+
accelerator:
|
26 |
+
use_accelerator: false
|
27 |
+
|
28 |
+
dataset:
|
29 |
+
dataset_name: atis
|
30 |
+
|
31 |
+
tokenizer:
|
32 |
+
_tokenizer_name_: word_tokenizer
|
33 |
+
_padding_side_: right
|
34 |
+
_align_mode_: fast
|
35 |
+
add_special_tokens: false
|
36 |
+
max_length: 512
|
37 |
+
|
38 |
+
optimizer:
|
39 |
+
_model_target_: torch.optim.Adam
|
40 |
+
_model_partial_: true
|
41 |
+
lr: 0.001
|
42 |
+
weight_decay: 1e-6
|
43 |
+
|
44 |
+
scheduler:
|
45 |
+
_model_target_: transformers.get_scheduler
|
46 |
+
_model_partial_: true
|
47 |
+
name : "linear"
|
48 |
+
num_warmup_steps: 0
|
49 |
+
|
50 |
+
model:
|
51 |
+
_model_target_: model.OpenSLUModel
|
52 |
+
ignore_index: -100
|
53 |
+
encoder:
|
54 |
+
_model_target_: model.encoder.AutoEncoder
|
55 |
+
encoder_name: lstm
|
56 |
+
|
57 |
+
embedding:
|
58 |
+
embedding_dim: 256
|
59 |
+
dropout_rate: 0.4
|
60 |
+
|
61 |
+
lstm:
|
62 |
+
dropout_rate: 0.5
|
63 |
+
output_dim: 256
|
64 |
+
layer_num: 2
|
65 |
+
bidirectional: true
|
66 |
+
|
67 |
+
return_with_input: true
|
68 |
+
return_sentence_level_hidden: false
|
69 |
+
|
70 |
+
decoder:
|
71 |
+
_model_target_: model.decoder.BaseDecoder
|
72 |
+
|
73 |
+
interaction:
|
74 |
+
_model_target_: model.decoder.interaction.SlotGatedInteraction
|
75 |
+
remove_slot_attn: false
|
76 |
+
output_dim: 256
|
77 |
+
dropout_rate: 0.4
|
78 |
+
|
79 |
+
intent_classifier:
|
80 |
+
_model_target_: model.decoder.classifier.LinearClassifier
|
81 |
+
mode: "intent"
|
82 |
+
ignore_index: -100
|
83 |
+
|
84 |
+
slot_classifier:
|
85 |
+
_model_target_: model.decoder.classifier.LinearClassifier
|
86 |
+
mode: "slot"
|
87 |
+
ignore_index: -100
|
config/reproduction/atis/stack-propagation.yaml
ADDED
@@ -0,0 +1,109 @@
|
1 |
+
device: "NVIDIA GeForce RTX 2080 Ti"
|
2 |
+
|
3 |
+
base:
|
4 |
+
name: "OpenSLUv1"
|
5 |
+
train: true
|
6 |
+
test: true
|
7 |
+
device: cuda
|
8 |
+
seed: 42
|
9 |
+
epoch_num: 300
|
10 |
+
batch_size: 16
|
11 |
+
|
12 |
+
model_manager:
|
13 |
+
load_dir: null
|
14 |
+
save_dir: save/stack-propagation-atis
|
15 |
+
save_mode: save-by-eval # save-by-step
|
16 |
+
# save_step: 100
|
17 |
+
max_save_num: 1
|
18 |
+
|
19 |
+
accelerator:
|
20 |
+
use_accelerator: false
|
21 |
+
|
22 |
+
dataset:
|
23 |
+
dataset_name: atis
|
24 |
+
|
25 |
+
evaluator:
|
26 |
+
best_key: EMA
|
27 |
+
eval_by_epoch: true
|
28 |
+
# eval_step: 1800
|
29 |
+
metric:
|
30 |
+
- intent_acc
|
31 |
+
- slot_f1
|
32 |
+
- EMA
|
33 |
+
|
34 |
+
tokenizer:
|
35 |
+
_tokenizer_name_: word_tokenizer
|
36 |
+
_padding_side_: right
|
37 |
+
_align_mode_: fast
|
38 |
+
_to_lower_case_: true
|
39 |
+
add_special_tokens: false
|
40 |
+
max_length: 512
|
41 |
+
|
42 |
+
optimizer:
|
43 |
+
_model_target_: torch.optim.Adam
|
44 |
+
_model_partial_: true
|
45 |
+
lr: 0.001
|
46 |
+
weight_decay: 1e-6
|
47 |
+
|
48 |
+
scheduler:
|
49 |
+
_model_target_: transformers.get_scheduler
|
50 |
+
_model_partial_: true
|
51 |
+
name : "linear"
|
52 |
+
num_warmup_steps: 0
|
53 |
+
|
54 |
+
model:
|
55 |
+
_model_target_: model.OpenSLUModel
|
56 |
+
|
57 |
+
encoder:
|
58 |
+
_model_target_: model.encoder.AutoEncoder
|
59 |
+
encoder_name: self-attention-lstm
|
60 |
+
|
61 |
+
embedding:
|
62 |
+
embedding_dim: 256
|
63 |
+
dropout_rate: 0.55
|
64 |
+
|
65 |
+
lstm:
|
66 |
+
layer_num: 1
|
67 |
+
bidirectional: true
|
68 |
+
output_dim: 256
|
69 |
+
dropout_rate: 0.5
|
70 |
+
|
71 |
+
attention:
|
72 |
+
hidden_dim: 1024
|
73 |
+
output_dim: 128
|
74 |
+
dropout_rate: 0.6
|
75 |
+
|
76 |
+
return_with_input: true
|
77 |
+
return_sentence_level_hidden: false
|
78 |
+
|
79 |
+
decoder:
|
80 |
+
_model_target_: model.decoder.StackPropagationDecoder
|
81 |
+
interaction:
|
82 |
+
_model_target_: model.decoder.interaction.StackInteraction
|
83 |
+
differentiable: false
|
84 |
+
|
85 |
+
intent_classifier:
|
86 |
+
_model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
|
87 |
+
layer_num: 1
|
88 |
+
bidirectional: false
|
89 |
+
force_ratio: 0.9
|
90 |
+
hidden_dim: 64
|
91 |
+
embedding_dim: 8
|
92 |
+
ignore_index: -100
|
93 |
+
dropout_rate: 0.5
|
94 |
+
mode: "token-level-intent"
|
95 |
+
use_multi: false
|
96 |
+
return_sentence_level: true
|
97 |
+
|
98 |
+
slot_classifier:
|
99 |
+
_model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
|
100 |
+
layer_num: 1
|
101 |
+
bidirectional: false
|
102 |
+
force_ratio: 0.9
|
103 |
+
hidden_dim: 64
|
104 |
+
embedding_dim: 32
|
105 |
+
ignore_index: -100
|
106 |
+
dropout_rate: 0.55
|
107 |
+
mode: "slot"
|
108 |
+
use_multi: false
|
109 |
+
return_sentence_level: false
|
config/reproduction/mix-atis/agif.yaml
ADDED
@@ -0,0 +1,133 @@
device: "NVIDIA GeForce RTX 3080"

base:
  name: "OpenSLUv1"
  multi_intent: true
  train: true
  test: true
  device: cuda
  seed: 42
  epoch_num: 100
  batch_size: 32
  ignore_index: -100

model_manager:
  load_dir: null
  save_dir: save/agif-mix-atis

accelerator:
  use_accelerator: false

dataset:
  dataset_name: mix-atis

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - intent_f1
    - slot_f1
    - EMA

tokenizer:
  _tokenizer_name_: word_tokenizer
  _padding_side_: right
  _align_mode_: fast
  add_special_tokens: false
  max_length: 512

optimizer:
  _model_target_: torch.optim.Adam
  _model_partial_: true
  lr: 0.001
  weight_decay: 1e-6

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name : "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.OpenSLUModel

  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: self-attention-lstm

    embedding:
      embedding_dim: 128
      dropout_rate: 0.4

    lstm:
      layer_num: 1
      bidirectional: true
      output_dim: 256
      dropout_rate: 0.4

    attention:
      hidden_dim: 1024
      output_dim: 128
      dropout_rate: 0.4

    unflat_attention:
      dropout_rate: 0.4
    output_dim: "{model.encoder.lstm.output_dim} + {model.encoder.attention.output_dim}"
    return_with_input: true
    return_sentence_level_hidden: true

  decoder:
    _model_target_: model.decoder.AGIFDecoder
    # teacher_forcing: true
    interaction:
      _model_target_: model.decoder.interaction.AGIFInteraction
      intent_embedding_dim: 128
      input_dim: "{model.encoder.output_dim}"
      hidden_dim: 128
      output_dim: "{model.decoder.interaction.intent_embedding_dim}"
      dropout_rate: 0.4
      alpha: 0.2
      num_heads: 4
      num_layers: 2
      row_normalized: true

    intent_classifier:
      _model_target_: model.decoder.classifier.MLPClassifier
      mode: "intent"
      mlp:
        - _model_target_: torch.nn.Linear
          in_features: "{model.encoder.output_dim}"
          out_features: 256
        - _model_target_: torch.nn.LeakyReLU
          negative_slope: 0.2
        - _model_target_: torch.nn.Linear
          in_features: 256
          out_features: "{base.intent_label_num}"
      dropout_rate: 0.4
      loss_fn:
        _model_target_: torch.nn.BCEWithLogitsLoss
      use_multi: "{base.multi_intent}"
      multi_threshold: 0.5
      return_sentence_level: true
      ignore_index: -100
      weight: 0.3

    slot_classifier:
      _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
      mode: "slot"
      input_dim: "{model.encoder.output_dim}"
      layer_num: 1
      bidirectional: false
      force_ratio: 0.9
      hidden_dim: "{model.decoder.interaction.intent_embedding_dim}"
      embedding_dim: 128
      # loss_fn:
      #   _model_target_: torch.nn.NLLLoss
      ignore_index: -100
      dropout_rate: 0.4
      use_multi: false
      multi_threshold: 0.5
      return_sentence_level: false
      weight: 0.7
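Values such as "{model.encoder.output_dim}" and the "{...} + {...}" expression above are cross-references into other parts of the same config rather than literals. The code that resolves them is not part of this diff; a rough sketch of the idea, offered only as an assumption about how such placeholders could be expanded, is:

import re

def resolve_refs(value: str, root: dict):
    """Replace {a.b.c} placeholders with values looked up in the config tree,
    then evaluate simple arithmetic such as "256 + 128" (illustrative only)."""
    def lookup(match):
        node = root
        for key in match.group(1).split("."):
            node = node[key]
        return str(node)
    resolved = re.sub(r"\{([^}]+)\}", lookup, value)
    if re.fullmatch(r"[\d\s+*/-]+", resolved):
        return eval(resolved, {"__builtins__": {}})
    return resolved

config = {"model": {"encoder": {"lstm": {"output_dim": 256}, "attention": {"output_dim": 128}}}}
print(resolve_refs("{model.encoder.lstm.output_dim} + {model.encoder.attention.output_dim}", config))  # 384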
config/reproduction/mix-atis/gl-gin.yaml
ADDED
@@ -0,0 +1,128 @@
device: "Tesla V100-SXM2-16GB"

base:
  name: "OpenSLUv1"
  multi_intent: true
  train: true
  test: true
  device: cuda
  seed: 42
  epoch_num: 300
  batch_size: 32
  ignore_index: -100

model_manager:
  load_dir: null
  save_dir: save/gl-gin-mix-atis

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - intent_f1
    - slot_f1
    - EMA

dataset:
  dataset_name: mix-atis

tokenizer:
  _tokenizer_name_: word_tokenizer
  _padding_side_: right
  _align_mode_: fast
  add_special_tokens: false
  max_length: 512

optimizer:
  _model_target_: torch.optim.Adam
  _model_partial_: true
  lr: 0.001
  weight_decay: 1e-6

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name : "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.OpenSLUModel

  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: self-attention-lstm

    embedding:
      embedding_dim: 128
      dropout_rate: 0.4

    lstm:
      layer_num: 1
      bidirectional: true
      output_dim: 256
      dropout_rate: 0.4

    attention:
      hidden_dim: 1024
      output_dim: 128
      dropout_rate: 0.4
    output_dim: "{model.encoder.lstm.output_dim} + {model.encoder.attention.output_dim}"
    return_with_input: true
    return_sentence_level_hidden: false

  decoder:
    _model_target_: model.decoder.GLGINDecoder
    dropout_rate: 0.4
    interaction:
      _model_target_: model.decoder.interaction.GLGINInteraction
      intent_embedding_dim: 64
      input_dim: "{model.encoder.output_dim}"
      hidden_dim: 256
      output_dim: "{model.decoder.interaction.intent_embedding_dim}"
      dropout_rate: 0.4
      alpha: 0.2
      num_heads: 8
      num_layers: 2
      row_normalized: true
      slot_graph_window: 1
      intent_label_num: "{base.intent_label_num}"

    intent_classifier:
      _model_target_: model.decoder.classifier.MLPClassifier
      mode: "token-level-intent"
      mlp:
        - _model_target_: torch.nn.Linear
          in_features: "{model.encoder.output_dim}"
          out_features: 256
        - _model_target_: torch.nn.LeakyReLU
          negative_slope: 0.2
        - _model_target_: torch.nn.Linear
          in_features: 256
          out_features: "{base.intent_label_num}"
      loss_fn:
        _model_target_: torch.nn.BCEWithLogitsLoss
      dropout_rate: 0.4
      use_multi: "{base.multi_intent}"
      multi_threshold: 0.5
      return_sentence_level: true
      ignore_index: "{base.ignore_index}"

    slot_classifier:
      _model_target_: model.decoder.classifier.MLPClassifier
      mode: "slot"
      mlp:
        - _model_target_: torch.nn.Linear
          in_features: "{model.decoder.interaction.output_dim}"
          out_features: "{model.decoder.interaction.output_dim}"
        - _model_target_: torch.nn.LeakyReLU
          negative_slope: 0.2
        - _model_target_: torch.nn.Linear
          in_features: "{model.decoder.interaction.output_dim}"
          out_features: "{base.slot_label_num}"
      ignore_index: "{base.ignore_index}"
      dropout_rate: 0.4
      use_multi: false
      multi_threshold: 0.5
      return_sentence_level: false
config/reproduction/mix-atis/vanilla.yaml
ADDED
@@ -0,0 +1,95 @@
base:
  name: "OpenSLUv1"
  multi_intent: true
  train: true
  test: true
  device: cuda
  seed: 42
  epoch_num: 100
  batch_size: 16
  ignore_index: -100

model_manager:
  load_dir: null
  save_dir: save/vanilla-mix-atis

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - intent_f1
    - slot_f1
    - EMA

dataset:
  dataset_name: atis

tokenizer:
  _tokenizer_name_: word_tokenizer
  _padding_side_: right
  _align_mode_: fast
  add_special_tokens: false
  max_length: 512

optimizer:
  _model_target_: torch.optim.Adam
  _model_partial_: true
  lr: 0.001
  weight_decay: 1e-6

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name : "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.OpenSLUModel

  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: self-attention-lstm

    embedding:
      embedding_dim: 128
      dropout_rate: 0.4

    lstm:
      layer_num: 1
      bidirectional: true
      output_dim: 256
      dropout_rate: 0.4

    attention:
      hidden_dim: 1024
      output_dim: 128
      dropout_rate: 0.4
    output_dim: "{model.encoder.lstm.output_dim} + {model.encoder.attention.output_dim}"
    return_with_input: true
    return_sentence_level_hidden: true

  decoder:
    _model_target_: model.decoder.BaseDecoder

    intent_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "intent"
      input_dim: "{model.encoder.output_dim}"
      loss_fn:
        _model_target_: torch.nn.BCEWithLogitsLoss
      use_multi: "{base.multi_intent}"
      multi_threshold: 0.5
      return_sentence_level: true
      ignore_index: "{base.ignore_index}"


    slot_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "slot"
      input_dim: "{model.encoder.output_dim}"
      use_multi: false
      multi_threshold: 0.5
      ignore_index: "{base.ignore_index}"
      return_sentence_level: false
config/reproduction/mix-snips/agif.yaml
ADDED
@@ -0,0 +1,131 @@
device: "Tesla P100-PCIE-16GB"

base:
  name: "OpenSLUv1"
  multi_intent: true
  train: true
  test: true
  device: cuda
  seed: 42
  epoch_num: 50
  batch_size: 64
  ignore_index: -100

model_manager:
  load_dir: null
  save_dir: save/agif-mix-snips

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - intent_f1
    - slot_f1
    - EMA

accelerator:
  use_accelerator: false

dataset:
  dataset_name: mix-snips

tokenizer:
  _tokenizer_name_: word_tokenizer
  _padding_side_: right
  _align_mode_: fast
  add_special_tokens: false
  max_length: 512

optimizer:
  _model_target_: torch.optim.Adam
  _model_partial_: true
  lr: 0.001
  weight_decay: 1e-6

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name : "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.OpenSLUModel

  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: self-attention-lstm

    embedding:
      embedding_dim: 128
      dropout_rate: 0.4

    lstm:
      layer_num: 1
      bidirectional: true
      output_dim: 256
      dropout_rate: 0.4

    attention:
      hidden_dim: 1024
      output_dim: 128
      dropout_rate: 0.4

    unflat_attention:
      dropout_rate: 0.4
    output_dim: "{model.encoder.lstm.output_dim} + {model.encoder.attention.output_dim}"
    return_with_input: true
    return_sentence_level_hidden: true

  decoder:
    _model_target_: model.decoder.AGIFDecoder
    # teacher_forcing: true
    interaction:
      _model_target_: model.decoder.interaction.AGIFInteraction
      intent_embedding_dim: 128
      input_dim: "{model.encoder.output_dim}"
      hidden_dim: 128
      output_dim: "{model.decoder.interaction.intent_embedding_dim}"
      dropout_rate: 0.4
      alpha: 0.2
      num_heads: 4
      num_layers: 2
      row_normalized: true

    intent_classifier:
      _model_target_: model.decoder.classifier.MLPClassifier
      mode: "intent"
      mlp:
        - _model_target_: torch.nn.Linear
          in_features: "{model.encoder.output_dim}"
          out_features: 256
        - _model_target_: torch.nn.LeakyReLU
          negative_slope: 0.2
        - _model_target_: torch.nn.Linear
          in_features: 256
          out_features: "{base.intent_label_num}"
      dropout_rate: 0.4
      loss_fn:
        _model_target_: torch.nn.BCEWithLogitsLoss
      use_multi: "{base.multi_intent}"
      multi_threshold: 0.5
      return_sentence_level: true
      ignore_index: -100
      weight: 0.3

    slot_classifier:
      _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
      mode: "slot"
      input_dim: "{model.encoder.output_dim}"
      layer_num: 1
      bidirectional: false
      force_ratio: 0.9
      hidden_dim: "{model.decoder.interaction.intent_embedding_dim}"
      embedding_dim: 128
      ignore_index: -100
      dropout_rate: 0.4
      use_multi: false
      multi_threshold: 0.5
      return_sentence_level: false
      weight: 0.7
config/reproduction/mix-snips/gl-gin.yaml
ADDED
@@ -0,0 +1,131 @@
device: "NVIDIA GeForce RTX 2080 Ti"

base:
  name: "OpenSLUv1"
  multi_intent: true
  train: true
  test: true
  device: cuda
  seed: 42
  epoch_num: 50
  batch_size: 32
  ignore_index: -100


model_manager:
  load_dir: null
  save_dir: save/gl-gin-mix-snips

evaluator:
  best_key: EMA
  eval_by_epoch: false
  eval_step: 1800
  metric:
    - intent_acc
    - intent_f1
    - slot_f1
    - EMA

dataset:
  dataset_name: mix-snips

tokenizer:
  _tokenizer_name_: word_tokenizer
  _padding_side_: right
  _align_mode_: fast
  add_special_tokens: false
  max_length: 512

optimizer:
  _model_target_: torch.optim.Adam
  _model_partial_: true
  lr: 0.001
  weight_decay: 1e-6

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name : "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.OpenSLUModel

  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: self-attention-lstm

    embedding:
      embedding_dim: 128
      dropout_rate: 0.4

    lstm:
      layer_num: 2
      bidirectional: true
      output_dim: 256
      dropout_rate: 0.4

    attention:
      hidden_dim: 1024
      output_dim: 128
      dropout_rate: 0.4
    output_dim: "{model.encoder.lstm.output_dim} + {model.encoder.attention.output_dim}"
    return_with_input: true
    return_sentence_level_hidden: false

  decoder:
    _model_target_: model.decoder.GLGINDecoder
    dropout_rate: 0.4
    interaction:
      _model_target_: model.decoder.interaction.GLGINInteraction
      intent_embedding_dim: 256
      input_dim: "{model.encoder.output_dim}"
      hidden_dim: 256
      output_dim: "{model.decoder.interaction.intent_embedding_dim}"
      dropout_rate: 0.4
      alpha: 0.2
      num_heads: 4
      num_layers: 2
      row_normalized: true
      slot_graph_window: 1
      intent_label_num: "{base.intent_label_num}"

    intent_classifier:
      _model_target_: model.decoder.classifier.MLPClassifier
      mode: "token-level-intent"
      mlp:
        - _model_target_: torch.nn.Linear
          in_features: "{model.encoder.output_dim}"
          out_features: 256
        - _model_target_: torch.nn.LeakyReLU
          negative_slope: 0.2
        - _model_target_: torch.nn.Linear
          in_features: 256
          out_features: "{base.intent_label_num}"
      loss_fn:
        _model_target_: torch.nn.BCEWithLogitsLoss
      dropout_rate: 0.4
      use_multi: "{base.multi_intent}"
      multi_threshold: 0.5
      return_sentence_level: true
      ignore_index: "{base.ignore_index}"
      weight: 0.2

    slot_classifier:
      _model_target_: model.decoder.classifier.MLPClassifier
      mode: "slot"
      mlp:
        - _model_target_: torch.nn.Linear
          in_features: "{model.decoder.interaction.output_dim}"
          out_features: "{model.decoder.interaction.output_dim}"
        - _model_target_: torch.nn.LeakyReLU
          negative_slope: 0.2
        - _model_target_: torch.nn.Linear
          in_features: "{model.decoder.interaction.output_dim}"
          out_features: "{base.slot_label_num}"
      ignore_index: "{base.ignore_index}"
      dropout_rate: 0.4
      use_multi: false
      multi_threshold: 0.5
      weight: 0.8
      return_sentence_level: false
config/reproduction/mix-snips/vanilla.yaml
ADDED
@@ -0,0 +1,95 @@
base:
  name: "OpenSLUv1"
  multi_intent: true
  train: true
  test: true
  device: cuda
  seed: 42
  epoch_num: 100
  batch_size: 16
  ignore_index: -100

model_manager:
  load_dir: null
  save_dir: save/vanilla-mix-snips

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - intent_f1
    - slot_f1
    - EMA

dataset:
  dataset_name: atis

tokenizer:
  _tokenizer_name_: word_tokenizer
  _padding_side_: right
  _align_mode_: fast
  add_special_tokens: false
  max_length: 512

optimizer:
  _model_target_: torch.optim.Adam
  _model_partial_: true
  lr: 0.001
  weight_decay: 1e-6

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name : "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.OpenSLUModel

  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: self-attention-lstm

    embedding:
      embedding_dim: 128
      dropout_rate: 0.4

    lstm:
      layer_num: 1
      bidirectional: true
      output_dim: 256
      dropout_rate: 0.4

    attention:
      hidden_dim: 1024
      output_dim: 128
      dropout_rate: 0.4
    output_dim: "{model.encoder.lstm.output_dim} + {model.encoder.attention.output_dim}"
    return_with_input: true
    return_sentence_level_hidden: true

  decoder:
    _model_target_: model.decoder.BaseDecoder

    intent_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "intent"
      input_dim: "{model.encoder.output_dim}"
      loss_fn:
        _model_target_: torch.nn.BCEWithLogitsLoss
      use_multi: "{base.multi_intent}"
      multi_threshold: 0.5
      return_sentence_level: true
      ignore_index: "{base.ignore_index}"


    slot_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "slot"
      input_dim: "{model.encoder.output_dim}"
      use_multi: false
      multi_threshold: 0.5
      ignore_index: "{base.ignore_index}"
      return_sentence_level: false
config/reproduction/snips/bi-model.yaml
ADDED
@@ -0,0 +1,104 @@
device: "Tesla V100-SXM2-16GB"

base:
  name: "OpenSLUv1"
  train: true
  test: true
  device: cuda
  seed: 42
  epoch_num: 300
  batch_size: 16

model_manager:
  load_dir: null
  save_dir: save/bi-model-snips

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - slot_f1
    - EMA

accelerator:
  use_accelerator: false

dataset:
  dataset_name: snips

tokenizer:
  _tokenizer_name_: word_tokenizer
  _padding_side_: right
  _align_mode_: fast
  add_special_tokens: false
  max_length: 512

optimizer:
  _model_target_: torch.optim.Adam
  _model_partial_: true
  lr: 0.001
  weight_decay: 1e-6

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name : "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.OpenSLUModel

  encoder:
    _model_target_: model.encoder.BiEncoder
    intent_encoder:
      _model_target_: model.encoder.AutoEncoder
      encoder_name: lstm

      embedding:
        embedding_dim: 256
        dropout_rate: 0.5

      lstm:
        dropout_rate: 0.5
        output_dim: 256
        layer_num: 2
        bidirectional: true

      return_with_input: true
      return_sentence_level_hidden: false

    slot_encoder:
      _model_target_: model.encoder.AutoEncoder
      encoder_name: lstm

      embedding:
        embedding_dim: 256
        dropout_rate: 0.5

      lstm:
        dropout_rate: 0.5
        output_dim: 256
        layer_num: 2
        bidirectional: true

      return_with_input: true
      return_sentence_level_hidden: false

  decoder:
    _model_target_: model.decoder.BaseDecoder
    interaction:
      _model_target_: model.decoder.interaction.BiModelInteraction
      output_dim: 256
      dropout_rate: 0.5

    intent_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "intent"
      ignore_index: -100

    slot_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "slot"
      ignore_index: -100
config/reproduction/snips/dca_net.yaml
ADDED
@@ -0,0 +1,88 @@
device: "NVIDIA GeForce RTX 2080 Ti"

base:
  name: "OpenSLUv1"
  train: true
  test: true
  device: cuda
  seed: 42
  epoch_num: 300
  batch_size: 16

model_manager:
  load_dir: null
  save_dir: save/dca-net-snips

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - slot_f1
    - EMA

accelerator:
  use_accelerator: false

dataset:
  dataset_name: snips

tokenizer:
  _tokenizer_name_: word_tokenizer
  _padding_side_: right
  _align_mode_: fast
  add_special_tokens: false
  max_length: 512

optimizer:
  _model_target_: torch.optim.Adam
  _model_partial_: true
  lr: 0.001
  weight_decay: 1e-6

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name : "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.OpenSLUModel
  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: lstm

    embedding:
      load_embedding_name: glove.6B.300d.txt
      embedding_dim: 300
      dropout_rate: 0.4

    lstm:
      dropout_rate: 0.4
      output_dim: 128
      layer_num: 2
      bidirectional: true
    output_dim: "{model.encoder.lstm.output_dim}"
    return_with_input: true
    return_sentence_level_hidden: false

  decoder:
    _model_target_: model.decoder.DCANetDecoder
    interaction:
      _model_target_: model.decoder.interaction.DCANetInteraction
      output_dim: "{model.encoder.output_dim}"
      attention_dropout: 0.4
      num_attention_heads: 8

    intent_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "intent"
      input_dim: "{model.encoder.output_dim}"
      ignore_index: -100

    slot_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "slot"
      input_dim: "{model.encoder.output_dim}"
      ignore_index: -100
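The `load_embedding_name: glove.6B.300d.txt` entry above initializes the word embeddings from pretrained GloVe vectors instead of random values. The actual loading code lives elsewhere in the repository; a generic sketch of building such an `embedding_matrix` for a word-level vocabulary (the file path and the tiny vocab are only examples) could look like:

import torch

def build_glove_matrix(glove_path: str, vocab: dict, dim: int = 300) -> torch.Tensor:
    """Fill a |vocab| x dim matrix with GloVe vectors; words missing from GloVe keep a small random init."""
    matrix = torch.randn(len(vocab), dim) * 0.1
    matrix[0] = 0.0  # padding index stays zero
    with open(glove_path, encoding="utf8") as f:
        for line in f:
            word, *values = line.rstrip().split(" ")
            if word in vocab and len(values) == dim:
                matrix[vocab[word]] = torch.tensor([float(v) for v in values])
    return matrix

# usage sketch:
# matrix = build_glove_matrix("glove.6B.300d.txt", {"<pad>": 0, "flight": 1}, 300)
# embedding = torch.nn.Embedding.from_pretrained(matrix, padding_idx=0)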
config/reproduction/snips/deberta.yaml
ADDED
@@ -0,0 +1,70 @@
device: "Tesla V100-SXM2-16GB"

base:
  name: "OpenSLUv1"
  train: true
  test: true
  device: cuda
  seed: 42
  epoch_num: 300
  batch_size: 32

model_manager:
  load_dir: null
  save_dir: save/deberta-snips

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - slot_f1
    - EMA

accelerator:
  use_accelerator: false

dataset:
  dataset_name: snips

tokenizer:
  _tokenizer_name_: microsoft/deberta-v3-base
  _padding_side_: right
  add_special_tokens: true
  max_length: 512

optimizer:
  _model_target_: torch.optim.AdamW
  _model_partial_: true
  lr: 2e-5
  weight_decay: 1e-8

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name : "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.open_slu_model.OpenSLUModel
  ignore_index: -100
  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: microsoft/deberta-v3-base
    output_dim: 768
    return_with_input: true
    return_sentence_level_hidden: true

  decoder:
    _model_target_: model.decoder.base_decoder.BaseDecoder
    intent_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "intent"
      ignore_index: -100


    slot_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "slot"
      ignore_index: -100
config/reproduction/snips/electra.yaml
ADDED
@@ -0,0 +1,69 @@
device: "Tesla V100-SXM2-16GB"
base:
  name: "OpenSLUv1"
  train: true
  test: true
  device: cuda
  seed: 42
  epoch_num: 300
  batch_size: 32

model_manager:
  load_dir: null
  save_dir: save/electra-snips

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - slot_f1
    - EMA

accelerator:
  use_accelerator: false

dataset:
  dataset_name: snips

tokenizer:
  _tokenizer_name_: google/electra-small-discriminator
  _padding_side_: right
  add_special_tokens: true
  max_length: 512

optimizer:
  _model_target_: torch.optim.AdamW
  _model_partial_: true
  lr: 2e-5
  weight_decay: 1e-8

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name : "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.open_slu_model.OpenSLUModel
  ignore_index: -100
  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: google/electra-small-discriminator
    output_dim: 256
    return_with_input: true
    return_sentence_level_hidden: true

  decoder:
    _model_target_: model.decoder.base_decoder.BaseDecoder
    intent_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "intent"
      ignore_index: -100


    slot_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "slot"
      ignore_index: -100
config/reproduction/snips/joint-bert.yaml
ADDED
@@ -0,0 +1,75 @@
device: "Tesla V100-SXM2-16GB"

base:
  name: "OpenSLUv1"
  train: true
  test: true
  device: cuda
  seed: 42
  epoch_num: 300
  batch_size: 128

model_manager:
  load_dir: null
  save_dir: save/joint-bert-snips

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - slot_f1
    - EMA

accelerator:
  use_accelerator: false

dataset:
  dataset_name: snips

metric:
  - intent_acc
  - slot_f1
  - EMA

tokenizer:
  _tokenizer_name_: bert-base-uncased
  _padding_side_: right
  _align_mode_: general
  add_special_tokens: true

optimizer:
  _model_target_: torch.optim.AdamW
  _model_partial_: true
  lr: 4e-6
  weight_decay: 1e-8

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name : "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.open_slu_model.OpenSLUModel
  ignore_index: -100
  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: bert-base-uncased
    output_dim: 768
    return_with_input: true
    return_sentence_level_hidden: true

  decoder:
    _model_target_: model.decoder.base_decoder.BaseDecoder
    intent_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "intent"
      ignore_index: -100


    slot_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "slot"
      ignore_index: -100
config/reproduction/snips/roberta.yaml
ADDED
@@ -0,0 +1,70 @@
device: "Tesla V100-SXM2-16GB"

base:
  name: "OpenSLUv1"
  train: true
  test: true
  device: cuda
  seed: 42
  epoch_num: 300
  batch_size: 32

model_manager:
  load_dir: null
  save_dir: save/roberta-snips

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - slot_f1
    - EMA

accelerator:
  use_accelerator: false

dataset:
  dataset_name: snips

tokenizer:
  _tokenizer_name_: roberta-base
  _padding_side_: right
  add_special_tokens: true
  max_length: 512

optimizer:
  _model_target_: torch.optim.AdamW
  _model_partial_: true
  lr: 2e-5
  weight_decay: 1e-8

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name : "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.open_slu_model.OpenSLUModel
  ignore_index: -100
  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: roberta-base
    output_dim: 768
    return_with_input: true
    return_sentence_level_hidden: true

  decoder:
    _model_target_: model.decoder.base_decoder.BaseDecoder
    intent_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "intent"
      ignore_index: -100


    slot_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "slot"
      ignore_index: -100
config/reproduction/snips/slot-gated.yaml
ADDED
@@ -0,0 +1,87 @@
device: "NVIDIA GeForce RTX 2080 Ti"

base:
  name: "OpenSLUv1"
  train: true
  test: true
  device: cuda
  seed: 42
  epoch_num: 300
  batch_size: 16

model_manager:
  load_dir: null
  save_dir: save/slot-gated-snips

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - slot_f1
    - EMA

accelerator:
  use_accelerator: false

dataset:
  dataset_name: snips

tokenizer:
  _tokenizer_name_: word_tokenizer
  _padding_side_: right
  _align_mode_: fast
  add_special_tokens: false
  max_length: 512

optimizer:
  _model_target_: torch.optim.Adam
  _model_partial_: true
  lr: 0.001
  weight_decay: 1e-6

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name : "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.OpenSLUModel
  ignore_index: -100
  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: lstm

    embedding:
      embedding_dim: 256
      dropout_rate: 0.4

    lstm:
      dropout_rate: 0.5
      output_dim: 256
      layer_num: 2
      bidirectional: true

    return_with_input: true
    return_sentence_level_hidden: false

  decoder:
    _model_target_: model.decoder.BaseDecoder

    interaction:
      _model_target_: model.decoder.interaction.SlotGatedInteraction
      remove_slot_attn: false
      output_dim: 256
      dropout_rate: 0.4

    intent_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "intent"
      ignore_index: -100

    slot_classifier:
      _model_target_: model.decoder.classifier.LinearClassifier
      mode: "slot"
      ignore_index: -100
config/reproduction/snips/stack-propagation.yaml
ADDED
@@ -0,0 +1,105 @@
device: "Tesla V100-SXM2-16GB"

base:
  name: "OpenSLUv1"
  train: true
  test: true
  device: cuda
  seed: 42
  epoch_num: 300
  batch_size: 16

model_manager:
  load_dir: null
  save_dir: save/stack-propagation-snips

evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - slot_f1
    - EMA

accelerator:
  use_accelerator: false

dataset:
  dataset_name: snips

tokenizer:
  _tokenizer_name_: word_tokenizer
  _padding_side_: right
  _align_mode_: fast
  add_special_tokens: false
  max_length: 512

optimizer:
  _model_target_: torch.optim.Adam
  _model_partial_: true
  lr: 0.001
  weight_decay: 1e-6

scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name : "linear"
  num_warmup_steps: 0

model:
  _model_target_: model.OpenSLUModel

  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: self-attention-lstm

    embedding:
      embedding_dim: 256
      dropout_rate: 0.4

    lstm:
      layer_num: 1
      bidirectional: true
      output_dim: 256
      dropout_rate: 0.4

    attention:
      hidden_dim: 1024
      output_dim: 128
      dropout_rate: 0.4

    return_with_input: true
    return_sentence_level_hidden: false

  decoder:
    _model_target_: model.decoder.StackPropagationDecoder
    interaction:
      _model_target_: model.decoder.interaction.StackInteraction
      differentiable: false

    intent_classifier:
      _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
      layer_num: 1
      bidirectional: false
      force_ratio: 0.9
      hidden_dim: 64
      embedding_dim: 8
      ignore_index: -100
      dropout_rate: 0.4
      mode: "token-level-intent"
      use_multi: false
      return_sentence_level: true

    slot_classifier:
      _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
      layer_num: 1
      bidirectional: false
      force_ratio: 0.9
      hidden_dim: 64
      embedding_dim: 32
      ignore_index: -100
      dropout_rate: 0.4
      mode: "slot"
      use_multi: false
      return_sentence_level: false
config/visual.yaml
ADDED
@@ -0,0 +1,6 @@
host: 127.0.0.1
port: 7861

is_push_to_public: true
output_path: save/stack/outputs.jsonl
page-size: 2
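config/visual.yaml points the visualization front-end at a JSONL prediction dump and a page size. A minimal sketch of consuming such a file with the same pagination idea (the field names inside each record are not shown in this diff and are therefore left untouched):

import json

def load_pages(path: str, page_size: int = 2):
    """Read a .jsonl file and group its records into fixed-size pages."""
    with open(path, encoding="utf8") as f:
        records = [json.loads(line) for line in f if line.strip()]
    return [records[i:i + page_size] for i in range(0, len(records), page_size)]

# usage sketch:
# pages = load_pages("save/stack/outputs.jsonl", page_size=2)
# print(len(pages), pages[0])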
model/decoder/base_decoder.py
CHANGED
@@ -16,7 +16,7 @@ class BaseDecoder(nn.Module):
 
     Notice: t is often only necessary to change this module and its sub-modules
     """
-    def __init__(self, intent_classifier, slot_classifier, interaction=None):
+    def __init__(self, intent_classifier=None, slot_classifier=None, interaction=None):
         super().__init__()
         self.intent_classifier = intent_classifier
         self.slot_classifier = slot_classifier
@@ -33,7 +33,13 @@ class BaseDecoder(nn.Module):
         """
         if self.interaction is not None:
             hidden = self.interaction(hidden)
-
+        intent = None
+        slot = None
+        if self.intent_classifier is not None:
+            intent = self.intent_classifier(hidden)
+        if self.slot_classifier is not None:
+            slot = self.slot_classifier(hidden)
+        return OutputData(intent, slot)
 
     def decode(self, output: OutputData, target: InputData = None):
         """decode output logits
@@ -45,7 +51,12 @@ class BaseDecoder(nn.Module):
         Returns:
             List: decoded sequence ids
         """
-
+        intent, slot = None, None
+        if self.intent_classifier is not None:
+            intent = self.intent_classifier.decode(output, target)
+        if self.slot_classifier is not None:
+            slot = self.slot_classifier.decode(output, target)
+        return OutputData(intent, slot)
 
     def compute_loss(self, pred: OutputData, target: InputData, compute_intent_loss=True, compute_slot_loss=True):
         """compute loss.
@@ -60,16 +71,18 @@ class BaseDecoder(nn.Module):
         Returns:
             Tensor: loss result
         """
-        intent_loss = self.intent_classifier.compute_loss(pred, target) if compute_intent_loss else None
-        slot_loss = self.slot_classifier.compute_loss(pred, target) if compute_slot_loss else None
-        slot_weight = self.slot_classifier.config.get("weight")
-        slot_weight = slot_weight if slot_weight is not None else 1.
-        intent_weight = self.intent_classifier.config.get("weight")
-        intent_weight = intent_weight if intent_weight is not None else 1.
         loss = 0
-
+        intent_loss = None
+        slot_loss = None
+        if self.intent_classifier is not None:
+            intent_loss = self.intent_classifier.compute_loss(pred, target) if compute_intent_loss else None
+            intent_weight = self.intent_classifier.config.get("weight")
+            intent_weight = intent_weight if intent_weight is not None else 1.
             loss += intent_loss * intent_weight
-        if
+        if self.slot_classifier is not None:
+            slot_loss = self.slot_classifier.compute_loss(pred, target) if compute_slot_loss else None
+            slot_weight = self.slot_classifier.config.get("weight")
+            slot_weight = slot_weight if slot_weight is not None else 1.
             loss += slot_loss * slot_weight
         return loss, intent_loss, slot_loss
 
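With both classifiers now optional, a BaseDecoder can be configured for intent-only or slot-only setups and the guarded branches simply skip the missing head. A rough usage sketch, illustrative only (names such as `intent_clf`, `hidden` and `batch` are placeholders, and it assumes the first changed hunk above is the decoder's forward pass):

# illustrative only, not code from this commit
decoder = BaseDecoder(intent_classifier=intent_clf)              # slot_classifier defaults to None
out = decoder(hidden)                                            # -> OutputData(intent_logits, None)
loss, intent_loss, slot_loss = decoder.compute_loss(out, batch)  # slot_loss stays None
# only the intent term contributes to loss, so slot-free configs no longer crash here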
model/encoder/auto_encoder.py
CHANGED
@@ -2,7 +2,7 @@
 Author: Qiguang Chen
 Date: 2023-01-11 10:39:26
 LastEditors: Qiguang Chen
-LastEditTime: 2023-
+LastEditTime: 2023-02-18 19:33:34
 Description: 
 
 '''
model/encoder/non_pretrained_encoder.py
CHANGED
@@ -2,7 +2,7 @@
 Author: Qiguang Chen
 Date: 2023-01-11 10:39:26
 LastEditors: Qiguang Chen
-LastEditTime: 2023-
+LastEditTime: 2023-02-17 21:08:19
 Description: non-pretrained encoder model
 
 '''
@@ -50,7 +50,7 @@ class NonPretrainedEncoder(BaseEncoder):
         # Embedding Initialization
         embed_config = config["embedding"]
         self.__embedding_dim = embed_config["embedding_dim"]
-        if embed_config.get("load_embedding_name"):
+        if embed_config.get("load_embedding_name") and embed_config.get("embedding_matrix"):
             self.__embedding_layer = nn.Embedding.from_pretrained(embed_config["embedding_matrix"], padding_idx=0)
         else:
             self.__embedding_layer = nn.Embedding(
model/encoder/pretrained_encoder.py
CHANGED
@@ -2,11 +2,12 @@
 Author: Qiguang Chen
 Date: 2023-01-11 10:39:26
 LastEditors: Qiguang Chen
-LastEditTime: 2023-
+LastEditTime: 2023-02-18 17:38:30
 Description: pretrained encoder model
 
 '''
-from transformers import AutoModel
+from transformers import AutoModel, AutoConfig
+from common import utils
 
 from common.utils import InputData, HiddenData
 from model.encoder.base_encoder import BaseEncoder
@@ -21,7 +22,11 @@ class PretrainedEncoder(BaseEncoder):
             encoder_name (str): pretrained model name in hugging face.
         """
         super().__init__(**config)
-        self.
+        if self.config.get("_is_check_point_"):
+            self.encoder = utils.instantiate(config["pretrained_model"], target="_pretrained_model_target_")
+            # print(self.encoder)
+        else:
+            self.encoder = AutoModel.from_pretrained(config["encoder_name"])
 
     def forward(self, inputs: InputData):
         output = self.encoder(**inputs.get_inputs())
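The new branch lets a PretrainedEncoder be rebuilt from a saved checkpoint configuration instead of always downloading weights by name. For orientation, the non-checkpoint path corresponds to the standard Hugging Face flow below (the model name and sentence are just examples, not values from this commit):

from transformers import AutoModel, AutoTokenizer

# Plain path, equivalent to the else-branch above: load weights by model name.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
encoder = AutoModel.from_pretrained("bert-base-uncased")

batch = tokenizer(["list flights from denver to boston"], return_tensors="pt")
hidden = encoder(**batch).last_hidden_state   # token-level features handed to the decoder
print(hidden.shape)                           # (1, seq_len, 768)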