ktagowski committed on
Commit
0689246
1 Parent(s): c57ae3b

feat: Add model files

Browse files
README.md ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - pl
4
+ license: mit
5
+ tags:
6
+ - bert
7
+ - sentiment-classification
8
+ - clarinpl-embeddings
9
+ - LEPISZCZE
10
+ datasets:
11
+ - clarin-pl/aspectemo
12
+ metrics:
13
+ - accuracy
14
+ - f1
15
+ - precision
16
+ - recall
17
+ ---
18
+
19
+ # LEPISZCZE-aspectemo-allegro__herbert-base-cased-v1
20
+ ## Description
21
+ The [allegro/herbert-base-cased](https://huggingface.co/allegro/herbert-base-cased) model fine-tuned on the [clarin-pl/aspectemo](https://huggingface.co/datasets/clarin-pl/aspectemo) dataset.
22
+
23
+ Trained with the [clarin-pl-embeddings](https://github.com/clarin-pl/embeddings) library and included in the [LEPISZCZE](https://lepiszcze.ml/tasks/sentimentanalysis/) benchmark.
24
+
25
+ ## Results on clarin-pl/aspectemo
26
+ | | accuracy | f1_macro | f1_micro | f1_weighted | recall_macro | recall_micro | recall_weighted | precision_macro | precision_micro | precision_weighted |
27
+ |:------|-----------:|-----------:|-----------:|--------------:|---------------:|---------------:|------------------:|------------------:|------------------:|---------------------:|
28
+ | value | 0.952 | 0.368 | 0.585 | 0.586 | 0.371 | 0.566 | 0.566 | 0.392 | 0.606 | 0.617 |
29
+
30
+ ### Metrics per class
31
+ | | precision | recall | f1 | support |
32
+ |:----------|------------:|---------:|------:|----------:|
33
+ | a_amb | 0.2 | 0.033 | 0.057 | 91 |
34
+ | a_minus_m | 0.632 | 0.542 | 0.584 | 1033 |
35
+ | a_minus_s | 0.156 | 0.209 | 0.178 | 67 |
36
+ | a_plus_m | 0.781 | 0.694 | 0.735 | 1015 |
37
+ | a_plus_s | 0.153 | 0.22 | 0.18 | 41 |
38
+ | a_zero | 0.431 | 0.529 | 0.475 | 501 |
39
+
40
+ ## Finetuning hyperparameters
41
+ | Hyperparameter Name | Value |
42
+ |:------------------------|:---------|
43
+ | use_scheduler | True |
44
+ | optimizer | AdamW |
45
+ | warmup_steps | 25 |
46
+ | learning_rate | 0.0005 |
47
+ | adam_epsilon | 1e-05 |
48
+ | weight_decay | 0 |
49
+ | finetune_last_n_layers | 4 |
50
+ | classifier_dropout | 0.2 |
51
+ | max_seq_length | 512 |
52
+ | batch_size | 64 |
53
+ | max_epochs | 20 |
54
+ | early_stopping_monitor | val/Loss |
55
+ | early_stopping_mode | min |
56
+ | early_stopping_patience | 3 |
57
+
58
+ ## Citation (BibTeX)
59
+ ```
60
+ @article{augustyniak2022way,
61
+ title={This is the way: designing and compiling LEPISZCZE, a comprehensive NLP benchmark for Polish},
62
+ author={Augustyniak, Lukasz and Tagowski, Kamil and Sawczyn, Albert and Janiak, Denis and Bartusiak, Roman and Szymczak, Adrian and Janz, Arkadiusz and Szyma{\'n}ski, Piotr and W{\k{a}}troba, Marcin and Morzy, Miko{\l}aj and others},
63
+ journal={Advances in Neural Information Processing Systems},
64
+ volume={35},
65
+ pages={21805--21818},
66
+ year={2022}
67
+ }
68
+ ```
69
+
70
+
config.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "allegro/herbert-base-cased",
3
+ "architectures": [
4
+ "BertForTokenClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": 0.2,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "O",
13
+ "1": "a_minus_m",
14
+ "2": "a_minus_s",
15
+ "3": "a_zero",
16
+ "4": "a_plus_s",
17
+ "5": "a_plus_m",
18
+ "6": "a_amb"
19
+ },
20
+ "initializer_range": 0.02,
21
+ "intermediate_size": 3072,
22
+ "label2id": {
23
+ "O": 0,
24
+ "a_amb": 6,
25
+ "a_minus_m": 1,
26
+ "a_minus_s": 2,
27
+ "a_plus_m": 5,
28
+ "a_plus_s": 4,
29
+ "a_zero": 3
30
+ },
31
+ "layer_norm_eps": 1e-12,
32
+ "max_position_embeddings": 514,
33
+ "model_type": "bert",
34
+ "num_attention_heads": 12,
35
+ "num_hidden_layers": 12,
36
+ "pad_token_id": 1,
37
+ "position_embedding_type": "absolute",
38
+ "tokenizer_class": "HerbertTokenizerFast",
39
+ "torch_dtype": "float32",
40
+ "transformers_version": "4.28.1",
41
+ "type_vocab_size": 2,
42
+ "use_cache": true,
43
+ "vocab_size": 50000
44
+ }
hparams.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "optimizer":"AdamW",
3
+ "learning_rate":0.0005,
4
+ "adam_epsilon":0.00001,
5
+ "warmup_steps":25,
6
+ "weight_decay":0,
7
+ "train_batch_size":64,
8
+ "eval_batch_size":64,
9
+ "use_scheduler":true,
10
+ "metrics":null,
11
+ "model_name_or_path":"allegro/herbert-base-cased",
12
+ "num_classes":7,
13
+ "finetune_last_n_layers":4,
14
+ "config_kwargs":{
15
+ "classifier_dropout":0.2
16
+ },
17
+ "task_model_kwargs":{
18
+ "adam_epsilon":0.00001,
19
+ "eval_batch_size":64,
20
+ "learning_rate":0.0005,
21
+ "optimizer":"AdamW",
22
+ "train_batch_size":64,
23
+ "use_scheduler":true,
24
+ "warmup_steps":25,
25
+ "weight_decay":0
26
+ },
27
+ "model_compile_kwargs":null,
28
+ "evaluation_mode":"unit",
29
+ "tagging_scheme":null,
30
+ "ignore_index":-100,
31
+ "downstream_model_type":"AutoModelForTokenClassification"
32
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
packages.json ADDED
@@ -0,0 +1,333 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ "absl-py==1.4.0",
3
+ "aiobotocore==2.5.0",
4
+ "aiohttp-retry==2.8.3",
5
+ "aiohttp==3.8.4",
6
+ "aioitertools==0.11.0",
7
+ "aiosignal==1.3.1",
8
+ "alembic==1.10.4",
9
+ "amqp==5.1.1",
10
+ "annoy==1.17.2",
11
+ "antlr4-python3-runtime==4.9.3",
12
+ "anyio==3.6.2",
13
+ "appdirs==1.4.4",
14
+ "argon2-cffi-bindings==21.2.0",
15
+ "argon2-cffi==21.3.0",
16
+ "arrow==1.2.3",
17
+ "asttokens==2.2.1",
18
+ "async-timeout==4.0.2",
19
+ "asyncssh==2.13.1",
20
+ "atpublic==3.1.1",
21
+ "attrs==23.1.0",
22
+ "backcall==0.2.0",
23
+ "beautifulsoup4==4.12.2",
24
+ "billiard==3.6.4.0",
25
+ "black==21.10b0",
26
+ "bleach==6.0.0",
27
+ "blessed==1.20.0",
28
+ "boto3==1.26.76",
29
+ "botocore==1.29.76",
30
+ "cachecontrol==0.12.11",
31
+ "cachetools==5.3.0",
32
+ "catalogue==2.0.8",
33
+ "celery==5.2.7",
34
+ "certifi==2022.12.7",
35
+ "cffi==1.15.1",
36
+ "charset-normalizer==3.1.0",
37
+ "clarinpl-embeddings==0.3.0",
38
+ "cleo==2.0.1",
39
+ "click-didyoumean==0.3.0",
40
+ "click-plugins==1.1.1",
41
+ "click-repl==0.2.0",
42
+ "click==8.0.4",
43
+ "cloudpickle==2.2.1",
44
+ "cmaes==0.9.1",
45
+ "cmake==3.26.3",
46
+ "colorama==0.4.6",
47
+ "coloredlogs==15.0.1",
48
+ "colorlog==6.7.0",
49
+ "comm==0.1.3",
50
+ "configobj==5.0.8",
51
+ "contourpy==1.0.7",
52
+ "coverage==6.5.0",
53
+ "crashtest==0.4.1",
54
+ "croniter==1.3.14",
55
+ "cryptography==40.0.2",
56
+ "cycler==0.11.0",
57
+ "databricks-cli==0.17.6",
58
+ "datasets==2.11.0",
59
+ "dateutils==0.6.12",
60
+ "debugpy==1.6.7",
61
+ "decorator==5.1.1",
62
+ "deepdiff==6.3.0",
63
+ "defusedxml==0.7.1",
64
+ "dictdiffer==0.9.0",
65
+ "dill==0.3.6",
66
+ "diskcache==5.6.1",
67
+ "distlib==0.3.6",
68
+ "distro==1.8.0",
69
+ "docker-pycreds==0.4.0",
70
+ "docker==6.0.1",
71
+ "dpath==2.1.5",
72
+ "dulwich==0.21.3",
73
+ "dvc-data==0.47.2",
74
+ "dvc-http==2.30.2",
75
+ "dvc-objects==0.21.2",
76
+ "dvc-render==0.3.1",
77
+ "dvc-s3==2.22.0",
78
+ "dvc-studio-client==0.8.0",
79
+ "dvc-task==0.2.1",
80
+ "dvc==2.55.0",
81
+ "entrypoints==0.4",
82
+ "evaluate==0.4.0",
83
+ "executing==1.2.0",
84
+ "fastapi==0.88.0",
85
+ "fastjsonschema==2.16.3",
86
+ "filelock==3.12.0",
87
+ "flask==2.2.4",
88
+ "flatbuffers==23.3.3",
89
+ "flatten-dict==0.4.2",
90
+ "flufl.lock==7.1.1",
91
+ "fonttools==4.39.3",
92
+ "fqdn==1.5.1",
93
+ "frozenlist==1.3.3",
94
+ "fsspec==2023.4.0",
95
+ "funcy==2.0",
96
+ "gitdb==4.0.10",
97
+ "gitpython==3.1.31",
98
+ "google-auth-oauthlib==1.0.0",
99
+ "google-auth==2.17.3",
100
+ "grandalf==0.8",
101
+ "greenlet==2.0.2",
102
+ "grpcio==1.54.0",
103
+ "gunicorn==20.1.0",
104
+ "h11==0.14.0",
105
+ "html5lib==1.1",
106
+ "huggingface-hub==0.14.1",
107
+ "humanfriendly==10.0",
108
+ "hydra-core==1.3.2",
109
+ "idna==3.4",
110
+ "importlib-metadata==6.6.0",
111
+ "importlib-resources==5.12.0",
112
+ "iniconfig==2.0.0",
113
+ "inquirer==3.1.3",
114
+ "ipykernel==6.22.0",
115
+ "ipython-genutils==0.2.0",
116
+ "ipython==8.12.0",
117
+ "ipywidgets==8.0.6",
118
+ "isoduration==20.11.0",
119
+ "isort==5.10.1",
120
+ "iterative-telemetry==0.0.8",
121
+ "itsdangerous==2.1.2",
122
+ "jaraco.classes==3.2.3",
123
+ "jedi==0.18.2",
124
+ "jeepney==0.8.0",
125
+ "jinja2==3.1.2",
126
+ "jmespath==1.0.1",
127
+ "joblib==1.2.0",
128
+ "jsonpointer==2.3",
129
+ "jsonschema==4.17.3",
130
+ "jupyter-client==8.2.0",
131
+ "jupyter-console==6.6.3",
132
+ "jupyter-core==5.3.0",
133
+ "jupyter-events==0.6.3",
134
+ "jupyter-server-terminals==0.4.4",
135
+ "jupyter-server==2.5.0",
136
+ "jupyter==1.0.0",
137
+ "jupyterlab-pygments==0.2.2",
138
+ "jupyterlab-widgets==3.0.7",
139
+ "keyring==23.13.1",
140
+ "kiwisolver==1.4.4",
141
+ "kombu==5.2.4",
142
+ "leaderboard==0.0.1",
143
+ "lightning-cloud==0.5.34",
144
+ "lightning-utilities==0.8.0",
145
+ "lightning==2.0.1",
146
+ "lit==16.0.2",
147
+ "lockfile==0.12.2",
148
+ "lz4==4.3.2",
149
+ "mako==1.2.4",
150
+ "markdown-it-py==2.2.0",
151
+ "markdown==3.4.3",
152
+ "markupsafe==2.1.2",
153
+ "matplotlib-inline==0.1.6",
154
+ "matplotlib==3.7.1",
155
+ "mdurl==0.1.2",
156
+ "mistune==2.0.5",
157
+ "mlflow==2.3.0",
158
+ "mpmath==1.3.0",
159
+ "msgpack==1.0.4",
160
+ "multidict==6.0.4",
161
+ "multiprocess==0.70.14",
162
+ "mypy-extensions==1.0.0",
163
+ "mypy==0.950",
164
+ "nanotime==0.5.2",
165
+ "nbclassic==0.5.6",
166
+ "nbclient==0.7.4",
167
+ "nbconvert==7.3.1",
168
+ "nbformat==5.8.0",
169
+ "nest-asyncio==1.5.6",
170
+ "networkx==3.1",
171
+ "notebook-shim==0.2.3",
172
+ "notebook==6.5.4",
173
+ "numpy==1.23.4",
174
+ "nvidia-cublas-cu11==11.10.3.66",
175
+ "nvidia-cuda-cupti-cu11==11.7.101",
176
+ "nvidia-cuda-nvrtc-cu11==11.7.99",
177
+ "nvidia-cuda-runtime-cu11==11.7.99",
178
+ "nvidia-cudnn-cu11==8.5.0.96",
179
+ "nvidia-cufft-cu11==10.9.0.58",
180
+ "nvidia-curand-cu11==10.2.10.91",
181
+ "nvidia-cusolver-cu11==11.4.0.1",
182
+ "nvidia-cusparse-cu11==11.7.4.91",
183
+ "nvidia-nccl-cu11==2.14.3",
184
+ "nvidia-nvtx-cu11==11.7.91",
185
+ "oauthlib==3.2.2",
186
+ "omegaconf==2.3.0",
187
+ "onnx==1.13.1",
188
+ "onnxconverter-common==1.13.0",
189
+ "onnxruntime-tools==1.7.0",
190
+ "onnxruntime==1.14.1",
191
+ "optuna==3.1.1",
192
+ "ordered-set==4.1.0",
193
+ "orjson==3.8.10",
194
+ "packaging==23.1",
195
+ "pandas==2.0.1",
196
+ "pandocfilters==1.5.0",
197
+ "parso==0.8.3",
198
+ "pastel==0.2.1",
199
+ "pathspec==0.11.1",
200
+ "pathtools==0.1.2",
201
+ "pexpect==4.8.0",
202
+ "pickleshare==0.7.5",
203
+ "pillow==9.5.0",
204
+ "pip==22.3.1",
205
+ "pkginfo==1.9.6",
206
+ "platformdirs==3.5.0",
207
+ "pluggy==1.0.0",
208
+ "poethepoet==0.11.0",
209
+ "poetry-core==1.4.0",
210
+ "poetry-plugin-export==1.3.0",
211
+ "poetry==1.3.2",
212
+ "prometheus-client==0.16.0",
213
+ "prompt-toolkit==3.0.38",
214
+ "protobuf==3.20.3",
215
+ "psutil==5.9.5",
216
+ "ptyprocess==0.7.0",
217
+ "pure-eval==0.2.2",
218
+ "py-cpuinfo==9.0.0",
219
+ "py3nvml==0.2.7",
220
+ "py==1.11.0",
221
+ "pyarrow==11.0.0",
222
+ "pyasn1-modules==0.3.0",
223
+ "pyasn1==0.5.0",
224
+ "pycparser==2.21",
225
+ "pydantic==1.10.7",
226
+ "pydot==1.4.2",
227
+ "pyflakes==2.4.0",
228
+ "pygit2==1.12.0",
229
+ "pygments==2.15.1",
230
+ "pygtrie==2.5.0",
231
+ "pyjwt==2.6.0",
232
+ "pyparsing==3.0.9",
233
+ "pyrsistent==0.19.3",
234
+ "pytest==6.2.5",
235
+ "python-dateutil==2.8.2",
236
+ "python-editor==1.0.4",
237
+ "python-json-logger==2.0.7",
238
+ "python-multipart==0.0.6",
239
+ "pytorch-lightning==2.0.1",
240
+ "pytz==2023.3",
241
+ "pyyaml==6.0",
242
+ "pyzmq==25.0.2",
243
+ "qtconsole==5.4.2",
244
+ "qtpy==2.3.1",
245
+ "querystring-parser==1.2.4",
246
+ "rapidfuzz==2.13.7",
247
+ "readchar==4.0.5",
248
+ "regex==2023.3.23",
249
+ "requests-oauthlib==1.3.1",
250
+ "requests-toolbelt==0.10.1",
251
+ "requests==2.29.0",
252
+ "responses==0.18.0",
253
+ "rfc3339-validator==0.1.4",
254
+ "rfc3986-validator==0.1.1",
255
+ "rich==13.3.5",
256
+ "rsa==4.9",
257
+ "ruamel.yaml.clib==0.2.7",
258
+ "ruamel.yaml==0.17.21",
259
+ "s3fs==2023.4.0",
260
+ "s3transfer==0.6.0",
261
+ "sacremoses==0.0.53",
262
+ "scikit-learn==1.2.2",
263
+ "scipy==1.10.1",
264
+ "scmrepo==1.0.2",
265
+ "secretstorage==3.3.3",
266
+ "send2trash==1.8.2",
267
+ "sentry-sdk==1.21.0",
268
+ "seqeval==1.2.2",
269
+ "setproctitle==1.3.2",
270
+ "setuptools==67.7.2",
271
+ "shellingham==1.5.0.post1",
272
+ "shortuuid==1.0.11",
273
+ "shtab==1.6.1",
274
+ "six==1.16.0",
275
+ "smmap==5.0.0",
276
+ "sniffio==1.3.0",
277
+ "soupsieve==2.4.1",
278
+ "sqlalchemy==2.0.11",
279
+ "sqlparse==0.4.4",
280
+ "sqltrie==0.3.1",
281
+ "srsly==2.4.6",
282
+ "stack-data==0.6.2",
283
+ "starlette==0.22.0",
284
+ "starsessions==1.3.0",
285
+ "sympy==1.11.1",
286
+ "tabulate==0.9.0",
287
+ "tensorboard-data-server==0.7.0",
288
+ "tensorboard-plugin-wit==1.8.1",
289
+ "tensorboard==2.12.2",
290
+ "terminado==0.17.1",
291
+ "tf2onnx==1.8.4",
292
+ "threadpoolctl==3.1.0",
293
+ "tinycss2==1.2.1",
294
+ "tokenizers==0.13.3",
295
+ "toml==0.10.2",
296
+ "tomli==1.2.3",
297
+ "tomlkit==0.11.8",
298
+ "torch==2.0.0",
299
+ "torchmetrics==0.11.4",
300
+ "tornado==6.3.1",
301
+ "tqdm==4.65.0",
302
+ "traitlets==5.9.0",
303
+ "transformers==4.28.1",
304
+ "triton==2.0.0",
305
+ "trove-classifiers==2023.1.20",
306
+ "typer==0.7.0",
307
+ "types-pyyaml==6.0.12.9",
308
+ "types-requests==2.26.1",
309
+ "types-setuptools==67.7.0.0",
310
+ "typing-extensions==4.5.0",
311
+ "tzdata==2023.3",
312
+ "uri-template==1.2.0",
313
+ "urllib3==1.26.15",
314
+ "uvicorn==0.21.1",
315
+ "vine==5.0.0",
316
+ "virtualenv==20.17.1",
317
+ "voluptuous==0.13.1",
318
+ "wandb==0.15.0",
319
+ "wcwidth==0.2.6",
320
+ "webcolors==1.13",
321
+ "webencodings==0.5.1",
322
+ "websocket-client==1.5.1",
323
+ "websockets==11.0.2",
324
+ "werkzeug==2.3.1",
325
+ "wheel==0.40.0",
326
+ "widgetsnbextension==4.0.7",
327
+ "wrapt==1.15.0",
328
+ "xmltodict==0.13.0",
329
+ "xxhash==3.2.0",
330
+ "yarl==1.9.2",
331
+ "zc.lockfile==3.0.post1",
332
+ "zipp==3.15.0"
333
+ ]
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6022bffa030bf39ad8baeedc68e79b1129af909787fda758e4f7059b58967c0a
3
+ size 495500013
special_tokens_map.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "mask_token": "<mask>",
5
+ "pad_token": "<pad>",
6
+ "sep_token": "</s>",
7
+ "unk_token": "<unk>"
8
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [],
3
+ "bos_token": "<s>",
4
+ "clean_up_tokenization_spaces": true,
5
+ "cls_token": "<s>",
6
+ "do_lowercase_and_remove_accent": false,
7
+ "id2lang": null,
8
+ "lang2id": null,
9
+ "mask_token": "<mask>",
10
+ "model_max_length": 512,
11
+ "pad_token": "<pad>",
12
+ "sep_token": "</s>",
13
+ "tokenizer_class": "HerbertTokenizer",
14
+ "unk_token": "<unk>"
15
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff