Upload tokenizer
Browse files- README.md +2 -2
- tokenizer.json +4 -4
- tokenizer_config.json +6 -1
README.md
CHANGED
@@ -1,11 +1,11 @@
|
|
1 |
---
|
|
|
|
|
2 |
library_name: transformers
|
3 |
base_model: boun-tabi-LMG/TURNA
|
4 |
datasets:
|
5 |
- GGLab/GECTurk
|
6 |
- mcemilg/GECTurk-generation
|
7 |
-
language:
|
8 |
-
- tr
|
9 |
---
|
10 |
|
11 |
# Model Card for Model ID
|
|
|
1 |
---
|
2 |
+
language:
|
3 |
+
- tr
|
4 |
library_name: transformers
|
5 |
base_model: boun-tabi-LMG/TURNA
|
6 |
datasets:
|
7 |
- GGLab/GECTurk
|
8 |
- mcemilg/GECTurk-generation
|
|
|
|
|
9 |
---
|
10 |
|
11 |
# Model Card for Model ID
|
tokenizer.json
CHANGED
@@ -964,15 +964,15 @@
|
|
964 |
"pre_tokenizer": {
|
965 |
"type": "Metaspace",
|
966 |
"replacement": "▁",
|
967 |
-
"add_prefix_space": true,
|
968 |
-
"prepend_scheme": "always"
|
969 |
},
|
970 |
"post_processor": null,
|
971 |
"decoder": {
|
972 |
"type": "Metaspace",
|
973 |
"replacement": "▁",
|
974 |
-
"add_prefix_space": true,
|
975 |
-
"prepend_scheme": "always"
|
976 |
},
|
977 |
"model": {
|
978 |
"type": "Unigram",
|
|
|
964 |
"pre_tokenizer": {
|
965 |
"type": "Metaspace",
|
966 |
"replacement": "▁",
|
967 |
+
"prepend_scheme": "always",
|
968 |
+
"split": true
|
969 |
},
|
970 |
"post_processor": null,
|
971 |
"decoder": {
|
972 |
"type": "Metaspace",
|
973 |
"replacement": "▁",
|
974 |
+
"prepend_scheme": "always",
|
975 |
+
"split": true
|
976 |
},
|
977 |
"model": {
|
978 |
"type": "Unigram",
|
tokenizer_config.json
CHANGED
@@ -1,4 +1,6 @@
|
|
1 |
{
|
|
|
|
|
2 |
"added_tokens_decoder": {
|
3 |
"0": {
|
4 |
"content": "<PAD>",
|
@@ -939,10 +941,13 @@
|
|
939 |
"clean_up_tokenization_spaces": false,
|
940 |
"eos_token": "<EOS>",
|
941 |
"extra_ids": 100,
|
|
|
942 |
"model_max_length": 1024,
|
943 |
"pad_token": "<PAD>",
|
944 |
-
"padding_side": "right",
|
|
|
945 |
"tokenizer_class": "PreTrainedTokenizerFast",
|
946 |
"truncation_side": "right",
|
|
|
947 |
"unk_token": "<UNK>"
|
948 |
}
|
|
|
1 |
{
|
2 |
+
"add_bos_token": true,
|
3 |
+
"add_eos_token": true,
|
4 |
"added_tokens_decoder": {
|
5 |
"0": {
|
6 |
"content": "<PAD>",
|
|
|
941 |
"clean_up_tokenization_spaces": false,
|
942 |
"eos_token": "<EOS>",
|
943 |
"extra_ids": 100,
|
944 |
+
"max_length": 45,
|
945 |
"model_max_length": 1024,
|
946 |
"pad_token": "<PAD>",
|
947 |
+
"padding_side": "left",
|
948 |
+
"stride": 0,
|
949 |
"tokenizer_class": "PreTrainedTokenizerFast",
|
950 |
"truncation_side": "right",
|
951 |
+
"truncation_strategy": "longest_first",
|
952 |
"unk_token": "<UNK>"
|
953 |
}
|