Alperens1 committed
Commit 0491c59
1 parent: 565cc23

Upload tokenizer

Files changed (3):
  1. README.md +2 -2
  2. tokenizer.json +4 -4
  3. tokenizer_config.json +6 -1
README.md CHANGED
@@ -1,11 +1,11 @@
 ---
+language:
+- tr
 library_name: transformers
 base_model: boun-tabi-LMG/TURNA
 datasets:
 - GGLab/GECTurk
 - mcemilg/GECTurk-generation
-language:
-- tr
 ---
 
 # Model Card for Model ID
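This hunk only moves the `language` block within the YAML front matter; since YAML mapping keys are order-independent, the Hub reads the `tr` language tag the same way in either position.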
tokenizer.json CHANGED
@@ -964,15 +964,15 @@
     "pre_tokenizer": {
       "type": "Metaspace",
       "replacement": "▁",
-      "add_prefix_space": true,
-      "prepend_scheme": "always"
+      "prepend_scheme": "always",
+      "split": true
     },
     "post_processor": null,
     "decoder": {
       "type": "Metaspace",
       "replacement": "▁",
-      "add_prefix_space": true,
-      "prepend_scheme": "always"
+      "prepend_scheme": "always",
+      "split": true
     },
     "model": {
       "type": "Unigram",
tokenizer_config.json CHANGED
@@ -1,4 +1,6 @@
 {
+  "add_bos_token": true,
+  "add_eos_token": true,
   "added_tokens_decoder": {
     "0": {
       "content": "<PAD>",
@@ -939,10 +941,13 @@
   "clean_up_tokenization_spaces": false,
   "eos_token": "<EOS>",
   "extra_ids": 100,
+  "max_length": 45,
   "model_max_length": 1024,
   "pad_token": "<PAD>",
-  "padding_side": "right",
+  "padding_side": "left",
+  "stride": 0,
   "tokenizer_class": "PreTrainedTokenizerFast",
   "truncation_side": "right",
+  "truncation_strategy": "longest_first",
   "unk_token": "<UNK>"
 }
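The main behavioral change here is `padding_side` flipping from "right" to "left". The new `max_length`, `stride`, and `truncation_strategy` keys look like call-time encoding arguments that were serialized into the config at save time, and `add_bos_token`/`add_eos_token` are honored by specific tokenizer classes rather than guaranteed by `PreTrainedTokenizerFast` itself. A minimal sketch of how the new default surfaces, assuming a placeholder repo id (the actual id is not shown in this diff):

```python
# Minimal sketch; "Alperens1/model" is a placeholder repo id, not the real one.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("Alperens1/model")
print(tok.padding_side)  # "left" after this commit (previously "right")

# With left padding, <PAD> ids are prepended to the shorter sequence,
# the layout commonly used when batching inputs for generation.
batch = tok(["kısa", "daha uzun bir cümle"], padding=True)
print(batch["input_ids"])
```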