duraad committed
Commit 2d57902
Parent: 1c0d23c

Upload tokenizer

Files changed (4)
  1. README.md +1 -1
  2. special_tokens_map.json +42 -6
  3. tokenizer.json +2 -2
  4. tokenizer_config.json +7 -0
README.md CHANGED
@@ -1,5 +1,4 @@
 ---
-base_model: duraad/nep-spell-mbart-new
 tags:
 - generated_from_trainer
 metrics:
@@ -7,6 +6,7 @@ metrics:
 - precision
 - recall
 - f1
+base_model: duraad/nep-spell-mbart-new
 model-index:
 - name: nep-spell-mbart-new
   results: []
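
The README change is metadata-only: the `base_model` key moves lower in the YAML front matter with the same value. A minimal sketch of reading it back via `huggingface_hub` (assuming the library is installed and the repo is public):

```python
from huggingface_hub import ModelCard

# Load the model card from the Hub and read its YAML front matter.
card = ModelCard.load("duraad/nep-spell-mbart-new")
print(card.data.base_model)  # duraad/nep-spell-mbart-new
```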
special_tokens_map.json CHANGED
@@ -53,9 +53,27 @@
     "gl_ES",
     "sl_SI"
   ],
-  "bos_token": "<s>",
-  "cls_token": "<s>",
-  "eos_token": "</s>",
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "cls_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
   "mask_token": {
     "content": "<mask>",
     "lstrip": true,
@@ -63,7 +81,25 @@
     "rstrip": false,
     "single_word": false
   },
-  "pad_token": "<pad>",
-  "sep_token": "</s>",
-  "unk_token": "<unk>"
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
 }
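
In effect, the bare-string special tokens are promoted to full AddedToken serializations, so the lstrip/rstrip/normalized/single_word matching flags are pinned in the file instead of depending on library defaults. A minimal sketch of inspecting the result after this commit (assuming transformers and sentencepiece are installed):

```python
from transformers import MBart50Tokenizer

tok = MBart50Tokenizer.from_pretrained("duraad/nep-spell-mbart-new")

# The plain map still collapses each entry back to its string content...
print(tok.special_tokens_map["bos_token"])  # <s>

# ...while the extended map exposes the flags serialized by this commit.
print(repr(tok.special_tokens_map_extended["bos_token"]))
# e.g. AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=False)
```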
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0742c8f849b4108e1b2d0e5ca109391cf86da0f8f9462d85ad2f9f7d97bf4cbf
-size 17110208
+oid sha256:a17faaf4d244b98f7b82cd97cc65088c2815c9375de600529f7f7634da093ab6
+size 17094857
tokenizer_config.json CHANGED
@@ -516,12 +516,19 @@
   "cls_token": "<s>",
   "eos_token": "</s>",
   "mask_token": "<mask>",
+  "max_length": 512,
   "model_max_length": 1024,
+  "pad_to_multiple_of": null,
   "pad_token": "<pad>",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
   "sep_token": "</s>",
   "sp_model_kwargs": {},
   "src_lang": "ne_NP",
+  "stride": 0,
   "tgt_lang": "ne_NP",
   "tokenizer_class": "MBart50Tokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
   "unk_token": "<unk>"
 }
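
These new keys persist the padding/truncation settings the tokenizer was saved with. Of them, `padding_side` and `truncation_side` become instance attributes honored on every call; `max_length`, `stride`, and `truncation_strategy` are recorded here but are normally passed per call. A minimal sketch of how they line up (assuming transformers, sentencepiece, and torch are installed):

```python
from transformers import MBart50Tokenizer

tok = MBart50Tokenizer.from_pretrained("duraad/nep-spell-mbart-new")
print(tok.padding_side, tok.truncation_side)  # right right, per the config

batch = tok(
    ["small example", "a much longer example that forces right-side padding"],
    padding="longest",   # pads on the right, since padding_side == "right"
    truncation=True,     # longest_first, mirroring "truncation_strategy"
    max_length=512,      # matches the persisted "max_length"
    return_tensors="pt",
)
print(batch["input_ids"].shape)
```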