radoslavralev committed on
Commit
a30e6b5
·
verified ·
1 Parent(s): 7fc2211

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. tokenizer.json +13 -14
  2. tokenizer_config.json +0 -0
tokenizer.json CHANGED
@@ -1,14 +1,13 @@
1
  {
2
  "version": "1.0",
3
  "truncation": {
 
4
  "max_length": 128,
5
  "strategy": "LongestFirst",
6
  "stride": 0
7
  },
8
  "padding": {
9
- "strategy": {
10
- "Fixed": 128
11
- },
12
  "direction": "Right",
13
  "pad_to_multiple_of": null,
14
  "pad_id": 0,
@@ -18,48 +17,48 @@
18
  "added_tokens": [
19
  {
20
  "id": 0,
21
- "special": true,
22
  "content": "[PAD]",
23
  "single_word": false,
24
  "lstrip": false,
25
  "rstrip": false,
26
- "normalized": false
 
27
  },
28
  {
29
  "id": 100,
30
- "special": true,
31
  "content": "[UNK]",
32
  "single_word": false,
33
  "lstrip": false,
34
  "rstrip": false,
35
- "normalized": false
 
36
  },
37
  {
38
  "id": 101,
39
- "special": true,
40
  "content": "[CLS]",
41
  "single_word": false,
42
  "lstrip": false,
43
  "rstrip": false,
44
- "normalized": false
 
45
  },
46
  {
47
  "id": 102,
48
- "special": true,
49
  "content": "[SEP]",
50
  "single_word": false,
51
  "lstrip": false,
52
  "rstrip": false,
53
- "normalized": false
 
54
  },
55
  {
56
  "id": 103,
57
- "special": true,
58
  "content": "[MASK]",
59
  "single_word": false,
60
  "lstrip": false,
61
  "rstrip": false,
62
- "normalized": false
 
63
  }
64
  ],
65
  "normalizer": {
@@ -30682,4 +30681,4 @@
30682
  "##~": 30521
30683
  }
30684
  }
30685
- }
 
1
  {
2
  "version": "1.0",
3
  "truncation": {
4
+ "direction": "Right",
5
  "max_length": 128,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
9
  "padding": {
10
+ "strategy": "BatchLongest",
 
 
11
  "direction": "Right",
12
  "pad_to_multiple_of": null,
13
  "pad_id": 0,
 
17
  "added_tokens": [
18
  {
19
  "id": 0,
 
20
  "content": "[PAD]",
21
  "single_word": false,
22
  "lstrip": false,
23
  "rstrip": false,
24
+ "normalized": false,
25
+ "special": true
26
  },
27
  {
28
  "id": 100,
 
29
  "content": "[UNK]",
30
  "single_word": false,
31
  "lstrip": false,
32
  "rstrip": false,
33
+ "normalized": false,
34
+ "special": true
35
  },
36
  {
37
  "id": 101,
 
38
  "content": "[CLS]",
39
  "single_word": false,
40
  "lstrip": false,
41
  "rstrip": false,
42
+ "normalized": false,
43
+ "special": true
44
  },
45
  {
46
  "id": 102,
 
47
  "content": "[SEP]",
48
  "single_word": false,
49
  "lstrip": false,
50
  "rstrip": false,
51
+ "normalized": false,
52
+ "special": true
53
  },
54
  {
55
  "id": 103,
 
56
  "content": "[MASK]",
57
  "single_word": false,
58
  "lstrip": false,
59
  "rstrip": false,
60
+ "normalized": false,
61
+ "special": true
62
  }
63
  ],
64
  "normalizer": {
 
30681
  "##~": 30521
30682
  }
30683
  }
30684
+ }
tokenizer_config.json CHANGED
The diff for this file is too large to render. See raw diff