gsaltintas committed on
Commit
ed489dc
·
verified ·
1 Parent(s): 139a559

Upload tokenizer file vocab.json - Upload model files

Browse files
Files changed (1) hide show
  1. tokenizer.json +17 -125
tokenizer.json CHANGED
@@ -1,127 +1,19 @@
1
  {
2
- "version": "1.0",
3
- "truncation": null,
4
- "padding": null,
5
- "added_tokens": [
6
- {
7
- "id": 0,
8
- "content": "<unk>",
9
- "single_word": false,
10
- "lstrip": false,
11
- "rstrip": false,
12
- "normalized": false,
13
- "special": true
14
- },
15
- {
16
- "id": 1,
17
- "content": "<s>",
18
- "single_word": false,
19
- "lstrip": false,
20
- "rstrip": false,
21
- "normalized": false,
22
- "special": true
23
- },
24
- {
25
- "id": 2,
26
- "content": "</s>",
27
- "single_word": false,
28
- "lstrip": false,
29
- "rstrip": false,
30
- "normalized": false,
31
- "special": true
32
- },
33
- {
34
- "id": 3,
35
- "content": "<pad>",
36
- "single_word": false,
37
- "lstrip": false,
38
- "rstrip": false,
39
- "normalized": false,
40
- "special": true
41
- },
42
- {
43
- "id": 4,
44
- "content": "mod",
45
- "single_word": false,
46
- "lstrip": false,
47
- "rstrip": false,
48
- "normalized": false,
49
- "special": true
50
- },
51
- {
52
- "id": 5,
53
- "content": "=",
54
- "single_word": false,
55
- "lstrip": false,
56
- "rstrip": false,
57
- "normalized": false,
58
- "special": true
59
- },
60
- {
61
- "id": 6,
62
- "content": " ",
63
- "single_word": false,
64
- "lstrip": false,
65
- "rstrip": false,
66
- "normalized": false,
67
- "special": true
68
- }
69
- ],
70
- "normalizer": null,
71
- "pre_tokenizer": {
72
- "type": "Sequence",
73
- "pretokenizers": [
74
- {
75
- "type": "Split",
76
- "pattern": {
77
- "Regex": "\\p{N}"
78
- },
79
- "behavior": "Isolated",
80
- "invert": false
81
- },
82
- {
83
- "type": "ByteLevel",
84
- "add_prefix_space": false,
85
- "trim_offsets": true,
86
- "use_regex": true
87
- }
88
- ]
89
- },
90
- "post_processor": null,
91
- "decoder": {
92
- "type": "ByteLevel",
93
- "add_prefix_space": true,
94
- "trim_offsets": true,
95
- "use_regex": true
96
- },
97
- "model": {
98
- "type": "BPE",
99
- "dropout": null,
100
- "unk_token": "<unk>",
101
- "continuing_subword_prefix": null,
102
- "end_of_word_suffix": null,
103
- "fuse_unk": false,
104
- "byte_fallback": false,
105
- "ignore_merges": false,
106
- "vocab": {
107
- "<unk>": 0,
108
- "<s>": 1,
109
- "</s>": 2,
110
- "<pad>": 3,
111
- "mod": 4,
112
- "=": 5,
113
- " ": 6,
114
- "0": 7,
115
- "1": 8,
116
- "2": 9,
117
- "3": 10,
118
- "4": 11,
119
- "5": 12,
120
- "6": 13,
121
- "7": 14,
122
- "8": 15,
123
- "9": 16
124
- },
125
- "merges": []
126
- }
127
  }
 
1
  {
2
+ "<unk>": 0,
3
+ "mod": 4,
4
+ "4": 11,
5
+ "=": 5,
6
+ "1": 8,
7
+ "<pad>": 3,
8
+ "7": 14,
9
+ "5": 12,
10
+ "3": 10,
11
+ "9": 16,
12
+ "<s>": 1,
13
+ "2": 9,
14
+ "0": 7,
15
+ "8": 15,
16
+ "6": 13,
17
+ " ": 6,
18
+ "</s>": 2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  }