Commit 9117913
Parent: 1301423
Committed by Xenova

Upload tokenizer.json

Files changed (1)
  1. tokenizer.json +98 -0
tokenizer.json ADDED
@@ -0,0 +1,98 @@
+ {
+   "version": "1.0",
+   "truncation": null,
+   "padding": null,
+   "added_tokens": [],
+   "normalizer": null,
+   "pre_tokenizer": {
+     "type": "BertPreTokenizer"
+   },
+   "post_processor": {
+     "type": "TemplateProcessing",
+     "single": [
+       {
+         "SpecialToken": {
+           "id": "<cls>",
+           "type_id": 0
+         }
+       },
+       {
+         "Sequence": {
+           "id": "A",
+           "type_id": 0
+         }
+       }
+     ],
+     "pair": [
+       {
+         "Sequence": {
+           "id": "A",
+           "type_id": 0
+         }
+       },
+       {
+         "Sequence": {
+           "id": "B",
+           "type_id": 1
+         }
+       }
+     ],
+     "special_tokens": {
+       "<cls>": {
+         "id": "<cls>",
+         "ids": [
+           0
+         ],
+         "tokens": [
+           "<cls>"
+         ]
+       }
+     }
+   },
+   "decoder": {
+     "type": "WordPiece",
+     "prefix": "",
+     "cleanup": true
+   },
+   "model": {
+     "type": "WordPiece",
+     "unk_token": "<unk>",
+     "continuing_subword_prefix": "",
+     "max_input_chars_per_word": 10000000000,
+     "vocab": {
+       "<cls>": 0,
+       "<pad>": 1,
+       "<eos>": 2,
+       "<unk>": 3,
+       "L": 4,
+       "A": 5,
+       "G": 6,
+       "V": 7,
+       "S": 8,
+       "E": 9,
+       "R": 10,
+       "T": 11,
+       "I": 12,
+       "D": 13,
+       "P": 14,
+       "K": 15,
+       "Q": 16,
+       "N": 17,
+       "F": 18,
+       "Y": 19,
+       "M": 20,
+       "H": 21,
+       "W": 22,
+       "C": 23,
+       "X": 24,
+       "B": 25,
+       "U": 26,
+       "Z": 27,
+       "O": 28,
+       ".": 29,
+       "-": 30,
+       "<null_1>": 31,
+       "<mask>": 32
+     }
+   }
+ }