m4lw4r3exe committed on
Commit
7919dff
1 Parent(s): 1272dca

Upload tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +3 -0
  2. tokenizer.json +173 -0
  3. tokenizer_config.json +4 -0
special_tokens_map.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "pad_token": "[PAD]"
3
+ }
tokenizer.json ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 2048,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
+ "padding": {
10
+ "strategy": "BatchLongest",
11
+ "direction": "Right",
12
+ "pad_to_multiple_of": null,
13
+ "pad_id": 1,
14
+ "pad_type_id": 0,
15
+ "pad_token": "[PAD]"
16
+ },
17
+ "added_tokens": [
18
+ {
19
+ "id": 0,
20
+ "content": "[UNK]",
21
+ "single_word": false,
22
+ "lstrip": false,
23
+ "rstrip": false,
24
+ "normalized": false,
25
+ "special": true
26
+ },
27
+ {
28
+ "id": 1,
29
+ "content": "[PAD]",
30
+ "single_word": false,
31
+ "lstrip": false,
32
+ "rstrip": false,
33
+ "normalized": false,
34
+ "special": true
35
+ },
36
+ {
37
+ "id": 2,
38
+ "content": "[MASK]",
39
+ "single_word": false,
40
+ "lstrip": false,
41
+ "rstrip": false,
42
+ "normalized": false,
43
+ "special": true
44
+ }
45
+ ],
46
+ "normalizer": null,
47
+ "pre_tokenizer": {
48
+ "type": "WhitespaceSplit"
49
+ },
50
+ "post_processor": null,
51
+ "decoder": null,
52
+ "model": {
53
+ "type": "WordLevel",
54
+ "vocab": {
55
+ "[UNK]": 0,
56
+ "[PAD]": 1,
57
+ "[MASK]": 2,
58
+ "TIME_DELTA=1": 3,
59
+ "TIME_DELTA=2": 4,
60
+ "NOTE_OFF=54": 5,
61
+ "NOTE_ON=54": 6,
62
+ "NOTE_OFF=51": 7,
63
+ "NOTE_ON=51": 8,
64
+ "BAR_END": 9,
65
+ "BAR_START": 10,
66
+ "NOTE_OFF=60": 11,
67
+ "NOTE_ON=60": 12,
68
+ "NOTE_OFF=82": 13,
69
+ "NOTE_ON=82": 14,
70
+ "NOTE_OFF=36": 15,
71
+ "NOTE_ON=36": 16,
72
+ "TIME_DELTA=3": 17,
73
+ "NOTE_OFF=63": 18,
74
+ "NOTE_ON=63": 19,
75
+ "NOTE_OFF=35": 20,
76
+ "NOTE_ON=35": 21,
77
+ "NOTE_OFF=58": 22,
78
+ "NOTE_ON=58": 23,
79
+ "NOTE_OFF=65": 24,
80
+ "NOTE_ON=65": 25,
81
+ "NOTE_OFF=55": 26,
82
+ "NOTE_ON=55": 27,
83
+ "NOTE_OFF=38": 28,
84
+ "NOTE_ON=38": 29,
85
+ "TIME_DELTA=4": 30,
86
+ "NOTE_OFF=62": 31,
87
+ "NOTE_ON=62": 32,
88
+ "NOTE_OFF=56": 33,
89
+ "NOTE_ON=56": 34,
90
+ "NOTE_OFF=40": 35,
91
+ "NOTE_ON=40": 36,
92
+ "NOTE_OFF=67": 37,
93
+ "NOTE_ON=67": 38,
94
+ "NOTE_OFF=34": 39,
95
+ "NOTE_ON=34": 40,
96
+ "NOTE_OFF=41": 41,
97
+ "NOTE_ON=41": 42,
98
+ "TRACK_END": 43,
99
+ "TRACK_START": 44,
100
+ "NOTE_OFF=39": 45,
101
+ "NOTE_ON=39": 46,
102
+ "NOTE_OFF=72": 47,
103
+ "NOTE_ON=72": 48,
104
+ "NOTE_OFF=48": 49,
105
+ "NOTE_ON=48": 50,
106
+ "TIME_DELTA=15": 51,
107
+ "NOTE_OFF=70": 52,
108
+ "NOTE_ON=70": 53,
109
+ "DENSITY=3": 54,
110
+ "TIME_DELTA=5": 55,
111
+ "NOTE_OFF=75": 56,
112
+ "NOTE_ON=75": 57,
113
+ "INST=4": 58,
114
+ "DENSITY=2": 59,
115
+ "DENSITY=0": 60,
116
+ "NOTE_OFF=76": 61,
117
+ "NOTE_ON=76": 62,
118
+ "INST=10": 63,
119
+ "TIME_DELTA=16": 64,
120
+ "NOTE_OFF=46": 65,
121
+ "NOTE_ON=46": 66,
122
+ "NOTE_OFF=66": 67,
123
+ "NOTE_ON=66": 68,
124
+ "PIECE_START": 69,
125
+ "INST=DRUMS": 70,
126
+ "INST=6": 71,
127
+ "DENSITY=1": 72,
128
+ "TIME_DELTA=6": 73,
129
+ "NOTE_OFF=74": 74,
130
+ "NOTE_ON=74": 75,
131
+ "NOTE_OFF=50": 76,
132
+ "NOTE_ON=50": 77,
133
+ "INST=1": 78,
134
+ "NOTE_OFF=57": 79,
135
+ "NOTE_ON=57": 80,
136
+ "INST=8": 81,
137
+ "NOTE_OFF=78": 82,
138
+ "NOTE_ON=78": 83,
139
+ "NOTE_OFF=31": 84,
140
+ "NOTE_ON=31": 85,
141
+ "TIME_DELTA=8": 86,
142
+ "INST=3": 87,
143
+ "TIME_DELTA=7": 88,
144
+ "INST=14": 89,
145
+ "NOTE_OFF=53": 90,
146
+ "NOTE_OFF=79": 91,
147
+ "NOTE_ON=53": 92,
148
+ "NOTE_ON=79": 93,
149
+ "INST=5": 94,
150
+ "NOTE_OFF=77": 95,
151
+ "NOTE_ON=77": 96,
152
+ "TIME_DELTA=10": 97,
153
+ "NOTE_OFF=86": 98,
154
+ "NOTE_ON=86": 99,
155
+ "NOTE_OFF=29": 100,
156
+ "NOTE_OFF=42": 101,
157
+ "NOTE_OFF=84": 102,
158
+ "NOTE_ON=29": 103,
159
+ "NOTE_ON=42": 104,
160
+ "NOTE_ON=84": 105,
161
+ "NOTE_OFF=87": 106,
162
+ "NOTE_ON=87": 107,
163
+ "TIME_DELTA=11": 108,
164
+ "NOTE_OFF=33": 109,
165
+ "NOTE_OFF=69": 110,
166
+ "NOTE_OFF=81": 111,
167
+ "NOTE_ON=33": 112,
168
+ "NOTE_ON=69": 113,
169
+ "NOTE_ON=81": 114
170
+ },
171
+ "unk_token": "[UNK]"
172
+ }
173
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "model_max_length": 1000000000000000019884624838656,
3
+ "tokenizer_class": "PreTrainedTokenizerFast"
4
+ }