End of training
Browse files- .gitignore +1 -0
- added_tokens.json +3 -0
- config.json +39 -0
- generation_config.json +6 -0
- merges.txt +251 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +6 -0
- tokenizer.json +600 -0
- tokenizer_config.json +9 -0
- training_args.bin +3 -0
- vocab.json +1 -0
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
checkpoint-*/
|
added_tokens.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"<|endoftext|>": 270
|
3 |
+
}
|
config.json
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "gpt2",
|
3 |
+
"activation_function": "gelu_new",
|
4 |
+
"architectures": [
|
5 |
+
"GPT2LMHeadModel"
|
6 |
+
],
|
7 |
+
"attn_pdrop": 0.1,
|
8 |
+
"bos_token_id": 270,
|
9 |
+
"embd_pdrop": 0.1,
|
10 |
+
"eos_token_id": 270,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"layer_norm_epsilon": 1e-05,
|
13 |
+
"model_type": "gpt2",
|
14 |
+
"n_ctx": 20,
|
15 |
+
"n_embd": 768,
|
16 |
+
"n_head": 12,
|
17 |
+
"n_inner": null,
|
18 |
+
"n_layer": 12,
|
19 |
+
"n_positions": 1024,
|
20 |
+
"reorder_and_upcast_attn": false,
|
21 |
+
"resid_pdrop": 0.1,
|
22 |
+
"scale_attn_by_inverse_layer_idx": false,
|
23 |
+
"scale_attn_weights": true,
|
24 |
+
"summary_activation": null,
|
25 |
+
"summary_first_dropout": 0.1,
|
26 |
+
"summary_proj_to_labels": true,
|
27 |
+
"summary_type": "cls_index",
|
28 |
+
"summary_use_proj": true,
|
29 |
+
"task_specific_params": {
|
30 |
+
"text-generation": {
|
31 |
+
"do_sample": true,
|
32 |
+
"max_length": 50
|
33 |
+
}
|
34 |
+
},
|
35 |
+
"torch_dtype": "float32",
|
36 |
+
"transformers_version": "4.30.2",
|
37 |
+
"use_cache": true,
|
38 |
+
"vocab_size": 271
|
39 |
+
}
|
generation_config.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_from_model_config": true,
|
3 |
+
"bos_token_id": 270,
|
4 |
+
"eos_token_id": 270,
|
5 |
+
"transformers_version": "4.30.2"
|
6 |
+
}
|
merges.txt
ADDED
@@ -0,0 +1,251 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#version: 0.2
|
2 |
+
L L
|
3 |
+
H H
|
4 |
+
A A
|
5 |
+
B B
|
6 |
+
F F
|
7 |
+
K K
|
8 |
+
P P
|
9 |
+
G G
|
10 |
+
N N
|
11 |
+
O O
|
12 |
+
M M
|
13 |
+
C C
|
14 |
+
E E
|
15 |
+
I I
|
16 |
+
J J
|
17 |
+
D D
|
18 |
+
HH HH
|
19 |
+
LL LL
|
20 |
+
AA AA
|
21 |
+
FF FF
|
22 |
+
BB BB
|
23 |
+
KK KK
|
24 |
+
PP PP
|
25 |
+
NN NN
|
26 |
+
OO OO
|
27 |
+
MM MM
|
28 |
+
GG GG
|
29 |
+
EE EE
|
30 |
+
CC CC
|
31 |
+
II II
|
32 |
+
JJ JJ
|
33 |
+
DD DD
|
34 |
+
LL L
|
35 |
+
BB B
|
36 |
+
HH H
|
37 |
+
AA A
|
38 |
+
FF F
|
39 |
+
GG G
|
40 |
+
KK K
|
41 |
+
CC C
|
42 |
+
MM M
|
43 |
+
NN N
|
44 |
+
PP P
|
45 |
+
OO O
|
46 |
+
EE E
|
47 |
+
II I
|
48 |
+
JJ J
|
49 |
+
DD D
|
50 |
+
HHHH HHHH
|
51 |
+
FFFF FFFF
|
52 |
+
AAAA AAAA
|
53 |
+
BBBB B
|
54 |
+
LLLL LLLL
|
55 |
+
KKKK KKKK
|
56 |
+
NNNN NNNN
|
57 |
+
PPPP P
|
58 |
+
GGGG G
|
59 |
+
CCCC C
|
60 |
+
MMMM MMMM
|
61 |
+
LLLL L
|
62 |
+
OOOO O
|
63 |
+
HHHH H
|
64 |
+
EEEE E
|
65 |
+
AAAA A
|
66 |
+
IIII I
|
67 |
+
DDDD D
|
68 |
+
FFFF F
|
69 |
+
JJJJ J
|
70 |
+
KKKK K
|
71 |
+
BBBB BBBB
|
72 |
+
NNNN N
|
73 |
+
MMMM M
|
74 |
+
OOOO OOOO
|
75 |
+
PPPP PPPP
|
76 |
+
EEEE EEEE
|
77 |
+
JJJJ JJJJ
|
78 |
+
IIII IIII
|
79 |
+
GGGG GGGG
|
80 |
+
CCCC CCCC
|
81 |
+
LLLL LL
|
82 |
+
DDDD DDDD
|
83 |
+
AAAA AA
|
84 |
+
HHHH HH
|
85 |
+
BBBB BB
|
86 |
+
FFFF FF
|
87 |
+
KKKK KK
|
88 |
+
PPPP PP
|
89 |
+
GGGG GG
|
90 |
+
NNNN NN
|
91 |
+
EEEE EE
|
92 |
+
DDDD DD
|
93 |
+
OOOO OO
|
94 |
+
CCCC CC
|
95 |
+
MMMM MM
|
96 |
+
JJJJ JJ
|
97 |
+
IIII II
|
98 |
+
LLLL LLL
|
99 |
+
AAAA AAA
|
100 |
+
HHHH HHH
|
101 |
+
BBBB BBB
|
102 |
+
FFFF FFF
|
103 |
+
KKKK KKK
|
104 |
+
PPPP PPP
|
105 |
+
MMMM MMM
|
106 |
+
OOOO OOO
|
107 |
+
GGGG GGG
|
108 |
+
CCCC CCC
|
109 |
+
NNNN NNN
|
110 |
+
EEEE EEE
|
111 |
+
IIII III
|
112 |
+
JJJJ JJJ
|
113 |
+
DDDD DDD
|
114 |
+
FFFFFFFF F
|
115 |
+
AAAAAAAA A
|
116 |
+
HHHHHHHH H
|
117 |
+
BBBB BBBBB
|
118 |
+
LLLLLLLL L
|
119 |
+
KKKKKKKK K
|
120 |
+
OOOO OOOOO
|
121 |
+
NNNNNNNN N
|
122 |
+
PPPP PPPPP
|
123 |
+
GGGG GGGGG
|
124 |
+
CCCC CCCCC
|
125 |
+
MMMMMMMM M
|
126 |
+
IIII IIIII
|
127 |
+
DDDD DDDDD
|
128 |
+
EEEE EEEEE
|
129 |
+
JJJJ JJJJJ
|
130 |
+
HHHHHHHH HH
|
131 |
+
FFFFFFFF FF
|
132 |
+
AAAAAAAA AA
|
133 |
+
LLLLLLLL LL
|
134 |
+
KKKKKKKK KK
|
135 |
+
BBBBBBBB BB
|
136 |
+
PPPPPPPP PP
|
137 |
+
OOOOOOOO OO
|
138 |
+
HHHHHHHH HHH
|
139 |
+
NNNNNNNN NN
|
140 |
+
EEEEEEEE EE
|
141 |
+
MMMMMMMM MM
|
142 |
+
FFFFFFFF FFF
|
143 |
+
JJJJJJJJ JJ
|
144 |
+
DDDDDDDD DD
|
145 |
+
CCCCCCCC CC
|
146 |
+
IIIIIIII II
|
147 |
+
GGGGGGGG GG
|
148 |
+
KKKKKKKK KKK
|
149 |
+
AAAAAAAA AAA
|
150 |
+
LLLLLLLL LLL
|
151 |
+
BBBBBBBB BBB
|
152 |
+
FFFFFFFF FFFFFFFF
|
153 |
+
NNNNNNNN NNN
|
154 |
+
MMMMMMMM MMM
|
155 |
+
OOOOOOOO OOO
|
156 |
+
PPPPPPPP PPP
|
157 |
+
HHHHHHHH HHHHHHHH
|
158 |
+
EEEEEEEE EEE
|
159 |
+
JJJJJJJJ JJJ
|
160 |
+
IIIIIIII III
|
161 |
+
AAAAAAAA AAAAAAAA
|
162 |
+
CCCCCCCC CCC
|
163 |
+
GGGGGGGG GGG
|
164 |
+
LLLLLLLL LLLLLLLL
|
165 |
+
DDDDDDDD DDD
|
166 |
+
HHHHHHHH HHHH
|
167 |
+
KKKKKKKK KKKKKKKK
|
168 |
+
AAAAAAAA AAAA
|
169 |
+
LLL MMM
|
170 |
+
MMMMMMMM MMMMMMMM
|
171 |
+
NNNNNNNN NNNNNNNN
|
172 |
+
BBBBBBBB BBBBBBBB
|
173 |
+
LLL HHH
|
174 |
+
FFFFFFFF FFFF
|
175 |
+
JJJJJJJJ JJJJJJJJ
|
176 |
+
LLL CCC
|
177 |
+
LLLLLLLL LLLL
|
178 |
+
LLL BBB
|
179 |
+
KKKKKKKK KKKK
|
180 |
+
IIIIIIII IIII
|
181 |
+
PPPPPPPP PPPP
|
182 |
+
BBB HHH
|
183 |
+
OOOOOOOO OOOOOOOO
|
184 |
+
BBBBBBBB BBBB
|
185 |
+
EEEEEEEE EEEE
|
186 |
+
AAA OOO
|
187 |
+
NNNNNNNN NNNN
|
188 |
+
DDDDDDDD DDDD
|
189 |
+
MMMMMMMM MMMM
|
190 |
+
CCCCCCCC CCCC
|
191 |
+
MMM CCC
|
192 |
+
GGGGGGGG GGGG
|
193 |
+
BBB LLL
|
194 |
+
FFFFFFFF FFFFF
|
195 |
+
BBB CCC
|
196 |
+
BBB MMM
|
197 |
+
DDD KKK
|
198 |
+
BBB GGG
|
199 |
+
LLL GGG
|
200 |
+
OOOOOOOO OOOO
|
201 |
+
HHH CCC
|
202 |
+
FFF KKK
|
203 |
+
HHH LLL
|
204 |
+
AAA KKK
|
205 |
+
EEE GGG
|
206 |
+
MMM LLL
|
207 |
+
BBB FFF
|
208 |
+
AAA PPP
|
209 |
+
AAAAAAAA AAAAA
|
210 |
+
HHHHHHHH HHHHH
|
211 |
+
AAA HHH
|
212 |
+
JJJJJJJJ JJJJ
|
213 |
+
PPPPPPPP PPPPPPPP
|
214 |
+
AAA FFF
|
215 |
+
LLL KKK
|
216 |
+
AAA GGG
|
217 |
+
EEE KKK
|
218 |
+
PPP FFF
|
219 |
+
III CCC
|
220 |
+
EEEEEEEE EEEEEEEE
|
221 |
+
LLLL CCC
|
222 |
+
KKKKKKKK KKKKK
|
223 |
+
NNN HHH
|
224 |
+
NNN GGG
|
225 |
+
LLLLLLLL LLLLL
|
226 |
+
MMMMMMMM MMMMM
|
227 |
+
BBB KKK
|
228 |
+
AAA JJJ
|
229 |
+
BBBBBBBB BBBBB
|
230 |
+
BBB PPP
|
231 |
+
HHH JJJ
|
232 |
+
NNN KKK
|
233 |
+
III GGG
|
234 |
+
LLLL HHHH
|
235 |
+
DDD GGG
|
236 |
+
BBB EEE
|
237 |
+
PPP CCC
|
238 |
+
LLLL HHH
|
239 |
+
III FFF
|
240 |
+
BBB OOO
|
241 |
+
BBB JJJ
|
242 |
+
LLLL CCCC
|
243 |
+
GGG JJJ
|
244 |
+
GGGGGGGG GGGGGGGG
|
245 |
+
LLL OOO
|
246 |
+
NNN OOO
|
247 |
+
AAA CCC
|
248 |
+
LLLL BBBB
|
249 |
+
DDD CCC
|
250 |
+
MMM HHH
|
251 |
+
IIIIIIII IIIIIIII
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9d4bb48c5ddd03422653feb3416689f32d5d19597e7f57863f02c9595c4a66f6
|
3 |
+
size 344250205
|
special_tokens_map.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": "<|endoftext|>",
|
3 |
+
"eos_token": "<|endoftext|>",
|
4 |
+
"pad_token": "<|endoftext|>",
|
5 |
+
"unk_token": "<|endoftext|>"
|
6 |
+
}
|
tokenizer.json
ADDED
@@ -0,0 +1,600 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"version": "1.0",
|
3 |
+
"truncation": {
|
4 |
+
"direction": "Right",
|
5 |
+
"max_length": 20,
|
6 |
+
"strategy": "LongestFirst",
|
7 |
+
"stride": 0
|
8 |
+
},
|
9 |
+
"padding": null,
|
10 |
+
"added_tokens": [
|
11 |
+
{
|
12 |
+
"id": 0,
|
13 |
+
"content": "<[|endoftext|>]",
|
14 |
+
"single_word": false,
|
15 |
+
"lstrip": false,
|
16 |
+
"rstrip": false,
|
17 |
+
"normalized": false,
|
18 |
+
"special": true
|
19 |
+
},
|
20 |
+
{
|
21 |
+
"id": 1,
|
22 |
+
"content": "...",
|
23 |
+
"single_word": false,
|
24 |
+
"lstrip": false,
|
25 |
+
"rstrip": false,
|
26 |
+
"normalized": false,
|
27 |
+
"special": true
|
28 |
+
},
|
29 |
+
{
|
30 |
+
"id": 2,
|
31 |
+
"content": "\n",
|
32 |
+
"single_word": false,
|
33 |
+
"lstrip": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"normalized": false,
|
36 |
+
"special": true
|
37 |
+
},
|
38 |
+
{
|
39 |
+
"id": 270,
|
40 |
+
"content": "<|endoftext|>",
|
41 |
+
"single_word": false,
|
42 |
+
"lstrip": false,
|
43 |
+
"rstrip": false,
|
44 |
+
"normalized": false,
|
45 |
+
"special": true
|
46 |
+
}
|
47 |
+
],
|
48 |
+
"normalizer": null,
|
49 |
+
"pre_tokenizer": {
|
50 |
+
"type": "ByteLevel",
|
51 |
+
"add_prefix_space": false,
|
52 |
+
"trim_offsets": true,
|
53 |
+
"use_regex": true
|
54 |
+
},
|
55 |
+
"post_processor": {
|
56 |
+
"type": "ByteLevel",
|
57 |
+
"add_prefix_space": true,
|
58 |
+
"trim_offsets": false,
|
59 |
+
"use_regex": true
|
60 |
+
},
|
61 |
+
"decoder": {
|
62 |
+
"type": "ByteLevel",
|
63 |
+
"add_prefix_space": true,
|
64 |
+
"trim_offsets": true,
|
65 |
+
"use_regex": true
|
66 |
+
},
|
67 |
+
"model": {
|
68 |
+
"type": "BPE",
|
69 |
+
"dropout": null,
|
70 |
+
"unk_token": null,
|
71 |
+
"continuing_subword_prefix": null,
|
72 |
+
"end_of_word_suffix": null,
|
73 |
+
"fuse_unk": false,
|
74 |
+
"byte_fallback": false,
|
75 |
+
"vocab": {
|
76 |
+
"<[|endoftext|>]": 0,
|
77 |
+
"...": 1,
|
78 |
+
"\n": 2,
|
79 |
+
"A": 3,
|
80 |
+
"B": 4,
|
81 |
+
"C": 5,
|
82 |
+
"D": 6,
|
83 |
+
"E": 7,
|
84 |
+
"F": 8,
|
85 |
+
"G": 9,
|
86 |
+
"H": 10,
|
87 |
+
"I": 11,
|
88 |
+
"J": 12,
|
89 |
+
"K": 13,
|
90 |
+
"L": 14,
|
91 |
+
"M": 15,
|
92 |
+
"N": 16,
|
93 |
+
"O": 17,
|
94 |
+
"P": 18,
|
95 |
+
"Ċ": 19,
|
96 |
+
"LL": 20,
|
97 |
+
"HH": 21,
|
98 |
+
"AA": 22,
|
99 |
+
"BB": 23,
|
100 |
+
"FF": 24,
|
101 |
+
"KK": 25,
|
102 |
+
"PP": 26,
|
103 |
+
"GG": 27,
|
104 |
+
"NN": 28,
|
105 |
+
"OO": 29,
|
106 |
+
"MM": 30,
|
107 |
+
"CC": 31,
|
108 |
+
"EE": 32,
|
109 |
+
"II": 33,
|
110 |
+
"JJ": 34,
|
111 |
+
"DD": 35,
|
112 |
+
"HHHH": 36,
|
113 |
+
"LLLL": 37,
|
114 |
+
"AAAA": 38,
|
115 |
+
"FFFF": 39,
|
116 |
+
"BBBB": 40,
|
117 |
+
"KKKK": 41,
|
118 |
+
"PPPP": 42,
|
119 |
+
"NNNN": 43,
|
120 |
+
"OOOO": 44,
|
121 |
+
"MMMM": 45,
|
122 |
+
"GGGG": 46,
|
123 |
+
"EEEE": 47,
|
124 |
+
"CCCC": 48,
|
125 |
+
"IIII": 49,
|
126 |
+
"JJJJ": 50,
|
127 |
+
"DDDD": 51,
|
128 |
+
"LLL": 52,
|
129 |
+
"BBB": 53,
|
130 |
+
"HHH": 54,
|
131 |
+
"AAA": 55,
|
132 |
+
"FFF": 56,
|
133 |
+
"GGG": 57,
|
134 |
+
"KKK": 58,
|
135 |
+
"CCC": 59,
|
136 |
+
"MMM": 60,
|
137 |
+
"NNN": 61,
|
138 |
+
"PPP": 62,
|
139 |
+
"OOO": 63,
|
140 |
+
"EEE": 64,
|
141 |
+
"III": 65,
|
142 |
+
"JJJ": 66,
|
143 |
+
"DDD": 67,
|
144 |
+
"HHHHHHHH": 68,
|
145 |
+
"FFFFFFFF": 69,
|
146 |
+
"AAAAAAAA": 70,
|
147 |
+
"BBBBB": 71,
|
148 |
+
"LLLLLLLL": 72,
|
149 |
+
"KKKKKKKK": 73,
|
150 |
+
"NNNNNNNN": 74,
|
151 |
+
"PPPPP": 75,
|
152 |
+
"GGGGG": 76,
|
153 |
+
"CCCCC": 77,
|
154 |
+
"MMMMMMMM": 78,
|
155 |
+
"LLLLL": 79,
|
156 |
+
"OOOOO": 80,
|
157 |
+
"HHHHH": 81,
|
158 |
+
"EEEEE": 82,
|
159 |
+
"AAAAA": 83,
|
160 |
+
"IIIII": 84,
|
161 |
+
"DDDDD": 85,
|
162 |
+
"FFFFF": 86,
|
163 |
+
"JJJJJ": 87,
|
164 |
+
"KKKKK": 88,
|
165 |
+
"BBBBBBBB": 89,
|
166 |
+
"NNNNN": 90,
|
167 |
+
"MMMMM": 91,
|
168 |
+
"OOOOOOOO": 92,
|
169 |
+
"PPPPPPPP": 93,
|
170 |
+
"EEEEEEEE": 94,
|
171 |
+
"JJJJJJJJ": 95,
|
172 |
+
"IIIIIIII": 96,
|
173 |
+
"GGGGGGGG": 97,
|
174 |
+
"CCCCCCCC": 98,
|
175 |
+
"LLLLLL": 99,
|
176 |
+
"DDDDDDDD": 100,
|
177 |
+
"AAAAAA": 101,
|
178 |
+
"HHHHHH": 102,
|
179 |
+
"BBBBBB": 103,
|
180 |
+
"FFFFFF": 104,
|
181 |
+
"KKKKKK": 105,
|
182 |
+
"PPPPPP": 106,
|
183 |
+
"GGGGGG": 107,
|
184 |
+
"NNNNNN": 108,
|
185 |
+
"EEEEEE": 109,
|
186 |
+
"DDDDDD": 110,
|
187 |
+
"OOOOOO": 111,
|
188 |
+
"CCCCCC": 112,
|
189 |
+
"MMMMMM": 113,
|
190 |
+
"JJJJJJ": 114,
|
191 |
+
"IIIIII": 115,
|
192 |
+
"LLLLLLL": 116,
|
193 |
+
"AAAAAAA": 117,
|
194 |
+
"HHHHHHH": 118,
|
195 |
+
"BBBBBBB": 119,
|
196 |
+
"FFFFFFF": 120,
|
197 |
+
"KKKKKKK": 121,
|
198 |
+
"PPPPPPP": 122,
|
199 |
+
"MMMMMMM": 123,
|
200 |
+
"OOOOOOO": 124,
|
201 |
+
"GGGGGGG": 125,
|
202 |
+
"CCCCCCC": 126,
|
203 |
+
"NNNNNNN": 127,
|
204 |
+
"EEEEEEE": 128,
|
205 |
+
"IIIIIII": 129,
|
206 |
+
"JJJJJJJ": 130,
|
207 |
+
"DDDDDDD": 131,
|
208 |
+
"FFFFFFFFF": 132,
|
209 |
+
"AAAAAAAAA": 133,
|
210 |
+
"HHHHHHHHH": 134,
|
211 |
+
"BBBBBBBBB": 135,
|
212 |
+
"LLLLLLLLL": 136,
|
213 |
+
"KKKKKKKKK": 137,
|
214 |
+
"OOOOOOOOO": 138,
|
215 |
+
"NNNNNNNNN": 139,
|
216 |
+
"PPPPPPPPP": 140,
|
217 |
+
"GGGGGGGGG": 141,
|
218 |
+
"CCCCCCCCC": 142,
|
219 |
+
"MMMMMMMMM": 143,
|
220 |
+
"IIIIIIIII": 144,
|
221 |
+
"DDDDDDDDD": 145,
|
222 |
+
"EEEEEEEEE": 146,
|
223 |
+
"JJJJJJJJJ": 147,
|
224 |
+
"HHHHHHHHHH": 148,
|
225 |
+
"FFFFFFFFFF": 149,
|
226 |
+
"AAAAAAAAAA": 150,
|
227 |
+
"LLLLLLLLLL": 151,
|
228 |
+
"KKKKKKKKKK": 152,
|
229 |
+
"BBBBBBBBBB": 153,
|
230 |
+
"PPPPPPPPPP": 154,
|
231 |
+
"OOOOOOOOOO": 155,
|
232 |
+
"HHHHHHHHHHH": 156,
|
233 |
+
"NNNNNNNNNN": 157,
|
234 |
+
"EEEEEEEEEE": 158,
|
235 |
+
"MMMMMMMMMM": 159,
|
236 |
+
"FFFFFFFFFFF": 160,
|
237 |
+
"JJJJJJJJJJ": 161,
|
238 |
+
"DDDDDDDDDD": 162,
|
239 |
+
"CCCCCCCCCC": 163,
|
240 |
+
"IIIIIIIIII": 164,
|
241 |
+
"GGGGGGGGGG": 165,
|
242 |
+
"KKKKKKKKKKK": 166,
|
243 |
+
"AAAAAAAAAAA": 167,
|
244 |
+
"LLLLLLLLLLL": 168,
|
245 |
+
"BBBBBBBBBBB": 169,
|
246 |
+
"FFFFFFFFFFFFFFFF": 170,
|
247 |
+
"NNNNNNNNNNN": 171,
|
248 |
+
"MMMMMMMMMMM": 172,
|
249 |
+
"OOOOOOOOOOO": 173,
|
250 |
+
"PPPPPPPPPPP": 174,
|
251 |
+
"HHHHHHHHHHHHHHHH": 175,
|
252 |
+
"EEEEEEEEEEE": 176,
|
253 |
+
"JJJJJJJJJJJ": 177,
|
254 |
+
"IIIIIIIIIII": 178,
|
255 |
+
"AAAAAAAAAAAAAAAA": 179,
|
256 |
+
"CCCCCCCCCCC": 180,
|
257 |
+
"GGGGGGGGGGG": 181,
|
258 |
+
"LLLLLLLLLLLLLLLL": 182,
|
259 |
+
"DDDDDDDDDDD": 183,
|
260 |
+
"HHHHHHHHHHHH": 184,
|
261 |
+
"KKKKKKKKKKKKKKKK": 185,
|
262 |
+
"AAAAAAAAAAAA": 186,
|
263 |
+
"LLLMMM": 187,
|
264 |
+
"MMMMMMMMMMMMMMMM": 188,
|
265 |
+
"NNNNNNNNNNNNNNNN": 189,
|
266 |
+
"BBBBBBBBBBBBBBBB": 190,
|
267 |
+
"LLLHHH": 191,
|
268 |
+
"FFFFFFFFFFFF": 192,
|
269 |
+
"JJJJJJJJJJJJJJJJ": 193,
|
270 |
+
"LLLCCC": 194,
|
271 |
+
"LLLLLLLLLLLL": 195,
|
272 |
+
"LLLBBB": 196,
|
273 |
+
"KKKKKKKKKKKK": 197,
|
274 |
+
"IIIIIIIIIIII": 198,
|
275 |
+
"PPPPPPPPPPPP": 199,
|
276 |
+
"BBBHHH": 200,
|
277 |
+
"OOOOOOOOOOOOOOOO": 201,
|
278 |
+
"BBBBBBBBBBBB": 202,
|
279 |
+
"EEEEEEEEEEEE": 203,
|
280 |
+
"AAAOOO": 204,
|
281 |
+
"NNNNNNNNNNNN": 205,
|
282 |
+
"DDDDDDDDDDDD": 206,
|
283 |
+
"MMMMMMMMMMMM": 207,
|
284 |
+
"CCCCCCCCCCCC": 208,
|
285 |
+
"MMMCCC": 209,
|
286 |
+
"GGGGGGGGGGGG": 210,
|
287 |
+
"BBBLLL": 211,
|
288 |
+
"FFFFFFFFFFFFF": 212,
|
289 |
+
"BBBCCC": 213,
|
290 |
+
"BBBMMM": 214,
|
291 |
+
"DDDKKK": 215,
|
292 |
+
"BBBGGG": 216,
|
293 |
+
"LLLGGG": 217,
|
294 |
+
"OOOOOOOOOOOO": 218,
|
295 |
+
"HHHCCC": 219,
|
296 |
+
"FFFKKK": 220,
|
297 |
+
"HHHLLL": 221,
|
298 |
+
"AAAKKK": 222,
|
299 |
+
"EEEGGG": 223,
|
300 |
+
"MMMLLL": 224,
|
301 |
+
"BBBFFF": 225,
|
302 |
+
"AAAPPP": 226,
|
303 |
+
"AAAAAAAAAAAAA": 227,
|
304 |
+
"HHHHHHHHHHHHH": 228,
|
305 |
+
"AAAHHH": 229,
|
306 |
+
"JJJJJJJJJJJJ": 230,
|
307 |
+
"PPPPPPPPPPPPPPPP": 231,
|
308 |
+
"AAAFFF": 232,
|
309 |
+
"LLLKKK": 233,
|
310 |
+
"AAAGGG": 234,
|
311 |
+
"EEEKKK": 235,
|
312 |
+
"PPPFFF": 236,
|
313 |
+
"IIICCC": 237,
|
314 |
+
"EEEEEEEEEEEEEEEE": 238,
|
315 |
+
"LLLLCCC": 239,
|
316 |
+
"KKKKKKKKKKKKK": 240,
|
317 |
+
"NNNHHH": 241,
|
318 |
+
"NNNGGG": 242,
|
319 |
+
"LLLLLLLLLLLLL": 243,
|
320 |
+
"MMMMMMMMMMMMM": 244,
|
321 |
+
"BBBKKK": 245,
|
322 |
+
"AAAJJJ": 246,
|
323 |
+
"BBBBBBBBBBBBB": 247,
|
324 |
+
"BBBPPP": 248,
|
325 |
+
"HHHJJJ": 249,
|
326 |
+
"NNNKKK": 250,
|
327 |
+
"IIIGGG": 251,
|
328 |
+
"LLLLHHHH": 252,
|
329 |
+
"DDDGGG": 253,
|
330 |
+
"BBBEEE": 254,
|
331 |
+
"PPPCCC": 255,
|
332 |
+
"LLLLHHH": 256,
|
333 |
+
"IIIFFF": 257,
|
334 |
+
"BBBOOO": 258,
|
335 |
+
"BBBJJJ": 259,
|
336 |
+
"LLLLCCCC": 260,
|
337 |
+
"GGGJJJ": 261,
|
338 |
+
"GGGGGGGGGGGGGGGG": 262,
|
339 |
+
"LLLOOO": 263,
|
340 |
+
"NNNOOO": 264,
|
341 |
+
"AAACCC": 265,
|
342 |
+
"LLLLBBBB": 266,
|
343 |
+
"DDDCCC": 267,
|
344 |
+
"MMMHHH": 268,
|
345 |
+
"IIIIIIIIIIIIIIII": 269
|
346 |
+
},
|
347 |
+
"merges": [
|
348 |
+
"L L",
|
349 |
+
"H H",
|
350 |
+
"A A",
|
351 |
+
"B B",
|
352 |
+
"F F",
|
353 |
+
"K K",
|
354 |
+
"P P",
|
355 |
+
"G G",
|
356 |
+
"N N",
|
357 |
+
"O O",
|
358 |
+
"M M",
|
359 |
+
"C C",
|
360 |
+
"E E",
|
361 |
+
"I I",
|
362 |
+
"J J",
|
363 |
+
"D D",
|
364 |
+
"HH HH",
|
365 |
+
"LL LL",
|
366 |
+
"AA AA",
|
367 |
+
"FF FF",
|
368 |
+
"BB BB",
|
369 |
+
"KK KK",
|
370 |
+
"PP PP",
|
371 |
+
"NN NN",
|
372 |
+
"OO OO",
|
373 |
+
"MM MM",
|
374 |
+
"GG GG",
|
375 |
+
"EE EE",
|
376 |
+
"CC CC",
|
377 |
+
"II II",
|
378 |
+
"JJ JJ",
|
379 |
+
"DD DD",
|
380 |
+
"LL L",
|
381 |
+
"BB B",
|
382 |
+
"HH H",
|
383 |
+
"AA A",
|
384 |
+
"FF F",
|
385 |
+
"GG G",
|
386 |
+
"KK K",
|
387 |
+
"CC C",
|
388 |
+
"MM M",
|
389 |
+
"NN N",
|
390 |
+
"PP P",
|
391 |
+
"OO O",
|
392 |
+
"EE E",
|
393 |
+
"II I",
|
394 |
+
"JJ J",
|
395 |
+
"DD D",
|
396 |
+
"HHHH HHHH",
|
397 |
+
"FFFF FFFF",
|
398 |
+
"AAAA AAAA",
|
399 |
+
"BBBB B",
|
400 |
+
"LLLL LLLL",
|
401 |
+
"KKKK KKKK",
|
402 |
+
"NNNN NNNN",
|
403 |
+
"PPPP P",
|
404 |
+
"GGGG G",
|
405 |
+
"CCCC C",
|
406 |
+
"MMMM MMMM",
|
407 |
+
"LLLL L",
|
408 |
+
"OOOO O",
|
409 |
+
"HHHH H",
|
410 |
+
"EEEE E",
|
411 |
+
"AAAA A",
|
412 |
+
"IIII I",
|
413 |
+
"DDDD D",
|
414 |
+
"FFFF F",
|
415 |
+
"JJJJ J",
|
416 |
+
"KKKK K",
|
417 |
+
"BBBB BBBB",
|
418 |
+
"NNNN N",
|
419 |
+
"MMMM M",
|
420 |
+
"OOOO OOOO",
|
421 |
+
"PPPP PPPP",
|
422 |
+
"EEEE EEEE",
|
423 |
+
"JJJJ JJJJ",
|
424 |
+
"IIII IIII",
|
425 |
+
"GGGG GGGG",
|
426 |
+
"CCCC CCCC",
|
427 |
+
"LLLL LL",
|
428 |
+
"DDDD DDDD",
|
429 |
+
"AAAA AA",
|
430 |
+
"HHHH HH",
|
431 |
+
"BBBB BB",
|
432 |
+
"FFFF FF",
|
433 |
+
"KKKK KK",
|
434 |
+
"PPPP PP",
|
435 |
+
"GGGG GG",
|
436 |
+
"NNNN NN",
|
437 |
+
"EEEE EE",
|
438 |
+
"DDDD DD",
|
439 |
+
"OOOO OO",
|
440 |
+
"CCCC CC",
|
441 |
+
"MMMM MM",
|
442 |
+
"JJJJ JJ",
|
443 |
+
"IIII II",
|
444 |
+
"LLLL LLL",
|
445 |
+
"AAAA AAA",
|
446 |
+
"HHHH HHH",
|
447 |
+
"BBBB BBB",
|
448 |
+
"FFFF FFF",
|
449 |
+
"KKKK KKK",
|
450 |
+
"PPPP PPP",
|
451 |
+
"MMMM MMM",
|
452 |
+
"OOOO OOO",
|
453 |
+
"GGGG GGG",
|
454 |
+
"CCCC CCC",
|
455 |
+
"NNNN NNN",
|
456 |
+
"EEEE EEE",
|
457 |
+
"IIII III",
|
458 |
+
"JJJJ JJJ",
|
459 |
+
"DDDD DDD",
|
460 |
+
"FFFFFFFF F",
|
461 |
+
"AAAAAAAA A",
|
462 |
+
"HHHHHHHH H",
|
463 |
+
"BBBB BBBBB",
|
464 |
+
"LLLLLLLL L",
|
465 |
+
"KKKKKKKK K",
|
466 |
+
"OOOO OOOOO",
|
467 |
+
"NNNNNNNN N",
|
468 |
+
"PPPP PPPPP",
|
469 |
+
"GGGG GGGGG",
|
470 |
+
"CCCC CCCCC",
|
471 |
+
"MMMMMMMM M",
|
472 |
+
"IIII IIIII",
|
473 |
+
"DDDD DDDDD",
|
474 |
+
"EEEE EEEEE",
|
475 |
+
"JJJJ JJJJJ",
|
476 |
+
"HHHHHHHH HH",
|
477 |
+
"FFFFFFFF FF",
|
478 |
+
"AAAAAAAA AA",
|
479 |
+
"LLLLLLLL LL",
|
480 |
+
"KKKKKKKK KK",
|
481 |
+
"BBBBBBBB BB",
|
482 |
+
"PPPPPPPP PP",
|
483 |
+
"OOOOOOOO OO",
|
484 |
+
"HHHHHHHH HHH",
|
485 |
+
"NNNNNNNN NN",
|
486 |
+
"EEEEEEEE EE",
|
487 |
+
"MMMMMMMM MM",
|
488 |
+
"FFFFFFFF FFF",
|
489 |
+
"JJJJJJJJ JJ",
|
490 |
+
"DDDDDDDD DD",
|
491 |
+
"CCCCCCCC CC",
|
492 |
+
"IIIIIIII II",
|
493 |
+
"GGGGGGGG GG",
|
494 |
+
"KKKKKKKK KKK",
|
495 |
+
"AAAAAAAA AAA",
|
496 |
+
"LLLLLLLL LLL",
|
497 |
+
"BBBBBBBB BBB",
|
498 |
+
"FFFFFFFF FFFFFFFF",
|
499 |
+
"NNNNNNNN NNN",
|
500 |
+
"MMMMMMMM MMM",
|
501 |
+
"OOOOOOOO OOO",
|
502 |
+
"PPPPPPPP PPP",
|
503 |
+
"HHHHHHHH HHHHHHHH",
|
504 |
+
"EEEEEEEE EEE",
|
505 |
+
"JJJJJJJJ JJJ",
|
506 |
+
"IIIIIIII III",
|
507 |
+
"AAAAAAAA AAAAAAAA",
|
508 |
+
"CCCCCCCC CCC",
|
509 |
+
"GGGGGGGG GGG",
|
510 |
+
"LLLLLLLL LLLLLLLL",
|
511 |
+
"DDDDDDDD DDD",
|
512 |
+
"HHHHHHHH HHHH",
|
513 |
+
"KKKKKKKK KKKKKKKK",
|
514 |
+
"AAAAAAAA AAAA",
|
515 |
+
"LLL MMM",
|
516 |
+
"MMMMMMMM MMMMMMMM",
|
517 |
+
"NNNNNNNN NNNNNNNN",
|
518 |
+
"BBBBBBBB BBBBBBBB",
|
519 |
+
"LLL HHH",
|
520 |
+
"FFFFFFFF FFFF",
|
521 |
+
"JJJJJJJJ JJJJJJJJ",
|
522 |
+
"LLL CCC",
|
523 |
+
"LLLLLLLL LLLL",
|
524 |
+
"LLL BBB",
|
525 |
+
"KKKKKKKK KKKK",
|
526 |
+
"IIIIIIII IIII",
|
527 |
+
"PPPPPPPP PPPP",
|
528 |
+
"BBB HHH",
|
529 |
+
"OOOOOOOO OOOOOOOO",
|
530 |
+
"BBBBBBBB BBBB",
|
531 |
+
"EEEEEEEE EEEE",
|
532 |
+
"AAA OOO",
|
533 |
+
"NNNNNNNN NNNN",
|
534 |
+
"DDDDDDDD DDDD",
|
535 |
+
"MMMMMMMM MMMM",
|
536 |
+
"CCCCCCCC CCCC",
|
537 |
+
"MMM CCC",
|
538 |
+
"GGGGGGGG GGGG",
|
539 |
+
"BBB LLL",
|
540 |
+
"FFFFFFFF FFFFF",
|
541 |
+
"BBB CCC",
|
542 |
+
"BBB MMM",
|
543 |
+
"DDD KKK",
|
544 |
+
"BBB GGG",
|
545 |
+
"LLL GGG",
|
546 |
+
"OOOOOOOO OOOO",
|
547 |
+
"HHH CCC",
|
548 |
+
"FFF KKK",
|
549 |
+
"HHH LLL",
|
550 |
+
"AAA KKK",
|
551 |
+
"EEE GGG",
|
552 |
+
"MMM LLL",
|
553 |
+
"BBB FFF",
|
554 |
+
"AAA PPP",
|
555 |
+
"AAAAAAAA AAAAA",
|
556 |
+
"HHHHHHHH HHHHH",
|
557 |
+
"AAA HHH",
|
558 |
+
"JJJJJJJJ JJJJ",
|
559 |
+
"PPPPPPPP PPPPPPPP",
|
560 |
+
"AAA FFF",
|
561 |
+
"LLL KKK",
|
562 |
+
"AAA GGG",
|
563 |
+
"EEE KKK",
|
564 |
+
"PPP FFF",
|
565 |
+
"III CCC",
|
566 |
+
"EEEEEEEE EEEEEEEE",
|
567 |
+
"LLLL CCC",
|
568 |
+
"KKKKKKKK KKKKK",
|
569 |
+
"NNN HHH",
|
570 |
+
"NNN GGG",
|
571 |
+
"LLLLLLLL LLLLL",
|
572 |
+
"MMMMMMMM MMMMM",
|
573 |
+
"BBB KKK",
|
574 |
+
"AAA JJJ",
|
575 |
+
"BBBBBBBB BBBBB",
|
576 |
+
"BBB PPP",
|
577 |
+
"HHH JJJ",
|
578 |
+
"NNN KKK",
|
579 |
+
"III GGG",
|
580 |
+
"LLLL HHHH",
|
581 |
+
"DDD GGG",
|
582 |
+
"BBB EEE",
|
583 |
+
"PPP CCC",
|
584 |
+
"LLLL HHH",
|
585 |
+
"III FFF",
|
586 |
+
"BBB OOO",
|
587 |
+
"BBB JJJ",
|
588 |
+
"LLLL CCCC",
|
589 |
+
"GGG JJJ",
|
590 |
+
"GGGGGGGG GGGGGGGG",
|
591 |
+
"LLL OOO",
|
592 |
+
"NNN OOO",
|
593 |
+
"AAA CCC",
|
594 |
+
"LLLL BBBB",
|
595 |
+
"DDD CCC",
|
596 |
+
"MMM HHH",
|
597 |
+
"IIIIIIII IIIIIIII"
|
598 |
+
]
|
599 |
+
}
|
600 |
+
}
|
tokenizer_config.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_prefix_space": false,
|
3 |
+
"bos_token": "<|endoftext|>",
|
4 |
+
"clean_up_tokenization_spaces": true,
|
5 |
+
"eos_token": "<|endoftext|>",
|
6 |
+
"model_max_length": 1024,
|
7 |
+
"tokenizer_class": "GPT2Tokenizer",
|
8 |
+
"unk_token": "<|endoftext|>"
|
9 |
+
}
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:11b012f47000eab94a4ee1c6d0c3db1587a011efe49804560b789ac58864ad25
|
3 |
+
size 3899
|
vocab.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"<[|endoftext|>]":0,"...":1,"\n":2,"A":3,"B":4,"C":5,"D":6,"E":7,"F":8,"G":9,"H":10,"I":11,"J":12,"K":13,"L":14,"M":15,"N":16,"O":17,"P":18,"Ċ":19,"LL":20,"HH":21,"AA":22,"BB":23,"FF":24,"KK":25,"PP":26,"GG":27,"NN":28,"OO":29,"MM":30,"CC":31,"EE":32,"II":33,"JJ":34,"DD":35,"HHHH":36,"LLLL":37,"AAAA":38,"FFFF":39,"BBBB":40,"KKKK":41,"PPPP":42,"NNNN":43,"OOOO":44,"MMMM":45,"GGGG":46,"EEEE":47,"CCCC":48,"IIII":49,"JJJJ":50,"DDDD":51,"LLL":52,"BBB":53,"HHH":54,"AAA":55,"FFF":56,"GGG":57,"KKK":58,"CCC":59,"MMM":60,"NNN":61,"PPP":62,"OOO":63,"EEE":64,"III":65,"JJJ":66,"DDD":67,"HHHHHHHH":68,"FFFFFFFF":69,"AAAAAAAA":70,"BBBBB":71,"LLLLLLLL":72,"KKKKKKKK":73,"NNNNNNNN":74,"PPPPP":75,"GGGGG":76,"CCCCC":77,"MMMMMMMM":78,"LLLLL":79,"OOOOO":80,"HHHHH":81,"EEEEE":82,"AAAAA":83,"IIIII":84,"DDDDD":85,"FFFFF":86,"JJJJJ":87,"KKKKK":88,"BBBBBBBB":89,"NNNNN":90,"MMMMM":91,"OOOOOOOO":92,"PPPPPPPP":93,"EEEEEEEE":94,"JJJJJJJJ":95,"IIIIIIII":96,"GGGGGGGG":97,"CCCCCCCC":98,"LLLLLL":99,"DDDDDDDD":100,"AAAAAA":101,"HHHHHH":102,"BBBBBB":103,"FFFFFF":104,"KKKKKK":105,"PPPPPP":106,"GGGGGG":107,"NNNNNN":108,"EEEEEE":109,"DDDDDD":110,"OOOOOO":111,"CCCCCC":112,"MMMMMM":113,"JJJJJJ":114,"IIIIII":115,"LLLLLLL":116,"AAAAAAA":117,"HHHHHHH":118,"BBBBBBB":119,"FFFFFFF":120,"KKKKKKK":121,"PPPPPPP":122,"MMMMMMM":123,"OOOOOOO":124,"GGGGGGG":125,"CCCCCCC":126,"NNNNNNN":127,"EEEEEEE":128,"IIIIIII":129,"JJJJJJJ":130,"DDDDDDD":131,"FFFFFFFFF":132,"AAAAAAAAA":133,"HHHHHHHHH":134,"BBBBBBBBB":135,"LLLLLLLLL":136,"KKKKKKKKK":137,"OOOOOOOOO":138,"NNNNNNNNN":139,"PPPPPPPPP":140,"GGGGGGGGG":141,"CCCCCCCCC":142,"MMMMMMMMM":143,"IIIIIIIII":144,"DDDDDDDDD":145,"EEEEEEEEE":146,"JJJJJJJJJ":147,"HHHHHHHHHH":148,"FFFFFFFFFF":149,"AAAAAAAAAA":150,"LLLLLLLLLL":151,"KKKKKKKKKK":152,"BBBBBBBBBB":153,"PPPPPPPPPP":154,"OOOOOOOOOO":155,"HHHHHHHHHHH":156,"NNNNNNNNNN":157,"EEEEEEEEEE":158,"MMMMMMMMMM":159,"FFFFFFFFFFF":160,"JJJJJJJJJJ":161,"DDDDDDDDDD":162,"CCCCCCCCCC":163,"IIIIIIIIII":164,"GGGGGGGGGG":165,"KKKKKKKKKKK":166,"AAAAAAAAAAA":167,"LLLLLLLLLLL":168,"BBBBBBBBBBB":169,"FFFFFFFFFFFFFFFF":170,"NNNNNNNNNNN":171,"MMMMMMMMMMM":172,"OOOOOOOOOOO":173,"PPPPPPPPPPP":174,"HHHHHHHHHHHHHHHH":175,"EEEEEEEEEEE":176,"JJJJJJJJJJJ":177,"IIIIIIIIIII":178,"AAAAAAAAAAAAAAAA":179,"CCCCCCCCCCC":180,"GGGGGGGGGGG":181,"LLLLLLLLLLLLLLLL":182,"DDDDDDDDDDD":183,"HHHHHHHHHHHH":184,"KKKKKKKKKKKKKKKK":185,"AAAAAAAAAAAA":186,"LLLMMM":187,"MMMMMMMMMMMMMMMM":188,"NNNNNNNNNNNNNNNN":189,"BBBBBBBBBBBBBBBB":190,"LLLHHH":191,"FFFFFFFFFFFF":192,"JJJJJJJJJJJJJJJJ":193,"LLLCCC":194,"LLLLLLLLLLLL":195,"LLLBBB":196,"KKKKKKKKKKKK":197,"IIIIIIIIIIII":198,"PPPPPPPPPPPP":199,"BBBHHH":200,"OOOOOOOOOOOOOOOO":201,"BBBBBBBBBBBB":202,"EEEEEEEEEEEE":203,"AAAOOO":204,"NNNNNNNNNNNN":205,"DDDDDDDDDDDD":206,"MMMMMMMMMMMM":207,"CCCCCCCCCCCC":208,"MMMCCC":209,"GGGGGGGGGGGG":210,"BBBLLL":211,"FFFFFFFFFFFFF":212,"BBBCCC":213,"BBBMMM":214,"DDDKKK":215,"BBBGGG":216,"LLLGGG":217,"OOOOOOOOOOOO":218,"HHHCCC":219,"FFFKKK":220,"HHHLLL":221,"AAAKKK":222,"EEEGGG":223,"MMMLLL":224,"BBBFFF":225,"AAAPPP":226,"AAAAAAAAAAAAA":227,"HHHHHHHHHHHHH":228,"AAAHHH":229,"JJJJJJJJJJJJ":230,"PPPPPPPPPPPPPPPP":231,"AAAFFF":232,"LLLKKK":233,"AAAGGG":234,"EEEKKK":235,"PPPFFF":236,"IIICCC":237,"EEEEEEEEEEEEEEEE":238,"LLLLCCC":239,"KKKKKKKKKKKKK":240,"NNNHHH":241,"NNNGGG":242,"LLLLLLLLLLLLL":243,"MMMMMMMMMMMMM":244,"BBBKKK":245,"AAAJJJ":246,"BBBBBBBBBBBBB":247,"BBBPPP":248,"HHHJJJ":249,"NNNKKK":250,"IIIGGG":251,"LLLLHHHH":252,"DDDGGG":253,"BBBEEE":254,"PPPCCC":255,"LLLLHHH":256,"IIIFFF":257,"BBBOOO":258,"BBBJJJ":259,"LLLLCCCC":260,"GGGJJJ":261,"GGGGGGGGGGGGGGGG":262,"LLLOOO":263,"NNNOOO":264,"AAACCC":265,"LLLLBBBB":266,"DDDCCC":267,"MMMHHH":268,"IIIIIIIIIIIIIIII":269}
|