rjarpa committed on
Commit
067e554
1 Parent(s): 9a0f633

End of training

Browse files
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ checkpoint-*/
added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "<|endoftext|>": 270
3
+ }
config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "gpt2",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 270,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 270,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 20,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "reorder_and_upcast_attn": false,
21
+ "resid_pdrop": 0.1,
22
+ "scale_attn_by_inverse_layer_idx": false,
23
+ "scale_attn_weights": true,
24
+ "summary_activation": null,
25
+ "summary_first_dropout": 0.1,
26
+ "summary_proj_to_labels": true,
27
+ "summary_type": "cls_index",
28
+ "summary_use_proj": true,
29
+ "task_specific_params": {
30
+ "text-generation": {
31
+ "do_sample": true,
32
+ "max_length": 50
33
+ }
34
+ },
35
+ "torch_dtype": "float32",
36
+ "transformers_version": "4.30.2",
37
+ "use_cache": true,
38
+ "vocab_size": 271
39
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 270,
4
+ "eos_token_id": 270,
5
+ "transformers_version": "4.30.2"
6
+ }
merges.txt ADDED
@@ -0,0 +1,251 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #version: 0.2
2
+ L L
3
+ H H
4
+ A A
5
+ B B
6
+ F F
7
+ K K
8
+ P P
9
+ G G
10
+ N N
11
+ O O
12
+ M M
13
+ C C
14
+ E E
15
+ I I
16
+ J J
17
+ D D
18
+ HH HH
19
+ LL LL
20
+ AA AA
21
+ FF FF
22
+ BB BB
23
+ KK KK
24
+ PP PP
25
+ NN NN
26
+ OO OO
27
+ MM MM
28
+ GG GG
29
+ EE EE
30
+ CC CC
31
+ II II
32
+ JJ JJ
33
+ DD DD
34
+ LL L
35
+ BB B
36
+ HH H
37
+ AA A
38
+ FF F
39
+ GG G
40
+ KK K
41
+ CC C
42
+ MM M
43
+ NN N
44
+ PP P
45
+ OO O
46
+ EE E
47
+ II I
48
+ JJ J
49
+ DD D
50
+ HHHH HHHH
51
+ FFFF FFFF
52
+ AAAA AAAA
53
+ BBBB B
54
+ LLLL LLLL
55
+ KKKK KKKK
56
+ NNNN NNNN
57
+ PPPP P
58
+ GGGG G
59
+ CCCC C
60
+ MMMM MMMM
61
+ LLLL L
62
+ OOOO O
63
+ HHHH H
64
+ EEEE E
65
+ AAAA A
66
+ IIII I
67
+ DDDD D
68
+ FFFF F
69
+ JJJJ J
70
+ KKKK K
71
+ BBBB BBBB
72
+ NNNN N
73
+ MMMM M
74
+ OOOO OOOO
75
+ PPPP PPPP
76
+ EEEE EEEE
77
+ JJJJ JJJJ
78
+ IIII IIII
79
+ GGGG GGGG
80
+ CCCC CCCC
81
+ LLLL LL
82
+ DDDD DDDD
83
+ AAAA AA
84
+ HHHH HH
85
+ BBBB BB
86
+ FFFF FF
87
+ KKKK KK
88
+ PPPP PP
89
+ GGGG GG
90
+ NNNN NN
91
+ EEEE EE
92
+ DDDD DD
93
+ OOOO OO
94
+ CCCC CC
95
+ MMMM MM
96
+ JJJJ JJ
97
+ IIII II
98
+ LLLL LLL
99
+ AAAA AAA
100
+ HHHH HHH
101
+ BBBB BBB
102
+ FFFF FFF
103
+ KKKK KKK
104
+ PPPP PPP
105
+ MMMM MMM
106
+ OOOO OOO
107
+ GGGG GGG
108
+ CCCC CCC
109
+ NNNN NNN
110
+ EEEE EEE
111
+ IIII III
112
+ JJJJ JJJ
113
+ DDDD DDD
114
+ FFFFFFFF F
115
+ AAAAAAAA A
116
+ HHHHHHHH H
117
+ BBBB BBBBB
118
+ LLLLLLLL L
119
+ KKKKKKKK K
120
+ OOOO OOOOO
121
+ NNNNNNNN N
122
+ PPPP PPPPP
123
+ GGGG GGGGG
124
+ CCCC CCCCC
125
+ MMMMMMMM M
126
+ IIII IIIII
127
+ DDDD DDDDD
128
+ EEEE EEEEE
129
+ JJJJ JJJJJ
130
+ HHHHHHHH HH
131
+ FFFFFFFF FF
132
+ AAAAAAAA AA
133
+ LLLLLLLL LL
134
+ KKKKKKKK KK
135
+ BBBBBBBB BB
136
+ PPPPPPPP PP
137
+ OOOOOOOO OO
138
+ HHHHHHHH HHH
139
+ NNNNNNNN NN
140
+ EEEEEEEE EE
141
+ MMMMMMMM MM
142
+ FFFFFFFF FFF
143
+ JJJJJJJJ JJ
144
+ DDDDDDDD DD
145
+ CCCCCCCC CC
146
+ IIIIIIII II
147
+ GGGGGGGG GG
148
+ KKKKKKKK KKK
149
+ AAAAAAAA AAA
150
+ LLLLLLLL LLL
151
+ BBBBBBBB BBB
152
+ FFFFFFFF FFFFFFFF
153
+ NNNNNNNN NNN
154
+ MMMMMMMM MMM
155
+ OOOOOOOO OOO
156
+ PPPPPPPP PPP
157
+ HHHHHHHH HHHHHHHH
158
+ EEEEEEEE EEE
159
+ JJJJJJJJ JJJ
160
+ IIIIIIII III
161
+ AAAAAAAA AAAAAAAA
162
+ CCCCCCCC CCC
163
+ GGGGGGGG GGG
164
+ LLLLLLLL LLLLLLLL
165
+ DDDDDDDD DDD
166
+ HHHHHHHH HHHH
167
+ KKKKKKKK KKKKKKKK
168
+ AAAAAAAA AAAA
169
+ LLL MMM
170
+ MMMMMMMM MMMMMMMM
171
+ NNNNNNNN NNNNNNNN
172
+ BBBBBBBB BBBBBBBB
173
+ LLL HHH
174
+ FFFFFFFF FFFF
175
+ JJJJJJJJ JJJJJJJJ
176
+ LLL CCC
177
+ LLLLLLLL LLLL
178
+ LLL BBB
179
+ KKKKKKKK KKKK
180
+ IIIIIIII IIII
181
+ PPPPPPPP PPPP
182
+ BBB HHH
183
+ OOOOOOOO OOOOOOOO
184
+ BBBBBBBB BBBB
185
+ EEEEEEEE EEEE
186
+ AAA OOO
187
+ NNNNNNNN NNNN
188
+ DDDDDDDD DDDD
189
+ MMMMMMMM MMMM
190
+ CCCCCCCC CCCC
191
+ MMM CCC
192
+ GGGGGGGG GGGG
193
+ BBB LLL
194
+ FFFFFFFF FFFFF
195
+ BBB CCC
196
+ BBB MMM
197
+ DDD KKK
198
+ BBB GGG
199
+ LLL GGG
200
+ OOOOOOOO OOOO
201
+ HHH CCC
202
+ FFF KKK
203
+ HHH LLL
204
+ AAA KKK
205
+ EEE GGG
206
+ MMM LLL
207
+ BBB FFF
208
+ AAA PPP
209
+ AAAAAAAA AAAAA
210
+ HHHHHHHH HHHHH
211
+ AAA HHH
212
+ JJJJJJJJ JJJJ
213
+ PPPPPPPP PPPPPPPP
214
+ AAA FFF
215
+ LLL KKK
216
+ AAA GGG
217
+ EEE KKK
218
+ PPP FFF
219
+ III CCC
220
+ EEEEEEEE EEEEEEEE
221
+ LLLL CCC
222
+ KKKKKKKK KKKKK
223
+ NNN HHH
224
+ NNN GGG
225
+ LLLLLLLL LLLLL
226
+ MMMMMMMM MMMMM
227
+ BBB KKK
228
+ AAA JJJ
229
+ BBBBBBBB BBBBB
230
+ BBB PPP
231
+ HHH JJJ
232
+ NNN KKK
233
+ III GGG
234
+ LLLL HHHH
235
+ DDD GGG
236
+ BBB EEE
237
+ PPP CCC
238
+ LLLL HHH
239
+ III FFF
240
+ BBB OOO
241
+ BBB JJJ
242
+ LLLL CCCC
243
+ GGG JJJ
244
+ GGGGGGGG GGGGGGGG
245
+ LLL OOO
246
+ NNN OOO
247
+ AAA CCC
248
+ LLLL BBBB
249
+ DDD CCC
250
+ MMM HHH
251
+ IIIIIIII IIIIIIII
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d4bb48c5ddd03422653feb3416689f32d5d19597e7f57863f02c9595c4a66f6
3
+ size 344250205
special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "pad_token": "<|endoftext|>",
5
+ "unk_token": "<|endoftext|>"
6
+ }
tokenizer.json ADDED
@@ -0,0 +1,600 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 20,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
+ "padding": null,
10
+ "added_tokens": [
11
+ {
12
+ "id": 0,
13
+ "content": "<[|endoftext|>]",
14
+ "single_word": false,
15
+ "lstrip": false,
16
+ "rstrip": false,
17
+ "normalized": false,
18
+ "special": true
19
+ },
20
+ {
21
+ "id": 1,
22
+ "content": "...",
23
+ "single_word": false,
24
+ "lstrip": false,
25
+ "rstrip": false,
26
+ "normalized": false,
27
+ "special": true
28
+ },
29
+ {
30
+ "id": 2,
31
+ "content": "\n",
32
+ "single_word": false,
33
+ "lstrip": false,
34
+ "rstrip": false,
35
+ "normalized": false,
36
+ "special": true
37
+ },
38
+ {
39
+ "id": 270,
40
+ "content": "<|endoftext|>",
41
+ "single_word": false,
42
+ "lstrip": false,
43
+ "rstrip": false,
44
+ "normalized": false,
45
+ "special": true
46
+ }
47
+ ],
48
+ "normalizer": null,
49
+ "pre_tokenizer": {
50
+ "type": "ByteLevel",
51
+ "add_prefix_space": false,
52
+ "trim_offsets": true,
53
+ "use_regex": true
54
+ },
55
+ "post_processor": {
56
+ "type": "ByteLevel",
57
+ "add_prefix_space": true,
58
+ "trim_offsets": false,
59
+ "use_regex": true
60
+ },
61
+ "decoder": {
62
+ "type": "ByteLevel",
63
+ "add_prefix_space": true,
64
+ "trim_offsets": true,
65
+ "use_regex": true
66
+ },
67
+ "model": {
68
+ "type": "BPE",
69
+ "dropout": null,
70
+ "unk_token": null,
71
+ "continuing_subword_prefix": null,
72
+ "end_of_word_suffix": null,
73
+ "fuse_unk": false,
74
+ "byte_fallback": false,
75
+ "vocab": {
76
+ "<[|endoftext|>]": 0,
77
+ "...": 1,
78
+ "\n": 2,
79
+ "A": 3,
80
+ "B": 4,
81
+ "C": 5,
82
+ "D": 6,
83
+ "E": 7,
84
+ "F": 8,
85
+ "G": 9,
86
+ "H": 10,
87
+ "I": 11,
88
+ "J": 12,
89
+ "K": 13,
90
+ "L": 14,
91
+ "M": 15,
92
+ "N": 16,
93
+ "O": 17,
94
+ "P": 18,
95
+ "Ċ": 19,
96
+ "LL": 20,
97
+ "HH": 21,
98
+ "AA": 22,
99
+ "BB": 23,
100
+ "FF": 24,
101
+ "KK": 25,
102
+ "PP": 26,
103
+ "GG": 27,
104
+ "NN": 28,
105
+ "OO": 29,
106
+ "MM": 30,
107
+ "CC": 31,
108
+ "EE": 32,
109
+ "II": 33,
110
+ "JJ": 34,
111
+ "DD": 35,
112
+ "HHHH": 36,
113
+ "LLLL": 37,
114
+ "AAAA": 38,
115
+ "FFFF": 39,
116
+ "BBBB": 40,
117
+ "KKKK": 41,
118
+ "PPPP": 42,
119
+ "NNNN": 43,
120
+ "OOOO": 44,
121
+ "MMMM": 45,
122
+ "GGGG": 46,
123
+ "EEEE": 47,
124
+ "CCCC": 48,
125
+ "IIII": 49,
126
+ "JJJJ": 50,
127
+ "DDDD": 51,
128
+ "LLL": 52,
129
+ "BBB": 53,
130
+ "HHH": 54,
131
+ "AAA": 55,
132
+ "FFF": 56,
133
+ "GGG": 57,
134
+ "KKK": 58,
135
+ "CCC": 59,
136
+ "MMM": 60,
137
+ "NNN": 61,
138
+ "PPP": 62,
139
+ "OOO": 63,
140
+ "EEE": 64,
141
+ "III": 65,
142
+ "JJJ": 66,
143
+ "DDD": 67,
144
+ "HHHHHHHH": 68,
145
+ "FFFFFFFF": 69,
146
+ "AAAAAAAA": 70,
147
+ "BBBBB": 71,
148
+ "LLLLLLLL": 72,
149
+ "KKKKKKKK": 73,
150
+ "NNNNNNNN": 74,
151
+ "PPPPP": 75,
152
+ "GGGGG": 76,
153
+ "CCCCC": 77,
154
+ "MMMMMMMM": 78,
155
+ "LLLLL": 79,
156
+ "OOOOO": 80,
157
+ "HHHHH": 81,
158
+ "EEEEE": 82,
159
+ "AAAAA": 83,
160
+ "IIIII": 84,
161
+ "DDDDD": 85,
162
+ "FFFFF": 86,
163
+ "JJJJJ": 87,
164
+ "KKKKK": 88,
165
+ "BBBBBBBB": 89,
166
+ "NNNNN": 90,
167
+ "MMMMM": 91,
168
+ "OOOOOOOO": 92,
169
+ "PPPPPPPP": 93,
170
+ "EEEEEEEE": 94,
171
+ "JJJJJJJJ": 95,
172
+ "IIIIIIII": 96,
173
+ "GGGGGGGG": 97,
174
+ "CCCCCCCC": 98,
175
+ "LLLLLL": 99,
176
+ "DDDDDDDD": 100,
177
+ "AAAAAA": 101,
178
+ "HHHHHH": 102,
179
+ "BBBBBB": 103,
180
+ "FFFFFF": 104,
181
+ "KKKKKK": 105,
182
+ "PPPPPP": 106,
183
+ "GGGGGG": 107,
184
+ "NNNNNN": 108,
185
+ "EEEEEE": 109,
186
+ "DDDDDD": 110,
187
+ "OOOOOO": 111,
188
+ "CCCCCC": 112,
189
+ "MMMMMM": 113,
190
+ "JJJJJJ": 114,
191
+ "IIIIII": 115,
192
+ "LLLLLLL": 116,
193
+ "AAAAAAA": 117,
194
+ "HHHHHHH": 118,
195
+ "BBBBBBB": 119,
196
+ "FFFFFFF": 120,
197
+ "KKKKKKK": 121,
198
+ "PPPPPPP": 122,
199
+ "MMMMMMM": 123,
200
+ "OOOOOOO": 124,
201
+ "GGGGGGG": 125,
202
+ "CCCCCCC": 126,
203
+ "NNNNNNN": 127,
204
+ "EEEEEEE": 128,
205
+ "IIIIIII": 129,
206
+ "JJJJJJJ": 130,
207
+ "DDDDDDD": 131,
208
+ "FFFFFFFFF": 132,
209
+ "AAAAAAAAA": 133,
210
+ "HHHHHHHHH": 134,
211
+ "BBBBBBBBB": 135,
212
+ "LLLLLLLLL": 136,
213
+ "KKKKKKKKK": 137,
214
+ "OOOOOOOOO": 138,
215
+ "NNNNNNNNN": 139,
216
+ "PPPPPPPPP": 140,
217
+ "GGGGGGGGG": 141,
218
+ "CCCCCCCCC": 142,
219
+ "MMMMMMMMM": 143,
220
+ "IIIIIIIII": 144,
221
+ "DDDDDDDDD": 145,
222
+ "EEEEEEEEE": 146,
223
+ "JJJJJJJJJ": 147,
224
+ "HHHHHHHHHH": 148,
225
+ "FFFFFFFFFF": 149,
226
+ "AAAAAAAAAA": 150,
227
+ "LLLLLLLLLL": 151,
228
+ "KKKKKKKKKK": 152,
229
+ "BBBBBBBBBB": 153,
230
+ "PPPPPPPPPP": 154,
231
+ "OOOOOOOOOO": 155,
232
+ "HHHHHHHHHHH": 156,
233
+ "NNNNNNNNNN": 157,
234
+ "EEEEEEEEEE": 158,
235
+ "MMMMMMMMMM": 159,
236
+ "FFFFFFFFFFF": 160,
237
+ "JJJJJJJJJJ": 161,
238
+ "DDDDDDDDDD": 162,
239
+ "CCCCCCCCCC": 163,
240
+ "IIIIIIIIII": 164,
241
+ "GGGGGGGGGG": 165,
242
+ "KKKKKKKKKKK": 166,
243
+ "AAAAAAAAAAA": 167,
244
+ "LLLLLLLLLLL": 168,
245
+ "BBBBBBBBBBB": 169,
246
+ "FFFFFFFFFFFFFFFF": 170,
247
+ "NNNNNNNNNNN": 171,
248
+ "MMMMMMMMMMM": 172,
249
+ "OOOOOOOOOOO": 173,
250
+ "PPPPPPPPPPP": 174,
251
+ "HHHHHHHHHHHHHHHH": 175,
252
+ "EEEEEEEEEEE": 176,
253
+ "JJJJJJJJJJJ": 177,
254
+ "IIIIIIIIIII": 178,
255
+ "AAAAAAAAAAAAAAAA": 179,
256
+ "CCCCCCCCCCC": 180,
257
+ "GGGGGGGGGGG": 181,
258
+ "LLLLLLLLLLLLLLLL": 182,
259
+ "DDDDDDDDDDD": 183,
260
+ "HHHHHHHHHHHH": 184,
261
+ "KKKKKKKKKKKKKKKK": 185,
262
+ "AAAAAAAAAAAA": 186,
263
+ "LLLMMM": 187,
264
+ "MMMMMMMMMMMMMMMM": 188,
265
+ "NNNNNNNNNNNNNNNN": 189,
266
+ "BBBBBBBBBBBBBBBB": 190,
267
+ "LLLHHH": 191,
268
+ "FFFFFFFFFFFF": 192,
269
+ "JJJJJJJJJJJJJJJJ": 193,
270
+ "LLLCCC": 194,
271
+ "LLLLLLLLLLLL": 195,
272
+ "LLLBBB": 196,
273
+ "KKKKKKKKKKKK": 197,
274
+ "IIIIIIIIIIII": 198,
275
+ "PPPPPPPPPPPP": 199,
276
+ "BBBHHH": 200,
277
+ "OOOOOOOOOOOOOOOO": 201,
278
+ "BBBBBBBBBBBB": 202,
279
+ "EEEEEEEEEEEE": 203,
280
+ "AAAOOO": 204,
281
+ "NNNNNNNNNNNN": 205,
282
+ "DDDDDDDDDDDD": 206,
283
+ "MMMMMMMMMMMM": 207,
284
+ "CCCCCCCCCCCC": 208,
285
+ "MMMCCC": 209,
286
+ "GGGGGGGGGGGG": 210,
287
+ "BBBLLL": 211,
288
+ "FFFFFFFFFFFFF": 212,
289
+ "BBBCCC": 213,
290
+ "BBBMMM": 214,
291
+ "DDDKKK": 215,
292
+ "BBBGGG": 216,
293
+ "LLLGGG": 217,
294
+ "OOOOOOOOOOOO": 218,
295
+ "HHHCCC": 219,
296
+ "FFFKKK": 220,
297
+ "HHHLLL": 221,
298
+ "AAAKKK": 222,
299
+ "EEEGGG": 223,
300
+ "MMMLLL": 224,
301
+ "BBBFFF": 225,
302
+ "AAAPPP": 226,
303
+ "AAAAAAAAAAAAA": 227,
304
+ "HHHHHHHHHHHHH": 228,
305
+ "AAAHHH": 229,
306
+ "JJJJJJJJJJJJ": 230,
307
+ "PPPPPPPPPPPPPPPP": 231,
308
+ "AAAFFF": 232,
309
+ "LLLKKK": 233,
310
+ "AAAGGG": 234,
311
+ "EEEKKK": 235,
312
+ "PPPFFF": 236,
313
+ "IIICCC": 237,
314
+ "EEEEEEEEEEEEEEEE": 238,
315
+ "LLLLCCC": 239,
316
+ "KKKKKKKKKKKKK": 240,
317
+ "NNNHHH": 241,
318
+ "NNNGGG": 242,
319
+ "LLLLLLLLLLLLL": 243,
320
+ "MMMMMMMMMMMMM": 244,
321
+ "BBBKKK": 245,
322
+ "AAAJJJ": 246,
323
+ "BBBBBBBBBBBBB": 247,
324
+ "BBBPPP": 248,
325
+ "HHHJJJ": 249,
326
+ "NNNKKK": 250,
327
+ "IIIGGG": 251,
328
+ "LLLLHHHH": 252,
329
+ "DDDGGG": 253,
330
+ "BBBEEE": 254,
331
+ "PPPCCC": 255,
332
+ "LLLLHHH": 256,
333
+ "IIIFFF": 257,
334
+ "BBBOOO": 258,
335
+ "BBBJJJ": 259,
336
+ "LLLLCCCC": 260,
337
+ "GGGJJJ": 261,
338
+ "GGGGGGGGGGGGGGGG": 262,
339
+ "LLLOOO": 263,
340
+ "NNNOOO": 264,
341
+ "AAACCC": 265,
342
+ "LLLLBBBB": 266,
343
+ "DDDCCC": 267,
344
+ "MMMHHH": 268,
345
+ "IIIIIIIIIIIIIIII": 269
346
+ },
347
+ "merges": [
348
+ "L L",
349
+ "H H",
350
+ "A A",
351
+ "B B",
352
+ "F F",
353
+ "K K",
354
+ "P P",
355
+ "G G",
356
+ "N N",
357
+ "O O",
358
+ "M M",
359
+ "C C",
360
+ "E E",
361
+ "I I",
362
+ "J J",
363
+ "D D",
364
+ "HH HH",
365
+ "LL LL",
366
+ "AA AA",
367
+ "FF FF",
368
+ "BB BB",
369
+ "KK KK",
370
+ "PP PP",
371
+ "NN NN",
372
+ "OO OO",
373
+ "MM MM",
374
+ "GG GG",
375
+ "EE EE",
376
+ "CC CC",
377
+ "II II",
378
+ "JJ JJ",
379
+ "DD DD",
380
+ "LL L",
381
+ "BB B",
382
+ "HH H",
383
+ "AA A",
384
+ "FF F",
385
+ "GG G",
386
+ "KK K",
387
+ "CC C",
388
+ "MM M",
389
+ "NN N",
390
+ "PP P",
391
+ "OO O",
392
+ "EE E",
393
+ "II I",
394
+ "JJ J",
395
+ "DD D",
396
+ "HHHH HHHH",
397
+ "FFFF FFFF",
398
+ "AAAA AAAA",
399
+ "BBBB B",
400
+ "LLLL LLLL",
401
+ "KKKK KKKK",
402
+ "NNNN NNNN",
403
+ "PPPP P",
404
+ "GGGG G",
405
+ "CCCC C",
406
+ "MMMM MMMM",
407
+ "LLLL L",
408
+ "OOOO O",
409
+ "HHHH H",
410
+ "EEEE E",
411
+ "AAAA A",
412
+ "IIII I",
413
+ "DDDD D",
414
+ "FFFF F",
415
+ "JJJJ J",
416
+ "KKKK K",
417
+ "BBBB BBBB",
418
+ "NNNN N",
419
+ "MMMM M",
420
+ "OOOO OOOO",
421
+ "PPPP PPPP",
422
+ "EEEE EEEE",
423
+ "JJJJ JJJJ",
424
+ "IIII IIII",
425
+ "GGGG GGGG",
426
+ "CCCC CCCC",
427
+ "LLLL LL",
428
+ "DDDD DDDD",
429
+ "AAAA AA",
430
+ "HHHH HH",
431
+ "BBBB BB",
432
+ "FFFF FF",
433
+ "KKKK KK",
434
+ "PPPP PP",
435
+ "GGGG GG",
436
+ "NNNN NN",
437
+ "EEEE EE",
438
+ "DDDD DD",
439
+ "OOOO OO",
440
+ "CCCC CC",
441
+ "MMMM MM",
442
+ "JJJJ JJ",
443
+ "IIII II",
444
+ "LLLL LLL",
445
+ "AAAA AAA",
446
+ "HHHH HHH",
447
+ "BBBB BBB",
448
+ "FFFF FFF",
449
+ "KKKK KKK",
450
+ "PPPP PPP",
451
+ "MMMM MMM",
452
+ "OOOO OOO",
453
+ "GGGG GGG",
454
+ "CCCC CCC",
455
+ "NNNN NNN",
456
+ "EEEE EEE",
457
+ "IIII III",
458
+ "JJJJ JJJ",
459
+ "DDDD DDD",
460
+ "FFFFFFFF F",
461
+ "AAAAAAAA A",
462
+ "HHHHHHHH H",
463
+ "BBBB BBBBB",
464
+ "LLLLLLLL L",
465
+ "KKKKKKKK K",
466
+ "OOOO OOOOO",
467
+ "NNNNNNNN N",
468
+ "PPPP PPPPP",
469
+ "GGGG GGGGG",
470
+ "CCCC CCCCC",
471
+ "MMMMMMMM M",
472
+ "IIII IIIII",
473
+ "DDDD DDDDD",
474
+ "EEEE EEEEE",
475
+ "JJJJ JJJJJ",
476
+ "HHHHHHHH HH",
477
+ "FFFFFFFF FF",
478
+ "AAAAAAAA AA",
479
+ "LLLLLLLL LL",
480
+ "KKKKKKKK KK",
481
+ "BBBBBBBB BB",
482
+ "PPPPPPPP PP",
483
+ "OOOOOOOO OO",
484
+ "HHHHHHHH HHH",
485
+ "NNNNNNNN NN",
486
+ "EEEEEEEE EE",
487
+ "MMMMMMMM MM",
488
+ "FFFFFFFF FFF",
489
+ "JJJJJJJJ JJ",
490
+ "DDDDDDDD DD",
491
+ "CCCCCCCC CC",
492
+ "IIIIIIII II",
493
+ "GGGGGGGG GG",
494
+ "KKKKKKKK KKK",
495
+ "AAAAAAAA AAA",
496
+ "LLLLLLLL LLL",
497
+ "BBBBBBBB BBB",
498
+ "FFFFFFFF FFFFFFFF",
499
+ "NNNNNNNN NNN",
500
+ "MMMMMMMM MMM",
501
+ "OOOOOOOO OOO",
502
+ "PPPPPPPP PPP",
503
+ "HHHHHHHH HHHHHHHH",
504
+ "EEEEEEEE EEE",
505
+ "JJJJJJJJ JJJ",
506
+ "IIIIIIII III",
507
+ "AAAAAAAA AAAAAAAA",
508
+ "CCCCCCCC CCC",
509
+ "GGGGGGGG GGG",
510
+ "LLLLLLLL LLLLLLLL",
511
+ "DDDDDDDD DDD",
512
+ "HHHHHHHH HHHH",
513
+ "KKKKKKKK KKKKKKKK",
514
+ "AAAAAAAA AAAA",
515
+ "LLL MMM",
516
+ "MMMMMMMM MMMMMMMM",
517
+ "NNNNNNNN NNNNNNNN",
518
+ "BBBBBBBB BBBBBBBB",
519
+ "LLL HHH",
520
+ "FFFFFFFF FFFF",
521
+ "JJJJJJJJ JJJJJJJJ",
522
+ "LLL CCC",
523
+ "LLLLLLLL LLLL",
524
+ "LLL BBB",
525
+ "KKKKKKKK KKKK",
526
+ "IIIIIIII IIII",
527
+ "PPPPPPPP PPPP",
528
+ "BBB HHH",
529
+ "OOOOOOOO OOOOOOOO",
530
+ "BBBBBBBB BBBB",
531
+ "EEEEEEEE EEEE",
532
+ "AAA OOO",
533
+ "NNNNNNNN NNNN",
534
+ "DDDDDDDD DDDD",
535
+ "MMMMMMMM MMMM",
536
+ "CCCCCCCC CCCC",
537
+ "MMM CCC",
538
+ "GGGGGGGG GGGG",
539
+ "BBB LLL",
540
+ "FFFFFFFF FFFFF",
541
+ "BBB CCC",
542
+ "BBB MMM",
543
+ "DDD KKK",
544
+ "BBB GGG",
545
+ "LLL GGG",
546
+ "OOOOOOOO OOOO",
547
+ "HHH CCC",
548
+ "FFF KKK",
549
+ "HHH LLL",
550
+ "AAA KKK",
551
+ "EEE GGG",
552
+ "MMM LLL",
553
+ "BBB FFF",
554
+ "AAA PPP",
555
+ "AAAAAAAA AAAAA",
556
+ "HHHHHHHH HHHHH",
557
+ "AAA HHH",
558
+ "JJJJJJJJ JJJJ",
559
+ "PPPPPPPP PPPPPPPP",
560
+ "AAA FFF",
561
+ "LLL KKK",
562
+ "AAA GGG",
563
+ "EEE KKK",
564
+ "PPP FFF",
565
+ "III CCC",
566
+ "EEEEEEEE EEEEEEEE",
567
+ "LLLL CCC",
568
+ "KKKKKKKK KKKKK",
569
+ "NNN HHH",
570
+ "NNN GGG",
571
+ "LLLLLLLL LLLLL",
572
+ "MMMMMMMM MMMMM",
573
+ "BBB KKK",
574
+ "AAA JJJ",
575
+ "BBBBBBBB BBBBB",
576
+ "BBB PPP",
577
+ "HHH JJJ",
578
+ "NNN KKK",
579
+ "III GGG",
580
+ "LLLL HHHH",
581
+ "DDD GGG",
582
+ "BBB EEE",
583
+ "PPP CCC",
584
+ "LLLL HHH",
585
+ "III FFF",
586
+ "BBB OOO",
587
+ "BBB JJJ",
588
+ "LLLL CCCC",
589
+ "GGG JJJ",
590
+ "GGGGGGGG GGGGGGGG",
591
+ "LLL OOO",
592
+ "NNN OOO",
593
+ "AAA CCC",
594
+ "LLLL BBBB",
595
+ "DDD CCC",
596
+ "MMM HHH",
597
+ "IIIIIIII IIIIIIII"
598
+ ]
599
+ }
600
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "bos_token": "<|endoftext|>",
4
+ "clean_up_tokenization_spaces": true,
5
+ "eos_token": "<|endoftext|>",
6
+ "model_max_length": 1024,
7
+ "tokenizer_class": "GPT2Tokenizer",
8
+ "unk_token": "<|endoftext|>"
9
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11b012f47000eab94a4ee1c6d0c3db1587a011efe49804560b789ac58864ad25
3
+ size 3899
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"<[|endoftext|>]":0,"...":1,"\n":2,"A":3,"B":4,"C":5,"D":6,"E":7,"F":8,"G":9,"H":10,"I":11,"J":12,"K":13,"L":14,"M":15,"N":16,"O":17,"P":18,"Ċ":19,"LL":20,"HH":21,"AA":22,"BB":23,"FF":24,"KK":25,"PP":26,"GG":27,"NN":28,"OO":29,"MM":30,"CC":31,"EE":32,"II":33,"JJ":34,"DD":35,"HHHH":36,"LLLL":37,"AAAA":38,"FFFF":39,"BBBB":40,"KKKK":41,"PPPP":42,"NNNN":43,"OOOO":44,"MMMM":45,"GGGG":46,"EEEE":47,"CCCC":48,"IIII":49,"JJJJ":50,"DDDD":51,"LLL":52,"BBB":53,"HHH":54,"AAA":55,"FFF":56,"GGG":57,"KKK":58,"CCC":59,"MMM":60,"NNN":61,"PPP":62,"OOO":63,"EEE":64,"III":65,"JJJ":66,"DDD":67,"HHHHHHHH":68,"FFFFFFFF":69,"AAAAAAAA":70,"BBBBB":71,"LLLLLLLL":72,"KKKKKKKK":73,"NNNNNNNN":74,"PPPPP":75,"GGGGG":76,"CCCCC":77,"MMMMMMMM":78,"LLLLL":79,"OOOOO":80,"HHHHH":81,"EEEEE":82,"AAAAA":83,"IIIII":84,"DDDDD":85,"FFFFF":86,"JJJJJ":87,"KKKKK":88,"BBBBBBBB":89,"NNNNN":90,"MMMMM":91,"OOOOOOOO":92,"PPPPPPPP":93,"EEEEEEEE":94,"JJJJJJJJ":95,"IIIIIIII":96,"GGGGGGGG":97,"CCCCCCCC":98,"LLLLLL":99,"DDDDDDDD":100,"AAAAAA":101,"HHHHHH":102,"BBBBBB":103,"FFFFFF":104,"KKKKKK":105,"PPPPPP":106,"GGGGGG":107,"NNNNNN":108,"EEEEEE":109,"DDDDDD":110,"OOOOOO":111,"CCCCCC":112,"MMMMMM":113,"JJJJJJ":114,"IIIIII":115,"LLLLLLL":116,"AAAAAAA":117,"HHHHHHH":118,"BBBBBBB":119,"FFFFFFF":120,"KKKKKKK":121,"PPPPPPP":122,"MMMMMMM":123,"OOOOOOO":124,"GGGGGGG":125,"CCCCCCC":126,"NNNNNNN":127,"EEEEEEE":128,"IIIIIII":129,"JJJJJJJ":130,"DDDDDDD":131,"FFFFFFFFF":132,"AAAAAAAAA":133,"HHHHHHHHH":134,"BBBBBBBBB":135,"LLLLLLLLL":136,"KKKKKKKKK":137,"OOOOOOOOO":138,"NNNNNNNNN":139,"PPPPPPPPP":140,"GGGGGGGGG":141,"CCCCCCCCC":142,"MMMMMMMMM":143,"IIIIIIIII":144,"DDDDDDDDD":145,"EEEEEEEEE":146,"JJJJJJJJJ":147,"HHHHHHHHHH":148,"FFFFFFFFFF":149,"AAAAAAAAAA":150,"LLLLLLLLLL":151,"KKKKKKKKKK":152,"BBBBBBBBBB":153,"PPPPPPPPPP":154,"OOOOOOOOOO":155,"HHHHHHHHHHH":156,"NNNNNNNNNN":157,"EEEEEEEEEE":158,"MMMMMMMMMM":159,"FFFFFFFFFFF":160,"JJJJJJJJJJ":161,"DDDDDDDDDD":162,"CCCCCCCCCC":163,"IIIIIIIIII":164,"GGGGGGGGGG":165,"KKKKKKKKKKK":166,"AAAAAAAAAAA":167,"LLLLLLLLLLL":168,"BBBBBBBBBBB":169,"FFFFFFFFFFFFFFFF":170,"NNNNNNNNNNN":171,"MMMMMMMMMMM":172,"OOOOOOOOOOO":173,"PPPPPPPPPPP":174,"HHHHHHHHHHHHHHHH":175,"EEEEEEEEEEE":176,"JJJJJJJJJJJ":177,"IIIIIIIIIII":178,"AAAAAAAAAAAAAAAA":179,"CCCCCCCCCCC":180,"GGGGGGGGGGG":181,"LLLLLLLLLLLLLLLL":182,"DDDDDDDDDDD":183,"HHHHHHHHHHHH":184,"KKKKKKKKKKKKKKKK":185,"AAAAAAAAAAAA":186,"LLLMMM":187,"MMMMMMMMMMMMMMMM":188,"NNNNNNNNNNNNNNNN":189,"BBBBBBBBBBBBBBBB":190,"LLLHHH":191,"FFFFFFFFFFFF":192,"JJJJJJJJJJJJJJJJ":193,"LLLCCC":194,"LLLLLLLLLLLL":195,"LLLBBB":196,"KKKKKKKKKKKK":197,"IIIIIIIIIIII":198,"PPPPPPPPPPPP":199,"BBBHHH":200,"OOOOOOOOOOOOOOOO":201,"BBBBBBBBBBBB":202,"EEEEEEEEEEEE":203,"AAAOOO":204,"NNNNNNNNNNNN":205,"DDDDDDDDDDDD":206,"MMMMMMMMMMMM":207,"CCCCCCCCCCCC":208,"MMMCCC":209,"GGGGGGGGGGGG":210,"BBBLLL":211,"FFFFFFFFFFFFF":212,"BBBCCC":213,"BBBMMM":214,"DDDKKK":215,"BBBGGG":216,"LLLGGG":217,"OOOOOOOOOOOO":218,"HHHCCC":219,"FFFKKK":220,"HHHLLL":221,"AAAKKK":222,"EEEGGG":223,"MMMLLL":224,"BBBFFF":225,"AAAPPP":226,"AAAAAAAAAAAAA":227,"HHHHHHHHHHHHH":228,"AAAHHH":229,"JJJJJJJJJJJJ":230,"PPPPPPPPPPPPPPPP":231,"AAAFFF":232,"LLLKKK":233,"AAAGGG":234,"EEEKKK":235,"PPPFFF":236,"IIICCC":237,"EEEEEEEEEEEEEEEE":238,"LLLLCCC":239,"KKKKKKKKKKKKK":240,"NNNHHH":241,"NNNGGG":242,"LLLLLLLLLLLLL":243,"MMMMMMMMMMMMM":244,"BBBKKK":245,"AAAJJJ":246,"BBBBBBBBBBBBB":247,"BBBPPP":248,"HHHJJJ":249,"NNNKKK":250,"IIIGGG":251,"LLLLHHHH":252,"DDDGGG":253,"BBBEEE":254,"PPPCCC":255,"LLLLHHH":256,"IIIFFF":257,"BBBOOO":258,"BBBJJJ":259,"LLLLCCCC":260,"GGGJJJ":261,"GGGGGGGGGGGGGGGG":262,"LLLOOO":263,"NNNOOO":264,"AAACCC":265,"LLLLBBBB":266,"DDDCCC":267,"MMMHHH":268,"IIIIIIIIIIIIIIII":269}