Kalslice committed on
Commit
4d264ea
1 Parent(s): 5d8168a

Upload 4 files

Browse files
Files changed (4) hide show
  1. config.json +420 -0
  2. generation_config.json +6 -0
  3. model.safetensors +3 -0
  4. training_args.bin +3 -0
config.json ADDED
@@ -0,0 +1,420 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "gpt2-large",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 1280,
16
+ "n_head": 20,
17
+ "n_inner": null,
18
+ "n_layer": 36,
19
+ "n_positions": 1024,
20
+ "output_attentions": true,
21
+ "pruned_heads": {
22
+ "2": [
23
+ 5
24
+ ],
25
+ "5": [
26
+ 4
27
+ ],
28
+ "8": [
29
+ 1,
30
+ 18,
31
+ 19,
32
+ 14
33
+ ],
34
+ "9": [
35
+ 0,
36
+ 18,
37
+ 5,
38
+ 9,
39
+ 10,
40
+ 11
41
+ ],
42
+ "10": [
43
+ 1,
44
+ 18,
45
+ 17,
46
+ 19,
47
+ 15
48
+ ],
49
+ "11": [
50
+ 0,
51
+ 1,
52
+ 18,
53
+ 19,
54
+ 7,
55
+ 12,
56
+ 13,
57
+ 14
58
+ ],
59
+ "12": [
60
+ 1,
61
+ 3,
62
+ 4,
63
+ 5,
64
+ 6,
65
+ 7,
66
+ 9,
67
+ 11,
68
+ 13,
69
+ 14,
70
+ 18
71
+ ],
72
+ "13": [
73
+ 1,
74
+ 4,
75
+ 5,
76
+ 6,
77
+ 7,
78
+ 14,
79
+ 15
80
+ ],
81
+ "14": [
82
+ 19,
83
+ 3,
84
+ 15
85
+ ],
86
+ "15": [
87
+ 1,
88
+ 17,
89
+ 4,
90
+ 12,
91
+ 13
92
+ ],
93
+ "16": [
94
+ 0,
95
+ 2,
96
+ 6,
97
+ 9,
98
+ 15
99
+ ],
100
+ "17": [
101
+ 0,
102
+ 4,
103
+ 5,
104
+ 6,
105
+ 11,
106
+ 12,
107
+ 13,
108
+ 14,
109
+ 16,
110
+ 17,
111
+ 19
112
+ ],
113
+ "18": [
114
+ 2,
115
+ 4,
116
+ 7,
117
+ 9,
118
+ 11,
119
+ 12,
120
+ 16,
121
+ 17,
122
+ 19
123
+ ],
124
+ "19": [
125
+ 1,
126
+ 2,
127
+ 4,
128
+ 6,
129
+ 8,
130
+ 11,
131
+ 12,
132
+ 13,
133
+ 15,
134
+ 17,
135
+ 18
136
+ ],
137
+ "20": [
138
+ 0,
139
+ 1,
140
+ 2,
141
+ 4,
142
+ 6,
143
+ 7,
144
+ 8,
145
+ 10,
146
+ 11,
147
+ 14,
148
+ 15,
149
+ 16,
150
+ 17,
151
+ 18,
152
+ 19
153
+ ],
154
+ "21": [
155
+ 1,
156
+ 2,
157
+ 4,
158
+ 5,
159
+ 8,
160
+ 11,
161
+ 12,
162
+ 14,
163
+ 15,
164
+ 16,
165
+ 17,
166
+ 18,
167
+ 19
168
+ ],
169
+ "22": [
170
+ 0,
171
+ 2,
172
+ 3,
173
+ 4,
174
+ 5,
175
+ 6,
176
+ 7,
177
+ 8,
178
+ 10,
179
+ 12,
180
+ 13,
181
+ 14,
182
+ 16,
183
+ 18,
184
+ 19
185
+ ],
186
+ "23": [
187
+ 0,
188
+ 1,
189
+ 2,
190
+ 3,
191
+ 5,
192
+ 7,
193
+ 8,
194
+ 12,
195
+ 13,
196
+ 14,
197
+ 15
198
+ ],
199
+ "24": [
200
+ 1,
201
+ 5,
202
+ 6,
203
+ 8,
204
+ 10,
205
+ 11,
206
+ 13,
207
+ 14,
208
+ 15,
209
+ 16,
210
+ 18,
211
+ 19
212
+ ],
213
+ "25": [
214
+ 0,
215
+ 1,
216
+ 3,
217
+ 5,
218
+ 6,
219
+ 7,
220
+ 8,
221
+ 9,
222
+ 10,
223
+ 11,
224
+ 13,
225
+ 14,
226
+ 15,
227
+ 16,
228
+ 18,
229
+ 19
230
+ ],
231
+ "26": [
232
+ 0,
233
+ 1,
234
+ 2,
235
+ 3,
236
+ 4,
237
+ 6,
238
+ 8,
239
+ 9,
240
+ 10,
241
+ 12,
242
+ 13,
243
+ 14,
244
+ 15,
245
+ 16,
246
+ 17
247
+ ],
248
+ "27": [
249
+ 0,
250
+ 1,
251
+ 2,
252
+ 4,
253
+ 6,
254
+ 7,
255
+ 9,
256
+ 10,
257
+ 12,
258
+ 13,
259
+ 14,
260
+ 16,
261
+ 17,
262
+ 18
263
+ ],
264
+ "28": [
265
+ 0,
266
+ 1,
267
+ 2,
268
+ 3,
269
+ 4,
270
+ 5,
271
+ 6,
272
+ 7,
273
+ 8,
274
+ 9,
275
+ 10,
276
+ 12,
277
+ 13,
278
+ 14,
279
+ 16,
280
+ 17,
281
+ 18,
282
+ 19
283
+ ],
284
+ "29": [
285
+ 0,
286
+ 1,
287
+ 2,
288
+ 3,
289
+ 5,
290
+ 6,
291
+ 7,
292
+ 8,
293
+ 9,
294
+ 10,
295
+ 11,
296
+ 12,
297
+ 13,
298
+ 14,
299
+ 15,
300
+ 16,
301
+ 17,
302
+ 18,
303
+ 19
304
+ ],
305
+ "30": [
306
+ 0,
307
+ 1,
308
+ 2,
309
+ 3,
310
+ 4,
311
+ 6,
312
+ 7,
313
+ 8,
314
+ 9,
315
+ 10,
316
+ 11,
317
+ 12,
318
+ 13,
319
+ 14,
320
+ 15,
321
+ 16,
322
+ 17,
323
+ 18,
324
+ 19
325
+ ],
326
+ "31": [
327
+ 0,
328
+ 1,
329
+ 2,
330
+ 4,
331
+ 5,
332
+ 6,
333
+ 7,
334
+ 8,
335
+ 9,
336
+ 10,
337
+ 11,
338
+ 12,
339
+ 13,
340
+ 14,
341
+ 15,
342
+ 16,
343
+ 17,
344
+ 18,
345
+ 19
346
+ ],
347
+ "32": [
348
+ 0,
349
+ 6,
350
+ 7,
351
+ 8,
352
+ 9,
353
+ 10,
354
+ 11,
355
+ 12,
356
+ 13,
357
+ 14,
358
+ 15,
359
+ 16,
360
+ 17,
361
+ 19
362
+ ],
363
+ "33": [
364
+ 0,
365
+ 1,
366
+ 2,
367
+ 3,
368
+ 4,
369
+ 5,
370
+ 6,
371
+ 7,
372
+ 9,
373
+ 10,
374
+ 12,
375
+ 13,
376
+ 14,
377
+ 15,
378
+ 17,
379
+ 18,
380
+ 19
381
+ ],
382
+ "34": [
383
+ 0,
384
+ 1,
385
+ 3,
386
+ 5,
387
+ 7,
388
+ 9,
389
+ 12,
390
+ 13,
391
+ 14,
392
+ 16,
393
+ 17,
394
+ 18,
395
+ 19
396
+ ],
397
+ "35": [
398
+ 1
399
+ ]
400
+ },
401
+ "reorder_and_upcast_attn": false,
402
+ "resid_pdrop": 0.1,
403
+ "scale_attn_by_inverse_layer_idx": false,
404
+ "scale_attn_weights": true,
405
+ "summary_activation": null,
406
+ "summary_first_dropout": 0.1,
407
+ "summary_proj_to_labels": true,
408
+ "summary_type": "cls_index",
409
+ "summary_use_proj": true,
410
+ "task_specific_params": {
411
+ "text-generation": {
412
+ "do_sample": true,
413
+ "max_length": 50
414
+ }
415
+ },
416
+ "torch_dtype": "float32",
417
+ "transformers_version": "4.36.2",
418
+ "use_cache": true,
419
+ "vocab_size": 50257
420
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": 50256,
5
+ "transformers_version": "4.36.2"
6
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63811eb209e5cddf23510dd24c874a28b6d889f8a3387f5e728a7497335957ce
3
+ size 2679112656
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be2532a4612ca886a9a2e75cc42da8908dc8975b917c63ef6548b223c17acfaf
3
+ size 4283