echarlaix (HF staff) committed
Commit 096fd74
Parent(s): a165fab

commit files to HF hub

all_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+     "epoch": 2.0,
+     "eval_accuracy": 0.9140625,
+     "eval_loss": 0.4163683354854584,
+     "eval_runtime": 7.4147,
+     "eval_samples_per_second": 34.526,
+     "eval_steps_per_second": 4.316
+ }
best_configure.yaml ADDED
@@ -0,0 +1,539 @@
+ advance: null
+ approach: post_training_dynamic_quant
+ bf16_ops_list: []
+ calib_iteration: 1
+ calib_sampling_size: 100
+ framework: pytorch
+ op:
+   ? !!python/tuple
+   - distilbert.transformer.layer.0.attention.q_lin
+   - Linear
+   : weight:
+       dtype: int8
+       scheme: sym
+       granularity: per_channel
+       algorithm: minmax
+       bit: 7.0
+     activation:
+       dtype: uint8
+       scheme: asym
+       granularity: per_tensor
+       algorithm: minmax
+   ? !!python/tuple
+   - distilbert.transformer.layer.0.attention.k_lin
+   - Linear
+   : weight:
+       dtype: int8
+       scheme: sym
+       granularity: per_channel
+       algorithm: minmax
+       bit: 7.0
+     activation:
+       dtype: uint8
+       scheme: asym
+       granularity: per_tensor
+       algorithm: minmax
+   ? !!python/tuple
+   - distilbert.transformer.layer.0.attention.v_lin
+   - Linear
+   : weight:
+       dtype: int8
+       scheme: sym
+       granularity: per_channel
+       algorithm: minmax
+       bit: 7.0
+     activation:
+       dtype: uint8
+       scheme: asym
+       granularity: per_tensor
+       algorithm: minmax
+   ? !!python/tuple
+   - distilbert.transformer.layer.0.attention.out_lin
+   - Linear
+   : weight:
+       dtype: int8
+       scheme: sym
+       granularity: per_channel
+       algorithm: minmax
+       bit: 7.0
+     activation:
+       dtype: uint8
+       scheme: asym
+       granularity: per_tensor
+       algorithm: minmax
+   ? !!python/tuple
+   - distilbert.transformer.layer.0.ffn.lin1
+   - Linear
+   : weight:
+       dtype: int8
+       scheme: sym
+       granularity: per_channel
+       algorithm: minmax
+       bit: 7.0
+     activation:
+       dtype: uint8
+       scheme: asym
+       granularity: per_tensor
+       algorithm: minmax
+   ? !!python/tuple
+   - distilbert.transformer.layer.0.ffn.lin2
+   - Linear
+   : weight:
+       dtype: int8
+       scheme: sym
+       granularity: per_channel
+       algorithm: minmax
+       bit: 7.0
+     activation:
+       dtype: uint8
+       scheme: asym
+       granularity: per_tensor
+       algorithm: minmax
+   ? !!python/tuple
+   - distilbert.transformer.layer.1.attention.q_lin
+   - Linear
+   : weight:
+       dtype: int8
+       scheme: sym
+       granularity: per_channel
+       algorithm: minmax
+       bit: 7.0
+     activation:
+       dtype: uint8
+       scheme: asym
+       granularity: per_tensor
+       algorithm: minmax
+   ? !!python/tuple
+   - distilbert.transformer.layer.1.attention.k_lin
+   - Linear
+   : weight:
+       dtype: int8
+       scheme: sym
+       granularity: per_channel
+       algorithm: minmax
+       bit: 7.0
+     activation:
+       dtype: uint8
+       scheme: asym
+       granularity: per_tensor
+       algorithm: minmax
+   ? !!python/tuple
+   - distilbert.transformer.layer.1.attention.v_lin
+   - Linear
+   : weight:
+       dtype: int8
+       scheme: sym
+       granularity: per_channel
+       algorithm: minmax
+       bit: 7.0
+     activation:
+       dtype: uint8
+       scheme: asym
+       granularity: per_tensor
+       algorithm: minmax
+   ? !!python/tuple
+   - distilbert.transformer.layer.1.attention.out_lin
+   - Linear
+   : weight:
+       dtype: int8
+       scheme: sym
+       granularity: per_channel
+       algorithm: minmax
+       bit: 7.0
+     activation:
+       dtype: uint8
+       scheme: asym
+       granularity: per_tensor
+       algorithm: minmax
+   ? !!python/tuple
+   - distilbert.transformer.layer.1.ffn.lin1
+   - Linear
+   : weight:
+       dtype: int8
+       scheme: sym
+       granularity: per_channel
+       algorithm: minmax
+       bit: 7.0
+     activation:
+       dtype: uint8
+       scheme: asym
+       granularity: per_tensor
+       algorithm: minmax
+   ? !!python/tuple
+   - distilbert.transformer.layer.1.ffn.lin2
+   - Linear
+   : weight:
+       dtype: int8
+       scheme: sym
+       granularity: per_channel
+       algorithm: minmax
+       bit: 7.0
+     activation:
+       dtype: uint8
+       scheme: asym
+       granularity: per_tensor
+       algorithm: minmax
+   ? !!python/tuple
+   - distilbert.transformer.layer.2.attention.q_lin
+   - Linear
+   : weight:
+       dtype: int8
+       scheme: sym
+       granularity: per_channel
+       algorithm: minmax
+       bit: 7.0
+     activation:
+       dtype: uint8
+       scheme: asym
+       granularity: per_tensor
+       algorithm: minmax
+   ? !!python/tuple
+   - distilbert.transformer.layer.2.attention.k_lin
+   - Linear
+   : weight:
+       dtype: int8
+       scheme: sym
+       granularity: per_channel
+       algorithm: minmax
+       bit: 7.0
+     activation:
+       dtype: uint8
+       scheme: asym
+       granularity: per_tensor
+       algorithm: minmax
+   ? !!python/tuple
+   - distilbert.transformer.layer.2.attention.v_lin
+   - Linear
+   : weight:
+       dtype: int8
+       scheme: sym
+       granularity: per_channel
+       algorithm: minmax
+       bit: 7.0
+     activation:
+       dtype: uint8
+       scheme: asym
+       granularity: per_tensor
+       algorithm: minmax
+   ? !!python/tuple
+   - distilbert.transformer.layer.2.attention.out_lin
+   - Linear
+   : weight:
+       dtype: int8
+       scheme: sym
+       granularity: per_channel
+       algorithm: minmax
+       bit: 7.0
+     activation:
+       dtype: uint8
+       scheme: asym
+       granularity: per_tensor
+       algorithm: minmax
+   ? !!python/tuple
+   - distilbert.transformer.layer.2.ffn.lin1
+   - Linear
+   : weight:
+       dtype: int8
+       scheme: sym
+       granularity: per_channel
+       algorithm: minmax
+       bit: 7.0
+     activation:
+       dtype: uint8
+       scheme: asym
+       granularity: per_tensor
+       algorithm: minmax
+   ? !!python/tuple
+   - distilbert.transformer.layer.2.ffn.lin2
+   - Linear
+   : weight:
+       dtype: int8
+       scheme: sym
+       granularity: per_channel
+       algorithm: minmax
+       bit: 7.0
+     activation:
+       dtype: uint8
+       scheme: asym
+       granularity: per_tensor
+       algorithm: minmax
+   ? !!python/tuple
+   - distilbert.transformer.layer.3.attention.q_lin
+   - Linear
+   : weight:
+       dtype: int8
+       scheme: sym
+       granularity: per_channel
+       algorithm: minmax
+       bit: 7.0
+     activation:
+       dtype: uint8
+       scheme: asym
+       granularity: per_tensor
+       algorithm: minmax
+   ? !!python/tuple
+   - distilbert.transformer.layer.3.attention.k_lin
+   - Linear
+   : weight:
+       dtype: int8
+       scheme: sym
+       granularity: per_channel
+       algorithm: minmax
+       bit: 7.0
+     activation:
+       dtype: uint8
+       scheme: asym
+       granularity: per_tensor
+       algorithm: minmax
+   ? !!python/tuple
+   - distilbert.transformer.layer.3.attention.v_lin
+   - Linear
+   : weight:
+       dtype: int8
+       scheme: sym
+       granularity: per_channel
+       algorithm: minmax
+       bit: 7.0
+     activation:
+       dtype: uint8
+       scheme: asym
+       granularity: per_tensor
+       algorithm: minmax
+   ? !!python/tuple
+   - distilbert.transformer.layer.3.attention.out_lin
+   - Linear
+   : weight:
+       dtype: int8
+       scheme: sym
+       granularity: per_channel
+       algorithm: minmax
+       bit: 7.0
+     activation:
+       dtype: uint8
+       scheme: asym
+       granularity: per_tensor
+       algorithm: minmax
+   ? !!python/tuple
+   - distilbert.transformer.layer.3.ffn.lin1
+   - Linear
+   : weight:
+       dtype: int8
+       scheme: sym
+       granularity: per_channel
+       algorithm: minmax
+       bit: 7.0
+     activation:
+       dtype: uint8
+       scheme: asym
+       granularity: per_tensor
+       algorithm: minmax
+   ? !!python/tuple
+   - distilbert.transformer.layer.3.ffn.lin2
+   - Linear
+   : weight:
+       dtype: int8
+       scheme: sym
+       granularity: per_channel
+       algorithm: minmax
+       bit: 7.0
+     activation:
+       dtype: uint8
+       scheme: asym
+       granularity: per_tensor
+       algorithm: minmax
+   ? !!python/tuple
+   - distilbert.transformer.layer.4.attention.q_lin
+   - Linear
+   : weight:
+       dtype: int8
+       scheme: sym
+       granularity: per_channel
+       algorithm: minmax
+       bit: 7.0
+     activation:
+       dtype: uint8
+       scheme: asym
+       granularity: per_tensor
+       algorithm: minmax
+   ? !!python/tuple
+   - distilbert.transformer.layer.4.attention.k_lin
+   - Linear
+   : weight:
+       dtype: int8
+       scheme: sym
+       granularity: per_channel
+       algorithm: minmax
+       bit: 7.0
+     activation:
+       dtype: uint8
+       scheme: asym
+       granularity: per_tensor
+       algorithm: minmax
+   ? !!python/tuple
+   - distilbert.transformer.layer.4.attention.v_lin
+   - Linear
+   : weight:
+       dtype: int8
+       scheme: sym
+       granularity: per_channel
+       algorithm: minmax
+       bit: 7.0
+     activation:
+       dtype: uint8
+       scheme: asym
+       granularity: per_tensor
+       algorithm: minmax
+   ? !!python/tuple
+   - distilbert.transformer.layer.4.attention.out_lin
+   - Linear
+   : weight:
+       dtype: int8
+       scheme: sym
+       granularity: per_channel
+       algorithm: minmax
+       bit: 7.0
+     activation:
+       dtype: uint8
+       scheme: asym
+       granularity: per_tensor
+       algorithm: minmax
+   ? !!python/tuple
+   - distilbert.transformer.layer.4.ffn.lin1
+   - Linear
+   : weight:
+       dtype: int8
+       scheme: sym
+       granularity: per_channel
+       algorithm: minmax
+       bit: 7.0
+     activation:
+       dtype: uint8
+       scheme: asym
+       granularity: per_tensor
+       algorithm: minmax
+   ? !!python/tuple
+   - distilbert.transformer.layer.4.ffn.lin2
+   - Linear
+   : weight:
+       dtype: int8
+       scheme: sym
+       granularity: per_channel
+       algorithm: minmax
+       bit: 7.0
+     activation:
+       dtype: uint8
+       scheme: asym
+       granularity: per_tensor
+       algorithm: minmax
+   ? !!python/tuple
+   - distilbert.transformer.layer.5.attention.q_lin
+   - Linear
+   : weight:
+       dtype: int8
+       scheme: sym
+       granularity: per_channel
+       algorithm: minmax
+       bit: 7.0
+     activation:
+       dtype: uint8
+       scheme: asym
+       granularity: per_tensor
+       algorithm: minmax
+   ? !!python/tuple
+   - distilbert.transformer.layer.5.attention.k_lin
+   - Linear
+   : weight:
+       dtype: int8
+       scheme: sym
+       granularity: per_channel
+       algorithm: minmax
+       bit: 7.0
+     activation:
+       dtype: uint8
+       scheme: asym
+       granularity: per_tensor
+       algorithm: minmax
+   ? !!python/tuple
+   - distilbert.transformer.layer.5.attention.v_lin
+   - Linear
+   : weight:
+       dtype: int8
+       scheme: sym
+       granularity: per_channel
+       algorithm: minmax
+       bit: 7.0
+     activation:
+       dtype: uint8
+       scheme: asym
+       granularity: per_tensor
+       algorithm: minmax
+   ? !!python/tuple
+   - distilbert.transformer.layer.5.attention.out_lin
+   - Linear
+   : weight:
+       dtype: int8
+       scheme: sym
+       granularity: per_channel
+       algorithm: minmax
+       bit: 7.0
+     activation:
+       dtype: uint8
+       scheme: asym
+       granularity: per_tensor
+       algorithm: minmax
+   ? !!python/tuple
+   - distilbert.transformer.layer.5.ffn.lin1
+   - Linear
+   : weight:
+       dtype: int8
+       scheme: sym
+       granularity: per_channel
+       algorithm: minmax
+       bit: 7.0
+     activation:
+       dtype: uint8
+       scheme: asym
+       granularity: per_tensor
+       algorithm: minmax
+   ? !!python/tuple
+   - distilbert.transformer.layer.5.ffn.lin2
+   - Linear
+   : weight:
+       dtype: int8
+       scheme: sym
+       granularity: per_channel
+       algorithm: minmax
+       bit: 7.0
+     activation:
+       dtype: uint8
+       scheme: asym
+       granularity: per_tensor
+       algorithm: minmax
+   ? !!python/tuple
+   - pre_classifier
+   - Linear
+   : weight:
+       dtype: int8
+       scheme: sym
+       granularity: per_channel
+       algorithm: minmax
+       bit: 7.0
+     activation:
+       dtype: uint8
+       scheme: asym
+       granularity: per_tensor
+       algorithm: minmax
+   ? !!python/tuple
+   - classifier
+   - Linear
+   : weight:
+       dtype: int8
+       scheme: sym
+       granularity: per_channel
+       algorithm: minmax
+       bit: 7.0
+     activation:
+       dtype: uint8
+       scheme: asym
+       granularity: per_tensor
+       algorithm: minmax
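
Note: best_configure.yaml above records the operator-level recipe chosen by Intel Neural Compressor for post-training dynamic quantization (int8 per-channel weights, uint8 per-tensor activations for every Linear module). As a rough, hypothetical sketch of the same idea using plain PyTorch rather than the exact recipe stored here:

# Hypothetical sketch, not the recipe in best_configure.yaml: dynamically
# quantize the Linear modules of the base checkpoint named in config.json.
import torch
from transformers import AutoModelForSequenceClassification

model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased-finetuned-sst-2-english"
)
model.eval()

# Weights are converted to int8 ahead of time; activations are quantized on
# the fly at inference, mirroring the post_training_dynamic_quant approach.
quantized = torch.quantization.quantize_dynamic(
    model, {torch.nn.Linear}, dtype=torch.qint8
)
print(quantized)
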
config.json ADDED
@@ -0,0 +1,35 @@
+ {
+   "_name_or_path": "distilbert-base-uncased-finetuned-sst-2-english",
+   "activation": "gelu",
+   "architectures": [
+     "DistilBertForSequenceClassification"
+   ],
+   "attention_dropout": 0.1,
+   "dim": 768,
+   "dropout": 0.1,
+   "finetuning_task": "sst-2",
+   "hidden_dim": 3072,
+   "id2label": {
+     "0": "NEGATIVE",
+     "1": "POSITIVE"
+   },
+   "initializer_range": 0.02,
+   "label2id": {
+     "NEGATIVE": 0,
+     "POSITIVE": 1
+   },
+   "max_position_embeddings": 512,
+   "model_type": "distilbert",
+   "n_heads": 12,
+   "n_layers": 6,
+   "output_past": true,
+   "pad_token_id": 0,
+   "problem_type": "single_label_classification",
+   "qa_dropout": 0.1,
+   "seq_classif_dropout": 0.2,
+   "sinusoidal_pos_embds": false,
+   "tie_weights_": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.16.2",
+   "vocab_size": 30522
+ }
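
Note: config.json declares a DistilBertForSequenceClassification head with the NEGATIVE/POSITIVE label mapping for SST-2. A minimal usage sketch with the standard transformers API, using the upstream checkpoint named in "_name_or_path" as an assumption (the int8 weights committed in this repo are an Intel Neural Compressor artifact and are typically restored with its own loading utilities, not shown here):

# Minimal sketch based on config.json; the model id is taken from
# "_name_or_path" and the label mapping from "id2label".
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

name = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(name)
model = AutoModelForSequenceClassification.from_pretrained(name)

inputs = tokenizer("a charming and often affecting journey", return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits
print(model.config.id2label[int(logits.argmax(dim=-1))])  # POSITIVE or NEGATIVE
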
eval_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+     "epoch": 2.0,
+     "eval_accuracy": 0.9140625,
+     "eval_loss": 0.4163683354854584,
+     "eval_runtime": 7.4147,
+     "eval_samples_per_second": 34.526,
+     "eval_steps_per_second": 4.316
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5c26bbb1c8fe2e14ebb47bb01a181fbecbc9be9701eebca44aa7aa59af26fa0d
+ size 139407425
special_tokens_map.json ADDED
@@ -0,0 +1 @@
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "distilbert-base-uncased-finetuned-sst-2-english", "do_basic_tokenize": true, "never_split": null, "tokenizer_class": "DistilBertTokenizer"}
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:517b5da8c631bd38130817f6712c8388a1c2674d4b6c2214a2d0ba5cd8999187
+ size 2991
vocab.txt ADDED
The diff for this file is too large to render. See raw diff