GeneZC committed
Commit fc255e2
1 parent: 3829595

Upload 5 files

config.json ADDED
@@ -0,0 +1,706 @@
+ {
+   "_name_or_path": "/mnt/dolphinfs/ssd_pool/docker/user/hadoop-aipnlp/zhangchen76/afo_outputs/sparsebert_hidden_minilm_0.8,0.9S_1/ckpt-last",
+   "architectures": [
+     "SparseBertHiddenMiniLM"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "classifier_dropout": null,
+   "gradient_checkpointing": false,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-12,
+   "layer_skip": 1,
+   "max_position_embeddings": 512,
+   "model_type": "bert",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "num_relation_heads": 32,
+   "pad_token_id": 0,
+   "position_embedding_type": "absolute",
+   "sparsity": "90",
+   "sparsity_map": {
+     "0": {
+       "head": {},
+       "hidden": {
+         "-1": 0,
+         "0": 0,
+         "1": 0,
+         "10": 0,
+         "11": 0,
+         "2": 0,
+         "3": 0,
+         "4": 0,
+         "5": 0,
+         "6": 0,
+         "7": 0,
+         "8": 0,
+         "9": 0
+       },
+       "neuron": {}
+     },
+     "10": {
+       "head": {
+         "1": 5,
+         "11": 1,
+         "3": 1
+       },
+       "hidden": {
+         "-1": 38,
+         "0": 38,
+         "1": 38,
+         "10": 38,
+         "11": 38,
+         "2": 38,
+         "3": 38,
+         "4": 38,
+         "5": 38,
+         "6": 38,
+         "7": 38,
+         "8": 38,
+         "9": 38
+       },
+       "neuron": {
+         "0": 190,
+         "1": 298,
+         "11": 1,
+         "2": 1323,
+         "3": 4,
+         "4": 5,
+         "5": 8,
+         "6": 4,
+         "7": 7,
+         "8": 2,
+         "9": 1
+       }
+     },
+     "20": {
+       "head": {
+         "0": 1,
+         "1": 7,
+         "10": 1,
+         "11": 4,
+         "3": 2,
+         "9": 1
+       },
+       "hidden": {
+         "-1": 84,
+         "0": 84,
+         "1": 84,
+         "10": 84,
+         "11": 84,
+         "2": 84,
+         "3": 84,
+         "4": 84,
+         "5": 84,
+         "6": 84,
+         "7": 84,
+         "8": 84,
+         "9": 84
+       },
+       "neuron": {
+         "0": 597,
+         "1": 802,
+         "11": 16,
+         "2": 2294,
+         "3": 48,
+         "4": 78,
+         "5": 123,
+         "6": 22,
+         "7": 47,
+         "8": 23,
+         "9": 5
+       }
+     },
+     "30": {
+       "head": {
+         "0": 1,
+         "1": 8,
+         "10": 2,
+         "11": 4,
+         "2": 1,
+         "3": 2,
+         "6": 1,
+         "7": 1,
+         "8": 1,
+         "9": 2
+       },
+       "hidden": {
+         "-1": 123,
+         "0": 123,
+         "1": 123,
+         "10": 123,
+         "11": 123,
+         "2": 123,
+         "3": 123,
+         "4": 123,
+         "5": 123,
+         "6": 123,
+         "7": 123,
+         "8": 123,
+         "9": 123
+       },
+       "neuron": {
+         "0": 980,
+         "1": 1233,
+         "10": 2,
+         "11": 116,
+         "2": 2512,
+         "3": 132,
+         "4": 233,
+         "5": 386,
+         "6": 61,
+         "7": 146,
+         "8": 73,
+         "9": 24
+       }
+     },
+     "40": {
+       "head": {
+         "0": 3,
+         "1": 8,
+         "10": 2,
+         "11": 5,
+         "2": 1,
+         "3": 3,
+         "4": 2,
+         "5": 1,
+         "6": 2,
+         "7": 1,
+         "8": 1,
+         "9": 4
+       },
+       "hidden": {
+         "-1": 177,
+         "0": 177,
+         "1": 177,
+         "10": 177,
+         "11": 177,
+         "2": 177,
+         "3": 177,
+         "4": 177,
+         "5": 177,
+         "6": 177,
+         "7": 177,
+         "8": 177,
+         "9": 177
+       },
+       "neuron": {
+         "0": 1436,
+         "1": 1664,
+         "10": 6,
+         "11": 518,
+         "2": 2634,
+         "3": 306,
+         "4": 449,
+         "5": 732,
+         "6": 138,
+         "7": 321,
+         "8": 184,
+         "9": 91
+       }
+     },
+     "50": {
+       "head": {
+         "0": 5,
+         "1": 8,
+         "10": 3,
+         "11": 5,
+         "2": 2,
+         "3": 3,
+         "4": 3,
+         "5": 2,
+         "6": 3,
+         "7": 2,
+         "8": 2,
+         "9": 4
+       },
+       "hidden": {
+         "-1": 223,
+         "0": 223,
+         "1": 223,
+         "10": 223,
+         "11": 223,
+         "2": 223,
+         "3": 223,
+         "4": 223,
+         "5": 223,
+         "6": 223,
+         "7": 223,
+         "8": 223,
+         "9": 223
+       },
+       "neuron": {
+         "0": 1749,
+         "1": 1933,
+         "10": 21,
+         "11": 915,
+         "2": 2699,
+         "3": 519,
+         "4": 664,
+         "5": 976,
+         "6": 252,
+         "7": 469,
+         "8": 307,
+         "9": 187
+       }
+     },
+     "60": {
+       "head": {
+         "0": 6,
+         "1": 8,
+         "10": 5,
+         "11": 7,
+         "2": 3,
+         "3": 4,
+         "4": 4,
+         "5": 3,
+         "6": 5,
+         "7": 2,
+         "8": 2,
+         "9": 4
+       },
+       "hidden": {
+         "-1": 284,
+         "0": 284,
+         "1": 284,
+         "10": 284,
+         "11": 284,
+         "2": 284,
+         "3": 284,
+         "4": 284,
+         "5": 284,
+         "6": 284,
+         "7": 284,
+         "8": 284,
+         "9": 284
+       },
+       "neuron": {
+         "0": 2065,
+         "1": 2200,
+         "10": 67,
+         "11": 1392,
+         "2": 2762,
+         "3": 817,
+         "4": 991,
+         "5": 1279,
+         "6": 454,
+         "7": 695,
+         "8": 521,
+         "9": 397
+       }
+     },
+     "70": {
+       "head": {
+         "0": 6,
+         "1": 8,
+         "10": 6,
+         "11": 7,
+         "2": 4,
+         "3": 6,
+         "4": 4,
+         "5": 5,
+         "6": 6,
+         "7": 3,
+         "8": 5,
+         "9": 5
+       },
+       "hidden": {
+         "-1": 346,
+         "0": 346,
+         "1": 346,
+         "10": 346,
+         "11": 346,
+         "2": 346,
+         "3": 346,
+         "4": 346,
+         "5": 346,
+         "6": 346,
+         "7": 346,
+         "8": 346,
+         "9": 346
+       },
+       "neuron": {
+         "0": 2266,
+         "1": 2390,
+         "10": 206,
+         "11": 1745,
+         "2": 2820,
+         "3": 1173,
+         "4": 1308,
+         "5": 1537,
+         "6": 694,
+         "7": 951,
+         "8": 791,
+         "9": 708
+       }
+     },
+     "80": {
+       "head": {
+         "0": 6,
+         "1": 9,
+         "10": 8,
+         "11": 8,
+         "2": 6,
+         "3": 6,
+         "4": 6,
+         "5": 9,
+         "6": 7,
+         "7": 4,
+         "8": 5,
+         "9": 5
+       },
+       "hidden": {
+         "-1": 422,
+         "0": 422,
+         "1": 422,
+         "10": 422,
+         "11": 422,
+         "2": 422,
+         "3": 422,
+         "4": 422,
+         "5": 422,
+         "6": 422,
+         "7": 422,
+         "8": 422,
+         "9": 422
+       },
+       "neuron": {
+         "0": 2450,
+         "1": 2541,
+         "10": 574,
+         "11": 2105,
+         "2": 2863,
+         "3": 1559,
+         "4": 1676,
+         "5": 1862,
+         "6": 1071,
+         "7": 1294,
+         "8": 1154,
+         "9": 1126
+       }
+     },
+     "85": {
+       "head": {
+         "0": 6,
+         "1": 9,
+         "10": 10,
+         "11": 8,
+         "2": 6,
+         "3": 6,
+         "4": 7,
+         "5": 9,
+         "6": 8,
+         "7": 7,
+         "8": 5,
+         "9": 7
+       },
+       "hidden": {
+         "-1": 468,
+         "0": 468,
+         "1": 468,
+         "10": 468,
+         "11": 468,
+         "2": 468,
+         "3": 468,
+         "4": 468,
+         "5": 468,
+         "6": 468,
+         "7": 468,
+         "8": 468,
+         "9": 468
+       },
+       "neuron": {
+         "0": 2554,
+         "1": 2635,
+         "10": 936,
+         "11": 2278,
+         "2": 2889,
+         "3": 1780,
+         "4": 1836,
+         "5": 2037,
+         "6": 1292,
+         "7": 1489,
+         "8": 1360,
+         "9": 1401
+       }
+     },
+     "90": {
+       "head": {
+         "0": 6,
+         "1": 9,
+         "10": 11,
+         "11": 8,
+         "2": 7,
+         "3": 7,
+         "4": 9,
+         "5": 9,
+         "6": 9,
+         "7": 9,
+         "8": 6,
+         "9": 8
+       },
+       "hidden": {
+         "-1": 522,
+         "0": 522,
+         "1": 522,
+         "10": 522,
+         "11": 522,
+         "2": 522,
+         "3": 522,
+         "4": 522,
+         "5": 522,
+         "6": 522,
+         "7": 522,
+         "8": 522,
+         "9": 522
+       },
+       "neuron": {
+         "0": 2653,
+         "1": 2713,
+         "10": 1390,
+         "11": 2433,
+         "2": 2913,
+         "3": 2013,
+         "4": 2064,
+         "5": 2243,
+         "6": 1569,
+         "7": 1741,
+         "8": 1648,
+         "9": 1688
+       }
+     },
+     "95": {
+       "head": {
+         "0": 8,
+         "1": 10,
+         "10": 11,
+         "11": 9,
+         "2": 9,
+         "3": 9,
+         "4": 9,
+         "5": 9,
+         "6": 11,
+         "7": 9,
+         "8": 9,
+         "9": 9
+       },
+       "hidden": {
+         "-1": 599,
+         "0": 599,
+         "1": 599,
+         "10": 599,
+         "11": 599,
+         "2": 599,
+         "3": 599,
+         "4": 599,
+         "5": 599,
+         "6": 599,
+         "7": 599,
+         "8": 599,
+         "9": 599
+       },
+       "neuron": {
+         "0": 2753,
+         "1": 2799,
+         "10": 2001,
+         "11": 2654,
+         "2": 2948,
+         "3": 2337,
+         "4": 2381,
+         "5": 2519,
+         "6": 1992,
+         "7": 2148,
+         "8": 2061,
+         "9": 2161
+       }
+     },
+     "96": {
+       "head": {
+         "0": 9,
+         "1": 10,
+         "10": 11,
+         "11": 10,
+         "2": 9,
+         "3": 9,
+         "4": 9,
+         "5": 10,
+         "6": 11,
+         "7": 9,
+         "8": 9,
+         "9": 9
+       },
+       "hidden": {
+         "-1": 614,
+         "0": 614,
+         "1": 614,
+         "10": 614,
+         "11": 614,
+         "2": 614,
+         "3": 614,
+         "4": 614,
+         "5": 614,
+         "6": 614,
+         "7": 614,
+         "8": 614,
+         "9": 614
+       },
+       "neuron": {
+         "0": 2768,
+         "1": 2822,
+         "10": 2133,
+         "11": 2689,
+         "2": 2955,
+         "3": 2392,
+         "4": 2439,
+         "5": 2579,
+         "6": 2077,
+         "7": 2223,
+         "8": 2148,
+         "9": 2266
+       }
+     },
+     "97": {
+       "head": {
+         "0": 10,
+         "1": 10,
+         "10": 11,
+         "11": 10,
+         "2": 9,
+         "3": 9,
+         "4": 9,
+         "5": 10,
+         "6": 11,
+         "7": 10,
+         "8": 11,
+         "9": 10
+       },
+       "hidden": {
+         "-1": 637,
+         "0": 637,
+         "1": 637,
+         "10": 637,
+         "11": 637,
+         "2": 637,
+         "3": 637,
+         "4": 637,
+         "5": 637,
+         "6": 637,
+         "7": 637,
+         "8": 637,
+         "9": 637
+       },
+       "neuron": {
+         "0": 2799,
+         "1": 2846,
+         "10": 2294,
+         "11": 2746,
+         "2": 2970,
+         "3": 2487,
+         "4": 2537,
+         "5": 2656,
+         "6": 2207,
+         "7": 2355,
+         "8": 2291,
+         "9": 2409
+       }
+     },
+     "98": {
+       "head": {
+         "0": 11,
+         "1": 10,
+         "10": 11,
+         "11": 10,
+         "2": 10,
+         "3": 10,
+         "4": 10,
+         "5": 10,
+         "6": 11,
+         "7": 10,
+         "8": 11,
+         "9": 10
+       },
+       "hidden": {
+         "-1": 660,
+         "0": 660,
+         "1": 660,
+         "10": 660,
+         "11": 660,
+         "2": 660,
+         "3": 660,
+         "4": 660,
+         "5": 660,
+         "6": 660,
+         "7": 660,
+         "8": 660,
+         "9": 660
+       },
+       "neuron": {
+         "0": 2826,
+         "1": 2870,
+         "10": 2455,
+         "11": 2794,
+         "2": 2984,
+         "3": 2588,
+         "4": 2634,
+         "5": 2726,
+         "6": 2350,
+         "7": 2501,
+         "8": 2440,
+         "9": 2535
+       }
+     },
+     "99": {
+       "head": {
+         "0": 11,
+         "1": 10,
+         "10": 11,
+         "11": 11,
+         "2": 11,
+         "3": 11,
+         "4": 11,
+         "5": 11,
+         "6": 11,
+         "7": 11,
+         "8": 11,
+         "9": 10
+       },
+       "hidden": {
+         "-1": 691,
+         "0": 691,
+         "1": 691,
+         "10": 691,
+         "11": 691,
+         "2": 691,
+         "3": 691,
+         "4": 691,
+         "5": 691,
+         "6": 691,
+         "7": 691,
+         "8": 691,
+         "9": 691
+       },
+       "neuron": {
+         "0": 2861,
+         "1": 2915,
+         "10": 2683,
+         "11": 2862,
+         "2": 2998,
+         "3": 2709,
+         "4": 2746,
+         "5": 2845,
+         "6": 2551,
+         "7": 2678,
+         "8": 2635,
+         "9": 2695
+       }
+     }
+   },
+   "torch_dtype": "float32",
+   "transformers_version": "4.9.2",
+   "type_vocab_size": 2,
+   "use_cache": true,
+   "vocab_size": 30522
+ }
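
Note: the sparsity_map above appears to record, for each target sparsity level ("0" through "99"), per-layer entries for attention heads, hidden dimensions, and FFN neurons, with "sparsity": "90" selecting the active level. A minimal sketch for inspecting it, assuming the repository has been cloned to a hypothetical local path ./minilm-sparse (the path and the interpretation of the entries are assumptions, not part of this upload):

    import json

    # Load the uploaded config.json from a hypothetical local checkout.
    with open("./minilm-sparse/config.json") as f:
        cfg = json.load(f)

    level = cfg["sparsity"]              # active sparsity level, "90" in this config
    budget = cfg["sparsity_map"][level]  # presumably the per-layer entries at that level

    print("head entries per layer:", budget["head"])
    print("hidden entry for layer 0:", budget["hidden"]["0"])
    print("neuron entry for layer 2:", budget["neuron"]["2"])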
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e12ad6d7600e11f76aae2c8d3e4ec577c55f74fac5a9ba60cfcc5a418874c9ca
+ size 75559930
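
The pytorch_model.bin entry above is a Git LFS pointer; the actual weights (about 75.6 MB) are fetched by git-lfs when the repository is cloned. A minimal sketch for checking a downloaded copy against the pointer, assuming a hypothetical local path (only the oid and size come from the pointer itself):

    import hashlib
    import os

    EXPECTED_OID = "e12ad6d7600e11f76aae2c8d3e4ec577c55f74fac5a9ba60cfcc5a418874c9ca"
    EXPECTED_SIZE = 75559930
    PATH = "./minilm-sparse/pytorch_model.bin"  # hypothetical local checkout

    def sha256_of(path, chunk_size=1 << 20):
        # Stream the file so large checkpoints need not fit in memory.
        h = hashlib.sha256()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(chunk_size), b""):
                h.update(chunk)
        return h.hexdigest()

    assert os.path.getsize(PATH) == EXPECTED_SIZE, "size differs from the LFS pointer"
    assert sha256_of(PATH) == EXPECTED_OID, "sha256 differs from the LFS pointer"
    print("pytorch_model.bin matches the LFS pointer")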
special_tokens_map.json ADDED
@@ -0,0 +1 @@
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
+ {"do_lower_case": true, "do_basic_tokenize": true, "never_split": null, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "/mnt/dolphinfs/ssd_pool/docker/user/hadoop-aipnlp/zhangchen76/afo_outputs/sparsebert_hidden_minilm_0.8,0.9S_1/ckpt-last", "use_fast": true, "tokenizer_file": null, "tokenizer_class": "BertTokenizer"}
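
tokenizer_config.json declares a standard BertTokenizer (lowercasing enabled, model_max_length 512), so the tokenizer files in this upload load with stock transformers; the model itself lists the custom architecture SparseBertHiddenMiniLM, which is not a stock transformers class, so loading the weights likely requires the authors' modeling code. A minimal sketch for the tokenizer only, assuming a hypothetical local clone at ./minilm-sparse:

    from transformers import BertTokenizer

    # Reads vocab.txt, tokenizer_config.json, and special_tokens_map.json from the local clone.
    tok = BertTokenizer.from_pretrained("./minilm-sparse")

    enc = tok(
        "An example sentence.",
        truncation=True,
        max_length=tok.model_max_length,  # 512 per tokenizer_config.json
    )
    print(enc["input_ids"])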
vocab.txt ADDED
The diff for this file is too large to render.