d-matrix committed on
Commit
014e3d0
·
verified ·
1 Parent(s): 3cfb3ac

Delete configs/BASIC.yaml

Browse files
Files changed (1) hide show
  1. configs/BASIC.yaml +0 -2137
configs/BASIC.yaml DELETED
@@ -1,2137 +0,0 @@
1
- model:
2
- lm_head:
3
- accum_format: SAME
4
- approximation_function: NONE
5
- input_format: SAME
6
- instance: Linear
7
- output_format: SAME
8
- weight_format: SAME
9
- weight_sparseness: DENSE
10
- model.decoder.layers.0.activation_fn:
11
- approximation_function: NONE
12
- input_format: SAME
13
- instance: ReLU
14
- output_format: SAME
15
- model.decoder.layers.0.dropout:
16
- approximation_function: NONE
17
- input_format: SAME
18
- instance: Dropout
19
- output_format: SAME
20
- model.decoder.layers.0.fc1:
21
- accum_format: SAME
22
- approximation_function: NONE
23
- bias_format: SAME
24
- input_format: BFP[8|8]{64,-1}(SN)
25
- instance: Linear
26
- output_format: SAME
27
- weight_format: BFP[8|8]{64,-1}(SN)
28
- weight_sparseness: DENSE
29
- model.decoder.layers.0.fc2:
30
- accum_format: SAME
31
- approximation_function: NONE
32
- bias_format: SAME
33
- input_format: BFP[8|8]{64,-1}(SN)
34
- instance: Linear
35
- output_format: SAME
36
- weight_format: BFP[8|8]{64,-1}(SN)
37
- weight_sparseness: DENSE
38
- model.decoder.layers.0.final_layer_norm:
39
- approximation_function: NONE
40
- bias_format: SAME
41
- input_format: SAME
42
- instance: LayerNorm
43
- output_format: SAME
44
- weight_format: SAME
45
- model.decoder.layers.0.self_attn.dropout:
46
- approximation_function: NONE
47
- input_format: SAME
48
- instance: Dropout
49
- output_format: BFP[8|8]{64,-1}(SN)
50
- model.decoder.layers.0.self_attn.k_proj:
51
- accum_format: SAME
52
- approximation_function: NONE
53
- bias_format: SAME
54
- input_format: BFP[8|8]{64,-1}(SN)
55
- instance: Linear
56
- output_format: BFP[8|8]{64,-1}(SN)
57
- weight_format: BFP[8|8]{64,-1}(SN)
58
- weight_sparseness: DENSE
59
- model.decoder.layers.0.self_attn.out_proj:
60
- accum_format: SAME
61
- approximation_function: NONE
62
- bias_format: SAME
63
- input_format: BFP[8|8]{64,-1}(SN)
64
- instance: Linear
65
- output_format: SAME
66
- weight_format: BFP[8|8]{64,-1}(SN)
67
- weight_sparseness: DENSE
68
- model.decoder.layers.0.self_attn.q_proj:
69
- accum_format: SAME
70
- approximation_function: NONE
71
- bias_format: SAME
72
- input_format: BFP[8|8]{64,-1}(SN)
73
- instance: Linear
74
- output_format: BFP[8|8]{64,-1}(SN)
75
- weight_format: BFP[8|8]{64,-1}(SN)
76
- weight_sparseness: DENSE
77
- model.decoder.layers.0.self_attn.softmax:
78
- approximation_function: NONE
79
- input_format: SAME
80
- instance: Softmax
81
- output_format: SAME
82
- model.decoder.layers.0.self_attn.v_proj:
83
- accum_format: SAME
84
- approximation_function: NONE
85
- bias_format: SAME
86
- input_format: BFP[8|8]{64,-1}(SN)
87
- instance: Linear
88
- output_format: BFP[8|8]{64,-1}(SN)
89
- weight_format: BFP[8|8]{64,-1}(SN)
90
- weight_sparseness: DENSE
91
- model.decoder.layers.0.self_attn_layer_norm:
92
- approximation_function: NONE
93
- bias_format: SAME
94
- input_format: SAME
95
- instance: LayerNorm
96
- output_format: SAME
97
- weight_format: SAME
98
- model.decoder.layers.1.activation_fn:
99
- approximation_function: NONE
100
- input_format: SAME
101
- instance: ReLU
102
- output_format: SAME
103
- model.decoder.layers.1.dropout:
104
- approximation_function: NONE
105
- input_format: SAME
106
- instance: Dropout
107
- output_format: SAME
108
- model.decoder.layers.1.fc1:
109
- accum_format: SAME
110
- approximation_function: NONE
111
- bias_format: SAME
112
- input_format: BFP[8|8]{64,-1}(SN)
113
- instance: Linear
114
- output_format: SAME
115
- weight_format: BFP[8|8]{64,-1}(SN)
116
- weight_sparseness: DENSE
117
- model.decoder.layers.1.fc2:
118
- accum_format: SAME
119
- approximation_function: NONE
120
- bias_format: SAME
121
- input_format: BFP[8|8]{64,-1}(SN)
122
- instance: Linear
123
- output_format: SAME
124
- weight_format: BFP[8|8]{64,-1}(SN)
125
- weight_sparseness: DENSE
126
- model.decoder.layers.1.final_layer_norm:
127
- approximation_function: NONE
128
- bias_format: SAME
129
- input_format: SAME
130
- instance: LayerNorm
131
- output_format: SAME
132
- weight_format: SAME
133
- model.decoder.layers.1.self_attn.dropout:
134
- approximation_function: NONE
135
- input_format: SAME
136
- instance: Dropout
137
- output_format: BFP[8|8]{64,-1}(SN)
138
- model.decoder.layers.1.self_attn.k_proj:
139
- accum_format: SAME
140
- approximation_function: NONE
141
- bias_format: SAME
142
- input_format: BFP[8|8]{64,-1}(SN)
143
- instance: Linear
144
- output_format: BFP[8|8]{64,-1}(SN)
145
- weight_format: BFP[8|8]{64,-1}(SN)
146
- weight_sparseness: DENSE
147
- model.decoder.layers.1.self_attn.out_proj:
148
- accum_format: SAME
149
- approximation_function: NONE
150
- bias_format: SAME
151
- input_format: BFP[8|8]{64,-1}(SN)
152
- instance: Linear
153
- output_format: SAME
154
- weight_format: BFP[8|8]{64,-1}(SN)
155
- weight_sparseness: DENSE
156
- model.decoder.layers.1.self_attn.q_proj:
157
- accum_format: SAME
158
- approximation_function: NONE
159
- bias_format: SAME
160
- input_format: BFP[8|8]{64,-1}(SN)
161
- instance: Linear
162
- output_format: BFP[8|8]{64,-1}(SN)
163
- weight_format: BFP[8|8]{64,-1}(SN)
164
- weight_sparseness: DENSE
165
- model.decoder.layers.1.self_attn.softmax:
166
- approximation_function: NONE
167
- input_format: SAME
168
- instance: Softmax
169
- output_format: SAME
170
- model.decoder.layers.1.self_attn.v_proj:
171
- accum_format: SAME
172
- approximation_function: NONE
173
- bias_format: SAME
174
- input_format: BFP[8|8]{64,-1}(SN)
175
- instance: Linear
176
- output_format: BFP[8|8]{64,-1}(SN)
177
- weight_format: BFP[8|8]{64,-1}(SN)
178
- weight_sparseness: DENSE
179
- model.decoder.layers.1.self_attn_layer_norm:
180
- approximation_function: NONE
181
- bias_format: SAME
182
- input_format: SAME
183
- instance: LayerNorm
184
- output_format: SAME
185
- weight_format: SAME
186
- model.decoder.layers.10.activation_fn:
187
- approximation_function: NONE
188
- input_format: SAME
189
- instance: ReLU
190
- output_format: SAME
191
- model.decoder.layers.10.dropout:
192
- approximation_function: NONE
193
- input_format: SAME
194
- instance: Dropout
195
- output_format: SAME
196
- model.decoder.layers.10.fc1:
197
- accum_format: SAME
198
- approximation_function: NONE
199
- bias_format: SAME
200
- input_format: BFP[8|8]{64,-1}(SN)
201
- instance: Linear
202
- output_format: SAME
203
- weight_format: BFP[8|8]{64,-1}(SN)
204
- weight_sparseness: DENSE
205
- model.decoder.layers.10.fc2:
206
- accum_format: SAME
207
- approximation_function: NONE
208
- bias_format: SAME
209
- input_format: BFP[8|8]{64,-1}(SN)
210
- instance: Linear
211
- output_format: SAME
212
- weight_format: BFP[8|8]{64,-1}(SN)
213
- weight_sparseness: DENSE
214
- model.decoder.layers.10.final_layer_norm:
215
- approximation_function: NONE
216
- bias_format: SAME
217
- input_format: SAME
218
- instance: LayerNorm
219
- output_format: SAME
220
- weight_format: SAME
221
- model.decoder.layers.10.self_attn.dropout:
222
- approximation_function: NONE
223
- input_format: SAME
224
- instance: Dropout
225
- output_format: BFP[8|8]{64,-1}(SN)
226
- model.decoder.layers.10.self_attn.k_proj:
227
- accum_format: SAME
228
- approximation_function: NONE
229
- bias_format: SAME
230
- input_format: BFP[8|8]{64,-1}(SN)
231
- instance: Linear
232
- output_format: BFP[8|8]{64,-1}(SN)
233
- weight_format: BFP[8|8]{64,-1}(SN)
234
- weight_sparseness: DENSE
235
- model.decoder.layers.10.self_attn.out_proj:
236
- accum_format: SAME
237
- approximation_function: NONE
238
- bias_format: SAME
239
- input_format: BFP[8|8]{64,-1}(SN)
240
- instance: Linear
241
- output_format: SAME
242
- weight_format: BFP[8|8]{64,-1}(SN)
243
- weight_sparseness: DENSE
244
- model.decoder.layers.10.self_attn.q_proj:
245
- accum_format: SAME
246
- approximation_function: NONE
247
- bias_format: SAME
248
- input_format: BFP[8|8]{64,-1}(SN)
249
- instance: Linear
250
- output_format: BFP[8|8]{64,-1}(SN)
251
- weight_format: BFP[8|8]{64,-1}(SN)
252
- weight_sparseness: DENSE
253
- model.decoder.layers.10.self_attn.softmax:
254
- approximation_function: NONE
255
- input_format: SAME
256
- instance: Softmax
257
- output_format: SAME
258
- model.decoder.layers.10.self_attn.v_proj:
259
- accum_format: SAME
260
- approximation_function: NONE
261
- bias_format: SAME
262
- input_format: BFP[8|8]{64,-1}(SN)
263
- instance: Linear
264
- output_format: BFP[8|8]{64,-1}(SN)
265
- weight_format: BFP[8|8]{64,-1}(SN)
266
- weight_sparseness: DENSE
267
- model.decoder.layers.10.self_attn_layer_norm:
268
- approximation_function: NONE
269
- bias_format: SAME
270
- input_format: SAME
271
- instance: LayerNorm
272
- output_format: SAME
273
- weight_format: SAME
274
- model.decoder.layers.11.activation_fn:
275
- approximation_function: NONE
276
- input_format: SAME
277
- instance: ReLU
278
- output_format: SAME
279
- model.decoder.layers.11.dropout:
280
- approximation_function: NONE
281
- input_format: SAME
282
- instance: Dropout
283
- output_format: SAME
284
- model.decoder.layers.11.fc1:
285
- accum_format: SAME
286
- approximation_function: NONE
287
- bias_format: SAME
288
- input_format: BFP[8|8]{64,-1}(SN)
289
- instance: Linear
290
- output_format: SAME
291
- weight_format: BFP[8|8]{64,-1}(SN)
292
- weight_sparseness: DENSE
293
- model.decoder.layers.11.fc2:
294
- accum_format: SAME
295
- approximation_function: NONE
296
- bias_format: SAME
297
- input_format: BFP[8|8]{64,-1}(SN)
298
- instance: Linear
299
- output_format: SAME
300
- weight_format: BFP[8|8]{64,-1}(SN)
301
- weight_sparseness: DENSE
302
- model.decoder.layers.11.final_layer_norm:
303
- approximation_function: NONE
304
- bias_format: SAME
305
- input_format: SAME
306
- instance: LayerNorm
307
- output_format: SAME
308
- weight_format: SAME
309
- model.decoder.layers.11.self_attn.dropout:
310
- approximation_function: NONE
311
- input_format: SAME
312
- instance: Dropout
313
- output_format: BFP[8|8]{64,-1}(SN)
314
- model.decoder.layers.11.self_attn.k_proj:
315
- accum_format: SAME
316
- approximation_function: NONE
317
- bias_format: SAME
318
- input_format: BFP[8|8]{64,-1}(SN)
319
- instance: Linear
320
- output_format: BFP[8|8]{64,-1}(SN)
321
- weight_format: BFP[8|8]{64,-1}(SN)
322
- weight_sparseness: DENSE
323
- model.decoder.layers.11.self_attn.out_proj:
324
- accum_format: SAME
325
- approximation_function: NONE
326
- bias_format: SAME
327
- input_format: BFP[8|8]{64,-1}(SN)
328
- instance: Linear
329
- output_format: SAME
330
- weight_format: BFP[8|8]{64,-1}(SN)
331
- weight_sparseness: DENSE
332
- model.decoder.layers.11.self_attn.q_proj:
333
- accum_format: SAME
334
- approximation_function: NONE
335
- bias_format: SAME
336
- input_format: BFP[8|8]{64,-1}(SN)
337
- instance: Linear
338
- output_format: BFP[8|8]{64,-1}(SN)
339
- weight_format: BFP[8|8]{64,-1}(SN)
340
- weight_sparseness: DENSE
341
- model.decoder.layers.11.self_attn.softmax:
342
- approximation_function: NONE
343
- input_format: SAME
344
- instance: Softmax
345
- output_format: SAME
346
- model.decoder.layers.11.self_attn.v_proj:
347
- accum_format: SAME
348
- approximation_function: NONE
349
- bias_format: SAME
350
- input_format: BFP[8|8]{64,-1}(SN)
351
- instance: Linear
352
- output_format: BFP[8|8]{64,-1}(SN)
353
- weight_format: BFP[8|8]{64,-1}(SN)
354
- weight_sparseness: DENSE
355
- model.decoder.layers.11.self_attn_layer_norm:
356
- approximation_function: NONE
357
- bias_format: SAME
358
- input_format: SAME
359
- instance: LayerNorm
360
- output_format: SAME
361
- weight_format: SAME
362
- model.decoder.layers.12.activation_fn:
363
- approximation_function: NONE
364
- input_format: SAME
365
- instance: ReLU
366
- output_format: SAME
367
- model.decoder.layers.12.dropout:
368
- approximation_function: NONE
369
- input_format: SAME
370
- instance: Dropout
371
- output_format: SAME
372
- model.decoder.layers.12.fc1:
373
- accum_format: SAME
374
- approximation_function: NONE
375
- bias_format: SAME
376
- input_format: BFP[8|8]{64,-1}(SN)
377
- instance: Linear
378
- output_format: SAME
379
- weight_format: BFP[8|8]{64,-1}(SN)
380
- weight_sparseness: DENSE
381
- model.decoder.layers.12.fc2:
382
- accum_format: SAME
383
- approximation_function: NONE
384
- bias_format: SAME
385
- input_format: BFP[8|8]{64,-1}(SN)
386
- instance: Linear
387
- output_format: SAME
388
- weight_format: BFP[8|8]{64,-1}(SN)
389
- weight_sparseness: DENSE
390
- model.decoder.layers.12.final_layer_norm:
391
- approximation_function: NONE
392
- bias_format: SAME
393
- input_format: SAME
394
- instance: LayerNorm
395
- output_format: SAME
396
- weight_format: SAME
397
- model.decoder.layers.12.self_attn.dropout:
398
- approximation_function: NONE
399
- input_format: SAME
400
- instance: Dropout
401
- output_format: BFP[8|8]{64,-1}(SN)
402
- model.decoder.layers.12.self_attn.k_proj:
403
- accum_format: SAME
404
- approximation_function: NONE
405
- bias_format: SAME
406
- input_format: BFP[8|8]{64,-1}(SN)
407
- instance: Linear
408
- output_format: BFP[8|8]{64,-1}(SN)
409
- weight_format: BFP[8|8]{64,-1}(SN)
410
- weight_sparseness: DENSE
411
- model.decoder.layers.12.self_attn.out_proj:
412
- accum_format: SAME
413
- approximation_function: NONE
414
- bias_format: SAME
415
- input_format: BFP[8|8]{64,-1}(SN)
416
- instance: Linear
417
- output_format: SAME
418
- weight_format: BFP[8|8]{64,-1}(SN)
419
- weight_sparseness: DENSE
420
- model.decoder.layers.12.self_attn.q_proj:
421
- accum_format: SAME
422
- approximation_function: NONE
423
- bias_format: SAME
424
- input_format: BFP[8|8]{64,-1}(SN)
425
- instance: Linear
426
- output_format: BFP[8|8]{64,-1}(SN)
427
- weight_format: BFP[8|8]{64,-1}(SN)
428
- weight_sparseness: DENSE
429
- model.decoder.layers.12.self_attn.softmax:
430
- approximation_function: NONE
431
- input_format: SAME
432
- instance: Softmax
433
- output_format: SAME
434
- model.decoder.layers.12.self_attn.v_proj:
435
- accum_format: SAME
436
- approximation_function: NONE
437
- bias_format: SAME
438
- input_format: BFP[8|8]{64,-1}(SN)
439
- instance: Linear
440
- output_format: BFP[8|8]{64,-1}(SN)
441
- weight_format: BFP[8|8]{64,-1}(SN)
442
- weight_sparseness: DENSE
443
- model.decoder.layers.12.self_attn_layer_norm:
444
- approximation_function: NONE
445
- bias_format: SAME
446
- input_format: SAME
447
- instance: LayerNorm
448
- output_format: SAME
449
- weight_format: SAME
450
- model.decoder.layers.13.activation_fn:
451
- approximation_function: NONE
452
- input_format: SAME
453
- instance: ReLU
454
- output_format: SAME
455
- model.decoder.layers.13.dropout:
456
- approximation_function: NONE
457
- input_format: SAME
458
- instance: Dropout
459
- output_format: SAME
460
- model.decoder.layers.13.fc1:
461
- accum_format: SAME
462
- approximation_function: NONE
463
- bias_format: SAME
464
- input_format: BFP[8|8]{64,-1}(SN)
465
- instance: Linear
466
- output_format: SAME
467
- weight_format: BFP[8|8]{64,-1}(SN)
468
- weight_sparseness: DENSE
469
- model.decoder.layers.13.fc2:
470
- accum_format: SAME
471
- approximation_function: NONE
472
- bias_format: SAME
473
- input_format: BFP[8|8]{64,-1}(SN)
474
- instance: Linear
475
- output_format: SAME
476
- weight_format: BFP[8|8]{64,-1}(SN)
477
- weight_sparseness: DENSE
478
- model.decoder.layers.13.final_layer_norm:
479
- approximation_function: NONE
480
- bias_format: SAME
481
- input_format: SAME
482
- instance: LayerNorm
483
- output_format: SAME
484
- weight_format: SAME
485
- model.decoder.layers.13.self_attn.dropout:
486
- approximation_function: NONE
487
- input_format: SAME
488
- instance: Dropout
489
- output_format: BFP[8|8]{64,-1}(SN)
490
- model.decoder.layers.13.self_attn.k_proj:
491
- accum_format: SAME
492
- approximation_function: NONE
493
- bias_format: SAME
494
- input_format: BFP[8|8]{64,-1}(SN)
495
- instance: Linear
496
- output_format: BFP[8|8]{64,-1}(SN)
497
- weight_format: BFP[8|8]{64,-1}(SN)
498
- weight_sparseness: DENSE
499
- model.decoder.layers.13.self_attn.out_proj:
500
- accum_format: SAME
501
- approximation_function: NONE
502
- bias_format: SAME
503
- input_format: BFP[8|8]{64,-1}(SN)
504
- instance: Linear
505
- output_format: SAME
506
- weight_format: BFP[8|8]{64,-1}(SN)
507
- weight_sparseness: DENSE
508
- model.decoder.layers.13.self_attn.q_proj:
509
- accum_format: SAME
510
- approximation_function: NONE
511
- bias_format: SAME
512
- input_format: BFP[8|8]{64,-1}(SN)
513
- instance: Linear
514
- output_format: BFP[8|8]{64,-1}(SN)
515
- weight_format: BFP[8|8]{64,-1}(SN)
516
- weight_sparseness: DENSE
517
- model.decoder.layers.13.self_attn.softmax:
518
- approximation_function: NONE
519
- input_format: SAME
520
- instance: Softmax
521
- output_format: SAME
522
- model.decoder.layers.13.self_attn.v_proj:
523
- accum_format: SAME
524
- approximation_function: NONE
525
- bias_format: SAME
526
- input_format: BFP[8|8]{64,-1}(SN)
527
- instance: Linear
528
- output_format: BFP[8|8]{64,-1}(SN)
529
- weight_format: BFP[8|8]{64,-1}(SN)
530
- weight_sparseness: DENSE
531
- model.decoder.layers.13.self_attn_layer_norm:
532
- approximation_function: NONE
533
- bias_format: SAME
534
- input_format: SAME
535
- instance: LayerNorm
536
- output_format: SAME
537
- weight_format: SAME
538
- model.decoder.layers.14.activation_fn:
539
- approximation_function: NONE
540
- input_format: SAME
541
- instance: ReLU
542
- output_format: SAME
543
- model.decoder.layers.14.dropout:
544
- approximation_function: NONE
545
- input_format: SAME
546
- instance: Dropout
547
- output_format: SAME
548
- model.decoder.layers.14.fc1:
549
- accum_format: SAME
550
- approximation_function: NONE
551
- bias_format: SAME
552
- input_format: BFP[8|8]{64,-1}(SN)
553
- instance: Linear
554
- output_format: SAME
555
- weight_format: BFP[8|8]{64,-1}(SN)
556
- weight_sparseness: DENSE
557
- model.decoder.layers.14.fc2:
558
- accum_format: SAME
559
- approximation_function: NONE
560
- bias_format: SAME
561
- input_format: BFP[8|8]{64,-1}(SN)
562
- instance: Linear
563
- output_format: SAME
564
- weight_format: BFP[8|8]{64,-1}(SN)
565
- weight_sparseness: DENSE
566
- model.decoder.layers.14.final_layer_norm:
567
- approximation_function: NONE
568
- bias_format: SAME
569
- input_format: SAME
570
- instance: LayerNorm
571
- output_format: SAME
572
- weight_format: SAME
573
- model.decoder.layers.14.self_attn.dropout:
574
- approximation_function: NONE
575
- input_format: SAME
576
- instance: Dropout
577
- output_format: BFP[8|8]{64,-1}(SN)
578
- model.decoder.layers.14.self_attn.k_proj:
579
- accum_format: SAME
580
- approximation_function: NONE
581
- bias_format: SAME
582
- input_format: BFP[8|8]{64,-1}(SN)
583
- instance: Linear
584
- output_format: BFP[8|8]{64,-1}(SN)
585
- weight_format: BFP[8|8]{64,-1}(SN)
586
- weight_sparseness: DENSE
587
- model.decoder.layers.14.self_attn.out_proj:
588
- accum_format: SAME
589
- approximation_function: NONE
590
- bias_format: SAME
591
- input_format: BFP[8|8]{64,-1}(SN)
592
- instance: Linear
593
- output_format: SAME
594
- weight_format: BFP[8|8]{64,-1}(SN)
595
- weight_sparseness: DENSE
596
- model.decoder.layers.14.self_attn.q_proj:
597
- accum_format: SAME
598
- approximation_function: NONE
599
- bias_format: SAME
600
- input_format: BFP[8|8]{64,-1}(SN)
601
- instance: Linear
602
- output_format: BFP[8|8]{64,-1}(SN)
603
- weight_format: BFP[8|8]{64,-1}(SN)
604
- weight_sparseness: DENSE
605
- model.decoder.layers.14.self_attn.softmax:
606
- approximation_function: NONE
607
- input_format: SAME
608
- instance: Softmax
609
- output_format: SAME
610
- model.decoder.layers.14.self_attn.v_proj:
611
- accum_format: SAME
612
- approximation_function: NONE
613
- bias_format: SAME
614
- input_format: BFP[8|8]{64,-1}(SN)
615
- instance: Linear
616
- output_format: BFP[8|8]{64,-1}(SN)
617
- weight_format: BFP[8|8]{64,-1}(SN)
618
- weight_sparseness: DENSE
619
- model.decoder.layers.14.self_attn_layer_norm:
620
- approximation_function: NONE
621
- bias_format: SAME
622
- input_format: SAME
623
- instance: LayerNorm
624
- output_format: SAME
625
- weight_format: SAME
626
- model.decoder.layers.15.activation_fn:
627
- approximation_function: NONE
628
- input_format: SAME
629
- instance: ReLU
630
- output_format: SAME
631
- model.decoder.layers.15.dropout:
632
- approximation_function: NONE
633
- input_format: SAME
634
- instance: Dropout
635
- output_format: SAME
636
- model.decoder.layers.15.fc1:
637
- accum_format: SAME
638
- approximation_function: NONE
639
- bias_format: SAME
640
- input_format: BFP[8|8]{64,-1}(SN)
641
- instance: Linear
642
- output_format: SAME
643
- weight_format: BFP[8|8]{64,-1}(SN)
644
- weight_sparseness: DENSE
645
- model.decoder.layers.15.fc2:
646
- accum_format: SAME
647
- approximation_function: NONE
648
- bias_format: SAME
649
- input_format: BFP[8|8]{64,-1}(SN)
650
- instance: Linear
651
- output_format: SAME
652
- weight_format: BFP[8|8]{64,-1}(SN)
653
- weight_sparseness: DENSE
654
- model.decoder.layers.15.final_layer_norm:
655
- approximation_function: NONE
656
- bias_format: SAME
657
- input_format: SAME
658
- instance: LayerNorm
659
- output_format: SAME
660
- weight_format: SAME
661
- model.decoder.layers.15.self_attn.dropout:
662
- approximation_function: NONE
663
- input_format: SAME
664
- instance: Dropout
665
- output_format: BFP[8|8]{64,-1}(SN)
666
- model.decoder.layers.15.self_attn.k_proj:
667
- accum_format: SAME
668
- approximation_function: NONE
669
- bias_format: SAME
670
- input_format: BFP[8|8]{64,-1}(SN)
671
- instance: Linear
672
- output_format: BFP[8|8]{64,-1}(SN)
673
- weight_format: BFP[8|8]{64,-1}(SN)
674
- weight_sparseness: DENSE
675
- model.decoder.layers.15.self_attn.out_proj:
676
- accum_format: SAME
677
- approximation_function: NONE
678
- bias_format: SAME
679
- input_format: BFP[8|8]{64,-1}(SN)
680
- instance: Linear
681
- output_format: SAME
682
- weight_format: BFP[8|8]{64,-1}(SN)
683
- weight_sparseness: DENSE
684
- model.decoder.layers.15.self_attn.q_proj:
685
- accum_format: SAME
686
- approximation_function: NONE
687
- bias_format: SAME
688
- input_format: BFP[8|8]{64,-1}(SN)
689
- instance: Linear
690
- output_format: BFP[8|8]{64,-1}(SN)
691
- weight_format: BFP[8|8]{64,-1}(SN)
692
- weight_sparseness: DENSE
693
- model.decoder.layers.15.self_attn.softmax:
694
- approximation_function: NONE
695
- input_format: SAME
696
- instance: Softmax
697
- output_format: SAME
698
- model.decoder.layers.15.self_attn.v_proj:
699
- accum_format: SAME
700
- approximation_function: NONE
701
- bias_format: SAME
702
- input_format: BFP[8|8]{64,-1}(SN)
703
- instance: Linear
704
- output_format: BFP[8|8]{64,-1}(SN)
705
- weight_format: BFP[8|8]{64,-1}(SN)
706
- weight_sparseness: DENSE
707
- model.decoder.layers.15.self_attn_layer_norm:
708
- approximation_function: NONE
709
- bias_format: SAME
710
- input_format: SAME
711
- instance: LayerNorm
712
- output_format: SAME
713
- weight_format: SAME
714
- model.decoder.layers.16.activation_fn:
715
- approximation_function: NONE
716
- input_format: SAME
717
- instance: ReLU
718
- output_format: SAME
719
- model.decoder.layers.16.dropout:
720
- approximation_function: NONE
721
- input_format: SAME
722
- instance: Dropout
723
- output_format: SAME
724
- model.decoder.layers.16.fc1:
725
- accum_format: SAME
726
- approximation_function: NONE
727
- bias_format: SAME
728
- input_format: BFP[8|8]{64,-1}(SN)
729
- instance: Linear
730
- output_format: SAME
731
- weight_format: BFP[8|8]{64,-1}(SN)
732
- weight_sparseness: DENSE
733
- model.decoder.layers.16.fc2:
734
- accum_format: SAME
735
- approximation_function: NONE
736
- bias_format: SAME
737
- input_format: BFP[8|8]{64,-1}(SN)
738
- instance: Linear
739
- output_format: SAME
740
- weight_format: BFP[8|8]{64,-1}(SN)
741
- weight_sparseness: DENSE
742
- model.decoder.layers.16.final_layer_norm:
743
- approximation_function: NONE
744
- bias_format: SAME
745
- input_format: SAME
746
- instance: LayerNorm
747
- output_format: SAME
748
- weight_format: SAME
749
- model.decoder.layers.16.self_attn.dropout:
750
- approximation_function: NONE
751
- input_format: SAME
752
- instance: Dropout
753
- output_format: BFP[8|8]{64,-1}(SN)
754
- model.decoder.layers.16.self_attn.k_proj:
755
- accum_format: SAME
756
- approximation_function: NONE
757
- bias_format: SAME
758
- input_format: BFP[8|8]{64,-1}(SN)
759
- instance: Linear
760
- output_format: BFP[8|8]{64,-1}(SN)
761
- weight_format: BFP[8|8]{64,-1}(SN)
762
- weight_sparseness: DENSE
763
- model.decoder.layers.16.self_attn.out_proj:
764
- accum_format: SAME
765
- approximation_function: NONE
766
- bias_format: SAME
767
- input_format: BFP[8|8]{64,-1}(SN)
768
- instance: Linear
769
- output_format: SAME
770
- weight_format: BFP[8|8]{64,-1}(SN)
771
- weight_sparseness: DENSE
772
- model.decoder.layers.16.self_attn.q_proj:
773
- accum_format: SAME
774
- approximation_function: NONE
775
- bias_format: SAME
776
- input_format: BFP[8|8]{64,-1}(SN)
777
- instance: Linear
778
- output_format: BFP[8|8]{64,-1}(SN)
779
- weight_format: BFP[8|8]{64,-1}(SN)
780
- weight_sparseness: DENSE
781
- model.decoder.layers.16.self_attn.softmax:
782
- approximation_function: NONE
783
- input_format: SAME
784
- instance: Softmax
785
- output_format: SAME
786
- model.decoder.layers.16.self_attn.v_proj:
787
- accum_format: SAME
788
- approximation_function: NONE
789
- bias_format: SAME
790
- input_format: BFP[8|8]{64,-1}(SN)
791
- instance: Linear
792
- output_format: BFP[8|8]{64,-1}(SN)
793
- weight_format: BFP[8|8]{64,-1}(SN)
794
- weight_sparseness: DENSE
795
- model.decoder.layers.16.self_attn_layer_norm:
796
- approximation_function: NONE
797
- bias_format: SAME
798
- input_format: SAME
799
- instance: LayerNorm
800
- output_format: SAME
801
- weight_format: SAME
802
- model.decoder.layers.17.activation_fn:
803
- approximation_function: NONE
804
- input_format: SAME
805
- instance: ReLU
806
- output_format: SAME
807
- model.decoder.layers.17.dropout:
808
- approximation_function: NONE
809
- input_format: SAME
810
- instance: Dropout
811
- output_format: SAME
812
- model.decoder.layers.17.fc1:
813
- accum_format: SAME
814
- approximation_function: NONE
815
- bias_format: SAME
816
- input_format: BFP[8|8]{64,-1}(SN)
817
- instance: Linear
818
- output_format: SAME
819
- weight_format: BFP[8|8]{64,-1}(SN)
820
- weight_sparseness: DENSE
821
- model.decoder.layers.17.fc2:
822
- accum_format: SAME
823
- approximation_function: NONE
824
- bias_format: SAME
825
- input_format: BFP[8|8]{64,-1}(SN)
826
- instance: Linear
827
- output_format: SAME
828
- weight_format: BFP[8|8]{64,-1}(SN)
829
- weight_sparseness: DENSE
830
- model.decoder.layers.17.final_layer_norm:
831
- approximation_function: NONE
832
- bias_format: SAME
833
- input_format: SAME
834
- instance: LayerNorm
835
- output_format: SAME
836
- weight_format: SAME
837
- model.decoder.layers.17.self_attn.dropout:
838
- approximation_function: NONE
839
- input_format: SAME
840
- instance: Dropout
841
- output_format: BFP[8|8]{64,-1}(SN)
842
- model.decoder.layers.17.self_attn.k_proj:
843
- accum_format: SAME
844
- approximation_function: NONE
845
- bias_format: SAME
846
- input_format: BFP[8|8]{64,-1}(SN)
847
- instance: Linear
848
- output_format: BFP[8|8]{64,-1}(SN)
849
- weight_format: BFP[8|8]{64,-1}(SN)
850
- weight_sparseness: DENSE
851
- model.decoder.layers.17.self_attn.out_proj:
852
- accum_format: SAME
853
- approximation_function: NONE
854
- bias_format: SAME
855
- input_format: BFP[8|8]{64,-1}(SN)
856
- instance: Linear
857
- output_format: SAME
858
- weight_format: BFP[8|8]{64,-1}(SN)
859
- weight_sparseness: DENSE
860
- model.decoder.layers.17.self_attn.q_proj:
861
- accum_format: SAME
862
- approximation_function: NONE
863
- bias_format: SAME
864
- input_format: BFP[8|8]{64,-1}(SN)
865
- instance: Linear
866
- output_format: BFP[8|8]{64,-1}(SN)
867
- weight_format: BFP[8|8]{64,-1}(SN)
868
- weight_sparseness: DENSE
869
- model.decoder.layers.17.self_attn.softmax:
870
- approximation_function: NONE
871
- input_format: SAME
872
- instance: Softmax
873
- output_format: SAME
874
- model.decoder.layers.17.self_attn.v_proj:
875
- accum_format: SAME
876
- approximation_function: NONE
877
- bias_format: SAME
878
- input_format: BFP[8|8]{64,-1}(SN)
879
- instance: Linear
880
- output_format: BFP[8|8]{64,-1}(SN)
881
- weight_format: BFP[8|8]{64,-1}(SN)
882
- weight_sparseness: DENSE
883
- model.decoder.layers.17.self_attn_layer_norm:
884
- approximation_function: NONE
885
- bias_format: SAME
886
- input_format: SAME
887
- instance: LayerNorm
888
- output_format: SAME
889
- weight_format: SAME
890
- model.decoder.layers.18.activation_fn:
891
- approximation_function: NONE
892
- input_format: SAME
893
- instance: ReLU
894
- output_format: SAME
895
- model.decoder.layers.18.dropout:
896
- approximation_function: NONE
897
- input_format: SAME
898
- instance: Dropout
899
- output_format: SAME
900
- model.decoder.layers.18.fc1:
901
- accum_format: SAME
902
- approximation_function: NONE
903
- bias_format: SAME
904
- input_format: BFP[8|8]{64,-1}(SN)
905
- instance: Linear
906
- output_format: SAME
907
- weight_format: BFP[8|8]{64,-1}(SN)
908
- weight_sparseness: DENSE
909
- model.decoder.layers.18.fc2:
910
- accum_format: SAME
911
- approximation_function: NONE
912
- bias_format: SAME
913
- input_format: BFP[8|8]{64,-1}(SN)
914
- instance: Linear
915
- output_format: SAME
916
- weight_format: BFP[8|8]{64,-1}(SN)
917
- weight_sparseness: DENSE
918
- model.decoder.layers.18.final_layer_norm:
919
- approximation_function: NONE
920
- bias_format: SAME
921
- input_format: SAME
922
- instance: LayerNorm
923
- output_format: SAME
924
- weight_format: SAME
925
- model.decoder.layers.18.self_attn.dropout:
926
- approximation_function: NONE
927
- input_format: SAME
928
- instance: Dropout
929
- output_format: BFP[8|8]{64,-1}(SN)
930
- model.decoder.layers.18.self_attn.k_proj:
931
- accum_format: SAME
932
- approximation_function: NONE
933
- bias_format: SAME
934
- input_format: BFP[8|8]{64,-1}(SN)
935
- instance: Linear
936
- output_format: BFP[8|8]{64,-1}(SN)
937
- weight_format: BFP[8|8]{64,-1}(SN)
938
- weight_sparseness: DENSE
939
- model.decoder.layers.18.self_attn.out_proj:
940
- accum_format: SAME
941
- approximation_function: NONE
942
- bias_format: SAME
943
- input_format: BFP[8|8]{64,-1}(SN)
944
- instance: Linear
945
- output_format: SAME
946
- weight_format: BFP[8|8]{64,-1}(SN)
947
- weight_sparseness: DENSE
948
- model.decoder.layers.18.self_attn.q_proj:
949
- accum_format: SAME
950
- approximation_function: NONE
951
- bias_format: SAME
952
- input_format: BFP[8|8]{64,-1}(SN)
953
- instance: Linear
954
- output_format: BFP[8|8]{64,-1}(SN)
955
- weight_format: BFP[8|8]{64,-1}(SN)
956
- weight_sparseness: DENSE
957
- model.decoder.layers.18.self_attn.softmax:
958
- approximation_function: NONE
959
- input_format: SAME
960
- instance: Softmax
961
- output_format: SAME
962
- model.decoder.layers.18.self_attn.v_proj:
963
- accum_format: SAME
964
- approximation_function: NONE
965
- bias_format: SAME
966
- input_format: BFP[8|8]{64,-1}(SN)
967
- instance: Linear
968
- output_format: BFP[8|8]{64,-1}(SN)
969
- weight_format: BFP[8|8]{64,-1}(SN)
970
- weight_sparseness: DENSE
971
- model.decoder.layers.18.self_attn_layer_norm:
972
- approximation_function: NONE
973
- bias_format: SAME
974
- input_format: SAME
975
- instance: LayerNorm
976
- output_format: SAME
977
- weight_format: SAME
978
- model.decoder.layers.19.activation_fn:
979
- approximation_function: NONE
980
- input_format: SAME
981
- instance: ReLU
982
- output_format: SAME
983
- model.decoder.layers.19.dropout:
984
- approximation_function: NONE
985
- input_format: SAME
986
- instance: Dropout
987
- output_format: SAME
988
- model.decoder.layers.19.fc1:
989
- accum_format: SAME
990
- approximation_function: NONE
991
- bias_format: SAME
992
- input_format: BFP[8|8]{64,-1}(SN)
993
- instance: Linear
994
- output_format: SAME
995
- weight_format: BFP[8|8]{64,-1}(SN)
996
- weight_sparseness: DENSE
997
- model.decoder.layers.19.fc2:
998
- accum_format: SAME
999
- approximation_function: NONE
1000
- bias_format: SAME
1001
- input_format: BFP[8|8]{64,-1}(SN)
1002
- instance: Linear
1003
- output_format: SAME
1004
- weight_format: BFP[8|8]{64,-1}(SN)
1005
- weight_sparseness: DENSE
1006
- model.decoder.layers.19.final_layer_norm:
1007
- approximation_function: NONE
1008
- bias_format: SAME
1009
- input_format: SAME
1010
- instance: LayerNorm
1011
- output_format: SAME
1012
- weight_format: SAME
1013
- model.decoder.layers.19.self_attn.dropout:
1014
- approximation_function: NONE
1015
- input_format: SAME
1016
- instance: Dropout
1017
- output_format: BFP[8|8]{64,-1}(SN)
1018
- model.decoder.layers.19.self_attn.k_proj:
1019
- accum_format: SAME
1020
- approximation_function: NONE
1021
- bias_format: SAME
1022
- input_format: BFP[8|8]{64,-1}(SN)
1023
- instance: Linear
1024
- output_format: BFP[8|8]{64,-1}(SN)
1025
- weight_format: BFP[8|8]{64,-1}(SN)
1026
- weight_sparseness: DENSE
1027
- model.decoder.layers.19.self_attn.out_proj:
1028
- accum_format: SAME
1029
- approximation_function: NONE
1030
- bias_format: SAME
1031
- input_format: BFP[8|8]{64,-1}(SN)
1032
- instance: Linear
1033
- output_format: SAME
1034
- weight_format: BFP[8|8]{64,-1}(SN)
1035
- weight_sparseness: DENSE
1036
- model.decoder.layers.19.self_attn.q_proj:
1037
- accum_format: SAME
1038
- approximation_function: NONE
1039
- bias_format: SAME
1040
- input_format: BFP[8|8]{64,-1}(SN)
1041
- instance: Linear
1042
- output_format: BFP[8|8]{64,-1}(SN)
1043
- weight_format: BFP[8|8]{64,-1}(SN)
1044
- weight_sparseness: DENSE
1045
- model.decoder.layers.19.self_attn.softmax:
1046
- approximation_function: NONE
1047
- input_format: SAME
1048
- instance: Softmax
1049
- output_format: SAME
1050
- model.decoder.layers.19.self_attn.v_proj:
1051
- accum_format: SAME
1052
- approximation_function: NONE
1053
- bias_format: SAME
1054
- input_format: BFP[8|8]{64,-1}(SN)
1055
- instance: Linear
1056
- output_format: BFP[8|8]{64,-1}(SN)
1057
- weight_format: BFP[8|8]{64,-1}(SN)
1058
- weight_sparseness: DENSE
1059
- model.decoder.layers.19.self_attn_layer_norm:
1060
- approximation_function: NONE
1061
- bias_format: SAME
1062
- input_format: SAME
1063
- instance: LayerNorm
1064
- output_format: SAME
1065
- weight_format: SAME
1066
- model.decoder.layers.2.activation_fn:
1067
- approximation_function: NONE
1068
- input_format: SAME
1069
- instance: ReLU
1070
- output_format: SAME
1071
- model.decoder.layers.2.dropout:
1072
- approximation_function: NONE
1073
- input_format: SAME
1074
- instance: Dropout
1075
- output_format: SAME
1076
- model.decoder.layers.2.fc1:
1077
- accum_format: SAME
1078
- approximation_function: NONE
1079
- bias_format: SAME
1080
- input_format: BFP[8|8]{64,-1}(SN)
1081
- instance: Linear
1082
- output_format: SAME
1083
- weight_format: BFP[8|8]{64,-1}(SN)
1084
- weight_sparseness: DENSE
1085
- model.decoder.layers.2.fc2:
1086
- accum_format: SAME
1087
- approximation_function: NONE
1088
- bias_format: SAME
1089
- input_format: BFP[8|8]{64,-1}(SN)
1090
- instance: Linear
1091
- output_format: SAME
1092
- weight_format: BFP[8|8]{64,-1}(SN)
1093
- weight_sparseness: DENSE
1094
- model.decoder.layers.2.final_layer_norm:
1095
- approximation_function: NONE
1096
- bias_format: SAME
1097
- input_format: SAME
1098
- instance: LayerNorm
1099
- output_format: SAME
1100
- weight_format: SAME
1101
- model.decoder.layers.2.self_attn.dropout:
1102
- approximation_function: NONE
1103
- input_format: SAME
1104
- instance: Dropout
1105
- output_format: BFP[8|8]{64,-1}(SN)
1106
- model.decoder.layers.2.self_attn.k_proj:
1107
- accum_format: SAME
1108
- approximation_function: NONE
1109
- bias_format: SAME
1110
- input_format: BFP[8|8]{64,-1}(SN)
1111
- instance: Linear
1112
- output_format: BFP[8|8]{64,-1}(SN)
1113
- weight_format: BFP[8|8]{64,-1}(SN)
1114
- weight_sparseness: DENSE
1115
- model.decoder.layers.2.self_attn.out_proj:
1116
- accum_format: SAME
1117
- approximation_function: NONE
1118
- bias_format: SAME
1119
- input_format: BFP[8|8]{64,-1}(SN)
1120
- instance: Linear
1121
- output_format: SAME
1122
- weight_format: BFP[8|8]{64,-1}(SN)
1123
- weight_sparseness: DENSE
1124
- model.decoder.layers.2.self_attn.q_proj:
1125
- accum_format: SAME
1126
- approximation_function: NONE
1127
- bias_format: SAME
1128
- input_format: BFP[8|8]{64,-1}(SN)
1129
- instance: Linear
1130
- output_format: BFP[8|8]{64,-1}(SN)
1131
- weight_format: BFP[8|8]{64,-1}(SN)
1132
- weight_sparseness: DENSE
1133
- model.decoder.layers.2.self_attn.softmax:
1134
- approximation_function: NONE
1135
- input_format: SAME
1136
- instance: Softmax
1137
- output_format: SAME
1138
- model.decoder.layers.2.self_attn.v_proj:
1139
- accum_format: SAME
1140
- approximation_function: NONE
1141
- bias_format: SAME
1142
- input_format: BFP[8|8]{64,-1}(SN)
1143
- instance: Linear
1144
- output_format: BFP[8|8]{64,-1}(SN)
1145
- weight_format: BFP[8|8]{64,-1}(SN)
1146
- weight_sparseness: DENSE
1147
- model.decoder.layers.2.self_attn_layer_norm:
1148
- approximation_function: NONE
1149
- bias_format: SAME
1150
- input_format: SAME
1151
- instance: LayerNorm
1152
- output_format: SAME
1153
- weight_format: SAME
1154
- model.decoder.layers.20.activation_fn:
1155
- approximation_function: NONE
1156
- input_format: SAME
1157
- instance: ReLU
1158
- output_format: SAME
1159
- model.decoder.layers.20.dropout:
1160
- approximation_function: NONE
1161
- input_format: SAME
1162
- instance: Dropout
1163
- output_format: SAME
1164
- model.decoder.layers.20.fc1:
1165
- accum_format: SAME
1166
- approximation_function: NONE
1167
- bias_format: SAME
1168
- input_format: BFP[8|8]{64,-1}(SN)
1169
- instance: Linear
1170
- output_format: SAME
1171
- weight_format: BFP[8|8]{64,-1}(SN)
1172
- weight_sparseness: DENSE
1173
- model.decoder.layers.20.fc2:
1174
- accum_format: SAME
1175
- approximation_function: NONE
1176
- bias_format: SAME
1177
- input_format: BFP[8|8]{64,-1}(SN)
1178
- instance: Linear
1179
- output_format: SAME
1180
- weight_format: BFP[8|8]{64,-1}(SN)
1181
- weight_sparseness: DENSE
1182
- model.decoder.layers.20.final_layer_norm:
1183
- approximation_function: NONE
1184
- bias_format: SAME
1185
- input_format: SAME
1186
- instance: LayerNorm
1187
- output_format: SAME
1188
- weight_format: SAME
1189
- model.decoder.layers.20.self_attn.dropout:
1190
- approximation_function: NONE
1191
- input_format: SAME
1192
- instance: Dropout
1193
- output_format: BFP[8|8]{64,-1}(SN)
1194
- model.decoder.layers.20.self_attn.k_proj:
1195
- accum_format: SAME
1196
- approximation_function: NONE
1197
- bias_format: SAME
1198
- input_format: BFP[8|8]{64,-1}(SN)
1199
- instance: Linear
1200
- output_format: BFP[8|8]{64,-1}(SN)
1201
- weight_format: BFP[8|8]{64,-1}(SN)
1202
- weight_sparseness: DENSE
1203
- model.decoder.layers.20.self_attn.out_proj:
1204
- accum_format: SAME
1205
- approximation_function: NONE
1206
- bias_format: SAME
1207
- input_format: BFP[8|8]{64,-1}(SN)
1208
- instance: Linear
1209
- output_format: SAME
1210
- weight_format: BFP[8|8]{64,-1}(SN)
1211
- weight_sparseness: DENSE
1212
- model.decoder.layers.20.self_attn.q_proj:
1213
- accum_format: SAME
1214
- approximation_function: NONE
1215
- bias_format: SAME
1216
- input_format: BFP[8|8]{64,-1}(SN)
1217
- instance: Linear
1218
- output_format: BFP[8|8]{64,-1}(SN)
1219
- weight_format: BFP[8|8]{64,-1}(SN)
1220
- weight_sparseness: DENSE
1221
- model.decoder.layers.20.self_attn.softmax:
1222
- approximation_function: NONE
1223
- input_format: SAME
1224
- instance: Softmax
1225
- output_format: SAME
1226
- model.decoder.layers.20.self_attn.v_proj:
1227
- accum_format: SAME
1228
- approximation_function: NONE
1229
- bias_format: SAME
1230
- input_format: BFP[8|8]{64,-1}(SN)
1231
- instance: Linear
1232
- output_format: BFP[8|8]{64,-1}(SN)
1233
- weight_format: BFP[8|8]{64,-1}(SN)
1234
- weight_sparseness: DENSE
1235
- model.decoder.layers.20.self_attn_layer_norm:
1236
- approximation_function: NONE
1237
- bias_format: SAME
1238
- input_format: SAME
1239
- instance: LayerNorm
1240
- output_format: SAME
1241
- weight_format: SAME
1242
- model.decoder.layers.21.activation_fn:
1243
- approximation_function: NONE
1244
- input_format: SAME
1245
- instance: ReLU
1246
- output_format: SAME
1247
- model.decoder.layers.21.dropout:
1248
- approximation_function: NONE
1249
- input_format: SAME
1250
- instance: Dropout
1251
- output_format: SAME
1252
- model.decoder.layers.21.fc1:
1253
- accum_format: SAME
1254
- approximation_function: NONE
1255
- bias_format: SAME
1256
- input_format: BFP[8|8]{64,-1}(SN)
1257
- instance: Linear
1258
- output_format: SAME
1259
- weight_format: BFP[8|8]{64,-1}(SN)
1260
- weight_sparseness: DENSE
1261
- model.decoder.layers.21.fc2:
1262
- accum_format: SAME
1263
- approximation_function: NONE
1264
- bias_format: SAME
1265
- input_format: BFP[8|8]{64,-1}(SN)
1266
- instance: Linear
1267
- output_format: SAME
1268
- weight_format: BFP[8|8]{64,-1}(SN)
1269
- weight_sparseness: DENSE
1270
- model.decoder.layers.21.final_layer_norm:
1271
- approximation_function: NONE
1272
- bias_format: SAME
1273
- input_format: SAME
1274
- instance: LayerNorm
1275
- output_format: SAME
1276
- weight_format: SAME
1277
- model.decoder.layers.21.self_attn.dropout:
1278
- approximation_function: NONE
1279
- input_format: SAME
1280
- instance: Dropout
1281
- output_format: BFP[8|8]{64,-1}(SN)
1282
- model.decoder.layers.21.self_attn.k_proj:
1283
- accum_format: SAME
1284
- approximation_function: NONE
1285
- bias_format: SAME
1286
- input_format: BFP[8|8]{64,-1}(SN)
1287
- instance: Linear
1288
- output_format: BFP[8|8]{64,-1}(SN)
1289
- weight_format: BFP[8|8]{64,-1}(SN)
1290
- weight_sparseness: DENSE
1291
- model.decoder.layers.21.self_attn.out_proj:
1292
- accum_format: SAME
1293
- approximation_function: NONE
1294
- bias_format: SAME
1295
- input_format: BFP[8|8]{64,-1}(SN)
1296
- instance: Linear
1297
- output_format: SAME
1298
- weight_format: BFP[8|8]{64,-1}(SN)
1299
- weight_sparseness: DENSE
1300
- model.decoder.layers.21.self_attn.q_proj:
1301
- accum_format: SAME
1302
- approximation_function: NONE
1303
- bias_format: SAME
1304
- input_format: BFP[8|8]{64,-1}(SN)
1305
- instance: Linear
1306
- output_format: BFP[8|8]{64,-1}(SN)
1307
- weight_format: BFP[8|8]{64,-1}(SN)
1308
- weight_sparseness: DENSE
1309
- model.decoder.layers.21.self_attn.softmax:
1310
- approximation_function: NONE
1311
- input_format: SAME
1312
- instance: Softmax
1313
- output_format: SAME
1314
- model.decoder.layers.21.self_attn.v_proj:
1315
- accum_format: SAME
1316
- approximation_function: NONE
1317
- bias_format: SAME
1318
- input_format: BFP[8|8]{64,-1}(SN)
1319
- instance: Linear
1320
- output_format: BFP[8|8]{64,-1}(SN)
1321
- weight_format: BFP[8|8]{64,-1}(SN)
1322
- weight_sparseness: DENSE
1323
- model.decoder.layers.21.self_attn_layer_norm:
1324
- approximation_function: NONE
1325
- bias_format: SAME
1326
- input_format: SAME
1327
- instance: LayerNorm
1328
- output_format: SAME
1329
- weight_format: SAME
1330
- model.decoder.layers.22.activation_fn:
1331
- approximation_function: NONE
1332
- input_format: SAME
1333
- instance: ReLU
1334
- output_format: SAME
1335
- model.decoder.layers.22.dropout:
1336
- approximation_function: NONE
1337
- input_format: SAME
1338
- instance: Dropout
1339
- output_format: SAME
1340
- model.decoder.layers.22.fc1:
1341
- accum_format: SAME
1342
- approximation_function: NONE
1343
- bias_format: SAME
1344
- input_format: BFP[8|8]{64,-1}(SN)
1345
- instance: Linear
1346
- output_format: SAME
1347
- weight_format: BFP[8|8]{64,-1}(SN)
1348
- weight_sparseness: DENSE
1349
- model.decoder.layers.22.fc2:
1350
- accum_format: SAME
1351
- approximation_function: NONE
1352
- bias_format: SAME
1353
- input_format: BFP[8|8]{64,-1}(SN)
1354
- instance: Linear
1355
- output_format: SAME
1356
- weight_format: BFP[8|8]{64,-1}(SN)
1357
- weight_sparseness: DENSE
1358
- model.decoder.layers.22.final_layer_norm:
1359
- approximation_function: NONE
1360
- bias_format: SAME
1361
- input_format: SAME
1362
- instance: LayerNorm
1363
- output_format: SAME
1364
- weight_format: SAME
1365
- model.decoder.layers.22.self_attn.dropout:
1366
- approximation_function: NONE
1367
- input_format: SAME
1368
- instance: Dropout
1369
- output_format: BFP[8|8]{64,-1}(SN)
1370
- model.decoder.layers.22.self_attn.k_proj:
1371
- accum_format: SAME
1372
- approximation_function: NONE
1373
- bias_format: SAME
1374
- input_format: BFP[8|8]{64,-1}(SN)
1375
- instance: Linear
1376
- output_format: BFP[8|8]{64,-1}(SN)
1377
- weight_format: BFP[8|8]{64,-1}(SN)
1378
- weight_sparseness: DENSE
1379
- model.decoder.layers.22.self_attn.out_proj:
1380
- accum_format: SAME
1381
- approximation_function: NONE
1382
- bias_format: SAME
1383
- input_format: BFP[8|8]{64,-1}(SN)
1384
- instance: Linear
1385
- output_format: SAME
1386
- weight_format: BFP[8|8]{64,-1}(SN)
1387
- weight_sparseness: DENSE
1388
- model.decoder.layers.22.self_attn.q_proj:
1389
- accum_format: SAME
1390
- approximation_function: NONE
1391
- bias_format: SAME
1392
- input_format: BFP[8|8]{64,-1}(SN)
1393
- instance: Linear
1394
- output_format: BFP[8|8]{64,-1}(SN)
1395
- weight_format: BFP[8|8]{64,-1}(SN)
1396
- weight_sparseness: DENSE
1397
- model.decoder.layers.22.self_attn.softmax:
1398
- approximation_function: NONE
1399
- input_format: SAME
1400
- instance: Softmax
1401
- output_format: SAME
1402
- model.decoder.layers.22.self_attn.v_proj:
1403
- accum_format: SAME
1404
- approximation_function: NONE
1405
- bias_format: SAME
1406
- input_format: BFP[8|8]{64,-1}(SN)
1407
- instance: Linear
1408
- output_format: BFP[8|8]{64,-1}(SN)
1409
- weight_format: BFP[8|8]{64,-1}(SN)
1410
- weight_sparseness: DENSE
1411
- model.decoder.layers.22.self_attn_layer_norm:
1412
- approximation_function: NONE
1413
- bias_format: SAME
1414
- input_format: SAME
1415
- instance: LayerNorm
1416
- output_format: SAME
1417
- weight_format: SAME
1418
- model.decoder.layers.23.activation_fn:
1419
- approximation_function: NONE
1420
- input_format: SAME
1421
- instance: ReLU
1422
- output_format: SAME
1423
- model.decoder.layers.23.dropout:
1424
- approximation_function: NONE
1425
- input_format: SAME
1426
- instance: Dropout
1427
- output_format: SAME
1428
- model.decoder.layers.23.fc1:
1429
- accum_format: SAME
1430
- approximation_function: NONE
1431
- bias_format: SAME
1432
- input_format: BFP[8|8]{64,-1}(SN)
1433
- instance: Linear
1434
- output_format: SAME
1435
- weight_format: BFP[8|8]{64,-1}(SN)
1436
- weight_sparseness: DENSE
1437
- model.decoder.layers.23.fc2:
1438
- accum_format: SAME
1439
- approximation_function: NONE
1440
- bias_format: SAME
1441
- input_format: BFP[8|8]{64,-1}(SN)
1442
- instance: Linear
1443
- output_format: SAME
1444
- weight_format: BFP[8|8]{64,-1}(SN)
1445
- weight_sparseness: DENSE
1446
- model.decoder.layers.23.final_layer_norm:
1447
- approximation_function: NONE
1448
- bias_format: SAME
1449
- input_format: SAME
1450
- instance: LayerNorm
1451
- output_format: SAME
1452
- weight_format: SAME
1453
- model.decoder.layers.23.self_attn.dropout:
1454
- approximation_function: NONE
1455
- input_format: SAME
1456
- instance: Dropout
1457
- output_format: BFP[8|8]{64,-1}(SN)
1458
- model.decoder.layers.23.self_attn.k_proj:
1459
- accum_format: SAME
1460
- approximation_function: NONE
1461
- bias_format: SAME
1462
- input_format: BFP[8|8]{64,-1}(SN)
1463
- instance: Linear
1464
- output_format: BFP[8|8]{64,-1}(SN)
1465
- weight_format: BFP[8|8]{64,-1}(SN)
1466
- weight_sparseness: DENSE
1467
- model.decoder.layers.23.self_attn.out_proj:
1468
- accum_format: SAME
1469
- approximation_function: NONE
1470
- bias_format: SAME
1471
- input_format: BFP[8|8]{64,-1}(SN)
1472
- instance: Linear
1473
- output_format: SAME
1474
- weight_format: BFP[8|8]{64,-1}(SN)
1475
- weight_sparseness: DENSE
1476
- model.decoder.layers.23.self_attn.q_proj:
1477
- accum_format: SAME
1478
- approximation_function: NONE
1479
- bias_format: SAME
1480
- input_format: BFP[8|8]{64,-1}(SN)
1481
- instance: Linear
1482
- output_format: BFP[8|8]{64,-1}(SN)
1483
- weight_format: BFP[8|8]{64,-1}(SN)
1484
- weight_sparseness: DENSE
1485
- model.decoder.layers.23.self_attn.softmax:
1486
- approximation_function: NONE
1487
- input_format: SAME
1488
- instance: Softmax
1489
- output_format: SAME
1490
- model.decoder.layers.23.self_attn.v_proj:
1491
- accum_format: SAME
1492
- approximation_function: NONE
1493
- bias_format: SAME
1494
- input_format: BFP[8|8]{64,-1}(SN)
1495
- instance: Linear
1496
- output_format: BFP[8|8]{64,-1}(SN)
1497
- weight_format: BFP[8|8]{64,-1}(SN)
1498
- weight_sparseness: DENSE
1499
- model.decoder.layers.23.self_attn_layer_norm:
1500
- approximation_function: NONE
1501
- bias_format: SAME
1502
- input_format: SAME
1503
- instance: LayerNorm
1504
- output_format: SAME
1505
- weight_format: SAME
1506
- model.decoder.layers.3.activation_fn:
1507
- approximation_function: NONE
1508
- input_format: SAME
1509
- instance: ReLU
1510
- output_format: SAME
1511
- model.decoder.layers.3.dropout:
1512
- approximation_function: NONE
1513
- input_format: SAME
1514
- instance: Dropout
1515
- output_format: SAME
1516
- model.decoder.layers.3.fc1:
1517
- accum_format: SAME
1518
- approximation_function: NONE
1519
- bias_format: SAME
1520
- input_format: BFP[8|8]{64,-1}(SN)
1521
- instance: Linear
1522
- output_format: SAME
1523
- weight_format: BFP[8|8]{64,-1}(SN)
1524
- weight_sparseness: DENSE
1525
- model.decoder.layers.3.fc2:
1526
- accum_format: SAME
1527
- approximation_function: NONE
1528
- bias_format: SAME
1529
- input_format: BFP[8|8]{64,-1}(SN)
1530
- instance: Linear
1531
- output_format: SAME
1532
- weight_format: BFP[8|8]{64,-1}(SN)
1533
- weight_sparseness: DENSE
1534
- model.decoder.layers.3.final_layer_norm:
1535
- approximation_function: NONE
1536
- bias_format: SAME
1537
- input_format: SAME
1538
- instance: LayerNorm
1539
- output_format: SAME
1540
- weight_format: SAME
1541
- model.decoder.layers.3.self_attn.dropout:
1542
- approximation_function: NONE
1543
- input_format: SAME
1544
- instance: Dropout
1545
- output_format: BFP[8|8]{64,-1}(SN)
1546
- model.decoder.layers.3.self_attn.k_proj:
1547
- accum_format: SAME
1548
- approximation_function: NONE
1549
- bias_format: SAME
1550
- input_format: BFP[8|8]{64,-1}(SN)
1551
- instance: Linear
1552
- output_format: BFP[8|8]{64,-1}(SN)
1553
- weight_format: BFP[8|8]{64,-1}(SN)
1554
- weight_sparseness: DENSE
1555
- model.decoder.layers.3.self_attn.out_proj:
1556
- accum_format: SAME
1557
- approximation_function: NONE
1558
- bias_format: SAME
1559
- input_format: BFP[8|8]{64,-1}(SN)
1560
- instance: Linear
1561
- output_format: SAME
1562
- weight_format: BFP[8|8]{64,-1}(SN)
1563
- weight_sparseness: DENSE
1564
- model.decoder.layers.3.self_attn.q_proj:
1565
- accum_format: SAME
1566
- approximation_function: NONE
1567
- bias_format: SAME
1568
- input_format: BFP[8|8]{64,-1}(SN)
1569
- instance: Linear
1570
- output_format: BFP[8|8]{64,-1}(SN)
1571
- weight_format: BFP[8|8]{64,-1}(SN)
1572
- weight_sparseness: DENSE
1573
- model.decoder.layers.3.self_attn.softmax:
1574
- approximation_function: NONE
1575
- input_format: SAME
1576
- instance: Softmax
1577
- output_format: SAME
1578
- model.decoder.layers.3.self_attn.v_proj:
1579
- accum_format: SAME
1580
- approximation_function: NONE
1581
- bias_format: SAME
1582
- input_format: BFP[8|8]{64,-1}(SN)
1583
- instance: Linear
1584
- output_format: BFP[8|8]{64,-1}(SN)
1585
- weight_format: BFP[8|8]{64,-1}(SN)
1586
- weight_sparseness: DENSE
1587
- model.decoder.layers.3.self_attn_layer_norm:
1588
- approximation_function: NONE
1589
- bias_format: SAME
1590
- input_format: SAME
1591
- instance: LayerNorm
1592
- output_format: SAME
1593
- weight_format: SAME
1594
- model.decoder.layers.4.activation_fn:
1595
- approximation_function: NONE
1596
- input_format: SAME
1597
- instance: ReLU
1598
- output_format: SAME
1599
- model.decoder.layers.4.dropout:
1600
- approximation_function: NONE
1601
- input_format: SAME
1602
- instance: Dropout
1603
- output_format: SAME
1604
- model.decoder.layers.4.fc1:
1605
- accum_format: SAME
1606
- approximation_function: NONE
1607
- bias_format: SAME
1608
- input_format: BFP[8|8]{64,-1}(SN)
1609
- instance: Linear
1610
- output_format: SAME
1611
- weight_format: BFP[8|8]{64,-1}(SN)
1612
- weight_sparseness: DENSE
1613
- model.decoder.layers.4.fc2:
1614
- accum_format: SAME
1615
- approximation_function: NONE
1616
- bias_format: SAME
1617
- input_format: BFP[8|8]{64,-1}(SN)
1618
- instance: Linear
1619
- output_format: SAME
1620
- weight_format: BFP[8|8]{64,-1}(SN)
1621
- weight_sparseness: DENSE
1622
- model.decoder.layers.4.final_layer_norm:
1623
- approximation_function: NONE
1624
- bias_format: SAME
1625
- input_format: SAME
1626
- instance: LayerNorm
1627
- output_format: SAME
1628
- weight_format: SAME
1629
- model.decoder.layers.4.self_attn.dropout:
1630
- approximation_function: NONE
1631
- input_format: SAME
1632
- instance: Dropout
1633
- output_format: BFP[8|8]{64,-1}(SN)
1634
- model.decoder.layers.4.self_attn.k_proj:
1635
- accum_format: SAME
1636
- approximation_function: NONE
1637
- bias_format: SAME
1638
- input_format: BFP[8|8]{64,-1}(SN)
1639
- instance: Linear
1640
- output_format: BFP[8|8]{64,-1}(SN)
1641
- weight_format: BFP[8|8]{64,-1}(SN)
1642
- weight_sparseness: DENSE
1643
- model.decoder.layers.4.self_attn.out_proj:
1644
- accum_format: SAME
1645
- approximation_function: NONE
1646
- bias_format: SAME
1647
- input_format: BFP[8|8]{64,-1}(SN)
1648
- instance: Linear
1649
- output_format: SAME
1650
- weight_format: BFP[8|8]{64,-1}(SN)
1651
- weight_sparseness: DENSE
1652
- model.decoder.layers.4.self_attn.q_proj:
1653
- accum_format: SAME
1654
- approximation_function: NONE
1655
- bias_format: SAME
1656
- input_format: BFP[8|8]{64,-1}(SN)
1657
- instance: Linear
1658
- output_format: BFP[8|8]{64,-1}(SN)
1659
- weight_format: BFP[8|8]{64,-1}(SN)
1660
- weight_sparseness: DENSE
1661
- model.decoder.layers.4.self_attn.softmax:
1662
- approximation_function: NONE
1663
- input_format: SAME
1664
- instance: Softmax
1665
- output_format: SAME
1666
- model.decoder.layers.4.self_attn.v_proj:
1667
- accum_format: SAME
1668
- approximation_function: NONE
1669
- bias_format: SAME
1670
- input_format: BFP[8|8]{64,-1}(SN)
1671
- instance: Linear
1672
- output_format: BFP[8|8]{64,-1}(SN)
1673
- weight_format: BFP[8|8]{64,-1}(SN)
1674
- weight_sparseness: DENSE
1675
- model.decoder.layers.4.self_attn_layer_norm:
1676
- approximation_function: NONE
1677
- bias_format: SAME
1678
- input_format: SAME
1679
- instance: LayerNorm
1680
- output_format: SAME
1681
- weight_format: SAME
1682
- model.decoder.layers.5.activation_fn:
1683
- approximation_function: NONE
1684
- input_format: SAME
1685
- instance: ReLU
1686
- output_format: SAME
1687
- model.decoder.layers.5.dropout:
1688
- approximation_function: NONE
1689
- input_format: SAME
1690
- instance: Dropout
1691
- output_format: SAME
1692
- model.decoder.layers.5.fc1:
1693
- accum_format: SAME
1694
- approximation_function: NONE
1695
- bias_format: SAME
1696
- input_format: BFP[8|8]{64,-1}(SN)
1697
- instance: Linear
1698
- output_format: SAME
1699
- weight_format: BFP[8|8]{64,-1}(SN)
1700
- weight_sparseness: DENSE
1701
- model.decoder.layers.5.fc2:
1702
- accum_format: SAME
1703
- approximation_function: NONE
1704
- bias_format: SAME
1705
- input_format: BFP[8|8]{64,-1}(SN)
1706
- instance: Linear
1707
- output_format: SAME
1708
- weight_format: BFP[8|8]{64,-1}(SN)
1709
- weight_sparseness: DENSE
1710
- model.decoder.layers.5.final_layer_norm:
1711
- approximation_function: NONE
1712
- bias_format: SAME
1713
- input_format: SAME
1714
- instance: LayerNorm
1715
- output_format: SAME
1716
- weight_format: SAME
1717
- model.decoder.layers.5.self_attn.dropout:
1718
- approximation_function: NONE
1719
- input_format: SAME
1720
- instance: Dropout
1721
- output_format: BFP[8|8]{64,-1}(SN)
1722
- model.decoder.layers.5.self_attn.k_proj:
1723
- accum_format: SAME
1724
- approximation_function: NONE
1725
- bias_format: SAME
1726
- input_format: BFP[8|8]{64,-1}(SN)
1727
- instance: Linear
1728
- output_format: BFP[8|8]{64,-1}(SN)
1729
- weight_format: BFP[8|8]{64,-1}(SN)
1730
- weight_sparseness: DENSE
1731
- model.decoder.layers.5.self_attn.out_proj:
1732
- accum_format: SAME
1733
- approximation_function: NONE
1734
- bias_format: SAME
1735
- input_format: BFP[8|8]{64,-1}(SN)
1736
- instance: Linear
1737
- output_format: SAME
1738
- weight_format: BFP[8|8]{64,-1}(SN)
1739
- weight_sparseness: DENSE
1740
- model.decoder.layers.5.self_attn.q_proj:
1741
- accum_format: SAME
1742
- approximation_function: NONE
1743
- bias_format: SAME
1744
- input_format: BFP[8|8]{64,-1}(SN)
1745
- instance: Linear
1746
- output_format: BFP[8|8]{64,-1}(SN)
1747
- weight_format: BFP[8|8]{64,-1}(SN)
1748
- weight_sparseness: DENSE
1749
- model.decoder.layers.5.self_attn.softmax:
1750
- approximation_function: NONE
1751
- input_format: SAME
1752
- instance: Softmax
1753
- output_format: SAME
1754
- model.decoder.layers.5.self_attn.v_proj:
1755
- accum_format: SAME
1756
- approximation_function: NONE
1757
- bias_format: SAME
1758
- input_format: BFP[8|8]{64,-1}(SN)
1759
- instance: Linear
1760
- output_format: BFP[8|8]{64,-1}(SN)
1761
- weight_format: BFP[8|8]{64,-1}(SN)
1762
- weight_sparseness: DENSE
1763
- model.decoder.layers.5.self_attn_layer_norm:
1764
- approximation_function: NONE
1765
- bias_format: SAME
1766
- input_format: SAME
1767
- instance: LayerNorm
1768
- output_format: SAME
1769
- weight_format: SAME
1770
- model.decoder.layers.6.activation_fn:
1771
- approximation_function: NONE
1772
- input_format: SAME
1773
- instance: ReLU
1774
- output_format: SAME
1775
- model.decoder.layers.6.dropout:
1776
- approximation_function: NONE
1777
- input_format: SAME
1778
- instance: Dropout
1779
- output_format: SAME
1780
- model.decoder.layers.6.fc1:
1781
- accum_format: SAME
1782
- approximation_function: NONE
1783
- bias_format: SAME
1784
- input_format: BFP[8|8]{64,-1}(SN)
1785
- instance: Linear
1786
- output_format: SAME
1787
- weight_format: BFP[8|8]{64,-1}(SN)
1788
- weight_sparseness: DENSE
1789
- model.decoder.layers.6.fc2:
1790
- accum_format: SAME
1791
- approximation_function: NONE
1792
- bias_format: SAME
1793
- input_format: BFP[8|8]{64,-1}(SN)
1794
- instance: Linear
1795
- output_format: SAME
1796
- weight_format: BFP[8|8]{64,-1}(SN)
1797
- weight_sparseness: DENSE
1798
- model.decoder.layers.6.final_layer_norm:
1799
- approximation_function: NONE
1800
- bias_format: SAME
1801
- input_format: SAME
1802
- instance: LayerNorm
1803
- output_format: SAME
1804
- weight_format: SAME
1805
- model.decoder.layers.6.self_attn.dropout:
1806
- approximation_function: NONE
1807
- input_format: SAME
1808
- instance: Dropout
1809
- output_format: BFP[8|8]{64,-1}(SN)
1810
- model.decoder.layers.6.self_attn.k_proj:
1811
- accum_format: SAME
1812
- approximation_function: NONE
1813
- bias_format: SAME
1814
- input_format: BFP[8|8]{64,-1}(SN)
1815
- instance: Linear
1816
- output_format: BFP[8|8]{64,-1}(SN)
1817
- weight_format: BFP[8|8]{64,-1}(SN)
1818
- weight_sparseness: DENSE
1819
- model.decoder.layers.6.self_attn.out_proj:
1820
- accum_format: SAME
1821
- approximation_function: NONE
1822
- bias_format: SAME
1823
- input_format: BFP[8|8]{64,-1}(SN)
1824
- instance: Linear
1825
- output_format: SAME
1826
- weight_format: BFP[8|8]{64,-1}(SN)
1827
- weight_sparseness: DENSE
1828
- model.decoder.layers.6.self_attn.q_proj:
1829
- accum_format: SAME
1830
- approximation_function: NONE
1831
- bias_format: SAME
1832
- input_format: BFP[8|8]{64,-1}(SN)
1833
- instance: Linear
1834
- output_format: BFP[8|8]{64,-1}(SN)
1835
- weight_format: BFP[8|8]{64,-1}(SN)
1836
- weight_sparseness: DENSE
1837
- model.decoder.layers.6.self_attn.softmax:
1838
- approximation_function: NONE
1839
- input_format: SAME
1840
- instance: Softmax
1841
- output_format: SAME
1842
- model.decoder.layers.6.self_attn.v_proj:
1843
- accum_format: SAME
1844
- approximation_function: NONE
1845
- bias_format: SAME
1846
- input_format: BFP[8|8]{64,-1}(SN)
1847
- instance: Linear
1848
- output_format: BFP[8|8]{64,-1}(SN)
1849
- weight_format: BFP[8|8]{64,-1}(SN)
1850
- weight_sparseness: DENSE
1851
- model.decoder.layers.6.self_attn_layer_norm:
1852
- approximation_function: NONE
1853
- bias_format: SAME
1854
- input_format: SAME
1855
- instance: LayerNorm
1856
- output_format: SAME
1857
- weight_format: SAME
1858
- model.decoder.layers.7.activation_fn:
1859
- approximation_function: NONE
1860
- input_format: SAME
1861
- instance: ReLU
1862
- output_format: SAME
1863
- model.decoder.layers.7.dropout:
1864
- approximation_function: NONE
1865
- input_format: SAME
1866
- instance: Dropout
1867
- output_format: SAME
1868
- model.decoder.layers.7.fc1:
1869
- accum_format: SAME
1870
- approximation_function: NONE
1871
- bias_format: SAME
1872
- input_format: BFP[8|8]{64,-1}(SN)
1873
- instance: Linear
1874
- output_format: SAME
1875
- weight_format: BFP[8|8]{64,-1}(SN)
1876
- weight_sparseness: DENSE
1877
- model.decoder.layers.7.fc2:
1878
- accum_format: SAME
1879
- approximation_function: NONE
1880
- bias_format: SAME
1881
- input_format: BFP[8|8]{64,-1}(SN)
1882
- instance: Linear
1883
- output_format: SAME
1884
- weight_format: BFP[8|8]{64,-1}(SN)
1885
- weight_sparseness: DENSE
1886
- model.decoder.layers.7.final_layer_norm:
1887
- approximation_function: NONE
1888
- bias_format: SAME
1889
- input_format: SAME
1890
- instance: LayerNorm
1891
- output_format: SAME
1892
- weight_format: SAME
1893
- model.decoder.layers.7.self_attn.dropout:
1894
- approximation_function: NONE
1895
- input_format: SAME
1896
- instance: Dropout
1897
- output_format: BFP[8|8]{64,-1}(SN)
1898
- model.decoder.layers.7.self_attn.k_proj:
1899
- accum_format: SAME
1900
- approximation_function: NONE
1901
- bias_format: SAME
1902
- input_format: BFP[8|8]{64,-1}(SN)
1903
- instance: Linear
1904
- output_format: BFP[8|8]{64,-1}(SN)
1905
- weight_format: BFP[8|8]{64,-1}(SN)
1906
- weight_sparseness: DENSE
1907
- model.decoder.layers.7.self_attn.out_proj:
1908
- accum_format: SAME
1909
- approximation_function: NONE
1910
- bias_format: SAME
1911
- input_format: BFP[8|8]{64,-1}(SN)
1912
- instance: Linear
1913
- output_format: SAME
1914
- weight_format: BFP[8|8]{64,-1}(SN)
1915
- weight_sparseness: DENSE
1916
- model.decoder.layers.7.self_attn.q_proj:
1917
- accum_format: SAME
1918
- approximation_function: NONE
1919
- bias_format: SAME
1920
- input_format: BFP[8|8]{64,-1}(SN)
1921
- instance: Linear
1922
- output_format: BFP[8|8]{64,-1}(SN)
1923
- weight_format: BFP[8|8]{64,-1}(SN)
1924
- weight_sparseness: DENSE
1925
- model.decoder.layers.7.self_attn.softmax:
1926
- approximation_function: NONE
1927
- input_format: SAME
1928
- instance: Softmax
1929
- output_format: SAME
1930
- model.decoder.layers.7.self_attn.v_proj:
1931
- accum_format: SAME
1932
- approximation_function: NONE
1933
- bias_format: SAME
1934
- input_format: BFP[8|8]{64,-1}(SN)
1935
- instance: Linear
1936
- output_format: BFP[8|8]{64,-1}(SN)
1937
- weight_format: BFP[8|8]{64,-1}(SN)
1938
- weight_sparseness: DENSE
1939
- model.decoder.layers.7.self_attn_layer_norm:
1940
- approximation_function: NONE
1941
- bias_format: SAME
1942
- input_format: SAME
1943
- instance: LayerNorm
1944
- output_format: SAME
1945
- weight_format: SAME
1946
- model.decoder.layers.8.activation_fn:
1947
- approximation_function: NONE
1948
- input_format: SAME
1949
- instance: ReLU
1950
- output_format: SAME
1951
- model.decoder.layers.8.dropout:
1952
- approximation_function: NONE
1953
- input_format: SAME
1954
- instance: Dropout
1955
- output_format: SAME
1956
- model.decoder.layers.8.fc1:
1957
- accum_format: SAME
1958
- approximation_function: NONE
1959
- bias_format: SAME
1960
- input_format: BFP[8|8]{64,-1}(SN)
1961
- instance: Linear
1962
- output_format: SAME
1963
- weight_format: BFP[8|8]{64,-1}(SN)
1964
- weight_sparseness: DENSE
1965
- model.decoder.layers.8.fc2:
1966
- accum_format: SAME
1967
- approximation_function: NONE
1968
- bias_format: SAME
1969
- input_format: BFP[8|8]{64,-1}(SN)
1970
- instance: Linear
1971
- output_format: SAME
1972
- weight_format: BFP[8|8]{64,-1}(SN)
1973
- weight_sparseness: DENSE
1974
- model.decoder.layers.8.final_layer_norm:
1975
- approximation_function: NONE
1976
- bias_format: SAME
1977
- input_format: SAME
1978
- instance: LayerNorm
1979
- output_format: SAME
1980
- weight_format: SAME
1981
- model.decoder.layers.8.self_attn.dropout:
1982
- approximation_function: NONE
1983
- input_format: SAME
1984
- instance: Dropout
1985
- output_format: BFP[8|8]{64,-1}(SN)
1986
- model.decoder.layers.8.self_attn.k_proj:
1987
- accum_format: SAME
1988
- approximation_function: NONE
1989
- bias_format: SAME
1990
- input_format: BFP[8|8]{64,-1}(SN)
1991
- instance: Linear
1992
- output_format: BFP[8|8]{64,-1}(SN)
1993
- weight_format: BFP[8|8]{64,-1}(SN)
1994
- weight_sparseness: DENSE
1995
- model.decoder.layers.8.self_attn.out_proj:
1996
- accum_format: SAME
1997
- approximation_function: NONE
1998
- bias_format: SAME
1999
- input_format: BFP[8|8]{64,-1}(SN)
2000
- instance: Linear
2001
- output_format: SAME
2002
- weight_format: BFP[8|8]{64,-1}(SN)
2003
- weight_sparseness: DENSE
2004
- model.decoder.layers.8.self_attn.q_proj:
2005
- accum_format: SAME
2006
- approximation_function: NONE
2007
- bias_format: SAME
2008
- input_format: BFP[8|8]{64,-1}(SN)
2009
- instance: Linear
2010
- output_format: BFP[8|8]{64,-1}(SN)
2011
- weight_format: BFP[8|8]{64,-1}(SN)
2012
- weight_sparseness: DENSE
2013
- model.decoder.layers.8.self_attn.softmax:
2014
- approximation_function: NONE
2015
- input_format: SAME
2016
- instance: Softmax
2017
- output_format: SAME
2018
- model.decoder.layers.8.self_attn.v_proj:
2019
- accum_format: SAME
2020
- approximation_function: NONE
2021
- bias_format: SAME
2022
- input_format: BFP[8|8]{64,-1}(SN)
2023
- instance: Linear
2024
- output_format: BFP[8|8]{64,-1}(SN)
2025
- weight_format: BFP[8|8]{64,-1}(SN)
2026
- weight_sparseness: DENSE
2027
- model.decoder.layers.8.self_attn_layer_norm:
2028
- approximation_function: NONE
2029
- bias_format: SAME
2030
- input_format: SAME
2031
- instance: LayerNorm
2032
- output_format: SAME
2033
- weight_format: SAME
2034
- model.decoder.layers.9.activation_fn:
2035
- approximation_function: NONE
2036
- input_format: SAME
2037
- instance: ReLU
2038
- output_format: SAME
2039
- model.decoder.layers.9.dropout:
2040
- approximation_function: NONE
2041
- input_format: SAME
2042
- instance: Dropout
2043
- output_format: SAME
2044
- model.decoder.layers.9.fc1:
2045
- accum_format: SAME
2046
- approximation_function: NONE
2047
- bias_format: SAME
2048
- input_format: BFP[8|8]{64,-1}(SN)
2049
- instance: Linear
2050
- output_format: SAME
2051
- weight_format: BFP[8|8]{64,-1}(SN)
2052
- weight_sparseness: DENSE
2053
- model.decoder.layers.9.fc2:
2054
- accum_format: SAME
2055
- approximation_function: NONE
2056
- bias_format: SAME
2057
- input_format: BFP[8|8]{64,-1}(SN)
2058
- instance: Linear
2059
- output_format: SAME
2060
- weight_format: BFP[8|8]{64,-1}(SN)
2061
- weight_sparseness: DENSE
2062
- model.decoder.layers.9.final_layer_norm:
2063
- approximation_function: NONE
2064
- bias_format: SAME
2065
- input_format: SAME
2066
- instance: LayerNorm
2067
- output_format: SAME
2068
- weight_format: SAME
2069
- model.decoder.layers.9.self_attn.dropout:
2070
- approximation_function: NONE
2071
- input_format: SAME
2072
- instance: Dropout
2073
- output_format: BFP[8|8]{64,-1}(SN)
2074
- model.decoder.layers.9.self_attn.k_proj:
2075
- accum_format: SAME
2076
- approximation_function: NONE
2077
- bias_format: SAME
2078
- input_format: BFP[8|8]{64,-1}(SN)
2079
- instance: Linear
2080
- output_format: BFP[8|8]{64,-1}(SN)
2081
- weight_format: BFP[8|8]{64,-1}(SN)
2082
- weight_sparseness: DENSE
2083
- model.decoder.layers.9.self_attn.out_proj:
2084
- accum_format: SAME
2085
- approximation_function: NONE
2086
- bias_format: SAME
2087
- input_format: BFP[8|8]{64,-1}(SN)
2088
- instance: Linear
2089
- output_format: SAME
2090
- weight_format: BFP[8|8]{64,-1}(SN)
2091
- weight_sparseness: DENSE
2092
- model.decoder.layers.9.self_attn.q_proj:
2093
- accum_format: SAME
2094
- approximation_function: NONE
2095
- bias_format: SAME
2096
- input_format: BFP[8|8]{64,-1}(SN)
2097
- instance: Linear
2098
- output_format: BFP[8|8]{64,-1}(SN)
2099
- weight_format: BFP[8|8]{64,-1}(SN)
2100
- weight_sparseness: DENSE
2101
- model.decoder.layers.9.self_attn.softmax:
2102
- approximation_function: NONE
2103
- input_format: SAME
2104
- instance: Softmax
2105
- output_format: SAME
2106
- model.decoder.layers.9.self_attn.v_proj:
2107
- accum_format: SAME
2108
- approximation_function: NONE
2109
- bias_format: SAME
2110
- input_format: BFP[8|8]{64,-1}(SN)
2111
- instance: Linear
2112
- output_format: BFP[8|8]{64,-1}(SN)
2113
- weight_format: BFP[8|8]{64,-1}(SN)
2114
- weight_sparseness: DENSE
2115
- model.decoder.layers.9.self_attn_layer_norm:
2116
- approximation_function: NONE
2117
- bias_format: SAME
2118
- input_format: SAME
2119
- instance: LayerNorm
2120
- output_format: SAME
2121
- weight_format: SAME
2122
- model.decoder.project_in:
2123
- accum_format: SAME
2124
- approximation_function: NONE
2125
- input_format: SAME
2126
- instance: Linear
2127
- output_format: SAME
2128
- weight_format: SAME
2129
- weight_sparseness: DENSE
2130
- model.decoder.project_out:
2131
- accum_format: SAME
2132
- approximation_function: NONE
2133
- input_format: SAME
2134
- instance: Linear
2135
- output_format: SAME
2136
- weight_format: SAME
2137
- weight_sparseness: DENSE