|
CUDA extension not installed. |
|
Some weights of the model checkpoint at google/t5-v1_1-xxl were not used when initializing T5EncoderModel: ['decoder.block.6.layer.1.layer_norm.weight', 'decoder.block.10.layer.1.layer_norm.weight', 'decoder.block.17.layer.2.layer_norm.weight', 'decoder.block.13.layer.0.SelfAttention.v.weight', 'decoder.block.19.layer.0.SelfAttention.o.weight', 'decoder.block.22.layer.2.layer_norm.weight', 'decoder.block.23.layer.0.SelfAttention.q.weight', 'decoder.block.20.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.17.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.7.layer.1.EncDecAttention.k.weight', 'decoder.block.20.layer.0.SelfAttention.k.weight', 'decoder.block.18.layer.2.DenseReluDense.wo.weight', 'decoder.block.16.layer.1.EncDecAttention.v.weight', 'decoder.block.3.layer.0.SelfAttention.k.weight', 'decoder.block.1.layer.0.SelfAttention.v.weight', 'decoder.block.21.layer.2.DenseReluDense.wo.weight', 'decoder.block.2.layer.1.layer_norm.weight', 'decoder.block.17.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.3.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.23.layer.1.EncDecAttention.o.weight', 'decoder.block.7.layer.0.SelfAttention.q.weight', 'decoder.block.10.layer.0.SelfAttention.o.weight', 'decoder.block.11.layer.0.SelfAttention.v.weight', 'decoder.block.1.layer.0.SelfAttention.o.weight', 'decoder.block.9.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.1.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.9.layer.1.layer_norm.weight', 'decoder.block.11.layer.2.DenseReluDense.wo.weight', 'decoder.block.14.layer.1.EncDecAttention.k.weight', 'decoder.block.22.layer.0.SelfAttention.o.weight', 'decoder.block.19.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.14.layer.2.DenseReluDense.wo.weight', 'decoder.block.2.layer.0.SelfAttention.k.weight', 'decoder.embed_tokens.weight', 'decoder.block.6.layer.0.layer_norm.weight', 'decoder.block.3.layer.2.layer_norm.weight', 'decoder.block.13.layer.1.EncDecAttention.k.weight', 'decoder.block.0.layer.0.SelfAttention.o.weight', 'decoder.block.17.layer.2.DenseReluDense.wo.weight', 'decoder.block.18.layer.0.layer_norm.weight', 'decoder.block.9.layer.1.EncDecAttention.k.weight', 'decoder.block.11.layer.0.SelfAttention.q.weight', 'decoder.block.15.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.6.layer.1.EncDecAttention.q.weight', 'decoder.block.10.layer.1.EncDecAttention.q.weight', 'decoder.block.10.layer.0.SelfAttention.v.weight', 'decoder.block.17.layer.0.SelfAttention.o.weight', 'decoder.block.0.layer.0.SelfAttention.v.weight', 'decoder.block.18.layer.1.layer_norm.weight', 'decoder.block.18.layer.2.layer_norm.weight', 'decoder.block.12.layer.2.layer_norm.weight', 'decoder.block.2.layer.1.EncDecAttention.o.weight', 'decoder.block.6.layer.1.EncDecAttention.o.weight', 'decoder.block.17.layer.1.EncDecAttention.o.weight', 'decoder.block.3.layer.1.EncDecAttention.o.weight', 'decoder.block.18.layer.1.EncDecAttention.v.weight', 'decoder.block.15.layer.1.EncDecAttention.o.weight', 'decoder.block.0.layer.0.SelfAttention.q.weight', 'decoder.block.13.layer.2.DenseReluDense.wo.weight', 'decoder.block.1.layer.0.layer_norm.weight', 'decoder.block.15.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.2.layer.0.SelfAttention.o.weight', 'decoder.block.17.layer.1.EncDecAttention.k.weight', 'decoder.block.14.layer.2.layer_norm.weight', 'decoder.block.17.layer.0.SelfAttention.k.weight', 'decoder.block.3.layer.0.SelfAttention.q.weight', 'decoder.block.14.layer.0.SelfAttention.v.weight', 'decoder.block.6.layer.2.DenseReluDense.wo.weight', 'decoder.block.20.layer.1.EncDecAttention.o.weight', 'decoder.block.15.layer.0.SelfAttention.o.weight', 'decoder.block.18.layer.0.SelfAttention.v.weight', 'decoder.block.1.layer.1.EncDecAttention.q.weight', 'decoder.block.10.layer.1.EncDecAttention.v.weight', 'decoder.block.1.layer.0.SelfAttention.q.weight', 'decoder.block.8.layer.0.layer_norm.weight', 'decoder.block.16.layer.2.layer_norm.weight', 'decoder.block.7.layer.1.EncDecAttention.v.weight', 'decoder.block.12.layer.1.EncDecAttention.k.weight', 'decoder.block.17.layer.1.EncDecAttention.v.weight', 'decoder.block.23.layer.2.DenseReluDense.wo.weight', 'decoder.block.14.layer.0.SelfAttention.k.weight', 'decoder.block.3.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.17.layer.1.layer_norm.weight', 'decoder.block.2.layer.1.EncDecAttention.k.weight', 'decoder.block.10.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.12.layer.1.layer_norm.weight', 'decoder.block.0.layer.1.EncDecAttention.o.weight', 'decoder.block.9.layer.2.layer_norm.weight', 'decoder.block.1.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.13.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.23.layer.0.SelfAttention.v.weight', 'decoder.block.2.layer.2.DenseReluDense.wo.weight', 'decoder.block.4.layer.1.EncDecAttention.v.weight', 'decoder.block.19.layer.0.SelfAttention.q.weight', 'decoder.block.12.layer.2.DenseReluDense.wo.weight', 'decoder.block.4.layer.2.layer_norm.weight', 'decoder.block.9.layer.1.EncDecAttention.v.weight', 'decoder.block.13.layer.0.SelfAttention.q.weight', 'decoder.block.4.layer.0.layer_norm.weight', 'decoder.block.12.layer.0.SelfAttention.q.weight', 'decoder.block.16.layer.1.EncDecAttention.o.weight', 'decoder.block.6.layer.0.SelfAttention.o.weight', 'decoder.block.22.layer.0.SelfAttention.k.weight', 'decoder.block.8.layer.1.EncDecAttention.q.weight', 'decoder.block.17.layer.0.SelfAttention.q.weight', 'decoder.block.5.layer.1.EncDecAttention.k.weight', 'decoder.block.11.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.22.layer.0.SelfAttention.v.weight', 'decoder.block.14.layer.1.layer_norm.weight', 'decoder.block.15.layer.1.EncDecAttention.k.weight', 'decoder.block.21.layer.2.layer_norm.weight', 'decoder.block.21.layer.1.layer_norm.weight', 'decoder.block.10.layer.1.EncDecAttention.o.weight', 'decoder.block.11.layer.1.EncDecAttention.q.weight', 'decoder.block.16.layer.0.layer_norm.weight', 'decoder.block.11.layer.0.SelfAttention.o.weight', 'decoder.block.5.layer.0.SelfAttention.v.weight', 'decoder.block.20.layer.1.EncDecAttention.v.weight', 'decoder.block.2.layer.2.layer_norm.weight', 'decoder.block.15.layer.1.EncDecAttention.q.weight', 'decoder.block.13.layer.0.SelfAttention.o.weight', 'decoder.block.5.layer.0.layer_norm.weight', 'decoder.block.6.layer.1.EncDecAttention.v.weight', 'decoder.block.23.layer.1.EncDecAttention.q.weight', 'decoder.block.18.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.5.layer.2.DenseReluDense.wo.weight', 'decoder.block.19.layer.0.SelfAttention.v.weight', 'decoder.block.8.layer.0.SelfAttention.o.weight', 'decoder.block.23.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.23.layer.1.layer_norm.weight', 'decoder.block.22.layer.1.EncDecAttention.q.weight', 'decoder.block.20.layer.2.DenseReluDense.wo.weight', 'decoder.block.20.layer.1.EncDecAttention.q.weight', 'decoder.block.15.layer.0.layer_norm.weight', 'decoder.block.8.layer.1.EncDecAttention.k.weight', 'decoder.block.21.layer.0.SelfAttention.o.weight', 'decoder.block.4.layer.1.EncDecAttention.o.weight', 'decoder.block.1.layer.0.SelfAttention.k.weight', 'decoder.block.19.layer.1.layer_norm.weight', 'decoder.block.12.layer.0.SelfAttention.k.weight', 'decoder.block.4.layer.1.EncDecAttention.k.weight', 'decoder.block.20.layer.0.SelfAttention.v.weight', 'decoder.block.18.layer.0.SelfAttention.o.weight', 'decoder.block.1.layer.1.EncDecAttention.o.weight', 'decoder.block.18.layer.1.EncDecAttention.k.weight', 'lm_head.weight', 'decoder.block.2.layer.0.layer_norm.weight', 'decoder.block.14.layer.1.EncDecAttention.v.weight', 'decoder.block.10.layer.0.layer_norm.weight', 'decoder.block.11.layer.0.SelfAttention.k.weight', 'decoder.block.18.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.0.layer.1.EncDecAttention.v.weight', 'decoder.block.0.layer.2.layer_norm.weight', 'decoder.block.23.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.20.layer.1.layer_norm.weight', 'decoder.block.20.layer.1.EncDecAttention.k.weight', 'decoder.block.15.layer.2.DenseReluDense.wo.weight', 'decoder.block.19.layer.1.EncDecAttention.o.weight', 'decoder.block.13.layer.1.layer_norm.weight', 'decoder.block.7.layer.2.DenseReluDense.wo.weight', 'decoder.block.10.layer.2.layer_norm.weight', 'decoder.block.0.layer.0.SelfAttention.k.weight', 'decoder.block.19.layer.0.SelfAttention.k.weight', 'decoder.block.8.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.16.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.5.layer.0.SelfAttention.q.weight', 'decoder.block.18.layer.0.SelfAttention.k.weight', 'decoder.block.4.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.20.layer.0.SelfAttention.o.weight', 'decoder.block.6.layer.0.SelfAttention.v.weight', 'decoder.block.14.layer.0.SelfAttention.q.weight', 'decoder.block.13.layer.1.EncDecAttention.o.weight', 'decoder.block.19.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.21.layer.1.EncDecAttention.o.weight', 'decoder.block.7.layer.0.SelfAttention.o.weight', 'decoder.block.15.layer.2.layer_norm.weight', 'decoder.block.18.layer.0.SelfAttention.q.weight', 'decoder.block.7.layer.1.layer_norm.weight', 'decoder.block.4.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.4.layer.0.SelfAttention.o.weight', 'decoder.block.9.layer.0.layer_norm.weight', 'decoder.block.7.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.2.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.22.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.12.layer.1.EncDecAttention.v.weight', 'decoder.block.11.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.4.layer.1.EncDecAttention.q.weight', 'decoder.block.5.layer.1.EncDecAttention.o.weight', 'decoder.block.8.layer.1.layer_norm.weight', 'decoder.block.13.layer.1.EncDecAttention.v.weight', 'decoder.block.19.layer.1.EncDecAttention.k.weight', 'decoder.block.16.layer.1.layer_norm.weight', 'decoder.block.20.layer.0.layer_norm.weight', 'decoder.block.22.layer.1.EncDecAttention.k.weight', 'decoder.block.11.layer.2.layer_norm.weight', 'decoder.block.11.layer.1.layer_norm.weight', 'decoder.block.7.layer.0.SelfAttention.v.weight', 'decoder.block.3.layer.0.SelfAttention.o.weight', 'decoder.block.0.layer.2.DenseReluDense.wo.weight', 'decoder.block.6.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.16.layer.0.SelfAttention.q.weight', 'decoder.block.21.layer.1.EncDecAttention.k.weight', 'decoder.block.3.layer.1.EncDecAttention.k.weight', 'decoder.block.9.layer.1.EncDecAttention.q.weight', 'decoder.block.6.layer.0.SelfAttention.k.weight', 'decoder.block.4.layer.0.SelfAttention.v.weight', 'decoder.block.11.layer.0.layer_norm.weight', 'decoder.block.22.layer.1.EncDecAttention.v.weight', 'decoder.block.19.layer.2.DenseReluDense.wo.weight', 'decoder.block.0.layer.1.EncDecAttention.q.weight', 'decoder.block.15.layer.1.layer_norm.weight', 'decoder.block.4.layer.2.DenseReluDense.wo.weight', 'decoder.block.8.layer.0.SelfAttention.v.weight', 'decoder.block.18.layer.1.EncDecAttention.o.weight', 'decoder.block.4.layer.0.SelfAttention.k.weight', 'decoder.block.15.layer.1.EncDecAttention.v.weight', 'decoder.block.5.layer.0.SelfAttention.o.weight', 'decoder.block.1.layer.1.EncDecAttention.v.weight', 'decoder.block.2.layer.0.SelfAttention.v.weight', 'decoder.block.7.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.2.layer.1.EncDecAttention.v.weight', 'decoder.block.14.layer.0.layer_norm.weight', 'decoder.block.15.layer.0.SelfAttention.k.weight', 'decoder.block.22.layer.1.EncDecAttention.o.weight', 'decoder.block.21.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.14.layer.1.EncDecAttention.q.weight', 'decoder.block.7.layer.1.EncDecAttention.o.weight', 'decoder.block.8.layer.0.SelfAttention.q.weight', 'decoder.block.4.layer.0.SelfAttention.q.weight', 'decoder.block.3.layer.0.SelfAttention.v.weight', 'decoder.block.13.layer.0.layer_norm.weight', 'decoder.block.21.layer.0.SelfAttention.v.weight', 'decoder.block.16.layer.0.SelfAttention.k.weight', 'decoder.block.3.layer.0.layer_norm.weight', 'decoder.block.10.layer.1.EncDecAttention.k.weight', 'decoder.block.9.layer.2.DenseReluDense.wo.weight', 'decoder.block.21.layer.0.SelfAttention.k.weight', 'decoder.block.16.layer.1.EncDecAttention.k.weight', 'decoder.block.7.layer.0.SelfAttention.k.weight', 'decoder.block.7.layer.1.EncDecAttention.q.weight', 'decoder.block.11.layer.1.EncDecAttention.k.weight', 'decoder.block.23.layer.0.SelfAttention.k.weight', 'decoder.block.20.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.5.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.6.layer.0.SelfAttention.q.weight', 'decoder.block.22.layer.0.SelfAttention.q.weight', 'decoder.block.23.layer.2.layer_norm.weight', 'decoder.block.11.layer.1.EncDecAttention.o.weight', 'decoder.block.19.layer.1.EncDecAttention.v.weight', 'decoder.block.13.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.3.layer.1.EncDecAttention.v.weight', 'decoder.block.13.layer.2.layer_norm.weight', 'decoder.block.16.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight', 'decoder.block.19.layer.0.layer_norm.weight', 'decoder.block.17.layer.1.EncDecAttention.q.weight', 'decoder.block.21.layer.1.EncDecAttention.v.weight', 'decoder.block.17.layer.0.layer_norm.weight', 'decoder.block.5.layer.2.layer_norm.weight', 'decoder.block.20.layer.0.SelfAttention.q.weight', 'decoder.block.23.layer.0.SelfAttention.o.weight', 'decoder.block.22.layer.0.layer_norm.weight', 'decoder.block.16.layer.0.SelfAttention.o.weight', 'decoder.block.9.layer.0.SelfAttention.v.weight', 'decoder.block.9.layer.1.EncDecAttention.o.weight', 'decoder.block.4.layer.1.layer_norm.weight', 'decoder.block.5.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.12.layer.0.layer_norm.weight', 'decoder.block.5.layer.1.EncDecAttention.v.weight', 'decoder.block.12.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.8.layer.0.SelfAttention.k.weight', 'decoder.block.8.layer.1.EncDecAttention.o.weight', 'decoder.block.0.layer.1.EncDecAttention.k.weight', 'decoder.block.16.layer.0.SelfAttention.v.weight', 'decoder.block.12.layer.1.EncDecAttention.o.weight', 'decoder.block.8.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.1.layer.1.EncDecAttention.k.weight', 'decoder.block.2.layer.0.SelfAttention.q.weight', 'decoder.block.5.layer.0.SelfAttention.k.weight', 'decoder.block.22.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.7.layer.0.layer_norm.weight', 'decoder.block.9.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.17.layer.0.SelfAttention.v.weight', 'decoder.block.8.layer.2.DenseReluDense.wo.weight', 'decoder.block.18.layer.1.EncDecAttention.q.weight', 'decoder.block.6.layer.1.EncDecAttention.k.weight', 'decoder.block.22.layer.2.DenseReluDense.wo.weight', 'decoder.block.9.layer.0.SelfAttention.k.weight', 'decoder.block.2.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.7.layer.2.layer_norm.weight', 'decoder.block.16.layer.1.EncDecAttention.q.weight', 'decoder.block.15.layer.0.SelfAttention.v.weight', 'decoder.final_layer_norm.weight', 'decoder.block.0.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.3.layer.1.EncDecAttention.q.weight', 'decoder.block.3.layer.1.layer_norm.weight', 'decoder.block.9.layer.0.SelfAttention.q.weight', 'decoder.block.1.layer.1.layer_norm.weight', 'decoder.block.14.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.10.layer.0.SelfAttention.k.weight', 'decoder.block.14.layer.0.SelfAttention.o.weight', 'decoder.block.0.layer.1.layer_norm.weight', 'decoder.block.9.layer.0.SelfAttention.o.weight', 'decoder.block.19.layer.2.layer_norm.weight', 'decoder.block.1.layer.2.layer_norm.weight', 'decoder.block.13.layer.1.EncDecAttention.q.weight', 'decoder.block.10.layer.2.DenseReluDense.wo.weight', 'decoder.block.14.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.6.layer.2.layer_norm.weight', 'decoder.block.11.layer.1.EncDecAttention.v.weight', 'decoder.block.5.layer.1.layer_norm.weight', 'decoder.block.12.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.23.layer.1.EncDecAttention.k.weight', 'decoder.block.23.layer.0.layer_norm.weight', 'decoder.block.12.layer.0.SelfAttention.v.weight', 'decoder.block.13.layer.0.SelfAttention.k.weight', 'decoder.block.20.layer.2.layer_norm.weight', 'decoder.block.21.layer.1.EncDecAttention.q.weight', 'decoder.block.3.layer.2.DenseReluDense.wo.weight', 'decoder.block.1.layer.2.DenseReluDense.wo.weight', 'decoder.block.21.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.22.layer.1.layer_norm.weight', 'decoder.block.10.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.23.layer.1.EncDecAttention.v.weight', 'decoder.block.15.layer.0.SelfAttention.q.weight', 'decoder.block.2.layer.1.EncDecAttention.q.weight', 'decoder.block.10.layer.0.SelfAttention.q.weight', 'decoder.block.21.layer.0.layer_norm.weight', 'decoder.block.14.layer.1.EncDecAttention.o.weight', 'decoder.block.0.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.5.layer.1.EncDecAttention.q.weight', 'decoder.block.8.layer.2.layer_norm.weight', 'decoder.block.16.layer.2.DenseReluDense.wo.weight', 'decoder.block.19.layer.1.EncDecAttention.q.weight', 'decoder.block.12.layer.0.SelfAttention.o.weight', 'decoder.block.12.layer.1.EncDecAttention.q.weight', 'decoder.block.21.layer.0.SelfAttention.q.weight', 'decoder.block.0.layer.0.layer_norm.weight', 'decoder.block.8.layer.1.EncDecAttention.v.weight', 'decoder.block.6.layer.2.DenseReluDense.wi_1.weight'] |
|
- This IS expected if you are initializing T5EncoderModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). |
|
- This IS NOT expected if you are initializing T5EncoderModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). |
|
Found cached dataset wikitext (/root/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/1.0.0/a241db52902eaf2c6aa732210bead40c090019a499ceb13bcbfa3f8ab646a126) |
|
Found cached dataset wikitext (/root/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/1.0.0/a241db52902eaf2c6aa732210bead40c090019a499ceb13bcbfa3f8ab646a126) |
|
Token indices sequence length is longer than the specified maximum sequence length for this model (2837981 > 512). Running this sequence through the model will result in indexing errors |
|
Starting ... |
|
Ready. |
|
0 layer.0.SelfAttention.q |
|
Quantizing ... |
|
time 2.80 |
|
error 137.22543334960938 |
|
0 layer.0.SelfAttention.k |
|
Quantizing ... |
|
time 1.03 |
|
error 11656.236328125 |
|
0 layer.0.SelfAttention.v |
|
Quantizing ... |
|
time 1.04 |
|
error 10592.220703125 |
|
0 layer.0.SelfAttention.o |
|
Quantizing ... |
|
time 1.03 |
|
error 120966.59375 |
|
0 layer.1.DenseReluDense.wi_0 |
|
Quantizing ... |
|
time 1.05 |
|
error 38126.375 |
|
0 layer.1.DenseReluDense.wi_1 |
|
Quantizing ... |
|
time 1.04 |
|
error 32506.427734375 |
|
0 layer.1.DenseReluDense.wo |
|
Quantizing ... |
|
time 2.81 |
|
error 214925.140625 |
|
1 layer.0.SelfAttention.q |
|
Quantizing ... |
|
time 2.27 |
|
error 253.24050903320312 |
|
1 layer.0.SelfAttention.k |
|
Quantizing ... |
|
time 1.01 |
|
error 15095.802734375 |
|
1 layer.0.SelfAttention.v |
|
Quantizing ... |
|
time 1.03 |
|
error 4179.1083984375 |
|
1 layer.0.SelfAttention.o |
|
Quantizing ... |
|
time 1.03 |
|
error 20773.45703125 |
|
1 layer.1.DenseReluDense.wi_0 |
|
Quantizing ... |
|
time 1.03 |
|
error 28934.0859375 |
|
1 layer.1.DenseReluDense.wi_1 |
|
Quantizing ... |
|
time 1.05 |
|
error 24144.3125 |
|
1 layer.1.DenseReluDense.wo |
|
Quantizing ... |
|
time 2.75 |
|
error 97274.90625 |
|
2 layer.0.SelfAttention.q |
|
Quantizing ... |
|
time 2.34 |
|
error 205.71896362304688 |
|
2 layer.0.SelfAttention.k |
|
Quantizing ... |
|
time 1.05 |
|
error 10929.7021484375 |
|
2 layer.0.SelfAttention.v |
|
Quantizing ... |
|
time 1.06 |
|
error 3825.074462890625 |
|
2 layer.0.SelfAttention.o |
|
Quantizing ... |
|
time 1.02 |
|
error 2498.05859375 |
|
2 layer.1.DenseReluDense.wi_0 |
|
Quantizing ... |
|
time 1.03 |
|
error 42947.859375 |
|
2 layer.1.DenseReluDense.wi_1 |
|
Quantizing ... |
|
time 1.03 |
|
error 36752.1171875 |
|
2 layer.1.DenseReluDense.wo |
|
Quantizing ... |
|
time 2.71 |
|
error 135178.4375 |
|
3 layer.0.SelfAttention.q |
|
Quantizing ... |
|
time 2.31 |
|
error 263.6244201660156 |
|
3 layer.0.SelfAttention.k |
|
Quantizing ... |
|
time 1.06 |
|
error 13956.330078125 |
|
3 layer.0.SelfAttention.v |
|
Quantizing ... |
|
time 1.06 |
|
error 5999.3544921875 |
|
3 layer.0.SelfAttention.o |
|
Quantizing ... |
|
time 1.05 |
|
error 5389.494140625 |
|
3 layer.1.DenseReluDense.wi_0 |
|
Quantizing ... |
|
time 1.10 |
|
error 43406.984375 |
|
3 layer.1.DenseReluDense.wi_1 |
|
Quantizing ... |
|
time 1.07 |
|
error 40294.578125 |
|
3 layer.1.DenseReluDense.wo |
|
Quantizing ... |
|
time 2.80 |
|
error 136006.0 |
|
4 layer.0.SelfAttention.q |
|
Quantizing ... |
|
time 2.30 |
|
error 300.17022705078125 |
|
4 layer.0.SelfAttention.k |
|
Quantizing ... |
|
time 1.03 |
|
error 16043.65234375 |
|
4 layer.0.SelfAttention.v |
|
Quantizing ... |
|
time 1.03 |
|
error 6112.3857421875 |
|
4 layer.0.SelfAttention.o |
|
Quantizing ... |
|
time 1.03 |
|
error 4162.61474609375 |
|
4 layer.1.DenseReluDense.wi_0 |
|
Quantizing ... |
|
time 1.06 |
|
error 44532.5625 |
|
4 layer.1.DenseReluDense.wi_1 |
|
Quantizing ... |
|
time 1.07 |
|
error 42825.140625 |
|
4 layer.1.DenseReluDense.wo |
|
Quantizing ... |
|
time 2.88 |
|
error 165037.09375 |
|
5 layer.0.SelfAttention.q |
|
Quantizing ... |
|
time 2.28 |
|
error 352.9566650390625 |
|
5 layer.0.SelfAttention.k |
|
Quantizing ... |
|
time 1.03 |
|
error 19099.544921875 |
|
5 layer.0.SelfAttention.v |
|
Quantizing ... |
|
time 1.02 |
|
error 6900.2197265625 |
|
5 layer.0.SelfAttention.o |
|
Quantizing ... |
|
time 1.03 |
|
error 14074.9541015625 |
|
5 layer.1.DenseReluDense.wi_0 |
|
Quantizing ... |
|
time 1.05 |
|
error 38257.37109375 |
|
5 layer.1.DenseReluDense.wi_1 |
|
Quantizing ... |
|
time 1.04 |
|
error 36839.3046875 |
|
5 layer.1.DenseReluDense.wo |
|
Quantizing ... |
|
time 2.76 |
|
error 132062.96875 |
|
6 layer.0.SelfAttention.q |
|
Quantizing ... |
|
time 2.33 |
|
error 385.77520751953125 |
|
6 layer.0.SelfAttention.k |
|
Quantizing ... |
|
time 1.06 |
|
error 22221.486328125 |
|
6 layer.0.SelfAttention.v |
|
Quantizing ... |
|
time 1.02 |
|
error 7855.71533203125 |
|
6 layer.0.SelfAttention.o |
|
Quantizing ... |
|
time 1.04 |
|
error 20587.6171875 |
|
6 layer.1.DenseReluDense.wi_0 |
|
Quantizing ... |
|
time 1.05 |
|
error 34824.55078125 |
|
6 layer.1.DenseReluDense.wi_1 |
|
Quantizing ... |
|
time 1.05 |
|
error 36079.15625 |
|
6 layer.1.DenseReluDense.wo |
|
Quantizing ... |
|
time 2.74 |
|
error 166183.125 |
|
7 layer.0.SelfAttention.q |
|
Quantizing ... |
|
time 2.32 |
|
error 304.88519287109375 |
|
7 layer.0.SelfAttention.k |
|
Quantizing ... |
|
time 1.05 |
|
error 21111.80859375 |
|
7 layer.0.SelfAttention.v |
|
Quantizing ... |
|
time 1.05 |
|
error 5978.3095703125 |
|
7 layer.0.SelfAttention.o |
|
Quantizing ... |
|
time 1.08 |
|
error 10927.888671875 |
|
7 layer.1.DenseReluDense.wi_0 |
|
Quantizing ... |
|
time 1.07 |
|
error 29760.138671875 |
|
7 layer.1.DenseReluDense.wi_1 |
|
Quantizing ... |
|
time 1.08 |
|
error 33814.875 |
|
7 layer.1.DenseReluDense.wo |
|
Quantizing ... |
|
time 2.73 |
|
error 175563.4375 |
|
8 layer.0.SelfAttention.q |
|
Quantizing ... |
|
time 2.30 |
|
error 333.85931396484375 |
|
8 layer.0.SelfAttention.k |
|
Quantizing ... |
|
time 1.03 |
|
error 24634.984375 |
|
8 layer.0.SelfAttention.v |
|
Quantizing ... |
|
time 1.03 |
|
error 7116.8212890625 |
|
8 layer.0.SelfAttention.o |
|
Quantizing ... |
|
time 1.07 |
|
error 15384.3369140625 |
|
8 layer.1.DenseReluDense.wi_0 |
|
Quantizing ... |
|
time 1.07 |
|
error 28838.537109375 |
|
8 layer.1.DenseReluDense.wi_1 |
|
Quantizing ... |
|
time 1.09 |
|
error 29991.21875 |
|
8 layer.1.DenseReluDense.wo |
|
Quantizing ... |
|
time 2.85 |
|
error 170053.9375 |
|
9 layer.0.SelfAttention.q |
|
Quantizing ... |
|
time 2.27 |
|
error 354.49725341796875 |
|
9 layer.0.SelfAttention.k |
|
Quantizing ... |
|
time 1.02 |
|
error 26472.80078125 |
|
9 layer.0.SelfAttention.v |
|
Quantizing ... |
|
time 1.02 |
|
error 9778.65234375 |
|
9 layer.0.SelfAttention.o |
|
Quantizing ... |
|
time 1.03 |
|
error 46135.9140625 |
|
9 layer.1.DenseReluDense.wi_0 |
|
Quantizing ... |
|
time 1.05 |
|
error 30183.34765625 |
|
9 layer.1.DenseReluDense.wi_1 |
|
Quantizing ... |
|
time 1.05 |
|
error 35315.9375 |
|
9 layer.1.DenseReluDense.wo |
|
Quantizing ... |
|
time 2.80 |
|
error 294261.34375 |
|
10 layer.0.SelfAttention.q |
|
Quantizing ... |
|
time 2.36 |
|
error 330.4294128417969 |
|
10 layer.0.SelfAttention.k |
|
Quantizing ... |
|
time 1.04 |
|
error 21810.806640625 |
|
10 layer.0.SelfAttention.v |
|
Quantizing ... |
|
time 1.03 |
|
error 7377.060546875 |
|
10 layer.0.SelfAttention.o |
|
Quantizing ... |
|
time 1.03 |
|
error 31458.453125 |
|
10 layer.1.DenseReluDense.wi_0 |
|
Quantizing ... |
|
time 1.05 |
|
error 30981.423828125 |
|
10 layer.1.DenseReluDense.wi_1 |
|
Quantizing ... |
|
time 1.05 |
|
error 45770.9140625 |
|
10 layer.1.DenseReluDense.wo |
|
Quantizing ... |
|
time 2.73 |
|
error 338105.5625 |
|
11 layer.0.SelfAttention.q |
|
Quantizing ... |
|
time 2.35 |
|
error 332.6951904296875 |
|
11 layer.0.SelfAttention.k |
|
Quantizing ... |
|
time 1.06 |
|
error 23045.384765625 |
|
11 layer.0.SelfAttention.v |
|
Quantizing ... |
|
time 1.07 |
|
error 9068.484375 |
|
11 layer.0.SelfAttention.o |
|
Quantizing ... |
|
time 1.09 |
|
error 39716.03125 |
|
11 layer.1.DenseReluDense.wi_0 |
|
Quantizing ... |
|
time 1.05 |
|
error 29951.611328125 |
|
11 layer.1.DenseReluDense.wi_1 |
|
Quantizing ... |
|
time 1.06 |
|
error 46667.8828125 |
|
11 layer.1.DenseReluDense.wo |
|
Quantizing ... |
|
time 2.76 |
|
error 458927.0 |
|
12 layer.0.SelfAttention.q |
|
Quantizing ... |
|
time 2.29 |
|
error 364.91387939453125 |
|
12 layer.0.SelfAttention.k |
|
Quantizing ... |
|
time 1.03 |
|
error 26386.5546875 |
|
12 layer.0.SelfAttention.v |
|
Quantizing ... |
|
time 1.08 |
|
error 10412.025390625 |
|
12 layer.0.SelfAttention.o |
|
Quantizing ... |
|
time 1.07 |
|
error 69506.734375 |
|
12 layer.1.DenseReluDense.wi_0 |
|
Quantizing ... |
|
time 1.08 |
|
error 32437.169921875 |
|
12 layer.1.DenseReluDense.wi_1 |
|
Quantizing ... |
|
time 1.13 |
|
error 54537.1328125 |
|
12 layer.1.DenseReluDense.wo |
|
Quantizing ... |
|
time 2.81 |
|
error 555848.125 |
|
13 layer.0.SelfAttention.q |
|
Quantizing ... |
|
time 2.28 |
|
error 334.4095153808594 |
|
13 layer.0.SelfAttention.k |
|
Quantizing ... |
|
time 1.04 |
|
error 24624.59375 |
|
13 layer.0.SelfAttention.v |
|
Quantizing ... |
|
time 1.04 |
|
error 11093.2373046875 |
|
13 layer.0.SelfAttention.o |
|
Quantizing ... |
|
time 1.02 |
|
error 73139.5859375 |
|
13 layer.1.DenseReluDense.wi_0 |
|
Quantizing ... |
|
time 1.06 |
|
error 31185.44921875 |
|
13 layer.1.DenseReluDense.wi_1 |
|
Quantizing ... |
|
time 1.08 |
|
error 63193.28125 |
|
13 layer.1.DenseReluDense.wo |
|
Quantizing ... |
|
time 2.84 |
|
error 484003.5 |
|
14 layer.0.SelfAttention.q |
|
Quantizing ... |
|
time 2.33 |
|
error 315.36883544921875 |
|
14 layer.0.SelfAttention.k |
|
Quantizing ... |
|
time 1.02 |
|
error 22693.66015625 |
|
14 layer.0.SelfAttention.v |
|
Quantizing ... |
|
time 1.04 |
|
error 11054.283203125 |
|
14 layer.0.SelfAttention.o |
|
Quantizing ... |
|
time 1.04 |
|
error 55301.96875 |
|
14 layer.1.DenseReluDense.wi_0 |
|
Quantizing ... |
|
time 1.06 |
|
error 35040.09765625 |
|
14 layer.1.DenseReluDense.wi_1 |
|
Quantizing ... |
|
time 1.04 |
|
error 69227.671875 |
|
14 layer.1.DenseReluDense.wo |
|
Quantizing ... |
|
time 2.76 |
|
error 538346.875 |
|
15 layer.0.SelfAttention.q |
|
Quantizing ... |
|
time 2.31 |
|
error 305.54083251953125 |
|
15 layer.0.SelfAttention.k |
|
Quantizing ... |
|
time 1.05 |
|
error 22575.48046875 |
|
15 layer.0.SelfAttention.v |
|
Quantizing ... |
|
time 1.10 |
|
error 14035.61328125 |
|
15 layer.0.SelfAttention.o |
|
Quantizing ... |
|
time 1.03 |
|
error 100519.5234375 |
|
15 layer.1.DenseReluDense.wi_0 |
|
Quantizing ... |
|
time 1.04 |
|
error 34874.54296875 |
|
15 layer.1.DenseReluDense.wi_1 |
|
Quantizing ... |
|
time 1.04 |
|
error 76981.28125 |
|
15 layer.1.DenseReluDense.wo |
|
Quantizing ... |
|
time 2.75 |
|
error 590792.75 |
|
16 layer.0.SelfAttention.q |
|
Quantizing ... |
|
time 2.30 |
|
error 292.1910095214844 |
|
16 layer.0.SelfAttention.k |
|
Quantizing ... |
|
time 1.10 |
|
error 24363.197265625 |
|
16 layer.0.SelfAttention.v |
|
Quantizing ... |
|
time 1.08 |
|
error 17756.51953125 |
|
16 layer.0.SelfAttention.o |
|
Quantizing ... |
|
time 1.09 |
|
error 189057.78125 |
|
16 layer.1.DenseReluDense.wi_0 |
|
Quantizing ... |
|
time 1.07 |
|
error 35124.7109375 |
|
16 layer.1.DenseReluDense.wi_1 |
|
Quantizing ... |
|
time 1.09 |
|
error 87091.78125 |
|
16 layer.1.DenseReluDense.wo |
|
Quantizing ... |
|
time 2.81 |
|
error 1044289.5625 |
|
17 layer.0.SelfAttention.q |
|
Quantizing ... |
|
time 2.28 |
|
error 261.1668701171875 |
|
17 layer.0.SelfAttention.k |
|
Quantizing ... |
|
time 1.02 |
|
error 18598.86328125 |
|
17 layer.0.SelfAttention.v |
|
Quantizing ... |
|
time 1.03 |
|
error 18718.98046875 |
|
17 layer.0.SelfAttention.o |
|
Quantizing ... |
|
time 1.04 |
|
error 254419.0625 |
|
17 layer.1.DenseReluDense.wi_0 |
|
Quantizing ... |
|
time 1.07 |
|
error 35458.671875 |
|
17 layer.1.DenseReluDense.wi_1 |
|
Quantizing ... |
|
time 1.10 |
|
error 88659.0390625 |
|
17 layer.1.DenseReluDense.wo |
|
Quantizing ... |
|
time 2.87 |
|
error 1568064.75 |
|
18 layer.0.SelfAttention.q |
|
Quantizing ... |
|
time 2.31 |
|
error 282.4662780761719 |
|
18 layer.0.SelfAttention.k |
|
Quantizing ... |
|
time 1.03 |
|
error 19631.552734375 |
|
18 layer.0.SelfAttention.v |
|
Quantizing ... |
|
time 1.06 |
|
error 21855.74609375 |
|
18 layer.0.SelfAttention.o |
|
Quantizing ... |
|
time 1.05 |
|
error 451241.28125 |
|
18 layer.1.DenseReluDense.wi_0 |
|
Quantizing ... |
|
time 1.04 |
|
error 35819.91015625 |
|
18 layer.1.DenseReluDense.wi_1 |
|
Quantizing ... |
|
time 1.04 |
|
error 96373.1015625 |
|
18 layer.1.DenseReluDense.wo |
|
Quantizing ... |
|
time 2.75 |
|
error 4121681.25 |
|
19 layer.0.SelfAttention.q |
|
Quantizing ... |
|
time 2.33 |
|
error 222.93960571289062 |
|
19 layer.0.SelfAttention.k |
|
Quantizing ... |
|
time 1.08 |
|
error 15299.37890625 |
|
19 layer.0.SelfAttention.v |
|
Quantizing ... |
|
time 1.04 |
|
error 25438.86328125 |
|
19 layer.0.SelfAttention.o |
|
Quantizing ... |
|
time 1.05 |
|
error 1097173.0 |
|
19 layer.1.DenseReluDense.wi_0 |
|
Quantizing ... |
|
time 1.06 |
|
error 34149.09375 |
|
19 layer.1.DenseReluDense.wi_1 |
|
Quantizing ... |
|
time 1.04 |
|
error 90188.0078125 |
|
19 layer.1.DenseReluDense.wo |
|
Quantizing ... |
|
time 2.74 |
|
error 6266101.0 |
|
20 layer.0.SelfAttention.q |
|
Quantizing ... |
|
time 2.35 |
|
error 211.04458618164062 |
|
20 layer.0.SelfAttention.k |
|
Quantizing ... |
|
time 1.04 |
|
error 13809.572265625 |
|
20 layer.0.SelfAttention.v |
|
Quantizing ... |
|
time 1.06 |
|
error 29788.564453125 |
|
20 layer.0.SelfAttention.o |
|
Quantizing ... |
|
time 1.05 |
|
error 1334543.125 |
|
20 layer.1.DenseReluDense.wi_0 |
|
Quantizing ... |
|
time 1.09 |
|
error 31375.771484375 |
|
20 layer.1.DenseReluDense.wi_1 |
|
Quantizing ... |
|
time 1.08 |
|
error 78350.203125 |
|
20 layer.1.DenseReluDense.wo |
|
Quantizing ... |
|
time 2.74 |
|
error 7183110.0 |
|
21 layer.0.SelfAttention.q |
|
Quantizing ... |
|
time 2.30 |
|
error 194.26229858398438 |
|
21 layer.0.SelfAttention.k |
|
Quantizing ... |
|
time 1.04 |
|
error 14619.9853515625 |
|
21 layer.0.SelfAttention.v |
|
Quantizing ... |
|
time 1.04 |
|
error 38181.265625 |
|
21 layer.0.SelfAttention.o |
|
Quantizing ... |
|
time 1.05 |
|
error 1776184.0 |
|
21 layer.1.DenseReluDense.wi_0 |
|
Quantizing ... |
|
time 1.12 |
|
error 30981.5625 |
|
21 layer.1.DenseReluDense.wi_1 |
|
Quantizing ... |
|
time 1.09 |
|
error 77552.046875 |
|
21 layer.1.DenseReluDense.wo |
|
Quantizing ... |
|
time 2.83 |
|
error 9851391.0 |
|
22 layer.0.SelfAttention.q |
|
Quantizing ... |
|
time 2.29 |
|
error 196.11984252929688 |
|
22 layer.0.SelfAttention.k |
|
Quantizing ... |
|
time 1.03 |
|
error 12573.25 |
|
22 layer.0.SelfAttention.v |
|
Quantizing ... |
|
time 1.04 |
|
error 43983.0703125 |
|
22 layer.0.SelfAttention.o |
|
Quantizing ... |
|
time 1.03 |
|
error 1969925.5 |
|
22 layer.1.DenseReluDense.wi_0 |
|
Quantizing ... |
|
time 1.05 |
|
error 42481.56640625 |
|
22 layer.1.DenseReluDense.wi_1 |
|
Quantizing ... |
|
time 1.04 |
|
error 106760.0078125 |
|
22 layer.1.DenseReluDense.wo |
|
Quantizing ... |
|
time 2.84 |
|
error 15271906.0 |
|
23 layer.0.SelfAttention.q |
|
Quantizing ... |
|
time 2.39 |
|
error 213.98135375976562 |
|
23 layer.0.SelfAttention.k |
|
Quantizing ... |
|
time 1.03 |
|
error 14789.1396484375 |
|
23 layer.0.SelfAttention.v |
|
Quantizing ... |
|
time 1.04 |
|
error 57604.91015625 |
|
23 layer.0.SelfAttention.o |
|
Quantizing ... |
|
time 1.02 |
|
error 2114846.25 |
|
23 layer.1.DenseReluDense.wi_0 |
|
Quantizing ... |
|
time 1.05 |
|
error 41047.03125 |
|
23 layer.1.DenseReluDense.wi_1 |
|
Quantizing ... |
|
time 1.04 |
|
error 83152.765625 |
|
23 layer.1.DenseReluDense.wo |
|
Quantizing ... |
|
time 2.75 |
|
error 13002426.0 |
|
728.4299275875092 |
|
Packing ... |
|
encoder.block.0.layer.0.SelfAttention.q |
|
encoder.block.0.layer.0.SelfAttention.k |
|
encoder.block.0.layer.0.SelfAttention.v |
|
encoder.block.0.layer.0.SelfAttention.o |
|
encoder.block.0.layer.1.DenseReluDense.wi_0 |
|
encoder.block.0.layer.1.DenseReluDense.wi_1 |
|
encoder.block.0.layer.1.DenseReluDense.wo |
|
encoder.block.1.layer.0.SelfAttention.q |
|
encoder.block.1.layer.0.SelfAttention.k |
|
encoder.block.1.layer.0.SelfAttention.v |
|
encoder.block.1.layer.0.SelfAttention.o |
|
encoder.block.1.layer.1.DenseReluDense.wi_0 |
|
encoder.block.1.layer.1.DenseReluDense.wi_1 |
|
encoder.block.1.layer.1.DenseReluDense.wo |
|
encoder.block.2.layer.0.SelfAttention.q |
|
encoder.block.2.layer.0.SelfAttention.k |
|
encoder.block.2.layer.0.SelfAttention.v |
|
encoder.block.2.layer.0.SelfAttention.o |
|
encoder.block.2.layer.1.DenseReluDense.wi_0 |
|
encoder.block.2.layer.1.DenseReluDense.wi_1 |
|
encoder.block.2.layer.1.DenseReluDense.wo |
|
encoder.block.3.layer.0.SelfAttention.q |
|
encoder.block.3.layer.0.SelfAttention.k |
|
encoder.block.3.layer.0.SelfAttention.v |
|
encoder.block.3.layer.0.SelfAttention.o |
|
encoder.block.3.layer.1.DenseReluDense.wi_0 |
|
encoder.block.3.layer.1.DenseReluDense.wi_1 |
|
encoder.block.3.layer.1.DenseReluDense.wo |
|
encoder.block.4.layer.0.SelfAttention.q |
|
encoder.block.4.layer.0.SelfAttention.k |
|
encoder.block.4.layer.0.SelfAttention.v |
|
encoder.block.4.layer.0.SelfAttention.o |
|
encoder.block.4.layer.1.DenseReluDense.wi_0 |
|
encoder.block.4.layer.1.DenseReluDense.wi_1 |
|
encoder.block.4.layer.1.DenseReluDense.wo |
|
encoder.block.5.layer.0.SelfAttention.q |
|
encoder.block.5.layer.0.SelfAttention.k |
|
encoder.block.5.layer.0.SelfAttention.v |
|
encoder.block.5.layer.0.SelfAttention.o |
|
encoder.block.5.layer.1.DenseReluDense.wi_0 |
|
encoder.block.5.layer.1.DenseReluDense.wi_1 |
|
encoder.block.5.layer.1.DenseReluDense.wo |
|
encoder.block.6.layer.0.SelfAttention.q |
|
encoder.block.6.layer.0.SelfAttention.k |
|
encoder.block.6.layer.0.SelfAttention.v |
|
encoder.block.6.layer.0.SelfAttention.o |
|
encoder.block.6.layer.1.DenseReluDense.wi_0 |
|
encoder.block.6.layer.1.DenseReluDense.wi_1 |
|
encoder.block.6.layer.1.DenseReluDense.wo |
|
encoder.block.7.layer.0.SelfAttention.q |
|
encoder.block.7.layer.0.SelfAttention.k |
|
encoder.block.7.layer.0.SelfAttention.v |
|
encoder.block.7.layer.0.SelfAttention.o |
|
encoder.block.7.layer.1.DenseReluDense.wi_0 |
|
encoder.block.7.layer.1.DenseReluDense.wi_1 |
|
encoder.block.7.layer.1.DenseReluDense.wo |
|
encoder.block.8.layer.0.SelfAttention.q |
|
encoder.block.8.layer.0.SelfAttention.k |
|
encoder.block.8.layer.0.SelfAttention.v |
|
encoder.block.8.layer.0.SelfAttention.o |
|
encoder.block.8.layer.1.DenseReluDense.wi_0 |
|
encoder.block.8.layer.1.DenseReluDense.wi_1 |
|
encoder.block.8.layer.1.DenseReluDense.wo |
|
encoder.block.9.layer.0.SelfAttention.q |
|
encoder.block.9.layer.0.SelfAttention.k |
|
encoder.block.9.layer.0.SelfAttention.v |
|
encoder.block.9.layer.0.SelfAttention.o |
|
encoder.block.9.layer.1.DenseReluDense.wi_0 |
|
encoder.block.9.layer.1.DenseReluDense.wi_1 |
|
encoder.block.9.layer.1.DenseReluDense.wo |
|
encoder.block.10.layer.0.SelfAttention.q |
|
encoder.block.10.layer.0.SelfAttention.k |
|
encoder.block.10.layer.0.SelfAttention.v |
|
encoder.block.10.layer.0.SelfAttention.o |
|
encoder.block.10.layer.1.DenseReluDense.wi_0 |
|
encoder.block.10.layer.1.DenseReluDense.wi_1 |
|
encoder.block.10.layer.1.DenseReluDense.wo |
|
encoder.block.11.layer.0.SelfAttention.q |
|
encoder.block.11.layer.0.SelfAttention.k |
|
encoder.block.11.layer.0.SelfAttention.v |
|
encoder.block.11.layer.0.SelfAttention.o |
|
encoder.block.11.layer.1.DenseReluDense.wi_0 |
|
encoder.block.11.layer.1.DenseReluDense.wi_1 |
|
encoder.block.11.layer.1.DenseReluDense.wo |
|
encoder.block.12.layer.0.SelfAttention.q |
|
encoder.block.12.layer.0.SelfAttention.k |
|
encoder.block.12.layer.0.SelfAttention.v |
|
encoder.block.12.layer.0.SelfAttention.o |
|
encoder.block.12.layer.1.DenseReluDense.wi_0 |
|
encoder.block.12.layer.1.DenseReluDense.wi_1 |
|
encoder.block.12.layer.1.DenseReluDense.wo |
|
encoder.block.13.layer.0.SelfAttention.q |
|
encoder.block.13.layer.0.SelfAttention.k |
|
encoder.block.13.layer.0.SelfAttention.v |
|
encoder.block.13.layer.0.SelfAttention.o |
|
encoder.block.13.layer.1.DenseReluDense.wi_0 |
|
encoder.block.13.layer.1.DenseReluDense.wi_1 |
|
encoder.block.13.layer.1.DenseReluDense.wo |
|
encoder.block.14.layer.0.SelfAttention.q |
|
encoder.block.14.layer.0.SelfAttention.k |
|
encoder.block.14.layer.0.SelfAttention.v |
|
encoder.block.14.layer.0.SelfAttention.o |
|
encoder.block.14.layer.1.DenseReluDense.wi_0 |
|
encoder.block.14.layer.1.DenseReluDense.wi_1 |
|
encoder.block.14.layer.1.DenseReluDense.wo |
|
encoder.block.15.layer.0.SelfAttention.q |
|
encoder.block.15.layer.0.SelfAttention.k |
|
encoder.block.15.layer.0.SelfAttention.v |
|
encoder.block.15.layer.0.SelfAttention.o |
|
encoder.block.15.layer.1.DenseReluDense.wi_0 |
|
encoder.block.15.layer.1.DenseReluDense.wi_1 |
|
encoder.block.15.layer.1.DenseReluDense.wo |
|
encoder.block.16.layer.0.SelfAttention.q |
|
encoder.block.16.layer.0.SelfAttention.k |
|
encoder.block.16.layer.0.SelfAttention.v |
|
encoder.block.16.layer.0.SelfAttention.o |
|
encoder.block.16.layer.1.DenseReluDense.wi_0 |
|
encoder.block.16.layer.1.DenseReluDense.wi_1 |
|
encoder.block.16.layer.1.DenseReluDense.wo |
|
encoder.block.17.layer.0.SelfAttention.q |
|
encoder.block.17.layer.0.SelfAttention.k |
|
encoder.block.17.layer.0.SelfAttention.v |
|
encoder.block.17.layer.0.SelfAttention.o |
|
encoder.block.17.layer.1.DenseReluDense.wi_0 |
|
encoder.block.17.layer.1.DenseReluDense.wi_1 |
|
encoder.block.17.layer.1.DenseReluDense.wo |
|
encoder.block.18.layer.0.SelfAttention.q |
|
encoder.block.18.layer.0.SelfAttention.k |
|
encoder.block.18.layer.0.SelfAttention.v |
|
encoder.block.18.layer.0.SelfAttention.o |
|
encoder.block.18.layer.1.DenseReluDense.wi_0 |
|
encoder.block.18.layer.1.DenseReluDense.wi_1 |
|
encoder.block.18.layer.1.DenseReluDense.wo |
|
encoder.block.19.layer.0.SelfAttention.q |
|
encoder.block.19.layer.0.SelfAttention.k |
|
encoder.block.19.layer.0.SelfAttention.v |
|
encoder.block.19.layer.0.SelfAttention.o |
|
encoder.block.19.layer.1.DenseReluDense.wi_0 |
|
encoder.block.19.layer.1.DenseReluDense.wi_1 |
|
encoder.block.19.layer.1.DenseReluDense.wo |
|
encoder.block.20.layer.0.SelfAttention.q |
|
encoder.block.20.layer.0.SelfAttention.k |
|
encoder.block.20.layer.0.SelfAttention.v |
|
encoder.block.20.layer.0.SelfAttention.o |
|
encoder.block.20.layer.1.DenseReluDense.wi_0 |
|
encoder.block.20.layer.1.DenseReluDense.wi_1 |
|
encoder.block.20.layer.1.DenseReluDense.wo |
|
encoder.block.21.layer.0.SelfAttention.q |
|
encoder.block.21.layer.0.SelfAttention.k |
|
encoder.block.21.layer.0.SelfAttention.v |
|
encoder.block.21.layer.0.SelfAttention.o |
|
encoder.block.21.layer.1.DenseReluDense.wi_0 |
|
encoder.block.21.layer.1.DenseReluDense.wi_1 |
|
encoder.block.21.layer.1.DenseReluDense.wo |
|
encoder.block.22.layer.0.SelfAttention.q |
|
encoder.block.22.layer.0.SelfAttention.k |
|
encoder.block.22.layer.0.SelfAttention.v |
|
encoder.block.22.layer.0.SelfAttention.o |
|
encoder.block.22.layer.1.DenseReluDense.wi_0 |
|
encoder.block.22.layer.1.DenseReluDense.wi_1 |
|
encoder.block.22.layer.1.DenseReluDense.wo |
|
encoder.block.23.layer.0.SelfAttention.q |
|
encoder.block.23.layer.0.SelfAttention.k |
|
encoder.block.23.layer.0.SelfAttention.v |
|
encoder.block.23.layer.0.SelfAttention.o |
|
encoder.block.23.layer.1.DenseReluDense.wi_0 |
|
encoder.block.23.layer.1.DenseReluDense.wi_1 |
|
encoder.block.23.layer.1.DenseReluDense.wo |
|
Done. |
|
|
|
|