CUDA extension not installed. Some weights of the model checkpoint at google/t5-v1_1-xxl were not used when initializing T5EncoderModel: ['decoder.block.6.layer.1.layer_norm.weight', 'decoder.block.10.layer.1.layer_norm.weight', 'decoder.block.17.layer.2.layer_norm.weight', 'decoder.block.13.layer.0.SelfAttention.v.weight', 'decoder.block.19.layer.0.SelfAttention.o.weight', 'decoder.block.22.layer.2.layer_norm.weight', 'decoder.block.23.layer.0.SelfAttention.q.weight', 'decoder.block.20.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.17.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.7.layer.1.EncDecAttention.k.weight', 'decoder.block.20.layer.0.SelfAttention.k.weight', 'decoder.block.18.layer.2.DenseReluDense.wo.weight', 'decoder.block.16.layer.1.EncDecAttention.v.weight', 'decoder.block.3.layer.0.SelfAttention.k.weight', 'decoder.block.1.layer.0.SelfAttention.v.weight', 'decoder.block.21.layer.2.DenseReluDense.wo.weight', 'decoder.block.2.layer.1.layer_norm.weight', 'decoder.block.17.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.3.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.23.layer.1.EncDecAttention.o.weight', 'decoder.block.7.layer.0.SelfAttention.q.weight', 'decoder.block.10.layer.0.SelfAttention.o.weight', 'decoder.block.11.layer.0.SelfAttention.v.weight', 'decoder.block.1.layer.0.SelfAttention.o.weight', 'decoder.block.9.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.1.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.9.layer.1.layer_norm.weight', 'decoder.block.11.layer.2.DenseReluDense.wo.weight', 'decoder.block.14.layer.1.EncDecAttention.k.weight', 'decoder.block.22.layer.0.SelfAttention.o.weight', 'decoder.block.19.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.14.layer.2.DenseReluDense.wo.weight', 'decoder.block.2.layer.0.SelfAttention.k.weight', 'decoder.embed_tokens.weight', 'decoder.block.6.layer.0.layer_norm.weight', 'decoder.block.3.layer.2.layer_norm.weight', 'decoder.block.13.layer.1.EncDecAttention.k.weight', 'decoder.block.0.layer.0.SelfAttention.o.weight', 'decoder.block.17.layer.2.DenseReluDense.wo.weight', 'decoder.block.18.layer.0.layer_norm.weight', 'decoder.block.9.layer.1.EncDecAttention.k.weight', 'decoder.block.11.layer.0.SelfAttention.q.weight', 'decoder.block.15.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.6.layer.1.EncDecAttention.q.weight', 'decoder.block.10.layer.1.EncDecAttention.q.weight', 'decoder.block.10.layer.0.SelfAttention.v.weight', 'decoder.block.17.layer.0.SelfAttention.o.weight', 'decoder.block.0.layer.0.SelfAttention.v.weight', 'decoder.block.18.layer.1.layer_norm.weight', 'decoder.block.18.layer.2.layer_norm.weight', 'decoder.block.12.layer.2.layer_norm.weight', 'decoder.block.2.layer.1.EncDecAttention.o.weight', 'decoder.block.6.layer.1.EncDecAttention.o.weight', 'decoder.block.17.layer.1.EncDecAttention.o.weight', 'decoder.block.3.layer.1.EncDecAttention.o.weight', 'decoder.block.18.layer.1.EncDecAttention.v.weight', 'decoder.block.15.layer.1.EncDecAttention.o.weight', 'decoder.block.0.layer.0.SelfAttention.q.weight', 'decoder.block.13.layer.2.DenseReluDense.wo.weight', 'decoder.block.1.layer.0.layer_norm.weight', 'decoder.block.15.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.2.layer.0.SelfAttention.o.weight', 'decoder.block.17.layer.1.EncDecAttention.k.weight', 'decoder.block.14.layer.2.layer_norm.weight', 'decoder.block.17.layer.0.SelfAttention.k.weight', 'decoder.block.3.layer.0.SelfAttention.q.weight', 'decoder.block.14.layer.0.SelfAttention.v.weight', 'decoder.block.6.layer.2.DenseReluDense.wo.weight', 'decoder.block.20.layer.1.EncDecAttention.o.weight', 'decoder.block.15.layer.0.SelfAttention.o.weight', 'decoder.block.18.layer.0.SelfAttention.v.weight', 'decoder.block.1.layer.1.EncDecAttention.q.weight', 'decoder.block.10.layer.1.EncDecAttention.v.weight', 'decoder.block.1.layer.0.SelfAttention.q.weight', 'decoder.block.8.layer.0.layer_norm.weight', 'decoder.block.16.layer.2.layer_norm.weight', 'decoder.block.7.layer.1.EncDecAttention.v.weight', 'decoder.block.12.layer.1.EncDecAttention.k.weight', 'decoder.block.17.layer.1.EncDecAttention.v.weight', 'decoder.block.23.layer.2.DenseReluDense.wo.weight', 'decoder.block.14.layer.0.SelfAttention.k.weight', 'decoder.block.3.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.17.layer.1.layer_norm.weight', 'decoder.block.2.layer.1.EncDecAttention.k.weight', 'decoder.block.10.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.12.layer.1.layer_norm.weight', 'decoder.block.0.layer.1.EncDecAttention.o.weight', 'decoder.block.9.layer.2.layer_norm.weight', 'decoder.block.1.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.13.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.23.layer.0.SelfAttention.v.weight', 'decoder.block.2.layer.2.DenseReluDense.wo.weight', 'decoder.block.4.layer.1.EncDecAttention.v.weight', 'decoder.block.19.layer.0.SelfAttention.q.weight', 'decoder.block.12.layer.2.DenseReluDense.wo.weight', 'decoder.block.4.layer.2.layer_norm.weight', 'decoder.block.9.layer.1.EncDecAttention.v.weight', 'decoder.block.13.layer.0.SelfAttention.q.weight', 'decoder.block.4.layer.0.layer_norm.weight', 'decoder.block.12.layer.0.SelfAttention.q.weight', 'decoder.block.16.layer.1.EncDecAttention.o.weight', 'decoder.block.6.layer.0.SelfAttention.o.weight', 'decoder.block.22.layer.0.SelfAttention.k.weight', 'decoder.block.8.layer.1.EncDecAttention.q.weight', 'decoder.block.17.layer.0.SelfAttention.q.weight', 'decoder.block.5.layer.1.EncDecAttention.k.weight', 'decoder.block.11.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.22.layer.0.SelfAttention.v.weight', 'decoder.block.14.layer.1.layer_norm.weight', 'decoder.block.15.layer.1.EncDecAttention.k.weight', 'decoder.block.21.layer.2.layer_norm.weight', 'decoder.block.21.layer.1.layer_norm.weight', 'decoder.block.10.layer.1.EncDecAttention.o.weight', 'decoder.block.11.layer.1.EncDecAttention.q.weight', 'decoder.block.16.layer.0.layer_norm.weight', 'decoder.block.11.layer.0.SelfAttention.o.weight', 'decoder.block.5.layer.0.SelfAttention.v.weight', 'decoder.block.20.layer.1.EncDecAttention.v.weight', 'decoder.block.2.layer.2.layer_norm.weight', 'decoder.block.15.layer.1.EncDecAttention.q.weight', 'decoder.block.13.layer.0.SelfAttention.o.weight', 'decoder.block.5.layer.0.layer_norm.weight', 'decoder.block.6.layer.1.EncDecAttention.v.weight', 'decoder.block.23.layer.1.EncDecAttention.q.weight', 'decoder.block.18.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.5.layer.2.DenseReluDense.wo.weight', 'decoder.block.19.layer.0.SelfAttention.v.weight', 'decoder.block.8.layer.0.SelfAttention.o.weight', 'decoder.block.23.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.23.layer.1.layer_norm.weight', 'decoder.block.22.layer.1.EncDecAttention.q.weight', 'decoder.block.20.layer.2.DenseReluDense.wo.weight', 'decoder.block.20.layer.1.EncDecAttention.q.weight', 'decoder.block.15.layer.0.layer_norm.weight', 'decoder.block.8.layer.1.EncDecAttention.k.weight', 'decoder.block.21.layer.0.SelfAttention.o.weight', 'decoder.block.4.layer.1.EncDecAttention.o.weight', 'decoder.block.1.layer.0.SelfAttention.k.weight', 'decoder.block.19.layer.1.layer_norm.weight', 'decoder.block.12.layer.0.SelfAttention.k.weight', 'decoder.block.4.layer.1.EncDecAttention.k.weight', 'decoder.block.20.layer.0.SelfAttention.v.weight', 'decoder.block.18.layer.0.SelfAttention.o.weight', 'decoder.block.1.layer.1.EncDecAttention.o.weight', 'decoder.block.18.layer.1.EncDecAttention.k.weight', 'lm_head.weight', 'decoder.block.2.layer.0.layer_norm.weight', 'decoder.block.14.layer.1.EncDecAttention.v.weight', 'decoder.block.10.layer.0.layer_norm.weight', 'decoder.block.11.layer.0.SelfAttention.k.weight', 'decoder.block.18.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.0.layer.1.EncDecAttention.v.weight', 'decoder.block.0.layer.2.layer_norm.weight', 'decoder.block.23.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.20.layer.1.layer_norm.weight', 'decoder.block.20.layer.1.EncDecAttention.k.weight', 'decoder.block.15.layer.2.DenseReluDense.wo.weight', 'decoder.block.19.layer.1.EncDecAttention.o.weight', 'decoder.block.13.layer.1.layer_norm.weight', 'decoder.block.7.layer.2.DenseReluDense.wo.weight', 'decoder.block.10.layer.2.layer_norm.weight', 'decoder.block.0.layer.0.SelfAttention.k.weight', 'decoder.block.19.layer.0.SelfAttention.k.weight', 'decoder.block.8.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.16.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.5.layer.0.SelfAttention.q.weight', 'decoder.block.18.layer.0.SelfAttention.k.weight', 'decoder.block.4.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.20.layer.0.SelfAttention.o.weight', 'decoder.block.6.layer.0.SelfAttention.v.weight', 'decoder.block.14.layer.0.SelfAttention.q.weight', 'decoder.block.13.layer.1.EncDecAttention.o.weight', 'decoder.block.19.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.21.layer.1.EncDecAttention.o.weight', 'decoder.block.7.layer.0.SelfAttention.o.weight', 'decoder.block.15.layer.2.layer_norm.weight', 'decoder.block.18.layer.0.SelfAttention.q.weight', 'decoder.block.7.layer.1.layer_norm.weight', 'decoder.block.4.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.4.layer.0.SelfAttention.o.weight', 'decoder.block.9.layer.0.layer_norm.weight', 'decoder.block.7.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.2.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.22.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.12.layer.1.EncDecAttention.v.weight', 'decoder.block.11.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.4.layer.1.EncDecAttention.q.weight', 'decoder.block.5.layer.1.EncDecAttention.o.weight', 'decoder.block.8.layer.1.layer_norm.weight', 'decoder.block.13.layer.1.EncDecAttention.v.weight', 'decoder.block.19.layer.1.EncDecAttention.k.weight', 'decoder.block.16.layer.1.layer_norm.weight', 'decoder.block.20.layer.0.layer_norm.weight', 'decoder.block.22.layer.1.EncDecAttention.k.weight', 'decoder.block.11.layer.2.layer_norm.weight', 'decoder.block.11.layer.1.layer_norm.weight', 'decoder.block.7.layer.0.SelfAttention.v.weight', 'decoder.block.3.layer.0.SelfAttention.o.weight', 'decoder.block.0.layer.2.DenseReluDense.wo.weight', 'decoder.block.6.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.16.layer.0.SelfAttention.q.weight', 'decoder.block.21.layer.1.EncDecAttention.k.weight', 'decoder.block.3.layer.1.EncDecAttention.k.weight', 'decoder.block.9.layer.1.EncDecAttention.q.weight', 'decoder.block.6.layer.0.SelfAttention.k.weight', 'decoder.block.4.layer.0.SelfAttention.v.weight', 'decoder.block.11.layer.0.layer_norm.weight', 'decoder.block.22.layer.1.EncDecAttention.v.weight', 'decoder.block.19.layer.2.DenseReluDense.wo.weight', 'decoder.block.0.layer.1.EncDecAttention.q.weight', 'decoder.block.15.layer.1.layer_norm.weight', 'decoder.block.4.layer.2.DenseReluDense.wo.weight', 'decoder.block.8.layer.0.SelfAttention.v.weight', 'decoder.block.18.layer.1.EncDecAttention.o.weight', 'decoder.block.4.layer.0.SelfAttention.k.weight', 'decoder.block.15.layer.1.EncDecAttention.v.weight', 'decoder.block.5.layer.0.SelfAttention.o.weight', 'decoder.block.1.layer.1.EncDecAttention.v.weight', 'decoder.block.2.layer.0.SelfAttention.v.weight', 'decoder.block.7.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.2.layer.1.EncDecAttention.v.weight', 'decoder.block.14.layer.0.layer_norm.weight', 'decoder.block.15.layer.0.SelfAttention.k.weight', 'decoder.block.22.layer.1.EncDecAttention.o.weight', 'decoder.block.21.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.14.layer.1.EncDecAttention.q.weight', 'decoder.block.7.layer.1.EncDecAttention.o.weight', 'decoder.block.8.layer.0.SelfAttention.q.weight', 'decoder.block.4.layer.0.SelfAttention.q.weight', 'decoder.block.3.layer.0.SelfAttention.v.weight', 'decoder.block.13.layer.0.layer_norm.weight', 'decoder.block.21.layer.0.SelfAttention.v.weight', 'decoder.block.16.layer.0.SelfAttention.k.weight', 'decoder.block.3.layer.0.layer_norm.weight', 'decoder.block.10.layer.1.EncDecAttention.k.weight', 'decoder.block.9.layer.2.DenseReluDense.wo.weight', 'decoder.block.21.layer.0.SelfAttention.k.weight', 'decoder.block.16.layer.1.EncDecAttention.k.weight', 'decoder.block.7.layer.0.SelfAttention.k.weight', 'decoder.block.7.layer.1.EncDecAttention.q.weight', 'decoder.block.11.layer.1.EncDecAttention.k.weight', 'decoder.block.23.layer.0.SelfAttention.k.weight', 'decoder.block.20.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.5.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.6.layer.0.SelfAttention.q.weight', 'decoder.block.22.layer.0.SelfAttention.q.weight', 'decoder.block.23.layer.2.layer_norm.weight', 'decoder.block.11.layer.1.EncDecAttention.o.weight', 'decoder.block.19.layer.1.EncDecAttention.v.weight', 'decoder.block.13.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.3.layer.1.EncDecAttention.v.weight', 'decoder.block.13.layer.2.layer_norm.weight', 'decoder.block.16.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight', 'decoder.block.19.layer.0.layer_norm.weight', 'decoder.block.17.layer.1.EncDecAttention.q.weight', 'decoder.block.21.layer.1.EncDecAttention.v.weight', 'decoder.block.17.layer.0.layer_norm.weight', 'decoder.block.5.layer.2.layer_norm.weight', 'decoder.block.20.layer.0.SelfAttention.q.weight', 'decoder.block.23.layer.0.SelfAttention.o.weight', 'decoder.block.22.layer.0.layer_norm.weight', 'decoder.block.16.layer.0.SelfAttention.o.weight', 'decoder.block.9.layer.0.SelfAttention.v.weight', 'decoder.block.9.layer.1.EncDecAttention.o.weight', 'decoder.block.4.layer.1.layer_norm.weight', 'decoder.block.5.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.12.layer.0.layer_norm.weight', 'decoder.block.5.layer.1.EncDecAttention.v.weight', 'decoder.block.12.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.8.layer.0.SelfAttention.k.weight', 'decoder.block.8.layer.1.EncDecAttention.o.weight', 'decoder.block.0.layer.1.EncDecAttention.k.weight', 'decoder.block.16.layer.0.SelfAttention.v.weight', 'decoder.block.12.layer.1.EncDecAttention.o.weight', 'decoder.block.8.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.1.layer.1.EncDecAttention.k.weight', 'decoder.block.2.layer.0.SelfAttention.q.weight', 'decoder.block.5.layer.0.SelfAttention.k.weight', 'decoder.block.22.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.7.layer.0.layer_norm.weight', 'decoder.block.9.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.17.layer.0.SelfAttention.v.weight', 'decoder.block.8.layer.2.DenseReluDense.wo.weight', 'decoder.block.18.layer.1.EncDecAttention.q.weight', 'decoder.block.6.layer.1.EncDecAttention.k.weight', 'decoder.block.22.layer.2.DenseReluDense.wo.weight', 'decoder.block.9.layer.0.SelfAttention.k.weight', 'decoder.block.2.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.7.layer.2.layer_norm.weight', 'decoder.block.16.layer.1.EncDecAttention.q.weight', 'decoder.block.15.layer.0.SelfAttention.v.weight', 'decoder.final_layer_norm.weight', 'decoder.block.0.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.3.layer.1.EncDecAttention.q.weight', 'decoder.block.3.layer.1.layer_norm.weight', 'decoder.block.9.layer.0.SelfAttention.q.weight', 'decoder.block.1.layer.1.layer_norm.weight', 'decoder.block.14.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.10.layer.0.SelfAttention.k.weight', 'decoder.block.14.layer.0.SelfAttention.o.weight', 'decoder.block.0.layer.1.layer_norm.weight', 'decoder.block.9.layer.0.SelfAttention.o.weight', 'decoder.block.19.layer.2.layer_norm.weight', 'decoder.block.1.layer.2.layer_norm.weight', 'decoder.block.13.layer.1.EncDecAttention.q.weight', 'decoder.block.10.layer.2.DenseReluDense.wo.weight', 'decoder.block.14.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.6.layer.2.layer_norm.weight', 'decoder.block.11.layer.1.EncDecAttention.v.weight', 'decoder.block.5.layer.1.layer_norm.weight', 'decoder.block.12.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.23.layer.1.EncDecAttention.k.weight', 'decoder.block.23.layer.0.layer_norm.weight', 'decoder.block.12.layer.0.SelfAttention.v.weight', 'decoder.block.13.layer.0.SelfAttention.k.weight', 'decoder.block.20.layer.2.layer_norm.weight', 'decoder.block.21.layer.1.EncDecAttention.q.weight', 'decoder.block.3.layer.2.DenseReluDense.wo.weight', 'decoder.block.1.layer.2.DenseReluDense.wo.weight', 'decoder.block.21.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.22.layer.1.layer_norm.weight', 'decoder.block.10.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.23.layer.1.EncDecAttention.v.weight', 'decoder.block.15.layer.0.SelfAttention.q.weight', 'decoder.block.2.layer.1.EncDecAttention.q.weight', 'decoder.block.10.layer.0.SelfAttention.q.weight', 'decoder.block.21.layer.0.layer_norm.weight', 'decoder.block.14.layer.1.EncDecAttention.o.weight', 'decoder.block.0.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.5.layer.1.EncDecAttention.q.weight', 'decoder.block.8.layer.2.layer_norm.weight', 'decoder.block.16.layer.2.DenseReluDense.wo.weight', 'decoder.block.19.layer.1.EncDecAttention.q.weight', 'decoder.block.12.layer.0.SelfAttention.o.weight', 'decoder.block.12.layer.1.EncDecAttention.q.weight', 'decoder.block.21.layer.0.SelfAttention.q.weight', 'decoder.block.0.layer.0.layer_norm.weight', 'decoder.block.8.layer.1.EncDecAttention.v.weight', 'decoder.block.6.layer.2.DenseReluDense.wi_1.weight'] - This IS expected if you are initializing T5EncoderModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). - This IS NOT expected if you are initializing T5EncoderModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). Found cached dataset wikitext (/root/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/1.0.0/a241db52902eaf2c6aa732210bead40c090019a499ceb13bcbfa3f8ab646a126) Found cached dataset wikitext (/root/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/1.0.0/a241db52902eaf2c6aa732210bead40c090019a499ceb13bcbfa3f8ab646a126) Token indices sequence length is longer than the specified maximum sequence length for this model (2837981 > 512). Running this sequence through the model will result in indexing errors Starting ... Ready. 0 layer.0.SelfAttention.q Quantizing ... time 2.80 error 137.22543334960938 0 layer.0.SelfAttention.k Quantizing ... time 1.03 error 11656.236328125 0 layer.0.SelfAttention.v Quantizing ... time 1.04 error 10592.220703125 0 layer.0.SelfAttention.o Quantizing ... time 1.03 error 120966.59375 0 layer.1.DenseReluDense.wi_0 Quantizing ... time 1.05 error 38126.375 0 layer.1.DenseReluDense.wi_1 Quantizing ... time 1.04 error 32506.427734375 0 layer.1.DenseReluDense.wo Quantizing ... time 2.81 error 214925.140625 1 layer.0.SelfAttention.q Quantizing ... time 2.27 error 253.24050903320312 1 layer.0.SelfAttention.k Quantizing ... time 1.01 error 15095.802734375 1 layer.0.SelfAttention.v Quantizing ... time 1.03 error 4179.1083984375 1 layer.0.SelfAttention.o Quantizing ... time 1.03 error 20773.45703125 1 layer.1.DenseReluDense.wi_0 Quantizing ... time 1.03 error 28934.0859375 1 layer.1.DenseReluDense.wi_1 Quantizing ... time 1.05 error 24144.3125 1 layer.1.DenseReluDense.wo Quantizing ... time 2.75 error 97274.90625 2 layer.0.SelfAttention.q Quantizing ... time 2.34 error 205.71896362304688 2 layer.0.SelfAttention.k Quantizing ... time 1.05 error 10929.7021484375 2 layer.0.SelfAttention.v Quantizing ... time 1.06 error 3825.074462890625 2 layer.0.SelfAttention.o Quantizing ... time 1.02 error 2498.05859375 2 layer.1.DenseReluDense.wi_0 Quantizing ... time 1.03 error 42947.859375 2 layer.1.DenseReluDense.wi_1 Quantizing ... time 1.03 error 36752.1171875 2 layer.1.DenseReluDense.wo Quantizing ... time 2.71 error 135178.4375 3 layer.0.SelfAttention.q Quantizing ... time 2.31 error 263.6244201660156 3 layer.0.SelfAttention.k Quantizing ... time 1.06 error 13956.330078125 3 layer.0.SelfAttention.v Quantizing ... time 1.06 error 5999.3544921875 3 layer.0.SelfAttention.o Quantizing ... time 1.05 error 5389.494140625 3 layer.1.DenseReluDense.wi_0 Quantizing ... time 1.10 error 43406.984375 3 layer.1.DenseReluDense.wi_1 Quantizing ... time 1.07 error 40294.578125 3 layer.1.DenseReluDense.wo Quantizing ... time 2.80 error 136006.0 4 layer.0.SelfAttention.q Quantizing ... time 2.30 error 300.17022705078125 4 layer.0.SelfAttention.k Quantizing ... time 1.03 error 16043.65234375 4 layer.0.SelfAttention.v Quantizing ... time 1.03 error 6112.3857421875 4 layer.0.SelfAttention.o Quantizing ... time 1.03 error 4162.61474609375 4 layer.1.DenseReluDense.wi_0 Quantizing ... time 1.06 error 44532.5625 4 layer.1.DenseReluDense.wi_1 Quantizing ... time 1.07 error 42825.140625 4 layer.1.DenseReluDense.wo Quantizing ... time 2.88 error 165037.09375 5 layer.0.SelfAttention.q Quantizing ... time 2.28 error 352.9566650390625 5 layer.0.SelfAttention.k Quantizing ... time 1.03 error 19099.544921875 5 layer.0.SelfAttention.v Quantizing ... time 1.02 error 6900.2197265625 5 layer.0.SelfAttention.o Quantizing ... time 1.03 error 14074.9541015625 5 layer.1.DenseReluDense.wi_0 Quantizing ... time 1.05 error 38257.37109375 5 layer.1.DenseReluDense.wi_1 Quantizing ... time 1.04 error 36839.3046875 5 layer.1.DenseReluDense.wo Quantizing ... time 2.76 error 132062.96875 6 layer.0.SelfAttention.q Quantizing ... time 2.33 error 385.77520751953125 6 layer.0.SelfAttention.k Quantizing ... time 1.06 error 22221.486328125 6 layer.0.SelfAttention.v Quantizing ... time 1.02 error 7855.71533203125 6 layer.0.SelfAttention.o Quantizing ... time 1.04 error 20587.6171875 6 layer.1.DenseReluDense.wi_0 Quantizing ... time 1.05 error 34824.55078125 6 layer.1.DenseReluDense.wi_1 Quantizing ... time 1.05 error 36079.15625 6 layer.1.DenseReluDense.wo Quantizing ... time 2.74 error 166183.125 7 layer.0.SelfAttention.q Quantizing ... time 2.32 error 304.88519287109375 7 layer.0.SelfAttention.k Quantizing ... time 1.05 error 21111.80859375 7 layer.0.SelfAttention.v Quantizing ... time 1.05 error 5978.3095703125 7 layer.0.SelfAttention.o Quantizing ... time 1.08 error 10927.888671875 7 layer.1.DenseReluDense.wi_0 Quantizing ... time 1.07 error 29760.138671875 7 layer.1.DenseReluDense.wi_1 Quantizing ... time 1.08 error 33814.875 7 layer.1.DenseReluDense.wo Quantizing ... time 2.73 error 175563.4375 8 layer.0.SelfAttention.q Quantizing ... time 2.30 error 333.85931396484375 8 layer.0.SelfAttention.k Quantizing ... time 1.03 error 24634.984375 8 layer.0.SelfAttention.v Quantizing ... time 1.03 error 7116.8212890625 8 layer.0.SelfAttention.o Quantizing ... time 1.07 error 15384.3369140625 8 layer.1.DenseReluDense.wi_0 Quantizing ... time 1.07 error 28838.537109375 8 layer.1.DenseReluDense.wi_1 Quantizing ... time 1.09 error 29991.21875 8 layer.1.DenseReluDense.wo Quantizing ... time 2.85 error 170053.9375 9 layer.0.SelfAttention.q Quantizing ... time 2.27 error 354.49725341796875 9 layer.0.SelfAttention.k Quantizing ... time 1.02 error 26472.80078125 9 layer.0.SelfAttention.v Quantizing ... time 1.02 error 9778.65234375 9 layer.0.SelfAttention.o Quantizing ... time 1.03 error 46135.9140625 9 layer.1.DenseReluDense.wi_0 Quantizing ... time 1.05 error 30183.34765625 9 layer.1.DenseReluDense.wi_1 Quantizing ... time 1.05 error 35315.9375 9 layer.1.DenseReluDense.wo Quantizing ... time 2.80 error 294261.34375 10 layer.0.SelfAttention.q Quantizing ... time 2.36 error 330.4294128417969 10 layer.0.SelfAttention.k Quantizing ... time 1.04 error 21810.806640625 10 layer.0.SelfAttention.v Quantizing ... time 1.03 error 7377.060546875 10 layer.0.SelfAttention.o Quantizing ... time 1.03 error 31458.453125 10 layer.1.DenseReluDense.wi_0 Quantizing ... time 1.05 error 30981.423828125 10 layer.1.DenseReluDense.wi_1 Quantizing ... time 1.05 error 45770.9140625 10 layer.1.DenseReluDense.wo Quantizing ... time 2.73 error 338105.5625 11 layer.0.SelfAttention.q Quantizing ... time 2.35 error 332.6951904296875 11 layer.0.SelfAttention.k Quantizing ... time 1.06 error 23045.384765625 11 layer.0.SelfAttention.v Quantizing ... time 1.07 error 9068.484375 11 layer.0.SelfAttention.o Quantizing ... time 1.09 error 39716.03125 11 layer.1.DenseReluDense.wi_0 Quantizing ... time 1.05 error 29951.611328125 11 layer.1.DenseReluDense.wi_1 Quantizing ... time 1.06 error 46667.8828125 11 layer.1.DenseReluDense.wo Quantizing ... time 2.76 error 458927.0 12 layer.0.SelfAttention.q Quantizing ... time 2.29 error 364.91387939453125 12 layer.0.SelfAttention.k Quantizing ... time 1.03 error 26386.5546875 12 layer.0.SelfAttention.v Quantizing ... time 1.08 error 10412.025390625 12 layer.0.SelfAttention.o Quantizing ... time 1.07 error 69506.734375 12 layer.1.DenseReluDense.wi_0 Quantizing ... time 1.08 error 32437.169921875 12 layer.1.DenseReluDense.wi_1 Quantizing ... time 1.13 error 54537.1328125 12 layer.1.DenseReluDense.wo Quantizing ... time 2.81 error 555848.125 13 layer.0.SelfAttention.q Quantizing ... time 2.28 error 334.4095153808594 13 layer.0.SelfAttention.k Quantizing ... time 1.04 error 24624.59375 13 layer.0.SelfAttention.v Quantizing ... time 1.04 error 11093.2373046875 13 layer.0.SelfAttention.o Quantizing ... time 1.02 error 73139.5859375 13 layer.1.DenseReluDense.wi_0 Quantizing ... time 1.06 error 31185.44921875 13 layer.1.DenseReluDense.wi_1 Quantizing ... time 1.08 error 63193.28125 13 layer.1.DenseReluDense.wo Quantizing ... time 2.84 error 484003.5 14 layer.0.SelfAttention.q Quantizing ... time 2.33 error 315.36883544921875 14 layer.0.SelfAttention.k Quantizing ... time 1.02 error 22693.66015625 14 layer.0.SelfAttention.v Quantizing ... time 1.04 error 11054.283203125 14 layer.0.SelfAttention.o Quantizing ... time 1.04 error 55301.96875 14 layer.1.DenseReluDense.wi_0 Quantizing ... time 1.06 error 35040.09765625 14 layer.1.DenseReluDense.wi_1 Quantizing ... time 1.04 error 69227.671875 14 layer.1.DenseReluDense.wo Quantizing ... time 2.76 error 538346.875 15 layer.0.SelfAttention.q Quantizing ... time 2.31 error 305.54083251953125 15 layer.0.SelfAttention.k Quantizing ... time 1.05 error 22575.48046875 15 layer.0.SelfAttention.v Quantizing ... time 1.10 error 14035.61328125 15 layer.0.SelfAttention.o Quantizing ... time 1.03 error 100519.5234375 15 layer.1.DenseReluDense.wi_0 Quantizing ... time 1.04 error 34874.54296875 15 layer.1.DenseReluDense.wi_1 Quantizing ... time 1.04 error 76981.28125 15 layer.1.DenseReluDense.wo Quantizing ... time 2.75 error 590792.75 16 layer.0.SelfAttention.q Quantizing ... time 2.30 error 292.1910095214844 16 layer.0.SelfAttention.k Quantizing ... time 1.10 error 24363.197265625 16 layer.0.SelfAttention.v Quantizing ... time 1.08 error 17756.51953125 16 layer.0.SelfAttention.o Quantizing ... time 1.09 error 189057.78125 16 layer.1.DenseReluDense.wi_0 Quantizing ... time 1.07 error 35124.7109375 16 layer.1.DenseReluDense.wi_1 Quantizing ... time 1.09 error 87091.78125 16 layer.1.DenseReluDense.wo Quantizing ... time 2.81 error 1044289.5625 17 layer.0.SelfAttention.q Quantizing ... time 2.28 error 261.1668701171875 17 layer.0.SelfAttention.k Quantizing ... time 1.02 error 18598.86328125 17 layer.0.SelfAttention.v Quantizing ... time 1.03 error 18718.98046875 17 layer.0.SelfAttention.o Quantizing ... time 1.04 error 254419.0625 17 layer.1.DenseReluDense.wi_0 Quantizing ... time 1.07 error 35458.671875 17 layer.1.DenseReluDense.wi_1 Quantizing ... time 1.10 error 88659.0390625 17 layer.1.DenseReluDense.wo Quantizing ... time 2.87 error 1568064.75 18 layer.0.SelfAttention.q Quantizing ... time 2.31 error 282.4662780761719 18 layer.0.SelfAttention.k Quantizing ... time 1.03 error 19631.552734375 18 layer.0.SelfAttention.v Quantizing ... time 1.06 error 21855.74609375 18 layer.0.SelfAttention.o Quantizing ... time 1.05 error 451241.28125 18 layer.1.DenseReluDense.wi_0 Quantizing ... time 1.04 error 35819.91015625 18 layer.1.DenseReluDense.wi_1 Quantizing ... time 1.04 error 96373.1015625 18 layer.1.DenseReluDense.wo Quantizing ... time 2.75 error 4121681.25 19 layer.0.SelfAttention.q Quantizing ... time 2.33 error 222.93960571289062 19 layer.0.SelfAttention.k Quantizing ... time 1.08 error 15299.37890625 19 layer.0.SelfAttention.v Quantizing ... time 1.04 error 25438.86328125 19 layer.0.SelfAttention.o Quantizing ... time 1.05 error 1097173.0 19 layer.1.DenseReluDense.wi_0 Quantizing ... time 1.06 error 34149.09375 19 layer.1.DenseReluDense.wi_1 Quantizing ... time 1.04 error 90188.0078125 19 layer.1.DenseReluDense.wo Quantizing ... time 2.74 error 6266101.0 20 layer.0.SelfAttention.q Quantizing ... time 2.35 error 211.04458618164062 20 layer.0.SelfAttention.k Quantizing ... time 1.04 error 13809.572265625 20 layer.0.SelfAttention.v Quantizing ... time 1.06 error 29788.564453125 20 layer.0.SelfAttention.o Quantizing ... time 1.05 error 1334543.125 20 layer.1.DenseReluDense.wi_0 Quantizing ... time 1.09 error 31375.771484375 20 layer.1.DenseReluDense.wi_1 Quantizing ... time 1.08 error 78350.203125 20 layer.1.DenseReluDense.wo Quantizing ... time 2.74 error 7183110.0 21 layer.0.SelfAttention.q Quantizing ... time 2.30 error 194.26229858398438 21 layer.0.SelfAttention.k Quantizing ... time 1.04 error 14619.9853515625 21 layer.0.SelfAttention.v Quantizing ... time 1.04 error 38181.265625 21 layer.0.SelfAttention.o Quantizing ... time 1.05 error 1776184.0 21 layer.1.DenseReluDense.wi_0 Quantizing ... time 1.12 error 30981.5625 21 layer.1.DenseReluDense.wi_1 Quantizing ... time 1.09 error 77552.046875 21 layer.1.DenseReluDense.wo Quantizing ... time 2.83 error 9851391.0 22 layer.0.SelfAttention.q Quantizing ... time 2.29 error 196.11984252929688 22 layer.0.SelfAttention.k Quantizing ... time 1.03 error 12573.25 22 layer.0.SelfAttention.v Quantizing ... time 1.04 error 43983.0703125 22 layer.0.SelfAttention.o Quantizing ... time 1.03 error 1969925.5 22 layer.1.DenseReluDense.wi_0 Quantizing ... time 1.05 error 42481.56640625 22 layer.1.DenseReluDense.wi_1 Quantizing ... time 1.04 error 106760.0078125 22 layer.1.DenseReluDense.wo Quantizing ... time 2.84 error 15271906.0 23 layer.0.SelfAttention.q Quantizing ... time 2.39 error 213.98135375976562 23 layer.0.SelfAttention.k Quantizing ... time 1.03 error 14789.1396484375 23 layer.0.SelfAttention.v Quantizing ... time 1.04 error 57604.91015625 23 layer.0.SelfAttention.o Quantizing ... time 1.02 error 2114846.25 23 layer.1.DenseReluDense.wi_0 Quantizing ... time 1.05 error 41047.03125 23 layer.1.DenseReluDense.wi_1 Quantizing ... time 1.04 error 83152.765625 23 layer.1.DenseReluDense.wo Quantizing ... time 2.75 error 13002426.0 728.4299275875092 Packing ... encoder.block.0.layer.0.SelfAttention.q encoder.block.0.layer.0.SelfAttention.k encoder.block.0.layer.0.SelfAttention.v encoder.block.0.layer.0.SelfAttention.o encoder.block.0.layer.1.DenseReluDense.wi_0 encoder.block.0.layer.1.DenseReluDense.wi_1 encoder.block.0.layer.1.DenseReluDense.wo encoder.block.1.layer.0.SelfAttention.q encoder.block.1.layer.0.SelfAttention.k encoder.block.1.layer.0.SelfAttention.v encoder.block.1.layer.0.SelfAttention.o encoder.block.1.layer.1.DenseReluDense.wi_0 encoder.block.1.layer.1.DenseReluDense.wi_1 encoder.block.1.layer.1.DenseReluDense.wo encoder.block.2.layer.0.SelfAttention.q encoder.block.2.layer.0.SelfAttention.k encoder.block.2.layer.0.SelfAttention.v encoder.block.2.layer.0.SelfAttention.o encoder.block.2.layer.1.DenseReluDense.wi_0 encoder.block.2.layer.1.DenseReluDense.wi_1 encoder.block.2.layer.1.DenseReluDense.wo encoder.block.3.layer.0.SelfAttention.q encoder.block.3.layer.0.SelfAttention.k encoder.block.3.layer.0.SelfAttention.v encoder.block.3.layer.0.SelfAttention.o encoder.block.3.layer.1.DenseReluDense.wi_0 encoder.block.3.layer.1.DenseReluDense.wi_1 encoder.block.3.layer.1.DenseReluDense.wo encoder.block.4.layer.0.SelfAttention.q encoder.block.4.layer.0.SelfAttention.k encoder.block.4.layer.0.SelfAttention.v encoder.block.4.layer.0.SelfAttention.o encoder.block.4.layer.1.DenseReluDense.wi_0 encoder.block.4.layer.1.DenseReluDense.wi_1 encoder.block.4.layer.1.DenseReluDense.wo encoder.block.5.layer.0.SelfAttention.q encoder.block.5.layer.0.SelfAttention.k encoder.block.5.layer.0.SelfAttention.v encoder.block.5.layer.0.SelfAttention.o encoder.block.5.layer.1.DenseReluDense.wi_0 encoder.block.5.layer.1.DenseReluDense.wi_1 encoder.block.5.layer.1.DenseReluDense.wo encoder.block.6.layer.0.SelfAttention.q encoder.block.6.layer.0.SelfAttention.k encoder.block.6.layer.0.SelfAttention.v encoder.block.6.layer.0.SelfAttention.o encoder.block.6.layer.1.DenseReluDense.wi_0 encoder.block.6.layer.1.DenseReluDense.wi_1 encoder.block.6.layer.1.DenseReluDense.wo encoder.block.7.layer.0.SelfAttention.q encoder.block.7.layer.0.SelfAttention.k encoder.block.7.layer.0.SelfAttention.v encoder.block.7.layer.0.SelfAttention.o encoder.block.7.layer.1.DenseReluDense.wi_0 encoder.block.7.layer.1.DenseReluDense.wi_1 encoder.block.7.layer.1.DenseReluDense.wo encoder.block.8.layer.0.SelfAttention.q encoder.block.8.layer.0.SelfAttention.k encoder.block.8.layer.0.SelfAttention.v encoder.block.8.layer.0.SelfAttention.o encoder.block.8.layer.1.DenseReluDense.wi_0 encoder.block.8.layer.1.DenseReluDense.wi_1 encoder.block.8.layer.1.DenseReluDense.wo encoder.block.9.layer.0.SelfAttention.q encoder.block.9.layer.0.SelfAttention.k encoder.block.9.layer.0.SelfAttention.v encoder.block.9.layer.0.SelfAttention.o encoder.block.9.layer.1.DenseReluDense.wi_0 encoder.block.9.layer.1.DenseReluDense.wi_1 encoder.block.9.layer.1.DenseReluDense.wo encoder.block.10.layer.0.SelfAttention.q encoder.block.10.layer.0.SelfAttention.k encoder.block.10.layer.0.SelfAttention.v encoder.block.10.layer.0.SelfAttention.o encoder.block.10.layer.1.DenseReluDense.wi_0 encoder.block.10.layer.1.DenseReluDense.wi_1 encoder.block.10.layer.1.DenseReluDense.wo encoder.block.11.layer.0.SelfAttention.q encoder.block.11.layer.0.SelfAttention.k encoder.block.11.layer.0.SelfAttention.v encoder.block.11.layer.0.SelfAttention.o encoder.block.11.layer.1.DenseReluDense.wi_0 encoder.block.11.layer.1.DenseReluDense.wi_1 encoder.block.11.layer.1.DenseReluDense.wo encoder.block.12.layer.0.SelfAttention.q encoder.block.12.layer.0.SelfAttention.k encoder.block.12.layer.0.SelfAttention.v encoder.block.12.layer.0.SelfAttention.o encoder.block.12.layer.1.DenseReluDense.wi_0 encoder.block.12.layer.1.DenseReluDense.wi_1 encoder.block.12.layer.1.DenseReluDense.wo encoder.block.13.layer.0.SelfAttention.q encoder.block.13.layer.0.SelfAttention.k encoder.block.13.layer.0.SelfAttention.v encoder.block.13.layer.0.SelfAttention.o encoder.block.13.layer.1.DenseReluDense.wi_0 encoder.block.13.layer.1.DenseReluDense.wi_1 encoder.block.13.layer.1.DenseReluDense.wo encoder.block.14.layer.0.SelfAttention.q encoder.block.14.layer.0.SelfAttention.k encoder.block.14.layer.0.SelfAttention.v encoder.block.14.layer.0.SelfAttention.o encoder.block.14.layer.1.DenseReluDense.wi_0 encoder.block.14.layer.1.DenseReluDense.wi_1 encoder.block.14.layer.1.DenseReluDense.wo encoder.block.15.layer.0.SelfAttention.q encoder.block.15.layer.0.SelfAttention.k encoder.block.15.layer.0.SelfAttention.v encoder.block.15.layer.0.SelfAttention.o encoder.block.15.layer.1.DenseReluDense.wi_0 encoder.block.15.layer.1.DenseReluDense.wi_1 encoder.block.15.layer.1.DenseReluDense.wo encoder.block.16.layer.0.SelfAttention.q encoder.block.16.layer.0.SelfAttention.k encoder.block.16.layer.0.SelfAttention.v encoder.block.16.layer.0.SelfAttention.o encoder.block.16.layer.1.DenseReluDense.wi_0 encoder.block.16.layer.1.DenseReluDense.wi_1 encoder.block.16.layer.1.DenseReluDense.wo encoder.block.17.layer.0.SelfAttention.q encoder.block.17.layer.0.SelfAttention.k encoder.block.17.layer.0.SelfAttention.v encoder.block.17.layer.0.SelfAttention.o encoder.block.17.layer.1.DenseReluDense.wi_0 encoder.block.17.layer.1.DenseReluDense.wi_1 encoder.block.17.layer.1.DenseReluDense.wo encoder.block.18.layer.0.SelfAttention.q encoder.block.18.layer.0.SelfAttention.k encoder.block.18.layer.0.SelfAttention.v encoder.block.18.layer.0.SelfAttention.o encoder.block.18.layer.1.DenseReluDense.wi_0 encoder.block.18.layer.1.DenseReluDense.wi_1 encoder.block.18.layer.1.DenseReluDense.wo encoder.block.19.layer.0.SelfAttention.q encoder.block.19.layer.0.SelfAttention.k encoder.block.19.layer.0.SelfAttention.v encoder.block.19.layer.0.SelfAttention.o encoder.block.19.layer.1.DenseReluDense.wi_0 encoder.block.19.layer.1.DenseReluDense.wi_1 encoder.block.19.layer.1.DenseReluDense.wo encoder.block.20.layer.0.SelfAttention.q encoder.block.20.layer.0.SelfAttention.k encoder.block.20.layer.0.SelfAttention.v encoder.block.20.layer.0.SelfAttention.o encoder.block.20.layer.1.DenseReluDense.wi_0 encoder.block.20.layer.1.DenseReluDense.wi_1 encoder.block.20.layer.1.DenseReluDense.wo encoder.block.21.layer.0.SelfAttention.q encoder.block.21.layer.0.SelfAttention.k encoder.block.21.layer.0.SelfAttention.v encoder.block.21.layer.0.SelfAttention.o encoder.block.21.layer.1.DenseReluDense.wi_0 encoder.block.21.layer.1.DenseReluDense.wi_1 encoder.block.21.layer.1.DenseReluDense.wo encoder.block.22.layer.0.SelfAttention.q encoder.block.22.layer.0.SelfAttention.k encoder.block.22.layer.0.SelfAttention.v encoder.block.22.layer.0.SelfAttention.o encoder.block.22.layer.1.DenseReluDense.wi_0 encoder.block.22.layer.1.DenseReluDense.wi_1 encoder.block.22.layer.1.DenseReluDense.wo encoder.block.23.layer.0.SelfAttention.q encoder.block.23.layer.0.SelfAttention.k encoder.block.23.layer.0.SelfAttention.v encoder.block.23.layer.0.SelfAttention.o encoder.block.23.layer.1.DenseReluDense.wi_0 encoder.block.23.layer.1.DenseReluDense.wi_1 encoder.block.23.layer.1.DenseReluDense.wo Done.