python run_qa.py     --model_name_or_path /data1/vchua/tld-poc/bert-base-squadv1-local-hybrid-compiled     --optimize_model_before_eval      --optimized_checkpoint /data1/vchua/tld-poc/bert-base-squadv1-local-hybrid-filled-lt-compiled      --dataset_name squad     --do_eval     --do_train     --evaluation_strategy steps     --eval_steps 250     --learning_rate 3e-5     --teacher bert-large-uncased-whole-word-masking-finetuned-squad     --teacher_ratio 0.9     --lr_scheduler_type cosine_with_restarts     --warmup_ratio 0.25     --cosine_cycles 1     --num_train_epochs 5     --per_device_eval_batch_size 128     --per_device_train_batch_size 16     --max_seq_length 384     --doc_stride 128     --save_steps 250     --nncf_config nncf_bert_squad_sparsity.json     --logging_steps 1     --overwrite_output_dir     --run_name run10-bert-squad-cropped-qat-customkd-lt-5eph     --output_dir /data1/vchua/tld-poc-csr-dgx1-03//run10-bert-squad-cropped-qat-customkd-lt-5eph
 --max_steps 25
### End of CMD ---
01/16/2022 13:20:03 - WARNING - __main__ - Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False
01/16/2022 13:20:03 - INFO - __main__ - Training/evaluation parameters TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
cosine_cycles=1,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_find_unused_parameters=None,
debug=[],
deepspeed=None,
disable_tqdm=False,
distill_temp=2.0,
do_eval=True,
do_predict=False,
do_train=True,
eval_accumulation_steps=None,
eval_steps=250,
evaluation_strategy=IntervalStrategy.STEPS,
fp16=False,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
gradient_accumulation_steps=1,
greater_is_better=None,
group_by_length=False,
ignore_data_skip=False,
label_names=None,
label_smoothing_factor=0.0,
learning_rate=3e-05,
length_column_name=length,
load_best_model_at_end=False,
local_rank=-1,
log_level=-1,
log_level_replica=-1,
log_on_each_node=True,
logging_dir=/data1/vchua/tld-poc-csr-dgx1-03//run10-bert-squad-cropped-qat-customkd-lt-5eph/runs/Jan16_13-20-03_csr-dgx1-03,
logging_first_step=False,
logging_steps=1,
logging_strategy=IntervalStrategy.STEPS,
lr_scheduler_type=SchedulerType.COSINE_WITH_RESTARTS,
max_grad_norm=1.0,
max_steps=25,
metric_for_best_model=None,
mp_parameters=,
nncf_config=nncf_bert_squad_sparsity.json,
no_cuda=False,
num_train_epochs=5.0,
optimize_model_before_eval=True,
optimized_checkpoint=/data1/vchua/tld-poc/bert-base-squadv1-local-hybrid-filled-lt-compiled,
output_dir=/data1/vchua/tld-poc-csr-dgx1-03//run10-bert-squad-cropped-qat-customkd-lt-5eph,
overwrite_output_dir=True,
past_index=-1,
per_device_eval_batch_size=128,
per_device_train_batch_size=16,
prediction_loss_only=False,
push_to_hub=False,
push_to_hub_model_id=run10-bert-squad-cropped-qat-customkd-lt-5eph,
push_to_hub_organization=None,
push_to_hub_token=None,
qat_checkpoint=None,
remove_unused_columns=True,
report_to=['mlflow', 'tensorboard', 'wandb'],
resume_from_checkpoint=None,
run_name=run10-bert-squad-cropped-qat-customkd-lt-5eph,
save_on_each_node=False,
save_steps=250,
save_strategy=IntervalStrategy.STEPS,
save_total_limit=None,
seed=42,
sharded_ddp=[],
skip_memory_metrics=True,
teacher=bert-large-uncased-whole-word-masking-finetuned-squad,
teacher_ratio=0.9,
to_onnx=None,
tpu_metrics_debug=False,
tpu_num_cores=None,
use_legacy_prediction_loop=False,
warmup_ratio=0.25,
warmup_steps=0,
weight_decay=0.0,
)
01/16/2022 13:20:03 - INFO - datasets.builder - No config specified, defaulting to first: squad/plain_text
01/16/2022 13:20:03 - INFO - datasets.info - Loading Dataset Infos from /home/vchua/.cache/huggingface/modules/datasets_modules/datasets/squad/d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453
01/16/2022 13:20:03 - INFO - datasets.builder - Overwrite dataset info from restored data version.
01/16/2022 13:20:03 - INFO - datasets.info - Loading Dataset info from /home/vchua/.cache/huggingface/datasets/squad/plain_text/1.0.0/d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453
01/16/2022 13:20:03 - WARNING - datasets.builder - Reusing dataset squad (/home/vchua/.cache/huggingface/datasets/squad/plain_text/1.0.0/d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453)
01/16/2022 13:20:03 - INFO - datasets.info - Loading Dataset info from /home/vchua/.cache/huggingface/datasets/squad/plain_text/1.0.0/d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453
/home/vchua/tld-poc/nncf/nncf/torch/dynamic_graph/patch_pytorch.py:163: UserWarning: Not patching unique_dim since it is missing in this version of PyTorch
  warnings.warn("Not patching {} since it is missing in this version of PyTorch".format(op_name))
/home/vchua/tld-poc/nncf/nncf/torch/dynamic_graph/patch_pytorch.py:163: UserWarning: Not patching unique_dim since it is missing in this version of PyTorch
  warnings.warn("Not patching {} since it is missing in this version of PyTorch".format(op_name))
  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 666.08it/s]
[INFO|configuration_utils.py:543] 2022-01-16 13:20:03,990 >> loading configuration file /data1/vchua/tld-poc/bert-base-squadv1-local-hybrid-compiled/config.json
[INFO|configuration_utils.py:581] 2022-01-16 13:20:03,990 >> Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForQuestionAnswering"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "pruned_heads": {
    "0": [
      0,
      2,
      4,
      5,
      6,
      7,
      11
    ],
    "1": [
      0,
      2,
      3,
      5,
      6,
      7,
      8
    ],
    "2": [
      8,
      4,
      7
    ],
    "3": [
      2,
      4,
      6
    ],
    "4": [
      1,
      2,
      11
    ],
    "5": [
      1,
      2,
      5,
      6,
      7,
      11
    ],
    "6": [
      0,
      2,
      3,
      7,
      10
    ],
    "7": [
      1,
      3,
      6,
      7,
      11
    ],
    "8": [
      0,
      3,
      4,
      5,
      8
    ],
    "9": [
      1,
      3,
      4,
      5,
      7,
      9,
      10
    ],
    "10": [
      1,
      4,
      5,
      6,
      7,
      8
    ],
    "11": [
      4,
      5,
      7,
      8,
      10,
      11
    ]
  },
  "torch_dtype": "float32",
  "transformers_version": "4.9.1",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

[INFO|tokenization_utils_base.py:1664] 2022-01-16 13:20:03,991 >> Didn't find file /data1/vchua/tld-poc/bert-base-squadv1-local-hybrid-compiled/added_tokens.json. We won't load it.
[INFO|tokenization_utils_base.py:1728] 2022-01-16 13:20:03,991 >> loading file /data1/vchua/tld-poc/bert-base-squadv1-local-hybrid-compiled/vocab.txt
[INFO|tokenization_utils_base.py:1728] 2022-01-16 13:20:03,991 >> loading file /data1/vchua/tld-poc/bert-base-squadv1-local-hybrid-compiled/tokenizer.json
[INFO|tokenization_utils_base.py:1728] 2022-01-16 13:20:03,991 >> loading file None
[INFO|tokenization_utils_base.py:1728] 2022-01-16 13:20:03,991 >> loading file /data1/vchua/tld-poc/bert-base-squadv1-local-hybrid-compiled/special_tokens_map.json
[INFO|tokenization_utils_base.py:1728] 2022-01-16 13:20:03,991 >> loading file /data1/vchua/tld-poc/bert-base-squadv1-local-hybrid-compiled/tokenizer_config.json
01/16/2022 13:20:04 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /home/vchua/.cache/huggingface/datasets/squad/plain_text/1.0.0/d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453/cache-65c15074471a5f81.arrow
01/16/2022 13:20:04 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /home/vchua/.cache/huggingface/datasets/squad/plain_text/1.0.0/d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453/cache-c7d0554dffe9c91c.arrow
[INFO|configuration_utils.py:545] 2022-01-16 13:20:05,280 >> loading configuration file https://huggingface.co/bert-large-uncased-whole-word-masking-finetuned-squad/resolve/main/config.json from cache at /home/vchua/.cache/huggingface/transformers/402f6d8c99fdd3bffd354782842e2b5a6be81f80ab630591051ebc78ca726f39.ebffac96fee44dbe30674c204dd3d3f358c1b8c33100281ecdd688514f41410a
[INFO|configuration_utils.py:581] 2022-01-16 13:20:05,281 >> Model config BertConfig {
  "architectures": [
    "BertForQuestionAnswering"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.9.1",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

[INFO|modeling_utils.py:1280] 2022-01-16 13:20:05,631 >> loading weights file https://huggingface.co/bert-large-uncased-whole-word-masking-finetuned-squad/resolve/main/pytorch_model.bin from cache at /home/vchua/.cache/huggingface/transformers/28a060c1e2e1216bd9c8f5222ce38ce916c4829b8b05e027fe91510f3fd4da7e.50fc4a146342b3a6a99b185af3d5b70163b64d45790be64d9124dcccbcd3915e
[INFO|modeling_utils.py:1574] 2022-01-16 13:20:08,823 >> All model checkpoint weights were used when initializing BertForQuestionAnswering.

[INFO|modeling_utils.py:1582] 2022-01-16 13:20:08,823 >> All the weights of BertForQuestionAnswering were initialized from the model checkpoint at bert-large-uncased-whole-word-masking-finetuned-squad.
If your task is similar to the task the model of the checkpoint was trained on, you can already use BertForQuestionAnswering for predictions without further training.
[INFO|modeling_utils.py:1278] 2022-01-16 13:20:08,825 >> loading weights file /data1/vchua/tld-poc/bert-base-squadv1-local-hybrid-compiled/pytorch_model.bin
[INFO|modeling_utils.py:1574] 2022-01-16 13:20:10,073 >> All model checkpoint weights were used when initializing BertForQuestionAnswering.

[INFO|modeling_utils.py:1582] 2022-01-16 13:20:10,073 >> All the weights of BertForQuestionAnswering were initialized from the model checkpoint at /data1/vchua/tld-poc/bert-base-squadv1-local-hybrid-compiled.
If your task is similar to the task the model of the checkpoint was trained on, you can already use BertForQuestionAnswering for predictions without further training.
removed heads 0, total_heads=81, percentage removed=0.0
bert.encoder.layer.0.intermediate.dense, sparsity = 93.98
bert.encoder.layer.0.output.dense, sparsity = 93.98
bert.encoder.layer.1.intermediate.dense, sparsity = 89.75
bert.encoder.layer.1.output.dense, sparsity = 89.75
bert.encoder.layer.2.intermediate.dense, sparsity = 88.96
bert.encoder.layer.2.output.dense, sparsity = 88.96
bert.encoder.layer.3.intermediate.dense, sparsity = 88.02
bert.encoder.layer.3.output.dense, sparsity = 88.02
bert.encoder.layer.4.intermediate.dense, sparsity = 87.43
bert.encoder.layer.4.output.dense, sparsity = 87.43
bert.encoder.layer.5.intermediate.dense, sparsity = 89.06
bert.encoder.layer.5.output.dense, sparsity = 89.06
bert.encoder.layer.6.intermediate.dense, sparsity = 90.89
bert.encoder.layer.6.output.dense, sparsity = 90.89
bert.encoder.layer.7.intermediate.dense, sparsity = 93.13
bert.encoder.layer.7.output.dense, sparsity = 93.13
bert.encoder.layer.8.intermediate.dense, sparsity = 96.48
bert.encoder.layer.8.output.dense, sparsity = 96.48
bert.encoder.layer.9.intermediate.dense, sparsity = 98.27
bert.encoder.layer.9.output.dense, sparsity = 98.27
bert.encoder.layer.10.intermediate.dense, sparsity = 97.20
bert.encoder.layer.10.output.dense, sparsity = 97.20
bert.encoder.layer.11.intermediate.dense, sparsity = 96.58
bert.encoder.layer.11.output.dense, sparsity = 96.58
WARNING:nncf:Graphviz is not installed - only the .dot model visualization format will be used. Install pygraphviz into your Python environment and graphviz system-wide to enable PNG rendering.
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/Embedding[word_embeddings] by BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[word_embeddings]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/Embedding[position_embeddings] by BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[position_embeddings]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/Embedding[token_type_embeddings] by BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[token_type_embeddings]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/Linear[query] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/Linear[key] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/Linear[value] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfOutput[output]/Linear[dense] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermediate[intermediate]/Linear[dense] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermediate[intermediate]/NNCFLinear[dense]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[output]/Linear[dense] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[output]/NNCFLinear[dense]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/Linear[query] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/Linear[key] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/Linear[value] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfOutput[output]/Linear[dense] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermediate[intermediate]/Linear[dense] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermediate[intermediate]/NNCFLinear[dense]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[output]/Linear[dense] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[output]/NNCFLinear[dense]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/Linear[query] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/Linear[key] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/Linear[value] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfOutput[output]/Linear[dense] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermediate[intermediate]/Linear[dense] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermediate[intermediate]/NNCFLinear[dense]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[output]/Linear[dense] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[output]/NNCFLinear[dense]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/Linear[query] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/Linear[key] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/Linear[value] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfOutput[output]/Linear[dense] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermediate[intermediate]/Linear[dense] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermediate[intermediate]/NNCFLinear[dense]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[output]/Linear[dense] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[output]/NNCFLinear[dense]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/Linear[query] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/Linear[key] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/Linear[value] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfOutput[output]/Linear[dense] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermediate[intermediate]/Linear[dense] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermediate[intermediate]/NNCFLinear[dense]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[output]/Linear[dense] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[output]/NNCFLinear[dense]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/Linear[query] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/Linear[key] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/Linear[value] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfOutput[output]/Linear[dense] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermediate[intermediate]/Linear[dense] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermediate[intermediate]/NNCFLinear[dense]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[output]/Linear[dense] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[output]/NNCFLinear[dense]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/Linear[query] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/Linear[key] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/Linear[value] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfOutput[output]/Linear[dense] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermediate[intermediate]/Linear[dense] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermediate[intermediate]/NNCFLinear[dense]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[output]/Linear[dense] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[output]/NNCFLinear[dense]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/Linear[query] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/Linear[key] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/Linear[value] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfOutput[output]/Linear[dense] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermediate[intermediate]/Linear[dense] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermediate[intermediate]/NNCFLinear[dense]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[output]/Linear[dense] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[output]/NNCFLinear[dense]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/Linear[query] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/Linear[key] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/Linear[value] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfOutput[output]/Linear[dense] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermediate[intermediate]/Linear[dense] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermediate[intermediate]/NNCFLinear[dense]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[output]/Linear[dense] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[output]/NNCFLinear[dense]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/Linear[query] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/Linear[key] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/Linear[value] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfOutput[output]/Linear[dense] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermediate[intermediate]/Linear[dense] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermediate[intermediate]/NNCFLinear[dense]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[output]/Linear[dense] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[output]/NNCFLinear[dense]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/Linear[query] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/Linear[key] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/Linear[value] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfOutput[output]/Linear[dense] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertIntermediate[intermediate]/Linear[dense] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertIntermediate[intermediate]/NNCFLinear[dense]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[output]/Linear[dense] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[output]/NNCFLinear[dense]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/Linear[query] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/Linear[key] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/Linear[value] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfOutput[output]/Linear[dense] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertIntermediate[intermediate]/Linear[dense] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertIntermediate[intermediate]/NNCFLinear[dense]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[output]/Linear[dense] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[output]/NNCFLinear[dense]
INFO:nncf:Wrapping module BertForQuestionAnswering/Linear[qa_outputs] by BertForQuestionAnswering/NNCFLinear[qa_outputs]
WARNING:nncf:Preset quantizer parameters {'mode'} explicitly overrided.
WARNING:nncf:Preset quantizer parameters {'mode'} explicitly overrided.
WARNING:nncf:Could not find an associated input activation quantizer for a weighted node with quantizable weights: BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[word_embeddings]/embedding_0

WARNING:nncf:Could not find an associated input activation quantizer for a weighted node with quantizable weights: BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[position_embeddings]/embedding_0

WARNING:nncf:Could not find an associated input activation quantizer for a weighted node with quantizable weights: BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[token_type_embeddings]/embedding_0

WARNING:nncf:Attempted to use weight quantizer of 4 BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[word_embeddings]/embedding_0 to quantize input of {'6 BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/__add___0'}, but no compatible configs were found.
WARNING:nncf:Attempted to use weight quantizer of 5 BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[token_type_embeddings]/embedding_0 to quantize input of {'6 BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/__add___0'}, but no compatible configs were found.
WARNING:nncf:Attempted to use weight quantizer of 7 BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[position_embeddings]/embedding_0 to quantize input of {'8 BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/__iadd___0'}, but no compatible configs were found.
WARNING:nncf:Could not find an associated input activation quantizer for a weighted node with quantizable weights: BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[word_embeddings]/embedding_0

WARNING:nncf:Could not find an associated input activation quantizer for a weighted node with quantizable weights: BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[position_embeddings]/embedding_0

WARNING:nncf:Could not find an associated input activation quantizer for a weighted node with quantizable weights: BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[token_type_embeddings]/embedding_0

WARNING:nncf:Attempted to use weight quantizer of 4 BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[word_embeddings]/embedding_0 to quantize input of {'6 BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/__add___0'}, but no compatible configs were found.
WARNING:nncf:Attempted to use weight quantizer of 5 BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[token_type_embeddings]/embedding_0 to quantize input of {'6 BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/__add___0'}, but no compatible configs were found.
WARNING:nncf:Attempted to use weight quantizer of 7 BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[position_embeddings]/embedding_0 to quantize input of {'8 BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/__iadd___0'}, but no compatible configs were found.
WARNING:nncf:NNCFNetwork(
  (nncf_module): BertForQuestionAnswering(
    (bert): BertModel(
      (embeddings): BertEmbeddings(
        (word_embeddings): NNCFEmbedding(
          30522, 768, padding_idx=0
          (pre_ops): ModuleDict()
          (post_ops): ModuleDict()
        )
        (position_embeddings): NNCFEmbedding(
          512, 768
          (pre_ops): ModuleDict()
          (post_ops): ModuleDict()
        )
        (token_type_embeddings): NNCFEmbedding(
          2, 768
          (pre_ops): ModuleDict()
          (post_ops): ModuleDict()
        )
        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): BertEncoder(
        (layer): ModuleList(
          (0): BertLayer(
            (attention): BertAttention(
              (self): BertSelfAttention(
                (query): NNCFLinear(
                  in_features=768, out_features=320, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (key): NNCFLinear(
                  in_features=768, out_features=320, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (value): NNCFLinear(
                  in_features=768, out_features=320, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): BertSelfOutput(
                (dense): NNCFLinear(
                  in_features=320, out_features=768, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): BertIntermediate(
              (dense): NNCFLinear(
                in_features=768, out_features=185, bias=True
                (pre_ops): ModuleDict()
                (post_ops): ModuleDict()
              )
            )
            (output): BertOutput(
              (dense): NNCFLinear(
                in_features=185, out_features=768, bias=True
                (pre_ops): ModuleDict()
                (post_ops): ModuleDict()
              )
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (1): BertLayer(
            (attention): BertAttention(
              (self): BertSelfAttention(
                (query): NNCFLinear(
                  in_features=768, out_features=320, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (key): NNCFLinear(
                  in_features=768, out_features=320, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (value): NNCFLinear(
                  in_features=768, out_features=320, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): BertSelfOutput(
                (dense): NNCFLinear(
                  in_features=320, out_features=768, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): BertIntermediate(
              (dense): NNCFLinear(
                in_features=768, out_features=315, bias=True
                (pre_ops): ModuleDict()
                (post_ops): ModuleDict()
              )
            )
            (output): BertOutput(
              (dense): NNCFLinear(
                in_features=315, out_features=768, bias=True
                (pre_ops): ModuleDict()
                (post_ops): ModuleDict()
              )
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (2): BertLayer(
            (attention): BertAttention(
              (self): BertSelfAttention(
                (query): NNCFLinear(
                  in_features=768, out_features=576, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (key): NNCFLinear(
                  in_features=768, out_features=576, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (value): NNCFLinear(
                  in_features=768, out_features=576, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): BertSelfOutput(
                (dense): NNCFLinear(
                  in_features=576, out_features=768, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): BertIntermediate(
              (dense): NNCFLinear(
                in_features=768, out_features=339, bias=True
                (pre_ops): ModuleDict()
                (post_ops): ModuleDict()
              )
            )
            (output): BertOutput(
              (dense): NNCFLinear(
                in_features=339, out_features=768, bias=True
                (pre_ops): ModuleDict()
                (post_ops): ModuleDict()
              )
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (3): BertLayer(
            (attention): BertAttention(
              (self): BertSelfAttention(
                (query): NNCFLinear(
                  in_features=768, out_features=576, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (key): NNCFLinear(
                  in_features=768, out_features=576, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (value): NNCFLinear(
                  in_features=768, out_features=576, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): BertSelfOutput(
                (dense): NNCFLinear(
                  in_features=576, out_features=768, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): BertIntermediate(
              (dense): NNCFLinear(
                in_features=768, out_features=368, bias=True
                (pre_ops): ModuleDict()
                (post_ops): ModuleDict()
              )
            )
            (output): BertOutput(
              (dense): NNCFLinear(
                in_features=368, out_features=768, bias=True
                (pre_ops): ModuleDict()
                (post_ops): ModuleDict()
              )
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (4): BertLayer(
            (attention): BertAttention(
              (self): BertSelfAttention(
                (query): NNCFLinear(
                  in_features=768, out_features=576, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (key): NNCFLinear(
                  in_features=768, out_features=576, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (value): NNCFLinear(
                  in_features=768, out_features=576, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): BertSelfOutput(
                (dense): NNCFLinear(
                  in_features=576, out_features=768, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): BertIntermediate(
              (dense): NNCFLinear(
                in_features=768, out_features=386, bias=True
                (pre_ops): ModuleDict()
                (post_ops): ModuleDict()
              )
            )
            (output): BertOutput(
              (dense): NNCFLinear(
                in_features=386, out_features=768, bias=True
                (pre_ops): ModuleDict()
                (post_ops): ModuleDict()
              )
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (5): BertLayer(
            (attention): BertAttention(
              (self): BertSelfAttention(
                (query): NNCFLinear(
                  in_features=768, out_features=384, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (key): NNCFLinear(
                  in_features=768, out_features=384, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (value): NNCFLinear(
                  in_features=768, out_features=384, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): BertSelfOutput(
                (dense): NNCFLinear(
                  in_features=384, out_features=768, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): BertIntermediate(
              (dense): NNCFLinear(
                in_features=768, out_features=336, bias=True
                (pre_ops): ModuleDict()
                (post_ops): ModuleDict()
              )
            )
            (output): BertOutput(
              (dense): NNCFLinear(
                in_features=336, out_features=768, bias=True
                (pre_ops): ModuleDict()
                (post_ops): ModuleDict()
              )
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (6): BertLayer(
            (attention): BertAttention(
              (self): BertSelfAttention(
                (query): NNCFLinear(
                  in_features=768, out_features=448, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (key): NNCFLinear(
                  in_features=768, out_features=448, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (value): NNCFLinear(
                  in_features=768, out_features=448, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): BertSelfOutput(
                (dense): NNCFLinear(
                  in_features=448, out_features=768, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): BertIntermediate(
              (dense): NNCFLinear(
                in_features=768, out_features=280, bias=True
                (pre_ops): ModuleDict()
                (post_ops): ModuleDict()
              )
            )
            (output): BertOutput(
              (dense): NNCFLinear(
                in_features=280, out_features=768, bias=True
                (pre_ops): ModuleDict()
                (post_ops): ModuleDict()
              )
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (7): BertLayer(
            (attention): BertAttention(
              (self): BertSelfAttention(
                (query): NNCFLinear(
                  in_features=768, out_features=448, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (key): NNCFLinear(
                  in_features=768, out_features=448, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (value): NNCFLinear(
                  in_features=768, out_features=448, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): BertSelfOutput(
                (dense): NNCFLinear(
                  in_features=448, out_features=768, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): BertIntermediate(
              (dense): NNCFLinear(
                in_features=768, out_features=211, bias=True
                (pre_ops): ModuleDict()
                (post_ops): ModuleDict()
              )
            )
            (output): BertOutput(
              (dense): NNCFLinear(
                in_features=211, out_features=768, bias=True
                (pre_ops): ModuleDict()
                (post_ops): ModuleDict()
              )
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (8): BertLayer(
            (attention): BertAttention(
              (self): BertSelfAttention(
                (query): NNCFLinear(
                  in_features=768, out_features=448, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (key): NNCFLinear(
                  in_features=768, out_features=448, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (value): NNCFLinear(
                  in_features=768, out_features=448, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): BertSelfOutput(
                (dense): NNCFLinear(
                  in_features=448, out_features=768, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): BertIntermediate(
              (dense): NNCFLinear(
                in_features=768, out_features=108, bias=True
                (pre_ops): ModuleDict()
                (post_ops): ModuleDict()
              )
            )
            (output): BertOutput(
              (dense): NNCFLinear(
                in_features=108, out_features=768, bias=True
                (pre_ops): ModuleDict()
                (post_ops): ModuleDict()
              )
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (9): BertLayer(
            (attention): BertAttention(
              (self): BertSelfAttention(
                (query): NNCFLinear(
                  in_features=768, out_features=320, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (key): NNCFLinear(
                  in_features=768, out_features=320, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (value): NNCFLinear(
                  in_features=768, out_features=320, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): BertSelfOutput(
                (dense): NNCFLinear(
                  in_features=320, out_features=768, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): BertIntermediate(
              (dense): NNCFLinear(
                in_features=768, out_features=53, bias=True
                (pre_ops): ModuleDict()
                (post_ops): ModuleDict()
              )
            )
            (output): BertOutput(
              (dense): NNCFLinear(
                in_features=53, out_features=768, bias=True
                (pre_ops): ModuleDict()
                (post_ops): ModuleDict()
              )
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (10): BertLayer(
            (attention): BertAttention(
              (self): BertSelfAttention(
                (query): NNCFLinear(
                  in_features=768, out_features=384, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (key): NNCFLinear(
                  in_features=768, out_features=384, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (value): NNCFLinear(
                  in_features=768, out_features=384, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): BertSelfOutput(
                (dense): NNCFLinear(
                  in_features=384, out_features=768, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): BertIntermediate(
              (dense): NNCFLinear(
                in_features=768, out_features=86, bias=True
                (pre_ops): ModuleDict()
                (post_ops): ModuleDict()
              )
            )
            (output): BertOutput(
              (dense): NNCFLinear(
                in_features=86, out_features=768, bias=True
                (pre_ops): ModuleDict()
                (post_ops): ModuleDict()
              )
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (11): BertLayer(
            (attention): BertAttention(
              (self): BertSelfAttention(
                (query): NNCFLinear(
                  in_features=768, out_features=384, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (key): NNCFLinear(
                  in_features=768, out_features=384, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (value): NNCFLinear(
                  in_features=768, out_features=384, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): BertSelfOutput(
                (dense): NNCFLinear(
                  in_features=384, out_features=768, bias=True
                  (pre_ops): ModuleDict()
                  (post_ops): ModuleDict()
                )
                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): BertIntermediate(
              (dense): NNCFLinear(
                in_features=768, out_features=105, bias=True
                (pre_ops): ModuleDict()
                (post_ops): ModuleDict()
              )
            )
            (output): BertOutput(
              (dense): NNCFLinear(
                in_features=105, out_features=768, bias=True
                (pre_ops): ModuleDict()
                (post_ops): ModuleDict()
              )
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
        )
      )
    )
    (qa_outputs): NNCFLinear(
      in_features=768, out_features=2, bias=True
      (pre_ops): ModuleDict()
      (post_ops): ModuleDict()
    )
  )
)
INFO:nncf:Collecting tensor statistics ████████          | 1 / 2
INFO:nncf:Collecting tensor statistics ████████████████  | 2 / 2
INFO:nncf:Set sign: True and scale: [0.3102, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[word_embeddings]/embedding_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[word_embeddings]/embedding_0
INFO:nncf:Set sign: True and scale: [0.6038, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[token_type_embeddings]/embedding_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[token_type_embeddings]/embedding_0
INFO:nncf:Set sign: True and scale: [0.7471, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/__add___0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/__add___0
INFO:nncf:Set sign: True and scale: [0.1617, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[position_embeddings]/embedding_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[position_embeddings]/embedding_0
INFO:nncf:Set sign: True and scale: [0.7471, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/__iadd___0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/__iadd___0
INFO:nncf:Set sign: True and scale: [6.4642, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Set sign: True and scale: [6.3328, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Set sign: True and scale: [13.2152, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/__truediv___0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/__truediv___0
INFO:nncf:Set sign: False and scale: [0.9097, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/Softmax/softmax_0
INFO:nncf:Performing unsigned activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/Softmax/softmax_0
INFO:nncf:Set sign: True and scale: [2.6530, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Set sign: True and scale: [1.5105, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/matmul_1
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/matmul_1
INFO:nncf:Set sign: True and scale: [1.0678, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [4.1636, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfOutput[output]/__add___0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfOutput[output]/__add___0
INFO:nncf:Set sign: True and scale: [7.5213, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermediate[intermediate]/gelu_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermediate[intermediate]/gelu_0
INFO:nncf:Set sign: True and scale: [7.2468, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [26.0014, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[output]/__add___0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[output]/__add___0
INFO:nncf:Set sign: True and scale: [7.3508, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Set sign: True and scale: [6.6369, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Set sign: True and scale: [22.0350, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/__truediv___0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/__truediv___0
INFO:nncf:Set sign: False and scale: [0.9929, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/Softmax/softmax_0
INFO:nncf:Performing unsigned activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/Softmax/softmax_0
INFO:nncf:Set sign: True and scale: [4.6145, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Set sign: True and scale: [2.3576, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/matmul_1
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/matmul_1
INFO:nncf:Set sign: True and scale: [1.0576, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [6.1697, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfOutput[output]/__add___0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfOutput[output]/__add___0
INFO:nncf:Set sign: True and scale: [8.2951, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermediate[intermediate]/gelu_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermediate[intermediate]/gelu_0
INFO:nncf:Set sign: True and scale: [2.3020, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [32.6324, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[output]/__add___0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[output]/__add___0
INFO:nncf:Set sign: True and scale: [9.5079, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Set sign: True and scale: [8.5606, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Set sign: True and scale: [43.0158, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/__truediv___0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/__truediv___0
INFO:nncf:Set sign: False and scale: [0.9999, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/Softmax/softmax_0
INFO:nncf:Performing unsigned activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/Softmax/softmax_0
INFO:nncf:Set sign: True and scale: [4.9327, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Set sign: True and scale: [2.1434, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/matmul_1
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/matmul_1
INFO:nncf:Set sign: True and scale: [1.1261, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [7.7405, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfOutput[output]/__add___0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfOutput[output]/__add___0
INFO:nncf:Set sign: True and scale: [5.6863, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermediate[intermediate]/gelu_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermediate[intermediate]/gelu_0
INFO:nncf:Set sign: True and scale: [1.7192, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [27.8657, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[output]/__add___0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[output]/__add___0
INFO:nncf:Set sign: True and scale: [6.3400, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Set sign: True and scale: [6.3571, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Set sign: True and scale: [16.8984, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/__truediv___0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/__truediv___0
INFO:nncf:Set sign: False and scale: [0.9659, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/Softmax/softmax_0
INFO:nncf:Performing unsigned activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/Softmax/softmax_0
INFO:nncf:Set sign: True and scale: [3.5732, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Set sign: True and scale: [2.4142, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/matmul_1
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/matmul_1
INFO:nncf:Set sign: True and scale: [1.4166, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [7.1450, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfOutput[output]/__add___0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfOutput[output]/__add___0
INFO:nncf:Set sign: True and scale: [6.4436, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermediate[intermediate]/gelu_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermediate[intermediate]/gelu_0
INFO:nncf:Set sign: True and scale: [1.7720, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [18.1251, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[output]/__add___0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[output]/__add___0
INFO:nncf:Set sign: True and scale: [5.6173, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Set sign: True and scale: [6.2416, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Set sign: True and scale: [12.2383, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/__truediv___0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/__truediv___0
INFO:nncf:Set sign: False and scale: [0.5249, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/Softmax/softmax_0
INFO:nncf:Performing unsigned activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/Softmax/softmax_0
INFO:nncf:Set sign: True and scale: [3.2927, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Set sign: True and scale: [2.2221, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/matmul_1
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/matmul_1
INFO:nncf:Set sign: True and scale: [1.3042, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [7.5106, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfOutput[output]/__add___0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfOutput[output]/__add___0
INFO:nncf:Set sign: True and scale: [5.8193, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermediate[intermediate]/gelu_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermediate[intermediate]/gelu_0
INFO:nncf:Set sign: True and scale: [1.5960, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [16.6634, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[output]/__add___0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[output]/__add___0
INFO:nncf:Set sign: True and scale: [5.8104, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Set sign: True and scale: [7.1760, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Set sign: True and scale: [19.2534, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/__truediv___0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/__truediv___0
INFO:nncf:Set sign: False and scale: [0.9088, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/Softmax/softmax_0
INFO:nncf:Performing unsigned activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/Softmax/softmax_0
INFO:nncf:Set sign: True and scale: [3.2773, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Set sign: True and scale: [2.3789, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/matmul_1
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/matmul_1
INFO:nncf:Set sign: True and scale: [1.3509, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [8.0278, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfOutput[output]/__add___0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfOutput[output]/__add___0
INFO:nncf:Set sign: True and scale: [11.2281, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermediate[intermediate]/gelu_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermediate[intermediate]/gelu_0
INFO:nncf:Set sign: True and scale: [2.5510, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [13.3404, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[output]/__add___0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[output]/__add___0
INFO:nncf:Set sign: True and scale: [5.9837, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Set sign: True and scale: [7.7394, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Set sign: True and scale: [16.1081, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/__truediv___0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/__truediv___0
INFO:nncf:Set sign: False and scale: [0.8820, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/Softmax/softmax_0
INFO:nncf:Performing unsigned activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/Softmax/softmax_0
INFO:nncf:Set sign: True and scale: [2.8653, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Set sign: True and scale: [2.0227, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/matmul_1
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/matmul_1
INFO:nncf:Set sign: True and scale: [1.1265, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [7.6260, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfOutput[output]/__add___0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfOutput[output]/__add___0
INFO:nncf:Set sign: True and scale: [10.1383, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermediate[intermediate]/gelu_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermediate[intermediate]/gelu_0
INFO:nncf:Set sign: True and scale: [2.0738, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [12.1558, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[output]/__add___0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[output]/__add___0
INFO:nncf:Set sign: True and scale: [6.2621, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Set sign: True and scale: [7.9600, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Set sign: True and scale: [15.8902, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/__truediv___0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/__truediv___0
INFO:nncf:Set sign: False and scale: [0.8493, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/Softmax/softmax_0
INFO:nncf:Performing unsigned activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/Softmax/softmax_0
INFO:nncf:Set sign: True and scale: [2.7640, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Set sign: True and scale: [2.1766, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/matmul_1
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/matmul_1
INFO:nncf:Set sign: True and scale: [1.7565, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [6.6154, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfOutput[output]/__add___0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfOutput[output]/__add___0
INFO:nncf:Set sign: True and scale: [12.0720, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermediate[intermediate]/gelu_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermediate[intermediate]/gelu_0
INFO:nncf:Set sign: True and scale: [2.7441, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [12.5571, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[output]/__add___0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[output]/__add___0
INFO:nncf:Set sign: True and scale: [6.7775, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Set sign: True and scale: [7.6945, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Set sign: True and scale: [15.1916, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/__truediv___0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/__truediv___0
INFO:nncf:Set sign: False and scale: [0.4918, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/Softmax/softmax_0
INFO:nncf:Performing unsigned activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/Softmax/softmax_0
INFO:nncf:Set sign: True and scale: [3.3290, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Set sign: True and scale: [2.5413, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/matmul_1
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/matmul_1
INFO:nncf:Set sign: True and scale: [1.2726, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [5.8699, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfOutput[output]/__add___0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfOutput[output]/__add___0
INFO:nncf:Set sign: True and scale: [9.0108, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermediate[intermediate]/gelu_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermediate[intermediate]/gelu_0
INFO:nncf:Set sign: True and scale: [3.2770, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [16.1354, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[output]/__add___0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[output]/__add___0
INFO:nncf:Set sign: True and scale: [5.5691, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Set sign: True and scale: [8.0957, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Set sign: True and scale: [12.0098, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/__truediv___0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/__truediv___0
INFO:nncf:Set sign: False and scale: [0.4007, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/Softmax/softmax_0
INFO:nncf:Performing unsigned activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/Softmax/softmax_0
INFO:nncf:Set sign: True and scale: [2.9702, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Set sign: True and scale: [2.1663, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/matmul_1
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/matmul_1
INFO:nncf:Set sign: True and scale: [1.2682, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [5.5587, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfOutput[output]/__add___0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfOutput[output]/__add___0
INFO:nncf:Set sign: True and scale: [13.8907, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermediate[intermediate]/gelu_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermediate[intermediate]/gelu_0
INFO:nncf:Set sign: True and scale: [2.9744, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [11.1289, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[output]/__add___0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[output]/__add___0
INFO:nncf:Set sign: True and scale: [4.5430, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Set sign: True and scale: [7.9071, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Set sign: True and scale: [11.5003, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/__truediv___0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/__truediv___0
INFO:nncf:Set sign: False and scale: [0.5586, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/Softmax/softmax_0
INFO:nncf:Performing unsigned activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/Softmax/softmax_0
INFO:nncf:Set sign: True and scale: [6.2925, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Set sign: True and scale: [5.2843, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/matmul_1
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/matmul_1
INFO:nncf:Set sign: True and scale: [1.9610, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [6.5934, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfOutput[output]/__add___0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfOutput[output]/__add___0
INFO:nncf:Set sign: True and scale: [9.1569, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertIntermediate[intermediate]/gelu_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertIntermediate[intermediate]/gelu_0
INFO:nncf:Set sign: True and scale: [3.3628, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [9.9668, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[output]/__add___0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[output]/__add___0
INFO:nncf:Set sign: True and scale: [4.3727, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Set sign: True and scale: [6.3919, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Set sign: True and scale: [11.2429, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/__truediv___0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/__truediv___0
INFO:nncf:Set sign: False and scale: [0.5345, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/Softmax/softmax_0
INFO:nncf:Performing unsigned activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/Softmax/softmax_0
INFO:nncf:Set sign: True and scale: [4.5022, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Set sign: True and scale: [3.6806, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/matmul_1
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/matmul_1
INFO:nncf:Set sign: True and scale: [1.8066, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [7.7527, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfOutput[output]/__add___0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfOutput[output]/__add___0
INFO:nncf:Set sign: True and scale: [7.1442, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertIntermediate[intermediate]/gelu_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertIntermediate[intermediate]/gelu_0
INFO:nncf:Set sign: True and scale: [3.5706, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [14.4304, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[output]/__add___0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[output]/__add___0
INFO:nncf:Set sign: True and scale: [2.8359, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Set sign: True and scale: [4.1685, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Set sign: True and scale: [26.1356, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Set sign: True and scale: [6.5463, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Set sign: True and scale: [31.8059, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Set sign: True and scale: [7.8890, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Set sign: True and scale: [26.9679, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Set sign: True and scale: [7.5738, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Set sign: True and scale: [17.8367, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Set sign: True and scale: [7.5737, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Set sign: True and scale: [15.8130, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Set sign: True and scale: [7.9781, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Set sign: True and scale: [12.5568, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Set sign: True and scale: [7.7418, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Set sign: True and scale: [10.9951, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Set sign: True and scale: [6.9427, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Set sign: True and scale: [13.5330, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Set sign: True and scale: [6.1915, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Set sign: True and scale: [14.7554, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Set sign: True and scale: [5.7933, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Set sign: True and scale: [10.3499, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Set sign: True and scale: [6.5209, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Set sign: True and scale: [9.4660, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Set sign: True and scale: [7.7494, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Set sign: True and scale: [14.1016, ] for TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfOutput[output]/LayerNorm[LayerNorm]/layer_norm_0
WARNING:nncf:The overflow issue fix will be applied. Now all weight quantizers will effectively use only 7 bits out of 8 bits. This resolves the overflow issue problem on AVX2 and AVX-512 machines. Please take a look at the documentation for a detailed information.
INFO:nncf:Set sign: True and scale: [0.2280, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[word_embeddings]/embedding_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[word_embeddings]/embedding_0
INFO:nncf:Set sign: True and scale: [0.1551, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[position_embeddings]/embedding_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[position_embeddings]/embedding_0
INFO:nncf:Set sign: True and scale: [0.5894, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[token_type_embeddings]/embedding_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[token_type_embeddings]/embedding_0
INFO:nncf:Set sign: True and scale: [0.2171, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Set sign: True and scale: [0.3125, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Set sign: True and scale: [0.1119, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Set sign: True and scale: [0.1805, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [0.1899, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [0.4780, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [0.1869, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Set sign: True and scale: [0.2278, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Set sign: True and scale: [0.1179, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Set sign: True and scale: [0.1659, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [0.1715, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [0.3295, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [0.3058, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Set sign: True and scale: [0.3194, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Set sign: True and scale: [0.1175, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Set sign: True and scale: [0.1345, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [0.3839, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [0.2657, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [0.2068, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Set sign: True and scale: [0.2180, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Set sign: True and scale: [0.1241, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Set sign: True and scale: [0.1482, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [0.3309, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [0.2841, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [0.1797, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Set sign: True and scale: [0.1788, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Set sign: True and scale: [0.1461, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Set sign: True and scale: [0.1489, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [0.2849, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [0.2799, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [0.1738, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Set sign: True and scale: [0.1816, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Set sign: True and scale: [0.1570, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Set sign: True and scale: [0.1515, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [0.2263, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [0.2835, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [0.1689, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Set sign: True and scale: [0.1792, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Set sign: True and scale: [0.1346, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Set sign: True and scale: [0.1372, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [0.1811, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [0.2911, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [0.1743, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Set sign: True and scale: [0.1844, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Set sign: True and scale: [0.1330, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Set sign: True and scale: [0.1359, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [0.2244, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [0.2513, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [0.1815, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Set sign: True and scale: [0.1927, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Set sign: True and scale: [0.1330, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Set sign: True and scale: [0.1297, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [0.1677, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [0.2830, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [0.1797, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Set sign: True and scale: [0.2035, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Set sign: True and scale: [0.1279, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Set sign: True and scale: [0.1226, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [0.2134, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [1.1807, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [0.1933, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Set sign: True and scale: [0.1842, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Set sign: True and scale: [0.1474, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Set sign: True and scale: [0.1218, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [0.4426, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [0.3693, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [0.2113, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
INFO:nncf:Set sign: True and scale: [0.1714, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
INFO:nncf:Set sign: True and scale: [0.1439, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
INFO:nncf:Set sign: True and scale: [0.1319, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [0.2404, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [0.2133, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[output]/NNCFLinear[dense]/linear_0
INFO:nncf:Set sign: True and scale: [0.1000, ] for TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/NNCFLinear[qa_outputs]/linear_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS BertForQuestionAnswering/NNCFLinear[qa_outputs]/linear_0
WARNING:nncf:Module `BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[token_type_embeddings]/embedding_0` has quantized weights and no quantized inputs!
WARNING:nncf:Module `BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[word_embeddings]/embedding_0` has quantized weights and no quantized inputs!
WARNING:nncf:Module `BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[position_embeddings]/embedding_0` has quantized weights and no quantized inputs!
INFO:nncf:BatchNorm statistics adaptation █                 | 1 / 13
INFO:nncf:BatchNorm statistics adaptation ██                | 2 / 13
INFO:nncf:BatchNorm statistics adaptation ███               | 3 / 13
INFO:nncf:BatchNorm statistics adaptation ████              | 4 / 13
INFO:nncf:BatchNorm statistics adaptation ██████            | 5 / 13
INFO:nncf:BatchNorm statistics adaptation ███████           | 6 / 13
INFO:nncf:BatchNorm statistics adaptation ████████          | 7 / 13
INFO:nncf:BatchNorm statistics adaptation █████████         | 8 / 13
INFO:nncf:BatchNorm statistics adaptation ███████████       | 9 / 13
INFO:nncf:BatchNorm statistics adaptation ████████████      | 10 / 13
INFO:nncf:BatchNorm statistics adaptation █████████████     | 11 / 13
INFO:nncf:BatchNorm statistics adaptation ██████████████    | 12 / 13
INFO:nncf:BatchNorm statistics adaptation ████████████████  | 13 / 13
WARNING:nncf:Graphviz is not installed - only the .dot model visualization format will be used. Install pygraphviz into your Python environment and graphviz system-wide to enable PNG rendering.
[INFO|trainer.py:434] 2022-01-16 13:21:10,698 >> max_steps is given, it will override any value given in num_train_epochs
[INFO|trainer.py:1209] 2022-01-16 13:21:10,729 >> ***** Running training *****
[INFO|trainer.py:1210] 2022-01-16 13:21:10,729 >>   Num examples = 88524
[INFO|trainer.py:1211] 2022-01-16 13:21:10,729 >>   Num Epochs = 1
[INFO|trainer.py:1212] 2022-01-16 13:21:10,729 >>   Instantaneous batch size per device = 16
[INFO|trainer.py:1213] 2022-01-16 13:21:10,729 >>   Total train batch size (w. parallel, distributed & accumulation) = 16
[INFO|trainer.py:1214] 2022-01-16 13:21:10,729 >>   Gradient Accumulation steps = 1
[INFO|trainer.py:1215] 2022-01-16 13:21:10,729 >>   Total optimization steps = 25
[WARNING|integrations.py:650] 2022-01-16 13:21:11,179 >> Trainer is attempting to log a value of "{0: [0, 2, 4, 5, 6, 7, 11], 1: [0, 2, 3, 5, 6, 7, 8], 2: [8, 4, 7], 3: [2, 4, 6], 4: [1, 2, 11], 5: [1, 2, 5, 6, 7, 11], 6: [0, 2, 3, 7, 10], 7: [1, 3, 6, 7, 11], 8: [0, 3, 4, 5, 8], 9: [1, 3, 4, 5, 7, 9, 10], 10: [1, 4, 5, 6, 7, 8], 11: [4, 5, 7, 8, 10, 11]}" for key "pruned_heads" as a parameter. MLflow's log_param() only accepts values no longer than 250 characters so we dropped this attribute.
[INFO|integrations.py:445] 2022-01-16 13:21:11,205 >> Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"
wandb: Currently logged in as: vchua (use `wandb login --relogin` to force relogin)
wandb: Tracking run with wandb version 0.12.9
wandb: Syncing run run10-bert-squad-cropped-qat-customkd-lt-5eph
wandb:  View project at https://wandb.ai/vchua/pruneofa-tl%20%28csr-dgx1-03%29
wandb:  View run at https://wandb.ai/vchua/pruneofa-tl%20%28csr-dgx1-03%29/runs/3ald1d3c
wandb: Run data is saved locally in /home/vchua/tld-poc/transformers/examples/pytorch/question-answering/wandb/run-20220116_132111-3ald1d3c
wandb: Run `wandb offline` to turn off syncing.

  0% 0/25 [00:00<?, ?it/s]/home/vchua/tld-poc/nncf/nncf/torch/quantization/quantize_functions.py:53: RuntimeWarning: grad_output is not contiguous!
  warnings.warn("grad_output is not contiguous!", RuntimeWarning)
  4% 1/25 [00:01<00:42,  1.76s/it]  4% 1/25 [00:01<00:42,  1.76s/it, loss=0.304]                                                4% 1/25 [00:01<00:42,  1.76s/it, loss=0.304]  8% 2/25 [00:02<00:27,  1.18s/it, loss=0.304]  8% 2/25 [00:02<00:27,  1.18s/it, loss=0.365]                                              Statistics of the quantization algorithm:
+--------------------------------+-------+
|        Statistic's name        | Value |
+================================+=======+
| Ratio of enabled quantizations | 100   |
+--------------------------------+-------+

Statistics of the quantization share:
+----------------------------------+----------------------+
|         Statistic's name         |        Value         |
+==================================+======================+
| Symmetric WQs / All placed WQs   | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Asymmetric WQs / All placed WQs  | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Signed WQs / All placed WQs      | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Unsigned WQs / All placed WQs    | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Per-tensor WQs / All placed WQs  | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Per-channel WQs / All placed WQs | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Placed WQs / Potential WQs       | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Symmetric AQs / All placed AQs   | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Asymmetric AQs / All placed AQs  | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+
| Signed AQs / All placed AQs      | 92.59 % (150 / 162)  |
+----------------------------------+----------------------+
| Unsigned AQs / All placed AQs    | 7.41 % (12 / 162)    |
+----------------------------------+----------------------+
| Per-tensor AQs / All placed AQs  | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Per-channel AQs / All placed AQs | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+

Statistics of the bitwidth distribution:
+--------------+---------------------+--------------------+--------------------+
| Num bits (N) | N-bits WQs / Placed |    N-bits AQs /    | N-bits Qs / Placed |
|              |         WQs         |     Placed AQs     |         Qs         |
+==============+=====================+====================+====================+
| 8            | 100.00 % (76 / 76)  | 100.00 % (162 /    | 100.00 % (238 /    |
|              |                     | 162)               | 238)               |
+--------------+---------------------+--------------------+--------------------+
Statistics of the quantization algorithm:
+--------------------------------+-------+
|        Statistic's name        | Value |
+================================+=======+
| Ratio of enabled quantizations | 100   |
+--------------------------------+-------+

Statistics of the quantization share:
+----------------------------------+----------------------+
|         Statistic's name         |        Value         |
+==================================+======================+
| Symmetric WQs / All placed WQs   | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Asymmetric WQs / All placed WQs  | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Signed WQs / All placed WQs      | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Unsigned WQs / All placed WQs    | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Per-tensor WQs / All placed WQs  | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Per-channel WQs / All placed WQs | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
  8% 2/25 [00:02<00:27,  1.18s/it, loss=0.365]| Placed WQs / Potential WQs       | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Symmetric AQs / All placed AQs   | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Asymmetric AQs / All placed AQs  | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+
| Signed AQs / All placed AQs      | 92.59 % (150 / 162)  |
+----------------------------------+----------------------+
| Unsigned AQs / All placed AQs    | 7.41 % (12 / 162)    |
+----------------------------------+----------------------+
| Per-tensor AQs / All placed AQs  | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Per-channel AQs / All placed AQs | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+

Statistics of the bitwidth distribution:
+--------------+---------------------+--------------------+--------------------+
| Num bits (N) | N-bits WQs / Placed |    N-bits AQs /    | N-bits Qs / Placed |
|              |         WQs         |     Placed AQs     |         Qs         |
+==============+=====================+====================+====================+
| 8            | 100.00 % (76 / 76)  | 100.00 % (162 /    | 100.00 % (238 /    |
|              |                     | 162)               | 238)               |
+--------------+---------------------+--------------------+--------------------+
{'loss': 0.3044, 'learning_rate': 4.2857142857142855e-06, 'compression_loss': 0.0, 'label_loss': 0.2878532409667969, 'teacher_loss': 0.30623510479927063, 'epoch': 0.0}
Statistics of the quantization algorithm:
+--------------------------------+-------+
|        Statistic's name        | Value |
+================================+=======+
| Ratio of enabled quantizations | 100   |
+--------------------------------+-------+

Statistics of the quantization share:
+----------------------------------+----------------------+
|         Statistic's name         |        Value         |
+==================================+======================+
| Symmetric WQs / All placed WQs   | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Asymmetric WQs / All placed WQs  | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Signed WQs / All placed WQs      | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Unsigned WQs / All placed WQs    | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Per-tensor WQs / All placed WQs  | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Per-channel WQs / All placed WQs | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Placed WQs / Potential WQs       | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Symmetric AQs / All placed AQs   | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Asymmetric AQs / All placed AQs  | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+
| Signed AQs / All placed AQs      | 92.59 % (150 / 162)  |
+----------------------------------+----------------------+
| Unsigned AQs / All placed AQs    | 7.41 % (12 / 162)    |
+----------------------------------+----------------------+
| Per-tensor AQs / All placed AQs  | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Per-channel AQs / All placed AQs | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+

Statistics of the bitwidth distribution:
+--------------+---------------------+--------------------+--------------------+
| Num bits (N) | N-bits WQs / Placed |    N-bits AQs /    | N-bits Qs / Placed |
|              |         WQs         |     Placed AQs     |         Qs         |
+==============+=====================+====================+====================+
| 8            | 100.00 % (76 / 76)  | 100.00 % (162 /    | 100.00 % (238 /    |
|              |                     | 162)               | 238)               |
+--------------+---------------------+--------------------+--------------------+
 12% 3/25 [00:03<00:21,  1.01it/s, loss=0.365] 12% 3/25 [00:03<00:21,  1.01it/s, loss=0.395]                                               12% 3/25 [00:03<00:21,  1.01it/s, loss=0.395] 16% 4/25 [00:04<00:18,  1.11it/s, loss=0.395] 16% 4/25 [00:04<00:18,  1.11it/s, loss=0.38]                                               16% 4/25 [00:04<00:18,  1.11it/s, loss=0.38] 20% 5/25 [00:04<00:16,  1.19it/s, loss=0.38] 20% 5/25 [00:04<00:16,  1.19it/s, loss=0.334]{'loss': 0.3654, 'learning_rate': 8.571428571428571e-06, 'compression_loss': 0.0, 'label_loss': 0.49937474727630615, 'teacher_loss': 0.35050299763679504, 'epoch': 0.0}
Statistics of the quantization algorithm:
+--------------------------------+-------+
|        Statistic's name        | Value |
+================================+=======+
| Ratio of enabled quantizations | 100   |
+--------------------------------+-------+

Statistics of the quantization share:
+----------------------------------+----------------------+
|         Statistic's name         |        Value         |
+==================================+======================+
| Symmetric WQs / All placed WQs   | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Asymmetric WQs / All placed WQs  | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Signed WQs / All placed WQs      | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Unsigned WQs / All placed WQs    | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Per-tensor WQs / All placed WQs  | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Per-channel WQs / All placed WQs | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Placed WQs / Potential WQs       | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Symmetric AQs / All placed AQs   | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Asymmetric AQs / All placed AQs  | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+
| Signed AQs / All placed AQs      | 92.59 % (150 / 162)  |
+----------------------------------+----------------------+
| Unsigned AQs / All placed AQs    | 7.41 % (12 / 162)    |
+----------------------------------+----------------------+
| Per-tensor AQs / All placed AQs  | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Per-channel AQs / All placed AQs | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+

Statistics of the bitwidth distribution:
+--------------+---------------------+--------------------+--------------------+
| Num bits (N) | N-bits WQs / Placed |    N-bits AQs /    | N-bits Qs / Placed |
|              |         WQs         |     Placed AQs     |         Qs         |
+==============+=====================+====================+====================+
| 8            | 100.00 % (76 / 76)  | 100.00 % (162 /    | 100.00 % (238 /    |
|              |                     | 162)               | 238)               |
+--------------+---------------------+--------------------+--------------------+
{'loss': 0.3953, 'learning_rate': 1.2857142857142857e-05, 'compression_loss': 0.0, 'label_loss': 0.7557134628295898, 'teacher_loss': 0.35520607233047485, 'epoch': 0.0}
Statistics of the quantization algorithm:
+--------------------------------+-------+
|        Statistic's name        | Value |
+================================+=======+
| Ratio of enabled quantizations | 100   |
+--------------------------------+-------+

Statistics of the quantization share:
+----------------------------------+----------------------+
|         Statistic's name         |        Value         |
+==================================+======================+
| Symmetric WQs / All placed WQs   | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Asymmetric WQs / All placed WQs  | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Signed WQs / All placed WQs      | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Unsigned WQs / All placed WQs    | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Per-tensor WQs / All placed WQs  | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Per-channel WQs / All placed WQs | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Placed WQs / Potential WQs       | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Symmetric AQs / All placed AQs   | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Asymmetric AQs / All placed AQs  | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+
| Signed AQs / All placed AQs      | 92.59 % (150 / 162)  |
+----------------------------------+----------------------+
| Unsigned AQs / All placed AQs    | 7.41 % (12 / 162)    |
+----------------------------------+----------------------+
| Per-tensor AQs / All placed AQs  | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Per-channel AQs / All placed AQs | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+

Statistics of the bitwidth distribution:
+--------------+---------------------+--------------------+--------------------+
| Num bits (N) | N-bits WQs / Placed |    N-bits AQs /    | N-bits Qs / Placed |
|              |         WQs         |     Placed AQs     |         Qs         |
+==============+=====================+====================+====================+
| 8            | 100.00 % (76 / 76)  | 100.00 % (162 /    | 100.00 % (238 /    |
|              |                     | 162)               | 238)               |
+--------------+---------------------+--------------------+--------------------+
{'loss': 0.38, 'learning_rate': 1.7142857142857142e-05, 'compression_loss': 0.0, 'label_loss': 0.7107768654823303, 'teacher_loss': 0.34323692321777344, 'epoch': 0.0}
                                               20% 5/25 [00:04<00:16,  1.19it/s, loss=0.334] 24% 6/25 [00:05<00:15,  1.24it/s, loss=0.334] 24% 6/25 [00:05<00:15,  1.24it/s, loss=0.265]                                               24% 6/25 [00:05<00:15,  1.24it/s, loss=0.265] 28% 7/25 [00:06<00:14,  1.28it/s, loss=0.265] 28% 7/25 [00:06<00:14,  1.28it/s, loss=0.368]Statistics of the quantization algorithm:
+--------------------------------+-------+
|        Statistic's name        | Value |
+================================+=======+
| Ratio of enabled quantizations | 100   |
+--------------------------------+-------+

Statistics of the quantization share:
+----------------------------------+----------------------+
|         Statistic's name         |        Value         |
+==================================+======================+
| Symmetric WQs / All placed WQs   | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Asymmetric WQs / All placed WQs  | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Signed WQs / All placed WQs      | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Unsigned WQs / All placed WQs    | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Per-tensor WQs / All placed WQs  | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Per-channel WQs / All placed WQs | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Placed WQs / Potential WQs       | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Symmetric AQs / All placed AQs   | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Asymmetric AQs / All placed AQs  | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+
| Signed AQs / All placed AQs      | 92.59 % (150 / 162)  |
+----------------------------------+----------------------+
| Unsigned AQs / All placed AQs    | 7.41 % (12 / 162)    |
+----------------------------------+----------------------+
| Per-tensor AQs / All placed AQs  | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Per-channel AQs / All placed AQs | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+

Statistics of the bitwidth distribution:
+--------------+---------------------+--------------------+--------------------+
| Num bits (N) | N-bits WQs / Placed |    N-bits AQs /    | N-bits Qs / Placed |
|              |         WQs         |     Placed AQs     |         Qs         |
+==============+=====================+====================+====================+
| 8            | 100.00 % (76 / 76)  | 100.00 % (162 /    | 100.00 % (238 /    |
|              |                     | 162)               | 238)               |
+--------------+---------------------+--------------------+--------------------+
{'loss': 0.3337, 'learning_rate': 2.1428571428571428e-05, 'compression_loss': 0.0, 'label_loss': 0.6693242788314819, 'teacher_loss': 0.2964354157447815, 'epoch': 0.0}
Statistics of the quantization algorithm:
+--------------------------------+-------+
|        Statistic's name        | Value |
+================================+=======+
| Ratio of enabled quantizations | 100   |
+--------------------------------+-------+

Statistics of the quantization share:
+----------------------------------+----------------------+
|         Statistic's name         |        Value         |
+==================================+======================+
| Symmetric WQs / All placed WQs   | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Asymmetric WQs / All placed WQs  | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Signed WQs / All placed WQs      | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Unsigned WQs / All placed WQs    | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Per-tensor WQs / All placed WQs  | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Per-channel WQs / All placed WQs | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Placed WQs / Potential WQs       | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Symmetric AQs / All placed AQs   | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Asymmetric AQs / All placed AQs  | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+
| Signed AQs / All placed AQs      | 92.59 % (150 / 162)  |
+----------------------------------+----------------------+
| Unsigned AQs / All placed AQs    | 7.41 % (12 / 162)    |
+----------------------------------+----------------------+
| Per-tensor AQs / All placed AQs  | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Per-channel AQs / All placed AQs | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+

Statistics of the bitwidth distribution:
+--------------+---------------------+--------------------+--------------------+
| Num bits (N) | N-bits WQs / Placed |    N-bits AQs /    | N-bits Qs / Placed |
|              |         WQs         |     Placed AQs     |         Qs         |
+==============+=====================+====================+====================+
| 8            | 100.00 % (76 / 76)  | 100.00 % (162 /    | 100.00 % (238 /    |
|              |                     | 162)               | 238)               |
+--------------+---------------------+--------------------+--------------------+
{'loss': 0.2649, 'learning_rate': 2.5714285714285714e-05, 'compression_loss': 0.0, 'label_loss': 0.6224533319473267, 'teacher_loss': 0.2251352071762085, 'epoch': 0.0}
                                               28% 7/25 [00:06<00:14,  1.28it/s, loss=0.368] 32% 8/25 [00:07<00:13,  1.30it/s, loss=0.368] 32% 8/25 [00:07<00:13,  1.30it/s, loss=0.768]                                               32% 8/25 [00:07<00:13,  1.30it/s, loss=0.768] 36% 9/25 [00:07<00:12,  1.30it/s, loss=0.768] 36% 9/25 [00:07<00:12,  1.30it/s, loss=0.225]Statistics of the quantization algorithm:
+--------------------------------+-------+
|        Statistic's name        | Value |
+================================+=======+
| Ratio of enabled quantizations | 100   |
+--------------------------------+-------+

Statistics of the quantization share:
+----------------------------------+----------------------+
|         Statistic's name         |        Value         |
+==================================+======================+
| Symmetric WQs / All placed WQs   | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Asymmetric WQs / All placed WQs  | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Signed WQs / All placed WQs      | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Unsigned WQs / All placed WQs    | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Per-tensor WQs / All placed WQs  | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Per-channel WQs / All placed WQs | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Placed WQs / Potential WQs       | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Symmetric AQs / All placed AQs   | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Asymmetric AQs / All placed AQs  | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+
| Signed AQs / All placed AQs      | 92.59 % (150 / 162)  |
+----------------------------------+----------------------+
| Unsigned AQs / All placed AQs    | 7.41 % (12 / 162)    |
+----------------------------------+----------------------+
| Per-tensor AQs / All placed AQs  | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Per-channel AQs / All placed AQs | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+

Statistics of the bitwidth distribution:
+--------------+---------------------+--------------------+--------------------+
| Num bits (N) | N-bits WQs / Placed |    N-bits AQs /    | N-bits Qs / Placed |
|              |         WQs         |     Placed AQs     |         Qs         |
+==============+=====================+====================+====================+
| 8            | 100.00 % (76 / 76)  | 100.00 % (162 /    | 100.00 % (238 /    |
|              |                     | 162)               | 238)               |
+--------------+---------------------+--------------------+--------------------+
{'loss': 0.3679, 'learning_rate': 3e-05, 'compression_loss': 0.0, 'label_loss': 0.601239025592804, 'teacher_loss': 0.3419259190559387, 'epoch': 0.0}
Statistics of the quantization algorithm:
+--------------------------------+-------+
|        Statistic's name        | Value |
+================================+=======+
| Ratio of enabled quantizations | 100   |
+--------------------------------+-------+

Statistics of the quantization share:
+----------------------------------+----------------------+
|         Statistic's name         |        Value         |
+==================================+======================+
| Symmetric WQs / All placed WQs   | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Asymmetric WQs / All placed WQs  | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Signed WQs / All placed WQs      | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Unsigned WQs / All placed WQs    | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Per-tensor WQs / All placed WQs  | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Per-channel WQs / All placed WQs | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Placed WQs / Potential WQs       | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Symmetric AQs / All placed AQs   | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Asymmetric AQs / All placed AQs  | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+
| Signed AQs / All placed AQs      | 92.59 % (150 / 162)  |
+----------------------------------+----------------------+
| Unsigned AQs / All placed AQs    | 7.41 % (12 / 162)    |
+----------------------------------+----------------------+
| Per-tensor AQs / All placed AQs  | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Per-channel AQs / All placed AQs | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+

Statistics of the bitwidth distribution:
+--------------+---------------------+--------------------+--------------------+
| Num bits (N) | N-bits WQs / Placed |    N-bits AQs /    | N-bits Qs / Placed |
|              |         WQs         |     Placed AQs     |         Qs         |
+==============+=====================+====================+====================+
| 8            | 100.00 % (76 / 76)  | 100.00 % (162 /    | 100.00 % (238 /    |
|              |                     | 162)               | 238)               |
+--------------+---------------------+--------------------+--------------------+
{'loss': 0.7683, 'learning_rate': 2.977211629518312e-05, 'compression_loss': 0.0, 'label_loss': 0.6015853881835938, 'teacher_loss': 0.7868368029594421, 'epoch': 0.0}
                                               36% 9/25 [00:07<00:12,  1.30it/s, loss=0.225] 40% 10/25 [00:08<00:11,  1.31it/s, loss=0.225] 40% 10/25 [00:08<00:11,  1.31it/s, loss=0.37]                                                40% 10/25 [00:08<00:11,  1.31it/s, loss=0.37] 44% 11/25 [00:09<00:10,  1.32it/s, loss=0.37] 44% 11/25 [00:09<00:10,  1.32it/s, loss=0.33]Statistics of the quantization algorithm:
+--------------------------------+-------+
|        Statistic's name        | Value |
+================================+=======+
| Ratio of enabled quantizations | 100   |
+--------------------------------+-------+

Statistics of the quantization share:
+----------------------------------+----------------------+
|         Statistic's name         |        Value         |
+==================================+======================+
| Symmetric WQs / All placed WQs   | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Asymmetric WQs / All placed WQs  | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Signed WQs / All placed WQs      | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Unsigned WQs / All placed WQs    | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Per-tensor WQs / All placed WQs  | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Per-channel WQs / All placed WQs | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Placed WQs / Potential WQs       | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Symmetric AQs / All placed AQs   | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Asymmetric AQs / All placed AQs  | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+
| Signed AQs / All placed AQs      | 92.59 % (150 / 162)  |
+----------------------------------+----------------------+
| Unsigned AQs / All placed AQs    | 7.41 % (12 / 162)    |
+----------------------------------+----------------------+
| Per-tensor AQs / All placed AQs  | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Per-channel AQs / All placed AQs | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+

Statistics of the bitwidth distribution:
+--------------+---------------------+--------------------+--------------------+
| Num bits (N) | N-bits WQs / Placed |    N-bits AQs /    | N-bits Qs / Placed |
|              |         WQs         |     Placed AQs     |         Qs         |
+==============+=====================+====================+====================+
| 8            | 100.00 % (76 / 76)  | 100.00 % (162 /    | 100.00 % (238 /    |
|              |                     | 162)               | 238)               |
+--------------+---------------------+--------------------+--------------------+
{'loss': 0.2246, 'learning_rate': 2.9095389311788626e-05, 'compression_loss': 0.0, 'label_loss': 0.3573703169822693, 'teacher_loss': 0.20986995100975037, 'epoch': 0.0}
Statistics of the quantization algorithm:
+--------------------------------+-------+
|        Statistic's name        | Value |
+================================+=======+
| Ratio of enabled quantizations | 100   |
+--------------------------------+-------+

Statistics of the quantization share:
+----------------------------------+----------------------+
|         Statistic's name         |        Value         |
+==================================+======================+
| Symmetric WQs / All placed WQs   | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Asymmetric WQs / All placed WQs  | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Signed WQs / All placed WQs      | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Unsigned WQs / All placed WQs    | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Per-tensor WQs / All placed WQs  | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Per-channel WQs / All placed WQs | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Placed WQs / Potential WQs       | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Symmetric AQs / All placed AQs   | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Asymmetric AQs / All placed AQs  | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+
| Signed AQs / All placed AQs      | 92.59 % (150 / 162)  |
+----------------------------------+----------------------+
| Unsigned AQs / All placed AQs    | 7.41 % (12 / 162)    |
+----------------------------------+----------------------+
| Per-tensor AQs / All placed AQs  | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Per-channel AQs / All placed AQs | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+

Statistics of the bitwidth distribution:
+--------------+---------------------+--------------------+--------------------+
| Num bits (N) | N-bits WQs / Placed |    N-bits AQs /    | N-bits Qs / Placed |
|              |         WQs         |     Placed AQs     |         Qs         |
+==============+=====================+====================+====================+
| 8            | 100.00 % (76 / 76)  | 100.00 % (162 /    | 100.00 % (238 /    |
|              |                     | 162)               | 238)               |
+--------------+---------------------+--------------------+--------------------+
{'loss': 0.3697, 'learning_rate': 2.7990381056766583e-05, 'compression_loss': 0.0, 'label_loss': 0.40208685398101807, 'teacher_loss': 0.36606425046920776, 'epoch': 0.0}
                                               44% 11/25 [00:09<00:10,  1.32it/s, loss=0.33] 48% 12/25 [00:10<00:09,  1.33it/s, loss=0.33] 48% 12/25 [00:10<00:09,  1.33it/s, loss=0.187]                                                48% 12/25 [00:10<00:09,  1.33it/s, loss=0.187] 52% 13/25 [00:10<00:09,  1.32it/s, loss=0.187] 52% 13/25 [00:10<00:09,  1.32it/s, loss=0.309]Statistics of the quantization algorithm:
+--------------------------------+-------+
|        Statistic's name        | Value |
+================================+=======+
| Ratio of enabled quantizations | 100   |
+--------------------------------+-------+

Statistics of the quantization share:
+----------------------------------+----------------------+
|         Statistic's name         |        Value         |
+==================================+======================+
| Symmetric WQs / All placed WQs   | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Asymmetric WQs / All placed WQs  | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Signed WQs / All placed WQs      | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Unsigned WQs / All placed WQs    | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Per-tensor WQs / All placed WQs  | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Per-channel WQs / All placed WQs | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Placed WQs / Potential WQs       | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Symmetric AQs / All placed AQs   | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Asymmetric AQs / All placed AQs  | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+
| Signed AQs / All placed AQs      | 92.59 % (150 / 162)  |
+----------------------------------+----------------------+
| Unsigned AQs / All placed AQs    | 7.41 % (12 / 162)    |
+----------------------------------+----------------------+
| Per-tensor AQs / All placed AQs  | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Per-channel AQs / All placed AQs | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+

Statistics of the bitwidth distribution:
+--------------+---------------------+--------------------+--------------------+
| Num bits (N) | N-bits WQs / Placed |    N-bits AQs /    | N-bits Qs / Placed |
|              |         WQs         |     Placed AQs     |         Qs         |
+==============+=====================+====================+====================+
| 8            | 100.00 % (76 / 76)  | 100.00 % (162 /    | 100.00 % (238 /    |
|              |                     | 162)               | 238)               |
+--------------+---------------------+--------------------+--------------------+
{'loss': 0.3297, 'learning_rate': 2.649066664678467e-05, 'compression_loss': 0.0, 'label_loss': 0.6352951526641846, 'teacher_loss': 0.2956993579864502, 'epoch': 0.0}
Statistics of the quantization algorithm:
+--------------------------------+-------+
|        Statistic's name        | Value |
+================================+=======+
| Ratio of enabled quantizations | 100   |
+--------------------------------+-------+

Statistics of the quantization share:
+----------------------------------+----------------------+
|         Statistic's name         |        Value         |
+==================================+======================+
| Symmetric WQs / All placed WQs   | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Asymmetric WQs / All placed WQs  | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Signed WQs / All placed WQs      | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Unsigned WQs / All placed WQs    | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Per-tensor WQs / All placed WQs  | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Per-channel WQs / All placed WQs | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Placed WQs / Potential WQs       | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Symmetric AQs / All placed AQs   | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Asymmetric AQs / All placed AQs  | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+
| Signed AQs / All placed AQs      | 92.59 % (150 / 162)  |
+----------------------------------+----------------------+
| Unsigned AQs / All placed AQs    | 7.41 % (12 / 162)    |
+----------------------------------+----------------------+
| Per-tensor AQs / All placed AQs  | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Per-channel AQs / All placed AQs | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+

Statistics of the bitwidth distribution:
+--------------+---------------------+--------------------+--------------------+
| Num bits (N) | N-bits WQs / Placed |    N-bits AQs /    | N-bits Qs / Placed |
|              |         WQs         |     Placed AQs     |         Qs         |
+==============+=====================+====================+====================+
| 8            | 100.00 % (76 / 76)  | 100.00 % (162 /    | 100.00 % (238 /    |
|              |                     | 162)               | 238)               |
+--------------+---------------------+--------------------+--------------------+
{'loss': 0.1867, 'learning_rate': 2.464181414529809e-05, 'compression_loss': 0.0, 'label_loss': 0.12262766063213348, 'teacher_loss': 0.19385287165641785, 'epoch': 0.0}
                                                52% 13/25 [00:10<00:09,  1.32it/s, loss=0.309] 56% 14/25 [00:11<00:08,  1.32it/s, loss=0.309] 56% 14/25 [00:11<00:08,  1.32it/s, loss=0.311]                                                56% 14/25 [00:11<00:08,  1.32it/s, loss=0.311] 60% 15/25 [00:12<00:07,  1.32it/s, loss=0.311] 60% 15/25 [00:12<00:07,  1.32it/s, loss=0.396]Statistics of the quantization algorithm:
+--------------------------------+-------+
|        Statistic's name        | Value |
+================================+=======+
| Ratio of enabled quantizations | 100   |
+--------------------------------+-------+

Statistics of the quantization share:
+----------------------------------+----------------------+
|         Statistic's name         |        Value         |
+==================================+======================+
| Symmetric WQs / All placed WQs   | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Asymmetric WQs / All placed WQs  | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Signed WQs / All placed WQs      | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Unsigned WQs / All placed WQs    | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Per-tensor WQs / All placed WQs  | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Per-channel WQs / All placed WQs | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Placed WQs / Potential WQs       | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Symmetric AQs / All placed AQs   | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Asymmetric AQs / All placed AQs  | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+
| Signed AQs / All placed AQs      | 92.59 % (150 / 162)  |
+----------------------------------+----------------------+
| Unsigned AQs / All placed AQs    | 7.41 % (12 / 162)    |
+----------------------------------+----------------------+
| Per-tensor AQs / All placed AQs  | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Per-channel AQs / All placed AQs | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+

Statistics of the bitwidth distribution:
+--------------+---------------------+--------------------+--------------------+
| Num bits (N) | N-bits WQs / Placed |    N-bits AQs /    | N-bits Qs / Placed |
|              |         WQs         |     Placed AQs     |         Qs         |
+==============+=====================+====================+====================+
| 8            | 100.00 % (76 / 76)  | 100.00 % (162 /    | 100.00 % (238 /    |
|              |                     | 162)               | 238)               |
+--------------+---------------------+--------------------+--------------------+
{'loss': 0.3087, 'learning_rate': 2.25e-05, 'compression_loss': 0.0, 'label_loss': 0.17887291312217712, 'teacher_loss': 0.3230943977832794, 'epoch': 0.0}
Statistics of the quantization algorithm:
+--------------------------------+-------+
|        Statistic's name        | Value |
+================================+=======+
| Ratio of enabled quantizations | 100   |
+--------------------------------+-------+

Statistics of the quantization share:
+----------------------------------+----------------------+
|         Statistic's name         |        Value         |
+==================================+======================+
| Symmetric WQs / All placed WQs   | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Asymmetric WQs / All placed WQs  | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Signed WQs / All placed WQs      | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Unsigned WQs / All placed WQs    | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Per-tensor WQs / All placed WQs  | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Per-channel WQs / All placed WQs | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Placed WQs / Potential WQs       | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Symmetric AQs / All placed AQs   | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Asymmetric AQs / All placed AQs  | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+
| Signed AQs / All placed AQs      | 92.59 % (150 / 162)  |
+----------------------------------+----------------------+
| Unsigned AQs / All placed AQs    | 7.41 % (12 / 162)    |
+----------------------------------+----------------------+
| Per-tensor AQs / All placed AQs  | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Per-channel AQs / All placed AQs | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+

Statistics of the bitwidth distribution:
+--------------+---------------------+--------------------+--------------------+
| Num bits (N) | N-bits WQs / Placed |    N-bits AQs /    | N-bits Qs / Placed |
|              |         WQs         |     Placed AQs     |         Qs         |
+==============+=====================+====================+====================+
| 8            | 100.00 % (76 / 76)  | 100.00 % (162 /    | 100.00 % (238 /    |
|              |                     | 162)               | 238)               |
+--------------+---------------------+--------------------+--------------------+
{'loss': 0.3115, 'learning_rate': 2.0130302149885033e-05, 'compression_loss': 0.0, 'label_loss': 0.30509212613105774, 'teacher_loss': 0.31218934059143066, 'epoch': 0.0}
                                                60% 15/25 [00:12<00:07,  1.32it/s, loss=0.396] 64% 16/25 [00:13<00:06,  1.33it/s, loss=0.396] 64% 16/25 [00:13<00:06,  1.33it/s, loss=0.317]                                                64% 16/25 [00:13<00:06,  1.33it/s, loss=0.317] 68% 17/25 [00:13<00:06,  1.33it/s, loss=0.317] 68% 17/25 [00:13<00:06,  1.33it/s, loss=0.368]Statistics of the quantization algorithm:
+--------------------------------+-------+
|        Statistic's name        | Value |
+================================+=======+
| Ratio of enabled quantizations | 100   |
+--------------------------------+-------+

Statistics of the quantization share:
+----------------------------------+----------------------+
|         Statistic's name         |        Value         |
+==================================+======================+
| Symmetric WQs / All placed WQs   | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Asymmetric WQs / All placed WQs  | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Signed WQs / All placed WQs      | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Unsigned WQs / All placed WQs    | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Per-tensor WQs / All placed WQs  | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Per-channel WQs / All placed WQs | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Placed WQs / Potential WQs       | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Symmetric AQs / All placed AQs   | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Asymmetric AQs / All placed AQs  | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+
| Signed AQs / All placed AQs      | 92.59 % (150 / 162)  |
+----------------------------------+----------------------+
| Unsigned AQs / All placed AQs    | 7.41 % (12 / 162)    |
+----------------------------------+----------------------+
| Per-tensor AQs / All placed AQs  | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Per-channel AQs / All placed AQs | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+

Statistics of the bitwidth distribution:
+--------------+---------------------+--------------------+--------------------+
| Num bits (N) | N-bits WQs / Placed |    N-bits AQs /    | N-bits Qs / Placed |
|              |         WQs         |     Placed AQs     |         Qs         |
+==============+=====================+====================+====================+
| 8            | 100.00 % (76 / 76)  | 100.00 % (162 /    | 100.00 % (238 /    |
|              |                     | 162)               | 238)               |
+--------------+---------------------+--------------------+--------------------+
{'loss': 0.3959, 'learning_rate': 1.760472266500396e-05, 'compression_loss': 0.0, 'label_loss': 0.5257797837257385, 'teacher_loss': 0.3814351558685303, 'epoch': 0.0}
Statistics of the quantization algorithm:
+--------------------------------+-------+
|        Statistic's name        | Value |
+================================+=======+
| Ratio of enabled quantizations | 100   |
+--------------------------------+-------+

Statistics of the quantization share:
+----------------------------------+----------------------+
|         Statistic's name         |        Value         |
+==================================+======================+
| Symmetric WQs / All placed WQs   | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Asymmetric WQs / All placed WQs  | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Signed WQs / All placed WQs      | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Unsigned WQs / All placed WQs    | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Per-tensor WQs / All placed WQs  | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Per-channel WQs / All placed WQs | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Placed WQs / Potential WQs       | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Symmetric AQs / All placed AQs   | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Asymmetric AQs / All placed AQs  | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+
| Signed AQs / All placed AQs      | 92.59 % (150 / 162)  |
+----------------------------------+----------------------+
| Unsigned AQs / All placed AQs    | 7.41 % (12 / 162)    |
+----------------------------------+----------------------+
| Per-tensor AQs / All placed AQs  | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Per-channel AQs / All placed AQs | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+

Statistics of the bitwidth distribution:
+--------------+---------------------+--------------------+--------------------+
| Num bits (N) | N-bits WQs / Placed |    N-bits AQs /    | N-bits Qs / Placed |
|              |         WQs         |     Placed AQs     |         Qs         |
+==============+=====================+====================+====================+
| 8            | 100.00 % (76 / 76)  | 100.00 % (162 /    | 100.00 % (238 /    |
|              |                     | 162)               | 238)               |
+--------------+---------------------+--------------------+--------------------+
{'loss': 0.3169, 'learning_rate': 1.5e-05, 'compression_loss': 0.0, 'label_loss': 0.5662463903427124, 'teacher_loss': 0.28917646408081055, 'epoch': 0.0}
                                                68% 17/25 [00:13<00:06,  1.33it/s, loss=0.368] 72% 18/25 [00:14<00:05,  1.31it/s, loss=0.368] 72% 18/25 [00:14<00:05,  1.31it/s, loss=0.222]                                                72% 18/25 [00:14<00:05,  1.31it/s, loss=0.222] 76% 19/25 [00:15<00:04,  1.32it/s, loss=0.222] 76% 19/25 [00:15<00:04,  1.32it/s, loss=0.32] Statistics of the quantization algorithm:
+--------------------------------+-------+
|        Statistic's name        | Value |
+================================+=======+
| Ratio of enabled quantizations | 100   |
+--------------------------------+-------+

Statistics of the quantization share:
+----------------------------------+----------------------+
|         Statistic's name         |        Value         |
+==================================+======================+
| Symmetric WQs / All placed WQs   | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Asymmetric WQs / All placed WQs  | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Signed WQs / All placed WQs      | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Unsigned WQs / All placed WQs    | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Per-tensor WQs / All placed WQs  | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Per-channel WQs / All placed WQs | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Placed WQs / Potential WQs       | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Symmetric AQs / All placed AQs   | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Asymmetric AQs / All placed AQs  | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+
| Signed AQs / All placed AQs      | 92.59 % (150 / 162)  |
+----------------------------------+----------------------+
| Unsigned AQs / All placed AQs    | 7.41 % (12 / 162)    |
+----------------------------------+----------------------+
| Per-tensor AQs / All placed AQs  | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Per-channel AQs / All placed AQs | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+

Statistics of the bitwidth distribution:
+--------------+---------------------+--------------------+--------------------+
| Num bits (N) | N-bits WQs / Placed |    N-bits AQs /    | N-bits Qs / Placed |
|              |         WQs         |     Placed AQs     |         Qs         |
+==============+=====================+====================+====================+
| 8            | 100.00 % (76 / 76)  | 100.00 % (162 /    | 100.00 % (238 /    |
|              |                     | 162)               | 238)               |
+--------------+---------------------+--------------------+--------------------+
{'loss': 0.3684, 'learning_rate': 1.2395277334996045e-05, 'compression_loss': 0.0, 'label_loss': 0.7095128893852234, 'teacher_loss': 0.3305053412914276, 'epoch': 0.0}
Statistics of the quantization algorithm:
+--------------------------------+-------+
|        Statistic's name        | Value |
+================================+=======+
| Ratio of enabled quantizations | 100   |
+--------------------------------+-------+

Statistics of the quantization share:
+----------------------------------+----------------------+
|         Statistic's name         |        Value         |
+==================================+======================+
| Symmetric WQs / All placed WQs   | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Asymmetric WQs / All placed WQs  | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Signed WQs / All placed WQs      | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Unsigned WQs / All placed WQs    | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Per-tensor WQs / All placed WQs  | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Per-channel WQs / All placed WQs | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Placed WQs / Potential WQs       | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Symmetric AQs / All placed AQs   | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Asymmetric AQs / All placed AQs  | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+
| Signed AQs / All placed AQs      | 92.59 % (150 / 162)  |
+----------------------------------+----------------------+
| Unsigned AQs / All placed AQs    | 7.41 % (12 / 162)    |
+----------------------------------+----------------------+
| Per-tensor AQs / All placed AQs  | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Per-channel AQs / All placed AQs | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+

Statistics of the bitwidth distribution:
+--------------+---------------------+--------------------+--------------------+
| Num bits (N) | N-bits WQs / Placed |    N-bits AQs /    | N-bits Qs / Placed |
|              |         WQs         |     Placed AQs     |         Qs         |
+==============+=====================+====================+====================+
| 8            | 100.00 % (76 / 76)  | 100.00 % (162 /    | 100.00 % (238 /    |
|              |                     | 162)               | 238)               |
+--------------+---------------------+--------------------+--------------------+
{'loss': 0.2224, 'learning_rate': 9.86969785011497e-06, 'compression_loss': 0.0, 'label_loss': 0.2062043845653534, 'teacher_loss': 0.2241814136505127, 'epoch': 0.0}
                                               76% 19/25 [00:15<00:04,  1.32it/s, loss=0.32] 80% 20/25 [00:16<00:03,  1.31it/s, loss=0.32] 80% 20/25 [00:16<00:03,  1.31it/s, loss=0.33]                                               80% 20/25 [00:16<00:03,  1.31it/s, loss=0.33] 84% 21/25 [00:16<00:03,  1.31it/s, loss=0.33] 84% 21/25 [00:16<00:03,  1.31it/s, loss=0.241]Statistics of the quantization algorithm:
+--------------------------------+-------+
|        Statistic's name        | Value |
+================================+=======+
| Ratio of enabled quantizations | 100   |
+--------------------------------+-------+

Statistics of the quantization share:
+----------------------------------+----------------------+
|         Statistic's name         |        Value         |
+==================================+======================+
| Symmetric WQs / All placed WQs   | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Asymmetric WQs / All placed WQs  | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Signed WQs / All placed WQs      | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Unsigned WQs / All placed WQs    | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Per-tensor WQs / All placed WQs  | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Per-channel WQs / All placed WQs | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Placed WQs / Potential WQs       | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Symmetric AQs / All placed AQs   | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Asymmetric AQs / All placed AQs  | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+
| Signed AQs / All placed AQs      | 92.59 % (150 / 162)  |
+----------------------------------+----------------------+
| Unsigned AQs / All placed AQs    | 7.41 % (12 / 162)    |
+----------------------------------+----------------------+
| Per-tensor AQs / All placed AQs  | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Per-channel AQs / All placed AQs | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+

Statistics of the bitwidth distribution:
+--------------+---------------------+--------------------+--------------------+
| Num bits (N) | N-bits WQs / Placed |    N-bits AQs /    | N-bits Qs / Placed |
|              |         WQs         |     Placed AQs     |         Qs         |
+==============+=====================+====================+====================+
| 8            | 100.00 % (76 / 76)  | 100.00 % (162 /    | 100.00 % (238 /    |
|              |                     | 162)               | 238)               |
+--------------+---------------------+--------------------+--------------------+
{'loss': 0.3205, 'learning_rate': 7.500000000000004e-06, 'compression_loss': 0.0, 'label_loss': 0.4287645220756531, 'teacher_loss': 0.3084205687046051, 'epoch': 0.0}
Statistics of the quantization algorithm:
+--------------------------------+-------+
|        Statistic's name        | Value |
+================================+=======+
| Ratio of enabled quantizations | 100   |
+--------------------------------+-------+

Statistics of the quantization share:
+----------------------------------+----------------------+
|         Statistic's name         |        Value         |
+==================================+======================+
| Symmetric WQs / All placed WQs   | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Asymmetric WQs / All placed WQs  | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Signed WQs / All placed WQs      | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Unsigned WQs / All placed WQs    | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Per-tensor WQs / All placed WQs  | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Per-channel WQs / All placed WQs | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Placed WQs / Potential WQs       | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Symmetric AQs / All placed AQs   | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Asymmetric AQs / All placed AQs  | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+
| Signed AQs / All placed AQs      | 92.59 % (150 / 162)  |
+----------------------------------+----------------------+
| Unsigned AQs / All placed AQs    | 7.41 % (12 / 162)    |
+----------------------------------+----------------------+
| Per-tensor AQs / All placed AQs  | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Per-channel AQs / All placed AQs | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+

Statistics of the bitwidth distribution:
+--------------+---------------------+--------------------+--------------------+
| Num bits (N) | N-bits WQs / Placed |    N-bits AQs /    | N-bits Qs / Placed |
|              |         WQs         |     Placed AQs     |         Qs         |
+==============+=====================+====================+====================+
| 8            | 100.00 % (76 / 76)  | 100.00 % (162 /    | 100.00 % (238 /    |
|              |                     | 162)               | 238)               |
+--------------+---------------------+--------------------+--------------------+
{'loss': 0.3304, 'learning_rate': 5.3581858547019095e-06, 'compression_loss': 0.0, 'label_loss': 0.8593454360961914, 'teacher_loss': 0.27162981033325195, 'epoch': 0.0}
                                                84% 21/25 [00:16<00:03,  1.31it/s, loss=0.241] 88% 22/25 [00:17<00:02,  1.30it/s, loss=0.241] 88% 22/25 [00:17<00:02,  1.30it/s, loss=0.221]                                                88% 22/25 [00:17<00:02,  1.30it/s, loss=0.221] 92% 23/25 [00:18<00:01,  1.32it/s, loss=0.221] 92% 23/25 [00:18<00:01,  1.32it/s, loss=0.337]Statistics of the quantization algorithm:
+--------------------------------+-------+
|        Statistic's name        | Value |
+================================+=======+
| Ratio of enabled quantizations | 100   |
+--------------------------------+-------+

Statistics of the quantization share:
+----------------------------------+----------------------+
|         Statistic's name         |        Value         |
+==================================+======================+
| Symmetric WQs / All placed WQs   | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Asymmetric WQs / All placed WQs  | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Signed WQs / All placed WQs      | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Unsigned WQs / All placed WQs    | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Per-tensor WQs / All placed WQs  | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Per-channel WQs / All placed WQs | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Placed WQs / Potential WQs       | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Symmetric AQs / All placed AQs   | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Asymmetric AQs / All placed AQs  | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+
| Signed AQs / All placed AQs      | 92.59 % (150 / 162)  |
+----------------------------------+----------------------+
| Unsigned AQs / All placed AQs    | 7.41 % (12 / 162)    |
+----------------------------------+----------------------+
| Per-tensor AQs / All placed AQs  | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Per-channel AQs / All placed AQs | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+

Statistics of the bitwidth distribution:
+--------------+---------------------+--------------------+--------------------+
| Num bits (N) | N-bits WQs / Placed |    N-bits AQs /    | N-bits Qs / Placed |
|              |         WQs         |     Placed AQs     |         Qs         |
+==============+=====================+====================+====================+
| 8            | 100.00 % (76 / 76)  | 100.00 % (162 /    | 100.00 % (238 /    |
|              |                     | 162)               | 238)               |
+--------------+---------------------+--------------------+--------------------+
{'loss': 0.2412, 'learning_rate': 3.5093333532153316e-06, 'compression_loss': 0.0, 'label_loss': 0.503807783126831, 'teacher_loss': 0.2120203673839569, 'epoch': 0.0}
Statistics of the quantization algorithm:
+--------------------------------+-------+
|        Statistic's name        | Value |
+================================+=======+
| Ratio of enabled quantizations | 100   |
+--------------------------------+-------+

Statistics of the quantization share:
+----------------------------------+----------------------+
|         Statistic's name         |        Value         |
+==================================+======================+
| Symmetric WQs / All placed WQs   | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Asymmetric WQs / All placed WQs  | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Signed WQs / All placed WQs      | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Unsigned WQs / All placed WQs    | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Per-tensor WQs / All placed WQs  | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Per-channel WQs / All placed WQs | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Placed WQs / Potential WQs       | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Symmetric AQs / All placed AQs   | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Asymmetric AQs / All placed AQs  | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+
| Signed AQs / All placed AQs      | 92.59 % (150 / 162)  |
+----------------------------------+----------------------+
| Unsigned AQs / All placed AQs    | 7.41 % (12 / 162)    |
+----------------------------------+----------------------+
| Per-tensor AQs / All placed AQs  | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Per-channel AQs / All placed AQs | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+

Statistics of the bitwidth distribution:
+--------------+---------------------+--------------------+--------------------+
| Num bits (N) | N-bits WQs / Placed |    N-bits AQs /    | N-bits Qs / Placed |
|              |         WQs         |     Placed AQs     |         Qs         |
+==============+=====================+====================+====================+
| 8            | 100.00 % (76 / 76)  | 100.00 % (162 /    | 100.00 % (238 /    |
|              |                     | 162)               | 238)               |
+--------------+---------------------+--------------------+--------------------+
{'loss': 0.2208, 'learning_rate': 2.0096189432334194e-06, 'compression_loss': 0.0, 'label_loss': 0.33648350834846497, 'teacher_loss': 0.20795848965644836, 'epoch': 0.0}
                                                92% 23/25 [00:18<00:01,  1.32it/s, loss=0.337] 96% 24/25 [00:19<00:00,  1.31it/s, loss=0.337] 96% 24/25 [00:19<00:00,  1.31it/s, loss=0.281]                                                96% 24/25 [00:19<00:00,  1.31it/s, loss=0.281]100% 25/25 [00:19<00:00,  1.32it/s, loss=0.281]100% 25/25 [00:19<00:00,  1.32it/s, loss=0.289]Statistics of the quantization algorithm:
+--------------------------------+-------+
|        Statistic's name        | Value |
+================================+=======+
| Ratio of enabled quantizations | 100   |
+--------------------------------+-------+

Statistics of the quantization share:
+----------------------------------+----------------------+
|         Statistic's name         |        Value         |
+==================================+======================+
| Symmetric WQs / All placed WQs   | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Asymmetric WQs / All placed WQs  | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Signed WQs / All placed WQs      | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Unsigned WQs / All placed WQs    | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Per-tensor WQs / All placed WQs  | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Per-channel WQs / All placed WQs | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Placed WQs / Potential WQs       | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Symmetric AQs / All placed AQs   | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Asymmetric AQs / All placed AQs  | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+
| Signed AQs / All placed AQs      | 92.59 % (150 / 162)  |
+----------------------------------+----------------------+
| Unsigned AQs / All placed AQs    | 7.41 % (12 / 162)    |
+----------------------------------+----------------------+
| Per-tensor AQs / All placed AQs  | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Per-channel AQs / All placed AQs | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+

Statistics of the bitwidth distribution:
+--------------+---------------------+--------------------+--------------------+
| Num bits (N) | N-bits WQs / Placed |    N-bits AQs /    | N-bits Qs / Placed |
|              |         WQs         |     Placed AQs     |         Qs         |
+==============+=====================+====================+====================+
| 8            | 100.00 % (76 / 76)  | 100.00 % (162 /    | 100.00 % (238 /    |
|              |                     | 162)               | 238)               |
+--------------+---------------------+--------------------+--------------------+
{'loss': 0.3368, 'learning_rate': 9.046106882113753e-07, 'compression_loss': 0.0, 'label_loss': 0.7542837858200073, 'teacher_loss': 0.290416955947876, 'epoch': 0.0}
Statistics of the quantization algorithm:
+--------------------------------+-------+
|        Statistic's name        | Value |
+================================+=======+
| Ratio of enabled quantizations | 100   |
+--------------------------------+-------+

Statistics of the quantization share:
+----------------------------------+----------------------+
|         Statistic's name         |        Value         |
+==================================+======================+
| Symmetric WQs / All placed WQs   | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Asymmetric WQs / All placed WQs  | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Signed WQs / All placed WQs      | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Unsigned WQs / All placed WQs    | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Per-tensor WQs / All placed WQs  | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Per-channel WQs / All placed WQs | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Placed WQs / Potential WQs       | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Symmetric AQs / All placed AQs   | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Asymmetric AQs / All placed AQs  | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+
| Signed AQs / All placed AQs      | 92.59 % (150 / 162)  |
+----------------------------------+----------------------+
| Unsigned AQs / All placed AQs    | 7.41 % (12 / 162)    |
+----------------------------------+----------------------+
| Per-tensor AQs / All placed AQs  | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Per-channel AQs / All placed AQs | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+

Statistics of the bitwidth distribution:
+--------------+---------------------+--------------------+--------------------+
| Num bits (N) | N-bits WQs / Placed |    N-bits AQs /    | N-bits Qs / Placed |
|              |         WQs         |     Placed AQs     |         Qs         |
+==============+=====================+====================+====================+
| 8            | 100.00 % (76 / 76)  | 100.00 % (162 /    | 100.00 % (238 /    |
|              |                     | 162)               | 238)               |
+--------------+---------------------+--------------------+--------------------+
{'loss': 0.2808, 'learning_rate': 2.278837048168797e-07, 'compression_loss': 0.0, 'label_loss': 0.5351338386535645, 'teacher_loss': 0.2525731921195984, 'epoch': 0.0}
                                               100% 25/25 [00:19<00:00,  1.32it/s, loss=0.289][INFO|trainer.py:1412] 2022-01-16 13:21:35,231 >> 

Training completed. Do not forget to share your model on huggingface.co/models =)


                                               100% 25/25 [00:19<00:00,  1.32it/s, loss=0.289]100% 25/25 [00:19<00:00,  1.25it/s, loss=0.289]
[INFO|trainer.py:1982] 2022-01-16 13:21:35,237 >> Saving model checkpoint to /data1/vchua/tld-poc-csr-dgx1-03//run10-bert-squad-cropped-qat-customkd-lt-5eph
[INFO|configuration_utils.py:379] 2022-01-16 13:21:35,244 >> Configuration saved in /data1/vchua/tld-poc-csr-dgx1-03//run10-bert-squad-cropped-qat-customkd-lt-5eph/config.json
[INFO|modeling_utils.py:1004] 2022-01-16 13:21:35,643 >> Model weights saved in /data1/vchua/tld-poc-csr-dgx1-03//run10-bert-squad-cropped-qat-customkd-lt-5eph/pytorch_model.bin
[INFO|tokenization_utils_base.py:2006] 2022-01-16 13:21:35,643 >> tokenizer config file saved in /data1/vchua/tld-poc-csr-dgx1-03//run10-bert-squad-cropped-qat-customkd-lt-5eph/tokenizer_config.json
[INFO|tokenization_utils_base.py:2012] 2022-01-16 13:21:35,644 >> Special tokens file saved in /data1/vchua/tld-poc-csr-dgx1-03//run10-bert-squad-cropped-qat-customkd-lt-5eph/special_tokens_map.json
Statistics of the quantization algorithm:
+--------------------------------+-------+
|        Statistic's name        | Value |
+================================+=======+
| Ratio of enabled quantizations | 100   |
+--------------------------------+-------+

Statistics of the quantization share:
+----------------------------------+----------------------+
|         Statistic's name         |        Value         |
+==================================+======================+
| Symmetric WQs / All placed WQs   | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Asymmetric WQs / All placed WQs  | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Signed WQs / All placed WQs      | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Unsigned WQs / All placed WQs    | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Per-tensor WQs / All placed WQs  | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Per-channel WQs / All placed WQs | 0.00 % (0 / 76)      |
+----------------------------------+----------------------+
| Placed WQs / Potential WQs       | 100.00 % (76 / 76)   |
+----------------------------------+----------------------+
| Symmetric AQs / All placed AQs   | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Asymmetric AQs / All placed AQs  | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+
| Signed AQs / All placed AQs      | 92.59 % (150 / 162)  |
+----------------------------------+----------------------+
| Unsigned AQs / All placed AQs    | 7.41 % (12 / 162)    |
+----------------------------------+----------------------+
| Per-tensor AQs / All placed AQs  | 100.00 % (162 / 162) |
+----------------------------------+----------------------+
| Per-channel AQs / All placed AQs | 0.00 % (0 / 162)     |
+----------------------------------+----------------------+

Statistics of the bitwidth distribution:
+--------------+---------------------+--------------------+--------------------+
| Num bits (N) | N-bits WQs / Placed |    N-bits AQs /    | N-bits Qs / Placed |
|              |         WQs         |     Placed AQs     |         Qs         |
+==============+=====================+====================+====================+
| 8            | 100.00 % (76 / 76)  | 100.00 % (162 /    | 100.00 % (238 /    |
|              |                     | 162)               | 238)               |
+--------------+---------------------+--------------------+--------------------+
{'loss': 0.2891, 'learning_rate': 0.0, 'compression_loss': 0.0, 'label_loss': 0.30624544620513916, 'teacher_loss': 0.2872086465358734, 'epoch': 0.0}
{'train_runtime': 24.5019, 'train_samples_per_second': 16.325, 'train_steps_per_second': 1.02, 'train_loss': 0.3293510854244232, 'epoch': 0.0}
***** train metrics *****
  epoch                    =        0.0
  train_loss               =     0.3294
  train_runtime            = 0:00:24.50
  train_samples            =      88524
  train_samples_per_second =     16.325
  train_steps_per_second   =       1.02
01/16/2022 13:21:35 - INFO - __main__ - *** Evaluate ***
[INFO|trainer.py:554] 2022-01-16 13:21:35,765 >> The following columns in the evaluation set  don't have a corresponding argument in `NNCFNetwork.forward` and have been ignored: offset_mapping, example_id.
[INFO|trainer.py:2244] 2022-01-16 13:21:35,767 >> ***** Running Evaluation *****
[INFO|trainer.py:2246] 2022-01-16 13:21:35,767 >>   Num examples = 10784
[INFO|trainer.py:2249] 2022-01-16 13:21:35,767 >>   Batch size = 128
  0% 0/85 [00:00<?, ?it/s]  2% 2/85 [00:00<00:21,  3.79it/s]  4% 3/85 [00:01<00:30,  2.67it/s]  5% 4/85 [00:01<00:35,  2.28it/s]  6% 5/85 [00:02<00:37,  2.12it/s]  7% 6/85 [00:02<00:38,  2.04it/s]  8% 7/85 [00:03<00:39,  1.98it/s]  9% 8/85 [00:03<00:39,  1.94it/s] 11% 9/85 [00:04<00:39,  1.92it/s] 12% 10/85 [00:04<00:39,  1.91it/s] 13% 11/85 [00:05<00:38,  1.90it/s] 14% 12/85 [00:05<00:38,  1.90it/s] 15% 13/85 [00:06<00:38,  1.89it/s] 16% 14/85 [00:06<00:37,  1.89it/s] 18% 15/85 [00:07<00:37,  1.89it/s] 19% 16/85 [00:07<00:36,  1.89it/s] 20% 17/85 [00:08<00:36,  1.89it/s] 21% 18/85 [00:09<00:35,  1.89it/s] 22% 19/85 [00:09<00:35,  1.88it/s] 24% 20/85 [00:10<00:34,  1.88it/s] 25% 21/85 [00:10<00:33,  1.88it/s] 26% 22/85 [00:11<00:33,  1.89it/s] 27% 23/85 [00:11<00:32,  1.89it/s] 28% 24/85 [00:12<00:32,  1.88it/s] 29% 25/85 [00:12<00:31,  1.88it/s] 31% 26/85 [00:13<00:31,  1.89it/s] 32% 27/85 [00:13<00:30,  1.89it/s] 33% 28/85 [00:14<00:30,  1.88it/s] 34% 29/85 [00:14<00:29,  1.88it/s] 35% 30/85 [00:15<00:29,  1.88it/s] 36% 31/85 [00:15<00:28,  1.87it/s] 38% 32/85 [00:16<00:28,  1.87it/s] 39% 33/85 [00:17<00:27,  1.87it/s] 40% 34/85 [00:17<00:27,  1.87it/s] 41% 35/85 [00:18<00:27,  1.84it/s] 42% 36/85 [00:18<00:26,  1.86it/s] 44% 37/85 [00:19<00:25,  1.88it/s] 45% 38/85 [00:19<00:24,  1.89it/s] 46% 39/85 [00:20<00:24,  1.86it/s] 47% 40/85 [00:20<00:23,  1.88it/s] 48% 41/85 [00:21<00:23,  1.89it/s] 49% 42/85 [00:21<00:22,  1.90it/s] 51% 43/85 [00:22<00:21,  1.91it/s] 52% 44/85 [00:22<00:21,  1.92it/s] 53% 45/85 [00:23<00:20,  1.91it/s] 54% 46/85 [00:23<00:20,  1.91it/s] 55% 47/85 [00:24<00:19,  1.91it/s] 56% 48/85 [00:24<00:19,  1.92it/s] 58% 49/85 [00:25<00:18,  1.91it/s] 59% 50/85 [00:26<00:18,  1.88it/s] 60% 51/85 [00:26<00:17,  1.90it/s] 61% 52/85 [00:27<00:17,  1.90it/s] 62% 53/85 [00:27<00:16,  1.91it/s] 64% 54/85 [00:28<00:16,  1.91it/s] 65% 55/85 [00:28<00:15,  1.91it/s] 66% 56/85 [00:29<00:15,  1.91it/s] 67% 57/85 [00:29<00:14,  1.90it/s] 68% 58/85 [00:30<00:14,  1.91it/s] 69% 59/85 [00:30<00:13,  1.91it/s] 71% 60/85 [00:31<00:13,  1.91it/s] 72% 61/85 [00:31<00:12,  1.91it/s] 73% 62/85 [00:32<00:12,  1.91it/s] 74% 63/85 [00:32<00:11,  1.91it/s] 75% 64/85 [00:33<00:11,  1.88it/s] 76% 65/85 [00:33<00:10,  1.89it/s] 78% 66/85 [00:34<00:10,  1.90it/s] 79% 67/85 [00:34<00:09,  1.87it/s] 80% 68/85 [00:35<00:09,  1.89it/s] 81% 69/85 [00:36<00:08,  1.90it/s] 82% 70/85 [00:36<00:07,  1.90it/s] 84% 71/85 [00:37<00:07,  1.90it/s] 85% 72/85 [00:37<00:06,  1.90it/s] 86% 73/85 [00:38<00:06,  1.91it/s] 87% 74/85 [00:38<00:05,  1.91it/s] 88% 75/85 [00:39<00:05,  1.91it/s] 89% 76/85 [00:39<00:04,  1.92it/s] 91% 77/85 [00:40<00:04,  1.92it/s] 92% 78/85 [00:40<00:03,  1.92it/s] 93% 79/85 [00:41<00:03,  1.90it/s] 94% 80/85 [00:41<00:02,  1.91it/s] 95% 81/85 [00:42<00:02,  1.91it/s] 96% 82/85 [00:42<00:01,  1.92it/s] 98% 83/85 [00:43<00:01,  1.92it/s] 99% 84/85 [00:43<00:00,  1.92it/s]100% 85/85 [00:44<00:00,  2.37it/s]01/16/2022 13:22:28 - INFO - utils_qa - Post-processing 10570 example predictions split into 10784 features.

  0% 0/10570 [00:00<?, ?it/s][A
  1% 53/10570 [00:00<00:19, 528.33it/s][A
  1% 113/10570 [00:00<00:18, 566.11it/s][A
  2% 173/10570 [00:00<00:17, 577.81it/s][A
  2% 231/10570 [00:00<00:19, 535.47it/s][A
  3% 285/10570 [00:00<00:22, 452.73it/s][A
  3% 343/10570 [00:00<00:20, 487.96it/s][A
  4% 399/10570 [00:00<00:20, 506.56it/s][A
  4% 452/10570 [00:00<00:20, 505.66it/s][A
  5% 504/10570 [00:00<00:19, 507.35it/s][A
  5% 558/10570 [00:01<00:19, 515.74it/s][A
  6% 612/10570 [00:01<00:19, 522.32it/s][A
  6% 669/10570 [00:01<00:18, 536.44it/s][A
  7% 724/10570 [00:01<00:18, 537.87it/s][A
  7% 778/10570 [00:01<00:18, 522.83it/s][A
  8% 831/10570 [00:01<00:19, 503.26it/s][A
  8% 885/10570 [00:01<00:18, 511.12it/s][A
  9% 937/10570 [00:01<00:19, 500.11it/s][A
  9% 988/10570 [00:01<00:19, 494.79it/s][A
 10% 1038/10570 [00:02<00:19, 493.89it/s][A
 10% 1090/10570 [00:02<00:18, 500.17it/s][A
 11% 1141/10570 [00:02<00:18, 501.13it/s][A
 11% 1199/10570 [00:02<00:17, 522.15it/s][A
 12% 1252/10570 [00:02<00:17, 522.99it/s][A
 12% 1308/10570 [00:02<00:17, 533.00it/s][A
 13% 1364/10570 [00:02<00:17, 540.47it/s][A
 13% 1419/10570 [00:02<00:17, 529.14it/s][A
 14% 1474/10570 [00:02<00:17, 533.45it/s][A
 14% 1528/10570 [00:02<00:17, 530.48it/s][A
 15% 1584/10570 [00:03<00:16, 538.57it/s][A
 16% 1641/10570 [00:03<00:16, 546.50it/s][A
 16% 1701/10570 [00:03<00:15, 559.63it/s][A
 17% 1757/10570 [00:03<00:15, 557.59it/s][A
 17% 1813/10570 [00:03<00:15, 551.44it/s][A
 18% 1869/10570 [00:03<00:15, 551.63it/s][A
 18% 1925/10570 [00:03<00:15, 551.96it/s][A
 19% 1982/10570 [00:03<00:15, 555.32it/s][A
 19% 2038/10570 [00:03<00:15, 552.07it/s][A
 20% 2094/10570 [00:03<00:15, 545.88it/s][A
 20% 2149/10570 [00:04<00:16, 512.26it/s][A
 21% 2203/10570 [00:04<00:16, 518.05it/s][A
 21% 2258/10570 [00:04<00:15, 525.98it/s][A
 22% 2313/10570 [00:04<00:15, 532.07it/s][A
 22% 2367/10570 [00:04<00:15, 529.90it/s][A
 23% 2421/10570 [00:04<00:15, 526.83it/s][A
 23% 2474/10570 [00:04<00:15, 517.40it/s][A
 24% 2526/10570 [00:04<00:16, 491.26it/s][A
 24% 2576/10570 [00:04<00:16, 486.38it/s][A
 25% 2630/10570 [00:05<00:15, 499.90it/s][A
 25% 2687/10570 [00:05<00:15, 518.28it/s][A
 26% 2742/10570 [00:05<00:14, 525.29it/s][A
 27% 2803/10570 [00:05<00:14, 548.48it/s][A
 27% 2858/10570 [00:05<00:14, 533.71it/s][A
 28% 2912/10570 [00:05<00:14, 531.68it/s][A
 28% 2966/10570 [00:05<00:14, 524.13it/s][A
 29% 3019/10570 [00:05<00:14, 525.20it/s][A
 29% 3072/10570 [00:05<00:14, 524.60it/s][A
 30% 3125/10570 [00:05<00:14, 518.59it/s][A
 30% 3177/10570 [00:06<00:14, 515.53it/s][A
 31% 3229/10570 [00:06<00:14, 514.57it/s][A
 31% 3281/10570 [00:06<00:14, 514.17it/s][A
 32% 3335/10570 [00:06<00:13, 521.54it/s][A
 32% 3388/10570 [00:06<00:13, 517.71it/s][A
 33% 3441/10570 [00:06<00:13, 520.33it/s][A
 33% 3494/10570 [00:06<00:13, 519.77it/s][A
 34% 3548/10570 [00:06<00:13, 525.53it/s][A
 34% 3601/10570 [00:06<00:13, 516.58it/s][A
 35% 3653/10570 [00:06<00:13, 515.08it/s][A
 35% 3705/10570 [00:07<00:13, 516.03it/s][A
 36% 3758/10570 [00:07<00:13, 520.01it/s][A
 36% 3812/10570 [00:07<00:12, 523.04it/s][A
 37% 3866/10570 [00:07<00:12, 526.65it/s][A
 37% 3919/10570 [00:07<00:12, 515.88it/s][A
 38% 3972/10570 [00:07<00:12, 519.91it/s][A
 38% 4025/10570 [00:07<00:12, 512.26it/s][A
 39% 4080/10570 [00:07<00:12, 521.18it/s][A
 39% 4133/10570 [00:07<00:12, 501.74it/s][A
 40% 4184/10570 [00:08<00:15, 417.31it/s][A
 40% 4229/10570 [00:08<00:16, 394.43it/s][A
 40% 4271/10570 [00:08<00:17, 361.62it/s][A
 41% 4309/10570 [00:08<00:19, 313.22it/s][A
 41% 4359/10570 [00:08<00:17, 355.83it/s][A
 42% 4407/10570 [00:08<00:15, 386.26it/s][A
 42% 4458/10570 [00:08<00:14, 418.32it/s][A
 43% 4506/10570 [00:08<00:13, 433.70it/s][A
 43% 4552/10570 [00:09<00:13, 437.75it/s][A
 44% 4598/10570 [00:09<00:13, 440.72it/s][A
 44% 4644/10570 [00:09<00:13, 445.15it/s][A
 44% 4693/10570 [00:09<00:12, 455.63it/s][A
 45% 4746/10570 [00:09<00:12, 475.77it/s][A
 45% 4794/10570 [00:09<00:12, 454.73it/s][A
 46% 4846/10570 [00:09<00:12, 471.56it/s][A
 46% 4894/10570 [00:09<00:12, 460.15it/s][A
 47% 4948/10570 [00:09<00:11, 480.05it/s][A
 47% 4997/10570 [00:09<00:11, 475.51it/s][A
 48% 5048/10570 [00:10<00:11, 481.76it/s][A
 48% 5100/10570 [00:10<00:11, 490.57it/s][A
 49% 5153/10570 [00:10<00:10, 501.06it/s][A
 49% 5205/10570 [00:10<00:10, 506.54it/s][A
 50% 5256/10570 [00:10<00:10, 500.30it/s][A
 50% 5311/10570 [00:10<00:10, 513.52it/s][A
 51% 5364/10570 [00:10<00:10, 515.58it/s][A
 51% 5417/10570 [00:10<00:09, 517.59it/s][A
 52% 5469/10570 [00:10<00:10, 489.72it/s][A
 52% 5519/10570 [00:11<00:10, 467.81it/s][A
 53% 5569/10570 [00:11<00:10, 476.22it/s][A
 53% 5618/10570 [00:11<00:10, 479.66it/s][A
 54% 5667/10570 [00:11<00:10, 471.26it/s][A
 54% 5721/10570 [00:11<00:09, 488.94it/s][A
 55% 5771/10570 [00:11<00:09, 483.22it/s][A
 55% 5820/10570 [00:11<00:09, 481.98it/s][A
 56% 5870/10570 [00:11<00:09, 484.53it/s][A
 56% 5921/10570 [00:11<00:09, 491.03it/s][A
 57% 5974/10570 [00:11<00:09, 500.14it/s][A
 57% 6025/10570 [00:12<00:09, 494.21it/s][A
 57% 6075/10570 [00:12<00:09, 488.87it/s][A
 58% 6127/10570 [00:12<00:08, 495.53it/s][A
 58% 6180/10570 [00:12<00:08, 502.75it/s][A
 59% 6231/10570 [00:12<00:09, 480.27it/s][A
 59% 6280/10570 [00:12<00:08, 482.98it/s][A
 60% 6329/10570 [00:12<00:08, 484.55it/s][A
 60% 6378/10570 [00:12<00:08, 471.60it/s][A
 61% 6431/10570 [00:12<00:08, 486.73it/s][A
 61% 6481/10570 [00:13<00:08, 489.25it/s][A
 62% 6531/10570 [00:13<00:08, 491.36it/s][A
 62% 6581/10570 [00:13<00:08, 492.50it/s][A
 63% 6632/10570 [00:13<00:07, 497.27it/s][A
 63% 6682/10570 [00:13<00:07, 496.63it/s][A
 64% 6732/10570 [00:13<00:07, 485.06it/s][A
 64% 6785/10570 [00:13<00:07, 497.09it/s][A
 65% 6835/10570 [00:13<00:07, 486.13it/s][A
 65% 6885/10570 [00:13<00:07, 487.84it/s][A
 66% 6934/10570 [00:13<00:07, 487.30it/s][A
 66% 6983/10570 [00:14<00:08, 444.13it/s][A
 66% 7029/10570 [00:14<00:08, 430.86it/s][A
 67% 7078/10570 [00:14<00:07, 446.40it/s][A
 67% 7129/10570 [00:14<00:07, 464.20it/s][A
 68% 7180/10570 [00:14<00:07, 474.60it/s][A
 68% 7228/10570 [00:14<00:07, 469.43it/s][A
 69% 7278/10570 [00:14<00:06, 475.81it/s][A
 69% 7331/10570 [00:14<00:06, 489.03it/s][A
 70% 7381/10570 [00:14<00:06, 465.20it/s][A
 70% 7429/10570 [00:15<00:06, 468.51it/s][A
 71% 7481/10570 [00:15<00:06, 481.16it/s][A
 71% 7534/10570 [00:15<00:06, 494.19it/s][A
 72% 7587/10570 [00:15<00:05, 502.62it/s][A
 72% 7640/10570 [00:15<00:05, 509.02it/s][A
 73% 7692/10570 [00:15<00:05, 492.64it/s][A
 73% 7744/10570 [00:15<00:05, 498.67it/s][A
 74% 7795/10570 [00:15<00:05, 479.48it/s][A
 74% 7845/10570 [00:15<00:05, 483.90it/s][A
 75% 7894/10570 [00:15<00:06, 440.71it/s][A
 75% 7939/10570 [00:16<00:06, 423.77it/s][A
 76% 7987/10570 [00:16<00:05, 438.75it/s][A
 76% 8038/10570 [00:16<00:05, 458.31it/s][A
 77% 8090/10570 [00:16<00:05, 473.78it/s][A
 77% 8140/10570 [00:16<00:05, 478.85it/s][A
 77% 8189/10570 [00:16<00:05, 464.55it/s][A
 78% 8236/10570 [00:16<00:05, 465.28it/s][A
 78% 8283/10570 [00:16<00:05, 451.56it/s][A
 79% 8333/10570 [00:16<00:04, 464.70it/s][A
 79% 8383/10570 [00:17<00:04, 474.57it/s][A
 80% 8437/10570 [00:17<00:04, 491.68it/s][A
 80% 8491/10570 [00:17<00:04, 504.40it/s][A
 81% 8544/10570 [00:17<00:03, 511.74it/s][A
 81% 8596/10570 [00:17<00:03, 509.97it/s][A
 82% 8648/10570 [00:17<00:03, 503.58it/s][A
 82% 8700/10570 [00:17<00:03, 507.61it/s][A
 83% 8751/10570 [00:17<00:03, 495.06it/s][A
 83% 8801/10570 [00:17<00:03, 486.24it/s][A
 84% 8852/10570 [00:17<00:03, 490.53it/s][A
 84% 8902/10570 [00:18<00:03, 488.39it/s][A
 85% 8953/10570 [00:18<00:03, 491.97it/s][A
 85% 9006/10570 [00:18<00:03, 502.12it/s][A
 86% 9057/10570 [00:18<00:03, 501.01it/s][A
 86% 9108/10570 [00:18<00:02, 501.91it/s][A
 87% 9159/10570 [00:18<00:02, 497.14it/s][A
 87% 9211/10570 [00:18<00:02, 503.47it/s][A
 88% 9264/10570 [00:18<00:02, 510.37it/s][A
 88% 9316/10570 [00:18<00:02, 508.76it/s][A
 89% 9367/10570 [00:18<00:02, 503.39it/s][A
 89% 9418/10570 [00:19<00:02, 496.83it/s][A
 90% 9472/10570 [00:19<00:02, 506.92it/s][A
 90% 9525/10570 [00:19<00:02, 513.45it/s][A
 91% 9578/10570 [00:19<00:01, 517.35it/s][A
 91% 9631/10570 [00:19<00:01, 518.56it/s][A
 92% 9683/10570 [00:19<00:01, 515.96it/s][A
 92% 9736/10570 [00:19<00:01, 518.63it/s][A
 93% 9788/10570 [00:19<00:01, 509.00it/s][A
 93% 9839/10570 [00:19<00:01, 508.07it/s][A
 94% 9892/10570 [00:20<00:01, 512.07it/s][A
 94% 9944/10570 [00:20<00:01, 506.97it/s][A
 95% 9995/10570 [00:20<00:01, 504.08it/s][A
 95% 10050/10570 [00:20<00:01, 515.05it/s][A
 96% 10102/10570 [00:20<00:00, 510.59it/s][A
 96% 10154/10570 [00:20<00:00, 507.47it/s][A
 97% 10205/10570 [00:20<00:00, 494.63it/s][A
 97% 10257/10570 [00:20<00:00, 501.55it/s][A
 98% 10308/10570 [00:20<00:00, 501.82it/s][A
 98% 10360/10570 [00:20<00:00, 504.31it/s][A
 98% 10411/10570 [00:21<00:00, 497.26it/s][A
 99% 10462/10570 [00:21<00:00, 498.08it/s][A
 99% 10515/10570 [00:21<00:00, 506.23it/s][A
100% 10570/10570 [00:21<00:00, 516.80it/s][A01/16/2022 13:22:49 - INFO - utils_qa - Saving predictions to /data1/vchua/tld-poc-csr-dgx1-03//run10-bert-squad-cropped-qat-customkd-lt-5eph/eval_predictions.json.
100% 10570/10570 [00:21<00:00, 495.21it/s]01/16/2022 13:22:49 - INFO - utils_qa - Saving nbest_preds to /data1/vchua/tld-poc-csr-dgx1-03//run10-bert-squad-cropped-qat-customkd-lt-5eph/eval_nbest_predictions.json.
01/16/2022 13:22:53 - INFO - datasets.metric - Removing /home/vchua/.cache/huggingface/metrics/squad/default/default_experiment-1-0.arrow

100% 85/85 [01:17<00:00,  1.10it/s]***** eval_XP metrics *****
  epoch            =     0.0
  eval_exact_match = 79.2621
  eval_f1          = 86.8136
  eval_samples     =   10784

wandb: Waiting for W&B process to finish, PID 44410... (success).
wandb: - 0.00MB of 0.00MB uploaded (0.00MB deduped)wandb: \ 0.00MB of 0.00MB uploaded (0.00MB deduped)wandb: | 0.00MB of 0.00MB uploaded (0.00MB deduped)wandb: / 0.00MB of 0.09MB uploaded (0.00MB deduped)wandb: - 0.00MB of 0.09MB uploaded (0.00MB deduped)wandb: \ 0.09MB of 0.09MB uploaded (0.00MB deduped)wandb: | 0.09MB of 0.09MB uploaded (0.00MB deduped)wandb: / 0.09MB of 0.09MB uploaded (0.00MB deduped)wandb: - 0.09MB of 0.09MB uploaded (0.00MB deduped)wandb: \ 0.09MB of 0.09MB uploaded (0.00MB deduped)wandb: | 0.09MB of 0.09MB uploaded (0.00MB deduped)wandb: / 0.09MB of 0.09MB uploaded (0.00MB deduped)wandb: - 0.09MB of 0.09MB uploaded (0.00MB deduped)wandb:                                                                                
wandb: Run history:
wandb:                 eval/exact_match ▁
wandb:                          eval/f1 ▁
wandb:           train/compression_loss ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
wandb:                      train/epoch ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
wandb:                train/global_step ▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇████
wandb:                 train/label_loss ▃▅▇▇▆▆▆▆▃▄▆▁▂▃▅▅▇▂▄█▅▃▇▅▃
wandb:              train/learning_rate ▂▃▄▅▆▇████▇▇▆▆▅▅▄▃▃▂▂▁▁▁▁
wandb:                       train/loss ▂▃▄▃▃▂▃█▁▃▃▁▂▃▄▃▃▁▃▃▂▁▃▂▂
wandb:               train/teacher_loss ▂▃▃▃▂▁▃█▁▃▂▁▃▂▃▂▃▁▂▂▁▁▂▂▂
wandb:                 train/total_flos ▁
wandb:                 train/train_loss ▁
wandb:              train/train_runtime ▁
wandb:   train/train_samples_per_second ▁
wandb:     train/train_steps_per_second ▁
wandb: 
wandb: Run summary:
wandb:                 eval/exact_match 79.26206
wandb:                          eval/f1 86.81358
wandb:           train/compression_loss 0.0
wandb:                      train/epoch 0.0
wandb:                train/global_step 25
wandb:                 train/label_loss 0.30625
wandb:              train/learning_rate 0.0
wandb:                       train/loss 0.2891
wandb:               train/teacher_loss 0.28721
wandb:                 train/total_flos 18672028876800.0
wandb:                 train/train_loss 0.32935
wandb:              train/train_runtime 24.5019
wandb:   train/train_samples_per_second 16.325
wandb:     train/train_steps_per_second 1.02
wandb: 
wandb: Synced 6 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)
wandb: Synced run10-bert-squad-cropped-qat-customkd-lt-5eph: https://wandb.ai/vchua/pruneofa-tl%20%28csr-dgx1-03%29/runs/3ald1d3c
wandb: Find logs at: ./wandb/run-20220116_132111-3ald1d3c/logs/debug.log
wandb: