Vui Seng Chua committed on Feb 23, 2022

Commit

cd6bbb4

•

1 Parent(s): 3b67ed8

Add ckpt@35K

Browse files

Files changed (17) hide show

.gitattributes +1 -0
checkpoint-35000/NNCFNetwork.onnx +3 -0
checkpoint-35000/config.json +25 -0
checkpoint-35000/onnx_sparsity.csv +147 -0
checkpoint-35000/onnx_sparsity.md +148 -0
checkpoint-35000/optimizer.pt +3 -0
checkpoint-35000/pytorch_model.bin +3 -0
checkpoint-35000/rng_state.pth +3 -0
checkpoint-35000/scheduler.pt +3 -0
checkpoint-35000/special_tokens_map.json +1 -0
checkpoint-35000/tokenizer.json +0 -0
checkpoint-35000/tokenizer_config.json +1 -0
checkpoint-35000/torch_mask_structures.csv +73 -0
checkpoint-35000/torch_mask_structures.md +74 -0
checkpoint-35000/trainer_state.json +3 -0
checkpoint-35000/training_args.bin +3 -0
checkpoint-35000/vocab.txt +0 -0

.gitattributes CHANGED Viewed

@@ -25,3 +25,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zstandard filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zstandard filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+trainer_state.json filter=lfs diff=lfs merge=lfs -text

checkpoint-35000/NNCFNetwork.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7cafb8d66bd60b7c0660a3dbcb033936b27c7cf47f4c7ba854405f8682d039fe
+size 435667833

checkpoint-35000/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "bert-base-uncased",
+  "architectures": [
+    "NNCFNetwork"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.9.1",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

checkpoint-35000/onnx_sparsity.csv ADDED Viewed

	@@ -0,0 +1,147 @@

+,layer_id,shape,nparam,nnz,sparsity
+0,nncf_module.bert.encoder.layer.0.attention.self.query.bias,[768],768,192,0.75
+1,nncf_module.bert.encoder.layer.0.attention.self.query.weight,"[768, 768]",589824,147456,0.75
+2,nncf_module.bert.encoder.layer.0.attention.self.key.bias,[768],768,192,0.75
+3,nncf_module.bert.encoder.layer.0.attention.self.key.weight,"[768, 768]",589824,147456,0.75
+4,nncf_module.bert.encoder.layer.0.attention.self.value.bias,[768],768,192,0.75
+5,nncf_module.bert.encoder.layer.0.attention.self.value.weight,"[768, 768]",589824,147456,0.75
+6,nncf_module.bert.encoder.layer.0.attention.output.dense.bias,[768],768,768,0.0
+7,nncf_module.bert.encoder.layer.0.attention.output.dense.weight,"[768, 768]",589824,147456,0.75
+8,nncf_module.bert.encoder.layer.0.intermediate.dense.bias,[3072],3072,2940,0.04296875
+9,nncf_module.bert.encoder.layer.0.intermediate.dense.weight,"[3072, 768]",2359296,2257920,0.04296875
+10,nncf_module.bert.encoder.layer.0.output.dense.bias,[768],768,768,0.0
+11,nncf_module.bert.encoder.layer.0.output.dense.weight,"[768, 3072]",2359296,2257920,0.04296875
+12,nncf_module.bert.encoder.layer.1.attention.self.query.bias,[768],768,256,0.6666666666666667
+13,nncf_module.bert.encoder.layer.1.attention.self.query.weight,"[768, 768]",589824,196608,0.6666666666666667
+14,nncf_module.bert.encoder.layer.1.attention.self.key.bias,[768],768,256,0.6666666666666667
+15,nncf_module.bert.encoder.layer.1.attention.self.key.weight,"[768, 768]",589824,196608,0.6666666666666667
+16,nncf_module.bert.encoder.layer.1.attention.self.value.bias,[768],768,256,0.6666666666666667
+17,nncf_module.bert.encoder.layer.1.attention.self.value.weight,"[768, 768]",589824,196608,0.6666666666666667
+18,nncf_module.bert.encoder.layer.1.attention.output.dense.bias,[768],768,768,0.0
+19,nncf_module.bert.encoder.layer.1.attention.output.dense.weight,"[768, 768]",589824,196608,0.6666666666666667
+20,nncf_module.bert.encoder.layer.1.intermediate.dense.bias,[3072],3072,2923,0.04850260416666663
+21,nncf_module.bert.encoder.layer.1.intermediate.dense.weight,"[3072, 768]",2359296,2244864,0.04850260416666663
+22,nncf_module.bert.encoder.layer.1.output.dense.bias,[768],768,768,0.0
+23,nncf_module.bert.encoder.layer.1.output.dense.weight,"[768, 3072]",2359296,2244864,0.04850260416666663
+24,nncf_module.bert.encoder.layer.2.attention.self.query.bias,[768],768,768,0.0
+25,nncf_module.bert.encoder.layer.2.attention.self.query.weight,"[768, 768]",589824,589824,0.0
+26,nncf_module.bert.encoder.layer.2.attention.self.key.bias,[768],768,768,0.0
+27,nncf_module.bert.encoder.layer.2.attention.self.key.weight,"[768, 768]",589824,589824,0.0
+28,nncf_module.bert.encoder.layer.2.attention.self.value.bias,[768],768,768,0.0
+29,nncf_module.bert.encoder.layer.2.attention.self.value.weight,"[768, 768]",589824,589824,0.0
+30,nncf_module.bert.encoder.layer.2.attention.output.dense.bias,[768],768,768,0.0
+31,nncf_module.bert.encoder.layer.2.attention.output.dense.weight,"[768, 768]",589824,589824,0.0
+32,nncf_module.bert.encoder.layer.2.intermediate.dense.bias,[3072],3072,2980,0.02994791666666663
+33,nncf_module.bert.encoder.layer.2.intermediate.dense.weight,"[3072, 768]",2359296,2288640,0.02994791666666663
+34,nncf_module.bert.encoder.layer.2.output.dense.bias,[768],768,768,0.0
+35,nncf_module.bert.encoder.layer.2.output.dense.weight,"[768, 3072]",2359296,2288640,0.02994791666666663
+36,nncf_module.bert.encoder.layer.3.attention.self.query.bias,[768],768,768,0.0
+37,nncf_module.bert.encoder.layer.3.attention.self.query.weight,"[768, 768]",589824,589824,0.0
+38,nncf_module.bert.encoder.layer.3.attention.self.key.bias,[768],768,768,0.0
+39,nncf_module.bert.encoder.layer.3.attention.self.key.weight,"[768, 768]",589824,589824,0.0
+40,nncf_module.bert.encoder.layer.3.attention.self.value.bias,[768],768,768,0.0
+41,nncf_module.bert.encoder.layer.3.attention.self.value.weight,"[768, 768]",589824,589824,0.0
+42,nncf_module.bert.encoder.layer.3.attention.output.dense.bias,[768],768,768,0.0
+43,nncf_module.bert.encoder.layer.3.attention.output.dense.weight,"[768, 768]",589824,589824,0.0
+44,nncf_module.bert.encoder.layer.3.intermediate.dense.bias,[3072],3072,2957,0.03743489583333337
+45,nncf_module.bert.encoder.layer.3.intermediate.dense.weight,"[3072, 768]",2359296,2270976,0.03743489583333337
+46,nncf_module.bert.encoder.layer.3.output.dense.bias,[768],768,768,0.0
+47,nncf_module.bert.encoder.layer.3.output.dense.weight,"[768, 3072]",2359296,2270976,0.03743489583333337
+48,nncf_module.bert.encoder.layer.4.attention.self.query.bias,[768],768,768,0.0
+49,nncf_module.bert.encoder.layer.4.attention.self.query.weight,"[768, 768]",589824,589824,0.0
+50,nncf_module.bert.encoder.layer.4.attention.self.key.bias,[768],768,768,0.0
+51,nncf_module.bert.encoder.layer.4.attention.self.key.weight,"[768, 768]",589824,589824,0.0
+52,nncf_module.bert.encoder.layer.4.attention.self.value.bias,[768],768,768,0.0
+53,nncf_module.bert.encoder.layer.4.attention.self.value.weight,"[768, 768]",589824,589824,0.0
+54,nncf_module.bert.encoder.layer.4.attention.output.dense.bias,[768],768,768,0.0
+55,nncf_module.bert.encoder.layer.4.attention.output.dense.weight,"[768, 768]",589824,589824,0.0
+56,nncf_module.bert.encoder.layer.4.intermediate.dense.bias,[3072],3072,2906,0.05403645833333337
+57,nncf_module.bert.encoder.layer.4.intermediate.dense.weight,"[3072, 768]",2359296,2231808,0.05403645833333337
+58,nncf_module.bert.encoder.layer.4.output.dense.bias,[768],768,768,0.0
+59,nncf_module.bert.encoder.layer.4.output.dense.weight,"[768, 3072]",2359296,2231808,0.05403645833333337
+60,nncf_module.bert.encoder.layer.5.attention.self.query.bias,[768],768,768,0.0
+61,nncf_module.bert.encoder.layer.5.attention.self.query.weight,"[768, 768]",589824,589824,0.0
+62,nncf_module.bert.encoder.layer.5.attention.self.key.bias,[768],768,768,0.0
+63,nncf_module.bert.encoder.layer.5.attention.self.key.weight,"[768, 768]",589824,589824,0.0
+64,nncf_module.bert.encoder.layer.5.attention.self.value.bias,[768],768,768,0.0
+65,nncf_module.bert.encoder.layer.5.attention.self.value.weight,"[768, 768]",589824,589824,0.0
+66,nncf_module.bert.encoder.layer.5.attention.output.dense.bias,[768],768,768,0.0
+67,nncf_module.bert.encoder.layer.5.attention.output.dense.weight,"[768, 768]",589824,589824,0.0
+68,nncf_module.bert.encoder.layer.5.intermediate.dense.bias,[3072],3072,2865,0.0673828125
+69,nncf_module.bert.encoder.layer.5.intermediate.dense.weight,"[3072, 768]",2359296,2200320,0.0673828125
+70,nncf_module.bert.encoder.layer.5.output.dense.bias,[768],768,768,0.0
+71,nncf_module.bert.encoder.layer.5.output.dense.weight,"[768, 3072]",2359296,2200320,0.0673828125
+72,nncf_module.bert.encoder.layer.6.attention.self.query.bias,[768],768,768,0.0
+73,nncf_module.bert.encoder.layer.6.attention.self.query.weight,"[768, 768]",589824,589824,0.0
+74,nncf_module.bert.encoder.layer.6.attention.self.key.bias,[768],768,768,0.0
+75,nncf_module.bert.encoder.layer.6.attention.self.key.weight,"[768, 768]",589824,589824,0.0
+76,nncf_module.bert.encoder.layer.6.attention.self.value.bias,[768],768,768,0.0
+77,nncf_module.bert.encoder.layer.6.attention.self.value.weight,"[768, 768]",589824,589824,0.0
+78,nncf_module.bert.encoder.layer.6.attention.output.dense.bias,[768],768,768,0.0
+79,nncf_module.bert.encoder.layer.6.attention.output.dense.weight,"[768, 768]",589824,589824,0.0
+80,nncf_module.bert.encoder.layer.6.intermediate.dense.bias,[3072],3072,2759,0.10188802083333337
+81,nncf_module.bert.encoder.layer.6.intermediate.dense.weight,"[3072, 768]",2359296,2118912,0.10188802083333337
+82,nncf_module.bert.encoder.layer.6.output.dense.bias,[768],768,768,0.0
+83,nncf_module.bert.encoder.layer.6.output.dense.weight,"[768, 3072]",2359296,2118912,0.10188802083333337
+84,nncf_module.bert.encoder.layer.7.attention.self.query.bias,[768],768,768,0.0
+85,nncf_module.bert.encoder.layer.7.attention.self.query.weight,"[768, 768]",589824,589824,0.0
+86,nncf_module.bert.encoder.layer.7.attention.self.key.bias,[768],768,768,0.0
+87,nncf_module.bert.encoder.layer.7.attention.self.key.weight,"[768, 768]",589824,589824,0.0
+88,nncf_module.bert.encoder.layer.7.attention.self.value.bias,[768],768,768,0.0
+89,nncf_module.bert.encoder.layer.7.attention.self.value.weight,"[768, 768]",589824,589824,0.0
+90,nncf_module.bert.encoder.layer.7.attention.output.dense.bias,[768],768,768,0.0
+91,nncf_module.bert.encoder.layer.7.attention.output.dense.weight,"[768, 768]",589824,589824,0.0
+92,nncf_module.bert.encoder.layer.7.intermediate.dense.bias,[3072],3072,2569,0.16373697916666663
+93,nncf_module.bert.encoder.layer.7.intermediate.dense.weight,"[3072, 768]",2359296,1972992,0.16373697916666663
+94,nncf_module.bert.encoder.layer.7.output.dense.bias,[768],768,768,0.0
+95,nncf_module.bert.encoder.layer.7.output.dense.weight,"[768, 3072]",2359296,1972992,0.16373697916666663
+96,nncf_module.bert.encoder.layer.8.attention.self.query.bias,[768],768,256,0.6666666666666667
+97,nncf_module.bert.encoder.layer.8.attention.self.query.weight,"[768, 768]",589824,196608,0.6666666666666667
+98,nncf_module.bert.encoder.layer.8.attention.self.key.bias,[768],768,256,0.6666666666666667
+99,nncf_module.bert.encoder.layer.8.attention.self.key.weight,"[768, 768]",589824,196608,0.6666666666666667
+100,nncf_module.bert.encoder.layer.8.attention.self.value.bias,[768],768,256,0.6666666666666667
+101,nncf_module.bert.encoder.layer.8.attention.self.value.weight,"[768, 768]",589824,196608,0.6666666666666667
+102,nncf_module.bert.encoder.layer.8.attention.output.dense.bias,[768],768,768,0.0
+103,nncf_module.bert.encoder.layer.8.attention.output.dense.weight,"[768, 768]",589824,196608,0.6666666666666667
+104,nncf_module.bert.encoder.layer.8.intermediate.dense.bias,[3072],3072,2094,0.318359375
+105,nncf_module.bert.encoder.layer.8.intermediate.dense.weight,"[3072, 768]",2359296,1608192,0.318359375
+106,nncf_module.bert.encoder.layer.8.output.dense.bias,[768],768,768,0.0
+107,nncf_module.bert.encoder.layer.8.output.dense.weight,"[768, 3072]",2359296,1608192,0.318359375
+108,nncf_module.bert.encoder.layer.9.attention.self.query.bias,[768],768,768,0.0
+109,nncf_module.bert.encoder.layer.9.attention.self.query.weight,"[768, 768]",589824,589824,0.0
+110,nncf_module.bert.encoder.layer.9.attention.self.key.bias,[768],768,768,0.0
+111,nncf_module.bert.encoder.layer.9.attention.self.key.weight,"[768, 768]",589824,589824,0.0
+112,nncf_module.bert.encoder.layer.9.attention.self.value.bias,[768],768,768,0.0
+113,nncf_module.bert.encoder.layer.9.attention.self.value.weight,"[768, 768]",589824,589824,0.0
+114,nncf_module.bert.encoder.layer.9.attention.output.dense.bias,[768],768,768,0.0
+115,nncf_module.bert.encoder.layer.9.attention.output.dense.weight,"[768, 768]",589824,589824,0.0
+116,nncf_module.bert.encoder.layer.9.intermediate.dense.bias,[3072],3072,1009,0.6715494791666667
+117,nncf_module.bert.encoder.layer.9.intermediate.dense.weight,"[3072, 768]",2359296,774912,0.6715494791666667
+118,nncf_module.bert.encoder.layer.9.output.dense.bias,[768],768,768,0.0
+119,nncf_module.bert.encoder.layer.9.output.dense.weight,"[768, 3072]",2359296,774912,0.6715494791666667
+120,nncf_module.bert.encoder.layer.10.attention.self.query.bias,[768],768,320,0.5833333333333333
+121,nncf_module.bert.encoder.layer.10.attention.self.query.weight,"[768, 768]",589824,245760,0.5833333333333333
+122,nncf_module.bert.encoder.layer.10.attention.self.key.bias,[768],768,320,0.5833333333333333
+123,nncf_module.bert.encoder.layer.10.attention.self.key.weight,"[768, 768]",589824,245760,0.5833333333333333
+124,nncf_module.bert.encoder.layer.10.attention.self.value.bias,[768],768,320,0.5833333333333333
+125,nncf_module.bert.encoder.layer.10.attention.self.value.weight,"[768, 768]",589824,245760,0.5833333333333333
+126,nncf_module.bert.encoder.layer.10.attention.output.dense.bias,[768],768,768,0.0
+127,nncf_module.bert.encoder.layer.10.attention.output.dense.weight,"[768, 768]",589824,245760,0.5833333333333333
+128,nncf_module.bert.encoder.layer.10.intermediate.dense.bias,[3072],3072,743,0.7581380208333334
+129,nncf_module.bert.encoder.layer.10.intermediate.dense.weight,"[3072, 768]",2359296,570624,0.7581380208333334
+130,nncf_module.bert.encoder.layer.10.output.dense.bias,[768],768,768,0.0
+131,nncf_module.bert.encoder.layer.10.output.dense.weight,"[768, 3072]",2359296,570624,0.7581380208333334
+132,nncf_module.bert.encoder.layer.11.attention.self.query.bias,[768],768,192,0.75
+133,nncf_module.bert.encoder.layer.11.attention.self.query.weight,"[768, 768]",589824,147456,0.75
+134,nncf_module.bert.encoder.layer.11.attention.self.key.bias,[768],768,192,0.75
+135,nncf_module.bert.encoder.layer.11.attention.self.key.weight,"[768, 768]",589824,147456,0.75
+136,nncf_module.bert.encoder.layer.11.attention.self.value.bias,[768],768,192,0.75
+137,nncf_module.bert.encoder.layer.11.attention.self.value.weight,"[768, 768]",589824,147456,0.75
+138,nncf_module.bert.encoder.layer.11.attention.output.dense.bias,[768],768,768,0.0
+139,nncf_module.bert.encoder.layer.11.attention.output.dense.weight,"[768, 768]",589824,147456,0.75
+140,nncf_module.bert.encoder.layer.11.intermediate.dense.bias,[3072],3072,605,0.8030598958333334
+141,nncf_module.bert.encoder.layer.11.intermediate.dense.weight,"[3072, 768]",2359296,464640,0.8030598958333334
+142,nncf_module.bert.encoder.layer.11.output.dense.bias,[768],768,768,0.0
+143,nncf_module.bert.encoder.layer.11.output.dense.weight,"[768, 3072]",2359296,464640,0.8030598958333334
+144,nncf_module.qa_outputs.bias,[2],2,2,0.0
+145,nncf_module.qa_outputs.weight,"[2, 768]",1536,1536,0.0

checkpoint-35000/onnx_sparsity.md ADDED Viewed

	@@ -0,0 +1,148 @@

+|     | layer_id                                                        | shape       |   nparam |     nnz |   sparsity |
+|----:|:----------------------------------------------------------------|:------------|---------:|--------:|-----------:|
+|   0 | nncf_module.bert.encoder.layer.0.attention.self.query.bias      | [768]       |      768 |     192 |  0.75      |
+|   1 | nncf_module.bert.encoder.layer.0.attention.self.query.weight    | [768, 768]  |   589824 |  147456 |  0.75      |
+|   2 | nncf_module.bert.encoder.layer.0.attention.self.key.bias        | [768]       |      768 |     192 |  0.75      |
+|   3 | nncf_module.bert.encoder.layer.0.attention.self.key.weight      | [768, 768]  |   589824 |  147456 |  0.75      |
+|   4 | nncf_module.bert.encoder.layer.0.attention.self.value.bias      | [768]       |      768 |     192 |  0.75      |
+|   5 | nncf_module.bert.encoder.layer.0.attention.self.value.weight    | [768, 768]  |   589824 |  147456 |  0.75      |
+|   6 | nncf_module.bert.encoder.layer.0.attention.output.dense.bias    | [768]       |      768 |     768 |  0         |
+|   7 | nncf_module.bert.encoder.layer.0.attention.output.dense.weight  | [768, 768]  |   589824 |  147456 |  0.75      |
+|   8 | nncf_module.bert.encoder.layer.0.intermediate.dense.bias        | [3072]      |     3072 |    2940 |  0.0429688 |
+|   9 | nncf_module.bert.encoder.layer.0.intermediate.dense.weight      | [3072, 768] |  2359296 | 2257920 |  0.0429688 |
+|  10 | nncf_module.bert.encoder.layer.0.output.dense.bias              | [768]       |      768 |     768 |  0         |
+|  11 | nncf_module.bert.encoder.layer.0.output.dense.weight            | [768, 3072] |  2359296 | 2257920 |  0.0429688 |
+|  12 | nncf_module.bert.encoder.layer.1.attention.self.query.bias      | [768]       |      768 |     256 |  0.666667  |
+|  13 | nncf_module.bert.encoder.layer.1.attention.self.query.weight    | [768, 768]  |   589824 |  196608 |  0.666667  |
+|  14 | nncf_module.bert.encoder.layer.1.attention.self.key.bias        | [768]       |      768 |     256 |  0.666667  |
+|  15 | nncf_module.bert.encoder.layer.1.attention.self.key.weight      | [768, 768]  |   589824 |  196608 |  0.666667  |
+|  16 | nncf_module.bert.encoder.layer.1.attention.self.value.bias      | [768]       |      768 |     256 |  0.666667  |
+|  17 | nncf_module.bert.encoder.layer.1.attention.self.value.weight    | [768, 768]  |   589824 |  196608 |  0.666667  |
+|  18 | nncf_module.bert.encoder.layer.1.attention.output.dense.bias    | [768]       |      768 |     768 |  0         |
+|  19 | nncf_module.bert.encoder.layer.1.attention.output.dense.weight  | [768, 768]  |   589824 |  196608 |  0.666667  |
+|  20 | nncf_module.bert.encoder.layer.1.intermediate.dense.bias        | [3072]      |     3072 |    2923 |  0.0485026 |
+|  21 | nncf_module.bert.encoder.layer.1.intermediate.dense.weight      | [3072, 768] |  2359296 | 2244864 |  0.0485026 |
+|  22 | nncf_module.bert.encoder.layer.1.output.dense.bias              | [768]       |      768 |     768 |  0         |
+|  23 | nncf_module.bert.encoder.layer.1.output.dense.weight            | [768, 3072] |  2359296 | 2244864 |  0.0485026 |
+|  24 | nncf_module.bert.encoder.layer.2.attention.self.query.bias      | [768]       |      768 |     768 |  0         |
+|  25 | nncf_module.bert.encoder.layer.2.attention.self.query.weight    | [768, 768]  |   589824 |  589824 |  0         |
+|  26 | nncf_module.bert.encoder.layer.2.attention.self.key.bias        | [768]       |      768 |     768 |  0         |
+|  27 | nncf_module.bert.encoder.layer.2.attention.self.key.weight      | [768, 768]  |   589824 |  589824 |  0         |
+|  28 | nncf_module.bert.encoder.layer.2.attention.self.value.bias      | [768]       |      768 |     768 |  0         |
+|  29 | nncf_module.bert.encoder.layer.2.attention.self.value.weight    | [768, 768]  |   589824 |  589824 |  0         |
+|  30 | nncf_module.bert.encoder.layer.2.attention.output.dense.bias    | [768]       |      768 |     768 |  0         |
+|  31 | nncf_module.bert.encoder.layer.2.attention.output.dense.weight  | [768, 768]  |   589824 |  589824 |  0         |
+|  32 | nncf_module.bert.encoder.layer.2.intermediate.dense.bias        | [3072]      |     3072 |    2980 |  0.0299479 |
+|  33 | nncf_module.bert.encoder.layer.2.intermediate.dense.weight      | [3072, 768] |  2359296 | 2288640 |  0.0299479 |
+|  34 | nncf_module.bert.encoder.layer.2.output.dense.bias              | [768]       |      768 |     768 |  0         |
+|  35 | nncf_module.bert.encoder.layer.2.output.dense.weight            | [768, 3072] |  2359296 | 2288640 |  0.0299479 |
+|  36 | nncf_module.bert.encoder.layer.3.attention.self.query.bias      | [768]       |      768 |     768 |  0         |
+|  37 | nncf_module.bert.encoder.layer.3.attention.self.query.weight    | [768, 768]  |   589824 |  589824 |  0         |
+|  38 | nncf_module.bert.encoder.layer.3.attention.self.key.bias        | [768]       |      768 |     768 |  0         |
+|  39 | nncf_module.bert.encoder.layer.3.attention.self.key.weight      | [768, 768]  |   589824 |  589824 |  0         |
+|  40 | nncf_module.bert.encoder.layer.3.attention.self.value.bias      | [768]       |      768 |     768 |  0         |
+|  41 | nncf_module.bert.encoder.layer.3.attention.self.value.weight    | [768, 768]  |   589824 |  589824 |  0         |
+|  42 | nncf_module.bert.encoder.layer.3.attention.output.dense.bias    | [768]       |      768 |     768 |  0         |
+|  43 | nncf_module.bert.encoder.layer.3.attention.output.dense.weight  | [768, 768]  |   589824 |  589824 |  0         |
+|  44 | nncf_module.bert.encoder.layer.3.intermediate.dense.bias        | [3072]      |     3072 |    2957 |  0.0374349 |
+|  45 | nncf_module.bert.encoder.layer.3.intermediate.dense.weight      | [3072, 768] |  2359296 | 2270976 |  0.0374349 |
+|  46 | nncf_module.bert.encoder.layer.3.output.dense.bias              | [768]       |      768 |     768 |  0         |
+|  47 | nncf_module.bert.encoder.layer.3.output.dense.weight            | [768, 3072] |  2359296 | 2270976 |  0.0374349 |
+|  48 | nncf_module.bert.encoder.layer.4.attention.self.query.bias      | [768]       |      768 |     768 |  0         |
+|  49 | nncf_module.bert.encoder.layer.4.attention.self.query.weight    | [768, 768]  |   589824 |  589824 |  0         |
+|  50 | nncf_module.bert.encoder.layer.4.attention.self.key.bias        | [768]       |      768 |     768 |  0         |
+|  51 | nncf_module.bert.encoder.layer.4.attention.self.key.weight      | [768, 768]  |   589824 |  589824 |  0         |
+|  52 | nncf_module.bert.encoder.layer.4.attention.self.value.bias      | [768]       |      768 |     768 |  0         |
+|  53 | nncf_module.bert.encoder.layer.4.attention.self.value.weight    | [768, 768]  |   589824 |  589824 |  0         |
+|  54 | nncf_module.bert.encoder.layer.4.attention.output.dense.bias    | [768]       |      768 |     768 |  0         |
+|  55 | nncf_module.bert.encoder.layer.4.attention.output.dense.weight  | [768, 768]  |   589824 |  589824 |  0         |
+|  56 | nncf_module.bert.encoder.layer.4.intermediate.dense.bias        | [3072]      |     3072 |    2906 |  0.0540365 |
+|  57 | nncf_module.bert.encoder.layer.4.intermediate.dense.weight      | [3072, 768] |  2359296 | 2231808 |  0.0540365 |
+|  58 | nncf_module.bert.encoder.layer.4.output.dense.bias              | [768]       |      768 |     768 |  0         |
+|  59 | nncf_module.bert.encoder.layer.4.output.dense.weight            | [768, 3072] |  2359296 | 2231808 |  0.0540365 |
+|  60 | nncf_module.bert.encoder.layer.5.attention.self.query.bias      | [768]       |      768 |     768 |  0         |
+|  61 | nncf_module.bert.encoder.layer.5.attention.self.query.weight    | [768, 768]  |   589824 |  589824 |  0         |
+|  62 | nncf_module.bert.encoder.layer.5.attention.self.key.bias        | [768]       |      768 |     768 |  0         |
+|  63 | nncf_module.bert.encoder.layer.5.attention.self.key.weight      | [768, 768]  |   589824 |  589824 |  0         |
+|  64 | nncf_module.bert.encoder.layer.5.attention.self.value.bias      | [768]       |      768 |     768 |  0         |
+|  65 | nncf_module.bert.encoder.layer.5.attention.self.value.weight    | [768, 768]  |   589824 |  589824 |  0         |
+|  66 | nncf_module.bert.encoder.layer.5.attention.output.dense.bias    | [768]       |      768 |     768 |  0         |
+|  67 | nncf_module.bert.encoder.layer.5.attention.output.dense.weight  | [768, 768]  |   589824 |  589824 |  0         |
+|  68 | nncf_module.bert.encoder.layer.5.intermediate.dense.bias        | [3072]      |     3072 |    2865 |  0.0673828 |
+|  69 | nncf_module.bert.encoder.layer.5.intermediate.dense.weight      | [3072, 768] |  2359296 | 2200320 |  0.0673828 |
+|  70 | nncf_module.bert.encoder.layer.5.output.dense.bias              | [768]       |      768 |     768 |  0         |
+|  71 | nncf_module.bert.encoder.layer.5.output.dense.weight            | [768, 3072] |  2359296 | 2200320 |  0.0673828 |
+|  72 | nncf_module.bert.encoder.layer.6.attention.self.query.bias      | [768]       |      768 |     768 |  0         |
+|  73 | nncf_module.bert.encoder.layer.6.attention.self.query.weight    | [768, 768]  |   589824 |  589824 |  0         |
+|  74 | nncf_module.bert.encoder.layer.6.attention.self.key.bias        | [768]       |      768 |     768 |  0         |
+|  75 | nncf_module.bert.encoder.layer.6.attention.self.key.weight      | [768, 768]  |   589824 |  589824 |  0         |
+|  76 | nncf_module.bert.encoder.layer.6.attention.self.value.bias      | [768]       |      768 |     768 |  0         |
+|  77 | nncf_module.bert.encoder.layer.6.attention.self.value.weight    | [768, 768]  |   589824 |  589824 |  0         |
+|  78 | nncf_module.bert.encoder.layer.6.attention.output.dense.bias    | [768]       |      768 |     768 |  0         |
+|  79 | nncf_module.bert.encoder.layer.6.attention.output.dense.weight  | [768, 768]  |   589824 |  589824 |  0         |
+|  80 | nncf_module.bert.encoder.layer.6.intermediate.dense.bias        | [3072]      |     3072 |    2759 |  0.101888  |
+|  81 | nncf_module.bert.encoder.layer.6.intermediate.dense.weight      | [3072, 768] |  2359296 | 2118912 |  0.101888  |
+|  82 | nncf_module.bert.encoder.layer.6.output.dense.bias              | [768]       |      768 |     768 |  0         |
+|  83 | nncf_module.bert.encoder.layer.6.output.dense.weight            | [768, 3072] |  2359296 | 2118912 |  0.101888  |
+|  84 | nncf_module.bert.encoder.layer.7.attention.self.query.bias      | [768]       |      768 |     768 |  0         |
+|  85 | nncf_module.bert.encoder.layer.7.attention.self.query.weight    | [768, 768]  |   589824 |  589824 |  0         |
+|  86 | nncf_module.bert.encoder.layer.7.attention.self.key.bias        | [768]       |      768 |     768 |  0         |
+|  87 | nncf_module.bert.encoder.layer.7.attention.self.key.weight      | [768, 768]  |   589824 |  589824 |  0         |
+|  88 | nncf_module.bert.encoder.layer.7.attention.self.value.bias      | [768]       |      768 |     768 |  0         |
+|  89 | nncf_module.bert.encoder.layer.7.attention.self.value.weight    | [768, 768]  |   589824 |  589824 |  0         |
+|  90 | nncf_module.bert.encoder.layer.7.attention.output.dense.bias    | [768]       |      768 |     768 |  0         |
+|  91 | nncf_module.bert.encoder.layer.7.attention.output.dense.weight  | [768, 768]  |   589824 |  589824 |  0         |
+|  92 | nncf_module.bert.encoder.layer.7.intermediate.dense.bias        | [3072]      |     3072 |    2569 |  0.163737  |
+|  93 | nncf_module.bert.encoder.layer.7.intermediate.dense.weight      | [3072, 768] |  2359296 | 1972992 |  0.163737  |
+|  94 | nncf_module.bert.encoder.layer.7.output.dense.bias              | [768]       |      768 |     768 |  0         |
+|  95 | nncf_module.bert.encoder.layer.7.output.dense.weight            | [768, 3072] |  2359296 | 1972992 |  0.163737  |
+|  96 | nncf_module.bert.encoder.layer.8.attention.self.query.bias      | [768]       |      768 |     256 |  0.666667  |
+|  97 | nncf_module.bert.encoder.layer.8.attention.self.query.weight    | [768, 768]  |   589824 |  196608 |  0.666667  |
+|  98 | nncf_module.bert.encoder.layer.8.attention.self.key.bias        | [768]       |      768 |     256 |  0.666667  |
+|  99 | nncf_module.bert.encoder.layer.8.attention.self.key.weight      | [768, 768]  |   589824 |  196608 |  0.666667  |
+| 100 | nncf_module.bert.encoder.layer.8.attention.self.value.bias      | [768]       |      768 |     256 |  0.666667  |
+| 101 | nncf_module.bert.encoder.layer.8.attention.self.value.weight    | [768, 768]  |   589824 |  196608 |  0.666667  |
+| 102 | nncf_module.bert.encoder.layer.8.attention.output.dense.bias    | [768]       |      768 |     768 |  0         |
+| 103 | nncf_module.bert.encoder.layer.8.attention.output.dense.weight  | [768, 768]  |   589824 |  196608 |  0.666667  |
+| 104 | nncf_module.bert.encoder.layer.8.intermediate.dense.bias        | [3072]      |     3072 |    2094 |  0.318359  |
+| 105 | nncf_module.bert.encoder.layer.8.intermediate.dense.weight      | [3072, 768] |  2359296 | 1608192 |  0.318359  |
+| 106 | nncf_module.bert.encoder.layer.8.output.dense.bias              | [768]       |      768 |     768 |  0         |
+| 107 | nncf_module.bert.encoder.layer.8.output.dense.weight            | [768, 3072] |  2359296 | 1608192 |  0.318359  |
+| 108 | nncf_module.bert.encoder.layer.9.attention.self.query.bias      | [768]       |      768 |     768 |  0         |
+| 109 | nncf_module.bert.encoder.layer.9.attention.self.query.weight    | [768, 768]  |   589824 |  589824 |  0         |
+| 110 | nncf_module.bert.encoder.layer.9.attention.self.key.bias        | [768]       |      768 |     768 |  0         |
+| 111 | nncf_module.bert.encoder.layer.9.attention.self.key.weight      | [768, 768]  |   589824 |  589824 |  0         |
+| 112 | nncf_module.bert.encoder.layer.9.attention.self.value.bias      | [768]       |      768 |     768 |  0         |
+| 113 | nncf_module.bert.encoder.layer.9.attention.self.value.weight    | [768, 768]  |   589824 |  589824 |  0         |
+| 114 | nncf_module.bert.encoder.layer.9.attention.output.dense.bias    | [768]       |      768 |     768 |  0         |
+| 115 | nncf_module.bert.encoder.layer.9.attention.output.dense.weight  | [768, 768]  |   589824 |  589824 |  0         |
+| 116 | nncf_module.bert.encoder.layer.9.intermediate.dense.bias        | [3072]      |     3072 |    1009 |  0.671549  |
+| 117 | nncf_module.bert.encoder.layer.9.intermediate.dense.weight      | [3072, 768] |  2359296 |  774912 |  0.671549  |
+| 118 | nncf_module.bert.encoder.layer.9.output.dense.bias              | [768]       |      768 |     768 |  0         |
+| 119 | nncf_module.bert.encoder.layer.9.output.dense.weight            | [768, 3072] |  2359296 |  774912 |  0.671549  |
+| 120 | nncf_module.bert.encoder.layer.10.attention.self.query.bias     | [768]       |      768 |     320 |  0.583333  |
+| 121 | nncf_module.bert.encoder.layer.10.attention.self.query.weight   | [768, 768]  |   589824 |  245760 |  0.583333  |
+| 122 | nncf_module.bert.encoder.layer.10.attention.self.key.bias       | [768]       |      768 |     320 |  0.583333  |
+| 123 | nncf_module.bert.encoder.layer.10.attention.self.key.weight     | [768, 768]  |   589824 |  245760 |  0.583333  |
+| 124 | nncf_module.bert.encoder.layer.10.attention.self.value.bias     | [768]       |      768 |     320 |  0.583333  |
+| 125 | nncf_module.bert.encoder.layer.10.attention.self.value.weight   | [768, 768]  |   589824 |  245760 |  0.583333  |
+| 126 | nncf_module.bert.encoder.layer.10.attention.output.dense.bias   | [768]       |      768 |     768 |  0         |
+| 127 | nncf_module.bert.encoder.layer.10.attention.output.dense.weight | [768, 768]  |   589824 |  245760 |  0.583333  |
+| 128 | nncf_module.bert.encoder.layer.10.intermediate.dense.bias       | [3072]      |     3072 |     743 |  0.758138  |
+| 129 | nncf_module.bert.encoder.layer.10.intermediate.dense.weight     | [3072, 768] |  2359296 |  570624 |  0.758138  |
+| 130 | nncf_module.bert.encoder.layer.10.output.dense.bias             | [768]       |      768 |     768 |  0         |
+| 131 | nncf_module.bert.encoder.layer.10.output.dense.weight           | [768, 3072] |  2359296 |  570624 |  0.758138  |
+| 132 | nncf_module.bert.encoder.layer.11.attention.self.query.bias     | [768]       |      768 |     192 |  0.75      |
+| 133 | nncf_module.bert.encoder.layer.11.attention.self.query.weight   | [768, 768]  |   589824 |  147456 |  0.75      |
+| 134 | nncf_module.bert.encoder.layer.11.attention.self.key.bias       | [768]       |      768 |     192 |  0.75      |
+| 135 | nncf_module.bert.encoder.layer.11.attention.self.key.weight     | [768, 768]  |   589824 |  147456 |  0.75      |
+| 136 | nncf_module.bert.encoder.layer.11.attention.self.value.bias     | [768]       |      768 |     192 |  0.75      |
+| 137 | nncf_module.bert.encoder.layer.11.attention.self.value.weight   | [768, 768]  |   589824 |  147456 |  0.75      |
+| 138 | nncf_module.bert.encoder.layer.11.attention.output.dense.bias   | [768]       |      768 |     768 |  0         |
+| 139 | nncf_module.bert.encoder.layer.11.attention.output.dense.weight | [768, 768]  |   589824 |  147456 |  0.75      |
+| 140 | nncf_module.bert.encoder.layer.11.intermediate.dense.bias       | [3072]      |     3072 |     605 |  0.80306   |
+| 141 | nncf_module.bert.encoder.layer.11.intermediate.dense.weight     | [3072, 768] |  2359296 |  464640 |  0.80306   |
+| 142 | nncf_module.bert.encoder.layer.11.output.dense.bias             | [768]       |      768 |     768 |  0         |
+| 143 | nncf_module.bert.encoder.layer.11.output.dense.weight           | [768, 3072] |  2359296 |  464640 |  0.80306   |
+| 144 | nncf_module.qa_outputs.bias                                     | [2]         |        2 |       2 |  0         |
+| 145 | nncf_module.qa_outputs.weight                                   | [2, 768]    |     1536 |    1536 |  0         |

checkpoint-35000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0c69179b142350c0900215f2171560e60ce8071203a2a11acc717b62b70da947
+size 872456613

checkpoint-35000/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c65c8c3531e309f7ee0524ab8b7e4e35d401ee8b24f40c0fe91f08b4d377ef68
+size 776435185

checkpoint-35000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:53f9d1332dd8b0177c10a70c94f642ef694da36859eccb739ae846f1b2fc39d8
+size 14503

checkpoint-35000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:883abecad344373d12706b2d3326e6572c4bb5d25ea9ce27fa2f2c6f3496b303
+size 623

checkpoint-35000/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}

checkpoint-35000/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-35000/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "bert-base-uncased", "tokenizer_class": "BertTokenizer"}

checkpoint-35000/torch_mask_structures.csv ADDED Viewed

	@@ -0,0 +1,73 @@

+,pt_module_name,block_id,weight_shape,prune_w_shape,bias_shape,prune_b_shape,head_id_to_keep,nncf_graph_node
+0,nncf_module.bert.encoder.layer.0.attention.output.dense,0,"(768, 768)","(768, 192)","(768,)","(768,)","[3, 8, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
+1,nncf_module.bert.encoder.layer.0.attention.self.value,0,"(768, 768)","(192, 768)","(768,)","(192,)","[3, 8, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
+2,nncf_module.bert.encoder.layer.0.attention.self.key,0,"(768, 768)","(192, 768)","(768,)","(192,)","[3, 8, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
+3,nncf_module.bert.encoder.layer.0.attention.self.query,0,"(768, 768)","(192, 768)","(768,)","(192,)","[3, 8, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
+4,nncf_module.bert.encoder.layer.0.output.dense,1,"(768, 3072)","(768, 2940)","(768,)","(768,)",skip reporting,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[output]/NNCFLinear[dense]/linear_0
+5,nncf_module.bert.encoder.layer.0.intermediate.dense,1,"(3072, 768)","(2940, 768)","(3072,)","(2940,)",skip reporting,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
+6,nncf_module.bert.encoder.layer.1.attention.self.key,2,"(768, 768)","(256, 768)","(768,)","(256,)","[4, 7, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
+7,nncf_module.bert.encoder.layer.1.attention.self.query,2,"(768, 768)","(256, 768)","(768,)","(256,)","[4, 7, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
+8,nncf_module.bert.encoder.layer.1.attention.output.dense,2,"(768, 768)","(768, 256)","(768,)","(768,)","[4, 7, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
+9,nncf_module.bert.encoder.layer.1.attention.self.value,2,"(768, 768)","(256, 768)","(768,)","(256,)","[4, 7, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
+10,nncf_module.bert.encoder.layer.1.intermediate.dense,3,"(3072, 768)","(2923, 768)","(3072,)","(2923,)",skip reporting,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
+11,nncf_module.bert.encoder.layer.1.output.dense,3,"(768, 3072)","(768, 2923)","(768,)","(768,)",skip reporting,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[output]/NNCFLinear[dense]/linear_0
+12,nncf_module.bert.encoder.layer.2.attention.self.value,4,"(768, 768)","(768, 768)","(768,)","(768,)","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
+13,nncf_module.bert.encoder.layer.2.attention.output.dense,4,"(768, 768)","(768, 768)","(768,)","(768,)","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
+14,nncf_module.bert.encoder.layer.2.attention.self.key,4,"(768, 768)","(768, 768)","(768,)","(768,)","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
+15,nncf_module.bert.encoder.layer.2.attention.self.query,4,"(768, 768)","(768, 768)","(768,)","(768,)","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
+16,nncf_module.bert.encoder.layer.2.output.dense,5,"(768, 3072)","(768, 2980)","(768,)","(768,)",skip reporting,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[output]/NNCFLinear[dense]/linear_0
+17,nncf_module.bert.encoder.layer.2.intermediate.dense,5,"(3072, 768)","(2980, 768)","(3072,)","(2980,)",skip reporting,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
+18,nncf_module.bert.encoder.layer.3.attention.output.dense,6,"(768, 768)","(768, 768)","(768,)","(768,)","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
+19,nncf_module.bert.encoder.layer.3.attention.self.value,6,"(768, 768)","(768, 768)","(768,)","(768,)","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
+20,nncf_module.bert.encoder.layer.3.attention.self.key,6,"(768, 768)","(768, 768)","(768,)","(768,)","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
+21,nncf_module.bert.encoder.layer.3.attention.self.query,6,"(768, 768)","(768, 768)","(768,)","(768,)","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
+22,nncf_module.bert.encoder.layer.3.intermediate.dense,7,"(3072, 768)","(2957, 768)","(3072,)","(2957,)",skip reporting,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
+23,nncf_module.bert.encoder.layer.3.output.dense,7,"(768, 3072)","(768, 2957)","(768,)","(768,)",skip reporting,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[output]/NNCFLinear[dense]/linear_0
+24,nncf_module.bert.encoder.layer.4.attention.self.key,8,"(768, 768)","(768, 768)","(768,)","(768,)","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
+25,nncf_module.bert.encoder.layer.4.attention.self.value,8,"(768, 768)","(768, 768)","(768,)","(768,)","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
+26,nncf_module.bert.encoder.layer.4.attention.output.dense,8,"(768, 768)","(768, 768)","(768,)","(768,)","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
+27,nncf_module.bert.encoder.layer.4.attention.self.query,8,"(768, 768)","(768, 768)","(768,)","(768,)","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
+28,nncf_module.bert.encoder.layer.4.intermediate.dense,9,"(3072, 768)","(2906, 768)","(3072,)","(2906,)",skip reporting,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
+29,nncf_module.bert.encoder.layer.4.output.dense,9,"(768, 3072)","(768, 2906)","(768,)","(768,)",skip reporting,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[output]/NNCFLinear[dense]/linear_0
+30,nncf_module.bert.encoder.layer.5.attention.self.value,10,"(768, 768)","(768, 768)","(768,)","(768,)","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
+31,nncf_module.bert.encoder.layer.5.attention.output.dense,10,"(768, 768)","(768, 768)","(768,)","(768,)","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
+32,nncf_module.bert.encoder.layer.5.attention.self.query,10,"(768, 768)","(768, 768)","(768,)","(768,)","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
+33,nncf_module.bert.encoder.layer.5.attention.self.key,10,"(768, 768)","(768, 768)","(768,)","(768,)","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
+34,nncf_module.bert.encoder.layer.5.intermediate.dense,11,"(3072, 768)","(2865, 768)","(3072,)","(2865,)",skip reporting,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
+35,nncf_module.bert.encoder.layer.5.output.dense,11,"(768, 3072)","(768, 2865)","(768,)","(768,)",skip reporting,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[output]/NNCFLinear[dense]/linear_0
+36,nncf_module.bert.encoder.layer.6.attention.self.value,12,"(768, 768)","(768, 768)","(768,)","(768,)","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
+37,nncf_module.bert.encoder.layer.6.attention.self.query,12,"(768, 768)","(768, 768)","(768,)","(768,)","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
+38,nncf_module.bert.encoder.layer.6.attention.self.key,12,"(768, 768)","(768, 768)","(768,)","(768,)","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
+39,nncf_module.bert.encoder.layer.6.attention.output.dense,12,"(768, 768)","(768, 768)","(768,)","(768,)","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
+40,nncf_module.bert.encoder.layer.6.output.dense,13,"(768, 3072)","(768, 2759)","(768,)","(768,)",skip reporting,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[output]/NNCFLinear[dense]/linear_0
+41,nncf_module.bert.encoder.layer.6.intermediate.dense,13,"(3072, 768)","(2759, 768)","(3072,)","(2759,)",skip reporting,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
+42,nncf_module.bert.encoder.layer.7.attention.self.query,14,"(768, 768)","(768, 768)","(768,)","(768,)","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
+43,nncf_module.bert.encoder.layer.7.attention.self.key,14,"(768, 768)","(768, 768)","(768,)","(768,)","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
+44,nncf_module.bert.encoder.layer.7.attention.self.value,14,"(768, 768)","(768, 768)","(768,)","(768,)","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
+45,nncf_module.bert.encoder.layer.7.attention.output.dense,14,"(768, 768)","(768, 768)","(768,)","(768,)","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
+46,nncf_module.bert.encoder.layer.7.intermediate.dense,15,"(3072, 768)","(2569, 768)","(3072,)","(2569,)",skip reporting,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
+47,nncf_module.bert.encoder.layer.7.output.dense,15,"(768, 3072)","(768, 2569)","(768,)","(768,)",skip reporting,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[output]/NNCFLinear[dense]/linear_0
+48,nncf_module.bert.encoder.layer.8.attention.self.key,16,"(768, 768)","(256, 768)","(768,)","(256,)","[1, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
+49,nncf_module.bert.encoder.layer.8.attention.self.value,16,"(768, 768)","(256, 768)","(768,)","(256,)","[1, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
+50,nncf_module.bert.encoder.layer.8.attention.self.query,16,"(768, 768)","(256, 768)","(768,)","(256,)","[1, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
+51,nncf_module.bert.encoder.layer.8.attention.output.dense,16,"(768, 768)","(768, 256)","(768,)","(768,)","[1, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
+52,nncf_module.bert.encoder.layer.8.output.dense,17,"(768, 3072)","(768, 2094)","(768,)","(768,)",skip reporting,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[output]/NNCFLinear[dense]/linear_0
+53,nncf_module.bert.encoder.layer.8.intermediate.dense,17,"(3072, 768)","(2094, 768)","(3072,)","(2094,)",skip reporting,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
+54,nncf_module.bert.encoder.layer.9.attention.self.query,18,"(768, 768)","(768, 768)","(768,)","(768,)","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
+55,nncf_module.bert.encoder.layer.9.attention.output.dense,18,"(768, 768)","(768, 768)","(768,)","(768,)","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
+56,nncf_module.bert.encoder.layer.9.attention.self.value,18,"(768, 768)","(768, 768)","(768,)","(768,)","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
+57,nncf_module.bert.encoder.layer.9.attention.self.key,18,"(768, 768)","(768, 768)","(768,)","(768,)","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
+58,nncf_module.bert.encoder.layer.9.intermediate.dense,19,"(3072, 768)","(1009, 768)","(3072,)","(1009,)",skip reporting,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
+59,nncf_module.bert.encoder.layer.9.output.dense,19,"(768, 3072)","(768, 1009)","(768,)","(768,)",skip reporting,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[output]/NNCFLinear[dense]/linear_0
+60,nncf_module.bert.encoder.layer.10.attention.self.key,20,"(768, 768)","(320, 768)","(768,)","(320,)","[0, 3, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
+61,nncf_module.bert.encoder.layer.10.attention.self.value,20,"(768, 768)","(320, 768)","(768,)","(320,)","[0, 3, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
+62,nncf_module.bert.encoder.layer.10.attention.self.query,20,"(768, 768)","(320, 768)","(768,)","(320,)","[0, 3, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
+63,nncf_module.bert.encoder.layer.10.attention.output.dense,20,"(768, 768)","(768, 320)","(768,)","(768,)","[0, 3, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
+64,nncf_module.bert.encoder.layer.10.output.dense,21,"(768, 3072)","(768, 743)","(768,)","(768,)",skip reporting,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[output]/NNCFLinear[dense]/linear_0
+65,nncf_module.bert.encoder.layer.10.intermediate.dense,21,"(3072, 768)","(743, 768)","(3072,)","(743,)",skip reporting,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
+66,nncf_module.bert.encoder.layer.11.attention.output.dense,22,"(768, 768)","(768, 192)","(768,)","(768,)","[1, 2, 9]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
+67,nncf_module.bert.encoder.layer.11.attention.self.key,22,"(768, 768)","(192, 768)","(768,)","(192,)","[1, 2, 9]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
+68,nncf_module.bert.encoder.layer.11.attention.self.value,22,"(768, 768)","(192, 768)","(768,)","(192,)","[1, 2, 9]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
+69,nncf_module.bert.encoder.layer.11.attention.self.query,22,"(768, 768)","(192, 768)","(768,)","(192,)","[1, 2, 9]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
+70,nncf_module.bert.encoder.layer.11.intermediate.dense,23,"(3072, 768)","(605, 768)","(3072,)","(605,)",skip reporting,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
+71,nncf_module.bert.encoder.layer.11.output.dense,23,"(768, 3072)","(768, 605)","(768,)","(768,)",skip reporting,BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[output]/NNCFLinear[dense]/linear_0

checkpoint-35000/torch_mask_structures.md ADDED Viewed

	@@ -0,0 +1,74 @@

+|    | pt_module_name                                           |   block_id | weight_shape   | prune_w_shape   | bias_shape   | prune_b_shape   | head_id_to_keep                        | nncf_graph_node                                                                                                                                                           |
+|---:|:---------------------------------------------------------|-----------:|:---------------|:----------------|:-------------|:----------------|:---------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+|  0 | nncf_module.bert.encoder.layer.0.attention.output.dense  |          0 | (768, 768)     | (768, 192)      | (768,)       | (768,)          | [3, 8, 10]                             | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0   |
+|  1 | nncf_module.bert.encoder.layer.0.attention.self.value    |          0 | (768, 768)     | (192, 768)      | (768,)       | (192,)          | [3, 8, 10]                             | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0  |
+|  2 | nncf_module.bert.encoder.layer.0.attention.self.key      |          0 | (768, 768)     | (192, 768)      | (768,)       | (192,)          | [3, 8, 10]                             | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0    |
+|  3 | nncf_module.bert.encoder.layer.0.attention.self.query    |          0 | (768, 768)     | (192, 768)      | (768,)       | (192,)          | [3, 8, 10]                             | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0  |
+|  4 | nncf_module.bert.encoder.layer.0.output.dense            |          1 | (768, 3072)    | (768, 2940)     | (768,)       | (768,)          | skip reporting                         | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[output]/NNCFLinear[dense]/linear_0                                |
+|  5 | nncf_module.bert.encoder.layer.0.intermediate.dense      |          1 | (3072, 768)    | (2940, 768)     | (3072,)      | (2940,)         | skip reporting                         | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0                    |
+|  6 | nncf_module.bert.encoder.layer.1.attention.self.key      |          2 | (768, 768)     | (256, 768)      | (768,)       | (256,)          | [4, 7, 10, 11]                         | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0    |
+|  7 | nncf_module.bert.encoder.layer.1.attention.self.query    |          2 | (768, 768)     | (256, 768)      | (768,)       | (256,)          | [4, 7, 10, 11]                         | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0  |
+|  8 | nncf_module.bert.encoder.layer.1.attention.output.dense  |          2 | (768, 768)     | (768, 256)      | (768,)       | (768,)          | [4, 7, 10, 11]                         | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0   |
+|  9 | nncf_module.bert.encoder.layer.1.attention.self.value    |          2 | (768, 768)     | (256, 768)      | (768,)       | (256,)          | [4, 7, 10, 11]                         | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0  |
+| 10 | nncf_module.bert.encoder.layer.1.intermediate.dense      |          3 | (3072, 768)    | (2923, 768)     | (3072,)      | (2923,)         | skip reporting                         | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0                    |
+| 11 | nncf_module.bert.encoder.layer.1.output.dense            |          3 | (768, 3072)    | (768, 2923)     | (768,)       | (768,)          | skip reporting                         | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[output]/NNCFLinear[dense]/linear_0                                |
+| 12 | nncf_module.bert.encoder.layer.2.attention.self.value    |          4 | (768, 768)     | (768, 768)      | (768,)       | (768,)          | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0  |
+| 13 | nncf_module.bert.encoder.layer.2.attention.output.dense  |          4 | (768, 768)     | (768, 768)      | (768,)       | (768,)          | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0   |
+| 14 | nncf_module.bert.encoder.layer.2.attention.self.key      |          4 | (768, 768)     | (768, 768)      | (768,)       | (768,)          | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0    |
+| 15 | nncf_module.bert.encoder.layer.2.attention.self.query    |          4 | (768, 768)     | (768, 768)      | (768,)       | (768,)          | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0  |
+| 16 | nncf_module.bert.encoder.layer.2.output.dense            |          5 | (768, 3072)    | (768, 2980)     | (768,)       | (768,)          | skip reporting                         | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[output]/NNCFLinear[dense]/linear_0                                |
+| 17 | nncf_module.bert.encoder.layer.2.intermediate.dense      |          5 | (3072, 768)    | (2980, 768)     | (3072,)      | (2980,)         | skip reporting                         | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0                    |
+| 18 | nncf_module.bert.encoder.layer.3.attention.output.dense  |          6 | (768, 768)     | (768, 768)      | (768,)       | (768,)          | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0   |
+| 19 | nncf_module.bert.encoder.layer.3.attention.self.value    |          6 | (768, 768)     | (768, 768)      | (768,)       | (768,)          | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0  |
+| 20 | nncf_module.bert.encoder.layer.3.attention.self.key      |          6 | (768, 768)     | (768, 768)      | (768,)       | (768,)          | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0    |
+| 21 | nncf_module.bert.encoder.layer.3.attention.self.query    |          6 | (768, 768)     | (768, 768)      | (768,)       | (768,)          | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0  |
+| 22 | nncf_module.bert.encoder.layer.3.intermediate.dense      |          7 | (3072, 768)    | (2957, 768)     | (3072,)      | (2957,)         | skip reporting                         | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0                    |
+| 23 | nncf_module.bert.encoder.layer.3.output.dense            |          7 | (768, 3072)    | (768, 2957)     | (768,)       | (768,)          | skip reporting                         | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[output]/NNCFLinear[dense]/linear_0                                |
+| 24 | nncf_module.bert.encoder.layer.4.attention.self.key      |          8 | (768, 768)     | (768, 768)      | (768,)       | (768,)          | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0    |
+| 25 | nncf_module.bert.encoder.layer.4.attention.self.value    |          8 | (768, 768)     | (768, 768)      | (768,)       | (768,)          | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0  |
+| 26 | nncf_module.bert.encoder.layer.4.attention.output.dense  |          8 | (768, 768)     | (768, 768)      | (768,)       | (768,)          | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0   |
+| 27 | nncf_module.bert.encoder.layer.4.attention.self.query    |          8 | (768, 768)     | (768, 768)      | (768,)       | (768,)          | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0  |
+| 28 | nncf_module.bert.encoder.layer.4.intermediate.dense      |          9 | (3072, 768)    | (2906, 768)     | (3072,)      | (2906,)         | skip reporting                         | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0                    |
+| 29 | nncf_module.bert.encoder.layer.4.output.dense            |          9 | (768, 3072)    | (768, 2906)     | (768,)       | (768,)          | skip reporting                         | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[output]/NNCFLinear[dense]/linear_0                                |
+| 30 | nncf_module.bert.encoder.layer.5.attention.self.value    |         10 | (768, 768)     | (768, 768)      | (768,)       | (768,)          | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0  |
+| 31 | nncf_module.bert.encoder.layer.5.attention.output.dense  |         10 | (768, 768)     | (768, 768)      | (768,)       | (768,)          | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0   |
+| 32 | nncf_module.bert.encoder.layer.5.attention.self.query    |         10 | (768, 768)     | (768, 768)      | (768,)       | (768,)          | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0  |
+| 33 | nncf_module.bert.encoder.layer.5.attention.self.key      |         10 | (768, 768)     | (768, 768)      | (768,)       | (768,)          | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0    |
+| 34 | nncf_module.bert.encoder.layer.5.intermediate.dense      |         11 | (3072, 768)    | (2865, 768)     | (3072,)      | (2865,)         | skip reporting                         | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0                    |
+| 35 | nncf_module.bert.encoder.layer.5.output.dense            |         11 | (768, 3072)    | (768, 2865)     | (768,)       | (768,)          | skip reporting                         | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[output]/NNCFLinear[dense]/linear_0                                |
+| 36 | nncf_module.bert.encoder.layer.6.attention.self.value    |         12 | (768, 768)     | (768, 768)      | (768,)       | (768,)          | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0  |
+| 37 | nncf_module.bert.encoder.layer.6.attention.self.query    |         12 | (768, 768)     | (768, 768)      | (768,)       | (768,)          | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0  |
+| 38 | nncf_module.bert.encoder.layer.6.attention.self.key      |         12 | (768, 768)     | (768, 768)      | (768,)       | (768,)          | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0    |
+| 39 | nncf_module.bert.encoder.layer.6.attention.output.dense  |         12 | (768, 768)     | (768, 768)      | (768,)       | (768,)          | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0   |
+| 40 | nncf_module.bert.encoder.layer.6.output.dense            |         13 | (768, 3072)    | (768, 2759)     | (768,)       | (768,)          | skip reporting                         | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[output]/NNCFLinear[dense]/linear_0                                |
+| 41 | nncf_module.bert.encoder.layer.6.intermediate.dense      |         13 | (3072, 768)    | (2759, 768)     | (3072,)      | (2759,)         | skip reporting                         | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0                    |
+| 42 | nncf_module.bert.encoder.layer.7.attention.self.query    |         14 | (768, 768)     | (768, 768)      | (768,)       | (768,)          | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0  |
+| 43 | nncf_module.bert.encoder.layer.7.attention.self.key      |         14 | (768, 768)     | (768, 768)      | (768,)       | (768,)          | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0    |
+| 44 | nncf_module.bert.encoder.layer.7.attention.self.value    |         14 | (768, 768)     | (768, 768)      | (768,)       | (768,)          | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0  |
+| 45 | nncf_module.bert.encoder.layer.7.attention.output.dense  |         14 | (768, 768)     | (768, 768)      | (768,)       | (768,)          | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0   |
+| 46 | nncf_module.bert.encoder.layer.7.intermediate.dense      |         15 | (3072, 768)    | (2569, 768)     | (3072,)      | (2569,)         | skip reporting                         | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0                    |
+| 47 | nncf_module.bert.encoder.layer.7.output.dense            |         15 | (768, 3072)    | (768, 2569)     | (768,)       | (768,)          | skip reporting                         | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[output]/NNCFLinear[dense]/linear_0                                |
+| 48 | nncf_module.bert.encoder.layer.8.attention.self.key      |         16 | (768, 768)     | (256, 768)      | (768,)       | (256,)          | [1, 9, 10, 11]                         | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0    |
+| 49 | nncf_module.bert.encoder.layer.8.attention.self.value    |         16 | (768, 768)     | (256, 768)      | (768,)       | (256,)          | [1, 9, 10, 11]                         | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0  |
+| 50 | nncf_module.bert.encoder.layer.8.attention.self.query    |         16 | (768, 768)     | (256, 768)      | (768,)       | (256,)          | [1, 9, 10, 11]                         | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0  |
+| 51 | nncf_module.bert.encoder.layer.8.attention.output.dense  |         16 | (768, 768)     | (768, 256)      | (768,)       | (768,)          | [1, 9, 10, 11]                         | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0   |
+| 52 | nncf_module.bert.encoder.layer.8.output.dense            |         17 | (768, 3072)    | (768, 2094)     | (768,)       | (768,)          | skip reporting                         | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[output]/NNCFLinear[dense]/linear_0                                |
+| 53 | nncf_module.bert.encoder.layer.8.intermediate.dense      |         17 | (3072, 768)    | (2094, 768)     | (3072,)      | (2094,)         | skip reporting                         | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0                    |
+| 54 | nncf_module.bert.encoder.layer.9.attention.self.query    |         18 | (768, 768)     | (768, 768)      | (768,)       | (768,)          | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0  |
+| 55 | nncf_module.bert.encoder.layer.9.attention.output.dense  |         18 | (768, 768)     | (768, 768)      | (768,)       | (768,)          | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0   |
+| 56 | nncf_module.bert.encoder.layer.9.attention.self.value    |         18 | (768, 768)     | (768, 768)      | (768,)       | (768,)          | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0  |
+| 57 | nncf_module.bert.encoder.layer.9.attention.self.key      |         18 | (768, 768)     | (768, 768)      | (768,)       | (768,)          | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0    |
+| 58 | nncf_module.bert.encoder.layer.9.intermediate.dense      |         19 | (3072, 768)    | (1009, 768)     | (3072,)      | (1009,)         | skip reporting                         | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0                    |
+| 59 | nncf_module.bert.encoder.layer.9.output.dense            |         19 | (768, 3072)    | (768, 1009)     | (768,)       | (768,)          | skip reporting                         | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[output]/NNCFLinear[dense]/linear_0                                |
+| 60 | nncf_module.bert.encoder.layer.10.attention.self.key     |         20 | (768, 768)     | (320, 768)      | (768,)       | (320,)          | [0, 3, 9, 10, 11]                      | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0   |
+| 61 | nncf_module.bert.encoder.layer.10.attention.self.value   |         20 | (768, 768)     | (320, 768)      | (768,)       | (320,)          | [0, 3, 9, 10, 11]                      | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
+| 62 | nncf_module.bert.encoder.layer.10.attention.self.query   |         20 | (768, 768)     | (320, 768)      | (768,)       | (320,)          | [0, 3, 9, 10, 11]                      | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
+| 63 | nncf_module.bert.encoder.layer.10.attention.output.dense |         20 | (768, 768)     | (768, 320)      | (768,)       | (768,)          | [0, 3, 9, 10, 11]                      | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0  |
+| 64 | nncf_module.bert.encoder.layer.10.output.dense           |         21 | (768, 3072)    | (768, 743)      | (768,)       | (768,)          | skip reporting                         | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[output]/NNCFLinear[dense]/linear_0                               |
+| 65 | nncf_module.bert.encoder.layer.10.intermediate.dense     |         21 | (3072, 768)    | (743, 768)      | (3072,)      | (743,)          | skip reporting                         | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0                   |
+| 66 | nncf_module.bert.encoder.layer.11.attention.output.dense |         22 | (768, 768)     | (768, 192)      | (768,)       | (768,)          | [1, 2, 9]                              | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0  |
+| 67 | nncf_module.bert.encoder.layer.11.attention.self.key     |         22 | (768, 768)     | (192, 768)      | (768,)       | (192,)          | [1, 2, 9]                              | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0   |
+| 68 | nncf_module.bert.encoder.layer.11.attention.self.value   |         22 | (768, 768)     | (192, 768)      | (768,)       | (192,)          | [1, 2, 9]                              | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0 |
+| 69 | nncf_module.bert.encoder.layer.11.attention.self.query   |         22 | (768, 768)     | (192, 768)      | (768,)       | (192,)          | [1, 2, 9]                              | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0 |
+| 70 | nncf_module.bert.encoder.layer.11.intermediate.dense     |         23 | (3072, 768)    | (605, 768)      | (3072,)      | (605,)          | skip reporting                         | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0                   |
+| 71 | nncf_module.bert.encoder.layer.11.output.dense           |         23 | (768, 3072)    | (768, 605)      | (768,)       | (768,)          | skip reporting                         | BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[output]/NNCFLinear[dense]/linear_0                               |

checkpoint-35000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:963740bdff2cf72416e92704c8f12f612711498e80bcb86943396aa8686d3525
+size 18704532

checkpoint-35000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3b71055db74256e103f6b235fc05792460b6f443079012f8566a63d151abf413
+size 3183

checkpoint-35000/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff