Chua, Vui Seng committed on
Commit
5b8a717
1 Parent(s): b67eda7

Update readme, add collaterals and model analysis report

Browse files
.gitattributes CHANGED
@@ -25,3 +25,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
28
+ pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
29
+ trainer_state.json filter=lfs diff=lfs merge=lfs -text
30
+ eval_nbest_predictions.json filter=lfs diff=lfs merge=lfs -text
31
+ checkpoint-26250/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
32
+ checkpoint-26250/trainer_state.json filter=lfs diff=lfs merge=lfs -text
33
+ checkpoint-26250/optimizer.pt filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ This model is a downstream optimization of [```vuiseng9/bert-base-squadv1-block-pruning-hybrid-filled-lt```](https://huggingface.co/vuiseng9/bert-base-squadv1-block-pruning-hybrid-filled-lt) using [OpenVINO/NNCF](https://github.com/openvinotoolkit/nncf). Applied optimizations include:
2
+ 1. magnitude sparsification during initialization
3
+ 2. NNCF Quantization-Aware Training
4
+ 3. Custom distillation with large model ```bert-large-uncased-whole-word-masking-finetuned-squad```
5
+
6
+ ```
7
+ eval_exact_match = 80.3217
8
+ eval_f1 = 87.635
9
+ eval_samples = 10784
10
+ ```
11
+
12
+ # Setup
13
+ ```bash
14
+ # OpenVINO/NNCF
15
+ git clone https://github.com/vuiseng9/nncf && cd nncf
16
+ git checkout tld-poc
17
+ git reset --hard 1dec7afe7a4b567c059fcf287ea2c234980fded2
18
+ python setup.py develop
19
+ pip install -r examples/torch/requirements.txt
20
+
21
+ # Huggingface nn_pruning
22
+ git clone https://github.com/vuiseng9/nn_pruning && cd nn_pruning
23
+ git checkout reproduce-evaluation
24
+ git reset --hard 2d4e196d694c465e43e5fbce6c3836d0a60e1446
25
+ pip install -e ".[dev]"
26
+
27
+ # Huggingface Transformers
28
+ git clone https://github.com/vuiseng9/transformers && cd transformers
29
+ git checkout tld-poc
30
+ git reset --hard 10a1e29d84484e48fd106f58957d9ffc89dc43c5
31
+ pip install -e .
32
+ head -n 1 examples/pytorch/question-answering/requirements.txt | xargs -i pip install {}
33
+
34
+ # Additional dependencies
35
+ pip install onnx
36
+ ```
37
+
38
+ # Train
39
+
40
+ ```bash
41
+ git clone https://huggingface.co/vuiseng9/bert-base-squadv1-block-pruning-hybrid-filled-lt
42
+ BASE_MODEL=/path/to/cloned_repo_above #to-revise
43
+
44
+ wget https://huggingface.co/vuiseng9/bert-base-squadv1-block-pruning-hybrid-filled-lt-nncf-50.0sparse-qat-lt/nncf_bert_squad_sparsity.json
45
+ NNCF_CFG=/path/to/downloaded_nncf_cfg_above #to-revise
46
+
47
+ export CUDA_VISIBLE_DEVICES=0
48
+ NEPOCH=5
49
+
50
+ RUNID=bert-base-squadv1-block-pruning-hybrid-filled-lt-nncf-50.0sparse-qat-lt
51
+ OUTROOT=/path/to/train_output_root #to-revise
52
+ WORKDIR=transformers/examples/pytorch/question-answering #to-revise
53
+
54
+ OUTDIR=$OUTROOT/$RUNID
55
+ mkdir -p $OUTDIR
56
+ cd $WORKDIR
57
+
58
+ python run_qa.py \
59
+ --model_name_or_path vuiseng9/bert-base-squadv1-block-pruning-hybrid \
60
+ --optimize_model_before_eval \
61
+ --optimized_checkpoint $BASE_MODEL \
62
+ --dataset_name squad \
63
+ --do_eval \
64
+ --do_train \
65
+ --evaluation_strategy steps \
66
+ --eval_steps 250 \
67
+ --learning_rate 3e-5 \
68
+ --lr_scheduler_type cosine_with_restarts \
69
+ --warmup_ratio 0.25 \
70
+ --cosine_cycles 1 \
71
+ --teacher bert-large-uncased-whole-word-masking-finetuned-squad \
72
+ --teacher_ratio 0.9 \
73
+ --num_train_epochs $NEPOCH \
74
+ --per_device_eval_batch_size 128 \
75
+ --per_device_train_batch_size 16 \
76
+ --max_seq_length 384 \
77
+ --doc_stride 128 \
78
+ --save_steps 250 \
79
+ --nncf_config $NNCF_CFG \
80
+ --logging_steps 1 \
81
+ --overwrite_output_dir \
82
+ --run_name $RUNID \
83
+ --output_dir $OUTDIR
84
+ ```
85
+
86
+ # Eval
87
+
88
+ This repo must be cloned locally.
89
+ ```bash
90
+ git clone https://huggingface.co/vuiseng9/bert-base-squadv1-block-pruning-hybrid-filled-lt-nncf-50.0sparse-qat-lt
91
+ MODELROOT=/path/to/cloned_repo_above #to-revise
92
+
93
+ export CUDA_VISIBLE_DEVICES=0
94
+
95
+ OUTDIR=eval-bert-base-squadv1-block-pruning-hybrid-filled-lt-nncf-50.0sparse-qat-lt
96
+ WORKDIR=transformers/examples/pytorch/question-answering #to-revise
97
+ cd $WORKDIR
98
+ mkdir -p $OUTDIR
99
+
100
+ nohup python run_qa.py \
101
+ --model_name_or_path vuiseng9/bert-base-squadv1-block-pruning-hybrid \
102
+ --dataset_name squad \
103
+ --optimize_model_before_eval \
104
+ --qat_checkpoint $MODELROOT/checkpoint-26250 \
105
+ --nncf_config $MODELROOT/nncf_bert_squad_sparsity.json \
106
+ --to_onnx $OUTDIR/bert-base-squadv1-block-pruning-hybrid-filled-lt-nncf-50.0sparse-qat-lt.onnx \
107
+ --do_eval \
108
+ --per_device_eval_batch_size 16 \
109
+ --max_seq_length 384 \
110
+ --doc_stride 128 \
111
+ --overwrite_output_dir \
112
+ --output_dir $OUTDIR 2>&1 | tee $OUTDIR/run.log &
113
+ ```
XP_layer_wise_sparsity_global_rate_22.89.csv ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ,layer_id,layer_type,param_type,shape,nparam,nnz,sparsity
2
+ 0,nncf_module.bert.embeddings.word_embeddings,NNCFEmbedding,weight,"[30522, 768]",23440896,23440896,0.0
3
+ 1,nncf_module.bert.embeddings.position_embeddings,NNCFEmbedding,weight,"[512, 768]",393216,393216,0.0
4
+ 2,nncf_module.bert.embeddings.token_type_embeddings,NNCFEmbedding,weight,"[2, 768]",1536,1536,0.0
5
+ 3,nncf_module.bert.embeddings.LayerNorm,LayerNorm,weight,[768],768,768,0.0
6
+ 4,nncf_module.bert.embeddings.LayerNorm,LayerNorm,bias,[768],768,768,0.0
7
+ 5,nncf_module.bert.encoder.layer.0.attention.self.query,NNCFLinear,weight,"[320, 768]",245760,106337,0.5673136115074158
8
+ 6,nncf_module.bert.encoder.layer.0.attention.self.query,NNCFLinear,bias,[320],320,320,0.0
9
+ 7,nncf_module.bert.encoder.layer.0.attention.self.key,NNCFLinear,weight,"[320, 768]",245760,112375,0.5427449345588684
10
+ 8,nncf_module.bert.encoder.layer.0.attention.self.key,NNCFLinear,bias,[320],320,320,0.0
11
+ 9,nncf_module.bert.encoder.layer.0.attention.self.value,NNCFLinear,weight,"[320, 768]",245760,131965,0.4630330204963684
12
+ 10,nncf_module.bert.encoder.layer.0.attention.self.value,NNCFLinear,bias,[320],320,320,0.0
13
+ 11,nncf_module.bert.encoder.layer.0.attention.output.dense,NNCFLinear,weight,"[768, 320]",245760,137497,0.44052326679229736
14
+ 12,nncf_module.bert.encoder.layer.0.attention.output.dense,NNCFLinear,bias,[768],768,768,0.0
15
+ 13,nncf_module.bert.encoder.layer.0.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
16
+ 14,nncf_module.bert.encoder.layer.0.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
17
+ 15,nncf_module.bert.encoder.layer.0.intermediate.dense,NNCFLinear,weight,"[185, 768]",142080,110056,0.2253941297531128
18
+ 16,nncf_module.bert.encoder.layer.0.intermediate.dense,NNCFLinear,bias,[185],185,185,0.0
19
+ 17,nncf_module.bert.encoder.layer.0.output.dense,NNCFLinear,weight,"[768, 185]",142080,108631,0.23542368412017822
20
+ 18,nncf_module.bert.encoder.layer.0.output.dense,NNCFLinear,bias,[768],768,768,0.0
21
+ 19,nncf_module.bert.encoder.layer.0.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
22
+ 20,nncf_module.bert.encoder.layer.0.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
23
+ 21,nncf_module.bert.encoder.layer.1.attention.self.query,NNCFLinear,weight,"[320, 768]",245760,136222,0.44571125507354736
24
+ 22,nncf_module.bert.encoder.layer.1.attention.self.query,NNCFLinear,bias,[320],320,320,0.0
25
+ 23,nncf_module.bert.encoder.layer.1.attention.self.key,NNCFLinear,weight,"[320, 768]",245760,136267,0.44552814960479736
26
+ 24,nncf_module.bert.encoder.layer.1.attention.self.key,NNCFLinear,bias,[320],320,320,0.0
27
+ 25,nncf_module.bert.encoder.layer.1.attention.self.value,NNCFLinear,weight,"[320, 768]",245760,124892,0.49181312322616577
28
+ 26,nncf_module.bert.encoder.layer.1.attention.self.value,NNCFLinear,bias,[320],320,320,0.0
29
+ 27,nncf_module.bert.encoder.layer.1.attention.output.dense,NNCFLinear,weight,"[768, 320]",245760,130185,0.47027587890625
30
+ 28,nncf_module.bert.encoder.layer.1.attention.output.dense,NNCFLinear,bias,[768],768,768,0.0
31
+ 29,nncf_module.bert.encoder.layer.1.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
32
+ 30,nncf_module.bert.encoder.layer.1.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
33
+ 31,nncf_module.bert.encoder.layer.1.intermediate.dense,NNCFLinear,weight,"[315, 768]",241920,176203,0.2716476321220398
34
+ 32,nncf_module.bert.encoder.layer.1.intermediate.dense,NNCFLinear,bias,[315],315,315,0.0
35
+ 33,nncf_module.bert.encoder.layer.1.output.dense,NNCFLinear,weight,"[768, 315]",241920,172407,0.2873387932777405
36
+ 34,nncf_module.bert.encoder.layer.1.output.dense,NNCFLinear,bias,[768],768,768,0.0
37
+ 35,nncf_module.bert.encoder.layer.1.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
38
+ 36,nncf_module.bert.encoder.layer.1.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
39
+ 37,nncf_module.bert.encoder.layer.2.attention.self.query,NNCFLinear,weight,"[576, 768]",442368,198811,0.5505754947662354
40
+ 38,nncf_module.bert.encoder.layer.2.attention.self.query,NNCFLinear,bias,[576],576,576,0.0
41
+ 39,nncf_module.bert.encoder.layer.2.attention.self.key,NNCFLinear,weight,"[576, 768]",442368,201526,0.5444381237030029
42
+ 40,nncf_module.bert.encoder.layer.2.attention.self.key,NNCFLinear,bias,[576],576,576,0.0
43
+ 41,nncf_module.bert.encoder.layer.2.attention.self.value,NNCFLinear,weight,"[576, 768]",442368,163541,0.6303055286407471
44
+ 42,nncf_module.bert.encoder.layer.2.attention.self.value,NNCFLinear,bias,[576],576,576,0.0
45
+ 43,nncf_module.bert.encoder.layer.2.attention.output.dense,NNCFLinear,weight,"[768, 576]",442368,167245,0.6219323873519897
46
+ 44,nncf_module.bert.encoder.layer.2.attention.output.dense,NNCFLinear,bias,[768],768,768,0.0
47
+ 45,nncf_module.bert.encoder.layer.2.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
48
+ 46,nncf_module.bert.encoder.layer.2.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
49
+ 47,nncf_module.bert.encoder.layer.2.intermediate.dense,NNCFLinear,weight,"[339, 768]",260352,185491,0.2875376343727112
50
+ 48,nncf_module.bert.encoder.layer.2.intermediate.dense,NNCFLinear,bias,[339],339,339,0.0
51
+ 49,nncf_module.bert.encoder.layer.2.output.dense,NNCFLinear,weight,"[768, 339]",260352,183083,0.2967866063117981
52
+ 50,nncf_module.bert.encoder.layer.2.output.dense,NNCFLinear,bias,[768],768,768,0.0
53
+ 51,nncf_module.bert.encoder.layer.2.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
54
+ 52,nncf_module.bert.encoder.layer.2.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
55
+ 53,nncf_module.bert.encoder.layer.3.attention.self.query,NNCFLinear,weight,"[576, 768]",442368,205898,0.5345549583435059
56
+ 54,nncf_module.bert.encoder.layer.3.attention.self.query,NNCFLinear,bias,[576],576,576,0.0
57
+ 55,nncf_module.bert.encoder.layer.3.attention.self.key,NNCFLinear,weight,"[576, 768]",442368,217621,0.5080543756484985
58
+ 56,nncf_module.bert.encoder.layer.3.attention.self.key,NNCFLinear,bias,[576],576,576,0.0
59
+ 57,nncf_module.bert.encoder.layer.3.attention.self.value,NNCFLinear,weight,"[576, 768]",442368,209726,0.5259014964103699
60
+ 58,nncf_module.bert.encoder.layer.3.attention.self.value,NNCFLinear,bias,[576],576,576,0.0
61
+ 59,nncf_module.bert.encoder.layer.3.attention.output.dense,NNCFLinear,weight,"[768, 576]",442368,208038,0.5297173261642456
62
+ 60,nncf_module.bert.encoder.layer.3.attention.output.dense,NNCFLinear,bias,[768],768,768,0.0
63
+ 61,nncf_module.bert.encoder.layer.3.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
64
+ 62,nncf_module.bert.encoder.layer.3.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
65
+ 63,nncf_module.bert.encoder.layer.3.intermediate.dense,NNCFLinear,weight,"[368, 768]",282624,198154,0.29887765645980835
66
+ 64,nncf_module.bert.encoder.layer.3.intermediate.dense,NNCFLinear,bias,[368],368,368,0.0
67
+ 65,nncf_module.bert.encoder.layer.3.output.dense,NNCFLinear,weight,"[768, 368]",282624,194127,0.31312626600265503
68
+ 66,nncf_module.bert.encoder.layer.3.output.dense,NNCFLinear,bias,[768],768,768,0.0
69
+ 67,nncf_module.bert.encoder.layer.3.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
70
+ 68,nncf_module.bert.encoder.layer.3.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
71
+ 69,nncf_module.bert.encoder.layer.4.attention.self.query,NNCFLinear,weight,"[576, 768]",442368,212567,0.5194792747497559
72
+ 70,nncf_module.bert.encoder.layer.4.attention.self.query,NNCFLinear,bias,[576],576,576,0.0
73
+ 71,nncf_module.bert.encoder.layer.4.attention.self.key,NNCFLinear,weight,"[576, 768]",442368,214788,0.5144585371017456
74
+ 72,nncf_module.bert.encoder.layer.4.attention.self.key,NNCFLinear,bias,[576],576,576,0.0
75
+ 73,nncf_module.bert.encoder.layer.4.attention.self.value,NNCFLinear,weight,"[576, 768]",442368,197159,0.5543099641799927
76
+ 74,nncf_module.bert.encoder.layer.4.attention.self.value,NNCFLinear,bias,[576],576,576,0.0
77
+ 75,nncf_module.bert.encoder.layer.4.attention.output.dense,NNCFLinear,weight,"[768, 576]",442368,192495,0.5648532509803772
78
+ 76,nncf_module.bert.encoder.layer.4.attention.output.dense,NNCFLinear,bias,[768],768,768,0.0
79
+ 77,nncf_module.bert.encoder.layer.4.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
80
+ 78,nncf_module.bert.encoder.layer.4.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
81
+ 79,nncf_module.bert.encoder.layer.4.intermediate.dense,NNCFLinear,weight,"[386, 768]",296448,205830,0.3056792616844177
82
+ 80,nncf_module.bert.encoder.layer.4.intermediate.dense,NNCFLinear,bias,[386],386,386,0.0
83
+ 81,nncf_module.bert.encoder.layer.4.output.dense,NNCFLinear,weight,"[768, 386]",296448,199567,0.32680606842041016
84
+ 82,nncf_module.bert.encoder.layer.4.output.dense,NNCFLinear,bias,[768],768,768,0.0
85
+ 83,nncf_module.bert.encoder.layer.4.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
86
+ 84,nncf_module.bert.encoder.layer.4.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
87
+ 85,nncf_module.bert.encoder.layer.5.attention.self.query,NNCFLinear,weight,"[384, 768]",294912,131257,0.5549282431602478
88
+ 86,nncf_module.bert.encoder.layer.5.attention.self.query,NNCFLinear,bias,[384],384,384,0.0
89
+ 87,nncf_module.bert.encoder.layer.5.attention.self.key,NNCFLinear,weight,"[384, 768]",294912,154458,0.47625732421875
90
+ 88,nncf_module.bert.encoder.layer.5.attention.self.key,NNCFLinear,bias,[384],384,384,0.0
91
+ 89,nncf_module.bert.encoder.layer.5.attention.self.value,NNCFLinear,weight,"[384, 768]",294912,159646,0.4586656093597412
92
+ 90,nncf_module.bert.encoder.layer.5.attention.self.value,NNCFLinear,bias,[384],384,384,0.0
93
+ 91,nncf_module.bert.encoder.layer.5.attention.output.dense,NNCFLinear,weight,"[768, 384]",294912,156889,0.4680141806602478
94
+ 92,nncf_module.bert.encoder.layer.5.attention.output.dense,NNCFLinear,bias,[768],768,768,0.0
95
+ 93,nncf_module.bert.encoder.layer.5.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
96
+ 94,nncf_module.bert.encoder.layer.5.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
97
+ 95,nncf_module.bert.encoder.layer.5.intermediate.dense,NNCFLinear,weight,"[336, 768]",258048,184774,0.2839548587799072
98
+ 96,nncf_module.bert.encoder.layer.5.intermediate.dense,NNCFLinear,bias,[336],336,336,0.0
99
+ 97,nncf_module.bert.encoder.layer.5.output.dense,NNCFLinear,weight,"[768, 336]",258048,178574,0.3079814314842224
100
+ 98,nncf_module.bert.encoder.layer.5.output.dense,NNCFLinear,bias,[768],768,768,0.0
101
+ 99,nncf_module.bert.encoder.layer.5.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
102
+ 100,nncf_module.bert.encoder.layer.5.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
103
+ 101,nncf_module.bert.encoder.layer.6.attention.self.query,NNCFLinear,weight,"[448, 768]",344064,153126,0.5549490451812744
104
+ 102,nncf_module.bert.encoder.layer.6.attention.self.query,NNCFLinear,bias,[448],448,448,0.0
105
+ 103,nncf_module.bert.encoder.layer.6.attention.self.key,NNCFLinear,weight,"[448, 768]",344064,169706,0.5067603588104248
106
+ 104,nncf_module.bert.encoder.layer.6.attention.self.key,NNCFLinear,bias,[448],448,448,0.0
107
+ 105,nncf_module.bert.encoder.layer.6.attention.self.value,NNCFLinear,weight,"[448, 768]",344064,154213,0.5517897605895996
108
+ 106,nncf_module.bert.encoder.layer.6.attention.self.value,NNCFLinear,bias,[448],448,448,0.0
109
+ 107,nncf_module.bert.encoder.layer.6.attention.output.dense,NNCFLinear,weight,"[768, 448]",344064,148815,0.5674787163734436
110
+ 108,nncf_module.bert.encoder.layer.6.attention.output.dense,NNCFLinear,bias,[768],768,768,0.0
111
+ 109,nncf_module.bert.encoder.layer.6.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
112
+ 110,nncf_module.bert.encoder.layer.6.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
113
+ 111,nncf_module.bert.encoder.layer.6.intermediate.dense,NNCFLinear,weight,"[280, 768]",215040,158800,0.261532723903656
114
+ 112,nncf_module.bert.encoder.layer.6.intermediate.dense,NNCFLinear,bias,[280],280,280,0.0
115
+ 113,nncf_module.bert.encoder.layer.6.output.dense,NNCFLinear,weight,"[768, 280]",215040,156397,0.27270740270614624
116
+ 114,nncf_module.bert.encoder.layer.6.output.dense,NNCFLinear,bias,[768],768,768,0.0
117
+ 115,nncf_module.bert.encoder.layer.6.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
118
+ 116,nncf_module.bert.encoder.layer.6.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
119
+ 117,nncf_module.bert.encoder.layer.7.attention.self.query,NNCFLinear,weight,"[448, 768]",344064,153876,0.5527692437171936
120
+ 118,nncf_module.bert.encoder.layer.7.attention.self.query,NNCFLinear,bias,[448],448,448,0.0
121
+ 119,nncf_module.bert.encoder.layer.7.attention.self.key,NNCFLinear,weight,"[448, 768]",344064,180384,0.4757254123687744
122
+ 120,nncf_module.bert.encoder.layer.7.attention.self.key,NNCFLinear,bias,[448],448,448,0.0
123
+ 121,nncf_module.bert.encoder.layer.7.attention.self.value,NNCFLinear,weight,"[448, 768]",344064,166552,0.515927255153656
124
+ 122,nncf_module.bert.encoder.layer.7.attention.self.value,NNCFLinear,bias,[448],448,448,0.0
125
+ 123,nncf_module.bert.encoder.layer.7.attention.output.dense,NNCFLinear,weight,"[768, 448]",344064,160346,0.5339646339416504
126
+ 124,nncf_module.bert.encoder.layer.7.attention.output.dense,NNCFLinear,bias,[768],768,768,0.0
127
+ 125,nncf_module.bert.encoder.layer.7.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
128
+ 126,nncf_module.bert.encoder.layer.7.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
129
+ 127,nncf_module.bert.encoder.layer.7.intermediate.dense,NNCFLinear,weight,"[211, 768]",162048,125197,0.22740793228149414
130
+ 128,nncf_module.bert.encoder.layer.7.intermediate.dense,NNCFLinear,bias,[211],211,211,0.0
131
+ 129,nncf_module.bert.encoder.layer.7.output.dense,NNCFLinear,weight,"[768, 211]",162048,123451,0.23818248510360718
132
+ 130,nncf_module.bert.encoder.layer.7.output.dense,NNCFLinear,bias,[768],768,768,0.0
133
+ 131,nncf_module.bert.encoder.layer.7.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
134
+ 132,nncf_module.bert.encoder.layer.7.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
135
+ 133,nncf_module.bert.encoder.layer.8.attention.self.query,NNCFLinear,weight,"[448, 768]",344064,149419,0.5657232403755188
136
+ 134,nncf_module.bert.encoder.layer.8.attention.self.query,NNCFLinear,bias,[448],448,448,0.0
137
+ 135,nncf_module.bert.encoder.layer.8.attention.self.key,NNCFLinear,weight,"[448, 768]",344064,150699,0.5620030164718628
138
+ 136,nncf_module.bert.encoder.layer.8.attention.self.key,NNCFLinear,bias,[448],448,448,0.0
139
+ 137,nncf_module.bert.encoder.layer.8.attention.self.value,NNCFLinear,weight,"[448, 768]",344064,124770,0.6373639702796936
140
+ 138,nncf_module.bert.encoder.layer.8.attention.self.value,NNCFLinear,bias,[448],448,448,0.0
141
+ 139,nncf_module.bert.encoder.layer.8.attention.output.dense,NNCFLinear,weight,"[768, 448]",344064,120648,0.6493443250656128
142
+ 140,nncf_module.bert.encoder.layer.8.attention.output.dense,NNCFLinear,bias,[768],768,768,0.0
143
+ 141,nncf_module.bert.encoder.layer.8.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
144
+ 142,nncf_module.bert.encoder.layer.8.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
145
+ 143,nncf_module.bert.encoder.layer.8.intermediate.dense,NNCFLinear,weight,"[108, 768]",82944,68989,0.16824603080749512
146
+ 144,nncf_module.bert.encoder.layer.8.intermediate.dense,NNCFLinear,bias,[108],108,108,0.0
147
+ 145,nncf_module.bert.encoder.layer.8.output.dense,NNCFLinear,weight,"[768, 108]",82944,68556,0.17346644401550293
148
+ 146,nncf_module.bert.encoder.layer.8.output.dense,NNCFLinear,bias,[768],768,768,0.0
149
+ 147,nncf_module.bert.encoder.layer.8.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
150
+ 148,nncf_module.bert.encoder.layer.8.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
151
+ 149,nncf_module.bert.encoder.layer.9.attention.self.query,NNCFLinear,weight,"[320, 768]",245760,121225,0.5067341923713684
152
+ 150,nncf_module.bert.encoder.layer.9.attention.self.query,NNCFLinear,bias,[320],320,320,0.0
153
+ 151,nncf_module.bert.encoder.layer.9.attention.self.key,NNCFLinear,weight,"[320, 768]",245760,114789,0.5329223275184631
154
+ 152,nncf_module.bert.encoder.layer.9.attention.self.key,NNCFLinear,bias,[320],320,320,0.0
155
+ 153,nncf_module.bert.encoder.layer.9.attention.self.value,NNCFLinear,weight,"[320, 768]",245760,69260,0.7181802988052368
156
+ 154,nncf_module.bert.encoder.layer.9.attention.self.value,NNCFLinear,bias,[320],320,320,0.0
157
+ 155,nncf_module.bert.encoder.layer.9.attention.output.dense,NNCFLinear,weight,"[768, 320]",245760,73575,0.70062255859375
158
+ 156,nncf_module.bert.encoder.layer.9.attention.output.dense,NNCFLinear,bias,[768],768,768,0.0
159
+ 157,nncf_module.bert.encoder.layer.9.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
160
+ 158,nncf_module.bert.encoder.layer.9.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
161
+ 159,nncf_module.bert.encoder.layer.9.intermediate.dense,NNCFLinear,weight,"[53, 768]",40704,35399,0.13033121824264526
162
+ 160,nncf_module.bert.encoder.layer.9.intermediate.dense,NNCFLinear,bias,[53],53,53,0.0
163
+ 161,nncf_module.bert.encoder.layer.9.output.dense,NNCFLinear,weight,"[768, 53]",40704,34918,0.1421481966972351
164
+ 162,nncf_module.bert.encoder.layer.9.output.dense,NNCFLinear,bias,[768],768,768,0.0
165
+ 163,nncf_module.bert.encoder.layer.9.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
166
+ 164,nncf_module.bert.encoder.layer.9.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
167
+ 165,nncf_module.bert.encoder.layer.10.attention.self.query,NNCFLinear,weight,"[384, 768]",294912,128113,0.5655890703201294
168
+ 166,nncf_module.bert.encoder.layer.10.attention.self.query,NNCFLinear,bias,[384],384,384,0.0
169
+ 167,nncf_module.bert.encoder.layer.10.attention.self.key,NNCFLinear,weight,"[384, 768]",294912,124553,0.5776604413986206
170
+ 168,nncf_module.bert.encoder.layer.10.attention.self.key,NNCFLinear,bias,[384],384,384,0.0
171
+ 169,nncf_module.bert.encoder.layer.10.attention.self.value,NNCFLinear,weight,"[384, 768]",294912,79608,0.7300618886947632
172
+ 170,nncf_module.bert.encoder.layer.10.attention.self.value,NNCFLinear,bias,[384],384,384,0.0
173
+ 171,nncf_module.bert.encoder.layer.10.attention.output.dense,NNCFLinear,weight,"[768, 384]",294912,85158,0.71124267578125
174
+ 172,nncf_module.bert.encoder.layer.10.attention.output.dense,NNCFLinear,bias,[768],768,768,0.0
175
+ 173,nncf_module.bert.encoder.layer.10.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
176
+ 174,nncf_module.bert.encoder.layer.10.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
177
+ 175,nncf_module.bert.encoder.layer.10.intermediate.dense,NNCFLinear,weight,"[86, 768]",66048,55025,0.16689378023147583
178
+ 176,nncf_module.bert.encoder.layer.10.intermediate.dense,NNCFLinear,bias,[86],86,86,0.0
179
+ 177,nncf_module.bert.encoder.layer.10.output.dense,NNCFLinear,weight,"[768, 86]",66048,54584,0.17357075214385986
180
+ 178,nncf_module.bert.encoder.layer.10.output.dense,NNCFLinear,bias,[768],768,768,0.0
181
+ 179,nncf_module.bert.encoder.layer.10.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
182
+ 180,nncf_module.bert.encoder.layer.10.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
183
+ 181,nncf_module.bert.encoder.layer.11.attention.self.query,NNCFLinear,weight,"[384, 768]",294912,104737,0.6448533535003662
184
+ 182,nncf_module.bert.encoder.layer.11.attention.self.query,NNCFLinear,bias,[384],384,384,0.0
185
+ 183,nncf_module.bert.encoder.layer.11.attention.self.key,NNCFLinear,weight,"[384, 768]",294912,102558,0.6522420644760132
186
+ 184,nncf_module.bert.encoder.layer.11.attention.self.key,NNCFLinear,bias,[384],384,384,0.0
187
+ 185,nncf_module.bert.encoder.layer.11.attention.self.value,NNCFLinear,weight,"[384, 768]",294912,64855,0.7800869345664978
188
+ 186,nncf_module.bert.encoder.layer.11.attention.self.value,NNCFLinear,bias,[384],384,384,0.0
189
+ 187,nncf_module.bert.encoder.layer.11.attention.output.dense,NNCFLinear,weight,"[768, 384]",294912,69674,0.7637465000152588
190
+ 188,nncf_module.bert.encoder.layer.11.attention.output.dense,NNCFLinear,bias,[768],768,768,0.0
191
+ 189,nncf_module.bert.encoder.layer.11.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
192
+ 190,nncf_module.bert.encoder.layer.11.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
193
+ 191,nncf_module.bert.encoder.layer.11.intermediate.dense,NNCFLinear,weight,"[105, 768]",80640,67724,0.16016864776611328
194
+ 192,nncf_module.bert.encoder.layer.11.intermediate.dense,NNCFLinear,bias,[105],105,105,0.0
195
+ 193,nncf_module.bert.encoder.layer.11.output.dense,NNCFLinear,weight,"[768, 105]",80640,67519,0.1627107858657837
196
+ 194,nncf_module.bert.encoder.layer.11.output.dense,NNCFLinear,bias,[768],768,768,0.0
197
+ 195,nncf_module.bert.encoder.layer.11.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
198
+ 196,nncf_module.bert.encoder.layer.11.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
199
+ 197,nncf_module.qa_outputs,NNCFLinear,weight,"[2, 768]",1536,1536,0.0
200
+ 198,nncf_module.qa_outputs,NNCFLinear,bias,[2],2,2,0.0
XP_layer_wise_sparsity_global_rate_22.89.md ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ | | layer_id | layer_type | param_type | shape | nparam | nnz | sparsity |
2
+ |----:|:-------------------------------------------------------------|:--------------|:-------------|:-------------|---------:|---------:|-----------:|
3
+ | 0 | nncf_module.bert.embeddings.word_embeddings | NNCFEmbedding | weight | [30522, 768] | 23440896 | 23440896 | 0 |
4
+ | 1 | nncf_module.bert.embeddings.position_embeddings | NNCFEmbedding | weight | [512, 768] | 393216 | 393216 | 0 |
5
+ | 2 | nncf_module.bert.embeddings.token_type_embeddings | NNCFEmbedding | weight | [2, 768] | 1536 | 1536 | 0 |
6
+ | 3 | nncf_module.bert.embeddings.LayerNorm | LayerNorm | weight | [768] | 768 | 768 | 0 |
7
+ | 4 | nncf_module.bert.embeddings.LayerNorm | LayerNorm | bias | [768] | 768 | 768 | 0 |
8
+ | 5 | nncf_module.bert.encoder.layer.0.attention.self.query | NNCFLinear | weight | [320, 768] | 245760 | 106337 | 0.567314 |
9
+ | 6 | nncf_module.bert.encoder.layer.0.attention.self.query | NNCFLinear | bias | [320] | 320 | 320 | 0 |
10
+ | 7 | nncf_module.bert.encoder.layer.0.attention.self.key | NNCFLinear | weight | [320, 768] | 245760 | 112375 | 0.542745 |
11
+ | 8 | nncf_module.bert.encoder.layer.0.attention.self.key | NNCFLinear | bias | [320] | 320 | 320 | 0 |
12
+ | 9 | nncf_module.bert.encoder.layer.0.attention.self.value | NNCFLinear | weight | [320, 768] | 245760 | 131965 | 0.463033 |
13
+ | 10 | nncf_module.bert.encoder.layer.0.attention.self.value | NNCFLinear | bias | [320] | 320 | 320 | 0 |
14
+ | 11 | nncf_module.bert.encoder.layer.0.attention.output.dense | NNCFLinear | weight | [768, 320] | 245760 | 137497 | 0.440523 |
15
+ | 12 | nncf_module.bert.encoder.layer.0.attention.output.dense | NNCFLinear | bias | [768] | 768 | 768 | 0 |
16
+ | 13 | nncf_module.bert.encoder.layer.0.attention.output.LayerNorm | LayerNorm | weight | [768] | 768 | 768 | 0 |
17
+ | 14 | nncf_module.bert.encoder.layer.0.attention.output.LayerNorm | LayerNorm | bias | [768] | 768 | 768 | 0 |
18
+ | 15 | nncf_module.bert.encoder.layer.0.intermediate.dense | NNCFLinear | weight | [185, 768] | 142080 | 110056 | 0.225394 |
19
+ | 16 | nncf_module.bert.encoder.layer.0.intermediate.dense | NNCFLinear | bias | [185] | 185 | 185 | 0 |
20
+ | 17 | nncf_module.bert.encoder.layer.0.output.dense | NNCFLinear | weight | [768, 185] | 142080 | 108631 | 0.235424 |
21
+ | 18 | nncf_module.bert.encoder.layer.0.output.dense | NNCFLinear | bias | [768] | 768 | 768 | 0 |
22
+ | 19 | nncf_module.bert.encoder.layer.0.output.LayerNorm | LayerNorm | weight | [768] | 768 | 768 | 0 |
23
+ | 20 | nncf_module.bert.encoder.layer.0.output.LayerNorm | LayerNorm | bias | [768] | 768 | 768 | 0 |
24
+ | 21 | nncf_module.bert.encoder.layer.1.attention.self.query | NNCFLinear | weight | [320, 768] | 245760 | 136222 | 0.445711 |
25
+ | 22 | nncf_module.bert.encoder.layer.1.attention.self.query | NNCFLinear | bias | [320] | 320 | 320 | 0 |
26
+ | 23 | nncf_module.bert.encoder.layer.1.attention.self.key | NNCFLinear | weight | [320, 768] | 245760 | 136267 | 0.445528 |
27
+ | 24 | nncf_module.bert.encoder.layer.1.attention.self.key | NNCFLinear | bias | [320] | 320 | 320 | 0 |
28
+ | 25 | nncf_module.bert.encoder.layer.1.attention.self.value | NNCFLinear | weight | [320, 768] | 245760 | 124892 | 0.491813 |
29
+ | 26 | nncf_module.bert.encoder.layer.1.attention.self.value | NNCFLinear | bias | [320] | 320 | 320 | 0 |
30
+ | 27 | nncf_module.bert.encoder.layer.1.attention.output.dense | NNCFLinear | weight | [768, 320] | 245760 | 130185 | 0.470276 |
31
+ | 28 | nncf_module.bert.encoder.layer.1.attention.output.dense | NNCFLinear | bias | [768] | 768 | 768 | 0 |
32
+ | 29 | nncf_module.bert.encoder.layer.1.attention.output.LayerNorm | LayerNorm | weight | [768] | 768 | 768 | 0 |
33
+ | 30 | nncf_module.bert.encoder.layer.1.attention.output.LayerNorm | LayerNorm | bias | [768] | 768 | 768 | 0 |
34
+ | 31 | nncf_module.bert.encoder.layer.1.intermediate.dense | NNCFLinear | weight | [315, 768] | 241920 | 176203 | 0.271648 |
35
+ | 32 | nncf_module.bert.encoder.layer.1.intermediate.dense | NNCFLinear | bias | [315] | 315 | 315 | 0 |
36
+ | 33 | nncf_module.bert.encoder.layer.1.output.dense | NNCFLinear | weight | [768, 315] | 241920 | 172407 | 0.287339 |
37
+ | 34 | nncf_module.bert.encoder.layer.1.output.dense | NNCFLinear | bias | [768] | 768 | 768 | 0 |
38
+ | 35 | nncf_module.bert.encoder.layer.1.output.LayerNorm | LayerNorm | weight | [768] | 768 | 768 | 0 |
39
+ | 36 | nncf_module.bert.encoder.layer.1.output.LayerNorm | LayerNorm | bias | [768] | 768 | 768 | 0 |
40
+ | 37 | nncf_module.bert.encoder.layer.2.attention.self.query | NNCFLinear | weight | [576, 768] | 442368 | 198811 | 0.550575 |
41
+ | 38 | nncf_module.bert.encoder.layer.2.attention.self.query | NNCFLinear | bias | [576] | 576 | 576 | 0 |
42
+ | 39 | nncf_module.bert.encoder.layer.2.attention.self.key | NNCFLinear | weight | [576, 768] | 442368 | 201526 | 0.544438 |
43
+ | 40 | nncf_module.bert.encoder.layer.2.attention.self.key | NNCFLinear | bias | [576] | 576 | 576 | 0 |
44
+ | 41 | nncf_module.bert.encoder.layer.2.attention.self.value | NNCFLinear | weight | [576, 768] | 442368 | 163541 | 0.630306 |
45
+ | 42 | nncf_module.bert.encoder.layer.2.attention.self.value | NNCFLinear | bias | [576] | 576 | 576 | 0 |
46
+ | 43 | nncf_module.bert.encoder.layer.2.attention.output.dense | NNCFLinear | weight | [768, 576] | 442368 | 167245 | 0.621932 |
47
+ | 44 | nncf_module.bert.encoder.layer.2.attention.output.dense | NNCFLinear | bias | [768] | 768 | 768 | 0 |
48
+ | 45 | nncf_module.bert.encoder.layer.2.attention.output.LayerNorm | LayerNorm | weight | [768] | 768 | 768 | 0 |
49
+ | 46 | nncf_module.bert.encoder.layer.2.attention.output.LayerNorm | LayerNorm | bias | [768] | 768 | 768 | 0 |
50
+ | 47 | nncf_module.bert.encoder.layer.2.intermediate.dense | NNCFLinear | weight | [339, 768] | 260352 | 185491 | 0.287538 |
51
+ | 48 | nncf_module.bert.encoder.layer.2.intermediate.dense | NNCFLinear | bias | [339] | 339 | 339 | 0 |
52
+ | 49 | nncf_module.bert.encoder.layer.2.output.dense | NNCFLinear | weight | [768, 339] | 260352 | 183083 | 0.296787 |
53
+ | 50 | nncf_module.bert.encoder.layer.2.output.dense | NNCFLinear | bias | [768] | 768 | 768 | 0 |
54
+ | 51 | nncf_module.bert.encoder.layer.2.output.LayerNorm | LayerNorm | weight | [768] | 768 | 768 | 0 |
55
+ | 52 | nncf_module.bert.encoder.layer.2.output.LayerNorm | LayerNorm | bias | [768] | 768 | 768 | 0 |
56
+ | 53 | nncf_module.bert.encoder.layer.3.attention.self.query | NNCFLinear | weight | [576, 768] | 442368 | 205898 | 0.534555 |
57
+ | 54 | nncf_module.bert.encoder.layer.3.attention.self.query | NNCFLinear | bias | [576] | 576 | 576 | 0 |
58
+ | 55 | nncf_module.bert.encoder.layer.3.attention.self.key | NNCFLinear | weight | [576, 768] | 442368 | 217621 | 0.508054 |
59
+ | 56 | nncf_module.bert.encoder.layer.3.attention.self.key | NNCFLinear | bias | [576] | 576 | 576 | 0 |
60
+ | 57 | nncf_module.bert.encoder.layer.3.attention.self.value | NNCFLinear | weight | [576, 768] | 442368 | 209726 | 0.525901 |
61
+ | 58 | nncf_module.bert.encoder.layer.3.attention.self.value | NNCFLinear | bias | [576] | 576 | 576 | 0 |
62
+ | 59 | nncf_module.bert.encoder.layer.3.attention.output.dense | NNCFLinear | weight | [768, 576] | 442368 | 208038 | 0.529717 |
63
+ | 60 | nncf_module.bert.encoder.layer.3.attention.output.dense | NNCFLinear | bias | [768] | 768 | 768 | 0 |
64
+ | 61 | nncf_module.bert.encoder.layer.3.attention.output.LayerNorm | LayerNorm | weight | [768] | 768 | 768 | 0 |
65
+ | 62 | nncf_module.bert.encoder.layer.3.attention.output.LayerNorm | LayerNorm | bias | [768] | 768 | 768 | 0 |
66
+ | 63 | nncf_module.bert.encoder.layer.3.intermediate.dense | NNCFLinear | weight | [368, 768] | 282624 | 198154 | 0.298878 |
67
+ | 64 | nncf_module.bert.encoder.layer.3.intermediate.dense | NNCFLinear | bias | [368] | 368 | 368 | 0 |
68
+ | 65 | nncf_module.bert.encoder.layer.3.output.dense | NNCFLinear | weight | [768, 368] | 282624 | 194127 | 0.313126 |
69
+ | 66 | nncf_module.bert.encoder.layer.3.output.dense | NNCFLinear | bias | [768] | 768 | 768 | 0 |
70
+ | 67 | nncf_module.bert.encoder.layer.3.output.LayerNorm | LayerNorm | weight | [768] | 768 | 768 | 0 |
71
+ | 68 | nncf_module.bert.encoder.layer.3.output.LayerNorm | LayerNorm | bias | [768] | 768 | 768 | 0 |
72
+ | 69 | nncf_module.bert.encoder.layer.4.attention.self.query | NNCFLinear | weight | [576, 768] | 442368 | 212567 | 0.519479 |
73
+ | 70 | nncf_module.bert.encoder.layer.4.attention.self.query | NNCFLinear | bias | [576] | 576 | 576 | 0 |
74
+ | 71 | nncf_module.bert.encoder.layer.4.attention.self.key | NNCFLinear | weight | [576, 768] | 442368 | 214788 | 0.514459 |
75
+ | 72 | nncf_module.bert.encoder.layer.4.attention.self.key | NNCFLinear | bias | [576] | 576 | 576 | 0 |
76
+ | 73 | nncf_module.bert.encoder.layer.4.attention.self.value | NNCFLinear | weight | [576, 768] | 442368 | 197159 | 0.55431 |
77
+ | 74 | nncf_module.bert.encoder.layer.4.attention.self.value | NNCFLinear | bias | [576] | 576 | 576 | 0 |
78
+ | 75 | nncf_module.bert.encoder.layer.4.attention.output.dense | NNCFLinear | weight | [768, 576] | 442368 | 192495 | 0.564853 |
79
+ | 76 | nncf_module.bert.encoder.layer.4.attention.output.dense | NNCFLinear | bias | [768] | 768 | 768 | 0 |
80
+ | 77 | nncf_module.bert.encoder.layer.4.attention.output.LayerNorm | LayerNorm | weight | [768] | 768 | 768 | 0 |
81
+ | 78 | nncf_module.bert.encoder.layer.4.attention.output.LayerNorm | LayerNorm | bias | [768] | 768 | 768 | 0 |
82
+ | 79 | nncf_module.bert.encoder.layer.4.intermediate.dense | NNCFLinear | weight | [386, 768] | 296448 | 205830 | 0.305679 |
83
+ | 80 | nncf_module.bert.encoder.layer.4.intermediate.dense | NNCFLinear | bias | [386] | 386 | 386 | 0 |
84
+ | 81 | nncf_module.bert.encoder.layer.4.output.dense | NNCFLinear | weight | [768, 386] | 296448 | 199567 | 0.326806 |
85
+ | 82 | nncf_module.bert.encoder.layer.4.output.dense | NNCFLinear | bias | [768] | 768 | 768 | 0 |
86
+ | 83 | nncf_module.bert.encoder.layer.4.output.LayerNorm | LayerNorm | weight | [768] | 768 | 768 | 0 |
87
+ | 84 | nncf_module.bert.encoder.layer.4.output.LayerNorm | LayerNorm | bias | [768] | 768 | 768 | 0 |
88
+ | 85 | nncf_module.bert.encoder.layer.5.attention.self.query | NNCFLinear | weight | [384, 768] | 294912 | 131257 | 0.554928 |
89
+ | 86 | nncf_module.bert.encoder.layer.5.attention.self.query | NNCFLinear | bias | [384] | 384 | 384 | 0 |
90
+ | 87 | nncf_module.bert.encoder.layer.5.attention.self.key | NNCFLinear | weight | [384, 768] | 294912 | 154458 | 0.476257 |
91
+ | 88 | nncf_module.bert.encoder.layer.5.attention.self.key | NNCFLinear | bias | [384] | 384 | 384 | 0 |
92
+ | 89 | nncf_module.bert.encoder.layer.5.attention.self.value | NNCFLinear | weight | [384, 768] | 294912 | 159646 | 0.458666 |
93
+ | 90 | nncf_module.bert.encoder.layer.5.attention.self.value | NNCFLinear | bias | [384] | 384 | 384 | 0 |
94
+ | 91 | nncf_module.bert.encoder.layer.5.attention.output.dense | NNCFLinear | weight | [768, 384] | 294912 | 156889 | 0.468014 |
95
+ | 92 | nncf_module.bert.encoder.layer.5.attention.output.dense | NNCFLinear | bias | [768] | 768 | 768 | 0 |
96
+ | 93 | nncf_module.bert.encoder.layer.5.attention.output.LayerNorm | LayerNorm | weight | [768] | 768 | 768 | 0 |
97
+ | 94 | nncf_module.bert.encoder.layer.5.attention.output.LayerNorm | LayerNorm | bias | [768] | 768 | 768 | 0 |
98
+ | 95 | nncf_module.bert.encoder.layer.5.intermediate.dense | NNCFLinear | weight | [336, 768] | 258048 | 184774 | 0.283955 |
99
+ | 96 | nncf_module.bert.encoder.layer.5.intermediate.dense | NNCFLinear | bias | [336] | 336 | 336 | 0 |
100
+ | 97 | nncf_module.bert.encoder.layer.5.output.dense | NNCFLinear | weight | [768, 336] | 258048 | 178574 | 0.307981 |
101
+ | 98 | nncf_module.bert.encoder.layer.5.output.dense | NNCFLinear | bias | [768] | 768 | 768 | 0 |
102
+ | 99 | nncf_module.bert.encoder.layer.5.output.LayerNorm | LayerNorm | weight | [768] | 768 | 768 | 0 |
103
+ | 100 | nncf_module.bert.encoder.layer.5.output.LayerNorm | LayerNorm | bias | [768] | 768 | 768 | 0 |
104
+ | 101 | nncf_module.bert.encoder.layer.6.attention.self.query | NNCFLinear | weight | [448, 768] | 344064 | 153126 | 0.554949 |
105
+ | 102 | nncf_module.bert.encoder.layer.6.attention.self.query | NNCFLinear | bias | [448] | 448 | 448 | 0 |
106
+ | 103 | nncf_module.bert.encoder.layer.6.attention.self.key | NNCFLinear | weight | [448, 768] | 344064 | 169706 | 0.50676 |
107
+ | 104 | nncf_module.bert.encoder.layer.6.attention.self.key | NNCFLinear | bias | [448] | 448 | 448 | 0 |
108
+ | 105 | nncf_module.bert.encoder.layer.6.attention.self.value | NNCFLinear | weight | [448, 768] | 344064 | 154213 | 0.55179 |
109
+ | 106 | nncf_module.bert.encoder.layer.6.attention.self.value | NNCFLinear | bias | [448] | 448 | 448 | 0 |
110
+ | 107 | nncf_module.bert.encoder.layer.6.attention.output.dense | NNCFLinear | weight | [768, 448] | 344064 | 148815 | 0.567479 |
111
+ | 108 | nncf_module.bert.encoder.layer.6.attention.output.dense | NNCFLinear | bias | [768] | 768 | 768 | 0 |
112
+ | 109 | nncf_module.bert.encoder.layer.6.attention.output.LayerNorm | LayerNorm | weight | [768] | 768 | 768 | 0 |
113
+ | 110 | nncf_module.bert.encoder.layer.6.attention.output.LayerNorm | LayerNorm | bias | [768] | 768 | 768 | 0 |
114
+ | 111 | nncf_module.bert.encoder.layer.6.intermediate.dense | NNCFLinear | weight | [280, 768] | 215040 | 158800 | 0.261533 |
115
+ | 112 | nncf_module.bert.encoder.layer.6.intermediate.dense | NNCFLinear | bias | [280] | 280 | 280 | 0 |
116
+ | 113 | nncf_module.bert.encoder.layer.6.output.dense | NNCFLinear | weight | [768, 280] | 215040 | 156397 | 0.272707 |
117
+ | 114 | nncf_module.bert.encoder.layer.6.output.dense | NNCFLinear | bias | [768] | 768 | 768 | 0 |
118
+ | 115 | nncf_module.bert.encoder.layer.6.output.LayerNorm | LayerNorm | weight | [768] | 768 | 768 | 0 |
119
+ | 116 | nncf_module.bert.encoder.layer.6.output.LayerNorm | LayerNorm | bias | [768] | 768 | 768 | 0 |
120
+ | 117 | nncf_module.bert.encoder.layer.7.attention.self.query | NNCFLinear | weight | [448, 768] | 344064 | 153876 | 0.552769 |
121
+ | 118 | nncf_module.bert.encoder.layer.7.attention.self.query | NNCFLinear | bias | [448] | 448 | 448 | 0 |
122
+ | 119 | nncf_module.bert.encoder.layer.7.attention.self.key | NNCFLinear | weight | [448, 768] | 344064 | 180384 | 0.475725 |
123
+ | 120 | nncf_module.bert.encoder.layer.7.attention.self.key | NNCFLinear | bias | [448] | 448 | 448 | 0 |
124
+ | 121 | nncf_module.bert.encoder.layer.7.attention.self.value | NNCFLinear | weight | [448, 768] | 344064 | 166552 | 0.515927 |
125
+ | 122 | nncf_module.bert.encoder.layer.7.attention.self.value | NNCFLinear | bias | [448] | 448 | 448 | 0 |
126
+ | 123 | nncf_module.bert.encoder.layer.7.attention.output.dense | NNCFLinear | weight | [768, 448] | 344064 | 160346 | 0.533965 |
127
+ | 124 | nncf_module.bert.encoder.layer.7.attention.output.dense | NNCFLinear | bias | [768] | 768 | 768 | 0 |
128
+ | 125 | nncf_module.bert.encoder.layer.7.attention.output.LayerNorm | LayerNorm | weight | [768] | 768 | 768 | 0 |
129
+ | 126 | nncf_module.bert.encoder.layer.7.attention.output.LayerNorm | LayerNorm | bias | [768] | 768 | 768 | 0 |
130
+ | 127 | nncf_module.bert.encoder.layer.7.intermediate.dense | NNCFLinear | weight | [211, 768] | 162048 | 125197 | 0.227408 |
131
+ | 128 | nncf_module.bert.encoder.layer.7.intermediate.dense | NNCFLinear | bias | [211] | 211 | 211 | 0 |
132
+ | 129 | nncf_module.bert.encoder.layer.7.output.dense | NNCFLinear | weight | [768, 211] | 162048 | 123451 | 0.238182 |
133
+ | 130 | nncf_module.bert.encoder.layer.7.output.dense | NNCFLinear | bias | [768] | 768 | 768 | 0 |
134
+ | 131 | nncf_module.bert.encoder.layer.7.output.LayerNorm | LayerNorm | weight | [768] | 768 | 768 | 0 |
135
+ | 132 | nncf_module.bert.encoder.layer.7.output.LayerNorm | LayerNorm | bias | [768] | 768 | 768 | 0 |
136
+ | 133 | nncf_module.bert.encoder.layer.8.attention.self.query | NNCFLinear | weight | [448, 768] | 344064 | 149419 | 0.565723 |
137
+ | 134 | nncf_module.bert.encoder.layer.8.attention.self.query | NNCFLinear | bias | [448] | 448 | 448 | 0 |
138
+ | 135 | nncf_module.bert.encoder.layer.8.attention.self.key | NNCFLinear | weight | [448, 768] | 344064 | 150699 | 0.562003 |
139
+ | 136 | nncf_module.bert.encoder.layer.8.attention.self.key | NNCFLinear | bias | [448] | 448 | 448 | 0 |
140
+ | 137 | nncf_module.bert.encoder.layer.8.attention.self.value | NNCFLinear | weight | [448, 768] | 344064 | 124770 | 0.637364 |
141
+ | 138 | nncf_module.bert.encoder.layer.8.attention.self.value | NNCFLinear | bias | [448] | 448 | 448 | 0 |
142
+ | 139 | nncf_module.bert.encoder.layer.8.attention.output.dense | NNCFLinear | weight | [768, 448] | 344064 | 120648 | 0.649344 |
143
+ | 140 | nncf_module.bert.encoder.layer.8.attention.output.dense | NNCFLinear | bias | [768] | 768 | 768 | 0 |
144
+ | 141 | nncf_module.bert.encoder.layer.8.attention.output.LayerNorm | LayerNorm | weight | [768] | 768 | 768 | 0 |
145
+ | 142 | nncf_module.bert.encoder.layer.8.attention.output.LayerNorm | LayerNorm | bias | [768] | 768 | 768 | 0 |
146
+ | 143 | nncf_module.bert.encoder.layer.8.intermediate.dense | NNCFLinear | weight | [108, 768] | 82944 | 68989 | 0.168246 |
147
+ | 144 | nncf_module.bert.encoder.layer.8.intermediate.dense | NNCFLinear | bias | [108] | 108 | 108 | 0 |
148
+ | 145 | nncf_module.bert.encoder.layer.8.output.dense | NNCFLinear | weight | [768, 108] | 82944 | 68556 | 0.173466 |
149
+ | 146 | nncf_module.bert.encoder.layer.8.output.dense | NNCFLinear | bias | [768] | 768 | 768 | 0 |
150
+ | 147 | nncf_module.bert.encoder.layer.8.output.LayerNorm | LayerNorm | weight | [768] | 768 | 768 | 0 |
151
+ | 148 | nncf_module.bert.encoder.layer.8.output.LayerNorm | LayerNorm | bias | [768] | 768 | 768 | 0 |
152
+ | 149 | nncf_module.bert.encoder.layer.9.attention.self.query | NNCFLinear | weight | [320, 768] | 245760 | 121225 | 0.506734 |
153
+ | 150 | nncf_module.bert.encoder.layer.9.attention.self.query | NNCFLinear | bias | [320] | 320 | 320 | 0 |
154
+ | 151 | nncf_module.bert.encoder.layer.9.attention.self.key | NNCFLinear | weight | [320, 768] | 245760 | 114789 | 0.532922 |
155
+ | 152 | nncf_module.bert.encoder.layer.9.attention.self.key | NNCFLinear | bias | [320] | 320 | 320 | 0 |
156
+ | 153 | nncf_module.bert.encoder.layer.9.attention.self.value | NNCFLinear | weight | [320, 768] | 245760 | 69260 | 0.71818 |
157
+ | 154 | nncf_module.bert.encoder.layer.9.attention.self.value | NNCFLinear | bias | [320] | 320 | 320 | 0 |
158
+ | 155 | nncf_module.bert.encoder.layer.9.attention.output.dense | NNCFLinear | weight | [768, 320] | 245760 | 73575 | 0.700623 |
159
+ | 156 | nncf_module.bert.encoder.layer.9.attention.output.dense | NNCFLinear | bias | [768] | 768 | 768 | 0 |
160
+ | 157 | nncf_module.bert.encoder.layer.9.attention.output.LayerNorm | LayerNorm | weight | [768] | 768 | 768 | 0 |
161
+ | 158 | nncf_module.bert.encoder.layer.9.attention.output.LayerNorm | LayerNorm | bias | [768] | 768 | 768 | 0 |
162
+ | 159 | nncf_module.bert.encoder.layer.9.intermediate.dense | NNCFLinear | weight | [53, 768] | 40704 | 35399 | 0.130331 |
163
+ | 160 | nncf_module.bert.encoder.layer.9.intermediate.dense | NNCFLinear | bias | [53] | 53 | 53 | 0 |
164
+ | 161 | nncf_module.bert.encoder.layer.9.output.dense | NNCFLinear | weight | [768, 53] | 40704 | 34918 | 0.142148 |
165
+ | 162 | nncf_module.bert.encoder.layer.9.output.dense | NNCFLinear | bias | [768] | 768 | 768 | 0 |
166
+ | 163 | nncf_module.bert.encoder.layer.9.output.LayerNorm | LayerNorm | weight | [768] | 768 | 768 | 0 |
167
+ | 164 | nncf_module.bert.encoder.layer.9.output.LayerNorm | LayerNorm | bias | [768] | 768 | 768 | 0 |
168
+ | 165 | nncf_module.bert.encoder.layer.10.attention.self.query | NNCFLinear | weight | [384, 768] | 294912 | 128113 | 0.565589 |
169
+ | 166 | nncf_module.bert.encoder.layer.10.attention.self.query | NNCFLinear | bias | [384] | 384 | 384 | 0 |
170
+ | 167 | nncf_module.bert.encoder.layer.10.attention.self.key | NNCFLinear | weight | [384, 768] | 294912 | 124553 | 0.57766 |
171
+ | 168 | nncf_module.bert.encoder.layer.10.attention.self.key | NNCFLinear | bias | [384] | 384 | 384 | 0 |
172
+ | 169 | nncf_module.bert.encoder.layer.10.attention.self.value | NNCFLinear | weight | [384, 768] | 294912 | 79608 | 0.730062 |
173
+ | 170 | nncf_module.bert.encoder.layer.10.attention.self.value | NNCFLinear | bias | [384] | 384 | 384 | 0 |
174
+ | 171 | nncf_module.bert.encoder.layer.10.attention.output.dense | NNCFLinear | weight | [768, 384] | 294912 | 85158 | 0.711243 |
175
+ | 172 | nncf_module.bert.encoder.layer.10.attention.output.dense | NNCFLinear | bias | [768] | 768 | 768 | 0 |
176
+ | 173 | nncf_module.bert.encoder.layer.10.attention.output.LayerNorm | LayerNorm | weight | [768] | 768 | 768 | 0 |
177
+ | 174 | nncf_module.bert.encoder.layer.10.attention.output.LayerNorm | LayerNorm | bias | [768] | 768 | 768 | 0 |
178
+ | 175 | nncf_module.bert.encoder.layer.10.intermediate.dense | NNCFLinear | weight | [86, 768] | 66048 | 55025 | 0.166894 |
179
+ | 176 | nncf_module.bert.encoder.layer.10.intermediate.dense | NNCFLinear | bias | [86] | 86 | 86 | 0 |
180
+ | 177 | nncf_module.bert.encoder.layer.10.output.dense | NNCFLinear | weight | [768, 86] | 66048 | 54584 | 0.173571 |
181
+ | 178 | nncf_module.bert.encoder.layer.10.output.dense | NNCFLinear | bias | [768] | 768 | 768 | 0 |
182
+ | 179 | nncf_module.bert.encoder.layer.10.output.LayerNorm | LayerNorm | weight | [768] | 768 | 768 | 0 |
183
+ | 180 | nncf_module.bert.encoder.layer.10.output.LayerNorm | LayerNorm | bias | [768] | 768 | 768 | 0 |
184
+ | 181 | nncf_module.bert.encoder.layer.11.attention.self.query | NNCFLinear | weight | [384, 768] | 294912 | 104737 | 0.644853 |
185
+ | 182 | nncf_module.bert.encoder.layer.11.attention.self.query | NNCFLinear | bias | [384] | 384 | 384 | 0 |
186
+ | 183 | nncf_module.bert.encoder.layer.11.attention.self.key | NNCFLinear | weight | [384, 768] | 294912 | 102558 | 0.652242 |
187
+ | 184 | nncf_module.bert.encoder.layer.11.attention.self.key | NNCFLinear | bias | [384] | 384 | 384 | 0 |
188
+ | 185 | nncf_module.bert.encoder.layer.11.attention.self.value | NNCFLinear | weight | [384, 768] | 294912 | 64855 | 0.780087 |
189
+ | 186 | nncf_module.bert.encoder.layer.11.attention.self.value | NNCFLinear | bias | [384] | 384 | 384 | 0 |
190
+ | 187 | nncf_module.bert.encoder.layer.11.attention.output.dense | NNCFLinear | weight | [768, 384] | 294912 | 69674 | 0.763747 |
191
+ | 188 | nncf_module.bert.encoder.layer.11.attention.output.dense | NNCFLinear | bias | [768] | 768 | 768 | 0 |
192
+ | 189 | nncf_module.bert.encoder.layer.11.attention.output.LayerNorm | LayerNorm | weight | [768] | 768 | 768 | 0 |
193
+ | 190 | nncf_module.bert.encoder.layer.11.attention.output.LayerNorm | LayerNorm | bias | [768] | 768 | 768 | 0 |
194
+ | 191 | nncf_module.bert.encoder.layer.11.intermediate.dense | NNCFLinear | weight | [105, 768] | 80640 | 67724 | 0.160169 |
195
+ | 192 | nncf_module.bert.encoder.layer.11.intermediate.dense | NNCFLinear | bias | [105] | 105 | 105 | 0 |
196
+ | 193 | nncf_module.bert.encoder.layer.11.output.dense | NNCFLinear | weight | [768, 105] | 80640 | 67519 | 0.162711 |
197
+ | 194 | nncf_module.bert.encoder.layer.11.output.dense | NNCFLinear | bias | [768] | 768 | 768 | 0 |
198
+ | 195 | nncf_module.bert.encoder.layer.11.output.LayerNorm | LayerNorm | weight | [768] | 768 | 768 | 0 |
199
+ | 196 | nncf_module.bert.encoder.layer.11.output.LayerNorm | LayerNorm | bias | [768] | 768 | 768 | 0 |
200
+ | 197 | nncf_module.qa_outputs | NNCFLinear | weight | [2, 768] | 1536 | 1536 | 0 |
201
+ | 198 | nncf_module.qa_outputs | NNCFLinear | bias | [2] | 2 | 2 | 0 |
XP_linear_layer_sparsity_20M_params_50.00_sparsity.csv ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ,layer_id,layer_type,param_type,shape,nparam,nnz,sparsity
2
+ 5,nncf_module.bert.encoder.layer.0.attention.self.query,NNCFLinear,weight,"[320, 768]",245760,106337,0.5673136115074158
3
+ 7,nncf_module.bert.encoder.layer.0.attention.self.key,NNCFLinear,weight,"[320, 768]",245760,112375,0.5427449345588684
4
+ 9,nncf_module.bert.encoder.layer.0.attention.self.value,NNCFLinear,weight,"[320, 768]",245760,131965,0.4630330204963684
5
+ 11,nncf_module.bert.encoder.layer.0.attention.output.dense,NNCFLinear,weight,"[768, 320]",245760,137497,0.44052326679229736
6
+ 15,nncf_module.bert.encoder.layer.0.intermediate.dense,NNCFLinear,weight,"[185, 768]",142080,110056,0.2253941297531128
7
+ 17,nncf_module.bert.encoder.layer.0.output.dense,NNCFLinear,weight,"[768, 185]",142080,108631,0.23542368412017822
8
+ 21,nncf_module.bert.encoder.layer.1.attention.self.query,NNCFLinear,weight,"[320, 768]",245760,136222,0.44571125507354736
9
+ 23,nncf_module.bert.encoder.layer.1.attention.self.key,NNCFLinear,weight,"[320, 768]",245760,136267,0.44552814960479736
10
+ 25,nncf_module.bert.encoder.layer.1.attention.self.value,NNCFLinear,weight,"[320, 768]",245760,124892,0.49181312322616577
11
+ 27,nncf_module.bert.encoder.layer.1.attention.output.dense,NNCFLinear,weight,"[768, 320]",245760,130185,0.47027587890625
12
+ 31,nncf_module.bert.encoder.layer.1.intermediate.dense,NNCFLinear,weight,"[315, 768]",241920,176203,0.2716476321220398
13
+ 33,nncf_module.bert.encoder.layer.1.output.dense,NNCFLinear,weight,"[768, 315]",241920,172407,0.2873387932777405
14
+ 37,nncf_module.bert.encoder.layer.2.attention.self.query,NNCFLinear,weight,"[576, 768]",442368,198811,0.5505754947662354
15
+ 39,nncf_module.bert.encoder.layer.2.attention.self.key,NNCFLinear,weight,"[576, 768]",442368,201526,0.5444381237030029
16
+ 41,nncf_module.bert.encoder.layer.2.attention.self.value,NNCFLinear,weight,"[576, 768]",442368,163541,0.6303055286407471
17
+ 43,nncf_module.bert.encoder.layer.2.attention.output.dense,NNCFLinear,weight,"[768, 576]",442368,167245,0.6219323873519897
18
+ 47,nncf_module.bert.encoder.layer.2.intermediate.dense,NNCFLinear,weight,"[339, 768]",260352,185491,0.2875376343727112
19
+ 49,nncf_module.bert.encoder.layer.2.output.dense,NNCFLinear,weight,"[768, 339]",260352,183083,0.2967866063117981
20
+ 53,nncf_module.bert.encoder.layer.3.attention.self.query,NNCFLinear,weight,"[576, 768]",442368,205898,0.5345549583435059
21
+ 55,nncf_module.bert.encoder.layer.3.attention.self.key,NNCFLinear,weight,"[576, 768]",442368,217621,0.5080543756484985
22
+ 57,nncf_module.bert.encoder.layer.3.attention.self.value,NNCFLinear,weight,"[576, 768]",442368,209726,0.5259014964103699
23
+ 59,nncf_module.bert.encoder.layer.3.attention.output.dense,NNCFLinear,weight,"[768, 576]",442368,208038,0.5297173261642456
24
+ 63,nncf_module.bert.encoder.layer.3.intermediate.dense,NNCFLinear,weight,"[368, 768]",282624,198154,0.29887765645980835
25
+ 65,nncf_module.bert.encoder.layer.3.output.dense,NNCFLinear,weight,"[768, 368]",282624,194127,0.31312626600265503
26
+ 69,nncf_module.bert.encoder.layer.4.attention.self.query,NNCFLinear,weight,"[576, 768]",442368,212567,0.5194792747497559
27
+ 71,nncf_module.bert.encoder.layer.4.attention.self.key,NNCFLinear,weight,"[576, 768]",442368,214788,0.5144585371017456
28
+ 73,nncf_module.bert.encoder.layer.4.attention.self.value,NNCFLinear,weight,"[576, 768]",442368,197159,0.5543099641799927
29
+ 75,nncf_module.bert.encoder.layer.4.attention.output.dense,NNCFLinear,weight,"[768, 576]",442368,192495,0.5648532509803772
30
+ 79,nncf_module.bert.encoder.layer.4.intermediate.dense,NNCFLinear,weight,"[386, 768]",296448,205830,0.3056792616844177
31
+ 81,nncf_module.bert.encoder.layer.4.output.dense,NNCFLinear,weight,"[768, 386]",296448,199567,0.32680606842041016
32
+ 85,nncf_module.bert.encoder.layer.5.attention.self.query,NNCFLinear,weight,"[384, 768]",294912,131257,0.5549282431602478
33
+ 87,nncf_module.bert.encoder.layer.5.attention.self.key,NNCFLinear,weight,"[384, 768]",294912,154458,0.47625732421875
34
+ 89,nncf_module.bert.encoder.layer.5.attention.self.value,NNCFLinear,weight,"[384, 768]",294912,159646,0.4586656093597412
35
+ 91,nncf_module.bert.encoder.layer.5.attention.output.dense,NNCFLinear,weight,"[768, 384]",294912,156889,0.4680141806602478
36
+ 95,nncf_module.bert.encoder.layer.5.intermediate.dense,NNCFLinear,weight,"[336, 768]",258048,184774,0.2839548587799072
37
+ 97,nncf_module.bert.encoder.layer.5.output.dense,NNCFLinear,weight,"[768, 336]",258048,178574,0.3079814314842224
38
+ 101,nncf_module.bert.encoder.layer.6.attention.self.query,NNCFLinear,weight,"[448, 768]",344064,153126,0.5549490451812744
39
+ 103,nncf_module.bert.encoder.layer.6.attention.self.key,NNCFLinear,weight,"[448, 768]",344064,169706,0.5067603588104248
40
+ 105,nncf_module.bert.encoder.layer.6.attention.self.value,NNCFLinear,weight,"[448, 768]",344064,154213,0.5517897605895996
41
+ 107,nncf_module.bert.encoder.layer.6.attention.output.dense,NNCFLinear,weight,"[768, 448]",344064,148815,0.5674787163734436
42
+ 111,nncf_module.bert.encoder.layer.6.intermediate.dense,NNCFLinear,weight,"[280, 768]",215040,158800,0.261532723903656
43
+ 113,nncf_module.bert.encoder.layer.6.output.dense,NNCFLinear,weight,"[768, 280]",215040,156397,0.27270740270614624
44
+ 117,nncf_module.bert.encoder.layer.7.attention.self.query,NNCFLinear,weight,"[448, 768]",344064,153876,0.5527692437171936
45
+ 119,nncf_module.bert.encoder.layer.7.attention.self.key,NNCFLinear,weight,"[448, 768]",344064,180384,0.4757254123687744
46
+ 121,nncf_module.bert.encoder.layer.7.attention.self.value,NNCFLinear,weight,"[448, 768]",344064,166552,0.515927255153656
47
+ 123,nncf_module.bert.encoder.layer.7.attention.output.dense,NNCFLinear,weight,"[768, 448]",344064,160346,0.5339646339416504
48
+ 127,nncf_module.bert.encoder.layer.7.intermediate.dense,NNCFLinear,weight,"[211, 768]",162048,125197,0.22740793228149414
49
+ 129,nncf_module.bert.encoder.layer.7.output.dense,NNCFLinear,weight,"[768, 211]",162048,123451,0.23818248510360718
50
+ 133,nncf_module.bert.encoder.layer.8.attention.self.query,NNCFLinear,weight,"[448, 768]",344064,149419,0.5657232403755188
51
+ 135,nncf_module.bert.encoder.layer.8.attention.self.key,NNCFLinear,weight,"[448, 768]",344064,150699,0.5620030164718628
52
+ 137,nncf_module.bert.encoder.layer.8.attention.self.value,NNCFLinear,weight,"[448, 768]",344064,124770,0.6373639702796936
53
+ 139,nncf_module.bert.encoder.layer.8.attention.output.dense,NNCFLinear,weight,"[768, 448]",344064,120648,0.6493443250656128
54
+ 143,nncf_module.bert.encoder.layer.8.intermediate.dense,NNCFLinear,weight,"[108, 768]",82944,68989,0.16824603080749512
55
+ 145,nncf_module.bert.encoder.layer.8.output.dense,NNCFLinear,weight,"[768, 108]",82944,68556,0.17346644401550293
56
+ 149,nncf_module.bert.encoder.layer.9.attention.self.query,NNCFLinear,weight,"[320, 768]",245760,121225,0.5067341923713684
57
+ 151,nncf_module.bert.encoder.layer.9.attention.self.key,NNCFLinear,weight,"[320, 768]",245760,114789,0.5329223275184631
58
+ 153,nncf_module.bert.encoder.layer.9.attention.self.value,NNCFLinear,weight,"[320, 768]",245760,69260,0.7181802988052368
59
+ 155,nncf_module.bert.encoder.layer.9.attention.output.dense,NNCFLinear,weight,"[768, 320]",245760,73575,0.70062255859375
60
+ 159,nncf_module.bert.encoder.layer.9.intermediate.dense,NNCFLinear,weight,"[53, 768]",40704,35399,0.13033121824264526
61
+ 161,nncf_module.bert.encoder.layer.9.output.dense,NNCFLinear,weight,"[768, 53]",40704,34918,0.1421481966972351
62
+ 165,nncf_module.bert.encoder.layer.10.attention.self.query,NNCFLinear,weight,"[384, 768]",294912,128113,0.5655890703201294
63
+ 167,nncf_module.bert.encoder.layer.10.attention.self.key,NNCFLinear,weight,"[384, 768]",294912,124553,0.5776604413986206
64
+ 169,nncf_module.bert.encoder.layer.10.attention.self.value,NNCFLinear,weight,"[384, 768]",294912,79608,0.7300618886947632
65
+ 171,nncf_module.bert.encoder.layer.10.attention.output.dense,NNCFLinear,weight,"[768, 384]",294912,85158,0.71124267578125
66
+ 175,nncf_module.bert.encoder.layer.10.intermediate.dense,NNCFLinear,weight,"[86, 768]",66048,55025,0.16689378023147583
67
+ 177,nncf_module.bert.encoder.layer.10.output.dense,NNCFLinear,weight,"[768, 86]",66048,54584,0.17357075214385986
68
+ 181,nncf_module.bert.encoder.layer.11.attention.self.query,NNCFLinear,weight,"[384, 768]",294912,104737,0.6448533535003662
69
+ 183,nncf_module.bert.encoder.layer.11.attention.self.key,NNCFLinear,weight,"[384, 768]",294912,102558,0.6522420644760132
70
+ 185,nncf_module.bert.encoder.layer.11.attention.self.value,NNCFLinear,weight,"[384, 768]",294912,64855,0.7800869345664978
71
+ 187,nncf_module.bert.encoder.layer.11.attention.output.dense,NNCFLinear,weight,"[768, 384]",294912,69674,0.7637465000152588
72
+ 191,nncf_module.bert.encoder.layer.11.intermediate.dense,NNCFLinear,weight,"[105, 768]",80640,67724,0.16016864776611328
73
+ 193,nncf_module.bert.encoder.layer.11.output.dense,NNCFLinear,weight,"[768, 105]",80640,67519,0.1627107858657837
74
+ 197,nncf_module.qa_outputs,NNCFLinear,weight,"[2, 768]",1536,1536,0.0
XP_linear_layer_sparsity_20M_params_50.00_sparsity.md ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ | | layer_id | layer_type | param_type | shape | nparam | nnz | sparsity |
2
+ |----:|:---------------------------------------------------------|:-------------|:-------------|:-----------|---------:|-------:|-----------:|
3
+ | 5 | nncf_module.bert.encoder.layer.0.attention.self.query | NNCFLinear | weight | [320, 768] | 245760 | 106337 | 0.567314 |
4
+ | 7 | nncf_module.bert.encoder.layer.0.attention.self.key | NNCFLinear | weight | [320, 768] | 245760 | 112375 | 0.542745 |
5
+ | 9 | nncf_module.bert.encoder.layer.0.attention.self.value | NNCFLinear | weight | [320, 768] | 245760 | 131965 | 0.463033 |
6
+ | 11 | nncf_module.bert.encoder.layer.0.attention.output.dense | NNCFLinear | weight | [768, 320] | 245760 | 137497 | 0.440523 |
7
+ | 15 | nncf_module.bert.encoder.layer.0.intermediate.dense | NNCFLinear | weight | [185, 768] | 142080 | 110056 | 0.225394 |
8
+ | 17 | nncf_module.bert.encoder.layer.0.output.dense | NNCFLinear | weight | [768, 185] | 142080 | 108631 | 0.235424 |
9
+ | 21 | nncf_module.bert.encoder.layer.1.attention.self.query | NNCFLinear | weight | [320, 768] | 245760 | 136222 | 0.445711 |
10
+ | 23 | nncf_module.bert.encoder.layer.1.attention.self.key | NNCFLinear | weight | [320, 768] | 245760 | 136267 | 0.445528 |
11
+ | 25 | nncf_module.bert.encoder.layer.1.attention.self.value | NNCFLinear | weight | [320, 768] | 245760 | 124892 | 0.491813 |
12
+ | 27 | nncf_module.bert.encoder.layer.1.attention.output.dense | NNCFLinear | weight | [768, 320] | 245760 | 130185 | 0.470276 |
13
+ | 31 | nncf_module.bert.encoder.layer.1.intermediate.dense | NNCFLinear | weight | [315, 768] | 241920 | 176203 | 0.271648 |
14
+ | 33 | nncf_module.bert.encoder.layer.1.output.dense | NNCFLinear | weight | [768, 315] | 241920 | 172407 | 0.287339 |
15
+ | 37 | nncf_module.bert.encoder.layer.2.attention.self.query | NNCFLinear | weight | [576, 768] | 442368 | 198811 | 0.550575 |
16
+ | 39 | nncf_module.bert.encoder.layer.2.attention.self.key | NNCFLinear | weight | [576, 768] | 442368 | 201526 | 0.544438 |
17
+ | 41 | nncf_module.bert.encoder.layer.2.attention.self.value | NNCFLinear | weight | [576, 768] | 442368 | 163541 | 0.630306 |
18
+ | 43 | nncf_module.bert.encoder.layer.2.attention.output.dense | NNCFLinear | weight | [768, 576] | 442368 | 167245 | 0.621932 |
19
+ | 47 | nncf_module.bert.encoder.layer.2.intermediate.dense | NNCFLinear | weight | [339, 768] | 260352 | 185491 | 0.287538 |
20
+ | 49 | nncf_module.bert.encoder.layer.2.output.dense | NNCFLinear | weight | [768, 339] | 260352 | 183083 | 0.296787 |
21
+ | 53 | nncf_module.bert.encoder.layer.3.attention.self.query | NNCFLinear | weight | [576, 768] | 442368 | 205898 | 0.534555 |
22
+ | 55 | nncf_module.bert.encoder.layer.3.attention.self.key | NNCFLinear | weight | [576, 768] | 442368 | 217621 | 0.508054 |
23
+ | 57 | nncf_module.bert.encoder.layer.3.attention.self.value | NNCFLinear | weight | [576, 768] | 442368 | 209726 | 0.525901 |
24
+ | 59 | nncf_module.bert.encoder.layer.3.attention.output.dense | NNCFLinear | weight | [768, 576] | 442368 | 208038 | 0.529717 |
25
+ | 63 | nncf_module.bert.encoder.layer.3.intermediate.dense | NNCFLinear | weight | [368, 768] | 282624 | 198154 | 0.298878 |
26
+ | 65 | nncf_module.bert.encoder.layer.3.output.dense | NNCFLinear | weight | [768, 368] | 282624 | 194127 | 0.313126 |
27
+ | 69 | nncf_module.bert.encoder.layer.4.attention.self.query | NNCFLinear | weight | [576, 768] | 442368 | 212567 | 0.519479 |
28
+ | 71 | nncf_module.bert.encoder.layer.4.attention.self.key | NNCFLinear | weight | [576, 768] | 442368 | 214788 | 0.514459 |
29
+ | 73 | nncf_module.bert.encoder.layer.4.attention.self.value | NNCFLinear | weight | [576, 768] | 442368 | 197159 | 0.55431 |
30
+ | 75 | nncf_module.bert.encoder.layer.4.attention.output.dense | NNCFLinear | weight | [768, 576] | 442368 | 192495 | 0.564853 |
31
+ | 79 | nncf_module.bert.encoder.layer.4.intermediate.dense | NNCFLinear | weight | [386, 768] | 296448 | 205830 | 0.305679 |
32
+ | 81 | nncf_module.bert.encoder.layer.4.output.dense | NNCFLinear | weight | [768, 386] | 296448 | 199567 | 0.326806 |
33
+ | 85 | nncf_module.bert.encoder.layer.5.attention.self.query | NNCFLinear | weight | [384, 768] | 294912 | 131257 | 0.554928 |
34
+ | 87 | nncf_module.bert.encoder.layer.5.attention.self.key | NNCFLinear | weight | [384, 768] | 294912 | 154458 | 0.476257 |
35
+ | 89 | nncf_module.bert.encoder.layer.5.attention.self.value | NNCFLinear | weight | [384, 768] | 294912 | 159646 | 0.458666 |
36
+ | 91 | nncf_module.bert.encoder.layer.5.attention.output.dense | NNCFLinear | weight | [768, 384] | 294912 | 156889 | 0.468014 |
37
+ | 95 | nncf_module.bert.encoder.layer.5.intermediate.dense | NNCFLinear | weight | [336, 768] | 258048 | 184774 | 0.283955 |
38
+ | 97 | nncf_module.bert.encoder.layer.5.output.dense | NNCFLinear | weight | [768, 336] | 258048 | 178574 | 0.307981 |
39
+ | 101 | nncf_module.bert.encoder.layer.6.attention.self.query | NNCFLinear | weight | [448, 768] | 344064 | 153126 | 0.554949 |
40
+ | 103 | nncf_module.bert.encoder.layer.6.attention.self.key | NNCFLinear | weight | [448, 768] | 344064 | 169706 | 0.50676 |
41
+ | 105 | nncf_module.bert.encoder.layer.6.attention.self.value | NNCFLinear | weight | [448, 768] | 344064 | 154213 | 0.55179 |
42
+ | 107 | nncf_module.bert.encoder.layer.6.attention.output.dense | NNCFLinear | weight | [768, 448] | 344064 | 148815 | 0.567479 |
43
+ | 111 | nncf_module.bert.encoder.layer.6.intermediate.dense | NNCFLinear | weight | [280, 768] | 215040 | 158800 | 0.261533 |
44
+ | 113 | nncf_module.bert.encoder.layer.6.output.dense | NNCFLinear | weight | [768, 280] | 215040 | 156397 | 0.272707 |
45
+ | 117 | nncf_module.bert.encoder.layer.7.attention.self.query | NNCFLinear | weight | [448, 768] | 344064 | 153876 | 0.552769 |
46
+ | 119 | nncf_module.bert.encoder.layer.7.attention.self.key | NNCFLinear | weight | [448, 768] | 344064 | 180384 | 0.475725 |
47
+ | 121 | nncf_module.bert.encoder.layer.7.attention.self.value | NNCFLinear | weight | [448, 768] | 344064 | 166552 | 0.515927 |
48
+ | 123 | nncf_module.bert.encoder.layer.7.attention.output.dense | NNCFLinear | weight | [768, 448] | 344064 | 160346 | 0.533965 |
49
+ | 127 | nncf_module.bert.encoder.layer.7.intermediate.dense | NNCFLinear | weight | [211, 768] | 162048 | 125197 | 0.227408 |
50
+ | 129 | nncf_module.bert.encoder.layer.7.output.dense | NNCFLinear | weight | [768, 211] | 162048 | 123451 | 0.238182 |
51
+ | 133 | nncf_module.bert.encoder.layer.8.attention.self.query | NNCFLinear | weight | [448, 768] | 344064 | 149419 | 0.565723 |
52
+ | 135 | nncf_module.bert.encoder.layer.8.attention.self.key | NNCFLinear | weight | [448, 768] | 344064 | 150699 | 0.562003 |
53
+ | 137 | nncf_module.bert.encoder.layer.8.attention.self.value | NNCFLinear | weight | [448, 768] | 344064 | 124770 | 0.637364 |
54
+ | 139 | nncf_module.bert.encoder.layer.8.attention.output.dense | NNCFLinear | weight | [768, 448] | 344064 | 120648 | 0.649344 |
55
+ | 143 | nncf_module.bert.encoder.layer.8.intermediate.dense | NNCFLinear | weight | [108, 768] | 82944 | 68989 | 0.168246 |
56
+ | 145 | nncf_module.bert.encoder.layer.8.output.dense | NNCFLinear | weight | [768, 108] | 82944 | 68556 | 0.173466 |
57
+ | 149 | nncf_module.bert.encoder.layer.9.attention.self.query | NNCFLinear | weight | [320, 768] | 245760 | 121225 | 0.506734 |
58
+ | 151 | nncf_module.bert.encoder.layer.9.attention.self.key | NNCFLinear | weight | [320, 768] | 245760 | 114789 | 0.532922 |
59
+ | 153 | nncf_module.bert.encoder.layer.9.attention.self.value | NNCFLinear | weight | [320, 768] | 245760 | 69260 | 0.71818 |
60
+ | 155 | nncf_module.bert.encoder.layer.9.attention.output.dense | NNCFLinear | weight | [768, 320] | 245760 | 73575 | 0.700623 |
61
+ | 159 | nncf_module.bert.encoder.layer.9.intermediate.dense | NNCFLinear | weight | [53, 768] | 40704 | 35399 | 0.130331 |
62
+ | 161 | nncf_module.bert.encoder.layer.9.output.dense | NNCFLinear | weight | [768, 53] | 40704 | 34918 | 0.142148 |
63
+ | 165 | nncf_module.bert.encoder.layer.10.attention.self.query | NNCFLinear | weight | [384, 768] | 294912 | 128113 | 0.565589 |
64
+ | 167 | nncf_module.bert.encoder.layer.10.attention.self.key | NNCFLinear | weight | [384, 768] | 294912 | 124553 | 0.57766 |
65
+ | 169 | nncf_module.bert.encoder.layer.10.attention.self.value | NNCFLinear | weight | [384, 768] | 294912 | 79608 | 0.730062 |
66
+ | 171 | nncf_module.bert.encoder.layer.10.attention.output.dense | NNCFLinear | weight | [768, 384] | 294912 | 85158 | 0.711243 |
67
+ | 175 | nncf_module.bert.encoder.layer.10.intermediate.dense | NNCFLinear | weight | [86, 768] | 66048 | 55025 | 0.166894 |
68
+ | 177 | nncf_module.bert.encoder.layer.10.output.dense | NNCFLinear | weight | [768, 86] | 66048 | 54584 | 0.173571 |
69
+ | 181 | nncf_module.bert.encoder.layer.11.attention.self.query | NNCFLinear | weight | [384, 768] | 294912 | 104737 | 0.644853 |
70
+ | 183 | nncf_module.bert.encoder.layer.11.attention.self.key | NNCFLinear | weight | [384, 768] | 294912 | 102558 | 0.652242 |
71
+ | 185 | nncf_module.bert.encoder.layer.11.attention.self.value | NNCFLinear | weight | [384, 768] | 294912 | 64855 | 0.780087 |
72
+ | 187 | nncf_module.bert.encoder.layer.11.attention.output.dense | NNCFLinear | weight | [768, 384] | 294912 | 69674 | 0.763747 |
73
+ | 191 | nncf_module.bert.encoder.layer.11.intermediate.dense | NNCFLinear | weight | [105, 768] | 80640 | 67724 | 0.160169 |
74
+ | 193 | nncf_module.bert.encoder.layer.11.output.dense | NNCFLinear | weight | [768, 105] | 80640 | 67519 | 0.162711 |
75
+ | 197 | nncf_module.qa_outputs | NNCFLinear | weight | [2, 768] | 1536 | 1536 | 0 |
XP_onnx_sparsity.csv ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ,layer_id,shape,nparam,nnz,sparsity
2
+ 0,Constant_15,"[30522, 768]",23440896,23440896,0.0
3
+ 1,Constant_23,"[2, 768]",1536,1536,0.0
4
+ 2,Constant_35,"[512, 768]",393216,393216,0.0
5
+ 3,Constant_61,"[320, 768]",245760,106337,0.5673136393229167
6
+ 4,Constant_71,"[320, 768]",245760,112375,0.5427449544270833
7
+ 5,Constant_91,"[320, 768]",245760,131965,0.46303304036458337
8
+ 6,Constant_150,"[768, 320]",245760,137497,0.4405232747395833
9
+ 7,Constant_178,"[185, 768]",142080,110056,0.22539414414414416
10
+ 8,Constant_196,"[768, 185]",142080,108631,0.2354237049549549
11
+ 9,Constant_224,"[320, 768]",245760,136222,0.4457112630208333
12
+ 10,Constant_234,"[320, 768]",245760,136267,0.4455281575520833
13
+ 11,Constant_254,"[320, 768]",245760,124892,0.4918131510416667
14
+ 12,Constant_313,"[768, 320]",245760,130185,0.47027587890625
15
+ 13,Constant_341,"[315, 768]",241920,176203,0.27164765211640207
16
+ 14,Constant_359,"[768, 315]",241920,172407,0.2873387896825397
17
+ 15,Constant_387,"[576, 768]",442368,198811,0.5505755389178241
18
+ 16,Constant_397,"[576, 768]",442368,201526,0.5444381148726851
19
+ 17,Constant_417,"[576, 768]",442368,163541,0.6303055374710649
20
+ 18,Constant_476,"[768, 576]",442368,167245,0.6219324182581019
21
+ 19,Constant_504,"[339, 768]",260352,185491,0.28753764134709936
22
+ 20,Constant_522,"[768, 339]",260352,183083,0.29678665806293014
23
+ 21,Constant_550,"[576, 768]",442368,205898,0.5345549406828703
24
+ 22,Constant_560,"[576, 768]",442368,217621,0.5080543800636574
25
+ 23,Constant_580,"[576, 768]",442368,209726,0.5259015118634259
26
+ 24,Constant_639,"[768, 576]",442368,208038,0.5297173394097222
27
+ 25,Constant_667,"[368, 768]",282624,198154,0.2988776607789855
28
+ 26,Constant_685,"[768, 368]",282624,194127,0.31312627377717395
29
+ 27,Constant_713,"[576, 768]",442368,212567,0.5194792570891203
30
+ 28,Constant_723,"[576, 768]",442368,214788,0.5144585503472222
31
+ 29,Constant_743,"[576, 768]",442368,197159,0.554309986255787
32
+ 30,Constant_802,"[768, 576]",442368,192495,0.5648532443576388
33
+ 31,Constant_830,"[386, 768]",296448,205830,0.30567924222797926
34
+ 32,Constant_848,"[768, 386]",296448,199567,0.3268060503022453
35
+ 33,Constant_876,"[384, 768]",294912,131257,0.5549282497829862
36
+ 34,Constant_886,"[384, 768]",294912,154458,0.47625732421875
37
+ 35,Constant_906,"[384, 768]",294912,159646,0.4586656358506944
38
+ 36,Constant_965,"[768, 384]",294912,156889,0.46801418728298616
39
+ 37,Constant_993,"[336, 768]",258048,184774,0.2839549231150794
40
+ 38,Constant_1011,"[768, 336]",258048,178574,0.3079814608134921
41
+ 39,Constant_1039,"[448, 768]",344064,153126,0.5549490792410714
42
+ 40,Constant_1049,"[448, 768]",344064,169706,0.5067603701636905
43
+ 41,Constant_1069,"[448, 768]",344064,154213,0.5517897832961309
44
+ 42,Constant_1128,"[768, 448]",344064,148815,0.5674787248883928
45
+ 43,Constant_1156,"[280, 768]",215040,158800,0.26153273809523814
46
+ 44,Constant_1174,"[768, 280]",215040,156397,0.2727074032738095
47
+ 45,Constant_1202,"[448, 768]",344064,153876,0.5527692522321428
48
+ 46,Constant_1212,"[448, 768]",344064,180384,0.4757254464285714
49
+ 47,Constant_1232,"[448, 768]",344064,166552,0.5159272693452381
50
+ 48,Constant_1291,"[768, 448]",344064,160346,0.5339646112351191
51
+ 49,Constant_1319,"[211, 768]",162048,125197,0.2274079285150079
52
+ 50,Constant_1337,"[768, 211]",162048,123451,0.23818251382306477
53
+ 51,Constant_1365,"[448, 768]",344064,149419,0.5657232375372023
54
+ 52,Constant_1375,"[448, 768]",344064,150699,0.5620029994419643
55
+ 53,Constant_1395,"[448, 768]",344064,124770,0.6373639787946428
56
+ 54,Constant_1454,"[768, 448]",344064,120648,0.6493443080357143
57
+ 55,Constant_1482,"[108, 768]",82944,68989,0.16824604552469136
58
+ 56,Constant_1500,"[768, 108]",82944,68556,0.17346643518518523
59
+ 57,Constant_1528,"[320, 768]",245760,121225,0.5067342122395833
60
+ 58,Constant_1538,"[320, 768]",245760,114789,0.53292236328125
61
+ 59,Constant_1558,"[320, 768]",245760,69260,0.7181803385416667
62
+ 60,Constant_1617,"[768, 320]",245760,73575,0.70062255859375
63
+ 61,Constant_1645,"[53, 768]",40704,35399,0.1303311713836478
64
+ 62,Constant_1663,"[768, 53]",40704,34918,0.14214819182389937
65
+ 63,Constant_1691,"[384, 768]",294912,128113,0.5655890570746528
66
+ 64,Constant_1701,"[384, 768]",294912,124553,0.5776604546440972
67
+ 65,Constant_1721,"[384, 768]",294912,79608,0.7300618489583333
68
+ 66,Constant_1780,"[768, 384]",294912,85158,0.71124267578125
69
+ 67,Constant_1808,"[86, 768]",66048,55025,0.16689377422480622
70
+ 68,Constant_1826,"[768, 86]",66048,54584,0.17357073643410847
71
+ 69,Constant_1854,"[384, 768]",294912,104737,0.6448533799913194
72
+ 70,Constant_1864,"[384, 768]",294912,102558,0.6522420247395833
73
+ 71,Constant_1884,"[384, 768]",294912,64855,0.7800869411892362
74
+ 72,Constant_1943,"[768, 384]",294912,69674,0.7637464735243056
75
+ 73,Constant_1971,"[105, 768]",80640,67724,0.16016865079365084
76
+ 74,Constant_1989,"[768, 105]",80640,67519,0.1627108134920635
77
+ 75,Constant_2017,"[2, 768]",1536,1536,0.0
XP_onnx_sparsity.md ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ | | layer_id | shape | nparam | nnz | sparsity |
2
+ |---:|:--------------|:-------------|---------:|---------:|-----------:|
3
+ | 0 | Constant_15 | [30522, 768] | 23440896 | 23440896 | 0 |
4
+ | 1 | Constant_23 | [2, 768] | 1536 | 1536 | 0 |
5
+ | 2 | Constant_35 | [512, 768] | 393216 | 393216 | 0 |
6
+ | 3 | Constant_61 | [320, 768] | 245760 | 106337 | 0.567314 |
7
+ | 4 | Constant_71 | [320, 768] | 245760 | 112375 | 0.542745 |
8
+ | 5 | Constant_91 | [320, 768] | 245760 | 131965 | 0.463033 |
9
+ | 6 | Constant_150 | [768, 320] | 245760 | 137497 | 0.440523 |
10
+ | 7 | Constant_178 | [185, 768] | 142080 | 110056 | 0.225394 |
11
+ | 8 | Constant_196 | [768, 185] | 142080 | 108631 | 0.235424 |
12
+ | 9 | Constant_224 | [320, 768] | 245760 | 136222 | 0.445711 |
13
+ | 10 | Constant_234 | [320, 768] | 245760 | 136267 | 0.445528 |
14
+ | 11 | Constant_254 | [320, 768] | 245760 | 124892 | 0.491813 |
15
+ | 12 | Constant_313 | [768, 320] | 245760 | 130185 | 0.470276 |
16
+ | 13 | Constant_341 | [315, 768] | 241920 | 176203 | 0.271648 |
17
+ | 14 | Constant_359 | [768, 315] | 241920 | 172407 | 0.287339 |
18
+ | 15 | Constant_387 | [576, 768] | 442368 | 198811 | 0.550576 |
19
+ | 16 | Constant_397 | [576, 768] | 442368 | 201526 | 0.544438 |
20
+ | 17 | Constant_417 | [576, 768] | 442368 | 163541 | 0.630306 |
21
+ | 18 | Constant_476 | [768, 576] | 442368 | 167245 | 0.621932 |
22
+ | 19 | Constant_504 | [339, 768] | 260352 | 185491 | 0.287538 |
23
+ | 20 | Constant_522 | [768, 339] | 260352 | 183083 | 0.296787 |
24
+ | 21 | Constant_550 | [576, 768] | 442368 | 205898 | 0.534555 |
25
+ | 22 | Constant_560 | [576, 768] | 442368 | 217621 | 0.508054 |
26
+ | 23 | Constant_580 | [576, 768] | 442368 | 209726 | 0.525902 |
27
+ | 24 | Constant_639 | [768, 576] | 442368 | 208038 | 0.529717 |
28
+ | 25 | Constant_667 | [368, 768] | 282624 | 198154 | 0.298878 |
29
+ | 26 | Constant_685 | [768, 368] | 282624 | 194127 | 0.313126 |
30
+ | 27 | Constant_713 | [576, 768] | 442368 | 212567 | 0.519479 |
31
+ | 28 | Constant_723 | [576, 768] | 442368 | 214788 | 0.514459 |
32
+ | 29 | Constant_743 | [576, 768] | 442368 | 197159 | 0.55431 |
33
+ | 30 | Constant_802 | [768, 576] | 442368 | 192495 | 0.564853 |
34
+ | 31 | Constant_830 | [386, 768] | 296448 | 205830 | 0.305679 |
35
+ | 32 | Constant_848 | [768, 386] | 296448 | 199567 | 0.326806 |
36
+ | 33 | Constant_876 | [384, 768] | 294912 | 131257 | 0.554928 |
37
+ | 34 | Constant_886 | [384, 768] | 294912 | 154458 | 0.476257 |
38
+ | 35 | Constant_906 | [384, 768] | 294912 | 159646 | 0.458666 |
39
+ | 36 | Constant_965 | [768, 384] | 294912 | 156889 | 0.468014 |
40
+ | 37 | Constant_993 | [336, 768] | 258048 | 184774 | 0.283955 |
41
+ | 38 | Constant_1011 | [768, 336] | 258048 | 178574 | 0.307981 |
42
+ | 39 | Constant_1039 | [448, 768] | 344064 | 153126 | 0.554949 |
43
+ | 40 | Constant_1049 | [448, 768] | 344064 | 169706 | 0.50676 |
44
+ | 41 | Constant_1069 | [448, 768] | 344064 | 154213 | 0.55179 |
45
+ | 42 | Constant_1128 | [768, 448] | 344064 | 148815 | 0.567479 |
46
+ | 43 | Constant_1156 | [280, 768] | 215040 | 158800 | 0.261533 |
47
+ | 44 | Constant_1174 | [768, 280] | 215040 | 156397 | 0.272707 |
48
+ | 45 | Constant_1202 | [448, 768] | 344064 | 153876 | 0.552769 |
49
+ | 46 | Constant_1212 | [448, 768] | 344064 | 180384 | 0.475725 |
50
+ | 47 | Constant_1232 | [448, 768] | 344064 | 166552 | 0.515927 |
51
+ | 48 | Constant_1291 | [768, 448] | 344064 | 160346 | 0.533965 |
52
+ | 49 | Constant_1319 | [211, 768] | 162048 | 125197 | 0.227408 |
53
+ | 50 | Constant_1337 | [768, 211] | 162048 | 123451 | 0.238183 |
54
+ | 51 | Constant_1365 | [448, 768] | 344064 | 149419 | 0.565723 |
55
+ | 52 | Constant_1375 | [448, 768] | 344064 | 150699 | 0.562003 |
56
+ | 53 | Constant_1395 | [448, 768] | 344064 | 124770 | 0.637364 |
57
+ | 54 | Constant_1454 | [768, 448] | 344064 | 120648 | 0.649344 |
58
+ | 55 | Constant_1482 | [108, 768] | 82944 | 68989 | 0.168246 |
59
+ | 56 | Constant_1500 | [768, 108] | 82944 | 68556 | 0.173466 |
60
+ | 57 | Constant_1528 | [320, 768] | 245760 | 121225 | 0.506734 |
61
+ | 58 | Constant_1538 | [320, 768] | 245760 | 114789 | 0.532922 |
62
+ | 59 | Constant_1558 | [320, 768] | 245760 | 69260 | 0.71818 |
63
+ | 60 | Constant_1617 | [768, 320] | 245760 | 73575 | 0.700623 |
64
+ | 61 | Constant_1645 | [53, 768] | 40704 | 35399 | 0.130331 |
65
+ | 62 | Constant_1663 | [768, 53] | 40704 | 34918 | 0.142148 |
66
+ | 63 | Constant_1691 | [384, 768] | 294912 | 128113 | 0.565589 |
67
+ | 64 | Constant_1701 | [384, 768] | 294912 | 124553 | 0.57766 |
68
+ | 65 | Constant_1721 | [384, 768] | 294912 | 79608 | 0.730062 |
69
+ | 66 | Constant_1780 | [768, 384] | 294912 | 85158 | 0.711243 |
70
+ | 67 | Constant_1808 | [86, 768] | 66048 | 55025 | 0.166894 |
71
+ | 68 | Constant_1826 | [768, 86] | 66048 | 54584 | 0.173571 |
72
+ | 69 | Constant_1854 | [384, 768] | 294912 | 104737 | 0.644853 |
73
+ | 70 | Constant_1864 | [384, 768] | 294912 | 102558 | 0.652242 |
74
+ | 71 | Constant_1884 | [384, 768] | 294912 | 64855 | 0.780087 |
75
+ | 72 | Constant_1943 | [768, 384] | 294912 | 69674 | 0.763746 |
76
+ | 73 | Constant_1971 | [105, 768] | 80640 | 67724 | 0.160169 |
77
+ | 74 | Constant_1989 | [768, 105] | 80640 | 67519 | 0.162711 |
78
+ | 75 | Constant_2017 | [2, 768] | 1536 | 1536 | 0 |
all_results.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "eval_exact_match": 80.10406811731315,
4
+ "eval_f1": 87.47131878910791,
5
+ "eval_samples": 10784,
6
+ "train_loss": 0.26923960941476244,
7
+ "train_runtime": 49132.7191,
8
+ "train_samples": 88524,
9
+ "train_samples_per_second": 9.009,
10
+ "train_steps_per_second": 0.563
11
+ }
bert-base-squadv1-block-pruning-hybrid-filled-lt-nncf-50.0sparse-qat-lt.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c048cd6605c9b42df96cca2e8934153dd428c64996c7917222e098ffd6131a96
3
+ size 176517681
checkpoint-26250/config.json ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/data1/vchua/tld-poc/bert-base-squadv1-local-hybrid-compiled",
3
+ "architectures": [
4
+ "NNCFNetwork"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 512,
16
+ "model_type": "bert",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
+ "pad_token_id": 0,
20
+ "position_embedding_type": "absolute",
21
+ "pruned_heads": {
22
+ "0": [
23
+ 0,
24
+ 2,
25
+ 4,
26
+ 5,
27
+ 6,
28
+ 7,
29
+ 11
30
+ ],
31
+ "1": [
32
+ 0,
33
+ 2,
34
+ 3,
35
+ 5,
36
+ 6,
37
+ 7,
38
+ 8
39
+ ],
40
+ "2": [
41
+ 8,
42
+ 4,
43
+ 7
44
+ ],
45
+ "3": [
46
+ 2,
47
+ 4,
48
+ 6
49
+ ],
50
+ "4": [
51
+ 1,
52
+ 2,
53
+ 11
54
+ ],
55
+ "5": [
56
+ 1,
57
+ 2,
58
+ 5,
59
+ 6,
60
+ 7,
61
+ 11
62
+ ],
63
+ "6": [
64
+ 0,
65
+ 2,
66
+ 3,
67
+ 7,
68
+ 10
69
+ ],
70
+ "7": [
71
+ 1,
72
+ 3,
73
+ 6,
74
+ 7,
75
+ 11
76
+ ],
77
+ "8": [
78
+ 0,
79
+ 3,
80
+ 4,
81
+ 5,
82
+ 8
83
+ ],
84
+ "9": [
85
+ 1,
86
+ 3,
87
+ 4,
88
+ 5,
89
+ 7,
90
+ 9,
91
+ 10
92
+ ],
93
+ "10": [
94
+ 1,
95
+ 4,
96
+ 5,
97
+ 6,
98
+ 7,
99
+ 8
100
+ ],
101
+ "11": [
102
+ 4,
103
+ 5,
104
+ 7,
105
+ 8,
106
+ 10,
107
+ 11
108
+ ]
109
+ },
110
+ "torch_dtype": "float32",
111
+ "transformers_version": "4.9.1",
112
+ "type_vocab_size": 2,
113
+ "use_cache": true,
114
+ "vocab_size": 30522
115
+ }
checkpoint-26250/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8d0efd4f34cf087d296b1534d4bb338de8436cdafc1e81ad7da1f1493bdb078
3
+ size 353016429
checkpoint-26250/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90dbdf1f188ec9d712e67c809d701db476c06082684072c1066ce3d69c78f803
3
+ size 257716945
checkpoint-26250/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ceb81e1cc84380b23f27968bbc84e3b029c97525337c2237684e0c3ba2ca67c2
3
+ size 14503
checkpoint-26250/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:585a9f6ab83e39b8e4cee2a51e778028de8b4ec0bcee884f80ffb4c0bbdb3811
3
+ size 623
checkpoint-26250/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
checkpoint-26250/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-26250/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "/data1/vchua/tld-poc/bert-base-squadv1-local-hybrid-compiled", "tokenizer_class": "BertTokenizer"}
checkpoint-26250/trainer_state.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5e3ef7f8b11fd63a9e3c5ed51d11f4326af76306c42a7abe0a91a4616b194d7
3
+ size 13266322
checkpoint-26250/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f165763d6ac0ef7389bda54903d4f385832550ebf25276851fc240d311eed08
3
+ size 3439
checkpoint-26250/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
compressed_graph.dot ADDED
The diff for this file is too large to render. See raw diff
 
config.json ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/data1/vchua/tld-poc/bert-base-squadv1-local-hybrid-compiled",
3
+ "architectures": [
4
+ "NNCFNetwork"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 512,
16
+ "model_type": "bert",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
+ "pad_token_id": 0,
20
+ "position_embedding_type": "absolute",
21
+ "pruned_heads": {
22
+ "0": [
23
+ 0,
24
+ 2,
25
+ 4,
26
+ 5,
27
+ 6,
28
+ 7,
29
+ 11
30
+ ],
31
+ "1": [
32
+ 0,
33
+ 2,
34
+ 3,
35
+ 5,
36
+ 6,
37
+ 7,
38
+ 8
39
+ ],
40
+ "2": [
41
+ 8,
42
+ 4,
43
+ 7
44
+ ],
45
+ "3": [
46
+ 2,
47
+ 4,
48
+ 6
49
+ ],
50
+ "4": [
51
+ 1,
52
+ 2,
53
+ 11
54
+ ],
55
+ "5": [
56
+ 1,
57
+ 2,
58
+ 5,
59
+ 6,
60
+ 7,
61
+ 11
62
+ ],
63
+ "6": [
64
+ 0,
65
+ 2,
66
+ 3,
67
+ 7,
68
+ 10
69
+ ],
70
+ "7": [
71
+ 1,
72
+ 3,
73
+ 6,
74
+ 7,
75
+ 11
76
+ ],
77
+ "8": [
78
+ 0,
79
+ 3,
80
+ 4,
81
+ 5,
82
+ 8
83
+ ],
84
+ "9": [
85
+ 1,
86
+ 3,
87
+ 4,
88
+ 5,
89
+ 7,
90
+ 9,
91
+ 10
92
+ ],
93
+ "10": [
94
+ 1,
95
+ 4,
96
+ 5,
97
+ 6,
98
+ 7,
99
+ 8
100
+ ],
101
+ "11": [
102
+ 4,
103
+ 5,
104
+ 7,
105
+ 8,
106
+ 10,
107
+ 11
108
+ ]
109
+ },
110
+ "torch_dtype": "float32",
111
+ "transformers_version": "4.9.1",
112
+ "type_vocab_size": 2,
113
+ "use_cache": true,
114
+ "vocab_size": 30522
115
+ }
eval_XP_results.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "eval_exact_match": 80.10406811731315,
4
+ "eval_f1": 87.47131878910791,
5
+ "eval_samples": 10784
6
+ }
eval_nbest_predictions.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7763073ce09e59016bc913ac5fa0c0ffa03cedde11804e2a7ae97b297d2584d1
3
+ size 48946987
eval_predictions.json ADDED
The diff for this file is too large to render. See raw diff
 
nncf_bert_squad_sparsity.json ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "input_info": [
3
+ {
4
+ "sample_size": [1, 384],
5
+ "type": "long"
6
+ },
7
+ {
8
+ "sample_size": [1, 384],
9
+ "type": "long"
10
+ },
11
+ {
12
+ "sample_size": [1, 384],
13
+ "type": "long"
14
+ }
15
+ ],
16
+ "compression":
17
+ [
18
+ // {
19
+ // "algorithm": "knowledge_distillation",
20
+ // "type": "softmax"
21
+ // },
22
+ {
23
+ "algorithm": "magnitude_sparsity",
24
+ "sparsity_init": 0.5,
25
+ "params": {
26
+ "schedule": "multistep",
27
+ "multistep_steps": [
28
+ 2,
29
+ 4,
30
+ 6,
31
+ 8
32
+ ],
33
+ "multistep_sparsity_levels": [
34
+ 0.5,
35
+ 0.5,
36
+ 0.5,
37
+ 0.5,
38
+ 0.5,
39
+ ]
40
+ },
41
+ "ignored_scopes": ["{re}.*NNCFEmbedding", "{re}.*qa_outputs*"]
42
+ },
43
+ {
44
+ "algorithm": "quantization",
45
+ "initializer": {
46
+ "range": {
47
+ "num_init_samples": 32,
48
+ "type": "percentile",
49
+ "params":
50
+ {
51
+ "min_percentile": 0.01,
52
+ "max_percentile": 99.99
53
+ }
54
+ },
55
+
56
+ "batchnorm_adaptation": {
57
+ "num_bn_adaptation_samples": 200
58
+ }
59
+ },
60
+ "activations":
61
+ {
62
+ "mode": "symmetric"
63
+ },
64
+ "weights":
65
+ {
66
+ "mode": "symmetric",
67
+ "signed": true,
68
+ "per_channel": false
69
+ }
70
+ }
71
+ ]
72
+ }
original_graph.dot ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1be7414c68d489bdd4b9acec66333bb43c8822123bf3c6a04dc825a87cf85636
3
+ size 257716945
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "/data1/vchua/tld-poc/bert-base-squadv1-local-hybrid-compiled", "tokenizer_class": "BertTokenizer"}
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "train_loss": 0.26923960941476244,
4
+ "train_runtime": 49132.7191,
5
+ "train_samples": 88524,
6
+ "train_samples_per_second": 9.009,
7
+ "train_steps_per_second": 0.563
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e79ab2b82686900e4fbc8c74311805c509a4f595374fe9c3259a388d864500cc
3
+ size 13977730
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f165763d6ac0ef7389bda54903d4f385832550ebf25276851fc240d311eed08
3
+ size 3439
vocab.txt ADDED
The diff for this file is too large to render. See raw diff