helenai committed on
Commit
fa8c491
1 Parent(s): b90aa01

Update IR model to reshapeable version.

Browse files
README.md CHANGED
@@ -17,6 +17,8 @@ model-index:
17
  dataset:
18
  name: GLUE SST2
19
  type: glue
 
 
20
  args: sst2
21
  metrics:
22
  - name: Accuracy
@@ -30,7 +32,7 @@ should probably proofread and complete it, then remove this comment. -->
30
  # jpqd-bert-base-ft-sst2
31
 
32
  > **Note**
33
- > This model was trained for only 1 epoch and is shared for testing purposes
34
 
35
  This model is a fine-tuned version of [bert-base-uncased](https://huggingface.co/bert-base-uncased) on the GLUE SST2 dataset.
36
  It achieves the following results on the evaluation set:
@@ -79,7 +81,7 @@ The following hyperparameters were used during training:
79
 
80
  ### Framework versions
81
 
82
- - Transformers 4.25.1
83
  - Pytorch 1.13.1+cu117
84
  - Datasets 2.8.0
85
  - Tokenizers 0.13.2
 
17
  dataset:
18
  name: GLUE SST2
19
  type: glue
20
+ config: sst2
21
+ split: validation
22
  args: sst2
23
  metrics:
24
  - name: Accuracy
 
32
  # jpqd-bert-base-ft-sst2
33
 
34
  > **Note**
35
+ > This model was trained for only 1 epoch and is shared for testing purposes.
36
 
37
  This model is a fine-tuned version of [bert-base-uncased](https://huggingface.co/bert-base-uncased) on the GLUE SST2 dataset.
38
  It achieves the following results on the evaluation set:
 
81
 
82
  ### Framework versions
83
 
84
+ - Transformers 4.26.1
85
  - Pytorch 1.13.1+cu117
86
  - Datasets 2.8.0
87
  - Tokenizers 0.13.2
all_results.json CHANGED
@@ -2,13 +2,13 @@
2
  "epoch": 1.0,
3
  "eval_accuracy": 0.9254587155963303,
4
  "eval_loss": 0.21810248494148254,
5
- "eval_runtime": 22.1504,
6
  "eval_samples": 872,
7
- "eval_samples_per_second": 39.367,
8
- "eval_steps_per_second": 4.921,
9
  "train_loss": 0.40093172477146793,
10
- "train_runtime": 1341.7612,
11
  "train_samples": 67349,
12
- "train_samples_per_second": 50.194,
13
- "train_steps_per_second": 1.569
14
  }
 
2
  "epoch": 1.0,
3
  "eval_accuracy": 0.9254587155963303,
4
  "eval_loss": 0.21810248494148254,
5
+ "eval_runtime": 22.3128,
6
  "eval_samples": 872,
7
+ "eval_samples_per_second": 39.081,
8
+ "eval_steps_per_second": 4.885,
9
  "train_loss": 0.40093172477146793,
10
+ "train_runtime": 1354.5918,
11
  "train_samples": 67349,
12
+ "train_samples_per_second": 49.719,
13
+ "train_steps_per_second": 1.554
14
  }
config.json CHANGED
@@ -29,7 +29,7 @@
29
  "position_embedding_type": "absolute",
30
  "problem_type": "single_label_classification",
31
  "torch_dtype": "float32",
32
- "transformers_version": "4.25.1",
33
  "type_vocab_size": 2,
34
  "use_cache": true,
35
  "vocab_size": 30522
 
29
  "position_embedding_type": "absolute",
30
  "problem_type": "single_label_classification",
31
  "torch_dtype": "float32",
32
+ "transformers_version": "4.26.1",
33
  "type_vocab_size": 2,
34
  "use_cache": true,
35
  "vocab_size": 30522
eval_results.json CHANGED
@@ -2,8 +2,8 @@
2
  "epoch": 1.0,
3
  "eval_accuracy": 0.9254587155963303,
4
  "eval_loss": 0.21810248494148254,
5
- "eval_runtime": 22.1504,
6
  "eval_samples": 872,
7
- "eval_samples_per_second": 39.367,
8
- "eval_steps_per_second": 4.921
9
  }
 
2
  "epoch": 1.0,
3
  "eval_accuracy": 0.9254587155963303,
4
  "eval_loss": 0.21810248494148254,
5
+ "eval_runtime": 22.3128,
6
  "eval_samples": 872,
7
+ "eval_samples_per_second": 39.081,
8
+ "eval_steps_per_second": 4.885
9
  }
model.onnx DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:74220f4ab04e8b2cf0a8eb0ac647ab95ad8e9cf3abe9af47aee0f4d2b45dab3b
3
- size 439470129
 
 
 
 
nncf_output.log CHANGED
@@ -75,6 +75,8 @@ INFO:nncf:Not adding activation input quantizer for operation: 352 BertForSequen
75
  INFO:nncf:Not adding activation input quantizer for operation: 357 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[output]/__add___0
76
  INFO:nncf:Not adding activation input quantizer for operation: 358 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0
77
  INFO:nncf:Collecting tensor statistics |████████████████| 1 / 1
 
 
78
  INFO:nncf:BatchNorm statistics adaptation |██ | 1 / 7
79
  INFO:nncf:BatchNorm statistics adaptation |████ | 2 / 7
80
  INFO:nncf:BatchNorm statistics adaptation |██████ | 3 / 7
@@ -1547,4 +1549,3 @@ Epoch 0 |+==============+=====================+====================+============
1547
  Epoch 0 || 8 | 100.00 % (77 / 77) | 100.00 % (101 / | 100.00 % (178 / |
1548
  Epoch 0 || | | 101) | 178) |
1549
  Epoch 0 |+--------------+---------------------+--------------------+--------------------+
1550
- INFO:nncf:Movement sparsity scheduler updates importance threshold and regularizationfactor per optimizer step, but steps_per_epoch was not set in config. Will measure the actual steps per epoch as signaled by a .epoch_step() call.
 
75
  INFO:nncf:Not adding activation input quantizer for operation: 357 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[output]/__add___0
76
  INFO:nncf:Not adding activation input quantizer for operation: 358 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0
77
  INFO:nncf:Collecting tensor statistics |████████████████| 1 / 1
78
+ INFO:nncf:Compiling and loading torch extension: quantized_functions_cuda...
79
+ INFO:nncf:Finished loading torch extension: quantized_functions_cuda
80
  INFO:nncf:BatchNorm statistics adaptation |██ | 1 / 7
81
  INFO:nncf:BatchNorm statistics adaptation |████ | 2 / 7
82
  INFO:nncf:BatchNorm statistics adaptation |██████ | 3 / 7
 
1549
  Epoch 0 || 8 | 100.00 % (77 / 77) | 100.00 % (101 / | 100.00 % (178 / |
1550
  Epoch 0 || | | 101) | 178) |
1551
  Epoch 0 |+--------------+---------------------+--------------------+--------------------+
 
openvino_config.json CHANGED
@@ -94,8 +94,8 @@
94
  "type": "long"
95
  }
96
  ],
97
- "log_dir": "/tmp/jpqd-bert-base-ft-sst2",
98
- "optimum_version": "1.6.1",
99
  "save_onnx_model": false,
100
- "transformers_version": "4.25.1"
101
  }
 
94
  "type": "long"
95
  }
96
  ],
97
+ "log_dir": "jpqd-bert-base-ft-sst2",
98
+ "optimum_version": "1.6.4",
99
  "save_onnx_model": false,
100
+ "transformers_version": "4.26.1"
101
  }
openvino_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e9cfad0e7bc7bf2932066a8a392d1315f9bbbaf4db18504ce184266bbdf288c8
3
- size 110187016
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7383489839e89fe2ad7daf09beed249bfc2dcb35476cf17776c9c2c95fdf9ec2
3
+ size 438609916
openvino_model.xml CHANGED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:99590d19bdea8d7f6dbf5a37e3947a09af3ef5805eb39e5238e5afa19452aeed
3
  size 779394143
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa61dca8594fa489f7d978ab5fd49ec7dc82397cfe93d54f751384de9e85dc2a
3
  size 779394143
tokenizer.json CHANGED
@@ -1,21 +1,7 @@
1
  {
2
  "version": "1.0",
3
- "truncation": {
4
- "direction": "Right",
5
- "max_length": 128,
6
- "strategy": "LongestFirst",
7
- "stride": 0
8
- },
9
- "padding": {
10
- "strategy": {
11
- "Fixed": 128
12
- },
13
- "direction": "Right",
14
- "pad_to_multiple_of": null,
15
- "pad_id": 0,
16
- "pad_type_id": 0,
17
- "pad_token": "[PAD]"
18
- },
19
  "added_tokens": [
20
  {
21
  "id": 0,
 
1
  {
2
  "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  "added_tokens": [
6
  {
7
  "id": 0,
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
  "train_loss": 0.40093172477146793,
4
- "train_runtime": 1341.7612,
5
  "train_samples": 67349,
6
- "train_samples_per_second": 50.194,
7
- "train_steps_per_second": 1.569
8
  }
 
1
  {
2
  "epoch": 1.0,
3
  "train_loss": 0.40093172477146793,
4
+ "train_runtime": 1354.5918,
5
  "train_samples": 67349,
6
+ "train_samples_per_second": 49.719,
7
+ "train_steps_per_second": 1.554
8
  }
trainer_state.json CHANGED
@@ -336,9 +336,9 @@
336
  "epoch": 0.12,
337
  "eval_accuracy": 0.8761467889908257,
338
  "eval_loss": 0.44158032536506653,
339
- "eval_runtime": 21.7125,
340
- "eval_samples_per_second": 40.161,
341
- "eval_steps_per_second": 5.02,
342
  "step": 250
343
  },
344
  {
@@ -670,9 +670,9 @@
670
  "epoch": 0.24,
671
  "eval_accuracy": 0.8899082568807339,
672
  "eval_loss": 0.49690014123916626,
673
- "eval_runtime": 21.7581,
674
- "eval_samples_per_second": 40.077,
675
- "eval_steps_per_second": 5.01,
676
  "step": 500
677
  },
678
  {
@@ -1004,9 +1004,9 @@
1004
  "epoch": 0.36,
1005
  "eval_accuracy": 0.9162844036697247,
1006
  "eval_loss": 0.2716875970363617,
1007
- "eval_runtime": 21.7417,
1008
- "eval_samples_per_second": 40.107,
1009
- "eval_steps_per_second": 5.013,
1010
  "step": 750
1011
  },
1012
  {
@@ -1338,9 +1338,9 @@
1338
  "epoch": 0.48,
1339
  "eval_accuracy": 0.911697247706422,
1340
  "eval_loss": 0.24315589666366577,
1341
- "eval_runtime": 21.9413,
1342
- "eval_samples_per_second": 39.742,
1343
- "eval_steps_per_second": 4.968,
1344
  "step": 1000
1345
  },
1346
  {
@@ -1672,9 +1672,9 @@
1672
  "epoch": 0.59,
1673
  "eval_accuracy": 0.9243119266055045,
1674
  "eval_loss": 0.20327819883823395,
1675
- "eval_runtime": 21.7723,
1676
- "eval_samples_per_second": 40.051,
1677
- "eval_steps_per_second": 5.006,
1678
  "step": 1250
1679
  },
1680
  {
@@ -2006,9 +2006,9 @@
2006
  "epoch": 0.71,
2007
  "eval_accuracy": 0.9243119266055045,
2008
  "eval_loss": 0.2382841557264328,
2009
- "eval_runtime": 21.7644,
2010
- "eval_samples_per_second": 40.065,
2011
- "eval_steps_per_second": 5.008,
2012
  "step": 1500
2013
  },
2014
  {
@@ -2340,9 +2340,9 @@
2340
  "epoch": 0.83,
2341
  "eval_accuracy": 0.9254587155963303,
2342
  "eval_loss": 0.22330859303474426,
2343
- "eval_runtime": 21.7241,
2344
- "eval_samples_per_second": 40.14,
2345
- "eval_steps_per_second": 5.017,
2346
  "step": 1750
2347
  },
2348
  {
@@ -2674,9 +2674,9 @@
2674
  "epoch": 0.95,
2675
  "eval_accuracy": 0.9254587155963303,
2676
  "eval_loss": 0.22065171599388123,
2677
- "eval_runtime": 21.7453,
2678
- "eval_samples_per_second": 40.101,
2679
- "eval_steps_per_second": 5.013,
2680
  "step": 2000
2681
  },
2682
  {
@@ -2814,9 +2814,9 @@
2814
  "step": 2105,
2815
  "total_flos": 4441630972486656.0,
2816
  "train_loss": 0.40093172477146793,
2817
- "train_runtime": 1341.7612,
2818
- "train_samples_per_second": 50.194,
2819
- "train_steps_per_second": 1.569
2820
  }
2821
  ],
2822
  "max_steps": 2105,
 
336
  "epoch": 0.12,
337
  "eval_accuracy": 0.8761467889908257,
338
  "eval_loss": 0.44158032536506653,
339
+ "eval_runtime": 21.9879,
340
+ "eval_samples_per_second": 39.658,
341
+ "eval_steps_per_second": 4.957,
342
  "step": 250
343
  },
344
  {
 
670
  "epoch": 0.24,
671
  "eval_accuracy": 0.8899082568807339,
672
  "eval_loss": 0.49690014123916626,
673
+ "eval_runtime": 22.004,
674
+ "eval_samples_per_second": 39.629,
675
+ "eval_steps_per_second": 4.954,
676
  "step": 500
677
  },
678
  {
 
1004
  "epoch": 0.36,
1005
  "eval_accuracy": 0.9162844036697247,
1006
  "eval_loss": 0.2716875970363617,
1007
+ "eval_runtime": 21.9554,
1008
+ "eval_samples_per_second": 39.717,
1009
+ "eval_steps_per_second": 4.965,
1010
  "step": 750
1011
  },
1012
  {
 
1338
  "epoch": 0.48,
1339
  "eval_accuracy": 0.911697247706422,
1340
  "eval_loss": 0.24315589666366577,
1341
+ "eval_runtime": 22.0177,
1342
+ "eval_samples_per_second": 39.604,
1343
+ "eval_steps_per_second": 4.951,
1344
  "step": 1000
1345
  },
1346
  {
 
1672
  "epoch": 0.59,
1673
  "eval_accuracy": 0.9243119266055045,
1674
  "eval_loss": 0.20327819883823395,
1675
+ "eval_runtime": 22.009,
1676
+ "eval_samples_per_second": 39.62,
1677
+ "eval_steps_per_second": 4.953,
1678
  "step": 1250
1679
  },
1680
  {
 
2006
  "epoch": 0.71,
2007
  "eval_accuracy": 0.9243119266055045,
2008
  "eval_loss": 0.2382841557264328,
2009
+ "eval_runtime": 22.0278,
2010
+ "eval_samples_per_second": 39.586,
2011
+ "eval_steps_per_second": 4.948,
2012
  "step": 1500
2013
  },
2014
  {
 
2340
  "epoch": 0.83,
2341
  "eval_accuracy": 0.9254587155963303,
2342
  "eval_loss": 0.22330859303474426,
2343
+ "eval_runtime": 21.9928,
2344
+ "eval_samples_per_second": 39.649,
2345
+ "eval_steps_per_second": 4.956,
2346
  "step": 1750
2347
  },
2348
  {
 
2674
  "epoch": 0.95,
2675
  "eval_accuracy": 0.9254587155963303,
2676
  "eval_loss": 0.22065171599388123,
2677
+ "eval_runtime": 21.9957,
2678
+ "eval_samples_per_second": 39.644,
2679
+ "eval_steps_per_second": 4.956,
2680
  "step": 2000
2681
  },
2682
  {
 
2814
  "step": 2105,
2815
  "total_flos": 4441630972486656.0,
2816
  "train_loss": 0.40093172477146793,
2817
+ "train_runtime": 1354.5918,
2818
+ "train_samples_per_second": 49.719,
2819
+ "train_steps_per_second": 1.554
2820
  }
2821
  ],
2822
  "max_steps": 2105,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3fbb005c34586cdf1d9dfa3fa4f3e0e2b7209a1984b065821a0ac11dbbf8e40b
3
- size 3643
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:512755fad1ec8d16b5f8c62bc77ce4f3655b8f55c801d2452ac80202929c67a1
3
+ size 3579