Vui Seng Chua committed on
Commit
e1db940
1 Parent(s): c34ad17

Add content

README.md ADDED
@@ -0,0 +1,71 @@
+ ---
+ license: other
+ tags:
+ - generated_from_trainer
+ datasets:
+ - wikitext
+ metrics:
+ - accuracy
+ model-index:
+ - name: ov-opt-350m-fp32-kv-cache
+   results:
+   - task:
+       name: Causal Language Modeling
+       type: text-generation
+     dataset:
+       name: wikitext wikitext-2-raw-v1
+       type: wikitext
+       config: wikitext-2-raw-v1
+       split: validation
+       args: wikitext-2-raw-v1
+     metrics:
+     - name: Accuracy
+       type: accuracy
+       value: 0.1335940045617465
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # ov-opt-350m-fp32-kv-cache
+
+ This model is a fine-tuned version of [facebook/opt-350m](https://huggingface.co/facebook/opt-350m) on the wikitext wikitext-2-raw-v1 dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 6.2368
+ - Accuracy: 0.1336
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 5e-05
+ - train_batch_size: 8
+ - eval_batch_size: 1
+ - seed: 42
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: linear
+ - training_steps: 1
+
+ ### Training results
+
+
+
+ ### Framework versions
+
+ - Transformers 4.30.2
+ - Pytorch 2.0.1+cu117
+ - Datasets 2.13.1
+ - Tokenizers 0.13.3
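
Since the commit ships both the OpenVINO IR (`openvino_model.xml`/`.bin`) and `pytorch_model.bin`, a minimal inference sketch with `optimum-intel` (matching the `optimum_version: 1.8.8` recorded in `openvino_config.json` below) might look like the following; the repo id namespace is a placeholder, not confirmed by this commit:

```python
# Minimal sketch: run the exported OpenVINO IR via optimum-intel.
from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer

model_id = "<namespace>/ov-opt-350m-fp32-kv-cache"  # hypothetical repo id
tokenizer = AutoTokenizer.from_pretrained(model_id)
# use_cache=True keeps the KV cache enabled, matching the "kv-cache" naming
# and "use_cache": true in config.json.
model = OVModelForCausalLM.from_pretrained(model_id, use_cache=True)

inputs = tokenizer("OpenVINO runs OPT-350m", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```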
all_results.json ADDED
@@ -0,0 +1,15 @@
+ {
+     "epoch": 0.0,
+     "eval_accuracy": 0.1335940045617465,
+     "eval_loss": 6.2367634773254395,
+     "eval_runtime": 2.1831,
+     "eval_samples": 3,
+     "eval_samples_per_second": 1.374,
+     "eval_steps_per_second": 1.374,
+     "perplexity": 511.2013159644867,
+     "train_loss": 7.921147346496582,
+     "train_runtime": 26.2971,
+     "train_samples": 2355,
+     "train_samples_per_second": 0.304,
+     "train_steps_per_second": 0.038
+ }
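
The `perplexity` field is just the exponential of `eval_loss`, and the throughput field follows from the sample count and runtime; both check out:

```python
import math

eval_loss = 6.2367634773254395
print(math.exp(eval_loss))  # 511.2013159644... -> the "perplexity" field above
print(3 / 2.1831)           # ~1.374 -> eval_samples / eval_runtime = eval_samples_per_second
```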
compressed_graph.dot ADDED
The diff for this file is too large to render. See raw diff
 
config.json ADDED
@@ -0,0 +1,31 @@
+ {
+   "_name_or_path": "facebook/opt-350m",
+   "_remove_final_layer_norm": false,
+   "activation_dropout": 0.0,
+   "activation_function": "relu",
+   "architectures": [
+     "OPTForCausalLM"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 2,
+   "do_layer_norm_before": false,
+   "dropout": 0.1,
+   "enable_bias": true,
+   "eos_token_id": 2,
+   "ffn_dim": 4096,
+   "hidden_size": 1024,
+   "init_std": 0.02,
+   "layer_norm_elementwise_affine": true,
+   "layerdrop": 0.0,
+   "max_position_embeddings": 2048,
+   "model_type": "opt",
+   "num_attention_heads": 16,
+   "num_hidden_layers": 24,
+   "pad_token_id": 1,
+   "prefix": "</s>",
+   "torch_dtype": "float32",
+   "transformers_version": "4.30.2",
+   "use_cache": true,
+   "vocab_size": 50272,
+   "word_embed_proj_dim": 512
+ }
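
These are the stock OPT-350m dimensions; notably, `word_embed_proj_dim: 512` means token embeddings are 512-dimensional and projected up to the 1024-dimensional hidden states. A quick inspection sketch:

```python
# Sketch: inspect the architecture via transformers' AutoConfig.
from transformers import AutoConfig

config = AutoConfig.from_pretrained("facebook/opt-350m")
print(config.hidden_size)                                # 1024
print(config.word_embed_proj_dim)                        # 512 (projected up to 1024)
print(config.hidden_size // config.num_attention_heads)  # 64 dims per attention head
```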
eval_results.json ADDED
@@ -0,0 +1,10 @@
+ {
+     "epoch": 0.0,
+     "eval_accuracy": 0.1335940045617465,
+     "eval_loss": 6.2367634773254395,
+     "eval_runtime": 2.1831,
+     "eval_samples": 3,
+     "eval_samples_per_second": 1.374,
+     "eval_steps_per_second": 1.374,
+     "perplexity": 511.2013159644867
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 2,
+   "eos_token_id": 2,
+   "pad_token_id": 1,
+   "transformers_version": "4.30.2"
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
nncf_output.log ADDED
@@ -0,0 +1,17 @@
+ INFO:nncf:
+ WARNING:nncf:You are setting `forward` on an NNCF-processed model object.
+ NNCF relies on custom-wrapping the `forward` call in order to function properly.
+ Arbitrary adjustments to the forward function on an NNCFNetwork object have undefined behaviour.
+ If you need to replace the underlying forward function of the original model so that NNCF should be using that instead of the original forward function that NNCF saved during the compressed model creation, you can do this by calling:
+ model.nncf.set_original_unbound_forward(fn)
+ if `fn` has an unbound 0-th `self` argument, or
+ with model.nncf.temporary_bound_original_forward(fn): ...
+ if `fn` already had 0-th `self` argument bound or never had it in the first place.
+ WARNING:nncf:You are setting `forward` on an NNCF-processed model object.
+ NNCF relies on custom-wrapping the `forward` call in order to function properly.
+ Arbitrary adjustments to the forward function on an NNCFNetwork object have undefined behaviour.
+ If you need to replace the underlying forward function of the original model so that NNCF should be using that instead of the original forward function that NNCF saved during the compressed model creation, you can do this by calling:
+ model.nncf.set_original_unbound_forward(fn)
+ if `fn` has an unbound 0-th `self` argument, or
+ with model.nncf.temporary_bound_original_forward(fn): ...
+ if `fn` already had 0-th `self` argument bound or never had it in the first place.
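
The warning itself spells out the two supported replacement patterns. A sketch of both, assuming `model` is the NNCF-wrapped model produced elsewhere in the pipeline and `new_forward` is a hypothetical replacement:

```python
import torch

def new_forward(self, input_ids, attention_mask=None, **kwargs):
    """Hypothetical unbound replacement; the 0-th argument is `self`."""
    ...

# Pattern 1: permanent replacement with an unbound function.
model.nncf.set_original_unbound_forward(new_forward)

# Pattern 2: temporary replacement with an already-bound callable.
with model.nncf.temporary_bound_original_forward(model.forward):
    outputs = model(torch.zeros(1, 8, dtype=torch.long))
```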
openvino_config.json ADDED
@@ -0,0 +1,27 @@
+ {
+   "compression": {
+     "algorithm": "NoCompressionAlgorithm"
+   },
+   "input_info": [
+     {
+       "keyword": "input_ids",
+       "sample_size": [
+         8,
+         1024
+       ],
+       "type": "long"
+     },
+     {
+       "keyword": "attention_mask",
+       "sample_size": [
+         8,
+         1024
+       ],
+       "type": "long"
+     }
+   ],
+   "log_dir": "/data1/vchua/temp/ov-opt-350m-fp32-kv-cache",
+   "optimum_version": "1.8.8",
+   "save_onnx_model": false,
+   "transformers_version": "4.30.2"
+ }
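
The `input_info` shapes echo the training setup in the README (`train_batch_size: 8`, 1024-token sequences); NNCF uses these to build dummy inputs when tracing the graph. Illustrative tensors of the recorded shape and dtype:

```python
import torch

# Dummy tensors matching the two input_info entries above.
input_ids = torch.zeros(8, 1024, dtype=torch.long)      # "keyword": "input_ids", "type": "long"
attention_mask = torch.ones(8, 1024, dtype=torch.long)  # "keyword": "attention_mask", "type": "long"
```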
openvino_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:feea529894e04917df213164ccba0e32d06d46c996d9798798e164d5fe5f6380
+ size 1427742864
openvino_model.xml ADDED
The diff for this file is too large to render. See raw diff
 
original_graph.dot ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:52a269cf40f695787d1328f0502cb555ba7a3e90e38f46abbab4fd1fbaead581
+ size 1324911837
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "bos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<pad>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,40 @@
+ {
+   "add_bos_token": true,
+   "add_prefix_space": false,
+   "bos_token": {
+     "__type": "AddedToken",
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "clean_up_tokenization_spaces": true,
+   "eos_token": {
+     "__type": "AddedToken",
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "errors": "replace",
+   "model_max_length": 1000000000000000019884624838656,
+   "pad_token": {
+     "__type": "AddedToken",
+     "content": "<pad>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "tokenizer_class": "GPT2Tokenizer",
+   "unk_token": {
+     "__type": "AddedToken",
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
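
The enormous `model_max_length` is the transformers sentinel for "no limit recorded"; the usable context comes from `max_position_embeddings: 2048` in `config.json`. Note also that OPT reuses `</s>` as bos, eos, and unk token. A quick check against the base tokenizer:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
print(tokenizer.bos_token, tokenizer.eos_token, tokenizer.unk_token)  # </s> </s> </s>
print(tokenizer.pad_token)                                            # <pad>
```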
train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+     "epoch": 0.0,
+     "train_loss": 7.921147346496582,
+     "train_runtime": 26.2971,
+     "train_samples": 2355,
+     "train_samples_per_second": 0.304,
+     "train_steps_per_second": 0.038
+ }
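
The throughput figures follow from a single optimizer step at batch size 8:

```python
train_runtime = 26.2971   # seconds, from above
steps, batch_size = 1, 8  # training_steps and train_batch_size from the README
print(steps * batch_size / train_runtime)  # ~0.304 = train_samples_per_second
print(steps / train_runtime)               # ~0.038 = train_steps_per_second
```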
trainer_state.json ADDED
@@ -0,0 +1,25 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 0.003389830508474576,
+   "global_step": 1,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.0,
+       "step": 1,
+       "total_flos": 14910650056704.0,
+       "train_loss": 7.921147346496582,
+       "train_runtime": 26.2971,
+       "train_samples_per_second": 0.304,
+       "train_steps_per_second": 0.038
+     }
+   ],
+   "max_steps": 1,
+   "num_train_epochs": 1,
+   "total_flos": 14910650056704.0,
+   "trial_name": null,
+   "trial_params": null
+ }
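
The fractional `epoch` is one step out of the 295 steps a full pass would take (2355 training samples at batch size 8, with a partial final batch):

```python
import math

train_samples, batch_size, global_step = 2355, 8, 1
steps_per_epoch = math.ceil(train_samples / batch_size)  # ceil(294.375) = 295
print(global_step / steps_per_epoch)                     # 0.003389830508474576, the "epoch" above
```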
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:957b37cdb11b6853cd2b4232530d1f216263f43991786f948426080587b2aabf
+ size 3963
vocab.json ADDED
The diff for this file is too large to render. See raw diff