Vui Seng Chua
committed on
Commit 57022cc
1 Parent(s): d17a7c1
Add content
Browse files
- README.md +71 -0
- all_results.json +15 -0
- compressed_graph.dot +0 -0
- config.json +39 -0
- eval_results.json +10 -0
- generation_config.json +6 -0
- merges.txt +0 -0
- nncf_output.log +17 -0
- openvino_config.json +27 -0
- openvino_model.bin +3 -0
- openvino_model.xml +0 -0
- original_graph.dot +0 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +5 -0
- tokenizer.json +0 -0
- tokenizer_config.json +9 -0
- train_results.json +8 -0
- trainer_state.json +25 -0
- training_args.bin +3 -0
- vocab.json +0 -0
README.md
ADDED
@@ -0,0 +1,71 @@
+---
+license: mit
+tags:
+- generated_from_trainer
+datasets:
+- wikitext
+metrics:
+- accuracy
+model-index:
+- name: gpt2-fp32-ov-kv-cache
+  results:
+  - task:
+      name: Causal Language Modeling
+      type: text-generation
+    dataset:
+      name: wikitext wikitext-2-raw-v1
+      type: wikitext
+      config: wikitext-2-raw-v1
+      split: validation
+      args: wikitext-2-raw-v1
+    metrics:
+    - name: Accuracy
+      type: accuracy
+      value: 0.38449006190941676
+---
+
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+
+# gpt2-fp32-ov-kv-cache
+
+This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on the wikitext wikitext-2-raw-v1 dataset.
+It achieves the following results on the evaluation set:
+- Loss: 3.3894
+- Accuracy: 0.3845
+
+## Model description
+
+More information needed
+
+## Intended uses & limitations
+
+More information needed
+
+## Training and evaluation data
+
+More information needed
+
+## Training procedure
+
+### Training hyperparameters
+
+The following hyperparameters were used during training:
+- learning_rate: 5e-05
+- train_batch_size: 8
+- eval_batch_size: 1
+- seed: 42
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: linear
+- training_steps: 1
+
+### Training results
+
+
+
+### Framework versions
+
+- Transformers 4.30.2
+- Pytorch 2.0.1+cu117
+- Datasets 2.13.1
+- Tokenizers 0.13.3
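For reference, a checkpoint laid out like this (openvino_model.xml/.bin alongside the tokenizer files) is typically loaded through optimum-intel rather than plain transformers. A minimal usage sketch; the repo id below is a hypothetical placeholder, not taken from this commit:

```python
from optimum.intel import OVModelForCausalLM  # optimum-intel, per openvino_config.json
from transformers import AutoTokenizer

model_id = "<user>/gpt2-fp32-ov-kv-cache"  # hypothetical repo id; substitute the real path
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = OVModelForCausalLM.from_pretrained(model_id)  # picks up openvino_model.xml/.bin

inputs = tokenizer("OpenVINO runs GPT-2", return_tensors="pt")
out = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```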
all_results.json
ADDED
@@ -0,0 +1,15 @@
+{
+    "epoch": 0.0,
+    "eval_accuracy": 0.38449006190941676,
+    "eval_loss": 3.3894119262695312,
+    "eval_runtime": 83.4955,
+    "eval_samples": 240,
+    "eval_samples_per_second": 2.874,
+    "eval_steps_per_second": 2.874,
+    "perplexity": 29.648511631842613,
+    "train_loss": 3.6670310497283936,
+    "train_runtime": 12.1659,
+    "train_samples": 2318,
+    "train_samples_per_second": 0.658,
+    "train_steps_per_second": 0.082
+}
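Note that the reported perplexity is just the exponential of the evaluation loss, so the two fields are consistent:

```python
import math

# perplexity = exp(mean cross-entropy loss)
print(math.exp(3.3894119262695312))  # ≈ 29.6485..., matching "perplexity" above
```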
compressed_graph.dot
ADDED
The diff for this file is too large to render.
See raw diff
config.json
ADDED
@@ -0,0 +1,39 @@
+{
+  "_name_or_path": "gpt2",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.30.2",
+  "use_cache": true,
+  "vocab_size": 50257
+}
eval_results.json
ADDED
@@ -0,0 +1,10 @@
+{
+    "epoch": 0.0,
+    "eval_accuracy": 0.38449006190941676,
+    "eval_loss": 3.3894119262695312,
+    "eval_runtime": 83.4955,
+    "eval_samples": 240,
+    "eval_samples_per_second": 2.874,
+    "eval_steps_per_second": 2.874,
+    "perplexity": 29.648511631842613
+}
generation_config.json
ADDED
@@ -0,0 +1,6 @@
+{
+  "_from_model_config": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "transformers_version": "4.30.2"
+}
merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
nncf_output.log
ADDED
@@ -0,0 +1,17 @@
+INFO:nncf:
+WARNING:nncf:You are setting `forward` on an NNCF-processed model object.
+NNCF relies on custom-wrapping the `forward` call in order to function properly.
+Arbitrary adjustments to the forward function on an NNCFNetwork object have undefined behaviour.
+If you need to replace the underlying forward function of the original model so that NNCF should be using that instead of the original forward function that NNCF saved during the compressed model creation, you can do this by calling:
+model.nncf.set_original_unbound_forward(fn)
+if `fn` has an unbound 0-th `self` argument, or
+with model.nncf.temporary_bound_original_forward(fn): ...
+if `fn` already had 0-th `self` argument bound or never had it in the first place.
+WARNING:nncf:You are setting `forward` on an NNCF-processed model object.
+NNCF relies on custom-wrapping the `forward` call in order to function properly.
+Arbitrary adjustments to the forward function on an NNCFNetwork object have undefined behaviour.
+If you need to replace the underlying forward function of the original model so that NNCF should be using that instead of the original forward function that NNCF saved during the compressed model creation, you can do this by calling:
+model.nncf.set_original_unbound_forward(fn)
+if `fn` has an unbound 0-th `self` argument, or
+with model.nncf.temporary_bound_original_forward(fn): ...
+if `fn` already had 0-th `self` argument bound or never had it in the first place.
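The warning above spells out the two supported ways to replace `forward` on an NNCF-wrapped model. A minimal sketch of the unbound-forward pattern, assuming the nncf.torch API of this era and using a toy module in place of GPT-2:

```python
import torch
import torch.nn.functional as F
from nncf import NNCFConfig
from nncf.torch import create_compressed_model

# Toy stand-in for the GPT-2 model this commit wraps with NNCF.
model = torch.nn.Linear(8, 8)
nncf_config = NNCFConfig.from_dict({"input_info": {"sample_size": [1, 8]}})
ctrl, model = create_compressed_model(model, nncf_config)

# Unbound replacement forward: 0-th argument is `self`, as the warning requires.
def new_forward(self, x):
    return F.linear(x, self.weight, self.bias)

# The supported way to install it (instead of assigning `model.forward` directly):
model.nncf.set_original_unbound_forward(new_forward)
out = model(torch.randn(1, 8))
```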
openvino_config.json
ADDED
@@ -0,0 +1,27 @@
+{
+  "compression": {
+    "algorithm": "NoCompressionAlgorithm"
+  },
+  "input_info": [
+    {
+      "keyword": "input_ids",
+      "sample_size": [
+        8,
+        1024
+      ],
+      "type": "long"
+    },
+    {
+      "keyword": "attention_mask",
+      "sample_size": [
+        8,
+        1024
+      ],
+      "type": "long"
+    }
+  ],
+  "log_dir": "/data1/vchua/run/hf-model/gpt2-fp32-ov-kv-cache",
+  "optimum_version": "1.8.8",
+  "save_onnx_model": false,
+  "transformers_version": "4.30.2"
+}
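The `input_info` entries record the example shapes the model was traced with: batch size 8, sequence length 1024, both inputs integer (`long`) tensors. Equivalent dummy inputs, as a sketch:

```python
import torch

# Dummy inputs matching input_info above: two [8, 1024] long tensors
input_ids = torch.ones((8, 1024), dtype=torch.long)
attention_mask = torch.ones((8, 1024), dtype=torch.long)
```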
openvino_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a771357878b17d079691575f42a6a63d27a70572590684e5917651edb504078
+size 653197532
openvino_model.xml
ADDED
The diff for this file is too large to render.
See raw diff
original_graph.dot
ADDED
The diff for this file is too large to render.
See raw diff
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0889a79134325a4518fd10b985d8ab8531534482a257030ce8b1ac98a4b23cf6
+size 497805149
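As a rough sanity check (an observation, not part of the commit), the checkpoint size is consistent with GPT-2 small stored in FP32 at 4 bytes per parameter:

```python
# ~497.8 MB / 4 bytes per float32 weight ≈ 124M parameters, i.e. GPT-2 small
print(497805149 / 4 / 1e6)  # ≈ 124.45 (million parameters, approximate)
```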
special_tokens_map.json
ADDED
@@ -0,0 +1,5 @@
+{
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
+  "unk_token": "<|endoftext|>"
+}
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
tokenizer_config.json
ADDED
@@ -0,0 +1,9 @@
+{
+  "add_prefix_space": false,
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|endoftext|>",
+  "model_max_length": 1024,
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>"
+}
train_results.json
ADDED
@@ -0,0 +1,8 @@
+{
+    "epoch": 0.0,
+    "train_loss": 3.6670310497283936,
+    "train_runtime": 12.1659,
+    "train_samples": 2318,
+    "train_samples_per_second": 0.658,
+    "train_steps_per_second": 0.082
+}
trainer_state.json
ADDED
@@ -0,0 +1,25 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.0034482758620689655,
+  "global_step": 1,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0,
+      "step": 1,
+      "total_flos": 4180672512000.0,
+      "train_loss": 3.6670310497283936,
+      "train_runtime": 12.1659,
+      "train_samples_per_second": 0.658,
+      "train_steps_per_second": 0.082
+    }
+  ],
+  "max_steps": 1,
+  "num_train_epochs": 1,
+  "total_flos": 4180672512000.0,
+  "trial_name": null,
+  "trial_params": null
+}
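The fractional `epoch` recorded here follows directly from the run configuration: with 2318 training samples and a train batch size of 8, one epoch is ceil(2318 / 8) = 290 steps, so a single step covers 1/290 of an epoch:

```python
import math

steps_per_epoch = math.ceil(2318 / 8)  # train_samples / train_batch_size -> 290
print(1 / steps_per_epoch)             # 0.0034482758620689655, matching "epoch" above
```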
training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:95e8ccf8f23e5daae94f3d8d35091d4dacac844e382868907ac456cf0bf3a799
+size 3963
vocab.json
ADDED
The diff for this file is too large to render.
See raw diff