Vui Seng Chua
committed on
Commit 57022cc
1 Parent(s): d17a7c1
Add content
Browse files
- README.md +71 -0
- all_results.json +15 -0
- compressed_graph.dot +0 -0
- config.json +39 -0
- eval_results.json +10 -0
- generation_config.json +6 -0
- merges.txt +0 -0
- nncf_output.log +17 -0
- openvino_config.json +27 -0
- openvino_model.bin +3 -0
- openvino_model.xml +0 -0
- original_graph.dot +0 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +5 -0
- tokenizer.json +0 -0
- tokenizer_config.json +9 -0
- train_results.json +8 -0
- trainer_state.json +25 -0
- training_args.bin +3 -0
- vocab.json +0 -0
README.md
ADDED
@@ -0,0 +1,71 @@
+---
+license: mit
+tags:
+- generated_from_trainer
+datasets:
+- wikitext
+metrics:
+- accuracy
+model-index:
+- name: gpt2-fp32-ov-kv-cache
+  results:
+  - task:
+      name: Causal Language Modeling
+      type: text-generation
+    dataset:
+      name: wikitext wikitext-2-raw-v1
+      type: wikitext
+      config: wikitext-2-raw-v1
+      split: validation
+      args: wikitext-2-raw-v1
+    metrics:
+    - name: Accuracy
+      type: accuracy
+      value: 0.38449006190941676
+---
+
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+
+# gpt2-fp32-ov-kv-cache
+
+This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on the wikitext wikitext-2-raw-v1 dataset.
+It achieves the following results on the evaluation set:
+- Loss: 3.3894
+- Accuracy: 0.3845
+
+## Model description
+
+More information needed
+
+## Intended uses & limitations
+
+More information needed
+
+## Training and evaluation data
+
+More information needed
+
+## Training procedure
+
+### Training hyperparameters
+
+The following hyperparameters were used during training:
+- learning_rate: 5e-05
+- train_batch_size: 8
+- eval_batch_size: 1
+- seed: 42
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: linear
+- training_steps: 1
+
+### Training results
+
+
+
+### Framework versions
+
+- Transformers 4.30.2
+- Pytorch 2.0.1+cu117
+- Datasets 2.13.1
+- Tokenizers 0.13.3
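For reference, a checkpoint laid out like this (openvino_model.xml/.bin alongside the tokenizer files) is typically loaded through optimum-intel rather than plain transformers. A minimal usage sketch; the repo id below is a hypothetical placeholder, not taken from this commit:

```python
from optimum.intel import OVModelForCausalLM  # optimum-intel, per openvino_config.json
from transformers import AutoTokenizer

model_id = "<user>/gpt2-fp32-ov-kv-cache"  # hypothetical repo id; substitute the real path
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = OVModelForCausalLM.from_pretrained(model_id)  # picks up openvino_model.xml/.bin

inputs = tokenizer("OpenVINO runs GPT-2", return_tensors="pt")
out = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```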
all_results.json
ADDED
@@ -0,0 +1,15 @@
+{
+    "epoch": 0.0,
+    "eval_accuracy": 0.38449006190941676,
+    "eval_loss": 3.3894119262695312,
+    "eval_runtime": 83.4955,
+    "eval_samples": 240,
+    "eval_samples_per_second": 2.874,
+    "eval_steps_per_second": 2.874,
+    "perplexity": 29.648511631842613,
+    "train_loss": 3.6670310497283936,
+    "train_runtime": 12.1659,
+    "train_samples": 2318,
+    "train_samples_per_second": 0.658,
+    "train_steps_per_second": 0.082
+}
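Note that the reported perplexity is just the exponential of the evaluation loss, so the two fields are consistent:

```python
import math

# perplexity = exp(mean cross-entropy loss)
print(math.exp(3.3894119262695312))  # ≈ 29.6485..., matching "perplexity" above
```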
compressed_graph.dot
ADDED
The diff for this file is too large to render.
See raw diff
config.json
ADDED
@@ -0,0 +1,39 @@
+{
+  "_name_or_path": "gpt2",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.30.2",
+  "use_cache": true,
+  "vocab_size": 50257
+}
eval_results.json
ADDED
@@ -0,0 +1,10 @@
+{
+    "epoch": 0.0,
+    "eval_accuracy": 0.38449006190941676,
+    "eval_loss": 3.3894119262695312,
+    "eval_runtime": 83.4955,
+    "eval_samples": 240,
+    "eval_samples_per_second": 2.874,
+    "eval_steps_per_second": 2.874,
+    "perplexity": 29.648511631842613
+}
generation_config.json
ADDED
@@ -0,0 +1,6 @@
+{
+  "_from_model_config": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "transformers_version": "4.30.2"
+}
merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
nncf_output.log
ADDED
@@ -0,0 +1,17 @@
+INFO:nncf:
+WARNING:nncf:You are setting `forward` on an NNCF-processed model object.
+NNCF relies on custom-wrapping the `forward` call in order to function properly.
+Arbitrary adjustments to the forward function on an NNCFNetwork object have undefined behaviour.
+If you need to replace the underlying forward function of the original model so that NNCF should be using that instead of the original forward function that NNCF saved during the compressed model creation, you can do this by calling:
+model.nncf.set_original_unbound_forward(fn)
+if `fn` has an unbound 0-th `self` argument, or
+with model.nncf.temporary_bound_original_forward(fn): ...
+if `fn` already had 0-th `self` argument bound or never had it in the first place.
+WARNING:nncf:You are setting `forward` on an NNCF-processed model object.
+NNCF relies on custom-wrapping the `forward` call in order to function properly.
+Arbitrary adjustments to the forward function on an NNCFNetwork object have undefined behaviour.
+If you need to replace the underlying forward function of the original model so that NNCF should be using that instead of the original forward function that NNCF saved during the compressed model creation, you can do this by calling:
+model.nncf.set_original_unbound_forward(fn)
+if `fn` has an unbound 0-th `self` argument, or
+with model.nncf.temporary_bound_original_forward(fn): ...
+if `fn` already had 0-th `self` argument bound or never had it in the first place.
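The warning above spells out the two supported ways to replace `forward` on an NNCF-wrapped model. A minimal sketch of the unbound-forward pattern, assuming the nncf.torch API of this era and using a toy module in place of GPT-2:

```python
import torch
import torch.nn.functional as F
from nncf import NNCFConfig
from nncf.torch import create_compressed_model

# Toy stand-in for the GPT-2 model this commit wraps with NNCF.
model = torch.nn.Linear(8, 8)
nncf_config = NNCFConfig.from_dict({"input_info": {"sample_size": [1, 8]}})
ctrl, model = create_compressed_model(model, nncf_config)

# Unbound replacement forward: 0-th argument is `self`, as the warning requires.
def new_forward(self, x):
    return F.linear(x, self.weight, self.bias)

# The supported way to install it (instead of assigning `model.forward` directly):
model.nncf.set_original_unbound_forward(new_forward)
out = model(torch.randn(1, 8))
```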
openvino_config.json
ADDED
@@ -0,0 +1,27 @@
+{
+  "compression": {
+    "algorithm": "NoCompressionAlgorithm"
+  },
+  "input_info": [
+    {
+      "keyword": "input_ids",
+      "sample_size": [
+        8,
+        1024
+      ],
+      "type": "long"
+    },
+    {
+      "keyword": "attention_mask",
+      "sample_size": [
+        8,
+        1024
+      ],
+      "type": "long"
+    }
+  ],
+  "log_dir": "/data1/vchua/run/hf-model/gpt2-fp32-ov-kv-cache",
+  "optimum_version": "1.8.8",
+  "save_onnx_model": false,
+  "transformers_version": "4.30.2"
+}
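The `input_info` entries record the example shapes the model was traced with: batch size 8, sequence length 1024, both inputs integer (`long`) tensors. Equivalent dummy inputs, as a sketch:

```python
import torch

# Dummy inputs matching input_info above: two [8, 1024] long tensors
input_ids = torch.ones((8, 1024), dtype=torch.long)
attention_mask = torch.ones((8, 1024), dtype=torch.long)
```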
openvino_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a771357878b17d079691575f42a6a63d27a70572590684e5917651edb504078
+size 653197532
openvino_model.xml
ADDED
The diff for this file is too large to render.
See raw diff
original_graph.dot
ADDED
The diff for this file is too large to render.
See raw diff
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0889a79134325a4518fd10b985d8ab8531534482a257030ce8b1ac98a4b23cf6
+size 497805149
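As a rough sanity check (an observation, not part of the commit), the checkpoint size is consistent with GPT-2 small stored in FP32 at 4 bytes per parameter:

```python
# ~497.8 MB / 4 bytes per float32 weight ≈ 124M parameters, i.e. GPT-2 small
print(497805149 / 4 / 1e6)  # ≈ 124.45 (million parameters, approximate)
```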
special_tokens_map.json
ADDED
@@ -0,0 +1,5 @@
+{
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
+  "unk_token": "<|endoftext|>"
+}
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
tokenizer_config.json
ADDED
@@ -0,0 +1,9 @@
+{
+  "add_prefix_space": false,
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|endoftext|>",
+  "model_max_length": 1024,
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>"
+}
train_results.json
ADDED
@@ -0,0 +1,8 @@
+{
+    "epoch": 0.0,
+    "train_loss": 3.6670310497283936,
+    "train_runtime": 12.1659,
+    "train_samples": 2318,
+    "train_samples_per_second": 0.658,
+    "train_steps_per_second": 0.082
+}
trainer_state.json
ADDED
@@ -0,0 +1,25 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.0034482758620689655,
+  "global_step": 1,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0,
+      "step": 1,
+      "total_flos": 4180672512000.0,
+      "train_loss": 3.6670310497283936,
+      "train_runtime": 12.1659,
+      "train_samples_per_second": 0.658,
+      "train_steps_per_second": 0.082
+    }
+  ],
+  "max_steps": 1,
+  "num_train_epochs": 1,
+  "total_flos": 4180672512000.0,
+  "trial_name": null,
+  "trial_params": null
+}
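The fractional `epoch` recorded here follows directly from the run configuration: with 2318 training samples and a train batch size of 8, one epoch is ceil(2318 / 8) = 290 steps, so a single step covers 1/290 of an epoch:

```python
import math

steps_per_epoch = math.ceil(2318 / 8)  # train_samples / train_batch_size -> 290
print(1 / steps_per_epoch)             # 0.0034482758620689655, matching "epoch" above
```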
training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:95e8ccf8f23e5daae94f3d8d35091d4dacac844e382868907ac456cf0bf3a799
+size 3963
vocab.json
ADDED
The diff for this file is too large to render.
See raw diff