Luca-Engel committed on
Commit 7f5fa4f
1 Parent(s): e0565ec

do test run on colab with base gpt model

README.md CHANGED
@@ -1,71 +1,70 @@
- ---
- license: mit
- base_model: gpt2
- tags:
- - trl
- - dpo
- - generated_from_trainer
- model-index:
- - name: distilgpt2-dpo_test_run
-   results: []
- ---
-
- <!-- This model card has been generated automatically according to the information the Trainer had access to. You
- should probably proofread and complete it, then remove this comment. -->
-
- # distilgpt2-dpo_test_run
-
- This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on the None dataset.
- It achieves the following results on the evaluation set:
- - Loss: 0.6931
- - Rewards/chosen: 0.0
- - Rewards/rejected: 0.0
- - Rewards/accuracies: 0.0
- - Rewards/margins: 0.0
- - Logps/rejected: -606.5995
- - Logps/chosen: -1121.9315
- - Logits/rejected: -132.4945
- - Logits/chosen: -148.9527
-
- ## Model description
-
- More information needed
-
- ## Intended uses & limitations
-
- More information needed
-
- ## Training and evaluation data
-
- More information needed
-
- ## Training procedure
-
- ### Training hyperparameters
-
- The following hyperparameters were used during training:
- - learning_rate: 5e-05
- - train_batch_size: 8
- - eval_batch_size: 8
- - seed: 42
- - gradient_accumulation_steps: 2
- - total_train_batch_size: 16
- - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
- - lr_scheduler_type: linear
- - lr_scheduler_warmup_ratio: 0.1
- - num_epochs: 3
-
- ### Training results
-
- | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
- |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
- | No log | 0.67 | 1 | 0.6931 | 0.0 | 0.0 | 0.0 | 0.0 | -606.5995 | -1121.9315 | -132.4945 | -148.9527 |
- | No log | 2.0 | 3 | 4.0741 | 3.4192 | 4.4772 | 0.6000 | -1.0580 | -561.8280 | -1087.7399 | -119.7184 | -136.7517 |
-
-
- ### Framework versions
-
- - Transformers 4.38.1
- - Pytorch 2.3.0+cpu
- - Datasets 2.3.2
- - Tokenizers 0.15.2

+ ---
+ license: mit
+ base_model: gpt2
+ tags:
+ - trl
+ - dpo
+ - generated_from_trainer
+ model-index:
+ - name: distilgpt2-dpo_test_run
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # distilgpt2-dpo_test_run
+
+ This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on the None dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 1.0786
+ - Rewards/chosen: -0.1353
+ - Rewards/rejected: -0.5974
+ - Rewards/accuracies: 0.5959
+ - Rewards/margins: 0.4621
+ - Logps/rejected: -493.6547
+ - Logps/chosen: -559.9373
+ - Logits/rejected: -82.4215
+ - Logits/chosen: -80.3884
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 5e-05
+ - train_batch_size: 4
+ - eval_batch_size: 4
+ - seed: 42
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: linear
+ - lr_scheduler_warmup_ratio: 0.1
+ - num_epochs: 3
+
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
+ |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
+ | No log | 1.0 | 289 | 1.0786 | -0.1353 | -0.5974 | 0.5959 | 0.4621 | -493.6547 | -559.9373 | -82.4215 | -80.3884 |
+ | 0.7672 | 2.0 | 578 | 1.1977 | 1.1873 | 0.5208 | 0.5993 | 0.6665 | -482.4724 | -546.7113 | -89.5540 | -87.9300 |
+ | 0.7672 | 3.0 | 867 | 1.4420 | 0.6108 | -0.0653 | 0.5788 | 0.6761 | -488.3335 | -552.4765 | -97.7897 | -96.8133 |
+
+
+ ### Framework versions
+
+ - Transformers 4.40.2
+ - Pytorch 2.2.1+cu121
+ - Datasets 2.19.1
+ - Tokenizers 0.19.1
 
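For reference, the hyperparameters in the updated card line up with a TRL `DPOTrainer` run. Below is a minimal sketch of such a setup; the toy preference dataset and output path are placeholders (the card does not name the real dataset), and the exact `DPOTrainer` keyword names may differ slightly across TRL versions.

```python
# Minimal DPO fine-tuning sketch matching the card's hyperparameters.
# Assumes a TRL version contemporary with Transformers 4.40 (~TRL 0.8),
# where DPOTrainer still takes tokenizer= (later renamed processing_class).
from datasets import Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from trl import DPOTrainer

model = AutoModelForCausalLM.from_pretrained("gpt2")
tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token  # GPT-2 ships without a pad token

# Placeholder preference pairs; DPO expects prompt/chosen/rejected columns.
pairs = Dataset.from_dict({
    "prompt": ["The capital of France is"],
    "chosen": [" Paris."],
    "rejected": [" Berlin."],
})

args = TrainingArguments(
    output_dir="distilgpt2-dpo_test_run",
    learning_rate=5e-5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=3,
    lr_scheduler_type="linear",
    warmup_ratio=0.1,
    seed=42,
    report_to="tensorboard",  # consistent with the runs/ event files below
)

trainer = DPOTrainer(
    model=model,          # reference model is cloned internally when omitted
    args=args,
    train_dataset=pairs,
    eval_dataset=pairs,
    tokenizer=tokenizer,
)
trainer.train()
```

Adam with betas=(0.9,0.999) and epsilon=1e-08 is the `TrainingArguments` default, so it needs no explicit flags.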
config.json CHANGED
@@ -1,39 +1,39 @@
- {
-   "_name_or_path": "gpt2",
-   "activation_function": "gelu_new",
-   "architectures": [
-     "GPT2LMHeadModel"
-   ],
-   "attn_pdrop": 0.1,
-   "bos_token_id": 50256,
-   "embd_pdrop": 0.1,
-   "eos_token_id": 50256,
-   "initializer_range": 0.02,
-   "layer_norm_epsilon": 1e-05,
-   "model_type": "gpt2",
-   "n_ctx": 1024,
-   "n_embd": 768,
-   "n_head": 12,
-   "n_inner": null,
-   "n_layer": 12,
-   "n_positions": 1024,
-   "reorder_and_upcast_attn": false,
-   "resid_pdrop": 0.1,
-   "scale_attn_by_inverse_layer_idx": false,
-   "scale_attn_weights": true,
-   "summary_activation": null,
-   "summary_first_dropout": 0.1,
-   "summary_proj_to_labels": true,
-   "summary_type": "cls_index",
-   "summary_use_proj": true,
-   "task_specific_params": {
-     "text-generation": {
-       "do_sample": true,
-       "max_length": 50
-     }
-   },
-   "torch_dtype": "float32",
-   "transformers_version": "4.38.1",
-   "use_cache": true,
-   "vocab_size": 50257
- }

+ {
+   "_name_or_path": "gpt2",
+   "activation_function": "gelu_new",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 50256,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 50256,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "model_type": "gpt2",
+   "n_ctx": 1024,
+   "n_embd": 768,
+   "n_head": 12,
+   "n_inner": null,
+   "n_layer": 12,
+   "n_positions": 1024,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "task_specific_params": {
+     "text-generation": {
+       "do_sample": true,
+       "max_length": 50
+     }
+   },
+   "torch_dtype": "float32",
+   "transformers_version": "4.40.2",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
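
The `task_specific_params` block above carries GPT-2's stock text-generation defaults (`do_sample: true`, `max_length: 50`). A quick sketch of loading this checkpoint and generating with those settings; the local path is a placeholder for a clone of this repo (or its Hub id):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# Placeholder path; substitute the actual Hub id or local clone of this repo.
model_path = "./distilgpt2-dpo_test_run"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)  # GPT2LMHeadModel

inputs = tokenizer("The meaning of life is", return_tensors="pt")
outputs = model.generate(
    **inputs,
    do_sample=True,                       # per task_specific_params
    max_length=50,                        # per task_specific_params
    pad_token_id=tokenizer.eos_token_id,  # avoids the missing-pad warning
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```
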
generation_config.json CHANGED
@@ -1,6 +1,6 @@
- {
-   "_from_model_config": true,
-   "bos_token_id": 50256,
-   "eos_token_id": 50256,
-   "transformers_version": "4.38.1"
- }

+ {
+   "_from_model_config": true,
+   "bos_token_id": 50256,
+   "eos_token_id": 50256,
+   "transformers_version": "4.40.2"
+ }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f898bfc8b55dfe5e4431f298888973cb311f93698e1b75dcd178dafe3d80a82a
  size 497774208

  version https://git-lfs.github.com/spec/v1
+ oid sha256:30ede28f2f06bdc930fc26a85e69c25f0f63a22bff0ec0932aaf67ab980c67dd
  size 497774208
runs/May16_10-54-37_c181a1ecec5b/events.out.tfevents.1715857070.c181a1ecec5b.247.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6e9c1c451c7c98da663ef42508547b7c4a23abd9bd3913c4a356f83bc5854f9d
+ size 4917
runs/May16_10-58-52_c181a1ecec5b/events.out.tfevents.1715857249.c181a1ecec5b.247.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1ae85b7f9deabe9992b2fbd6f7b46fa664864b2120a9dd8612d82cc36b3c4b7c
+ size 88
runs/May16_11-03-56_51e195417f1c/events.out.tfevents.1715857518.51e195417f1c.1843.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cfe7ad14f2943e7b4226606e11127908fe52255903abfd8b010eec25731ff7e4
+ size 4917
runs/May18_13-33-40_586feedd8b82/events.out.tfevents.1716039306.586feedd8b82.2907.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:af820a56159accfe4ebe8c15eca5de0ffccbb7102ac94813f31db7346c54f3c8
+ size 4955
runs/May18_13-35-44_586feedd8b82/events.out.tfevents.1716039429.586feedd8b82.2907.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:75b0b22b1509972c0cc08460746aecc58dde108dba9914820a8d25af3990d730
+ size 4954
runs/May18_13-44-27_7618a08b7f98/events.out.tfevents.1716039949.7618a08b7f98.6286.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9b9aef911a07257a90b322c972ebbb3d32ccef59006dd15f0188cd31503521b8
+ size 4954
runs/May18_13-48-38_7618a08b7f98/events.out.tfevents.1716040130.7618a08b7f98.6286.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c93f661cceb40fa4a09446dd942e83393bd27dfa33e858c2a9b04669ccee8260
+ size 8280
runs/May18_13-53-10_1113de9dbcce/events.out.tfevents.1716040404.1113de9dbcce.6107.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6374c197a3c32e025028ccd03eb66c2f603778780360be1359497b332c2dddff
+ size 6382
runs/May18_14-18-11_857c112c0c60/events.out.tfevents.1716041915.857c112c0c60.307.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:38ad33fdd24d4629b7b9fef52f84c5cceb888056c3e803b55e0b00423ec88723
+ size 8216
runs/May18_14-18-11_857c112c0c60/events.out.tfevents.1716043541.857c112c0c60.307.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a8988c761aa8df45edbc32ec28812df2acb32c8596b5f1e71b8b9fab172162a3
+ size 828
special_tokens_map.json CHANGED
@@ -1,6 +1,6 @@
- {
-   "bos_token": "<|endoftext|>",
-   "eos_token": "<|endoftext|>",
-   "pad_token": "<|endoftext|>",
-   "unk_token": "<|endoftext|>"
- }

+ {
+   "bos_token": "<|endoftext|>",
+   "eos_token": "<|endoftext|>",
+   "pad_token": "<|endoftext|>",
+   "unk_token": "<|endoftext|>"
+ }
tokenizer.json CHANGED
@@ -40,6 +40,7 @@
    "end_of_word_suffix": "",
    "fuse_unk": false,
    "byte_fallback": false,
    "vocab": {
      "!": 0,
      "\"": 1,

    "end_of_word_suffix": "",
    "fuse_unk": false,
    "byte_fallback": false,
+   "ignore_merges": false,
    "vocab": {
      "!": 0,
      "\"": 1,
tokenizer_config.json CHANGED
@@ -1,20 +1,20 @@
- {
-   "add_prefix_space": false,
-   "added_tokens_decoder": {
-     "50256": {
-       "content": "<|endoftext|>",
-       "lstrip": false,
-       "normalized": true,
-       "rstrip": false,
-       "single_word": false,
-       "special": true
-     }
-   },
-   "bos_token": "<|endoftext|>",
-   "clean_up_tokenization_spaces": true,
-   "eos_token": "<|endoftext|>",
-   "model_max_length": 1024,
-   "pad_token": "<|endoftext|>",
-   "tokenizer_class": "GPT2Tokenizer",
-   "unk_token": "<|endoftext|>"
- }

+ {
+   "add_prefix_space": false,
+   "added_tokens_decoder": {
+     "50256": {
+       "content": "<|endoftext|>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<|endoftext|>",
+   "clean_up_tokenization_spaces": true,
+   "eos_token": "<|endoftext|>",
+   "model_max_length": 1024,
+   "pad_token": "<|endoftext|>",
+   "tokenizer_class": "GPT2Tokenizer",
+   "unk_token": "<|endoftext|>"
+ }
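
As `special_tokens_map.json` and the config above show, all four special tokens map to `<|endoftext|>` (id 50256), so padding reuses the EOS token. A small sketch of what that means when batching (path again a placeholder):

```python
from transformers import AutoTokenizer

# Placeholder path; substitute the actual Hub id or local clone of this repo.
tokenizer = AutoTokenizer.from_pretrained("./distilgpt2-dpo_test_run")
assert tokenizer.pad_token == tokenizer.eos_token == "<|endoftext|>"

batch = tokenizer(
    ["short prompt", "a noticeably longer prompt that forces padding"],
    padding=True,
    return_tensors="pt",
)
# The shorter sequence is right-padded with id 50256; the attention mask
# zeroes those positions so the model ignores them.
print(batch["input_ids"][0])
print(batch["attention_mask"][0])
```
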
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:6e7c002ad0f6dbec31187867d72777d2d20d87717dba4dbbdcbe71d959f1f98c
- size 4920

  version https://git-lfs.github.com/spec/v1
+ oid sha256:e609589856f247c47efa96b0eaa75fe4585e470031d55d96d4e31f8ab162ec2e
+ size 5048