wzhouad committed on
Commit
cf1620a
1 Parent(s): c67323c

Model save

Browse files
README.md CHANGED
@@ -13,6 +13,7 @@ model-index:
13
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
  should probably proofread and complete it, then remove this comment. -->
15
 
 
16
  # zephyr-7b-dpo-full
17
 
18
  This model is a fine-tuned version of [HuggingFaceH4/mistral-7b-sft-beta](https://huggingface.co/HuggingFaceH4/mistral-7b-sft-beta) on the None dataset.
@@ -54,7 +55,7 @@ The following hyperparameters were used during training:
54
 
55
  ### Framework versions
56
 
57
- - Transformers 4.35.2
58
  - Pytorch 2.1.2+cu121
59
  - Datasets 2.14.6
60
- - Tokenizers 0.14.1
 
13
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
  should probably proofread and complete it, then remove this comment. -->
15
 
16
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/sanqiang/wdpo/runs/h1ajp915)
17
  # zephyr-7b-dpo-full
18
 
19
  This model is a fine-tuned version of [HuggingFaceH4/mistral-7b-sft-beta](https://huggingface.co/HuggingFaceH4/mistral-7b-sft-beta) on the None dataset.
 
55
 
56
  ### Framework versions
57
 
58
+ - Transformers 4.41.0.dev0
59
  - Pytorch 2.1.2+cu121
60
  - Datasets 2.14.6
61
+ - Tokenizers 0.19.1
all_results.json CHANGED
@@ -1,8 +1,9 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.6385756753525644,
4
- "train_runtime": 422.4133,
 
5
  "train_samples": 6750,
6
- "train_samples_per_second": 15.98,
7
- "train_steps_per_second": 0.125
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "total_flos": 0.0,
4
+ "train_loss": 0.35319842594974443,
5
+ "train_runtime": 385.1956,
6
  "train_samples": 6750,
7
+ "train_samples_per_second": 17.524,
8
+ "train_steps_per_second": 0.138
9
  }
config.json CHANGED
@@ -3,6 +3,7 @@
3
  "architectures": [
4
  "MistralForCausalLM"
5
  ],
 
6
  "bos_token_id": 1,
7
  "eos_token_id": 2,
8
  "hidden_act": "silu",
@@ -19,7 +20,7 @@
19
  "sliding_window": 4096,
20
  "tie_word_embeddings": false,
21
  "torch_dtype": "bfloat16",
22
- "transformers_version": "4.35.2",
23
  "use_cache": false,
24
  "vocab_size": 32000
25
  }
 
3
  "architectures": [
4
  "MistralForCausalLM"
5
  ],
6
+ "attention_dropout": 0.0,
7
  "bos_token_id": 1,
8
  "eos_token_id": 2,
9
  "hidden_act": "silu",
 
20
  "sliding_window": 4096,
21
  "tie_word_embeddings": false,
22
  "torch_dtype": "bfloat16",
23
+ "transformers_version": "4.41.0.dev0",
24
  "use_cache": false,
25
  "vocab_size": 32000
26
  }
generation_config.json CHANGED
@@ -2,5 +2,5 @@
2
  "_from_model_config": true,
3
  "bos_token_id": 1,
4
  "eos_token_id": 2,
5
- "transformers_version": "4.35.2"
6
  }
 
2
  "_from_model_config": true,
3
  "bos_token_id": 1,
4
  "eos_token_id": 2,
5
+ "transformers_version": "4.41.0.dev0"
6
  }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c5eccf170cb2807dafed373e6970a49edaa5ad3f88e5e35e03e143abba6dcb2
3
  size 4943162336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5bd9c84e12897714bb059b0d34b3405acb56dee326a24d387c5ed9e074087ec
3
  size 4943162336
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7cb9a2089e1f16a8ba2daaa3b1a78b5e41b35c6b1086cfa84a7c364f26c2418
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6279da6e32757cc13c89a3f5b1c28dffe48b0cab5c7f7102e9b34e53c3692ad9
3
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a44cd1f203562bd6cb7aff11086c30c740baefe049f30b9c91acb227c1de6938
3
  size 4540516344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e80e298dbd33ad03a2a8eb1dd41c314d2e5c5e7f1c09a8ebaa304197676e361
3
  size 4540516344
tokenizer.json CHANGED
@@ -134,6 +134,7 @@
134
  "end_of_word_suffix": null,
135
  "fuse_unk": true,
136
  "byte_fallback": true,
 
137
  "vocab": {
138
  "<unk>": 0,
139
  "<s>": 1,
 
134
  "end_of_word_suffix": null,
135
  "fuse_unk": true,
136
  "byte_fallback": true,
137
+ "ignore_merges": false,
138
  "vocab": {
139
  "<unk>": 0,
140
  "<s>": 1,
tokenizer_config.json CHANGED
@@ -1,4 +1,6 @@
1
  {
 
 
2
  "added_tokens_decoder": {
3
  "0": {
4
  "content": "<unk>",
@@ -34,7 +36,6 @@
34
  "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
35
  "clean_up_tokenization_spaces": false,
36
  "eos_token": "</s>",
37
- "legacy": true,
38
  "model_max_length": 2048,
39
  "pad_token": "</s>",
40
  "sp_model_kwargs": {},
 
1
  {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
  "added_tokens_decoder": {
5
  "0": {
6
  "content": "<unk>",
 
36
  "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
37
  "clean_up_tokenization_spaces": false,
38
  "eos_token": "</s>",
 
39
  "model_max_length": 2048,
40
  "pad_token": "</s>",
41
  "sp_model_kwargs": {},
train_results.json CHANGED
@@ -1,8 +1,9 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.6385756753525644,
4
- "train_runtime": 422.4133,
 
5
  "train_samples": 6750,
6
- "train_samples_per_second": 15.98,
7
- "train_steps_per_second": 0.125
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "total_flos": 0.0,
4
+ "train_loss": 0.35319842594974443,
5
+ "train_runtime": 385.1956,
6
  "train_samples": 6750,
7
+ "train_samples_per_second": 17.524,
8
+ "train_steps_per_second": 0.138
9
  }
trainer_state.json CHANGED
@@ -9,13 +9,17 @@
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.02,
 
 
 
 
13
  "learning_rate": 8.333333333333333e-08,
14
- "logits/chosen": -2.8462421894073486,
15
- "logits/rejected": -2.8283610343933105,
16
- "logps/chosen": -274.7393798828125,
17
- "logps/rejected": -204.42575073242188,
18
- "loss": 0.6931,
19
  "rewards/accuracies": 0.0,
20
  "rewards/chosen": 0.0,
21
  "rewards/margins": 0.0,
@@ -23,90 +27,124 @@
23
  "step": 1
24
  },
25
  {
26
- "epoch": 0.19,
 
 
 
 
27
  "learning_rate": 4.911172937635942e-07,
28
- "logits/chosen": -2.8527991771698,
29
- "logits/rejected": -2.8377315998077393,
30
- "logps/chosen": -305.9073181152344,
31
- "logps/rejected": -295.8478698730469,
32
- "loss": 0.6914,
33
- "rewards/accuracies": 0.4513888955116272,
34
- "rewards/chosen": 0.0023197412956506014,
35
- "rewards/margins": 0.0025084479711949825,
36
- "rewards/rejected": -0.00018870655912905931,
37
  "step": 10
38
  },
39
  {
40
- "epoch": 0.38,
 
 
 
 
41
  "learning_rate": 3.982949361823388e-07,
42
- "logits/chosen": -2.859750270843506,
43
- "logits/rejected": -2.880180835723877,
44
- "logps/chosen": -295.7957458496094,
45
- "logps/rejected": -332.6015930175781,
46
- "loss": 0.6653,
47
- "rewards/accuracies": 0.675000011920929,
48
- "rewards/chosen": 0.02228003740310669,
49
- "rewards/margins": 0.059415679425001144,
50
- "rewards/rejected": -0.037135638296604156,
51
  "step": 20
52
  },
53
  {
54
- "epoch": 0.57,
 
 
 
 
55
  "learning_rate": 2.416462557480814e-07,
56
- "logits/chosen": -2.843632936477661,
57
- "logits/rejected": -2.8286781311035156,
58
- "logps/chosen": -310.9751892089844,
59
- "logps/rejected": -322.77532958984375,
60
- "loss": 0.6283,
61
- "rewards/accuracies": 0.699999988079071,
62
- "rewards/chosen": 0.027804672718048096,
63
- "rewards/margins": 0.1940310001373291,
64
- "rewards/rejected": -0.1662263423204422,
65
  "step": 30
66
  },
67
  {
68
- "epoch": 0.75,
 
 
 
 
69
  "learning_rate": 8.859303711029939e-08,
70
- "logits/chosen": -2.8175368309020996,
71
- "logits/rejected": -2.821326494216919,
72
- "logps/chosen": -274.8536682128906,
73
- "logps/rejected": -349.11505126953125,
74
- "loss": 0.6099,
75
- "rewards/accuracies": 0.699999988079071,
76
- "rewards/chosen": 0.002673505572602153,
77
- "rewards/margins": 0.2130366563796997,
78
- "rewards/rejected": -0.21036314964294434,
79
  "step": 40
80
  },
81
  {
82
- "epoch": 0.94,
 
 
 
 
83
  "learning_rate": 5.009573740853313e-09,
84
- "logits/chosen": -2.8560073375701904,
85
- "logits/rejected": -2.867896556854248,
86
- "logps/chosen": -307.2721862792969,
87
- "logps/rejected": -350.257568359375,
88
- "loss": 0.5984,
89
- "rewards/accuracies": 0.7250000238418579,
90
- "rewards/chosen": 0.0023462946992367506,
91
- "rewards/margins": 0.33910489082336426,
92
- "rewards/rejected": -0.336758553981781,
93
  "step": 50
94
  },
95
  {
96
  "epoch": 1.0,
97
  "step": 53,
98
  "total_flos": 0.0,
99
- "train_loss": 0.6385756753525644,
100
- "train_runtime": 422.4133,
101
- "train_samples_per_second": 15.98,
102
- "train_steps_per_second": 0.125
103
  }
104
  ],
105
  "logging_steps": 10,
106
  "max_steps": 53,
 
107
  "num_train_epochs": 1,
108
  "save_steps": 100,
 
 
 
 
 
 
 
 
 
 
 
 
109
  "total_flos": 0.0,
 
110
  "trial_name": null,
111
  "trial_params": null
112
  }
 
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "debug/losses": 0.34206920862197876,
13
+ "debug/policy_weights": 0.49350154399871826,
14
+ "debug/raw_losses": 0.6931471824645996,
15
+ "epoch": 0.018867924528301886,
16
+ "grad_norm": 5.3608531887517,
17
  "learning_rate": 8.333333333333333e-08,
18
+ "logits/chosen": -2.855412006378174,
19
+ "logits/rejected": -2.8797199726104736,
20
+ "logps/chosen": -320.43853759765625,
21
+ "logps/rejected": -340.07073974609375,
22
+ "loss": 0.378,
23
  "rewards/accuracies": 0.0,
24
  "rewards/chosen": 0.0,
25
  "rewards/margins": 0.0,
 
27
  "step": 1
28
  },
29
  {
30
+ "debug/losses": 0.37441229820251465,
31
+ "debug/policy_weights": 0.541907548904419,
32
+ "debug/raw_losses": 0.6909322738647461,
33
+ "epoch": 0.18867924528301888,
34
+ "grad_norm": 5.523379066284079,
35
  "learning_rate": 4.911172937635942e-07,
36
+ "logits/chosen": -2.8660757541656494,
37
+ "logits/rejected": -2.892007350921631,
38
+ "logps/chosen": -305.7418518066406,
39
+ "logps/rejected": -332.18719482421875,
40
+ "loss": 0.3735,
41
+ "rewards/accuracies": 0.5277777910232544,
42
+ "rewards/chosen": 0.0008213530527427793,
43
+ "rewards/margins": 0.004522037226706743,
44
+ "rewards/rejected": -0.003700683591887355,
45
  "step": 10
46
  },
47
  {
48
+ "debug/losses": 0.3686671853065491,
49
+ "debug/policy_weights": 0.5464785099029541,
50
+ "debug/raw_losses": 0.6741721630096436,
51
+ "epoch": 0.37735849056603776,
52
+ "grad_norm": 5.208106443938916,
53
  "learning_rate": 3.982949361823388e-07,
54
+ "logits/chosen": -2.862377643585205,
55
+ "logits/rejected": -2.8641226291656494,
56
+ "logps/chosen": -323.9226989746094,
57
+ "logps/rejected": -330.8751525878906,
58
+ "loss": 0.3688,
59
+ "rewards/accuracies": 0.6187499761581421,
60
+ "rewards/chosen": 0.006944864057004452,
61
+ "rewards/margins": 0.04258845001459122,
62
+ "rewards/rejected": -0.03564358502626419,
63
  "step": 20
64
  },
65
  {
66
+ "debug/losses": 0.36411529779434204,
67
+ "debug/policy_weights": 0.563360333442688,
68
+ "debug/raw_losses": 0.6445623636245728,
69
+ "epoch": 0.5660377358490566,
70
+ "grad_norm": 5.583750439900413,
71
  "learning_rate": 2.416462557480814e-07,
72
+ "logits/chosen": -2.8544485569000244,
73
+ "logits/rejected": -2.857438325881958,
74
+ "logps/chosen": -296.89434814453125,
75
+ "logps/rejected": -313.2176513671875,
76
+ "loss": 0.3529,
77
+ "rewards/accuracies": 0.737500011920929,
78
+ "rewards/chosen": -0.0008020855602808297,
79
+ "rewards/margins": 0.12708209455013275,
80
+ "rewards/rejected": -0.127884179353714,
81
  "step": 30
82
  },
83
  {
84
+ "debug/losses": 0.3165283799171448,
85
+ "debug/policy_weights": 0.5360943675041199,
86
+ "debug/raw_losses": 0.5777658224105835,
87
+ "epoch": 0.7547169811320755,
88
+ "grad_norm": 4.898347932161407,
89
  "learning_rate": 8.859303711029939e-08,
90
+ "logits/chosen": -2.861898183822632,
91
+ "logits/rejected": -2.8589160442352295,
92
+ "logps/chosen": -290.1973876953125,
93
+ "logps/rejected": -323.25640869140625,
94
+ "loss": 0.3411,
95
+ "rewards/accuracies": 0.7437499761581421,
96
+ "rewards/chosen": -0.0066400328651070595,
97
+ "rewards/margins": 0.3345140516757965,
98
+ "rewards/rejected": -0.3411540687084198,
99
  "step": 40
100
  },
101
  {
102
+ "debug/losses": 0.3771621882915497,
103
+ "debug/policy_weights": 0.5814041495323181,
104
+ "debug/raw_losses": 0.6445982456207275,
105
+ "epoch": 0.9433962264150944,
106
+ "grad_norm": 5.24238516043373,
107
  "learning_rate": 5.009573740853313e-09,
108
+ "logits/chosen": -2.899583101272583,
109
+ "logits/rejected": -2.886239528656006,
110
+ "logps/chosen": -282.59393310546875,
111
+ "logps/rejected": -317.896728515625,
112
+ "loss": 0.329,
113
+ "rewards/accuracies": 0.6625000238418579,
114
+ "rewards/chosen": -0.03263551741838455,
115
+ "rewards/margins": 0.1680021584033966,
116
+ "rewards/rejected": -0.20063766837120056,
117
  "step": 50
118
  },
119
  {
120
  "epoch": 1.0,
121
  "step": 53,
122
  "total_flos": 0.0,
123
+ "train_loss": 0.35319842594974443,
124
+ "train_runtime": 385.1956,
125
+ "train_samples_per_second": 17.524,
126
+ "train_steps_per_second": 0.138
127
  }
128
  ],
129
  "logging_steps": 10,
130
  "max_steps": 53,
131
+ "num_input_tokens_seen": 0,
132
  "num_train_epochs": 1,
133
  "save_steps": 100,
134
+ "stateful_callbacks": {
135
+ "TrainerControl": {
136
+ "args": {
137
+ "should_epoch_stop": false,
138
+ "should_evaluate": false,
139
+ "should_log": false,
140
+ "should_save": false,
141
+ "should_training_stop": false
142
+ },
143
+ "attributes": {}
144
+ }
145
+ },
146
  "total_flos": 0.0,
147
+ "train_batch_size": 8,
148
  "trial_name": null,
149
  "trial_params": null
150
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e22877006729d75e97bedd72552860d4e541abd9fccbfa2263c9d16124331dd
3
- size 5944
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:611a06a0768ab2de8b7d5348e31fd43482ca67ffc0e3b31a51215e68b7877c6a
3
+ size 6456