wzhouad committed on
Commit
41ca0c2
1 Parent(s): 439d0dd

Model save

Browse files
README.md CHANGED
@@ -13,7 +13,7 @@ model-index:
13
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
  should probably proofread and complete it, then remove this comment. -->
15
 
16
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/sanqiang/wdpo/runs/i9fm0xk3)
17
  # zephyr-7b-dpo-full
18
 
19
  This model is a fine-tuned version of [HuggingFaceH4/mistral-7b-sft-beta](https://huggingface.co/HuggingFaceH4/mistral-7b-sft-beta) on the None dataset.
 
13
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
  should probably proofread and complete it, then remove this comment. -->
15
 
16
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/sanqiang/wdpo/runs/sypbih9g)
17
  # zephyr-7b-dpo-full
18
 
19
  This model is a fine-tuned version of [HuggingFaceH4/mistral-7b-sft-beta](https://huggingface.co/HuggingFaceH4/mistral-7b-sft-beta) on the None dataset.
all_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 0.0,
4
- "train_loss": 0.20092295110225677,
5
- "train_runtime": 384.8987,
6
  "train_samples": 6750,
7
- "train_samples_per_second": 17.537,
8
  "train_steps_per_second": 0.138
9
  }
 
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.2009538251836345,
5
+ "train_runtime": 384.7218,
6
  "train_samples": 6750,
7
+ "train_samples_per_second": 17.545,
8
  "train_steps_per_second": 0.138
9
  }
config.json CHANGED
@@ -3,6 +3,7 @@
3
  "architectures": [
4
  "MistralForCausalLM"
5
  ],
 
6
  "bos_token_id": 1,
7
  "eos_token_id": 2,
8
  "hidden_act": "silu",
@@ -19,7 +20,7 @@
19
  "sliding_window": 4096,
20
  "tie_word_embeddings": false,
21
  "torch_dtype": "bfloat16",
22
- "transformers_version": "4.35.2",
23
  "use_cache": false,
24
  "vocab_size": 32000
25
  }
 
3
  "architectures": [
4
  "MistralForCausalLM"
5
  ],
6
+ "attention_dropout": 0.0,
7
  "bos_token_id": 1,
8
  "eos_token_id": 2,
9
  "hidden_act": "silu",
 
20
  "sliding_window": 4096,
21
  "tie_word_embeddings": false,
22
  "torch_dtype": "bfloat16",
23
+ "transformers_version": "4.41.0.dev0",
24
  "use_cache": false,
25
  "vocab_size": 32000
26
  }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a46cc3b5f84d4d99bb5b73eed876f0b5578ddd4737df15a4e469e33c95c08170
3
  size 4943162336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64ac0bec7ee02cdce6549e87eb5021fadfbc68acc6097efe408ba777bd78ded0
3
  size 4943162336
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cde9eb125e0d5a3b3305ef0cdb4bea397e66b0d0622e3a57126bb6a5687634fa
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3aa2404e9e95bdb4831267fbcb3c05850f0ff8f8576225b95f5aadc01aeb29fb
3
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c0430eff547cb6ba5d3819817467987ed432cfa8a3a5fcd748e54a73cb5ddc4
3
  size 4540516344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03bfb347f7333d6cdebfaf4c3a4baf4be3d0a11a34389519ee0cf86eb26080c2
3
  size 4540516344
tokenizer.json CHANGED
@@ -134,6 +134,7 @@
134
  "end_of_word_suffix": null,
135
  "fuse_unk": true,
136
  "byte_fallback": true,
 
137
  "vocab": {
138
  "<unk>": 0,
139
  "<s>": 1,
 
134
  "end_of_word_suffix": null,
135
  "fuse_unk": true,
136
  "byte_fallback": true,
137
+ "ignore_merges": false,
138
  "vocab": {
139
  "<unk>": 0,
140
  "<s>": 1,
tokenizer_config.json CHANGED
@@ -1,4 +1,6 @@
1
  {
 
 
2
  "added_tokens_decoder": {
3
  "0": {
4
  "content": "<unk>",
@@ -34,7 +36,6 @@
34
  "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
35
  "clean_up_tokenization_spaces": false,
36
  "eos_token": "</s>",
37
- "legacy": true,
38
  "model_max_length": 2048,
39
  "pad_token": "</s>",
40
  "sp_model_kwargs": {},
 
1
  {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
  "added_tokens_decoder": {
5
  "0": {
6
  "content": "<unk>",
 
36
  "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
37
  "clean_up_tokenization_spaces": false,
38
  "eos_token": "</s>",
 
39
  "model_max_length": 2048,
40
  "pad_token": "</s>",
41
  "sp_model_kwargs": {},
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 0.0,
4
- "train_loss": 0.20092295110225677,
5
- "train_runtime": 384.8987,
6
  "train_samples": 6750,
7
- "train_samples_per_second": 17.537,
8
  "train_steps_per_second": 0.138
9
  }
 
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.2009538251836345,
5
+ "train_runtime": 384.7218,
6
  "train_samples": 6750,
7
+ "train_samples_per_second": 17.545,
8
  "train_steps_per_second": 0.138
9
  }
trainer_state.json CHANGED
@@ -13,7 +13,7 @@
13
  "debug/policy_weights": 0.24804016947746277,
14
  "debug/raw_losses": 0.6931471824645996,
15
  "epoch": 0.018867924528301886,
16
- "grad_norm": 3.0791833143219045,
17
  "learning_rate": 8.333333333333333e-08,
18
  "logits/chosen": -2.855412006378174,
19
  "logits/rejected": -2.8797199726104736,
@@ -27,102 +27,102 @@
27
  "step": 1
28
  },
29
  {
30
- "debug/losses": 0.20367620885372162,
31
- "debug/policy_weights": 0.2948996424674988,
32
- "debug/raw_losses": 0.6906173229217529,
33
  "epoch": 0.18867924528301888,
34
- "grad_norm": 3.2025034006962603,
35
  "learning_rate": 4.911172937635942e-07,
36
- "logits/chosen": -2.8658909797668457,
37
- "logits/rejected": -2.8917548656463623,
38
- "logps/chosen": -305.7406005859375,
39
- "logps/rejected": -332.2490234375,
40
  "loss": 0.2033,
41
- "rewards/accuracies": 0.4791666567325592,
42
- "rewards/chosen": 0.0008342999499291182,
43
- "rewards/margins": 0.005153011996299028,
44
- "rewards/rejected": -0.004318712279200554,
45
  "step": 10
46
  },
47
  {
48
- "debug/losses": 0.20357565581798553,
49
- "debug/policy_weights": 0.30120497941970825,
50
- "debug/raw_losses": 0.675395131111145,
51
  "epoch": 0.37735849056603776,
52
- "grad_norm": 3.097721440067098,
53
  "learning_rate": 3.982949361823388e-07,
54
- "logits/chosen": -2.855691432952881,
55
- "logits/rejected": -2.8575425148010254,
56
- "logps/chosen": -324.3226623535156,
57
- "logps/rejected": -331.01007080078125,
58
- "loss": 0.2068,
59
- "rewards/accuracies": 0.612500011920929,
60
- "rewards/chosen": 0.0029448498971760273,
61
- "rewards/margins": 0.03993762657046318,
62
- "rewards/rejected": -0.03699277713894844,
63
  "step": 20
64
  },
65
  {
66
- "debug/losses": 0.20828036963939667,
67
- "debug/policy_weights": 0.3203299641609192,
68
- "debug/raw_losses": 0.6491612195968628,
69
  "epoch": 0.5660377358490566,
70
- "grad_norm": 3.2758930059808353,
71
  "learning_rate": 2.416462557480814e-07,
72
- "logits/chosen": -2.839444398880005,
73
- "logits/rejected": -2.842738628387451,
74
- "logps/chosen": -298.2297668457031,
75
- "logps/rejected": -313.391845703125,
76
- "loss": 0.2007,
77
  "rewards/accuracies": 0.731249988079071,
78
- "rewards/chosen": -0.014156119897961617,
79
- "rewards/margins": 0.11547034978866577,
80
- "rewards/rejected": -0.12962646782398224,
81
  "step": 30
82
  },
83
  {
84
- "debug/losses": 0.18270191550254822,
85
- "debug/policy_weights": 0.30252760648727417,
86
- "debug/raw_losses": 0.584720253944397,
87
  "epoch": 0.7547169811320755,
88
- "grad_norm": 2.7163808332522805,
89
  "learning_rate": 8.859303711029939e-08,
90
- "logits/chosen": -2.842470645904541,
91
- "logits/rejected": -2.8394596576690674,
92
- "logps/chosen": -290.64208984375,
93
- "logps/rejected": -320.837890625,
94
- "loss": 0.1963,
95
- "rewards/accuracies": 0.731249988079071,
96
- "rewards/chosen": -0.011087085120379925,
97
- "rewards/margins": 0.30588188767433167,
98
- "rewards/rejected": -0.31696897745132446,
99
  "step": 40
100
  },
101
  {
102
- "debug/losses": 0.23133957386016846,
103
- "debug/policy_weights": 0.3536807894706726,
104
- "debug/raw_losses": 0.6469973921775818,
105
  "epoch": 0.9433962264150944,
106
- "grad_norm": 3.004599614335292,
107
  "learning_rate": 5.009573740853313e-09,
108
- "logits/chosen": -2.8772711753845215,
109
- "logits/rejected": -2.864253520965576,
110
- "logps/chosen": -282.04022216796875,
111
- "logps/rejected": -316.0839538574219,
112
- "loss": 0.1939,
113
- "rewards/accuracies": 0.668749988079071,
114
- "rewards/chosen": -0.027098428457975388,
115
- "rewards/margins": 0.1554117500782013,
116
- "rewards/rejected": -0.1825101673603058,
117
  "step": 50
118
  },
119
  {
120
  "epoch": 1.0,
121
  "step": 53,
122
  "total_flos": 0.0,
123
- "train_loss": 0.20092295110225677,
124
- "train_runtime": 384.8987,
125
- "train_samples_per_second": 17.537,
126
  "train_steps_per_second": 0.138
127
  }
128
  ],
 
13
  "debug/policy_weights": 0.24804016947746277,
14
  "debug/raw_losses": 0.6931471824645996,
15
  "epoch": 0.018867924528301886,
16
+ "grad_norm": 3.079222482298495,
17
  "learning_rate": 8.333333333333333e-08,
18
  "logits/chosen": -2.855412006378174,
19
  "logits/rejected": -2.8797199726104736,
 
27
  "step": 1
28
  },
29
  {
30
+ "debug/losses": 0.20364493131637573,
31
+ "debug/policy_weights": 0.2948070466518402,
32
+ "debug/raw_losses": 0.6907545924186707,
33
  "epoch": 0.18867924528301888,
34
+ "grad_norm": 3.193404567956391,
35
  "learning_rate": 4.911172937635942e-07,
36
+ "logits/chosen": -2.865929126739502,
37
+ "logits/rejected": -2.8918821811676025,
38
+ "logps/chosen": -305.75299072265625,
39
+ "logps/rejected": -332.2339172363281,
40
  "loss": 0.2033,
41
+ "rewards/accuracies": 0.5069444179534912,
42
+ "rewards/chosen": 0.0007104460964910686,
43
+ "rewards/margins": 0.004878002218902111,
44
+ "rewards/rejected": -0.0041675567626953125,
45
  "step": 10
46
  },
47
  {
48
+ "debug/losses": 0.2036052942276001,
49
+ "debug/policy_weights": 0.3014451563358307,
50
+ "debug/raw_losses": 0.6749827265739441,
51
  "epoch": 0.37735849056603776,
52
+ "grad_norm": 3.0715644454401403,
53
  "learning_rate": 3.982949361823388e-07,
54
+ "logits/chosen": -2.8559136390686035,
55
+ "logits/rejected": -2.8577218055725098,
56
+ "logps/chosen": -324.2321472167969,
57
+ "logps/rejected": -331.00140380859375,
58
+ "loss": 0.2069,
59
+ "rewards/accuracies": 0.6000000238418579,
60
+ "rewards/chosen": 0.003850350622087717,
61
+ "rewards/margins": 0.040756504982709885,
62
+ "rewards/rejected": -0.036906156688928604,
63
  "step": 20
64
  },
65
  {
66
+ "debug/losses": 0.2083912342786789,
67
+ "debug/policy_weights": 0.3206340968608856,
68
+ "debug/raw_losses": 0.6488931775093079,
69
  "epoch": 0.5660377358490566,
70
+ "grad_norm": 3.2842987687808507,
71
  "learning_rate": 2.416462557480814e-07,
72
+ "logits/chosen": -2.839723825454712,
73
+ "logits/rejected": -2.842979907989502,
74
+ "logps/chosen": -298.1597595214844,
75
+ "logps/rejected": -313.3924560546875,
76
+ "loss": 0.2009,
77
  "rewards/accuracies": 0.731249988079071,
78
+ "rewards/chosen": -0.01345623005181551,
79
+ "rewards/margins": 0.11617596447467804,
80
+ "rewards/rejected": -0.12963220477104187,
81
  "step": 30
82
  },
83
  {
84
+ "debug/losses": 0.18273566663265228,
85
+ "debug/policy_weights": 0.3026610016822815,
86
+ "debug/raw_losses": 0.5847629904747009,
87
  "epoch": 0.7547169811320755,
88
+ "grad_norm": 2.7988373684679493,
89
  "learning_rate": 8.859303711029939e-08,
90
+ "logits/chosen": -2.8427796363830566,
91
+ "logits/rejected": -2.839672327041626,
92
+ "logps/chosen": -290.6694641113281,
93
+ "logps/rejected": -320.8838806152344,
94
+ "loss": 0.1964,
95
+ "rewards/accuracies": 0.737500011920929,
96
+ "rewards/chosen": -0.011360697448253632,
97
+ "rewards/margins": 0.3060683310031891,
98
+ "rewards/rejected": -0.3174290060997009,
99
  "step": 40
100
  },
101
  {
102
+ "debug/losses": 0.23139193654060364,
103
+ "debug/policy_weights": 0.35352057218551636,
104
+ "debug/raw_losses": 0.6475176811218262,
105
  "epoch": 0.9433962264150944,
106
+ "grad_norm": 3.0097008369658336,
107
  "learning_rate": 5.009573740853313e-09,
108
+ "logits/chosen": -2.877237319946289,
109
+ "logits/rejected": -2.864227533340454,
110
+ "logps/chosen": -282.114501953125,
111
+ "logps/rejected": -316.05438232421875,
112
+ "loss": 0.1937,
113
+ "rewards/accuracies": 0.6625000238418579,
114
+ "rewards/chosen": -0.02784130536019802,
115
+ "rewards/margins": 0.15437331795692444,
116
+ "rewards/rejected": -0.1822146326303482,
117
  "step": 50
118
  },
119
  {
120
  "epoch": 1.0,
121
  "step": 53,
122
  "total_flos": 0.0,
123
+ "train_loss": 0.2009538251836345,
124
+ "train_runtime": 384.7218,
125
+ "train_samples_per_second": 17.545,
126
  "train_steps_per_second": 0.138
127
  }
128
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c8c151d7becb900bed631e41dff74cbdb1243adce5d3b22205a355b75f2b0912
3
- size 5944
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cd92261bd298819f5c7cf60f5729c763c4e3f4d7cdf1279294fbc67f09e3a3b
3
+ size 6456