ShenaoZhang committed on
Commit
79f1ab2
1 Parent(s): 40180ec

Model save

Browse files
README.md CHANGED
@@ -2,14 +2,9 @@
2
  license: mit
3
  base_model: ShenaoZhang/0.01_ablation_4iters_bs128_nodpo_iter_1
4
  tags:
5
- - alignment-handbook
6
- - generated_from_trainer
7
  - trl
8
  - dpo
9
  - generated_from_trainer
10
- datasets:
11
- - updated
12
- - original
13
  model-index:
14
  - name: 0.01_ablation_4iters_bs128_nodpo_iter_2
15
  results: []
@@ -20,7 +15,7 @@ should probably proofread and complete it, then remove this comment. -->
20
 
21
  # 0.01_ablation_4iters_bs128_nodpo_iter_2
22
 
23
- This model is a fine-tuned version of [ShenaoZhang/0.01_ablation_4iters_bs128_nodpo_iter_1](https://huggingface.co/ShenaoZhang/0.01_ablation_4iters_bs128_nodpo_iter_1) on the updated and the original datasets.
24
 
25
  ## Model description
26
 
 
2
  license: mit
3
  base_model: ShenaoZhang/0.01_ablation_4iters_bs128_nodpo_iter_1
4
  tags:
 
 
5
  - trl
6
  - dpo
7
  - generated_from_trainer
 
 
 
8
  model-index:
9
  - name: 0.01_ablation_4iters_bs128_nodpo_iter_2
10
  results: []
 
15
 
16
  # 0.01_ablation_4iters_bs128_nodpo_iter_2
17
 
18
+ This model is a fine-tuned version of [ShenaoZhang/0.01_ablation_4iters_bs128_nodpo_iter_1](https://huggingface.co/ShenaoZhang/0.01_ablation_4iters_bs128_nodpo_iter_1) on an unspecified dataset.
19
 
20
  ## Model description
21
 
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.4913304132573745,
4
- "train_runtime": 3553.949,
5
  "train_samples": 15283,
6
- "train_samples_per_second": 4.3,
7
  "train_steps_per_second": 0.033
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.5008462216673779,
4
+ "train_runtime": 3576.23,
5
  "train_samples": 15283,
6
+ "train_samples_per_second": 4.273,
7
  "train_steps_per_second": 0.033
8
  }
config.json CHANGED
@@ -21,6 +21,6 @@
21
  "tie_word_embeddings": false,
22
  "torch_dtype": "bfloat16",
23
  "transformers_version": "4.36.2",
24
- "use_cache": true,
25
  "vocab_size": 32000
26
  }
 
21
  "tie_word_embeddings": false,
22
  "torch_dtype": "bfloat16",
23
  "transformers_version": "4.36.2",
24
+ "use_cache": false,
25
  "vocab_size": 32000
26
  }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:22ffe23c047433d75550cf9b256a8d4eb0b30f0705b83e58b319acf1ec02f614
3
  size 4943162336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:636b4cedcb0312c208321733c21681d09266193b814b0aa53fe525721c240ad4
3
  size 4943162336
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be26fde64fb76d74226782cbe0f48a1d5b93d20b18ac17dbbfea045c152b8b7b
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8cd6e69535273cb0000d1f28f84b8df3477963905ad75ffff50e5b78cf90100
3
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b5338a4ac27088d16b7f36c160c0e4b60498df26b230e74df08b44f706209a7
3
  size 4540516344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bdd2bc441aa26a2b7ff8505f6841c17bef188cadc87bbc2d8cf3bd020348caf
3
  size 4540516344
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.4913304132573745,
4
- "train_runtime": 3553.949,
5
  "train_samples": 15283,
6
- "train_samples_per_second": 4.3,
7
  "train_steps_per_second": 0.033
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.5008462216673779,
4
+ "train_runtime": 3576.23,
5
  "train_samples": 15283,
6
+ "train_samples_per_second": 4.273,
7
  "train_steps_per_second": 0.033
8
  }
trainer_state.json CHANGED
@@ -11,12 +11,12 @@
11
  {
12
  "epoch": 0.01,
13
  "learning_rate": 4.166666666666666e-08,
14
- "logits/chosen": -2.613878011703491,
15
- "logits/rejected": -2.5660829544067383,
16
- "logps/chosen": -217.45541381835938,
17
- "logps/pi_response": -177.27903747558594,
18
- "logps/ref_response": -177.27903747558594,
19
- "logps/rejected": -366.14935302734375,
20
  "loss": 0.6931,
21
  "rewards/accuracies": 0.0,
22
  "rewards/chosen": 0.0,
@@ -27,186 +27,186 @@
27
  {
28
  "epoch": 0.08,
29
  "learning_rate": 4.1666666666666667e-07,
30
- "logits/chosen": -2.6187098026275635,
31
- "logits/rejected": -2.5807690620422363,
32
- "logps/chosen": -267.33782958984375,
33
- "logps/pi_response": -188.70687866210938,
34
- "logps/ref_response": -188.9645538330078,
35
- "logps/rejected": -408.8757019042969,
36
- "loss": 0.6806,
37
- "rewards/accuracies": 0.5763888955116272,
38
- "rewards/chosen": -0.011542385444045067,
39
- "rewards/margins": 0.028939779847860336,
40
- "rewards/rejected": -0.040482163429260254,
41
  "step": 10
42
  },
43
  {
44
  "epoch": 0.17,
45
  "learning_rate": 4.931352528237397e-07,
46
- "logits/chosen": -2.49855637550354,
47
- "logits/rejected": -2.4376301765441895,
48
- "logps/chosen": -310.2395324707031,
49
- "logps/pi_response": -189.06040954589844,
50
- "logps/ref_response": -197.4763641357422,
51
- "logps/rejected": -463.3460388183594,
52
- "loss": 0.6019,
53
- "rewards/accuracies": 0.731249988079071,
54
- "rewards/chosen": -0.2946819067001343,
55
- "rewards/margins": 0.44170132279396057,
56
- "rewards/rejected": -0.7363831996917725,
57
  "step": 20
58
  },
59
  {
60
  "epoch": 0.25,
61
  "learning_rate": 4.658920803689553e-07,
62
- "logits/chosen": -2.4012527465820312,
63
- "logits/rejected": -2.36314058303833,
64
- "logps/chosen": -331.32171630859375,
65
- "logps/pi_response": -186.35186767578125,
66
- "logps/ref_response": -172.22605895996094,
67
- "logps/rejected": -547.4236450195312,
68
- "loss": 0.5577,
69
- "rewards/accuracies": 0.731249988079071,
70
- "rewards/chosen": -0.7254283428192139,
71
- "rewards/margins": 0.8743426203727722,
72
- "rewards/rejected": -1.5997710227966309,
73
  "step": 30
74
  },
75
  {
76
  "epoch": 0.33,
77
  "learning_rate": 4.201712553872657e-07,
78
- "logits/chosen": -2.3269619941711426,
79
- "logits/rejected": -2.240555763244629,
80
- "logps/chosen": -363.3132629394531,
81
- "logps/pi_response": -221.7946014404297,
82
- "logps/ref_response": -197.30540466308594,
83
- "logps/rejected": -587.7777709960938,
84
- "loss": 0.4939,
85
- "rewards/accuracies": 0.762499988079071,
86
- "rewards/chosen": -0.7226591110229492,
87
- "rewards/margins": 0.8834163546562195,
88
- "rewards/rejected": -1.6060755252838135,
89
  "step": 40
90
  },
91
  {
92
  "epoch": 0.42,
93
  "learning_rate": 3.598859066780754e-07,
94
- "logits/chosen": -2.2625174522399902,
95
- "logits/rejected": -2.212681293487549,
96
- "logps/chosen": -370.35406494140625,
97
- "logps/pi_response": -239.9530792236328,
98
- "logps/ref_response": -191.80979919433594,
99
- "logps/rejected": -613.0040893554688,
100
- "loss": 0.4833,
101
- "rewards/accuracies": 0.8062499761581421,
102
- "rewards/chosen": -0.9313956499099731,
103
- "rewards/margins": 1.1655590534210205,
104
- "rewards/rejected": -2.096954822540283,
105
  "step": 50
106
  },
107
  {
108
  "epoch": 0.5,
109
  "learning_rate": 2.9019570347986706e-07,
110
- "logits/chosen": -2.123149871826172,
111
- "logits/rejected": -2.0870766639709473,
112
- "logps/chosen": -417.89910888671875,
113
- "logps/pi_response": -260.7689514160156,
114
- "logps/ref_response": -176.25942993164062,
115
- "logps/rejected": -641.0624389648438,
116
- "loss": 0.4766,
117
- "rewards/accuracies": 0.7437499761581421,
118
- "rewards/chosen": -1.3918354511260986,
119
- "rewards/margins": 0.9974287152290344,
120
- "rewards/rejected": -2.389263868331909,
121
  "step": 60
122
  },
123
  {
124
  "epoch": 0.59,
125
  "learning_rate": 2.1706525253979534e-07,
126
- "logits/chosen": -2.1073548793792725,
127
- "logits/rejected": -2.020155429840088,
128
- "logps/chosen": -420.85125732421875,
129
- "logps/pi_response": -286.98138427734375,
130
- "logps/ref_response": -186.5110626220703,
131
- "logps/rejected": -682.7928466796875,
132
- "loss": 0.4544,
133
- "rewards/accuracies": 0.793749988079071,
134
- "rewards/chosen": -1.4277279376983643,
135
- "rewards/margins": 1.2013485431671143,
136
- "rewards/rejected": -2.6290764808654785,
137
  "step": 70
138
  },
139
  {
140
  "epoch": 0.67,
141
  "learning_rate": 1.4675360263490295e-07,
142
- "logits/chosen": -2.016476631164551,
143
- "logits/rejected": -1.9265648126602173,
144
- "logps/chosen": -421.92877197265625,
145
- "logps/pi_response": -293.125732421875,
146
- "logps/ref_response": -171.429931640625,
147
- "logps/rejected": -659.17822265625,
148
- "loss": 0.4232,
149
- "rewards/accuracies": 0.762499988079071,
150
- "rewards/chosen": -1.6673657894134521,
151
- "rewards/margins": 1.0874935388565063,
152
- "rewards/rejected": -2.754859447479248,
153
  "step": 80
154
  },
155
  {
156
  "epoch": 0.75,
157
  "learning_rate": 8.527854855097224e-08,
158
- "logits/chosen": -2.043644428253174,
159
- "logits/rejected": -1.8766616582870483,
160
- "logps/chosen": -490.6349182128906,
161
- "logps/pi_response": -333.29473876953125,
162
- "logps/ref_response": -185.3270263671875,
163
- "logps/rejected": -671.7935791015625,
164
- "loss": 0.4292,
165
- "rewards/accuracies": 0.7437499761581421,
166
- "rewards/chosen": -2.052643299102783,
167
- "rewards/margins": 0.9849675893783569,
168
- "rewards/rejected": -3.0376105308532715,
169
  "step": 90
170
  },
171
  {
172
  "epoch": 0.84,
173
  "learning_rate": 3.790158337517127e-08,
174
- "logits/chosen": -1.9523603916168213,
175
- "logits/rejected": -1.9160430431365967,
176
- "logps/chosen": -483.1397399902344,
177
- "logps/pi_response": -349.1231994628906,
178
- "logps/ref_response": -181.93067932128906,
179
- "logps/rejected": -702.1567993164062,
180
- "loss": 0.4485,
181
  "rewards/accuracies": 0.737500011920929,
182
- "rewards/chosen": -2.182821273803711,
183
- "rewards/margins": 0.9882111549377441,
184
- "rewards/rejected": -3.171032428741455,
185
  "step": 100
186
  },
187
  {
188
  "epoch": 0.92,
189
  "learning_rate": 8.677580722139671e-09,
190
- "logits/chosen": -1.974956750869751,
191
- "logits/rejected": -1.8683421611785889,
192
- "logps/chosen": -464.55010986328125,
193
- "logps/pi_response": -351.19903564453125,
194
- "logps/ref_response": -178.7843780517578,
195
- "logps/rejected": -691.6851196289062,
196
- "loss": 0.4197,
197
- "rewards/accuracies": 0.800000011920929,
198
- "rewards/chosen": -2.106511116027832,
199
- "rewards/margins": 1.0789883136749268,
200
- "rewards/rejected": -3.185499429702759,
201
  "step": 110
202
  },
203
  {
204
  "epoch": 1.0,
205
  "step": 119,
206
  "total_flos": 0.0,
207
- "train_loss": 0.4913304132573745,
208
- "train_runtime": 3553.949,
209
- "train_samples_per_second": 4.3,
210
  "train_steps_per_second": 0.033
211
  }
212
  ],
 
11
  {
12
  "epoch": 0.01,
13
  "learning_rate": 4.166666666666666e-08,
14
+ "logits/chosen": -2.528585433959961,
15
+ "logits/rejected": -2.36845064163208,
16
+ "logps/chosen": -359.6035461425781,
17
+ "logps/pi_response": -267.75177001953125,
18
+ "logps/ref_response": -267.75177001953125,
19
+ "logps/rejected": -414.00299072265625,
20
  "loss": 0.6931,
21
  "rewards/accuracies": 0.0,
22
  "rewards/chosen": 0.0,
 
27
  {
28
  "epoch": 0.08,
29
  "learning_rate": 4.1666666666666667e-07,
30
+ "logits/chosen": -2.517207145690918,
31
+ "logits/rejected": -2.463898181915283,
32
+ "logps/chosen": -328.0975341796875,
33
+ "logps/pi_response": -269.1342468261719,
34
+ "logps/ref_response": -269.0060119628906,
35
+ "logps/rejected": -410.38604736328125,
36
+ "loss": 0.6864,
37
+ "rewards/accuracies": 0.4930555522441864,
38
+ "rewards/chosen": 0.0020175855606794357,
39
+ "rewards/margins": 0.008086067624390125,
40
+ "rewards/rejected": -0.006068482529371977,
41
  "step": 10
42
  },
43
  {
44
  "epoch": 0.17,
45
  "learning_rate": 4.931352528237397e-07,
46
+ "logits/chosen": -2.467337131500244,
47
+ "logits/rejected": -2.404489755630493,
48
+ "logps/chosen": -374.01763916015625,
49
+ "logps/pi_response": -322.4764709472656,
50
+ "logps/ref_response": -265.96600341796875,
51
+ "logps/rejected": -440.1626892089844,
52
+ "loss": 0.6083,
53
+ "rewards/accuracies": 0.6812499761581421,
54
+ "rewards/chosen": -0.3233157694339752,
55
+ "rewards/margins": 0.2321336269378662,
56
+ "rewards/rejected": -0.5554494261741638,
57
  "step": 20
58
  },
59
  {
60
  "epoch": 0.25,
61
  "learning_rate": 4.658920803689553e-07,
62
+ "logits/chosen": -2.250195264816284,
63
+ "logits/rejected": -2.1748290061950684,
64
+ "logps/chosen": -426.3290100097656,
65
+ "logps/pi_response": -466.5293884277344,
66
+ "logps/ref_response": -280.33502197265625,
67
+ "logps/rejected": -543.4297485351562,
68
+ "loss": 0.5736,
69
+ "rewards/accuracies": 0.762499988079071,
70
+ "rewards/chosen": -0.9779343605041504,
71
+ "rewards/margins": 0.7406744360923767,
72
+ "rewards/rejected": -1.7186088562011719,
73
  "step": 30
74
  },
75
  {
76
  "epoch": 0.33,
77
  "learning_rate": 4.201712553872657e-07,
78
+ "logits/chosen": -2.1817965507507324,
79
+ "logits/rejected": -2.1026203632354736,
80
+ "logps/chosen": -485.78289794921875,
81
+ "logps/pi_response": -538.3165283203125,
82
+ "logps/ref_response": -283.85516357421875,
83
+ "logps/rejected": -658.8212280273438,
84
+ "loss": 0.5045,
85
+ "rewards/accuracies": 0.7749999761581421,
86
+ "rewards/chosen": -1.3960727453231812,
87
+ "rewards/margins": 1.1884379386901855,
88
+ "rewards/rejected": -2.584510564804077,
89
  "step": 40
90
  },
91
  {
92
  "epoch": 0.42,
93
  "learning_rate": 3.598859066780754e-07,
94
+ "logits/chosen": -2.1869587898254395,
95
+ "logits/rejected": -2.1076114177703857,
96
+ "logps/chosen": -507.6856994628906,
97
+ "logps/pi_response": -538.4204711914062,
98
+ "logps/ref_response": -288.7411193847656,
99
+ "logps/rejected": -600.2962036132812,
100
+ "loss": 0.4632,
101
+ "rewards/accuracies": 0.737500011920929,
102
+ "rewards/chosen": -1.3790150880813599,
103
+ "rewards/margins": 0.8974047899246216,
104
+ "rewards/rejected": -2.2764198780059814,
105
  "step": 50
106
  },
107
  {
108
  "epoch": 0.5,
109
  "learning_rate": 2.9019570347986706e-07,
110
+ "logits/chosen": -2.127645969390869,
111
+ "logits/rejected": -2.0537991523742676,
112
+ "logps/chosen": -503.4345703125,
113
+ "logps/pi_response": -531.116455078125,
114
+ "logps/ref_response": -279.95196533203125,
115
+ "logps/rejected": -640.44189453125,
116
+ "loss": 0.446,
117
+ "rewards/accuracies": 0.8062499761581421,
118
+ "rewards/chosen": -1.5597326755523682,
119
+ "rewards/margins": 1.0874087810516357,
120
+ "rewards/rejected": -2.647141218185425,
121
  "step": 60
122
  },
123
  {
124
  "epoch": 0.59,
125
  "learning_rate": 2.1706525253979534e-07,
126
+ "logits/chosen": -2.116987943649292,
127
+ "logits/rejected": -2.034440279006958,
128
+ "logps/chosen": -514.0672607421875,
129
+ "logps/pi_response": -500.27056884765625,
130
+ "logps/ref_response": -249.1424560546875,
131
+ "logps/rejected": -607.4042358398438,
132
+ "loss": 0.4319,
133
+ "rewards/accuracies": 0.8062499761581421,
134
+ "rewards/chosen": -1.739128828048706,
135
+ "rewards/margins": 0.8193766474723816,
136
+ "rewards/rejected": -2.5585055351257324,
137
  "step": 70
138
  },
139
  {
140
  "epoch": 0.67,
141
  "learning_rate": 1.4675360263490295e-07,
142
+ "logits/chosen": -2.014500379562378,
143
+ "logits/rejected": -1.956604242324829,
144
+ "logps/chosen": -505.2294921875,
145
+ "logps/pi_response": -537.6165771484375,
146
+ "logps/ref_response": -249.3338165283203,
147
+ "logps/rejected": -657.5457153320312,
148
+ "loss": 0.4453,
149
+ "rewards/accuracies": 0.768750011920929,
150
+ "rewards/chosen": -1.9716640710830688,
151
+ "rewards/margins": 1.018070936203003,
152
+ "rewards/rejected": -2.9897356033325195,
153
  "step": 80
154
  },
155
  {
156
  "epoch": 0.75,
157
  "learning_rate": 8.527854855097224e-08,
158
+ "logits/chosen": -2.076690673828125,
159
+ "logits/rejected": -2.0011613368988037,
160
+ "logps/chosen": -553.7380981445312,
161
+ "logps/pi_response": -552.9459228515625,
162
+ "logps/ref_response": -259.7544860839844,
163
+ "logps/rejected": -676.1278686523438,
164
+ "loss": 0.455,
165
+ "rewards/accuracies": 0.762499988079071,
166
+ "rewards/chosen": -2.102799892425537,
167
+ "rewards/margins": 1.045424222946167,
168
+ "rewards/rejected": -3.148223638534546,
169
  "step": 90
170
  },
171
  {
172
  "epoch": 0.84,
173
  "learning_rate": 3.790158337517127e-08,
174
+ "logits/chosen": -2.011647939682007,
175
+ "logits/rejected": -1.9519059658050537,
176
+ "logps/chosen": -584.7652587890625,
177
+ "logps/pi_response": -556.218017578125,
178
+ "logps/ref_response": -270.5803527832031,
179
+ "logps/rejected": -668.7252197265625,
180
+ "loss": 0.4638,
181
  "rewards/accuracies": 0.737500011920929,
182
+ "rewards/chosen": -2.02268123626709,
183
+ "rewards/margins": 0.9910328984260559,
184
+ "rewards/rejected": -3.013713836669922,
185
  "step": 100
186
  },
187
  {
188
  "epoch": 0.92,
189
  "learning_rate": 8.677580722139671e-09,
190
+ "logits/chosen": -2.1074235439300537,
191
+ "logits/rejected": -1.9840141534805298,
192
+ "logps/chosen": -589.9749145507812,
193
+ "logps/pi_response": -545.8538818359375,
194
+ "logps/ref_response": -263.1878967285156,
195
+ "logps/rejected": -688.42529296875,
196
+ "loss": 0.4825,
197
+ "rewards/accuracies": 0.78125,
198
+ "rewards/chosen": -2.097093343734741,
199
+ "rewards/margins": 1.098931908607483,
200
+ "rewards/rejected": -3.1960251331329346,
201
  "step": 110
202
  },
203
  {
204
  "epoch": 1.0,
205
  "step": 119,
206
  "total_flos": 0.0,
207
+ "train_loss": 0.5008462216673779,
208
+ "train_runtime": 3576.23,
209
+ "train_samples_per_second": 4.273,
210
  "train_steps_per_second": 0.033
211
  }
212
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5fc9210e4528085e516084d985b3a15d6874f4e92aa31af54975e2c0cf8132d8
3
  size 6008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d422ab21f3020cc8f88fdb3a9a3c5233d2b1956f92df486f8a16b09196b42a6
3
  size 6008