Tags: Text Generation, Transformers, Safetensors, mistral, Generated from Trainer, conversational, Inference Endpoints, text-generation-inference
plaguss committed
Commit 921932e
1 Parent(s): 8063738

Model save

README.md CHANGED
@@ -1,33 +1,29 @@
 ---
 license: apache-2.0
-base_model: plaguss/zephyr-7b-spin-iter1-v0
+base_model: plaguss/zephyr-7b-spin-iter2-v0
 tags:
-- alignment-handbook
 - generated_from_trainer
-datasets:
-- argilla/10k_prompts_SPIN_iter1_zephyr_top
-- argilla/10k_prompts_SPIN_iter2_zephyr_top
 model-index:
-- name: outputs
+- name: zephyr-7b-spin-iter3-v0
   results: []
 ---
 
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
 
-# outputs
+# zephyr-7b-spin-iter3-v0
 
-This model is a fine-tuned version of [plaguss/zephyr-7b-spin-iter1-v0](https://huggingface.co/plaguss/zephyr-7b-spin-iter1-v0) on the argilla/10k_prompts_SPIN_iter1_zephyr_top and the argilla/10k_prompts_SPIN_iter2_zephyr_top datasets.
+This model is a fine-tuned version of [plaguss/zephyr-7b-spin-iter2-v0](https://huggingface.co/plaguss/zephyr-7b-spin-iter2-v0) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.1253
-- Rewards/real: -0.5683
-- Rewards/generated: -4.9538
-- Rewards/accuracies: 0.9479
-- Rewards/margins: 4.3854
-- Logps/generated: -739.3701
-- Logps/real: -278.2851
-- Logits/generated: -2.8430
-- Logits/real: -2.8375
+- Loss: 0.1099
+- Rewards/real: -2.9181
+- Rewards/generated: -29.6970
+- Rewards/accuracies: 0.9271
+- Rewards/margins: 26.7789
+- Logps/generated: -702.4378
+- Logps/real: -278.1470
+- Logits/generated: -2.8177
+- Logits/real: -2.8051
 
 ## Model description
 
@@ -64,10 +60,10 @@ The following hyperparameters were used during training:
 
 | Training Loss | Epoch | Step | Validation Loss | Rewards/real | Rewards/generated | Rewards/accuracies | Rewards/margins | Logps/generated | Logps/real | Logits/generated | Logits/real |
 |:-------------:|:-----:|:----:|:---------------:|:------------:|:-----------------:|:------------------:|:---------------:|:---------------:|:----------:|:----------------:|:-----------:|
-| 5.8769 | 0.49 | 25 | 0.1890 | -0.1680 | -2.9833 | 0.9375 | 2.8153 | -719.6649 | -274.2817 | -2.7940 | -2.8382 |
-| 0.1202 | 0.97 | 50 | 0.1440 | -0.4164 | -4.2256 | 0.9479 | 3.8092 | -732.0879 | -276.7652 | -2.8395 | -2.8439 |
-| 0.0754 | 1.46 | 75 | 0.1298 | -0.5468 | -4.7565 | 0.9583 | 4.2097 | -737.3973 | -278.0700 | -2.8411 | -2.8388 |
-| 0.0621 | 1.94 | 100 | 0.1253 | -0.5683 | -4.9538 | 0.9479 | 4.3854 | -739.3701 | -278.2851 | -2.8430 | -2.8375 |
+| 0.2928 | 0.49 | 25 | 0.3951 | -2.6212 | -20.3268 | 0.9062 | 17.7056 | -700.5638 | -278.0876 | -2.8098 | -2.8090 |
+| 0.1487 | 0.97 | 50 | 0.1319 | -2.9077 | -29.1459 | 0.9375 | 26.2382 | -702.3276 | -278.1449 | -2.8218 | -2.8066 |
+| 0.006 | 1.46 | 75 | 0.1269 | -2.6037 | -29.1519 | 0.9583 | 26.5482 | -702.3289 | -278.0841 | -2.8175 | -2.8037 |
+| 0.0086 | 1.94 | 100 | 0.1099 | -2.9181 | -29.6970 | 0.9271 | 26.7789 | -702.4378 | -278.1470 | -2.8177 | -2.8051 |
 
 
 ### Framework versions
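The updated card itself carries no usage snippet, so here is a minimal sketch of how the checkpoint could be loaded for chat-style generation with `transformers`. It assumes the checkpoint is published under the repo id `plaguss/zephyr-7b-spin-iter3-v0` (taken from the `model-index` name above); the snippet is illustrative, not part of the commit.

```python
# Minimal sketch: load the SPIN iter3 checkpoint and run greedy generation.
# The repo id is assumed from the model-index name in the card.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "plaguss/zephyr-7b-spin-iter3-v0"  # assumed repo id

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# The card lists the "conversational" tag, so the chat template is applied here.
messages = [{"role": "user", "content": "Summarize what SPIN fine-tuning does in one sentence."}]
inputs = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

outputs = model.generate(inputs, max_new_tokens=128, do_sample=False)
print(tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True))
```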
all_results.json CHANGED
@@ -1,8 +1,8 @@
 {
     "epoch": 1.98,
-    "train_loss": 0.7072318076503044,
-    "train_runtime": 2304.549,
+    "train_loss": 0.1742458962578573,
+    "train_runtime": 2590.1164,
     "train_samples": 3296,
-    "train_samples_per_second": 2.86,
-    "train_steps_per_second": 0.044
+    "train_samples_per_second": 2.545,
+    "train_steps_per_second": 0.039
 }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:49dc2ee422268337033e90b2ac39a226f2a72315d174e4163302f409152af4bb
+oid sha256:d7ff4ce11c67e15a74bdced2857f247219f6d717c7a745efdf77cb2c5e0b5bfd
 size 4943162336
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:55daecbb744efccf965b5bf66969995029a3a0cffa0e46f0c5947661e04eb026
+oid sha256:f1b2e6888d9714ab406b70141affcde42af62ef635d809e9c047ec04d2a1fb5b
 size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f40dd84b3355fe1d213a931d0dc794558b88fc24d6725e53ff4288630e52d0d5
+oid sha256:fb0d66e326b5f6c239ab28b5feb6fefc38262e43e830c75fbb97179b361d3360
 size 4540516344
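The three `.safetensors` entries above are Git LFS pointer files, not the weights themselves: each records the `oid sha256:` and `size` of the shard it points to. A small sketch of checking a locally downloaded shard against those fields follows; the file path is a placeholder, and the expected values are taken from the new pointer for shard 1.

```python
# Sketch: verify a downloaded shard against its Git LFS pointer (oid + size).
import hashlib
import os


def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream the file and return its hex sha256 digest."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()


shard = "model-00001-of-00003.safetensors"  # placeholder path to the downloaded file
expected_oid = "d7ff4ce11c67e15a74bdced2857f247219f6d717c7a745efdf77cb2c5e0b5bfd"
expected_size = 4943162336

assert os.path.getsize(shard) == expected_size, "size mismatch"
assert sha256_of(shard) == expected_oid, "sha256 mismatch"
print("shard matches the LFS pointer")
```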
train_results.json CHANGED
@@ -1,8 +1,8 @@
 {
     "epoch": 1.98,
-    "train_loss": 0.7072318076503044,
-    "train_runtime": 2304.549,
+    "train_loss": 0.1742458962578573,
+    "train_runtime": 2590.1164,
     "train_samples": 3296,
-    "train_samples_per_second": 2.86,
-    "train_steps_per_second": 0.044
+    "train_samples_per_second": 2.545,
+    "train_steps_per_second": 0.039
 }
trainer_state.json CHANGED
@@ -11,10 +11,10 @@
     {
       "epoch": 0.02,
       "learning_rate": 9.09090909090909e-09,
-      "logits/generated": -2.7517285346984863,
-      "logits/real": -2.7709789276123047,
-      "logps/generated": -844.9539794921875,
-      "logps/real": -335.89251708984375,
+      "logits/generated": -2.911376476287842,
+      "logits/real": -2.89278507232666,
+      "logps/generated": -668.016845703125,
+      "logps/real": -324.0830383300781,
       "loss": 0.6931,
       "rewards/accuracies": 0.0,
       "rewards/generated": 0.0,
@@ -25,215 +25,215 @@
     {
       "epoch": 0.19,
       "learning_rate": 9.09090909090909e-08,
-      "logits/generated": -2.6863832473754883,
-      "logits/real": -2.7313873767852783,
-      "logps/generated": -753.656005859375,
-      "logps/real": -260.1191711425781,
-      "loss": 0.5813,
-      "rewards/accuracies": 0.7430555820465088,
-      "rewards/generated": -0.3012603223323822,
-      "rewards/margins": 0.32114675641059875,
-      "rewards/real": 0.019886476919054985,
+      "logits/generated": -2.848540782928467,
+      "logits/real": -2.8350822925567627,
+      "logps/generated": -617.5262451171875,
+      "logps/real": -255.12559509277344,
+      "loss": 0.8027,
+      "rewards/accuracies": 0.6527777910232544,
+      "rewards/generated": -6.766346454620361,
+      "rewards/margins": 4.9929962158203125,
+      "rewards/real": -1.7733497619628906,
       "step": 10
     },
     {
       "epoch": 0.39,
       "learning_rate": 9.010989010989011e-08,
-      "logits/generated": -2.4782943725585938,
-      "logits/real": -2.5394017696380615,
-      "logps/generated": -1096.2259521484375,
-      "logps/real": -633.4000854492188,
-      "loss": 5.8769,
-      "rewards/accuracies": 0.8999999761581421,
-      "rewards/generated": -33.22383499145508,
-      "rewards/margins": -1.7550216913223267,
-      "rewards/real": -34.978858947753906,
+      "logits/generated": -2.8388895988464355,
+      "logits/real": -2.8330814838409424,
+      "logps/generated": -707.9961547851562,
+      "logps/real": -283.4620056152344,
+      "loss": 0.2928,
+      "rewards/accuracies": 0.9125000238418579,
+      "rewards/generated": -20.150367736816406,
+      "rewards/margins": 17.525970458984375,
+      "rewards/real": -2.6243953704833984,
       "step": 20
     },
     {
       "epoch": 0.49,
-      "eval_logits/generated": -2.7939765453338623,
-      "eval_logits/real": -2.838216781616211,
-      "eval_logps/generated": -719.6648559570312,
-      "eval_logps/real": -274.2817077636719,
-      "eval_loss": 0.18901990354061127,
-      "eval_rewards/accuracies": 0.9375,
-      "eval_rewards/generated": -2.9832763671875,
-      "eval_rewards/margins": 2.815262794494629,
-      "eval_rewards/real": -0.16801361739635468,
-      "eval_runtime": 56.889,
-      "eval_samples_per_second": 6.469,
-      "eval_steps_per_second": 0.211,
+      "eval_logits/generated": -2.8098323345184326,
+      "eval_logits/real": -2.8089659214019775,
+      "eval_logps/generated": -700.5637817382812,
+      "eval_logps/real": -278.0876159667969,
+      "eval_loss": 0.3950892686843872,
+      "eval_rewards/accuracies": 0.90625,
+      "eval_rewards/generated": -20.326812744140625,
+      "eval_rewards/margins": 17.705642700195312,
+      "eval_rewards/real": -2.6211698055267334,
+      "eval_runtime": 75.909,
+      "eval_samples_per_second": 4.848,
+      "eval_steps_per_second": 0.158,
       "step": 25
     },
     {
       "epoch": 0.58,
       "learning_rate": 7.912087912087911e-08,
-      "logits/generated": -2.823836088180542,
-      "logits/real": -2.8936166763305664,
-      "logps/generated": -856.4396362304688,
-      "logps/real": -293.3896179199219,
-      "loss": 0.1538,
-      "rewards/accuracies": 0.987500011920929,
-      "rewards/generated": -3.060887336730957,
-      "rewards/margins": 2.894258975982666,
-      "rewards/real": -0.1666283905506134,
+      "logits/generated": -2.877678394317627,
+      "logits/real": -2.8707680702209473,
+      "logps/generated": -755.6437377929688,
+      "logps/real": -290.04058837890625,
+      "loss": 0.2615,
+      "rewards/accuracies": 0.96875,
+      "rewards/generated": -25.02899742126465,
+      "rewards/margins": 23.474206924438477,
+      "rewards/real": -1.5547949075698853,
       "step": 30
     },
     {
       "epoch": 0.78,
       "learning_rate": 6.813186813186813e-08,
-      "logits/generated": -2.8231825828552246,
-      "logits/real": -2.8379809856414795,
-      "logps/generated": -769.75048828125,
-      "logps/real": -256.47027587890625,
-      "loss": 0.1241,
-      "rewards/accuracies": 0.9937499761581421,
-      "rewards/generated": -3.725442409515381,
-      "rewards/margins": 3.5178775787353516,
-      "rewards/real": -0.20756463706493378,
+      "logits/generated": -2.839968681335449,
+      "logits/real": -2.8121862411499023,
+      "logps/generated": -673.220703125,
+      "logps/real": -257.77001953125,
+      "loss": 0.1816,
+      "rewards/accuracies": 0.956250011920929,
+      "rewards/generated": -24.732364654541016,
+      "rewards/margins": 26.96199607849121,
+      "rewards/real": 2.2296276092529297,
       "step": 40
     },
     {
       "epoch": 0.97,
       "learning_rate": 5.714285714285714e-08,
-      "logits/generated": -2.8316774368286133,
-      "logits/real": -2.8536953926086426,
-      "logps/generated": -803.0462646484375,
-      "logps/real": -274.77337646484375,
-      "loss": 0.1202,
-      "rewards/accuracies": 0.949999988079071,
-      "rewards/generated": -3.983602523803711,
-      "rewards/margins": 3.5648624897003174,
-      "rewards/real": -0.4187401831150055,
+      "logits/generated": -2.8406929969787598,
+      "logits/real": -2.8268864154815674,
+      "logps/generated": -727.8557739257812,
+      "logps/real": -274.7761535644531,
+      "loss": 0.1487,
+      "rewards/accuracies": 0.981249988079071,
+      "rewards/generated": -29.384685516357422,
+      "rewards/margins": 30.1076717376709,
+      "rewards/real": 0.7229812145233154,
       "step": 50
     },
     {
       "epoch": 0.97,
-      "eval_logits/generated": -2.83947491645813,
-      "eval_logits/real": -2.8439435958862305,
-      "eval_logps/generated": -732.087890625,
-      "eval_logps/real": -276.7652282714844,
-      "eval_loss": 0.14404349029064178,
-      "eval_rewards/accuracies": 0.9479166865348816,
-      "eval_rewards/generated": -4.225581645965576,
-      "eval_rewards/margins": 3.809215784072876,
-      "eval_rewards/real": -0.41636598110198975,
-      "eval_runtime": 55.8686,
-      "eval_samples_per_second": 6.587,
-      "eval_steps_per_second": 0.215,
+      "eval_logits/generated": -2.821841239929199,
+      "eval_logits/real": -2.8065903186798096,
+      "eval_logps/generated": -702.32763671875,
+      "eval_logps/real": -278.1448974609375,
+      "eval_loss": 0.131888747215271,
+      "eval_rewards/accuracies": 0.9375,
+      "eval_rewards/generated": -29.145910263061523,
+      "eval_rewards/margins": 26.238248825073242,
+      "eval_rewards/real": -2.9076578617095947,
+      "eval_runtime": 74.4579,
+      "eval_samples_per_second": 4.942,
+      "eval_steps_per_second": 0.161,
       "step": 50
     },
     {
       "epoch": 1.17,
       "learning_rate": 4.615384615384615e-08,
-      "logits/generated": -2.8372151851654053,
-      "logits/real": -2.857466697692871,
-      "logps/generated": -920.998046875,
-      "logps/real": -288.32086181640625,
-      "loss": 0.0744,
-      "rewards/accuracies": 1.0,
-      "rewards/generated": -4.382575035095215,
-      "rewards/margins": 4.1766462326049805,
-      "rewards/real": -0.20592932403087616,
+      "logits/generated": -2.8634510040283203,
+      "logits/real": -2.8573107719421387,
+      "logps/generated": -768.2184448242188,
+      "logps/real": -281.481201171875,
+      "loss": 0.0249,
+      "rewards/accuracies": 0.9937499761581421,
+      "rewards/generated": -33.38450241088867,
+      "rewards/margins": 33.606407165527344,
+      "rewards/real": 0.22189739346504211,
       "step": 60
     },
     {
       "epoch": 1.36,
       "learning_rate": 3.516483516483517e-08,
-      "logits/generated": -2.832484483718872,
-      "logits/real": -2.8351359367370605,
-      "logps/generated": -778.0480346679688,
-      "logps/real": -260.9442443847656,
-      "loss": 0.0754,
+      "logits/generated": -2.846198797225952,
+      "logits/real": -2.818643093109131,
+      "logps/generated": -667.6427001953125,
+      "logps/real": -262.6141052246094,
+      "loss": 0.006,
       "rewards/accuracies": 0.987500011920929,
-      "rewards/generated": -4.6376495361328125,
-      "rewards/margins": 4.451912879943848,
-      "rewards/real": -0.18573713302612305,
+      "rewards/generated": -29.605731964111328,
+      "rewards/margins": 33.2759895324707,
+      "rewards/real": 3.670259952545166,
       "step": 70
     },
     {
       "epoch": 1.46,
-      "eval_logits/generated": -2.841118574142456,
-      "eval_logits/real": -2.838819742202759,
-      "eval_logps/generated": -737.3972778320312,
-      "eval_logps/real": -278.0699768066406,
-      "eval_loss": 0.1297575831413269,
+      "eval_logits/generated": -2.8174571990966797,
+      "eval_logits/real": -2.8036582469940186,
+      "eval_logps/generated": -702.328857421875,
+      "eval_logps/real": -278.0840759277344,
+      "eval_loss": 0.12692251801490784,
       "eval_rewards/accuracies": 0.9583333134651184,
-      "eval_rewards/generated": -4.7565155029296875,
-      "eval_rewards/margins": 4.2096757888793945,
-      "eval_rewards/real": -0.546840488910675,
-      "eval_runtime": 57.5597,
-      "eval_samples_per_second": 6.393,
-      "eval_steps_per_second": 0.208,
+      "eval_rewards/generated": -29.151870727539062,
+      "eval_rewards/margins": 26.54817008972168,
+      "eval_rewards/real": -2.6037025451660156,
+      "eval_runtime": 74.2997,
+      "eval_samples_per_second": 4.953,
+      "eval_steps_per_second": 0.162,
       "step": 75
     },
     {
       "epoch": 1.55,
       "learning_rate": 2.4175824175824175e-08,
-      "logits/generated": -2.8195009231567383,
-      "logits/real": -2.84141206741333,
-      "logps/generated": -862.6012573242188,
-      "logps/real": -276.63531494140625,
-      "loss": 0.0666,
+      "logits/generated": -2.8320305347442627,
+      "logits/real": -2.810762882232666,
+      "logps/generated": -708.1215209960938,
+      "logps/real": -277.090576171875,
+      "loss": 0.0292,
       "rewards/accuracies": 0.9937499761581421,
-      "rewards/generated": -5.086130619049072,
-      "rewards/margins": 4.704850196838379,
-      "rewards/real": -0.3812801241874695,
+      "rewards/generated": -33.68733215332031,
+      "rewards/margins": 36.958492279052734,
+      "rewards/real": 3.271162748336792,
       "step": 80
     },
     {
       "epoch": 1.75,
       "learning_rate": 1.3186813186813187e-08,
-      "logits/generated": -2.8254876136779785,
-      "logits/real": -2.823962450027466,
-      "logps/generated": -835.1746215820312,
-      "logps/real": -272.4798278808594,
-      "loss": 0.0572,
-      "rewards/accuracies": 1.0,
-      "rewards/generated": -5.038660526275635,
-      "rewards/margins": 4.638853073120117,
-      "rewards/real": -0.39980727434158325,
+      "logits/generated": -2.8205840587615967,
+      "logits/real": -2.8130674362182617,
+      "logps/generated": -704.9281005859375,
+      "logps/real": -273.8406066894531,
+      "loss": 0.0322,
+      "rewards/accuracies": 0.987500011920929,
+      "rewards/generated": -31.48798179626465,
+      "rewards/margins": 33.98870849609375,
+      "rewards/real": 2.500725746154785,
       "step": 90
     },
     {
       "epoch": 1.94,
       "learning_rate": 2.197802197802198e-09,
-      "logits/generated": -2.8575081825256348,
-      "logits/real": -2.859795093536377,
-      "logps/generated": -838.92919921875,
-      "logps/real": -267.28704833984375,
-      "loss": 0.0621,
-      "rewards/accuracies": 0.9937499761581421,
-      "rewards/generated": -5.040513038635254,
-      "rewards/margins": 4.70792818069458,
-      "rewards/real": -0.33258455991744995,
+      "logits/generated": -2.872133255004883,
+      "logits/real": -2.856153964996338,
+      "logps/generated": -711.3128662109375,
+      "logps/real": -259.7333068847656,
+      "loss": 0.0086,
+      "rewards/accuracies": 1.0,
+      "rewards/generated": -35.352970123291016,
+      "rewards/margins": 39.97649383544922,
+      "rewards/real": 4.623520851135254,
       "step": 100
     },
     {
       "epoch": 1.94,
-      "eval_logits/generated": -2.84299635887146,
-      "eval_logits/real": -2.8375051021575928,
-      "eval_logps/generated": -739.3701171875,
-      "eval_logps/real": -278.2850646972656,
-      "eval_loss": 0.12532015144824982,
-      "eval_rewards/accuracies": 0.9479166865348816,
-      "eval_rewards/generated": -4.953795433044434,
-      "eval_rewards/margins": 4.385446071624756,
-      "eval_rewards/real": -0.5683497786521912,
-      "eval_runtime": 56.3549,
-      "eval_samples_per_second": 6.53,
-      "eval_steps_per_second": 0.213,
+      "eval_logits/generated": -2.817664384841919,
+      "eval_logits/real": -2.805149793624878,
+      "eval_logps/generated": -702.4378051757812,
+      "eval_logps/real": -278.1470031738281,
+      "eval_loss": 0.10993197560310364,
+      "eval_rewards/accuracies": 0.9270833134651184,
+      "eval_rewards/generated": -29.69696617126465,
+      "eval_rewards/margins": 26.778860092163086,
+      "eval_rewards/real": -2.918107748031616,
+      "eval_runtime": 73.9005,
+      "eval_samples_per_second": 4.98,
+      "eval_steps_per_second": 0.162,
       "step": 100
     },
     {
       "epoch": 1.98,
       "step": 102,
       "total_flos": 0.0,
-      "train_loss": 0.7072318076503044,
-      "train_runtime": 2304.549,
-      "train_samples_per_second": 2.86,
-      "train_steps_per_second": 0.044
+      "train_loss": 0.1742458962578573,
+      "train_runtime": 2590.1164,
+      "train_samples_per_second": 2.545,
+      "train_steps_per_second": 0.039
     }
   ],
   "logging_steps": 10,