sin2piusc commited on
Commit
18edc9d
1 Parent(s): 5d00fe4

Delete trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +0 -396
trainer_state.json DELETED
@@ -1,396 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 9.242144177449168,
5
- "eval_steps": 200,
6
- "global_step": 5000,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.36968576709796674,
13
- "grad_norm": 0.3847227096557617,
14
- "learning_rate": 1.32e-05,
15
- "loss": 1.3706,
16
- "step": 200
17
- },
18
- {
19
- "epoch": 0.36968576709796674,
20
- "eval_loss": 1.1454479694366455,
21
- "eval_runtime": 278.1456,
22
- "eval_samples_per_second": 6.917,
23
- "eval_steps_per_second": 3.459,
24
- "step": 200
25
- },
26
- {
27
- "epoch": 0.7393715341959335,
28
- "grad_norm": 0.46645650267601013,
29
- "learning_rate": 2.6533333333333332e-05,
30
- "loss": 0.7963,
31
- "step": 400
32
- },
33
- {
34
- "epoch": 0.7393715341959335,
35
- "eval_loss": 0.5219003558158875,
36
- "eval_runtime": 243.7942,
37
- "eval_samples_per_second": 7.892,
38
- "eval_steps_per_second": 3.946,
39
- "step": 400
40
- },
41
- {
42
- "epoch": 1.1090573012939002,
43
- "grad_norm": 0.4292586147785187,
44
- "learning_rate": 3.986666666666667e-05,
45
- "loss": 0.2503,
46
- "step": 600
47
- },
48
- {
49
- "epoch": 1.1090573012939002,
50
- "eval_loss": 0.21779710054397583,
51
- "eval_runtime": 242.6153,
52
- "eval_samples_per_second": 7.93,
53
- "eval_steps_per_second": 3.965,
54
- "step": 600
55
- },
56
- {
57
- "epoch": 1.478743068391867,
58
- "grad_norm": 0.4367227256298065,
59
- "learning_rate": 5.3200000000000006e-05,
60
- "loss": 0.2062,
61
- "step": 800
62
- },
63
- {
64
- "epoch": 1.478743068391867,
65
- "eval_loss": 0.20053115487098694,
66
- "eval_runtime": 241.5839,
67
- "eval_samples_per_second": 7.964,
68
- "eval_steps_per_second": 3.982,
69
- "step": 800
70
- },
71
- {
72
- "epoch": 1.8484288354898335,
73
- "grad_norm": 0.5879358053207397,
74
- "learning_rate": 6.653333333333334e-05,
75
- "loss": 0.1867,
76
- "step": 1000
77
- },
78
- {
79
- "epoch": 1.8484288354898335,
80
- "eval_loss": 0.18689630925655365,
81
- "eval_runtime": 241.6576,
82
- "eval_samples_per_second": 7.962,
83
- "eval_steps_per_second": 3.981,
84
- "step": 1000
85
- },
86
- {
87
- "epoch": 2.2181146025878005,
88
- "grad_norm": 0.4122108817100525,
89
- "learning_rate": 7.986666666666667e-05,
90
- "loss": 0.1644,
91
- "step": 1200
92
- },
93
- {
94
- "epoch": 2.2181146025878005,
95
- "eval_loss": 0.17384040355682373,
96
- "eval_runtime": 241.52,
97
- "eval_samples_per_second": 7.966,
98
- "eval_steps_per_second": 3.983,
99
- "step": 1200
100
- },
101
- {
102
- "epoch": 2.587800369685767,
103
- "grad_norm": 0.3805679380893707,
104
- "learning_rate": 9.320000000000002e-05,
105
- "loss": 0.1501,
106
- "step": 1400
107
- },
108
- {
109
- "epoch": 2.587800369685767,
110
- "eval_loss": 0.16297519207000732,
111
- "eval_runtime": 241.6105,
112
- "eval_samples_per_second": 7.963,
113
- "eval_steps_per_second": 3.982,
114
- "step": 1400
115
- },
116
- {
117
- "epoch": 2.957486136783734,
118
- "grad_norm": 0.43259820342063904,
119
- "learning_rate": 9.72e-05,
120
- "loss": 0.1386,
121
- "step": 1600
122
- },
123
- {
124
- "epoch": 2.957486136783734,
125
- "eval_loss": 0.15236662328243256,
126
- "eval_runtime": 241.7624,
127
- "eval_samples_per_second": 7.958,
128
- "eval_steps_per_second": 3.979,
129
- "step": 1600
130
- },
131
- {
132
- "epoch": 3.3271719038817005,
133
- "grad_norm": 0.29012274742126465,
134
- "learning_rate": 9.148571428571428e-05,
135
- "loss": 0.1186,
136
- "step": 1800
137
- },
138
- {
139
- "epoch": 3.3271719038817005,
140
- "eval_loss": 0.14582309126853943,
141
- "eval_runtime": 241.5014,
142
- "eval_samples_per_second": 7.967,
143
- "eval_steps_per_second": 3.983,
144
- "step": 1800
145
- },
146
- {
147
- "epoch": 3.6968576709796674,
148
- "grad_norm": 0.3314245641231537,
149
- "learning_rate": 8.577142857142858e-05,
150
- "loss": 0.1086,
151
- "step": 2000
152
- },
153
- {
154
- "epoch": 3.6968576709796674,
155
- "eval_loss": 0.14241622388362885,
156
- "eval_runtime": 242.1199,
157
- "eval_samples_per_second": 7.946,
158
- "eval_steps_per_second": 3.973,
159
- "step": 2000
160
- },
161
- {
162
- "epoch": 4.066543438077634,
163
- "grad_norm": 0.4052281379699707,
164
- "learning_rate": 8.005714285714286e-05,
165
- "loss": 0.1019,
166
- "step": 2200
167
- },
168
- {
169
- "epoch": 4.066543438077634,
170
- "eval_loss": 0.1363690346479416,
171
- "eval_runtime": 241.9172,
172
- "eval_samples_per_second": 7.953,
173
- "eval_steps_per_second": 3.977,
174
- "step": 2200
175
- },
176
- {
177
- "epoch": 4.436229205175601,
178
- "grad_norm": 0.3031346797943115,
179
- "learning_rate": 7.434285714285715e-05,
180
- "loss": 0.0871,
181
- "step": 2400
182
- },
183
- {
184
- "epoch": 4.436229205175601,
185
- "eval_loss": 0.13472846150398254,
186
- "eval_runtime": 241.5757,
187
- "eval_samples_per_second": 7.964,
188
- "eval_steps_per_second": 3.982,
189
- "step": 2400
190
- },
191
- {
192
- "epoch": 4.805914972273568,
193
- "grad_norm": 0.3824409246444702,
194
- "learning_rate": 6.862857142857143e-05,
195
- "loss": 0.085,
196
- "step": 2600
197
- },
198
- {
199
- "epoch": 4.805914972273568,
200
- "eval_loss": 0.13260312378406525,
201
- "eval_runtime": 241.7492,
202
- "eval_samples_per_second": 7.959,
203
- "eval_steps_per_second": 3.979,
204
- "step": 2600
205
- },
206
- {
207
- "epoch": 5.175600739371534,
208
- "grad_norm": 0.35020148754119873,
209
- "learning_rate": 6.291428571428571e-05,
210
- "loss": 0.0746,
211
- "step": 2800
212
- },
213
- {
214
- "epoch": 5.175600739371534,
215
- "eval_loss": 0.13361449539661407,
216
- "eval_runtime": 241.5342,
217
- "eval_samples_per_second": 7.966,
218
- "eval_steps_per_second": 3.983,
219
- "step": 2800
220
- },
221
- {
222
- "epoch": 5.545286506469501,
223
- "grad_norm": 0.26805418729782104,
224
- "learning_rate": 5.72e-05,
225
- "loss": 0.0729,
226
- "step": 3000
227
- },
228
- {
229
- "epoch": 5.545286506469501,
230
- "eval_loss": 0.131169855594635,
231
- "eval_runtime": 241.4633,
232
- "eval_samples_per_second": 7.968,
233
- "eval_steps_per_second": 3.984,
234
- "step": 3000
235
- },
236
- {
237
- "epoch": 5.914972273567468,
238
- "grad_norm": 0.2870423197746277,
239
- "learning_rate": 5.1485714285714295e-05,
240
- "loss": 0.0688,
241
- "step": 3200
242
- },
243
- {
244
- "epoch": 5.914972273567468,
245
- "eval_loss": 0.13156786561012268,
246
- "eval_runtime": 241.8954,
247
- "eval_samples_per_second": 7.954,
248
- "eval_steps_per_second": 3.977,
249
- "step": 3200
250
- },
251
- {
252
- "epoch": 6.284658040665434,
253
- "grad_norm": 0.3393847644329071,
254
- "learning_rate": 4.5771428571428576e-05,
255
- "loss": 0.0598,
256
- "step": 3400
257
- },
258
- {
259
- "epoch": 6.284658040665434,
260
- "eval_loss": 0.1328059583902359,
261
- "eval_runtime": 241.7736,
262
- "eval_samples_per_second": 7.958,
263
- "eval_steps_per_second": 3.979,
264
- "step": 3400
265
- },
266
- {
267
- "epoch": 6.654343807763401,
268
- "grad_norm": 0.41797512769699097,
269
- "learning_rate": 4.005714285714286e-05,
270
- "loss": 0.0574,
271
- "step": 3600
272
- },
273
- {
274
- "epoch": 6.654343807763401,
275
- "eval_loss": 0.13396935164928436,
276
- "eval_runtime": 242.4245,
277
- "eval_samples_per_second": 7.936,
278
- "eval_steps_per_second": 3.968,
279
- "step": 3600
280
- },
281
- {
282
- "epoch": 7.024029574861368,
283
- "grad_norm": 0.3317042291164398,
284
- "learning_rate": 3.434285714285714e-05,
285
- "loss": 0.0598,
286
- "step": 3800
287
- },
288
- {
289
- "epoch": 7.024029574861368,
290
- "eval_loss": 0.13355988264083862,
291
- "eval_runtime": 242.0208,
292
- "eval_samples_per_second": 7.95,
293
- "eval_steps_per_second": 3.975,
294
- "step": 3800
295
- },
296
- {
297
- "epoch": 7.393715341959335,
298
- "grad_norm": 0.27158355712890625,
299
- "learning_rate": 2.8628571428571434e-05,
300
- "loss": 0.0481,
301
- "step": 4000
302
- },
303
- {
304
- "epoch": 7.393715341959335,
305
- "eval_loss": 0.13557623326778412,
306
- "eval_runtime": 241.5121,
307
- "eval_samples_per_second": 7.966,
308
- "eval_steps_per_second": 3.983,
309
- "step": 4000
310
- },
311
- {
312
- "epoch": 7.763401109057301,
313
- "grad_norm": 0.2915257215499878,
314
- "learning_rate": 2.2914285714285718e-05,
315
- "loss": 0.0514,
316
- "step": 4200
317
- },
318
- {
319
- "epoch": 7.763401109057301,
320
- "eval_loss": 0.1365816593170166,
321
- "eval_runtime": 242.2044,
322
- "eval_samples_per_second": 7.944,
323
- "eval_steps_per_second": 3.972,
324
- "step": 4200
325
- },
326
- {
327
- "epoch": 8.133086876155268,
328
- "grad_norm": 0.29942941665649414,
329
- "learning_rate": 1.7199999999999998e-05,
330
- "loss": 0.0465,
331
- "step": 4400
332
- },
333
- {
334
- "epoch": 8.133086876155268,
335
- "eval_loss": 0.13819697499275208,
336
- "eval_runtime": 242.075,
337
- "eval_samples_per_second": 7.948,
338
- "eval_steps_per_second": 3.974,
339
- "step": 4400
340
- },
341
- {
342
- "epoch": 8.502772643253234,
343
- "grad_norm": 0.25705790519714355,
344
- "learning_rate": 1.1485714285714285e-05,
345
- "loss": 0.0428,
346
- "step": 4600
347
- },
348
- {
349
- "epoch": 8.502772643253234,
350
- "eval_loss": 0.13781140744686127,
351
- "eval_runtime": 249.4522,
352
- "eval_samples_per_second": 7.713,
353
- "eval_steps_per_second": 3.856,
354
- "step": 4600
355
- },
356
- {
357
- "epoch": 8.872458410351202,
358
- "grad_norm": 0.3567424714565277,
359
- "learning_rate": 5.7714285714285715e-06,
360
- "loss": 0.043,
361
- "step": 4800
362
- },
363
- {
364
- "epoch": 8.872458410351202,
365
- "eval_loss": 0.13837969303131104,
366
- "eval_runtime": 251.3052,
367
- "eval_samples_per_second": 7.656,
368
- "eval_steps_per_second": 3.828,
369
- "step": 4800
370
- },
371
- {
372
- "epoch": 9.242144177449168,
373
- "grad_norm": 0.21961481869220734,
374
- "learning_rate": 5.714285714285715e-08,
375
- "loss": 0.0425,
376
- "step": 5000
377
- },
378
- {
379
- "epoch": 9.242144177449168,
380
- "eval_loss": 0.138921320438385,
381
- "eval_runtime": 258.7976,
382
- "eval_samples_per_second": 7.434,
383
- "eval_steps_per_second": 3.717,
384
- "step": 5000
385
- }
386
- ],
387
- "logging_steps": 200,
388
- "max_steps": 5000,
389
- "num_input_tokens_seen": 0,
390
- "num_train_epochs": 10,
391
- "save_steps": 200,
392
- "total_flos": 1.6543388139651072e+20,
393
- "train_batch_size": 8,
394
- "trial_name": null,
395
- "trial_params": null
396
- }