File size: 10,927 Bytes
bc5f98f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 20.0,
  "eval_steps": 500,
  "global_step": 2440,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "grad_norm": 5.062061786651611,
      "learning_rate": 4.75e-05,
      "loss": 0.566,
      "step": 122
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.7142857142857143,
      "eval_f1": 0.6402562480227776,
      "eval_loss": 0.5205540657043457,
      "eval_precision": 0.6483653398896937,
      "eval_recall": 0.6353427895981087,
      "eval_runtime": 5.1909,
      "eval_samples_per_second": 76.865,
      "eval_steps_per_second": 9.632,
      "step": 122
    },
    {
      "epoch": 2.0,
      "grad_norm": 4.104482173919678,
      "learning_rate": 4.5e-05,
      "loss": 0.5117,
      "step": 244
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.7343358395989975,
      "eval_f1": 0.69391211208893,
      "eval_loss": 0.5062463879585266,
      "eval_precision": 0.6880119239984399,
      "eval_recall": 0.7045371885797418,
      "eval_runtime": 5.0488,
      "eval_samples_per_second": 79.028,
      "eval_steps_per_second": 9.903,
      "step": 244
    },
    {
      "epoch": 3.0,
      "grad_norm": 4.243982791900635,
      "learning_rate": 4.25e-05,
      "loss": 0.4804,
      "step": 366
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.7669172932330827,
      "eval_f1": 0.7152209115816456,
      "eval_loss": 0.46674054861068726,
      "eval_precision": 0.7182055749128919,
      "eval_recall": 0.7125841062011276,
      "eval_runtime": 5.0519,
      "eval_samples_per_second": 78.98,
      "eval_steps_per_second": 9.897,
      "step": 366
    },
    {
      "epoch": 4.0,
      "grad_norm": 2.68826961517334,
      "learning_rate": 4e-05,
      "loss": 0.4345,
      "step": 488
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.7919799498746867,
      "eval_f1": 0.744501107107864,
      "eval_loss": 0.43496260046958923,
      "eval_precision": 0.7494180559924504,
      "eval_recall": 0.7403164211674851,
      "eval_runtime": 5.0763,
      "eval_samples_per_second": 78.601,
      "eval_steps_per_second": 9.85,
      "step": 488
    },
    {
      "epoch": 5.0,
      "grad_norm": 2.9247703552246094,
      "learning_rate": 3.7500000000000003e-05,
      "loss": 0.4081,
      "step": 610
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.7944862155388471,
      "eval_f1": 0.7660995138690305,
      "eval_loss": 0.43371620774269104,
      "eval_precision": 0.7565013111888113,
      "eval_recall": 0.7845971994908165,
      "eval_runtime": 5.0744,
      "eval_samples_per_second": 78.631,
      "eval_steps_per_second": 9.853,
      "step": 610
    },
    {
      "epoch": 6.0,
      "grad_norm": 3.43408203125,
      "learning_rate": 3.5e-05,
      "loss": 0.3793,
      "step": 732
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.8195488721804511,
      "eval_f1": 0.7753378378378378,
      "eval_loss": 0.39230969548225403,
      "eval_precision": 0.7857142857142857,
      "eval_recall": 0.7673213311511184,
      "eval_runtime": 5.0665,
      "eval_samples_per_second": 78.752,
      "eval_steps_per_second": 9.869,
      "step": 732
    },
    {
      "epoch": 7.0,
      "grad_norm": 1.72346031665802,
      "learning_rate": 3.2500000000000004e-05,
      "loss": 0.3665,
      "step": 854
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.8295739348370927,
      "eval_f1": 0.7933776044839771,
      "eval_loss": 0.3765198886394501,
      "eval_precision": 0.7949020208205757,
      "eval_recall": 0.7919167121294781,
      "eval_runtime": 5.0594,
      "eval_samples_per_second": 78.863,
      "eval_steps_per_second": 9.883,
      "step": 854
    },
    {
      "epoch": 8.0,
      "grad_norm": 3.3123555183410645,
      "learning_rate": 3e-05,
      "loss": 0.3471,
      "step": 976
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.8370927318295739,
      "eval_f1": 0.796615353247018,
      "eval_loss": 0.368134468793869,
      "eval_precision": 0.8088983050847458,
      "eval_recall": 0.7872340425531914,
      "eval_runtime": 5.0543,
      "eval_samples_per_second": 78.942,
      "eval_steps_per_second": 9.892,
      "step": 976
    },
    {
      "epoch": 9.0,
      "grad_norm": 4.656528949737549,
      "learning_rate": 2.7500000000000004e-05,
      "loss": 0.3498,
      "step": 1098
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.8320802005012531,
      "eval_f1": 0.790357364116157,
      "eval_loss": 0.3676688075065613,
      "eval_precision": 0.8023956975228161,
      "eval_recall": 0.7811874886342971,
      "eval_runtime": 5.0959,
      "eval_samples_per_second": 78.299,
      "eval_steps_per_second": 9.812,
      "step": 1098
    },
    {
      "epoch": 10.0,
      "grad_norm": 6.076303482055664,
      "learning_rate": 2.5e-05,
      "loss": 0.3282,
      "step": 1220
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.8345864661654135,
      "eval_f1": 0.7917273014868713,
      "eval_loss": 0.363395094871521,
      "eval_precision": 0.8074456774536514,
      "eval_recall": 0.780460083651573,
      "eval_runtime": 5.0935,
      "eval_samples_per_second": 78.335,
      "eval_steps_per_second": 9.816,
      "step": 1220
    },
    {
      "epoch": 11.0,
      "grad_norm": 4.567991256713867,
      "learning_rate": 2.25e-05,
      "loss": 0.3149,
      "step": 1342
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.8446115288220551,
      "eval_f1": 0.8065409159159159,
      "eval_loss": 0.3537313938140869,
      "eval_precision": 0.8180272108843537,
      "eval_recall": 0.7975541007455902,
      "eval_runtime": 5.0734,
      "eval_samples_per_second": 78.645,
      "eval_steps_per_second": 9.855,
      "step": 1342
    },
    {
      "epoch": 12.0,
      "grad_norm": 11.14825439453125,
      "learning_rate": 2e-05,
      "loss": 0.3092,
      "step": 1464
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.849624060150376,
      "eval_f1": 0.8167483159828537,
      "eval_loss": 0.3528764247894287,
      "eval_precision": 0.8201621387462095,
      "eval_recall": 0.8136024731769412,
      "eval_runtime": 5.0609,
      "eval_samples_per_second": 78.84,
      "eval_steps_per_second": 9.88,
      "step": 1464
    },
    {
      "epoch": 13.0,
      "grad_norm": 4.867825031280518,
      "learning_rate": 1.75e-05,
      "loss": 0.3135,
      "step": 1586
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.8521303258145363,
      "eval_f1": 0.8121903546212454,
      "eval_loss": 0.3471047580242157,
      "eval_precision": 0.8331751305173232,
      "eval_recall": 0.7978723404255319,
      "eval_runtime": 5.0541,
      "eval_samples_per_second": 78.945,
      "eval_steps_per_second": 9.893,
      "step": 1586
    },
    {
      "epoch": 14.0,
      "grad_norm": 12.051921844482422,
      "learning_rate": 1.5e-05,
      "loss": 0.3103,
      "step": 1708
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.8621553884711779,
      "eval_f1": 0.8269335415335841,
      "eval_loss": 0.3426941931247711,
      "eval_precision": 0.8430382253911666,
      "eval_recall": 0.8149663575195489,
      "eval_runtime": 5.0514,
      "eval_samples_per_second": 78.988,
      "eval_steps_per_second": 9.898,
      "step": 1708
    },
    {
      "epoch": 15.0,
      "grad_norm": 1.2898627519607544,
      "learning_rate": 1.25e-05,
      "loss": 0.2974,
      "step": 1830
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.8621553884711779,
      "eval_f1": 0.8297847585805701,
      "eval_loss": 0.33716997504234314,
      "eval_precision": 0.8385357006491028,
      "eval_recall": 0.8224677214038916,
      "eval_runtime": 5.0651,
      "eval_samples_per_second": 78.774,
      "eval_steps_per_second": 9.871,
      "step": 1830
    },
    {
      "epoch": 16.0,
      "grad_norm": 6.9146409034729,
      "learning_rate": 1e-05,
      "loss": 0.2905,
      "step": 1952
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.8696741854636592,
      "eval_f1": 0.8386324041811847,
      "eval_loss": 0.3345378339290619,
      "eval_precision": 0.8487869670976828,
      "eval_recall": 0.830287324968176,
      "eval_runtime": 5.0595,
      "eval_samples_per_second": 78.862,
      "eval_steps_per_second": 9.882,
      "step": 1952
    },
    {
      "epoch": 17.0,
      "grad_norm": 4.737354278564453,
      "learning_rate": 7.5e-06,
      "loss": 0.2895,
      "step": 2074
    },
    {
      "epoch": 17.0,
      "eval_accuracy": 0.8621553884711779,
      "eval_f1": 0.8269335415335841,
      "eval_loss": 0.3339170217514038,
      "eval_precision": 0.8430382253911666,
      "eval_recall": 0.8149663575195489,
      "eval_runtime": 5.0983,
      "eval_samples_per_second": 78.261,
      "eval_steps_per_second": 9.807,
      "step": 2074
    },
    {
      "epoch": 18.0,
      "grad_norm": 3.6233842372894287,
      "learning_rate": 5e-06,
      "loss": 0.2922,
      "step": 2196
    },
    {
      "epoch": 18.0,
      "eval_accuracy": 0.8696741854636592,
      "eval_f1": 0.8386324041811847,
      "eval_loss": 0.3318663537502289,
      "eval_precision": 0.8487869670976828,
      "eval_recall": 0.830287324968176,
      "eval_runtime": 5.0967,
      "eval_samples_per_second": 78.285,
      "eval_steps_per_second": 9.81,
      "step": 2196
    },
    {
      "epoch": 19.0,
      "grad_norm": 4.824616432189941,
      "learning_rate": 2.5e-06,
      "loss": 0.2843,
      "step": 2318
    },
    {
      "epoch": 19.0,
      "eval_accuracy": 0.8621553884711779,
      "eval_f1": 0.8269335415335841,
      "eval_loss": 0.3319249749183655,
      "eval_precision": 0.8430382253911666,
      "eval_recall": 0.8149663575195489,
      "eval_runtime": 5.0765,
      "eval_samples_per_second": 78.598,
      "eval_steps_per_second": 9.849,
      "step": 2318
    },
    {
      "epoch": 20.0,
      "grad_norm": 7.412130832672119,
      "learning_rate": 0.0,
      "loss": 0.287,
      "step": 2440
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.8621553884711779,
      "eval_f1": 0.8279052989013229,
      "eval_loss": 0.33124828338623047,
      "eval_precision": 0.8414113428943938,
      "eval_recall": 0.8174668121476631,
      "eval_runtime": 5.0634,
      "eval_samples_per_second": 78.8,
      "eval_steps_per_second": 9.875,
      "step": 2440
    },
    {
      "epoch": 20.0,
      "step": 2440,
      "total_flos": 7597037114448000.0,
      "train_loss": 0.35801424432973394,
      "train_runtime": 1955.1531,
      "train_samples_per_second": 37.214,
      "train_steps_per_second": 1.248
    }
  ],
  "logging_steps": 500,
  "max_steps": 2440,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 20,
  "save_steps": 500,
  "total_flos": 7597037114448000.0,
  "train_batch_size": 30,
  "trial_name": null,
  "trial_params": null
}