File size: 7,803 Bytes
1e49772
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
{
  "best_metric": 0.08749764412641525,
  "best_model_checkpoint": "/data/jcanete/all_results/pos/albeto_tiny/epochs_4_bs_16_lr_5e-5/checkpoint-1600",
  "epoch": 4.0,
  "global_step": 3580,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.22,
      "eval_accuracy": 0.9538808220802765,
      "eval_f1": 0.9490697836636672,
      "eval_loss": 0.17476704716682434,
      "eval_precision": 0.9467019204007793,
      "eval_recall": 0.9514495214880822,
      "eval_runtime": 1.5406,
      "eval_samples_per_second": 1073.605,
      "eval_steps_per_second": 67.506,
      "step": 200
    },
    {
      "epoch": 0.45,
      "eval_accuracy": 0.9654529743650565,
      "eval_f1": 0.9616131828280513,
      "eval_loss": 0.125724658370018,
      "eval_precision": 0.9590238861730456,
      "eval_recall": 0.9642164991708457,
      "eval_runtime": 1.5211,
      "eval_samples_per_second": 1087.347,
      "eval_steps_per_second": 68.37,
      "step": 400
    },
    {
      "epoch": 0.56,
      "learning_rate": 4.303072625698324e-05,
      "loss": 0.3133,
      "step": 500
    },
    {
      "epoch": 0.67,
      "eval_accuracy": 0.9684688500703139,
      "eval_f1": 0.9654409345999343,
      "eval_loss": 0.1092953085899353,
      "eval_precision": 0.9633379749353491,
      "eval_recall": 0.9675530958422409,
      "eval_runtime": 1.5243,
      "eval_samples_per_second": 1085.096,
      "eval_steps_per_second": 68.229,
      "step": 600
    },
    {
      "epoch": 0.89,
      "eval_accuracy": 0.9705189678250115,
      "eval_f1": 0.9675733365251304,
      "eval_loss": 0.09988456219434738,
      "eval_precision": 0.9660797514241326,
      "eval_recall": 0.9690715470220376,
      "eval_runtime": 1.5122,
      "eval_samples_per_second": 1093.787,
      "eval_steps_per_second": 68.775,
      "step": 800
    },
    {
      "epoch": 1.12,
      "learning_rate": 3.604748603351956e-05,
      "loss": 0.0983,
      "step": 1000
    },
    {
      "epoch": 1.12,
      "eval_accuracy": 0.9718066450924248,
      "eval_f1": 0.9694776714513557,
      "eval_loss": 0.09871890395879745,
      "eval_precision": 0.9673755197039925,
      "eval_recall": 0.971588979241174,
      "eval_runtime": 1.5084,
      "eval_samples_per_second": 1096.509,
      "eval_steps_per_second": 68.946,
      "step": 1000
    },
    {
      "epoch": 1.34,
      "eval_accuracy": 0.9728401755307433,
      "eval_f1": 0.9709789372905697,
      "eval_loss": 0.09597848355770111,
      "eval_precision": 0.9693355369267836,
      "eval_recall": 0.9726279195220875,
      "eval_runtime": 1.4942,
      "eval_samples_per_second": 1106.933,
      "eval_steps_per_second": 69.602,
      "step": 1200
    },
    {
      "epoch": 1.56,
      "eval_accuracy": 0.9736026160180274,
      "eval_f1": 0.9714610733031314,
      "eval_loss": 0.09189929813146591,
      "eval_precision": 0.9703765723740606,
      "eval_recall": 0.9725480010389402,
      "eval_runtime": 1.5177,
      "eval_samples_per_second": 1089.787,
      "eval_steps_per_second": 68.523,
      "step": 1400
    },
    {
      "epoch": 1.68,
      "learning_rate": 2.9064245810055868e-05,
      "loss": 0.0695,
      "step": 1500
    },
    {
      "epoch": 1.79,
      "eval_accuracy": 0.9747039189441047,
      "eval_f1": 0.9728139460666986,
      "eval_loss": 0.08749764412641525,
      "eval_precision": 0.9711674399155732,
      "eval_recall": 0.9744660446344728,
      "eval_runtime": 1.5082,
      "eval_samples_per_second": 1096.655,
      "eval_steps_per_second": 68.955,
      "step": 1600
    },
    {
      "epoch": 2.01,
      "eval_accuracy": 0.974975008895139,
      "eval_f1": 0.9729659656365613,
      "eval_loss": 0.08767995983362198,
      "eval_precision": 0.9712323312761298,
      "eval_recall": 0.9747058000839144,
      "eval_runtime": 1.4537,
      "eval_samples_per_second": 1137.75,
      "eval_steps_per_second": 71.539,
      "step": 1800
    },
    {
      "epoch": 2.23,
      "learning_rate": 2.2094972067039108e-05,
      "loss": 0.0597,
      "step": 2000
    },
    {
      "epoch": 2.23,
      "eval_accuracy": 0.974636146456346,
      "eval_f1": 0.9728004468426774,
      "eval_loss": 0.08931880444288254,
      "eval_precision": 0.9712794773741237,
      "eval_recall": 0.9743261872889653,
      "eval_runtime": 1.4451,
      "eval_samples_per_second": 1144.553,
      "eval_steps_per_second": 71.967,
      "step": 2000
    },
    {
      "epoch": 2.46,
      "eval_accuracy": 0.975025838260958,
      "eval_f1": 0.9732900972811175,
      "eval_loss": 0.0905543640255928,
      "eval_precision": 0.9720971020847451,
      "eval_recall": 0.9744860242552597,
      "eval_runtime": 1.4435,
      "eval_samples_per_second": 1145.863,
      "eval_steps_per_second": 72.049,
      "step": 2200
    },
    {
      "epoch": 2.68,
      "eval_accuracy": 0.9755510750410871,
      "eval_f1": 0.9741129056392965,
      "eval_loss": 0.09149234741926193,
      "eval_precision": 0.9727446605036659,
      "eval_recall": 0.9754850052945995,
      "eval_runtime": 1.4454,
      "eval_samples_per_second": 1144.307,
      "eval_steps_per_second": 71.952,
      "step": 2400
    },
    {
      "epoch": 2.79,
      "learning_rate": 1.511173184357542e-05,
      "loss": 0.0458,
      "step": 2500
    },
    {
      "epoch": 2.91,
      "eval_accuracy": 0.9755171887972077,
      "eval_f1": 0.9740555211846648,
      "eval_loss": 0.08821560442447662,
      "eval_precision": 0.9729681239160337,
      "eval_recall": 0.975145351741224,
      "eval_runtime": 1.4692,
      "eval_samples_per_second": 1125.788,
      "eval_steps_per_second": 70.787,
      "step": 2600
    },
    {
      "epoch": 3.13,
      "eval_accuracy": 0.9757374493824232,
      "eval_f1": 0.9740832369019593,
      "eval_loss": 0.09264585375785828,
      "eval_precision": 0.9727053573207419,
      "eval_recall": 0.9754650256738127,
      "eval_runtime": 1.4438,
      "eval_samples_per_second": 1145.593,
      "eval_steps_per_second": 72.032,
      "step": 2800
    },
    {
      "epoch": 3.35,
      "learning_rate": 8.128491620111732e-06,
      "loss": 0.0383,
      "step": 3000
    },
    {
      "epoch": 3.35,
      "eval_accuracy": 0.9753985869436302,
      "eval_f1": 0.973785013167345,
      "eval_loss": 0.093608058989048,
      "eval_precision": 0.9723688666653386,
      "eval_recall": 0.9752052906035843,
      "eval_runtime": 1.444,
      "eval_samples_per_second": 1145.395,
      "eval_steps_per_second": 72.02,
      "step": 3000
    },
    {
      "epoch": 3.58,
      "eval_accuracy": 0.9756357906507853,
      "eval_f1": 0.974050980196538,
      "eval_loss": 0.09429396688938141,
      "eval_precision": 0.9727602423083055,
      "eval_recall": 0.9753451479490919,
      "eval_runtime": 1.4392,
      "eval_samples_per_second": 1149.213,
      "eval_steps_per_second": 72.26,
      "step": 3200
    },
    {
      "epoch": 3.8,
      "eval_accuracy": 0.9750936107487166,
      "eval_f1": 0.9735209019255712,
      "eval_loss": 0.09423112124204636,
      "eval_precision": 0.9722792403196556,
      "eval_recall": 0.9747657389462748,
      "eval_runtime": 1.4459,
      "eval_samples_per_second": 1143.936,
      "eval_steps_per_second": 71.928,
      "step": 3400
    },
    {
      "epoch": 3.91,
      "learning_rate": 1.1452513966480447e-06,
      "loss": 0.0344,
      "step": 3500
    },
    {
      "epoch": 4.0,
      "step": 3580,
      "total_flos": 32358086511744.0,
      "train_loss": 0.09274842659188383,
      "train_runtime": 230.8308,
      "train_samples_per_second": 247.887,
      "train_steps_per_second": 15.509
    }
  ],
  "max_steps": 3580,
  "num_train_epochs": 4,
  "total_flos": 32358086511744.0,
  "trial_name": null,
  "trial_params": null
}