DumbledoreWiz committed on
Commit
fe2fdaf
·
verified ·
1 Parent(s): 0001bb4

Upload 4 files

Browse files
Files changed (3) hide show
  1. config.json +22 -14
  2. model.safetensors +2 -2
  3. trainer_state.json +853 -0
config.json CHANGED
@@ -9,25 +9,33 @@
9
  "hidden_dropout_prob": 0.0,
10
  "hidden_size": 768,
11
  "id2label": {
12
- "0": "Round",
13
- "1": "VNeck",
14
- "2": "Straight",
15
- "3": "Hoodie",
16
- "4": "Henley",
17
- "5": "Halter",
18
- "6": "Sweetheart"
 
 
 
 
19
  },
20
  "image_size": 224,
21
  "initializer_range": 0.02,
22
  "intermediate_size": 3072,
23
  "label2id": {
24
- "Halter": 5,
25
- "Henley": 4,
26
- "Hoodie": 3,
27
- "Round": 0,
28
- "Straight": 2,
29
- "Sweetheart": 6,
30
- "VNeck": 1
 
 
 
 
31
  },
32
  "layer_norm_eps": 1e-12,
33
  "model_type": "vit",
 
9
  "hidden_dropout_prob": 0.0,
10
  "hidden_size": 768,
11
  "id2label": {
12
+ "0": "round",
13
+ "1": "vneck",
14
+ "2": "collared",
15
+ "3": "straight",
16
+ "4": "highneck",
17
+ "5": "hoodie",
18
+ "6": "henley",
19
+ "7": "halter",
20
+ "8": "sweetheart",
21
+ "9": "polo",
22
+ "10": "asymmetrical"
23
  },
24
  "image_size": 224,
25
  "initializer_range": 0.02,
26
  "intermediate_size": 3072,
27
  "label2id": {
28
+ "asymmetrical": 10,
29
+ "collared": 2,
30
+ "halter": 7,
31
+ "henley": 6,
32
+ "highneck": 4,
33
+ "hoodie": 5,
34
+ "polo": 9,
35
+ "round": 0,
36
+ "straight": 3,
37
+ "sweetheart": 8,
38
+ "vneck": 1
39
  },
40
  "layer_norm_eps": 1e-12,
41
  "model_type": "vit",
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:209aed8bc989114771efc56e542a492ff6ebcb44bbe708c8e4bee3c47deaeb1b
3
- size 343239356
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24e0f7acb425a3d5310cd89ffea6e9f6179451404295fc887fce8eb9f32e6731
3
+ size 343251660
trainer_state.json ADDED
@@ -0,0 +1,853 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.7836363636363637,
3
+ "best_model_checkpoint": "/content/drive/MyDrive/autoTaggingProject/ViT/General/Features/NeckLine/Results/model_2024-10-16_test/checkpoint-5192",
4
+ "epoch": 11.0,
5
+ "eval_steps": 500,
6
+ "global_step": 5192,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.1059322033898305,
13
+ "grad_norm": 1.528609037399292,
14
+ "learning_rate": 9.964689265536724e-06,
15
+ "loss": 2.3665,
16
+ "step": 50
17
+ },
18
+ {
19
+ "epoch": 0.211864406779661,
20
+ "grad_norm": 1.4840689897537231,
21
+ "learning_rate": 9.929378531073447e-06,
22
+ "loss": 2.2771,
23
+ "step": 100
24
+ },
25
+ {
26
+ "epoch": 0.3177966101694915,
27
+ "grad_norm": 1.6797162294387817,
28
+ "learning_rate": 9.89406779661017e-06,
29
+ "loss": 2.1584,
30
+ "step": 150
31
+ },
32
+ {
33
+ "epoch": 0.423728813559322,
34
+ "grad_norm": 1.5869511365890503,
35
+ "learning_rate": 9.858757062146892e-06,
36
+ "loss": 2.0337,
37
+ "step": 200
38
+ },
39
+ {
40
+ "epoch": 0.5296610169491526,
41
+ "grad_norm": 1.625272512435913,
42
+ "learning_rate": 9.823446327683617e-06,
43
+ "loss": 1.945,
44
+ "step": 250
45
+ },
46
+ {
47
+ "epoch": 0.635593220338983,
48
+ "grad_norm": 1.918320655822754,
49
+ "learning_rate": 9.788135593220339e-06,
50
+ "loss": 1.8575,
51
+ "step": 300
52
+ },
53
+ {
54
+ "epoch": 0.7415254237288136,
55
+ "grad_norm": 2.73588490486145,
56
+ "learning_rate": 9.752824858757062e-06,
57
+ "loss": 1.795,
58
+ "step": 350
59
+ },
60
+ {
61
+ "epoch": 0.847457627118644,
62
+ "grad_norm": 1.978835105895996,
63
+ "learning_rate": 9.717514124293787e-06,
64
+ "loss": 1.7206,
65
+ "step": 400
66
+ },
67
+ {
68
+ "epoch": 0.9533898305084746,
69
+ "grad_norm": 2.827810764312744,
70
+ "learning_rate": 9.682203389830509e-06,
71
+ "loss": 1.643,
72
+ "step": 450
73
+ },
74
+ {
75
+ "epoch": 1.0,
76
+ "eval_accuracy": 0.5754545454545454,
77
+ "eval_loss": 1.6228725910186768,
78
+ "eval_runtime": 14.283,
79
+ "eval_samples_per_second": 77.014,
80
+ "eval_steps_per_second": 2.45,
81
+ "step": 472
82
+ },
83
+ {
84
+ "epoch": 1.0593220338983051,
85
+ "grad_norm": 2.191129207611084,
86
+ "learning_rate": 9.646892655367232e-06,
87
+ "loss": 1.5876,
88
+ "step": 500
89
+ },
90
+ {
91
+ "epoch": 1.1652542372881356,
92
+ "grad_norm": 2.16697359085083,
93
+ "learning_rate": 9.611581920903955e-06,
94
+ "loss": 1.5416,
95
+ "step": 550
96
+ },
97
+ {
98
+ "epoch": 1.271186440677966,
99
+ "grad_norm": 2.1197969913482666,
100
+ "learning_rate": 9.576271186440679e-06,
101
+ "loss": 1.5015,
102
+ "step": 600
103
+ },
104
+ {
105
+ "epoch": 1.3771186440677967,
106
+ "grad_norm": 2.465144634246826,
107
+ "learning_rate": 9.540960451977402e-06,
108
+ "loss": 1.4279,
109
+ "step": 650
110
+ },
111
+ {
112
+ "epoch": 1.4830508474576272,
113
+ "grad_norm": 1.8128849267959595,
114
+ "learning_rate": 9.505649717514125e-06,
115
+ "loss": 1.4298,
116
+ "step": 700
117
+ },
118
+ {
119
+ "epoch": 1.5889830508474576,
120
+ "grad_norm": 2.8659541606903076,
121
+ "learning_rate": 9.470338983050848e-06,
122
+ "loss": 1.3816,
123
+ "step": 750
124
+ },
125
+ {
126
+ "epoch": 1.694915254237288,
127
+ "grad_norm": 2.582030773162842,
128
+ "learning_rate": 9.435028248587572e-06,
129
+ "loss": 1.3254,
130
+ "step": 800
131
+ },
132
+ {
133
+ "epoch": 1.8008474576271185,
134
+ "grad_norm": 2.6557815074920654,
135
+ "learning_rate": 9.399717514124295e-06,
136
+ "loss": 1.2937,
137
+ "step": 850
138
+ },
139
+ {
140
+ "epoch": 1.9067796610169492,
141
+ "grad_norm": 3.4831998348236084,
142
+ "learning_rate": 9.364406779661017e-06,
143
+ "loss": 1.2511,
144
+ "step": 900
145
+ },
146
+ {
147
+ "epoch": 2.0,
148
+ "eval_accuracy": 0.6927272727272727,
149
+ "eval_loss": 1.3029816150665283,
150
+ "eval_runtime": 14.6712,
151
+ "eval_samples_per_second": 74.977,
152
+ "eval_steps_per_second": 2.386,
153
+ "step": 944
154
+ },
155
+ {
156
+ "epoch": 2.01271186440678,
157
+ "grad_norm": 2.9632270336151123,
158
+ "learning_rate": 9.329096045197742e-06,
159
+ "loss": 1.2574,
160
+ "step": 950
161
+ },
162
+ {
163
+ "epoch": 2.1186440677966103,
164
+ "grad_norm": 4.297321796417236,
165
+ "learning_rate": 9.293785310734465e-06,
166
+ "loss": 1.213,
167
+ "step": 1000
168
+ },
169
+ {
170
+ "epoch": 2.2245762711864407,
171
+ "grad_norm": 5.289111614227295,
172
+ "learning_rate": 9.258474576271187e-06,
173
+ "loss": 1.2127,
174
+ "step": 1050
175
+ },
176
+ {
177
+ "epoch": 2.330508474576271,
178
+ "grad_norm": 5.046447277069092,
179
+ "learning_rate": 9.22316384180791e-06,
180
+ "loss": 1.1403,
181
+ "step": 1100
182
+ },
183
+ {
184
+ "epoch": 2.4364406779661016,
185
+ "grad_norm": 3.322784662246704,
186
+ "learning_rate": 9.187853107344633e-06,
187
+ "loss": 1.1244,
188
+ "step": 1150
189
+ },
190
+ {
191
+ "epoch": 2.542372881355932,
192
+ "grad_norm": 2.8034584522247314,
193
+ "learning_rate": 9.152542372881356e-06,
194
+ "loss": 1.1052,
195
+ "step": 1200
196
+ },
197
+ {
198
+ "epoch": 2.648305084745763,
199
+ "grad_norm": 2.822523593902588,
200
+ "learning_rate": 9.11723163841808e-06,
201
+ "loss": 1.0929,
202
+ "step": 1250
203
+ },
204
+ {
205
+ "epoch": 2.7542372881355934,
206
+ "grad_norm": 4.488712787628174,
207
+ "learning_rate": 9.081920903954803e-06,
208
+ "loss": 1.0991,
209
+ "step": 1300
210
+ },
211
+ {
212
+ "epoch": 2.860169491525424,
213
+ "grad_norm": 4.986589431762695,
214
+ "learning_rate": 9.046610169491526e-06,
215
+ "loss": 1.0464,
216
+ "step": 1350
217
+ },
218
+ {
219
+ "epoch": 2.9661016949152543,
220
+ "grad_norm": 4.071073532104492,
221
+ "learning_rate": 9.01129943502825e-06,
222
+ "loss": 1.0532,
223
+ "step": 1400
224
+ },
225
+ {
226
+ "epoch": 3.0,
227
+ "eval_accuracy": 0.7209090909090909,
228
+ "eval_loss": 1.0850567817687988,
229
+ "eval_runtime": 17.8307,
230
+ "eval_samples_per_second": 61.691,
231
+ "eval_steps_per_second": 1.963,
232
+ "step": 1416
233
+ },
234
+ {
235
+ "epoch": 3.0720338983050848,
236
+ "grad_norm": 4.750606536865234,
237
+ "learning_rate": 8.975988700564973e-06,
238
+ "loss": 1.0453,
239
+ "step": 1450
240
+ },
241
+ {
242
+ "epoch": 3.1779661016949152,
243
+ "grad_norm": 3.7604198455810547,
244
+ "learning_rate": 8.940677966101694e-06,
245
+ "loss": 1.0231,
246
+ "step": 1500
247
+ },
248
+ {
249
+ "epoch": 3.2838983050847457,
250
+ "grad_norm": 5.222371578216553,
251
+ "learning_rate": 8.90536723163842e-06,
252
+ "loss": 0.9812,
253
+ "step": 1550
254
+ },
255
+ {
256
+ "epoch": 3.389830508474576,
257
+ "grad_norm": 6.1730570793151855,
258
+ "learning_rate": 8.870056497175143e-06,
259
+ "loss": 0.9771,
260
+ "step": 1600
261
+ },
262
+ {
263
+ "epoch": 3.4957627118644066,
264
+ "grad_norm": 5.553199291229248,
265
+ "learning_rate": 8.834745762711864e-06,
266
+ "loss": 0.9682,
267
+ "step": 1650
268
+ },
269
+ {
270
+ "epoch": 3.601694915254237,
271
+ "grad_norm": 4.2731451988220215,
272
+ "learning_rate": 8.79943502824859e-06,
273
+ "loss": 1.0006,
274
+ "step": 1700
275
+ },
276
+ {
277
+ "epoch": 3.707627118644068,
278
+ "grad_norm": 3.617053985595703,
279
+ "learning_rate": 8.764124293785311e-06,
280
+ "loss": 0.9154,
281
+ "step": 1750
282
+ },
283
+ {
284
+ "epoch": 3.8135593220338984,
285
+ "grad_norm": 2.5833144187927246,
286
+ "learning_rate": 8.728813559322034e-06,
287
+ "loss": 0.935,
288
+ "step": 1800
289
+ },
290
+ {
291
+ "epoch": 3.919491525423729,
292
+ "grad_norm": 3.1606085300445557,
293
+ "learning_rate": 8.693502824858758e-06,
294
+ "loss": 0.9216,
295
+ "step": 1850
296
+ },
297
+ {
298
+ "epoch": 4.0,
299
+ "eval_accuracy": 0.74,
300
+ "eval_loss": 0.9795950651168823,
301
+ "eval_runtime": 16.0161,
302
+ "eval_samples_per_second": 68.681,
303
+ "eval_steps_per_second": 2.185,
304
+ "step": 1888
305
+ },
306
+ {
307
+ "epoch": 4.02542372881356,
308
+ "grad_norm": 3.959052801132202,
309
+ "learning_rate": 8.65819209039548e-06,
310
+ "loss": 0.9288,
311
+ "step": 1900
312
+ },
313
+ {
314
+ "epoch": 4.13135593220339,
315
+ "grad_norm": 3.6853768825531006,
316
+ "learning_rate": 8.622881355932204e-06,
317
+ "loss": 0.879,
318
+ "step": 1950
319
+ },
320
+ {
321
+ "epoch": 4.237288135593221,
322
+ "grad_norm": 3.1763620376586914,
323
+ "learning_rate": 8.587570621468927e-06,
324
+ "loss": 0.8912,
325
+ "step": 2000
326
+ },
327
+ {
328
+ "epoch": 4.343220338983051,
329
+ "grad_norm": 4.019489288330078,
330
+ "learning_rate": 8.55225988700565e-06,
331
+ "loss": 0.9077,
332
+ "step": 2050
333
+ },
334
+ {
335
+ "epoch": 4.4491525423728815,
336
+ "grad_norm": 4.087663650512695,
337
+ "learning_rate": 8.516949152542372e-06,
338
+ "loss": 0.8812,
339
+ "step": 2100
340
+ },
341
+ {
342
+ "epoch": 4.555084745762712,
343
+ "grad_norm": 3.6994051933288574,
344
+ "learning_rate": 8.481638418079097e-06,
345
+ "loss": 0.8344,
346
+ "step": 2150
347
+ },
348
+ {
349
+ "epoch": 4.661016949152542,
350
+ "grad_norm": 2.284302234649658,
351
+ "learning_rate": 8.44632768361582e-06,
352
+ "loss": 0.8501,
353
+ "step": 2200
354
+ },
355
+ {
356
+ "epoch": 4.766949152542373,
357
+ "grad_norm": 4.348343372344971,
358
+ "learning_rate": 8.411016949152542e-06,
359
+ "loss": 0.8712,
360
+ "step": 2250
361
+ },
362
+ {
363
+ "epoch": 4.872881355932203,
364
+ "grad_norm": 4.996354103088379,
365
+ "learning_rate": 8.375706214689267e-06,
366
+ "loss": 0.8264,
367
+ "step": 2300
368
+ },
369
+ {
370
+ "epoch": 4.978813559322034,
371
+ "grad_norm": 3.136770486831665,
372
+ "learning_rate": 8.340395480225989e-06,
373
+ "loss": 0.843,
374
+ "step": 2350
375
+ },
376
+ {
377
+ "epoch": 5.0,
378
+ "eval_accuracy": 0.7390909090909091,
379
+ "eval_loss": 0.9454855918884277,
380
+ "eval_runtime": 15.0504,
381
+ "eval_samples_per_second": 73.088,
382
+ "eval_steps_per_second": 2.326,
383
+ "step": 2360
384
+ },
385
+ {
386
+ "epoch": 5.084745762711864,
387
+ "grad_norm": 6.956203937530518,
388
+ "learning_rate": 8.305084745762712e-06,
389
+ "loss": 0.8094,
390
+ "step": 2400
391
+ },
392
+ {
393
+ "epoch": 5.190677966101695,
394
+ "grad_norm": 6.91636323928833,
395
+ "learning_rate": 8.269774011299437e-06,
396
+ "loss": 0.8301,
397
+ "step": 2450
398
+ },
399
+ {
400
+ "epoch": 5.296610169491525,
401
+ "grad_norm": 2.561798334121704,
402
+ "learning_rate": 8.234463276836159e-06,
403
+ "loss": 0.8562,
404
+ "step": 2500
405
+ },
406
+ {
407
+ "epoch": 5.4025423728813555,
408
+ "grad_norm": 4.503079891204834,
409
+ "learning_rate": 8.199152542372882e-06,
410
+ "loss": 0.7487,
411
+ "step": 2550
412
+ },
413
+ {
414
+ "epoch": 5.508474576271187,
415
+ "grad_norm": 3.560302257537842,
416
+ "learning_rate": 8.163841807909605e-06,
417
+ "loss": 0.8222,
418
+ "step": 2600
419
+ },
420
+ {
421
+ "epoch": 5.614406779661017,
422
+ "grad_norm": 6.565722465515137,
423
+ "learning_rate": 8.128531073446328e-06,
424
+ "loss": 0.7917,
425
+ "step": 2650
426
+ },
427
+ {
428
+ "epoch": 5.720338983050848,
429
+ "grad_norm": 7.790140151977539,
430
+ "learning_rate": 8.093220338983052e-06,
431
+ "loss": 0.7764,
432
+ "step": 2700
433
+ },
434
+ {
435
+ "epoch": 5.826271186440678,
436
+ "grad_norm": 4.017592430114746,
437
+ "learning_rate": 8.057909604519775e-06,
438
+ "loss": 0.7718,
439
+ "step": 2750
440
+ },
441
+ {
442
+ "epoch": 5.932203389830509,
443
+ "grad_norm": 6.110499382019043,
444
+ "learning_rate": 8.022598870056498e-06,
445
+ "loss": 0.7445,
446
+ "step": 2800
447
+ },
448
+ {
449
+ "epoch": 6.0,
450
+ "eval_accuracy": 0.7527272727272727,
451
+ "eval_loss": 0.8885732889175415,
452
+ "eval_runtime": 15.0594,
453
+ "eval_samples_per_second": 73.044,
454
+ "eval_steps_per_second": 2.324,
455
+ "step": 2832
456
+ },
457
+ {
458
+ "epoch": 6.038135593220339,
459
+ "grad_norm": 4.600073337554932,
460
+ "learning_rate": 7.987288135593222e-06,
461
+ "loss": 0.7767,
462
+ "step": 2850
463
+ },
464
+ {
465
+ "epoch": 6.1440677966101696,
466
+ "grad_norm": 5.356403827667236,
467
+ "learning_rate": 7.951977401129945e-06,
468
+ "loss": 0.708,
469
+ "step": 2900
470
+ },
471
+ {
472
+ "epoch": 6.25,
473
+ "grad_norm": 5.1397223472595215,
474
+ "learning_rate": 7.917372881355932e-06,
475
+ "loss": 0.7397,
476
+ "step": 2950
477
+ },
478
+ {
479
+ "epoch": 6.3559322033898304,
480
+ "grad_norm": 6.384206771850586,
481
+ "learning_rate": 7.882062146892657e-06,
482
+ "loss": 0.7523,
483
+ "step": 3000
484
+ },
485
+ {
486
+ "epoch": 6.461864406779661,
487
+ "grad_norm": 5.545274257659912,
488
+ "learning_rate": 7.846751412429378e-06,
489
+ "loss": 0.7011,
490
+ "step": 3050
491
+ },
492
+ {
493
+ "epoch": 6.567796610169491,
494
+ "grad_norm": 9.406649589538574,
495
+ "learning_rate": 7.811440677966102e-06,
496
+ "loss": 0.7975,
497
+ "step": 3100
498
+ },
499
+ {
500
+ "epoch": 6.673728813559322,
501
+ "grad_norm": 7.81419563293457,
502
+ "learning_rate": 7.776129943502827e-06,
503
+ "loss": 0.7385,
504
+ "step": 3150
505
+ },
506
+ {
507
+ "epoch": 6.779661016949152,
508
+ "grad_norm": 3.415956974029541,
509
+ "learning_rate": 7.740819209039548e-06,
510
+ "loss": 0.7356,
511
+ "step": 3200
512
+ },
513
+ {
514
+ "epoch": 6.885593220338983,
515
+ "grad_norm": 5.487062931060791,
516
+ "learning_rate": 7.705508474576271e-06,
517
+ "loss": 0.7121,
518
+ "step": 3250
519
+ },
520
+ {
521
+ "epoch": 6.991525423728813,
522
+ "grad_norm": 5.682718276977539,
523
+ "learning_rate": 7.670197740112995e-06,
524
+ "loss": 0.7191,
525
+ "step": 3300
526
+ },
527
+ {
528
+ "epoch": 7.0,
529
+ "eval_accuracy": 0.7545454545454545,
530
+ "eval_loss": 0.8240677118301392,
531
+ "eval_runtime": 14.7678,
532
+ "eval_samples_per_second": 74.486,
533
+ "eval_steps_per_second": 2.37,
534
+ "step": 3304
535
+ },
536
+ {
537
+ "epoch": 7.0974576271186445,
538
+ "grad_norm": 8.319087028503418,
539
+ "learning_rate": 7.634887005649718e-06,
540
+ "loss": 0.7096,
541
+ "step": 3350
542
+ },
543
+ {
544
+ "epoch": 7.203389830508475,
545
+ "grad_norm": 5.857816696166992,
546
+ "learning_rate": 7.599576271186442e-06,
547
+ "loss": 0.7006,
548
+ "step": 3400
549
+ },
550
+ {
551
+ "epoch": 7.309322033898305,
552
+ "grad_norm": 4.400519371032715,
553
+ "learning_rate": 7.564265536723165e-06,
554
+ "loss": 0.706,
555
+ "step": 3450
556
+ },
557
+ {
558
+ "epoch": 7.415254237288136,
559
+ "grad_norm": 4.573615550994873,
560
+ "learning_rate": 7.528954802259888e-06,
561
+ "loss": 0.6543,
562
+ "step": 3500
563
+ },
564
+ {
565
+ "epoch": 7.521186440677966,
566
+ "grad_norm": 7.545746803283691,
567
+ "learning_rate": 7.49364406779661e-06,
568
+ "loss": 0.6498,
569
+ "step": 3550
570
+ },
571
+ {
572
+ "epoch": 7.627118644067797,
573
+ "grad_norm": 6.38883638381958,
574
+ "learning_rate": 7.4583333333333345e-06,
575
+ "loss": 0.6849,
576
+ "step": 3600
577
+ },
578
+ {
579
+ "epoch": 7.733050847457627,
580
+ "grad_norm": 4.496486186981201,
581
+ "learning_rate": 7.423022598870057e-06,
582
+ "loss": 0.6741,
583
+ "step": 3650
584
+ },
585
+ {
586
+ "epoch": 7.838983050847458,
587
+ "grad_norm": 5.2381792068481445,
588
+ "learning_rate": 7.38771186440678e-06,
589
+ "loss": 0.6441,
590
+ "step": 3700
591
+ },
592
+ {
593
+ "epoch": 7.944915254237288,
594
+ "grad_norm": 6.047347068786621,
595
+ "learning_rate": 7.3524011299435035e-06,
596
+ "loss": 0.7089,
597
+ "step": 3750
598
+ },
599
+ {
600
+ "epoch": 8.0,
601
+ "eval_accuracy": 0.7718181818181818,
602
+ "eval_loss": 0.845079779624939,
603
+ "eval_runtime": 14.9091,
604
+ "eval_samples_per_second": 73.78,
605
+ "eval_steps_per_second": 2.348,
606
+ "step": 3776
607
+ },
608
+ {
609
+ "epoch": 8.05084745762712,
610
+ "grad_norm": 5.383782386779785,
611
+ "learning_rate": 7.317090395480226e-06,
612
+ "loss": 0.6375,
613
+ "step": 3800
614
+ },
615
+ {
616
+ "epoch": 8.15677966101695,
617
+ "grad_norm": 7.663337707519531,
618
+ "learning_rate": 7.28177966101695e-06,
619
+ "loss": 0.6101,
620
+ "step": 3850
621
+ },
622
+ {
623
+ "epoch": 8.26271186440678,
624
+ "grad_norm": 4.593461036682129,
625
+ "learning_rate": 7.2464689265536725e-06,
626
+ "loss": 0.6356,
627
+ "step": 3900
628
+ },
629
+ {
630
+ "epoch": 8.36864406779661,
631
+ "grad_norm": 7.878734111785889,
632
+ "learning_rate": 7.211158192090396e-06,
633
+ "loss": 0.6412,
634
+ "step": 3950
635
+ },
636
+ {
637
+ "epoch": 8.474576271186441,
638
+ "grad_norm": 3.501059055328369,
639
+ "learning_rate": 7.17584745762712e-06,
640
+ "loss": 0.6296,
641
+ "step": 4000
642
+ },
643
+ {
644
+ "epoch": 8.580508474576272,
645
+ "grad_norm": 3.8199708461761475,
646
+ "learning_rate": 7.140536723163842e-06,
647
+ "loss": 0.654,
648
+ "step": 4050
649
+ },
650
+ {
651
+ "epoch": 8.686440677966102,
652
+ "grad_norm": 6.42057466506958,
653
+ "learning_rate": 7.105225988700566e-06,
654
+ "loss": 0.6378,
655
+ "step": 4100
656
+ },
657
+ {
658
+ "epoch": 8.792372881355933,
659
+ "grad_norm": 6.310295104980469,
660
+ "learning_rate": 7.069915254237288e-06,
661
+ "loss": 0.6235,
662
+ "step": 4150
663
+ },
664
+ {
665
+ "epoch": 8.898305084745763,
666
+ "grad_norm": 4.627810478210449,
667
+ "learning_rate": 7.034604519774012e-06,
668
+ "loss": 0.6631,
669
+ "step": 4200
670
+ },
671
+ {
672
+ "epoch": 9.0,
673
+ "eval_accuracy": 0.77,
674
+ "eval_loss": 0.8626542687416077,
675
+ "eval_runtime": 15.548,
676
+ "eval_samples_per_second": 70.749,
677
+ "eval_steps_per_second": 2.251,
678
+ "step": 4248
679
+ },
680
+ {
681
+ "epoch": 9.004237288135593,
682
+ "grad_norm": 5.442898273468018,
683
+ "learning_rate": 6.999293785310735e-06,
684
+ "loss": 0.6358,
685
+ "step": 4250
686
+ },
687
+ {
688
+ "epoch": 9.110169491525424,
689
+ "grad_norm": 5.042696475982666,
690
+ "learning_rate": 6.963983050847458e-06,
691
+ "loss": 0.6183,
692
+ "step": 4300
693
+ },
694
+ {
695
+ "epoch": 9.216101694915254,
696
+ "grad_norm": 5.006898403167725,
697
+ "learning_rate": 6.928672316384182e-06,
698
+ "loss": 0.6438,
699
+ "step": 4350
700
+ },
701
+ {
702
+ "epoch": 9.322033898305085,
703
+ "grad_norm": 6.093140125274658,
704
+ "learning_rate": 6.893361581920905e-06,
705
+ "loss": 0.5826,
706
+ "step": 4400
707
+ },
708
+ {
709
+ "epoch": 9.427966101694915,
710
+ "grad_norm": 4.637847900390625,
711
+ "learning_rate": 6.858050847457628e-06,
712
+ "loss": 0.5559,
713
+ "step": 4450
714
+ },
715
+ {
716
+ "epoch": 9.533898305084746,
717
+ "grad_norm": 2.860111951828003,
718
+ "learning_rate": 6.82274011299435e-06,
719
+ "loss": 0.5577,
720
+ "step": 4500
721
+ },
722
+ {
723
+ "epoch": 9.639830508474576,
724
+ "grad_norm": 10.876856803894043,
725
+ "learning_rate": 6.7874293785310745e-06,
726
+ "loss": 0.6233,
727
+ "step": 4550
728
+ },
729
+ {
730
+ "epoch": 9.745762711864407,
731
+ "grad_norm": 5.635727882385254,
732
+ "learning_rate": 6.752118644067798e-06,
733
+ "loss": 0.5703,
734
+ "step": 4600
735
+ },
736
+ {
737
+ "epoch": 9.851694915254237,
738
+ "grad_norm": 6.9388532638549805,
739
+ "learning_rate": 6.71680790960452e-06,
740
+ "loss": 0.6323,
741
+ "step": 4650
742
+ },
743
+ {
744
+ "epoch": 9.957627118644067,
745
+ "grad_norm": 7.485644340515137,
746
+ "learning_rate": 6.6814971751412435e-06,
747
+ "loss": 0.6021,
748
+ "step": 4700
749
+ },
750
+ {
751
+ "epoch": 10.0,
752
+ "eval_accuracy": 0.7772727272727272,
753
+ "eval_loss": 0.8030957579612732,
754
+ "eval_runtime": 14.6023,
755
+ "eval_samples_per_second": 75.331,
756
+ "eval_steps_per_second": 2.397,
757
+ "step": 4720
758
+ },
759
+ {
760
+ "epoch": 10.063559322033898,
761
+ "grad_norm": 13.398885726928711,
762
+ "learning_rate": 6.646186440677966e-06,
763
+ "loss": 0.5698,
764
+ "step": 4750
765
+ },
766
+ {
767
+ "epoch": 10.169491525423728,
768
+ "grad_norm": 7.821059226989746,
769
+ "learning_rate": 6.61087570621469e-06,
770
+ "loss": 0.5364,
771
+ "step": 4800
772
+ },
773
+ {
774
+ "epoch": 10.275423728813559,
775
+ "grad_norm": 7.535600662231445,
776
+ "learning_rate": 6.576271186440678e-06,
777
+ "loss": 0.5959,
778
+ "step": 4850
779
+ },
780
+ {
781
+ "epoch": 10.38135593220339,
782
+ "grad_norm": 8.605109214782715,
783
+ "learning_rate": 6.540960451977402e-06,
784
+ "loss": 0.5837,
785
+ "step": 4900
786
+ },
787
+ {
788
+ "epoch": 10.48728813559322,
789
+ "grad_norm": 5.240401268005371,
790
+ "learning_rate": 6.505649717514125e-06,
791
+ "loss": 0.5796,
792
+ "step": 4950
793
+ },
794
+ {
795
+ "epoch": 10.59322033898305,
796
+ "grad_norm": 6.025055408477783,
797
+ "learning_rate": 6.4703389830508476e-06,
798
+ "loss": 0.5749,
799
+ "step": 5000
800
+ },
801
+ {
802
+ "epoch": 10.69915254237288,
803
+ "grad_norm": 6.605931282043457,
804
+ "learning_rate": 6.435028248587572e-06,
805
+ "loss": 0.5185,
806
+ "step": 5050
807
+ },
808
+ {
809
+ "epoch": 10.805084745762711,
810
+ "grad_norm": 3.71102237701416,
811
+ "learning_rate": 6.399717514124294e-06,
812
+ "loss": 0.5289,
813
+ "step": 5100
814
+ },
815
+ {
816
+ "epoch": 10.911016949152543,
817
+ "grad_norm": 4.973482608795166,
818
+ "learning_rate": 6.3644067796610174e-06,
819
+ "loss": 0.5691,
820
+ "step": 5150
821
+ },
822
+ {
823
+ "epoch": 11.0,
824
+ "eval_accuracy": 0.7836363636363637,
825
+ "eval_loss": 0.8530685305595398,
826
+ "eval_runtime": 14.8369,
827
+ "eval_samples_per_second": 74.14,
828
+ "eval_steps_per_second": 2.359,
829
+ "step": 5192
830
+ }
831
+ ],
832
+ "logging_steps": 50,
833
+ "max_steps": 14160,
834
+ "num_input_tokens_seen": 0,
835
+ "num_train_epochs": 30,
836
+ "save_steps": 500,
837
+ "stateful_callbacks": {
838
+ "TrainerControl": {
839
+ "args": {
840
+ "should_epoch_stop": false,
841
+ "should_evaluate": false,
842
+ "should_log": false,
843
+ "should_save": true,
844
+ "should_training_stop": false
845
+ },
846
+ "attributes": {}
847
+ }
848
+ },
849
+ "total_flos": 1.286478541133356e+19,
850
+ "train_batch_size": 32,
851
+ "trial_name": null,
852
+ "trial_params": null
853
+ }