alicelouis committed on
Commit
65c1124
1 Parent(s): ae575a8

Upload 6 files

all_results.json ADDED
@@ -0,0 +1,13 @@
+ {
+ "epoch": 30.0,
+ "eval_accuracy": 0.6914285714285714,
+ "eval_loss": 1.726584792137146,
+ "eval_runtime": 14.2787,
+ "eval_samples_per_second": 49.024,
+ "eval_steps_per_second": 0.77,
+ "total_flos": 5.2828663104e+18,
+ "train_loss": 0.7520242673994014,
+ "train_runtime": 3943.4049,
+ "train_samples_per_second": 21.301,
+ "train_steps_per_second": 0.084
+ }
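
These aggregate metrics are plain JSON and can be read back directly. A minimal sketch, assuming all_results.json is read from a local clone of this repository:

import json

# Load the aggregate train/eval metrics written at the end of the run.
with open("all_results.json") as f:
    results = json.load(f)

print(f"epochs trained : {results['epoch']}")
print(f"eval accuracy  : {results['eval_accuracy']:.4f}")   # ~0.6914
print(f"eval loss      : {results['eval_loss']:.4f}")
print(f"train loss     : {results['train_loss']:.4f}")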
config.json ADDED
@@ -0,0 +1,1053 @@
+ {
+ "_name_or_path": "facebook/dinov2-base",
+ "apply_layernorm": true,
+ "architectures": [
+ "Dinov2ForImageClassification"
+ ],
+ "attention_probs_dropout_prob": 0.0,
+ "drop_path_rate": 0.0,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.0,
+ "hidden_size": 768,
+ "id2label": {
13
+ "0": "00000000",
14
+ "1": "00000002",
15
+ "2": "00000005",
16
+ "3": "00000007",
17
+ "4": "00000008",
18
+ "5": "00000009",
19
+ "6": "00000019",
20
+ "7": "00000031",
21
+ "8": "00000039",
22
+ "9": "00000050",
23
+ "10": "00000053",
24
+ "11": "00000057",
25
+ "12": "00000058",
26
+ "13": "00000061",
27
+ "14": "00000068",
28
+ "15": "00000071",
29
+ "16": "00000078",
30
+ "17": "00000079",
31
+ "18": "00000088",
32
+ "19": "00000091",
33
+ "20": "00000093",
34
+ "21": "00000095",
35
+ "22": "00000101",
36
+ "23": "00000127",
37
+ "24": "00000128",
38
+ "25": "00000134",
39
+ "26": "00000139",
40
+ "27": "00000141",
41
+ "28": "00000145",
42
+ "29": "00000152",
43
+ "30": "00000159",
44
+ "31": "00000166",
45
+ "32": "00000171",
46
+ "33": "00000175",
47
+ "34": "00000179",
48
+ "35": "00000182",
49
+ "36": "00000186",
50
+ "37": "00000206",
51
+ "38": "00000233",
52
+ "39": "00000237",
53
+ "40": "00000241",
54
+ "41": "00000243",
55
+ "42": "00000246",
56
+ "43": "00000259",
57
+ "44": "00000268",
58
+ "45": "00000269",
59
+ "46": "00000270",
60
+ "47": "00000272",
61
+ "48": "00000273",
62
+ "49": "00000274",
63
+ "50": "00000275",
64
+ "51": "00000282",
65
+ "52": "00000284",
66
+ "53": "00000285",
67
+ "54": "00000296",
68
+ "55": "00000311",
69
+ "56": "00000312",
70
+ "57": "00000315",
71
+ "58": "00000328",
72
+ "59": "00000329",
73
+ "60": "00000333",
74
+ "61": "00000340",
75
+ "62": "00000345",
76
+ "63": "00000346",
77
+ "64": "00000348",
78
+ "65": "00000349",
79
+ "66": "00000350",
80
+ "67": "00000352",
81
+ "68": "00000358",
82
+ "69": "00000359",
83
+ "70": "00000368",
84
+ "71": "00000372",
85
+ "72": "00000373",
86
+ "73": "00000378",
87
+ "74": "00000382",
88
+ "75": "00000383",
89
+ "76": "00000384",
90
+ "77": "00000399",
91
+ "78": "00000406",
92
+ "79": "00000418",
93
+ "80": "00000423",
94
+ "81": "00000431",
95
+ "82": "00000437",
96
+ "83": "00000441",
97
+ "84": "00000443",
98
+ "85": "00000459",
99
+ "86": "00000462",
100
+ "87": "00000463",
101
+ "88": "00000477",
102
+ "89": "00000494",
103
+ "90": "00000500",
104
+ "91": "00000509",
105
+ "92": "00000512",
106
+ "93": "00000515",
107
+ "94": "00000522",
108
+ "95": "00000543",
109
+ "96": "00000550",
110
+ "97": "00000551",
111
+ "98": "00000559",
112
+ "99": "00000561",
113
+ "100": "00000562",
114
+ "101": "00000567",
115
+ "102": "00000568",
116
+ "103": "00000576",
117
+ "104": "00000582",
118
+ "105": "00000585",
119
+ "106": "00000586",
120
+ "107": "00000587",
121
+ "108": "00000590",
122
+ "109": "00000597",
123
+ "110": "00000608",
124
+ "111": "00000616",
125
+ "112": "00000619",
126
+ "113": "00000626",
127
+ "114": "00000627",
128
+ "115": "00000629",
129
+ "116": "00000637",
130
+ "117": "00000652",
131
+ "118": "00000653",
132
+ "119": "00000654",
133
+ "120": "00000655",
134
+ "121": "00000658",
135
+ "122": "00000660",
136
+ "123": "00000661",
137
+ "124": "00000664",
138
+ "125": "00000683",
139
+ "126": "00000684",
140
+ "127": "00000685",
141
+ "128": "00000699",
142
+ "129": "00000701",
143
+ "130": "00000703",
144
+ "131": "00000718",
145
+ "132": "00000721",
146
+ "133": "00000729",
147
+ "134": "00000732",
148
+ "135": "00000736",
149
+ "136": "00000738",
150
+ "137": "00000746",
151
+ "138": "00000751",
152
+ "139": "00000757",
153
+ "140": "00000763",
154
+ "141": "00000774",
155
+ "142": "00000784",
156
+ "143": "00000785",
157
+ "144": "00000795",
158
+ "145": "00000799",
159
+ "146": "00000809",
160
+ "147": "00000816",
161
+ "148": "00000817",
162
+ "149": "00000820",
163
+ "150": "00000829",
164
+ "151": "00000831",
165
+ "152": "00000834",
166
+ "153": "00000837",
167
+ "154": "00000845",
168
+ "155": "00000849",
169
+ "156": "00000860",
170
+ "157": "00000862",
171
+ "158": "00000863",
172
+ "159": "00000869",
173
+ "160": "00000877",
174
+ "161": "00000882",
175
+ "162": "00000885",
176
+ "163": "00000887",
177
+ "164": "00000890",
178
+ "165": "00000900",
179
+ "166": "00000904",
180
+ "167": "00000913",
181
+ "168": "00000931",
182
+ "169": "00000933",
183
+ "170": "00000936",
184
+ "171": "00000941",
185
+ "172": "00000944",
186
+ "173": "00000946",
187
+ "174": "00000950",
188
+ "175": "00000952",
189
+ "176": "00000954",
190
+ "177": "00000956",
191
+ "178": "00000958",
192
+ "179": "00000966",
193
+ "180": "00000968",
194
+ "181": "00000972",
195
+ "182": "00000975",
196
+ "183": "00000976",
197
+ "184": "00000985",
198
+ "185": "00000988",
199
+ "186": "00000993",
200
+ "187": "00000998",
201
+ "188": "00000999",
202
+ "189": "00001001",
203
+ "190": "00001002",
204
+ "191": "00001003",
205
+ "192": "00001004",
206
+ "193": "00001010",
207
+ "194": "00001013",
208
+ "195": "00001017",
209
+ "196": "00001018",
210
+ "197": "00001019",
211
+ "198": "00001020",
212
+ "199": "00001027",
213
+ "200": "00001028",
214
+ "201": "00001058",
215
+ "202": "00001067",
216
+ "203": "00001069",
217
+ "204": "00001072",
218
+ "205": "00001084",
219
+ "206": "00001093",
220
+ "207": "00001097",
221
+ "208": "00001098",
222
+ "209": "00001099",
223
+ "210": "00001100",
224
+ "211": "00001101",
225
+ "212": "00001105",
226
+ "213": "00001107",
227
+ "214": "00001112",
228
+ "215": "00001118",
229
+ "216": "00001120",
230
+ "217": "00001124",
231
+ "218": "00001128",
232
+ "219": "00001140",
233
+ "220": "00001142",
234
+ "221": "00001143",
235
+ "222": "00001146",
236
+ "223": "00001155",
237
+ "224": "00001157",
238
+ "225": "00001159",
239
+ "226": "00001162",
240
+ "227": "00001164",
241
+ "228": "00001175",
242
+ "229": "00001177",
243
+ "230": "00001178",
244
+ "231": "00001183",
245
+ "232": "00001188",
246
+ "233": "00001191",
247
+ "234": "00001195",
248
+ "235": "00001196",
249
+ "236": "00001206",
250
+ "237": "00001208",
251
+ "238": "00001210",
252
+ "239": "00001211",
253
+ "240": "00001212",
254
+ "241": "00001213",
255
+ "242": "00001214",
256
+ "243": "00001217",
257
+ "244": "00001220",
258
+ "245": "00001227",
259
+ "246": "00001228",
260
+ "247": "00001230",
261
+ "248": "00001232",
262
+ "249": "00001233",
263
+ "250": "00001235",
264
+ "251": "00001236",
265
+ "252": "00001237",
266
+ "253": "00001240",
267
+ "254": "00001243",
268
+ "255": "00001246",
269
+ "256": "00001249",
270
+ "257": "00001251",
271
+ "258": "00001257",
272
+ "259": "00001259",
273
+ "260": "00001262",
274
+ "261": "00001264",
275
+ "262": "00001272",
276
+ "263": "00001276",
277
+ "264": "00001277",
278
+ "265": "00001285",
279
+ "266": "00001301",
280
+ "267": "00001304",
281
+ "268": "00001305",
282
+ "269": "00001307",
283
+ "270": "00001309",
284
+ "271": "00001311",
285
+ "272": "00001317",
286
+ "273": "00001319",
287
+ "274": "00001320",
288
+ "275": "00001321",
289
+ "276": "00001323",
290
+ "277": "00001328",
291
+ "278": "00001333",
292
+ "279": "00001334",
293
+ "280": "00001338",
294
+ "281": "00001340",
295
+ "282": "00001346",
296
+ "283": "00001348",
297
+ "284": "00001354",
298
+ "285": "00001371",
299
+ "286": "00001378",
300
+ "287": "00001381",
301
+ "288": "00001394",
302
+ "289": "00001397",
303
+ "290": "00001398",
304
+ "291": "00001399",
305
+ "292": "00001403",
306
+ "293": "00001406",
307
+ "294": "00001408",
308
+ "295": "00001409",
309
+ "296": "00001410",
310
+ "297": "00001412",
311
+ "298": "00001415",
312
+ "299": "00001422",
313
+ "300": "00001429",
314
+ "301": "00001430",
315
+ "302": "00001431",
316
+ "303": "00001432",
317
+ "304": "00001437",
318
+ "305": "00001445",
319
+ "306": "00001453",
320
+ "307": "00001466",
321
+ "308": "00001467",
322
+ "309": "00001475",
323
+ "310": "00001485",
324
+ "311": "00001487",
325
+ "312": "00001488",
326
+ "313": "00001490",
327
+ "314": "00001491",
328
+ "315": "00001492",
329
+ "316": "00001496",
330
+ "317": "00001498",
331
+ "318": "00001499",
332
+ "319": "00001500",
333
+ "320": "00001502",
334
+ "321": "00001506",
335
+ "322": "00001507",
336
+ "323": "00001508",
337
+ "324": "00001522",
338
+ "325": "00001525",
339
+ "326": "00001535",
340
+ "327": "00001545",
341
+ "328": "00001547",
342
+ "329": "00001549",
343
+ "330": "00001559",
344
+ "331": "00001569",
345
+ "332": "00001579",
346
+ "333": "00001581",
347
+ "334": "00001587",
348
+ "335": "00001597",
349
+ "336": "00001599",
350
+ "337": "00001601",
351
+ "338": "00001605",
352
+ "339": "00001613",
353
+ "340": "00001616",
354
+ "341": "00001621",
355
+ "342": "00001630",
356
+ "343": "00001631",
357
+ "344": "00001633",
358
+ "345": "00001634",
359
+ "346": "00001640",
360
+ "347": "00001642",
361
+ "348": "00001644",
362
+ "349": "00001645",
363
+ "350": "00001646",
364
+ "351": "00001651",
365
+ "352": "00001667",
366
+ "353": "00001675",
367
+ "354": "00001677",
368
+ "355": "00001680",
369
+ "356": "00001691",
370
+ "357": "00001694",
371
+ "358": "00001698",
372
+ "359": "00001703",
373
+ "360": "00001705",
374
+ "361": "00001706",
375
+ "362": "00001712",
376
+ "363": "00001714",
377
+ "364": "00001718",
378
+ "365": "00001720",
379
+ "366": "00001726",
380
+ "367": "00001728",
381
+ "368": "00001729",
382
+ "369": "00001738",
383
+ "370": "00001740",
384
+ "371": "00001745",
385
+ "372": "00001751",
386
+ "373": "00001752",
387
+ "374": "00001777",
388
+ "375": "00001779",
389
+ "376": "00001783",
390
+ "377": "00001790",
391
+ "378": "00001796",
392
+ "379": "00001801",
393
+ "380": "00001807",
394
+ "381": "00001821",
395
+ "382": "00001828",
396
+ "383": "00001829",
397
+ "384": "00001832",
398
+ "385": "00001834",
399
+ "386": "00001836",
400
+ "387": "00001840",
401
+ "388": "00001844",
402
+ "389": "00001873",
403
+ "390": "00001892",
404
+ "391": "00001893",
405
+ "392": "00001899",
406
+ "393": "00001907",
407
+ "394": "00001909",
408
+ "395": "00001913",
409
+ "396": "00001914",
410
+ "397": "00001916",
411
+ "398": "00001917",
412
+ "399": "00001926",
413
+ "400": "00001929",
414
+ "401": "00001964",
415
+ "402": "00001966",
416
+ "403": "00001970",
417
+ "404": "00001973",
418
+ "405": "00001977",
419
+ "406": "00001979",
420
+ "407": "00001980",
421
+ "408": "00001984",
422
+ "409": "00001988",
423
+ "410": "00001989",
424
+ "411": "00001998",
425
+ "412": "00002000",
426
+ "413": "00002006",
427
+ "414": "00002009",
428
+ "415": "00002011",
429
+ "416": "00002016",
430
+ "417": "00002031",
431
+ "418": "00002033",
432
+ "419": "00002039",
433
+ "420": "00002040",
434
+ "421": "00002047",
435
+ "422": "00002056",
436
+ "423": "00002058",
437
+ "424": "00002063",
438
+ "425": "00002068",
439
+ "426": "00002071",
440
+ "427": "00002077",
441
+ "428": "00002087",
442
+ "429": "00002093",
443
+ "430": "00002098",
444
+ "431": "00002108",
445
+ "432": "00002109",
446
+ "433": "00002114",
447
+ "434": "00002119",
448
+ "435": "00002121",
449
+ "436": "00002126",
450
+ "437": "00002129",
451
+ "438": "00002136",
452
+ "439": "00002137",
453
+ "440": "00002138",
454
+ "441": "00002146",
455
+ "442": "00002154",
456
+ "443": "00002159",
457
+ "444": "00002161",
458
+ "445": "00002162",
459
+ "446": "00002173",
460
+ "447": "00002178",
461
+ "448": "00002183",
462
+ "449": "00002185",
463
+ "450": "00002195",
464
+ "451": "00002197",
465
+ "452": "00002200",
466
+ "453": "00002202",
467
+ "454": "00002203",
468
+ "455": "00002208",
469
+ "456": "00002209",
470
+ "457": "00002210",
471
+ "458": "00002214",
472
+ "459": "00002225",
473
+ "460": "00002227",
474
+ "461": "00002231",
475
+ "462": "00002235",
476
+ "463": "00002236",
477
+ "464": "00002241",
478
+ "465": "00002243",
479
+ "466": "00002254",
480
+ "467": "00002256",
481
+ "468": "00002257",
482
+ "469": "00002259",
483
+ "470": "00002261",
484
+ "471": "00002263",
485
+ "472": "00002267",
486
+ "473": "00002270",
487
+ "474": "00002276",
488
+ "475": "00002278",
489
+ "476": "00002316",
490
+ "477": "00002320",
491
+ "478": "00002326",
492
+ "479": "00002328",
493
+ "480": "00002331",
494
+ "481": "00002332",
495
+ "482": "00002334",
496
+ "483": "00002337",
497
+ "484": "00002341",
498
+ "485": "00002352",
499
+ "486": "00002356",
500
+ "487": "00002365",
501
+ "488": "00002366",
502
+ "489": "00002371",
503
+ "490": "00002378",
504
+ "491": "00002379",
505
+ "492": "00002381",
506
+ "493": "00002387",
507
+ "494": "00002393",
508
+ "495": "00002398",
509
+ "496": "00002399",
510
+ "497": "00002405",
511
+ "498": "00002406",
512
+ "499": "00002414"
+ },
+ "image_size": 518,
+ "initializer_range": 0.02,
+ "label2id": {
517
+ "00000000": 0,
518
+ "00000002": 1,
519
+ "00000005": 2,
520
+ "00000007": 3,
521
+ "00000008": 4,
522
+ "00000009": 5,
523
+ "00000019": 6,
524
+ "00000031": 7,
525
+ "00000039": 8,
526
+ "00000050": 9,
527
+ "00000053": 10,
528
+ "00000057": 11,
529
+ "00000058": 12,
530
+ "00000061": 13,
531
+ "00000068": 14,
532
+ "00000071": 15,
533
+ "00000078": 16,
534
+ "00000079": 17,
535
+ "00000088": 18,
536
+ "00000091": 19,
537
+ "00000093": 20,
538
+ "00000095": 21,
539
+ "00000101": 22,
540
+ "00000127": 23,
541
+ "00000128": 24,
542
+ "00000134": 25,
543
+ "00000139": 26,
544
+ "00000141": 27,
545
+ "00000145": 28,
546
+ "00000152": 29,
547
+ "00000159": 30,
548
+ "00000166": 31,
549
+ "00000171": 32,
550
+ "00000175": 33,
551
+ "00000179": 34,
552
+ "00000182": 35,
553
+ "00000186": 36,
554
+ "00000206": 37,
555
+ "00000233": 38,
556
+ "00000237": 39,
557
+ "00000241": 40,
558
+ "00000243": 41,
559
+ "00000246": 42,
560
+ "00000259": 43,
561
+ "00000268": 44,
562
+ "00000269": 45,
563
+ "00000270": 46,
564
+ "00000272": 47,
565
+ "00000273": 48,
566
+ "00000274": 49,
567
+ "00000275": 50,
568
+ "00000282": 51,
569
+ "00000284": 52,
570
+ "00000285": 53,
571
+ "00000296": 54,
572
+ "00000311": 55,
573
+ "00000312": 56,
574
+ "00000315": 57,
575
+ "00000328": 58,
576
+ "00000329": 59,
577
+ "00000333": 60,
578
+ "00000340": 61,
579
+ "00000345": 62,
580
+ "00000346": 63,
581
+ "00000348": 64,
582
+ "00000349": 65,
583
+ "00000350": 66,
584
+ "00000352": 67,
585
+ "00000358": 68,
586
+ "00000359": 69,
587
+ "00000368": 70,
588
+ "00000372": 71,
589
+ "00000373": 72,
590
+ "00000378": 73,
591
+ "00000382": 74,
592
+ "00000383": 75,
593
+ "00000384": 76,
594
+ "00000399": 77,
595
+ "00000406": 78,
596
+ "00000418": 79,
597
+ "00000423": 80,
598
+ "00000431": 81,
599
+ "00000437": 82,
600
+ "00000441": 83,
601
+ "00000443": 84,
602
+ "00000459": 85,
603
+ "00000462": 86,
604
+ "00000463": 87,
605
+ "00000477": 88,
606
+ "00000494": 89,
607
+ "00000500": 90,
608
+ "00000509": 91,
609
+ "00000512": 92,
610
+ "00000515": 93,
611
+ "00000522": 94,
612
+ "00000543": 95,
613
+ "00000550": 96,
614
+ "00000551": 97,
615
+ "00000559": 98,
616
+ "00000561": 99,
617
+ "00000562": 100,
618
+ "00000567": 101,
619
+ "00000568": 102,
620
+ "00000576": 103,
621
+ "00000582": 104,
622
+ "00000585": 105,
623
+ "00000586": 106,
624
+ "00000587": 107,
625
+ "00000590": 108,
626
+ "00000597": 109,
627
+ "00000608": 110,
628
+ "00000616": 111,
629
+ "00000619": 112,
630
+ "00000626": 113,
631
+ "00000627": 114,
632
+ "00000629": 115,
633
+ "00000637": 116,
634
+ "00000652": 117,
635
+ "00000653": 118,
636
+ "00000654": 119,
637
+ "00000655": 120,
638
+ "00000658": 121,
639
+ "00000660": 122,
640
+ "00000661": 123,
641
+ "00000664": 124,
642
+ "00000683": 125,
643
+ "00000684": 126,
644
+ "00000685": 127,
645
+ "00000699": 128,
646
+ "00000701": 129,
647
+ "00000703": 130,
648
+ "00000718": 131,
649
+ "00000721": 132,
650
+ "00000729": 133,
651
+ "00000732": 134,
652
+ "00000736": 135,
653
+ "00000738": 136,
654
+ "00000746": 137,
655
+ "00000751": 138,
656
+ "00000757": 139,
657
+ "00000763": 140,
658
+ "00000774": 141,
659
+ "00000784": 142,
660
+ "00000785": 143,
661
+ "00000795": 144,
662
+ "00000799": 145,
663
+ "00000809": 146,
664
+ "00000816": 147,
665
+ "00000817": 148,
666
+ "00000820": 149,
667
+ "00000829": 150,
668
+ "00000831": 151,
669
+ "00000834": 152,
670
+ "00000837": 153,
671
+ "00000845": 154,
672
+ "00000849": 155,
673
+ "00000860": 156,
674
+ "00000862": 157,
675
+ "00000863": 158,
676
+ "00000869": 159,
677
+ "00000877": 160,
678
+ "00000882": 161,
679
+ "00000885": 162,
680
+ "00000887": 163,
681
+ "00000890": 164,
682
+ "00000900": 165,
683
+ "00000904": 166,
684
+ "00000913": 167,
685
+ "00000931": 168,
686
+ "00000933": 169,
687
+ "00000936": 170,
688
+ "00000941": 171,
689
+ "00000944": 172,
690
+ "00000946": 173,
691
+ "00000950": 174,
692
+ "00000952": 175,
693
+ "00000954": 176,
694
+ "00000956": 177,
695
+ "00000958": 178,
696
+ "00000966": 179,
697
+ "00000968": 180,
698
+ "00000972": 181,
699
+ "00000975": 182,
700
+ "00000976": 183,
701
+ "00000985": 184,
702
+ "00000988": 185,
703
+ "00000993": 186,
704
+ "00000998": 187,
705
+ "00000999": 188,
706
+ "00001001": 189,
707
+ "00001002": 190,
708
+ "00001003": 191,
709
+ "00001004": 192,
710
+ "00001010": 193,
711
+ "00001013": 194,
712
+ "00001017": 195,
713
+ "00001018": 196,
714
+ "00001019": 197,
715
+ "00001020": 198,
716
+ "00001027": 199,
717
+ "00001028": 200,
718
+ "00001058": 201,
719
+ "00001067": 202,
720
+ "00001069": 203,
721
+ "00001072": 204,
722
+ "00001084": 205,
723
+ "00001093": 206,
724
+ "00001097": 207,
725
+ "00001098": 208,
726
+ "00001099": 209,
727
+ "00001100": 210,
728
+ "00001101": 211,
729
+ "00001105": 212,
730
+ "00001107": 213,
731
+ "00001112": 214,
732
+ "00001118": 215,
733
+ "00001120": 216,
734
+ "00001124": 217,
735
+ "00001128": 218,
736
+ "00001140": 219,
737
+ "00001142": 220,
738
+ "00001143": 221,
739
+ "00001146": 222,
740
+ "00001155": 223,
741
+ "00001157": 224,
742
+ "00001159": 225,
743
+ "00001162": 226,
744
+ "00001164": 227,
745
+ "00001175": 228,
746
+ "00001177": 229,
747
+ "00001178": 230,
748
+ "00001183": 231,
749
+ "00001188": 232,
750
+ "00001191": 233,
751
+ "00001195": 234,
752
+ "00001196": 235,
753
+ "00001206": 236,
754
+ "00001208": 237,
755
+ "00001210": 238,
756
+ "00001211": 239,
757
+ "00001212": 240,
758
+ "00001213": 241,
759
+ "00001214": 242,
760
+ "00001217": 243,
761
+ "00001220": 244,
762
+ "00001227": 245,
763
+ "00001228": 246,
764
+ "00001230": 247,
765
+ "00001232": 248,
766
+ "00001233": 249,
767
+ "00001235": 250,
768
+ "00001236": 251,
769
+ "00001237": 252,
770
+ "00001240": 253,
771
+ "00001243": 254,
772
+ "00001246": 255,
773
+ "00001249": 256,
774
+ "00001251": 257,
775
+ "00001257": 258,
776
+ "00001259": 259,
777
+ "00001262": 260,
778
+ "00001264": 261,
779
+ "00001272": 262,
780
+ "00001276": 263,
781
+ "00001277": 264,
782
+ "00001285": 265,
783
+ "00001301": 266,
784
+ "00001304": 267,
785
+ "00001305": 268,
786
+ "00001307": 269,
787
+ "00001309": 270,
788
+ "00001311": 271,
789
+ "00001317": 272,
790
+ "00001319": 273,
791
+ "00001320": 274,
792
+ "00001321": 275,
793
+ "00001323": 276,
794
+ "00001328": 277,
795
+ "00001333": 278,
796
+ "00001334": 279,
797
+ "00001338": 280,
798
+ "00001340": 281,
799
+ "00001346": 282,
800
+ "00001348": 283,
801
+ "00001354": 284,
802
+ "00001371": 285,
803
+ "00001378": 286,
804
+ "00001381": 287,
805
+ "00001394": 288,
806
+ "00001397": 289,
807
+ "00001398": 290,
808
+ "00001399": 291,
809
+ "00001403": 292,
810
+ "00001406": 293,
811
+ "00001408": 294,
812
+ "00001409": 295,
813
+ "00001410": 296,
814
+ "00001412": 297,
815
+ "00001415": 298,
816
+ "00001422": 299,
817
+ "00001429": 300,
818
+ "00001430": 301,
819
+ "00001431": 302,
820
+ "00001432": 303,
821
+ "00001437": 304,
822
+ "00001445": 305,
823
+ "00001453": 306,
824
+ "00001466": 307,
825
+ "00001467": 308,
826
+ "00001475": 309,
827
+ "00001485": 310,
828
+ "00001487": 311,
829
+ "00001488": 312,
830
+ "00001490": 313,
831
+ "00001491": 314,
832
+ "00001492": 315,
833
+ "00001496": 316,
834
+ "00001498": 317,
835
+ "00001499": 318,
836
+ "00001500": 319,
837
+ "00001502": 320,
838
+ "00001506": 321,
839
+ "00001507": 322,
840
+ "00001508": 323,
841
+ "00001522": 324,
842
+ "00001525": 325,
843
+ "00001535": 326,
844
+ "00001545": 327,
845
+ "00001547": 328,
846
+ "00001549": 329,
847
+ "00001559": 330,
848
+ "00001569": 331,
849
+ "00001579": 332,
850
+ "00001581": 333,
851
+ "00001587": 334,
852
+ "00001597": 335,
853
+ "00001599": 336,
854
+ "00001601": 337,
855
+ "00001605": 338,
856
+ "00001613": 339,
857
+ "00001616": 340,
858
+ "00001621": 341,
859
+ "00001630": 342,
860
+ "00001631": 343,
861
+ "00001633": 344,
862
+ "00001634": 345,
863
+ "00001640": 346,
864
+ "00001642": 347,
865
+ "00001644": 348,
866
+ "00001645": 349,
867
+ "00001646": 350,
868
+ "00001651": 351,
869
+ "00001667": 352,
870
+ "00001675": 353,
871
+ "00001677": 354,
872
+ "00001680": 355,
873
+ "00001691": 356,
874
+ "00001694": 357,
875
+ "00001698": 358,
876
+ "00001703": 359,
877
+ "00001705": 360,
878
+ "00001706": 361,
879
+ "00001712": 362,
880
+ "00001714": 363,
881
+ "00001718": 364,
882
+ "00001720": 365,
883
+ "00001726": 366,
884
+ "00001728": 367,
885
+ "00001729": 368,
886
+ "00001738": 369,
887
+ "00001740": 370,
888
+ "00001745": 371,
889
+ "00001751": 372,
890
+ "00001752": 373,
891
+ "00001777": 374,
892
+ "00001779": 375,
893
+ "00001783": 376,
894
+ "00001790": 377,
895
+ "00001796": 378,
896
+ "00001801": 379,
897
+ "00001807": 380,
898
+ "00001821": 381,
899
+ "00001828": 382,
900
+ "00001829": 383,
901
+ "00001832": 384,
902
+ "00001834": 385,
903
+ "00001836": 386,
904
+ "00001840": 387,
905
+ "00001844": 388,
906
+ "00001873": 389,
907
+ "00001892": 390,
908
+ "00001893": 391,
909
+ "00001899": 392,
910
+ "00001907": 393,
911
+ "00001909": 394,
912
+ "00001913": 395,
913
+ "00001914": 396,
914
+ "00001916": 397,
915
+ "00001917": 398,
916
+ "00001926": 399,
917
+ "00001929": 400,
918
+ "00001964": 401,
919
+ "00001966": 402,
920
+ "00001970": 403,
921
+ "00001973": 404,
922
+ "00001977": 405,
923
+ "00001979": 406,
924
+ "00001980": 407,
925
+ "00001984": 408,
926
+ "00001988": 409,
927
+ "00001989": 410,
928
+ "00001998": 411,
929
+ "00002000": 412,
930
+ "00002006": 413,
931
+ "00002009": 414,
932
+ "00002011": 415,
933
+ "00002016": 416,
934
+ "00002031": 417,
935
+ "00002033": 418,
936
+ "00002039": 419,
937
+ "00002040": 420,
938
+ "00002047": 421,
939
+ "00002056": 422,
940
+ "00002058": 423,
941
+ "00002063": 424,
942
+ "00002068": 425,
943
+ "00002071": 426,
944
+ "00002077": 427,
945
+ "00002087": 428,
946
+ "00002093": 429,
947
+ "00002098": 430,
948
+ "00002108": 431,
949
+ "00002109": 432,
950
+ "00002114": 433,
951
+ "00002119": 434,
952
+ "00002121": 435,
953
+ "00002126": 436,
954
+ "00002129": 437,
955
+ "00002136": 438,
956
+ "00002137": 439,
957
+ "00002138": 440,
958
+ "00002146": 441,
959
+ "00002154": 442,
960
+ "00002159": 443,
961
+ "00002161": 444,
962
+ "00002162": 445,
963
+ "00002173": 446,
964
+ "00002178": 447,
965
+ "00002183": 448,
966
+ "00002185": 449,
967
+ "00002195": 450,
968
+ "00002197": 451,
969
+ "00002200": 452,
970
+ "00002202": 453,
971
+ "00002203": 454,
972
+ "00002208": 455,
973
+ "00002209": 456,
974
+ "00002210": 457,
975
+ "00002214": 458,
976
+ "00002225": 459,
977
+ "00002227": 460,
978
+ "00002231": 461,
979
+ "00002235": 462,
980
+ "00002236": 463,
981
+ "00002241": 464,
982
+ "00002243": 465,
983
+ "00002254": 466,
984
+ "00002256": 467,
985
+ "00002257": 468,
986
+ "00002259": 469,
987
+ "00002261": 470,
988
+ "00002263": 471,
989
+ "00002267": 472,
990
+ "00002270": 473,
991
+ "00002276": 474,
992
+ "00002278": 475,
993
+ "00002316": 476,
994
+ "00002320": 477,
995
+ "00002326": 478,
996
+ "00002328": 479,
997
+ "00002331": 480,
998
+ "00002332": 481,
999
+ "00002334": 482,
1000
+ "00002337": 483,
1001
+ "00002341": 484,
1002
+ "00002352": 485,
1003
+ "00002356": 486,
1004
+ "00002365": 487,
1005
+ "00002366": 488,
1006
+ "00002371": 489,
1007
+ "00002378": 490,
1008
+ "00002379": 491,
1009
+ "00002381": 492,
1010
+ "00002387": 493,
1011
+ "00002393": 494,
1012
+ "00002398": 495,
1013
+ "00002399": 496,
1014
+ "00002405": 497,
1015
+ "00002406": 498,
1016
+ "00002414": 499
+ },
+ "layer_norm_eps": 1e-06,
+ "layerscale_value": 1.0,
+ "mlp_ratio": 4,
+ "model_type": "dinov2",
+ "num_attention_heads": 12,
+ "num_channels": 3,
+ "num_hidden_layers": 12,
+ "out_features": [
+ "stage12"
+ ],
+ "out_indices": [
+ 12
+ ],
+ "patch_size": 14,
+ "problem_type": "single_label_classification",
+ "qkv_bias": true,
+ "reshape_hidden_states": true,
+ "stage_names": [
+ "stem",
+ "stage1",
+ "stage2",
+ "stage3",
+ "stage4",
+ "stage5",
+ "stage6",
+ "stage7",
+ "stage8",
+ "stage9",
+ "stage10",
+ "stage11",
+ "stage12"
+ ],
+ "torch_dtype": "float32",
+ "transformers_version": "4.38.2",
+ "use_swiglu_ffn": false
+ }
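
Because this config follows the standard Transformers layout (Dinov2ForImageClassification with a 500-entry id2label map), the fine-tuned classifier can be loaded with the usual Auto classes. A minimal sketch, assuming the checkpoint identifier below is a placeholder for this repository (or a local clone of it) and that Pillow and torch are installed:

import torch
from PIL import Image
from transformers import AutoImageProcessor, AutoModelForImageClassification

ckpt = "alicelouis/dinov2-base-finetuned"  # hypothetical id; replace with this repo's actual id or a local path

processor = AutoImageProcessor.from_pretrained(ckpt)
model = AutoModelForImageClassification.from_pretrained(ckpt)
model.eval()

image = Image.open("example.jpg").convert("RGB")   # any input image
inputs = processor(images=image, return_tensors="pt")

with torch.no_grad():
    logits = model(**inputs).logits                # shape (1, 500): one logit per label

pred = logits.argmax(-1).item()
print(model.config.id2label[pred])                 # id2label keys are cast to int when the config is loaded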
eval_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+ "epoch": 30.0,
+ "eval_accuracy": 0.6914285714285714,
+ "eval_loss": 1.726584792137146,
+ "eval_runtime": 14.2787,
+ "eval_samples_per_second": 49.024,
+ "eval_steps_per_second": 0.77
+ }
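
The throughput figures are consistent with a validation split of roughly 700 images, which would put the reported accuracy at about 484 correct predictions. A back-of-the-envelope sketch using only the numbers above:

eval_runtime = 14.2787            # seconds
samples_per_second = 49.024
eval_accuracy = 0.6914285714285714

n_eval = eval_runtime * samples_per_second    # ~700 images
n_correct = eval_accuracy * round(n_eval)     # ~484 correct predictions
print(round(n_eval), round(n_correct))        # 700 484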
train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+ "epoch": 30.0,
+ "total_flos": 5.2828663104e+18,
+ "train_loss": 0.7520242673994014,
+ "train_runtime": 3943.4049,
+ "train_samples_per_second": 21.301,
+ "train_steps_per_second": 0.084
+ }
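
The training throughput lines up with the optimizer-step count recorded in trainer_state.json (330 steps over 30 epochs, i.e. 11 steps per epoch). A quick consistency check, again derived only from the values reported in these files:

train_runtime = 3943.4049          # seconds (~66 minutes)
steps_per_second = 0.084
samples_per_second = 21.301

total_steps = train_runtime * steps_per_second      # ~331, consistent with max_steps = 330
samples_seen = train_runtime * samples_per_second   # ~84,000 training samples processed over 30 epochs
print(round(total_steps), round(samples_seen))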
trainer_state.json ADDED
@@ -0,0 +1,531 @@
+ {
+ "best_metric": 0.6914285714285714,
+ "best_model_checkpoint": "dinov2-base-finetuned-eurosat/checkpoint-308",
+ "epoch": 30.0,
+ "eval_steps": 500,
+ "global_step": 330,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
11
+ {
12
+ "epoch": 0.91,
13
+ "grad_norm": 71.39833068847656,
14
+ "learning_rate": 1.5151515151515153e-05,
15
+ "loss": 6.646,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_accuracy": 0.004285714285714286,
21
+ "eval_loss": 6.344563961029053,
22
+ "eval_runtime": 12.2748,
23
+ "eval_samples_per_second": 57.027,
24
+ "eval_steps_per_second": 0.896,
25
+ "step": 11
26
+ },
27
+ {
28
+ "epoch": 1.82,
29
+ "grad_norm": 36.275108337402344,
30
+ "learning_rate": 3.0303030303030306e-05,
31
+ "loss": 6.0586,
32
+ "step": 20
33
+ },
34
+ {
35
+ "epoch": 2.0,
36
+ "eval_accuracy": 0.037142857142857144,
37
+ "eval_loss": 5.812839031219482,
38
+ "eval_runtime": 11.6948,
39
+ "eval_samples_per_second": 59.856,
40
+ "eval_steps_per_second": 0.941,
41
+ "step": 22
42
+ },
43
+ {
44
+ "epoch": 2.73,
45
+ "grad_norm": 78.4278564453125,
46
+ "learning_rate": 4.545454545454546e-05,
47
+ "loss": 4.9553,
48
+ "step": 30
49
+ },
50
+ {
51
+ "epoch": 3.0,
52
+ "eval_accuracy": 0.24285714285714285,
53
+ "eval_loss": 4.52340030670166,
54
+ "eval_runtime": 11.5613,
55
+ "eval_samples_per_second": 60.547,
56
+ "eval_steps_per_second": 0.951,
57
+ "step": 33
58
+ },
59
+ {
60
+ "epoch": 3.64,
61
+ "grad_norm": 94.20513153076172,
62
+ "learning_rate": 4.882154882154882e-05,
63
+ "loss": 3.2097,
64
+ "step": 40
65
+ },
66
+ {
67
+ "epoch": 4.0,
68
+ "eval_accuracy": 0.48428571428571426,
69
+ "eval_loss": 3.1874964237213135,
70
+ "eval_runtime": 11.6294,
71
+ "eval_samples_per_second": 60.192,
72
+ "eval_steps_per_second": 0.946,
73
+ "step": 44
74
+ },
75
+ {
76
+ "epoch": 4.55,
77
+ "grad_norm": 55.16205596923828,
78
+ "learning_rate": 4.713804713804714e-05,
79
+ "loss": 1.6208,
80
+ "step": 50
81
+ },
82
+ {
83
+ "epoch": 5.0,
84
+ "eval_accuracy": 0.5957142857142858,
85
+ "eval_loss": 2.3652451038360596,
86
+ "eval_runtime": 11.6572,
87
+ "eval_samples_per_second": 60.048,
88
+ "eval_steps_per_second": 0.944,
89
+ "step": 55
90
+ },
91
+ {
92
+ "epoch": 5.45,
93
+ "grad_norm": 28.252750396728516,
94
+ "learning_rate": 4.545454545454546e-05,
95
+ "loss": 0.7822,
96
+ "step": 60
97
+ },
98
+ {
99
+ "epoch": 6.0,
100
+ "eval_accuracy": 0.6485714285714286,
101
+ "eval_loss": 2.007438898086548,
102
+ "eval_runtime": 11.7326,
103
+ "eval_samples_per_second": 59.663,
104
+ "eval_steps_per_second": 0.938,
105
+ "step": 66
106
+ },
107
+ {
108
+ "epoch": 6.36,
109
+ "grad_norm": 17.972673416137695,
110
+ "learning_rate": 4.3771043771043774e-05,
111
+ "loss": 0.3699,
112
+ "step": 70
113
+ },
114
+ {
115
+ "epoch": 7.0,
116
+ "eval_accuracy": 0.66,
117
+ "eval_loss": 1.9254851341247559,
118
+ "eval_runtime": 11.7512,
119
+ "eval_samples_per_second": 59.569,
120
+ "eval_steps_per_second": 0.936,
121
+ "step": 77
122
+ },
123
+ {
124
+ "epoch": 7.27,
125
+ "grad_norm": 21.875259399414062,
126
+ "learning_rate": 4.208754208754209e-05,
127
+ "loss": 0.1745,
128
+ "step": 80
129
+ },
130
+ {
131
+ "epoch": 8.0,
132
+ "eval_accuracy": 0.6557142857142857,
133
+ "eval_loss": 1.865968942642212,
134
+ "eval_runtime": 11.609,
135
+ "eval_samples_per_second": 60.298,
136
+ "eval_steps_per_second": 0.948,
137
+ "step": 88
138
+ },
139
+ {
140
+ "epoch": 8.18,
141
+ "grad_norm": 13.34464168548584,
142
+ "learning_rate": 4.0404040404040405e-05,
143
+ "loss": 0.1285,
144
+ "step": 90
145
+ },
146
+ {
147
+ "epoch": 9.0,
148
+ "eval_accuracy": 0.6571428571428571,
149
+ "eval_loss": 1.8786249160766602,
150
+ "eval_runtime": 11.6967,
151
+ "eval_samples_per_second": 59.846,
152
+ "eval_steps_per_second": 0.94,
153
+ "step": 99
154
+ },
155
+ {
156
+ "epoch": 9.09,
157
+ "grad_norm": 11.297475814819336,
158
+ "learning_rate": 3.872053872053872e-05,
159
+ "loss": 0.1178,
160
+ "step": 100
161
+ },
162
+ {
163
+ "epoch": 10.0,
164
+ "grad_norm": 16.153575897216797,
165
+ "learning_rate": 3.7037037037037037e-05,
166
+ "loss": 0.0883,
167
+ "step": 110
168
+ },
169
+ {
170
+ "epoch": 10.0,
171
+ "eval_accuracy": 0.6585714285714286,
172
+ "eval_loss": 1.8617857694625854,
173
+ "eval_runtime": 11.6432,
174
+ "eval_samples_per_second": 60.121,
175
+ "eval_steps_per_second": 0.945,
176
+ "step": 110
177
+ },
178
+ {
179
+ "epoch": 10.91,
180
+ "grad_norm": 13.22706127166748,
181
+ "learning_rate": 3.535353535353535e-05,
182
+ "loss": 0.0721,
183
+ "step": 120
184
+ },
185
+ {
186
+ "epoch": 11.0,
187
+ "eval_accuracy": 0.6514285714285715,
188
+ "eval_loss": 1.9431724548339844,
189
+ "eval_runtime": 12.6039,
190
+ "eval_samples_per_second": 55.538,
191
+ "eval_steps_per_second": 0.873,
192
+ "step": 121
193
+ },
194
+ {
195
+ "epoch": 11.82,
196
+ "grad_norm": 8.195013046264648,
197
+ "learning_rate": 3.3670033670033675e-05,
198
+ "loss": 0.0693,
199
+ "step": 130
200
+ },
201
+ {
202
+ "epoch": 12.0,
203
+ "eval_accuracy": 0.6642857142857143,
204
+ "eval_loss": 1.873042345046997,
205
+ "eval_runtime": 11.7524,
206
+ "eval_samples_per_second": 59.562,
207
+ "eval_steps_per_second": 0.936,
208
+ "step": 132
209
+ },
210
+ {
211
+ "epoch": 12.73,
212
+ "grad_norm": 9.13159465789795,
213
+ "learning_rate": 3.198653198653199e-05,
214
+ "loss": 0.0901,
215
+ "step": 140
216
+ },
217
+ {
218
+ "epoch": 13.0,
219
+ "eval_accuracy": 0.6557142857142857,
220
+ "eval_loss": 1.8676621913909912,
221
+ "eval_runtime": 11.7011,
222
+ "eval_samples_per_second": 59.823,
223
+ "eval_steps_per_second": 0.94,
224
+ "step": 143
225
+ },
226
+ {
227
+ "epoch": 13.64,
228
+ "grad_norm": 5.170494556427002,
229
+ "learning_rate": 3.0303030303030306e-05,
230
+ "loss": 0.0608,
231
+ "step": 150
232
+ },
233
+ {
234
+ "epoch": 14.0,
235
+ "eval_accuracy": 0.6757142857142857,
236
+ "eval_loss": 1.846497654914856,
237
+ "eval_runtime": 12.5066,
238
+ "eval_samples_per_second": 55.971,
239
+ "eval_steps_per_second": 0.88,
240
+ "step": 154
241
+ },
242
+ {
243
+ "epoch": 14.55,
244
+ "grad_norm": 4.774472713470459,
245
+ "learning_rate": 2.8619528619528618e-05,
246
+ "loss": 0.0443,
247
+ "step": 160
248
+ },
249
+ {
250
+ "epoch": 15.0,
251
+ "eval_accuracy": 0.6642857142857143,
252
+ "eval_loss": 1.8421980142593384,
253
+ "eval_runtime": 11.808,
254
+ "eval_samples_per_second": 59.282,
255
+ "eval_steps_per_second": 0.932,
256
+ "step": 165
257
+ },
258
+ {
259
+ "epoch": 15.45,
260
+ "grad_norm": 2.623682737350464,
261
+ "learning_rate": 2.6936026936026937e-05,
262
+ "loss": 0.0552,
263
+ "step": 170
264
+ },
265
+ {
266
+ "epoch": 16.0,
267
+ "eval_accuracy": 0.6585714285714286,
268
+ "eval_loss": 1.9717400074005127,
269
+ "eval_runtime": 11.7743,
270
+ "eval_samples_per_second": 59.451,
271
+ "eval_steps_per_second": 0.934,
272
+ "step": 176
273
+ },
274
+ {
275
+ "epoch": 16.36,
276
+ "grad_norm": 3.4440066814422607,
277
+ "learning_rate": 2.5252525252525256e-05,
278
+ "loss": 0.0416,
279
+ "step": 180
280
+ },
281
+ {
282
+ "epoch": 17.0,
283
+ "eval_accuracy": 0.6657142857142857,
284
+ "eval_loss": 1.8076777458190918,
285
+ "eval_runtime": 11.7226,
286
+ "eval_samples_per_second": 59.714,
287
+ "eval_steps_per_second": 0.938,
288
+ "step": 187
289
+ },
290
+ {
291
+ "epoch": 17.27,
292
+ "grad_norm": 8.230661392211914,
293
+ "learning_rate": 2.356902356902357e-05,
294
+ "loss": 0.0366,
295
+ "step": 190
296
+ },
297
+ {
298
+ "epoch": 18.0,
299
+ "eval_accuracy": 0.6742857142857143,
300
+ "eval_loss": 1.8198397159576416,
301
+ "eval_runtime": 11.6594,
302
+ "eval_samples_per_second": 60.037,
303
+ "eval_steps_per_second": 0.943,
304
+ "step": 198
305
+ },
306
+ {
307
+ "epoch": 18.18,
308
+ "grad_norm": 3.6574606895446777,
309
+ "learning_rate": 2.1885521885521887e-05,
310
+ "loss": 0.0313,
311
+ "step": 200
312
+ },
313
+ {
314
+ "epoch": 19.0,
315
+ "eval_accuracy": 0.6757142857142857,
316
+ "eval_loss": 1.8081269264221191,
317
+ "eval_runtime": 11.957,
318
+ "eval_samples_per_second": 58.543,
319
+ "eval_steps_per_second": 0.92,
320
+ "step": 209
321
+ },
322
+ {
323
+ "epoch": 19.09,
324
+ "grad_norm": 4.515919208526611,
325
+ "learning_rate": 2.0202020202020203e-05,
326
+ "loss": 0.0272,
327
+ "step": 210
328
+ },
329
+ {
330
+ "epoch": 20.0,
331
+ "grad_norm": 4.542725086212158,
332
+ "learning_rate": 1.8518518518518518e-05,
333
+ "loss": 0.0296,
334
+ "step": 220
335
+ },
336
+ {
337
+ "epoch": 20.0,
338
+ "eval_accuracy": 0.6785714285714286,
339
+ "eval_loss": 1.776505947113037,
340
+ "eval_runtime": 11.5903,
341
+ "eval_samples_per_second": 60.395,
342
+ "eval_steps_per_second": 0.949,
343
+ "step": 220
344
+ },
345
+ {
346
+ "epoch": 20.91,
347
+ "grad_norm": 2.6347365379333496,
348
+ "learning_rate": 1.6835016835016837e-05,
349
+ "loss": 0.0215,
350
+ "step": 230
351
+ },
352
+ {
353
+ "epoch": 21.0,
354
+ "eval_accuracy": 0.6828571428571428,
355
+ "eval_loss": 1.6916331052780151,
356
+ "eval_runtime": 13.3341,
357
+ "eval_samples_per_second": 52.497,
358
+ "eval_steps_per_second": 0.825,
359
+ "step": 231
360
+ },
361
+ {
362
+ "epoch": 21.82,
363
+ "grad_norm": 0.4444705545902252,
364
+ "learning_rate": 1.5151515151515153e-05,
365
+ "loss": 0.0144,
366
+ "step": 240
367
+ },
368
+ {
369
+ "epoch": 22.0,
370
+ "eval_accuracy": 0.6857142857142857,
371
+ "eval_loss": 1.7237095832824707,
372
+ "eval_runtime": 11.4907,
373
+ "eval_samples_per_second": 60.919,
374
+ "eval_steps_per_second": 0.957,
375
+ "step": 242
376
+ },
377
+ {
378
+ "epoch": 22.73,
379
+ "grad_norm": 4.013304710388184,
380
+ "learning_rate": 1.3468013468013468e-05,
381
+ "loss": 0.0108,
382
+ "step": 250
383
+ },
384
+ {
385
+ "epoch": 23.0,
386
+ "eval_accuracy": 0.67,
387
+ "eval_loss": 1.792176365852356,
388
+ "eval_runtime": 11.5859,
389
+ "eval_samples_per_second": 60.418,
390
+ "eval_steps_per_second": 0.949,
391
+ "step": 253
392
+ },
393
+ {
394
+ "epoch": 23.64,
395
+ "grad_norm": 0.9613437056541443,
396
+ "learning_rate": 1.1784511784511786e-05,
397
+ "loss": 0.0232,
398
+ "step": 260
399
+ },
400
+ {
401
+ "epoch": 24.0,
402
+ "eval_accuracy": 0.6828571428571428,
403
+ "eval_loss": 1.7594307661056519,
404
+ "eval_runtime": 12.8499,
405
+ "eval_samples_per_second": 54.475,
406
+ "eval_steps_per_second": 0.856,
407
+ "step": 264
408
+ },
409
+ {
410
+ "epoch": 24.55,
411
+ "grad_norm": 2.5503318309783936,
412
+ "learning_rate": 1.0101010101010101e-05,
413
+ "loss": 0.0129,
414
+ "step": 270
415
+ },
416
+ {
417
+ "epoch": 25.0,
418
+ "eval_accuracy": 0.6828571428571428,
419
+ "eval_loss": 1.7361136674880981,
420
+ "eval_runtime": 11.7158,
421
+ "eval_samples_per_second": 59.749,
422
+ "eval_steps_per_second": 0.939,
423
+ "step": 275
424
+ },
425
+ {
426
+ "epoch": 25.45,
427
+ "grad_norm": 5.675755977630615,
428
+ "learning_rate": 8.417508417508419e-06,
429
+ "loss": 0.0093,
430
+ "step": 280
431
+ },
432
+ {
433
+ "epoch": 26.0,
434
+ "eval_accuracy": 0.6828571428571428,
435
+ "eval_loss": 1.7426681518554688,
436
+ "eval_runtime": 12.593,
437
+ "eval_samples_per_second": 55.586,
438
+ "eval_steps_per_second": 0.873,
439
+ "step": 286
440
+ },
441
+ {
442
+ "epoch": 26.36,
443
+ "grad_norm": 2.090123176574707,
444
+ "learning_rate": 6.734006734006734e-06,
445
+ "loss": 0.0067,
446
+ "step": 290
447
+ },
448
+ {
449
+ "epoch": 27.0,
450
+ "eval_accuracy": 0.69,
451
+ "eval_loss": 1.730440378189087,
452
+ "eval_runtime": 11.8655,
453
+ "eval_samples_per_second": 58.995,
454
+ "eval_steps_per_second": 0.927,
455
+ "step": 297
456
+ },
457
+ {
458
+ "epoch": 27.27,
459
+ "grad_norm": 0.6074270009994507,
460
+ "learning_rate": 5.050505050505051e-06,
461
+ "loss": 0.0013,
462
+ "step": 300
463
+ },
464
+ {
465
+ "epoch": 28.0,
466
+ "eval_accuracy": 0.6914285714285714,
467
+ "eval_loss": 1.726584792137146,
468
+ "eval_runtime": 11.8751,
469
+ "eval_samples_per_second": 58.947,
470
+ "eval_steps_per_second": 0.926,
471
+ "step": 308
472
+ },
473
+ {
474
+ "epoch": 28.18,
475
+ "grad_norm": 0.04077678918838501,
476
+ "learning_rate": 3.367003367003367e-06,
477
+ "loss": 0.0031,
478
+ "step": 310
479
+ },
480
+ {
481
+ "epoch": 29.0,
482
+ "eval_accuracy": 0.69,
483
+ "eval_loss": 1.7368921041488647,
484
+ "eval_runtime": 11.7621,
485
+ "eval_samples_per_second": 59.513,
486
+ "eval_steps_per_second": 0.935,
487
+ "step": 319
488
+ },
489
+ {
490
+ "epoch": 29.09,
491
+ "grad_norm": 0.32179221510887146,
492
+ "learning_rate": 1.6835016835016836e-06,
493
+ "loss": 0.002,
494
+ "step": 320
495
+ },
496
+ {
497
+ "epoch": 30.0,
498
+ "grad_norm": 0.05003494769334793,
499
+ "learning_rate": 0.0,
500
+ "loss": 0.0019,
501
+ "step": 330
502
+ },
503
+ {
504
+ "epoch": 30.0,
505
+ "eval_accuracy": 0.69,
506
+ "eval_loss": 1.7391921281814575,
507
+ "eval_runtime": 12.8063,
508
+ "eval_samples_per_second": 54.661,
509
+ "eval_steps_per_second": 0.859,
510
+ "step": 330
511
+ },
512
+ {
513
+ "epoch": 30.0,
514
+ "step": 330,
515
+ "total_flos": 5.2828663104e+18,
516
+ "train_loss": 0.7520242673994014,
517
+ "train_runtime": 3943.4049,
518
+ "train_samples_per_second": 21.301,
519
+ "train_steps_per_second": 0.084
520
+ }
+ ],
+ "logging_steps": 10,
+ "max_steps": 330,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 30,
+ "save_steps": 500,
+ "total_flos": 5.2828663104e+18,
+ "train_batch_size": 64,
+ "trial_name": null,
+ "trial_params": null
+ }
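
The log_history array carries the per-epoch evaluation metrics, so the training curve can be recovered directly from this file. A minimal sketch that extracts the accuracy trajectory and the best epoch, assuming trainer_state.json is read from a local clone of this repository:

import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only the evaluation entries (they carry an "eval_accuracy" key).
evals = [e for e in state["log_history"] if "eval_accuracy" in e]

for e in evals:
    print(f"epoch {e['epoch']:>5}: accuracy {e['eval_accuracy']:.4f}, loss {e['eval_loss']:.4f}")

best = max(evals, key=lambda e: e["eval_accuracy"])
print("best:", best["epoch"], best["eval_accuracy"])   # epoch 28.0, 0.6914..., i.e. checkpoint-308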
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:727c17ab3fa4cb2a69eceaf858e280e7294ea5a1c3007c8c8d83f3d538762f85
+ size 4920
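
Only the Git LFS pointer is committed here; the 4.9 kB binary itself is the pickled TrainingArguments object that the Trainer saves alongside a run. A hedged sketch of how it is typically inspected, assuming the real file has been fetched with git lfs pull and that the local transformers/torch versions can unpickle it:

import torch

# training_args.bin is written with torch.save(); loading it back should give the
# TrainingArguments used for this run (learning rate, batch size, epochs, ...).
# weights_only=False is needed on recent PyTorch because this is a pickled object,
# not a plain tensor checkpoint.
args = torch.load("training_args.bin", weights_only=False)

print(args.num_train_epochs)              # expected: 30
print(args.per_device_train_batch_size)   # expected: 64, per trainer_state.json
print(args.learning_rate)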