laszlokiss27 commited on
Commit
5bd74a2
1 Parent(s): cf3e670

Upload 8 files

Browse files
all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "eval_accuracy": 0.774412,
4
+ "eval_loss": 0.8919877409934998,
5
+ "eval_runtime": 138.4801,
6
+ "eval_samples_per_second": 1805.314,
7
+ "eval_steps_per_second": 7.055,
8
+ "total_flos": 1.93274424e+18,
9
+ "train_loss": 0.9357909288237652,
10
+ "train_runtime": 45635.435,
11
+ "train_samples_per_second": 493.038,
12
+ "train_steps_per_second": 1.926
13
+ }
config.json ADDED
@@ -0,0 +1,737 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "apple/mobilevitv2-1.0-imagenet1k-256",
3
+ "architectures": [
4
+ "MobileViTV2ForImageClassification"
5
+ ],
6
+ "aspp_dropout_prob": 0.1,
7
+ "aspp_out_channels": 512,
8
+ "atrous_rates": [
9
+ 6,
10
+ 12,
11
+ 18
12
+ ],
13
+ "attn_dropout": 0.0,
14
+ "base_attn_unit_dims": [
15
+ 128,
16
+ 192,
17
+ 256
18
+ ],
19
+ "classifier_dropout_prob": 0.1,
20
+ "conv_kernel_size": 3,
21
+ "expand_ratio": 2.0,
22
+ "ffn_dropout": 0.0,
23
+ "ffn_multiplier": 2,
24
+ "hidden_act": "swish",
25
+ "id2label": {
26
+ "0": "aircraft carrier",
27
+ "1": "airplane",
28
+ "10": "asparagus",
29
+ "100": "dumbbell",
30
+ "101": "ear",
31
+ "102": "elbow",
32
+ "103": "elephant",
33
+ "104": "envelope",
34
+ "105": "eraser",
35
+ "106": "eye",
36
+ "107": "eyeglasses",
37
+ "108": "face",
38
+ "109": "fan",
39
+ "11": "axe",
40
+ "110": "feather",
41
+ "111": "fence",
42
+ "112": "finger",
43
+ "113": "fire hydrant",
44
+ "114": "fireplace",
45
+ "115": "firetruck",
46
+ "116": "fish",
47
+ "117": "flamingo",
48
+ "118": "flashlight",
49
+ "119": "flip flops",
50
+ "12": "backpack",
51
+ "120": "floor lamp",
52
+ "121": "flower",
53
+ "122": "flying saucer",
54
+ "123": "foot",
55
+ "124": "fork",
56
+ "125": "frog",
57
+ "126": "frying pan",
58
+ "127": "garden hose",
59
+ "128": "garden",
60
+ "129": "giraffe",
61
+ "13": "banana",
62
+ "130": "goatee",
63
+ "131": "golf club",
64
+ "132": "grapes",
65
+ "133": "grass",
66
+ "134": "guitar",
67
+ "135": "hamburger",
68
+ "136": "hammer",
69
+ "137": "hand",
70
+ "138": "harp",
71
+ "139": "hat",
72
+ "14": "bandage",
73
+ "140": "headphones",
74
+ "141": "hedgehog",
75
+ "142": "helicopter",
76
+ "143": "helmet",
77
+ "144": "hexagon",
78
+ "145": "hockey puck",
79
+ "146": "hockey stick",
80
+ "147": "horse",
81
+ "148": "hospital",
82
+ "149": "hot air balloon",
83
+ "15": "barn",
84
+ "150": "hot dog",
85
+ "151": "hot tub",
86
+ "152": "hourglass",
87
+ "153": "house plant",
88
+ "154": "house",
89
+ "155": "hurricane",
90
+ "156": "ice cream",
91
+ "157": "jacket",
92
+ "158": "jail",
93
+ "159": "kangaroo",
94
+ "16": "baseball bat",
95
+ "160": "key",
96
+ "161": "keyboard",
97
+ "162": "knee",
98
+ "163": "knife",
99
+ "164": "ladder",
100
+ "165": "lantern",
101
+ "166": "laptop",
102
+ "167": "leaf",
103
+ "168": "leg",
104
+ "169": "light bulb",
105
+ "17": "baseball",
106
+ "170": "lighter",
107
+ "171": "lighthouse",
108
+ "172": "lightning",
109
+ "173": "line",
110
+ "174": "lion",
111
+ "175": "lipstick",
112
+ "176": "lobster",
113
+ "177": "lollipop",
114
+ "178": "mailbox",
115
+ "179": "map",
116
+ "18": "basket",
117
+ "180": "marker",
118
+ "181": "matches",
119
+ "182": "megaphone",
120
+ "183": "mermaid",
121
+ "184": "microphone",
122
+ "185": "microwave",
123
+ "186": "monkey",
124
+ "187": "moon",
125
+ "188": "mosquito",
126
+ "189": "motorbike",
127
+ "19": "basketball",
128
+ "190": "mountain",
129
+ "191": "mouse",
130
+ "192": "moustache",
131
+ "193": "mouth",
132
+ "194": "mug",
133
+ "195": "mushroom",
134
+ "196": "nail",
135
+ "197": "necklace",
136
+ "198": "nose",
137
+ "199": "ocean",
138
+ "2": "alarm clock",
139
+ "20": "bat",
140
+ "200": "octagon",
141
+ "201": "octopus",
142
+ "202": "onion",
143
+ "203": "oven",
144
+ "204": "owl",
145
+ "205": "paint can",
146
+ "206": "paintbrush",
147
+ "207": "palm tree",
148
+ "208": "panda",
149
+ "209": "pants",
150
+ "21": "bathtub",
151
+ "210": "paper clip",
152
+ "211": "parachute",
153
+ "212": "parrot",
154
+ "213": "passport",
155
+ "214": "peanut",
156
+ "215": "pear",
157
+ "216": "peas",
158
+ "217": "pencil",
159
+ "218": "penguin",
160
+ "219": "piano",
161
+ "22": "beach",
162
+ "220": "pickup truck",
163
+ "221": "picture frame",
164
+ "222": "pig",
165
+ "223": "pillow",
166
+ "224": "pineapple",
167
+ "225": "pizza",
168
+ "226": "pliers",
169
+ "227": "police car",
170
+ "228": "pond",
171
+ "229": "pool",
172
+ "23": "bear",
173
+ "230": "popsicle",
174
+ "231": "postcard",
175
+ "232": "potato",
176
+ "233": "power outlet",
177
+ "234": "purse",
178
+ "235": "rabbit",
179
+ "236": "raccoon",
180
+ "237": "radio",
181
+ "238": "rain",
182
+ "239": "rainbow",
183
+ "24": "beard",
184
+ "240": "rake",
185
+ "241": "remote control",
186
+ "242": "rhinoceros",
187
+ "243": "rifle",
188
+ "244": "river",
189
+ "245": "roller coaster",
190
+ "246": "rollerskates",
191
+ "247": "sailboat",
192
+ "248": "sandwich",
193
+ "249": "saw",
194
+ "25": "bed",
195
+ "250": "saxophone",
196
+ "251": "school bus",
197
+ "252": "scissors",
198
+ "253": "scorpion",
199
+ "254": "screwdriver",
200
+ "255": "sea turtle",
201
+ "256": "see saw",
202
+ "257": "shark",
203
+ "258": "sheep",
204
+ "259": "shoe",
205
+ "26": "bee",
206
+ "260": "shorts",
207
+ "261": "shovel",
208
+ "262": "sink",
209
+ "263": "skateboard",
210
+ "264": "skull",
211
+ "265": "skyscraper",
212
+ "266": "sleeping bag",
213
+ "267": "smiley face",
214
+ "268": "snail",
215
+ "269": "snake",
216
+ "27": "belt",
217
+ "270": "snorkel",
218
+ "271": "snowflake",
219
+ "272": "snowman",
220
+ "273": "soccer ball",
221
+ "274": "sock",
222
+ "275": "speedboat",
223
+ "276": "spider",
224
+ "277": "spoon",
225
+ "278": "spreadsheet",
226
+ "279": "square",
227
+ "28": "bench",
228
+ "280": "squiggle",
229
+ "281": "squirrel",
230
+ "282": "stairs",
231
+ "283": "star",
232
+ "284": "steak",
233
+ "285": "stereo",
234
+ "286": "stethoscope",
235
+ "287": "stitches",
236
+ "288": "stop sign",
237
+ "289": "stove",
238
+ "29": "bicycle",
239
+ "290": "strawberry",
240
+ "291": "streetlight",
241
+ "292": "string bean",
242
+ "293": "submarine",
243
+ "294": "suitcase",
244
+ "295": "sun",
245
+ "296": "swan",
246
+ "297": "sweater",
247
+ "298": "swing set",
248
+ "299": "sword",
249
+ "3": "ambulance",
250
+ "30": "binoculars",
251
+ "300": "syringe",
252
+ "301": "t-shirt",
253
+ "302": "table",
254
+ "303": "teapot",
255
+ "304": "teddy-bear",
256
+ "305": "telephone",
257
+ "306": "television",
258
+ "307": "tennis racquet",
259
+ "308": "tent",
260
+ "309": "The Eiffel Tower",
261
+ "31": "bird",
262
+ "310": "The Great Wall of China",
263
+ "311": "The Mona Lisa",
264
+ "312": "tiger",
265
+ "313": "toaster",
266
+ "314": "toe",
267
+ "315": "toilet",
268
+ "316": "tooth",
269
+ "317": "toothbrush",
270
+ "318": "toothpaste",
271
+ "319": "tornado",
272
+ "32": "birthday cake",
273
+ "320": "tractor",
274
+ "321": "traffic light",
275
+ "322": "train",
276
+ "323": "tree",
277
+ "324": "triangle",
278
+ "325": "trombone",
279
+ "326": "truck",
280
+ "327": "trumpet",
281
+ "328": "umbrella",
282
+ "329": "underwear",
283
+ "33": "blackberry",
284
+ "330": "van",
285
+ "331": "vase",
286
+ "332": "violin",
287
+ "333": "washing machine",
288
+ "334": "watermelon",
289
+ "335": "waterslide",
290
+ "336": "whale",
291
+ "337": "wheel",
292
+ "338": "windmill",
293
+ "339": "wine bottle",
294
+ "34": "blueberry",
295
+ "340": "wine glass",
296
+ "341": "wristwatch",
297
+ "342": "yoga",
298
+ "343": "zebra",
299
+ "344": "zigzag",
300
+ "35": "book",
301
+ "36": "boomerang",
302
+ "37": "bottlecap",
303
+ "38": "bowtie",
304
+ "39": "bracelet",
305
+ "4": "angel",
306
+ "40": "brain",
307
+ "41": "bread",
308
+ "42": "bridge",
309
+ "43": "broccoli",
310
+ "44": "broom",
311
+ "45": "bucket",
312
+ "46": "bulldozer",
313
+ "47": "bus",
314
+ "48": "bush",
315
+ "49": "butterfly",
316
+ "5": "animal migration",
317
+ "50": "cactus",
318
+ "51": "cake",
319
+ "52": "calculator",
320
+ "53": "calendar",
321
+ "54": "camel",
322
+ "55": "camera",
323
+ "56": "camouflage",
324
+ "57": "campfire",
325
+ "58": "candle",
326
+ "59": "cannon",
327
+ "6": "ant",
328
+ "60": "canoe",
329
+ "61": "car",
330
+ "62": "carrot",
331
+ "63": "castle",
332
+ "64": "cat",
333
+ "65": "ceiling fan",
334
+ "66": "cell phone",
335
+ "67": "cello",
336
+ "68": "chair",
337
+ "69": "chandelier",
338
+ "7": "anvil",
339
+ "70": "church",
340
+ "71": "circle",
341
+ "72": "clarinet",
342
+ "73": "clock",
343
+ "74": "cloud",
344
+ "75": "coffee cup",
345
+ "76": "compass",
346
+ "77": "computer",
347
+ "78": "cookie",
348
+ "79": "cooler",
349
+ "8": "apple",
350
+ "80": "couch",
351
+ "81": "cow",
352
+ "82": "crab",
353
+ "83": "crayon",
354
+ "84": "crocodile",
355
+ "85": "crown",
356
+ "86": "cruise ship",
357
+ "87": "cup",
358
+ "88": "diamond",
359
+ "89": "dishwasher",
360
+ "9": "arm",
361
+ "90": "diving board",
362
+ "91": "dog",
363
+ "92": "dolphin",
364
+ "93": "donut",
365
+ "94": "door",
366
+ "95": "dragon",
367
+ "96": "dresser",
368
+ "97": "drill",
369
+ "98": "drums",
370
+ "99": "duck"
371
+ },
372
+ "image_size": 64,
373
+ "initializer_range": 0.02,
374
+ "label2id": {
375
+ "The Eiffel Tower": "309",
376
+ "The Great Wall of China": "310",
377
+ "The Mona Lisa": "311",
378
+ "aircraft carrier": "0",
379
+ "airplane": "1",
380
+ "alarm clock": "2",
381
+ "ambulance": "3",
382
+ "angel": "4",
383
+ "animal migration": "5",
384
+ "ant": "6",
385
+ "anvil": "7",
386
+ "apple": "8",
387
+ "arm": "9",
388
+ "asparagus": "10",
389
+ "axe": "11",
390
+ "backpack": "12",
391
+ "banana": "13",
392
+ "bandage": "14",
393
+ "barn": "15",
394
+ "baseball": "17",
395
+ "baseball bat": "16",
396
+ "basket": "18",
397
+ "basketball": "19",
398
+ "bat": "20",
399
+ "bathtub": "21",
400
+ "beach": "22",
401
+ "bear": "23",
402
+ "beard": "24",
403
+ "bed": "25",
404
+ "bee": "26",
405
+ "belt": "27",
406
+ "bench": "28",
407
+ "bicycle": "29",
408
+ "binoculars": "30",
409
+ "bird": "31",
410
+ "birthday cake": "32",
411
+ "blackberry": "33",
412
+ "blueberry": "34",
413
+ "book": "35",
414
+ "boomerang": "36",
415
+ "bottlecap": "37",
416
+ "bowtie": "38",
417
+ "bracelet": "39",
418
+ "brain": "40",
419
+ "bread": "41",
420
+ "bridge": "42",
421
+ "broccoli": "43",
422
+ "broom": "44",
423
+ "bucket": "45",
424
+ "bulldozer": "46",
425
+ "bus": "47",
426
+ "bush": "48",
427
+ "butterfly": "49",
428
+ "cactus": "50",
429
+ "cake": "51",
430
+ "calculator": "52",
431
+ "calendar": "53",
432
+ "camel": "54",
433
+ "camera": "55",
434
+ "camouflage": "56",
435
+ "campfire": "57",
436
+ "candle": "58",
437
+ "cannon": "59",
438
+ "canoe": "60",
439
+ "car": "61",
440
+ "carrot": "62",
441
+ "castle": "63",
442
+ "cat": "64",
443
+ "ceiling fan": "65",
444
+ "cell phone": "66",
445
+ "cello": "67",
446
+ "chair": "68",
447
+ "chandelier": "69",
448
+ "church": "70",
449
+ "circle": "71",
450
+ "clarinet": "72",
451
+ "clock": "73",
452
+ "cloud": "74",
453
+ "coffee cup": "75",
454
+ "compass": "76",
455
+ "computer": "77",
456
+ "cookie": "78",
457
+ "cooler": "79",
458
+ "couch": "80",
459
+ "cow": "81",
460
+ "crab": "82",
461
+ "crayon": "83",
462
+ "crocodile": "84",
463
+ "crown": "85",
464
+ "cruise ship": "86",
465
+ "cup": "87",
466
+ "diamond": "88",
467
+ "dishwasher": "89",
468
+ "diving board": "90",
469
+ "dog": "91",
470
+ "dolphin": "92",
471
+ "donut": "93",
472
+ "door": "94",
473
+ "dragon": "95",
474
+ "dresser": "96",
475
+ "drill": "97",
476
+ "drums": "98",
477
+ "duck": "99",
478
+ "dumbbell": "100",
479
+ "ear": "101",
480
+ "elbow": "102",
481
+ "elephant": "103",
482
+ "envelope": "104",
483
+ "eraser": "105",
484
+ "eye": "106",
485
+ "eyeglasses": "107",
486
+ "face": "108",
487
+ "fan": "109",
488
+ "feather": "110",
489
+ "fence": "111",
490
+ "finger": "112",
491
+ "fire hydrant": "113",
492
+ "fireplace": "114",
493
+ "firetruck": "115",
494
+ "fish": "116",
495
+ "flamingo": "117",
496
+ "flashlight": "118",
497
+ "flip flops": "119",
498
+ "floor lamp": "120",
499
+ "flower": "121",
500
+ "flying saucer": "122",
501
+ "foot": "123",
502
+ "fork": "124",
503
+ "frog": "125",
504
+ "frying pan": "126",
505
+ "garden": "128",
506
+ "garden hose": "127",
507
+ "giraffe": "129",
508
+ "goatee": "130",
509
+ "golf club": "131",
510
+ "grapes": "132",
511
+ "grass": "133",
512
+ "guitar": "134",
513
+ "hamburger": "135",
514
+ "hammer": "136",
515
+ "hand": "137",
516
+ "harp": "138",
517
+ "hat": "139",
518
+ "headphones": "140",
519
+ "hedgehog": "141",
520
+ "helicopter": "142",
521
+ "helmet": "143",
522
+ "hexagon": "144",
523
+ "hockey puck": "145",
524
+ "hockey stick": "146",
525
+ "horse": "147",
526
+ "hospital": "148",
527
+ "hot air balloon": "149",
528
+ "hot dog": "150",
529
+ "hot tub": "151",
530
+ "hourglass": "152",
531
+ "house": "154",
532
+ "house plant": "153",
533
+ "hurricane": "155",
534
+ "ice cream": "156",
535
+ "jacket": "157",
536
+ "jail": "158",
537
+ "kangaroo": "159",
538
+ "key": "160",
539
+ "keyboard": "161",
540
+ "knee": "162",
541
+ "knife": "163",
542
+ "ladder": "164",
543
+ "lantern": "165",
544
+ "laptop": "166",
545
+ "leaf": "167",
546
+ "leg": "168",
547
+ "light bulb": "169",
548
+ "lighter": "170",
549
+ "lighthouse": "171",
550
+ "lightning": "172",
551
+ "line": "173",
552
+ "lion": "174",
553
+ "lipstick": "175",
554
+ "lobster": "176",
555
+ "lollipop": "177",
556
+ "mailbox": "178",
557
+ "map": "179",
558
+ "marker": "180",
559
+ "matches": "181",
560
+ "megaphone": "182",
561
+ "mermaid": "183",
562
+ "microphone": "184",
563
+ "microwave": "185",
564
+ "monkey": "186",
565
+ "moon": "187",
566
+ "mosquito": "188",
567
+ "motorbike": "189",
568
+ "mountain": "190",
569
+ "mouse": "191",
570
+ "moustache": "192",
571
+ "mouth": "193",
572
+ "mug": "194",
573
+ "mushroom": "195",
574
+ "nail": "196",
575
+ "necklace": "197",
576
+ "nose": "198",
577
+ "ocean": "199",
578
+ "octagon": "200",
579
+ "octopus": "201",
580
+ "onion": "202",
581
+ "oven": "203",
582
+ "owl": "204",
583
+ "paint can": "205",
584
+ "paintbrush": "206",
585
+ "palm tree": "207",
586
+ "panda": "208",
587
+ "pants": "209",
588
+ "paper clip": "210",
589
+ "parachute": "211",
590
+ "parrot": "212",
591
+ "passport": "213",
592
+ "peanut": "214",
593
+ "pear": "215",
594
+ "peas": "216",
595
+ "pencil": "217",
596
+ "penguin": "218",
597
+ "piano": "219",
598
+ "pickup truck": "220",
599
+ "picture frame": "221",
600
+ "pig": "222",
601
+ "pillow": "223",
602
+ "pineapple": "224",
603
+ "pizza": "225",
604
+ "pliers": "226",
605
+ "police car": "227",
606
+ "pond": "228",
607
+ "pool": "229",
608
+ "popsicle": "230",
609
+ "postcard": "231",
610
+ "potato": "232",
611
+ "power outlet": "233",
612
+ "purse": "234",
613
+ "rabbit": "235",
614
+ "raccoon": "236",
615
+ "radio": "237",
616
+ "rain": "238",
617
+ "rainbow": "239",
618
+ "rake": "240",
619
+ "remote control": "241",
620
+ "rhinoceros": "242",
621
+ "rifle": "243",
622
+ "river": "244",
623
+ "roller coaster": "245",
624
+ "rollerskates": "246",
625
+ "sailboat": "247",
626
+ "sandwich": "248",
627
+ "saw": "249",
628
+ "saxophone": "250",
629
+ "school bus": "251",
630
+ "scissors": "252",
631
+ "scorpion": "253",
632
+ "screwdriver": "254",
633
+ "sea turtle": "255",
634
+ "see saw": "256",
635
+ "shark": "257",
636
+ "sheep": "258",
637
+ "shoe": "259",
638
+ "shorts": "260",
639
+ "shovel": "261",
640
+ "sink": "262",
641
+ "skateboard": "263",
642
+ "skull": "264",
643
+ "skyscraper": "265",
644
+ "sleeping bag": "266",
645
+ "smiley face": "267",
646
+ "snail": "268",
647
+ "snake": "269",
648
+ "snorkel": "270",
649
+ "snowflake": "271",
650
+ "snowman": "272",
651
+ "soccer ball": "273",
652
+ "sock": "274",
653
+ "speedboat": "275",
654
+ "spider": "276",
655
+ "spoon": "277",
656
+ "spreadsheet": "278",
657
+ "square": "279",
658
+ "squiggle": "280",
659
+ "squirrel": "281",
660
+ "stairs": "282",
661
+ "star": "283",
662
+ "steak": "284",
663
+ "stereo": "285",
664
+ "stethoscope": "286",
665
+ "stitches": "287",
666
+ "stop sign": "288",
667
+ "stove": "289",
668
+ "strawberry": "290",
669
+ "streetlight": "291",
670
+ "string bean": "292",
671
+ "submarine": "293",
672
+ "suitcase": "294",
673
+ "sun": "295",
674
+ "swan": "296",
675
+ "sweater": "297",
676
+ "swing set": "298",
677
+ "sword": "299",
678
+ "syringe": "300",
679
+ "t-shirt": "301",
680
+ "table": "302",
681
+ "teapot": "303",
682
+ "teddy-bear": "304",
683
+ "telephone": "305",
684
+ "television": "306",
685
+ "tennis racquet": "307",
686
+ "tent": "308",
687
+ "tiger": "312",
688
+ "toaster": "313",
689
+ "toe": "314",
690
+ "toilet": "315",
691
+ "tooth": "316",
692
+ "toothbrush": "317",
693
+ "toothpaste": "318",
694
+ "tornado": "319",
695
+ "tractor": "320",
696
+ "traffic light": "321",
697
+ "train": "322",
698
+ "tree": "323",
699
+ "triangle": "324",
700
+ "trombone": "325",
701
+ "truck": "326",
702
+ "trumpet": "327",
703
+ "umbrella": "328",
704
+ "underwear": "329",
705
+ "van": "330",
706
+ "vase": "331",
707
+ "violin": "332",
708
+ "washing machine": "333",
709
+ "watermelon": "334",
710
+ "waterslide": "335",
711
+ "whale": "336",
712
+ "wheel": "337",
713
+ "windmill": "338",
714
+ "wine bottle": "339",
715
+ "wine glass": "340",
716
+ "wristwatch": "341",
717
+ "yoga": "342",
718
+ "zebra": "343",
719
+ "zigzag": "344"
720
+ },
721
+ "layer_norm_eps": 1e-05,
722
+ "mlp_ratio": 2.0,
723
+ "model_type": "mobilevitv2",
724
+ "n_attn_blocks": [
725
+ 2,
726
+ 4,
727
+ 3
728
+ ],
729
+ "num_channels": 1,
730
+ "output_stride": 32,
731
+ "patch_size": 2,
732
+ "problem_type": "single_label_classification",
733
+ "semantic_loss_ignore_index": 255,
734
+ "torch_dtype": "float32",
735
+ "transformers_version": "4.40.0",
736
+ "width_multiplier": 1.0
737
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28184813e695da9074eb277c8000be311e12d53352ec1ed2b6b268532b81b323
3
+ size 18360744
preprocessor_config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_valid_processor_keys": [
3
+ "images",
4
+ "segmentation_maps",
5
+ "do_resize",
6
+ "size",
7
+ "resample",
8
+ "do_rescale",
9
+ "rescale_factor",
10
+ "do_center_crop",
11
+ "crop_size",
12
+ "do_flip_channel_order",
13
+ "return_tensors",
14
+ "data_format",
15
+ "input_data_format"
16
+ ],
17
+ "crop_size": {
18
+ "height": 56,
19
+ "width": 56
20
+ },
21
+ "do_center_crop": true,
22
+ "do_convert_rgb": false,
23
+ "do_flip_channel_order": false,
24
+ "do_rescale": true,
25
+ "do_resize": true,
26
+ "image_processor_type": "MobileViTImageProcessor",
27
+ "resample": 2,
28
+ "rescale_factor": 0.00392156862745098,
29
+ "size": {
30
+ "shortest_edge": 56
31
+ }
32
+ }
test_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "eval_accuracy": 0.774412,
4
+ "eval_loss": 0.8919877409934998,
5
+ "eval_runtime": 138.4801,
6
+ "eval_samples_per_second": 1805.314,
7
+ "eval_steps_per_second": 7.055
8
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "total_flos": 1.93274424e+18,
4
+ "train_loss": 0.9357909288237652,
5
+ "train_runtime": 45635.435,
6
+ "train_samples_per_second": 493.038,
7
+ "train_steps_per_second": 1.926
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,792 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 5.0,
5
+ "eval_steps": 5000,
6
+ "global_step": 87895,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.05688605722737357,
13
+ "grad_norm": 2.4062280654907227,
14
+ "learning_rate": 0.0007908982308436202,
15
+ "loss": 2.1918,
16
+ "step": 1000
17
+ },
18
+ {
19
+ "epoch": 0.11377211445474714,
20
+ "grad_norm": 1.431848406791687,
21
+ "learning_rate": 0.0007817964616872405,
22
+ "loss": 1.4818,
23
+ "step": 2000
24
+ },
25
+ {
26
+ "epoch": 0.17065817168212072,
27
+ "grad_norm": 1.5747077465057373,
28
+ "learning_rate": 0.0007726946925308607,
29
+ "loss": 1.3634,
30
+ "step": 3000
31
+ },
32
+ {
33
+ "epoch": 0.22754422890949427,
34
+ "grad_norm": 1.4864206314086914,
35
+ "learning_rate": 0.0007635929233744809,
36
+ "loss": 1.2967,
37
+ "step": 4000
38
+ },
39
+ {
40
+ "epoch": 0.2844302861368678,
41
+ "grad_norm": 1.2000905275344849,
42
+ "learning_rate": 0.0007544911542181011,
43
+ "loss": 1.2574,
44
+ "step": 5000
45
+ },
46
+ {
47
+ "epoch": 0.2844302861368678,
48
+ "eval_accuracy": 0.689128,
49
+ "eval_loss": 1.238457202911377,
50
+ "eval_runtime": 203.0197,
51
+ "eval_samples_per_second": 1231.407,
52
+ "eval_steps_per_second": 4.812,
53
+ "step": 5000
54
+ },
55
+ {
56
+ "epoch": 0.34131634336424144,
57
+ "grad_norm": 1.2910780906677246,
58
+ "learning_rate": 0.0007453893850617214,
59
+ "loss": 1.2181,
60
+ "step": 6000
61
+ },
62
+ {
63
+ "epoch": 0.398202400591615,
64
+ "grad_norm": 1.1383774280548096,
65
+ "learning_rate": 0.0007362876159053416,
66
+ "loss": 1.1863,
67
+ "step": 7000
68
+ },
69
+ {
70
+ "epoch": 0.45508845781898855,
71
+ "grad_norm": 1.135689616203308,
72
+ "learning_rate": 0.0007271858467489618,
73
+ "loss": 1.1653,
74
+ "step": 8000
75
+ },
76
+ {
77
+ "epoch": 0.5119745150463622,
78
+ "grad_norm": 1.1965036392211914,
79
+ "learning_rate": 0.0007180840775925821,
80
+ "loss": 1.147,
81
+ "step": 9000
82
+ },
83
+ {
84
+ "epoch": 0.5688605722737357,
85
+ "grad_norm": 1.0561026334762573,
86
+ "learning_rate": 0.0007089823084362024,
87
+ "loss": 1.1281,
88
+ "step": 10000
89
+ },
90
+ {
91
+ "epoch": 0.5688605722737357,
92
+ "eval_accuracy": 0.715764,
93
+ "eval_loss": 1.1192152500152588,
94
+ "eval_runtime": 128.8781,
95
+ "eval_samples_per_second": 1939.817,
96
+ "eval_steps_per_second": 7.581,
97
+ "step": 10000
98
+ },
99
+ {
100
+ "epoch": 0.6257466295011093,
101
+ "grad_norm": 0.9711835980415344,
102
+ "learning_rate": 0.0006998805392798226,
103
+ "loss": 1.1232,
104
+ "step": 11000
105
+ },
106
+ {
107
+ "epoch": 0.6826326867284829,
108
+ "grad_norm": 0.8913602828979492,
109
+ "learning_rate": 0.0006907787701234428,
110
+ "loss": 1.0988,
111
+ "step": 12000
112
+ },
113
+ {
114
+ "epoch": 0.7395187439558564,
115
+ "grad_norm": 1.092698097229004,
116
+ "learning_rate": 0.000681677000967063,
117
+ "loss": 1.0897,
118
+ "step": 13000
119
+ },
120
+ {
121
+ "epoch": 0.79640480118323,
122
+ "grad_norm": 0.9319038391113281,
123
+ "learning_rate": 0.0006725752318106833,
124
+ "loss": 1.0826,
125
+ "step": 14000
126
+ },
127
+ {
128
+ "epoch": 0.8532908584106036,
129
+ "grad_norm": 1.0223675966262817,
130
+ "learning_rate": 0.0006634734626543035,
131
+ "loss": 1.0698,
132
+ "step": 15000
133
+ },
134
+ {
135
+ "epoch": 0.8532908584106036,
136
+ "eval_accuracy": 0.728676,
137
+ "eval_loss": 1.0653605461120605,
138
+ "eval_runtime": 128.0826,
139
+ "eval_samples_per_second": 1951.866,
140
+ "eval_steps_per_second": 7.628,
141
+ "step": 15000
142
+ },
143
+ {
144
+ "epoch": 0.9101769156379771,
145
+ "grad_norm": 0.8995338678359985,
146
+ "learning_rate": 0.0006543716934979237,
147
+ "loss": 1.0624,
148
+ "step": 16000
149
+ },
150
+ {
151
+ "epoch": 0.9670629728653507,
152
+ "grad_norm": 0.8418471217155457,
153
+ "learning_rate": 0.0006452699243415439,
154
+ "loss": 1.0538,
155
+ "step": 17000
156
+ },
157
+ {
158
+ "epoch": 1.0239490300927243,
159
+ "grad_norm": 1.024624228477478,
160
+ "learning_rate": 0.0006361681551851641,
161
+ "loss": 1.0311,
162
+ "step": 18000
163
+ },
164
+ {
165
+ "epoch": 1.0808350873200978,
166
+ "grad_norm": 0.9130891561508179,
167
+ "learning_rate": 0.0006270663860287844,
168
+ "loss": 0.999,
169
+ "step": 19000
170
+ },
171
+ {
172
+ "epoch": 1.1377211445474713,
173
+ "grad_norm": 0.8896342515945435,
174
+ "learning_rate": 0.0006179646168724045,
175
+ "loss": 1.0,
176
+ "step": 20000
177
+ },
178
+ {
179
+ "epoch": 1.1377211445474713,
180
+ "eval_accuracy": 0.739712,
181
+ "eval_loss": 1.0235533714294434,
182
+ "eval_runtime": 127.2585,
183
+ "eval_samples_per_second": 1964.505,
184
+ "eval_steps_per_second": 7.677,
185
+ "step": 20000
186
+ },
187
+ {
188
+ "epoch": 1.194607201774845,
189
+ "grad_norm": 0.7940112948417664,
190
+ "learning_rate": 0.0006088628477160248,
191
+ "loss": 0.9957,
192
+ "step": 21000
193
+ },
194
+ {
195
+ "epoch": 1.2514932590022185,
196
+ "grad_norm": 0.9015308618545532,
197
+ "learning_rate": 0.000599761078559645,
198
+ "loss": 0.9967,
199
+ "step": 22000
200
+ },
201
+ {
202
+ "epoch": 1.3083793162295922,
203
+ "grad_norm": 0.9106078147888184,
204
+ "learning_rate": 0.0005906593094032653,
205
+ "loss": 0.9939,
206
+ "step": 23000
207
+ },
208
+ {
209
+ "epoch": 1.3652653734569657,
210
+ "grad_norm": 0.9563422203063965,
211
+ "learning_rate": 0.0005815575402468854,
212
+ "loss": 0.9931,
213
+ "step": 24000
214
+ },
215
+ {
216
+ "epoch": 1.4221514306843392,
217
+ "grad_norm": 0.7646272778511047,
218
+ "learning_rate": 0.0005724557710905057,
219
+ "loss": 0.9774,
220
+ "step": 25000
221
+ },
222
+ {
223
+ "epoch": 1.4221514306843392,
224
+ "eval_accuracy": 0.743348,
225
+ "eval_loss": 1.0054922103881836,
226
+ "eval_runtime": 127.7729,
227
+ "eval_samples_per_second": 1956.596,
228
+ "eval_steps_per_second": 7.646,
229
+ "step": 25000
230
+ },
231
+ {
232
+ "epoch": 1.4790374879117127,
233
+ "grad_norm": 0.7779045104980469,
234
+ "learning_rate": 0.000563354001934126,
235
+ "loss": 0.9792,
236
+ "step": 26000
237
+ },
238
+ {
239
+ "epoch": 1.5359235451390862,
240
+ "grad_norm": 0.8506484627723694,
241
+ "learning_rate": 0.0005542522327777463,
242
+ "loss": 0.9778,
243
+ "step": 27000
244
+ },
245
+ {
246
+ "epoch": 1.59280960236646,
247
+ "grad_norm": 0.8443676829338074,
248
+ "learning_rate": 0.0005451504636213664,
249
+ "loss": 0.9715,
250
+ "step": 28000
251
+ },
252
+ {
253
+ "epoch": 1.6496956595938337,
254
+ "grad_norm": 0.9333568215370178,
255
+ "learning_rate": 0.0005360486944649867,
256
+ "loss": 0.9679,
257
+ "step": 29000
258
+ },
259
+ {
260
+ "epoch": 1.7065817168212072,
261
+ "grad_norm": 0.9501623511314392,
262
+ "learning_rate": 0.0005269469253086069,
263
+ "loss": 0.9684,
264
+ "step": 30000
265
+ },
266
+ {
267
+ "epoch": 1.7065817168212072,
268
+ "eval_accuracy": 0.749276,
269
+ "eval_loss": 0.9812818765640259,
270
+ "eval_runtime": 128.5758,
271
+ "eval_samples_per_second": 1944.379,
272
+ "eval_steps_per_second": 7.599,
273
+ "step": 30000
274
+ },
275
+ {
276
+ "epoch": 1.7634677740485807,
277
+ "grad_norm": 0.7442188262939453,
278
+ "learning_rate": 0.0005178451561522272,
279
+ "loss": 0.9636,
280
+ "step": 31000
281
+ },
282
+ {
283
+ "epoch": 1.8203538312759542,
284
+ "grad_norm": 0.7510819435119629,
285
+ "learning_rate": 0.0005087433869958473,
286
+ "loss": 0.9647,
287
+ "step": 32000
288
+ },
289
+ {
290
+ "epoch": 1.8772398885033277,
291
+ "grad_norm": 0.7448764443397522,
292
+ "learning_rate": 0.0004996416178394676,
293
+ "loss": 0.9591,
294
+ "step": 33000
295
+ },
296
+ {
297
+ "epoch": 1.9341259457307014,
298
+ "grad_norm": 0.8019358515739441,
299
+ "learning_rate": 0.0004905398486830878,
300
+ "loss": 0.9513,
301
+ "step": 34000
302
+ },
303
+ {
304
+ "epoch": 1.9910120029580751,
305
+ "grad_norm": 0.9495121240615845,
306
+ "learning_rate": 0.00048143807952670797,
307
+ "loss": 0.9511,
308
+ "step": 35000
309
+ },
310
+ {
311
+ "epoch": 1.9910120029580751,
312
+ "eval_accuracy": 0.755448,
313
+ "eval_loss": 0.9558805227279663,
314
+ "eval_runtime": 127.8711,
315
+ "eval_samples_per_second": 1955.094,
316
+ "eval_steps_per_second": 7.641,
317
+ "step": 35000
318
+ },
319
+ {
320
+ "epoch": 2.0478980601854486,
321
+ "grad_norm": 0.8410281538963318,
322
+ "learning_rate": 0.00047233631037032825,
323
+ "loss": 0.9081,
324
+ "step": 36000
325
+ },
326
+ {
327
+ "epoch": 2.104784117412822,
328
+ "grad_norm": 0.8246123194694519,
329
+ "learning_rate": 0.00046323454121394847,
330
+ "loss": 0.8964,
331
+ "step": 37000
332
+ },
333
+ {
334
+ "epoch": 2.1616701746401956,
335
+ "grad_norm": 0.9567108154296875,
336
+ "learning_rate": 0.0004541327720575687,
337
+ "loss": 0.8952,
338
+ "step": 38000
339
+ },
340
+ {
341
+ "epoch": 2.218556231867569,
342
+ "grad_norm": 0.8104901313781738,
343
+ "learning_rate": 0.0004450310029011889,
344
+ "loss": 0.8925,
345
+ "step": 39000
346
+ },
347
+ {
348
+ "epoch": 2.2754422890949426,
349
+ "grad_norm": 0.9034276008605957,
350
+ "learning_rate": 0.0004359292337448092,
351
+ "loss": 0.8998,
352
+ "step": 40000
353
+ },
354
+ {
355
+ "epoch": 2.2754422890949426,
356
+ "eval_accuracy": 0.755948,
357
+ "eval_loss": 0.9492226839065552,
358
+ "eval_runtime": 127.8812,
359
+ "eval_samples_per_second": 1954.94,
360
+ "eval_steps_per_second": 7.64,
361
+ "step": 40000
362
+ },
363
+ {
364
+ "epoch": 2.3323283463223166,
365
+ "grad_norm": 1.3229442834854126,
366
+ "learning_rate": 0.00042682746458842937,
367
+ "loss": 0.8962,
368
+ "step": 41000
369
+ },
370
+ {
371
+ "epoch": 2.38921440354969,
372
+ "grad_norm": 0.8582925200462341,
373
+ "learning_rate": 0.00041772569543204965,
374
+ "loss": 0.8976,
375
+ "step": 42000
376
+ },
377
+ {
378
+ "epoch": 2.4461004607770636,
379
+ "grad_norm": 0.8881712555885315,
380
+ "learning_rate": 0.0004086239262756698,
381
+ "loss": 0.8898,
382
+ "step": 43000
383
+ },
384
+ {
385
+ "epoch": 2.502986518004437,
386
+ "grad_norm": 0.8713961839675903,
387
+ "learning_rate": 0.00039952215711929005,
388
+ "loss": 0.8927,
389
+ "step": 44000
390
+ },
391
+ {
392
+ "epoch": 2.5598725752318106,
393
+ "grad_norm": 0.7883007526397705,
394
+ "learning_rate": 0.00039042038796291027,
395
+ "loss": 0.8967,
396
+ "step": 45000
397
+ },
398
+ {
399
+ "epoch": 2.5598725752318106,
400
+ "eval_accuracy": 0.760028,
401
+ "eval_loss": 0.937300980091095,
402
+ "eval_runtime": 130.0782,
403
+ "eval_samples_per_second": 1921.921,
404
+ "eval_steps_per_second": 7.511,
405
+ "step": 45000
406
+ },
407
+ {
408
+ "epoch": 2.6167586324591845,
409
+ "grad_norm": 0.8600155711174011,
410
+ "learning_rate": 0.00038131861880653055,
411
+ "loss": 0.8927,
412
+ "step": 46000
413
+ },
414
+ {
415
+ "epoch": 2.673644689686558,
416
+ "grad_norm": 0.8501909971237183,
417
+ "learning_rate": 0.0003722168496501508,
418
+ "loss": 0.8913,
419
+ "step": 47000
420
+ },
421
+ {
422
+ "epoch": 2.7305307469139315,
423
+ "grad_norm": 0.8116582632064819,
424
+ "learning_rate": 0.000363115080493771,
425
+ "loss": 0.8889,
426
+ "step": 48000
427
+ },
428
+ {
429
+ "epoch": 2.787416804141305,
430
+ "grad_norm": 0.8065186738967896,
431
+ "learning_rate": 0.0003540133113373912,
432
+ "loss": 0.8896,
433
+ "step": 49000
434
+ },
435
+ {
436
+ "epoch": 2.8443028613686785,
437
+ "grad_norm": 0.9248031973838806,
438
+ "learning_rate": 0.00034491154218101145,
439
+ "loss": 0.8837,
440
+ "step": 50000
441
+ },
442
+ {
443
+ "epoch": 2.8443028613686785,
444
+ "eval_accuracy": 0.762176,
445
+ "eval_loss": 0.9251159429550171,
446
+ "eval_runtime": 128.4439,
447
+ "eval_samples_per_second": 1946.376,
448
+ "eval_steps_per_second": 7.606,
449
+ "step": 50000
450
+ },
451
+ {
452
+ "epoch": 2.901188918596052,
453
+ "grad_norm": 0.8191467523574829,
454
+ "learning_rate": 0.0003358097730246317,
455
+ "loss": 0.878,
456
+ "step": 51000
457
+ },
458
+ {
459
+ "epoch": 2.9580749758234255,
460
+ "grad_norm": 0.7620063424110413,
461
+ "learning_rate": 0.0003267080038682519,
462
+ "loss": 0.8832,
463
+ "step": 52000
464
+ },
465
+ {
466
+ "epoch": 3.0149610330507994,
467
+ "grad_norm": 0.8365482687950134,
468
+ "learning_rate": 0.0003176062347118721,
469
+ "loss": 0.8621,
470
+ "step": 53000
471
+ },
472
+ {
473
+ "epoch": 3.071847090278173,
474
+ "grad_norm": 0.9817807078361511,
475
+ "learning_rate": 0.00030850446555549235,
476
+ "loss": 0.8224,
477
+ "step": 54000
478
+ },
479
+ {
480
+ "epoch": 3.1287331475055464,
481
+ "grad_norm": 0.847806453704834,
482
+ "learning_rate": 0.00029940269639911263,
483
+ "loss": 0.8253,
484
+ "step": 55000
485
+ },
486
+ {
487
+ "epoch": 3.1287331475055464,
488
+ "eval_accuracy": 0.76438,
489
+ "eval_loss": 0.9235970973968506,
490
+ "eval_runtime": 126.2531,
491
+ "eval_samples_per_second": 1980.15,
492
+ "eval_steps_per_second": 7.738,
493
+ "step": 55000
494
+ },
495
+ {
496
+ "epoch": 3.18561920473292,
497
+ "grad_norm": 1.1729530096054077,
498
+ "learning_rate": 0.00029030092724273285,
499
+ "loss": 0.8225,
500
+ "step": 56000
501
+ },
502
+ {
503
+ "epoch": 3.2425052619602934,
504
+ "grad_norm": 1.0548408031463623,
505
+ "learning_rate": 0.0002811991580863531,
506
+ "loss": 0.821,
507
+ "step": 57000
508
+ },
509
+ {
510
+ "epoch": 3.299391319187667,
511
+ "grad_norm": 1.0199774503707886,
512
+ "learning_rate": 0.0002720973889299733,
513
+ "loss": 0.8213,
514
+ "step": 58000
515
+ },
516
+ {
517
+ "epoch": 3.356277376415041,
518
+ "grad_norm": 0.9180177450180054,
519
+ "learning_rate": 0.00026299561977359353,
520
+ "loss": 0.8274,
521
+ "step": 59000
522
+ },
523
+ {
524
+ "epoch": 3.4131634336424144,
525
+ "grad_norm": 0.9745663404464722,
526
+ "learning_rate": 0.0002538938506172137,
527
+ "loss": 0.8229,
528
+ "step": 60000
529
+ },
530
+ {
531
+ "epoch": 3.4131634336424144,
532
+ "eval_accuracy": 0.766832,
533
+ "eval_loss": 0.9138370156288147,
534
+ "eval_runtime": 129.2727,
535
+ "eval_samples_per_second": 1933.897,
536
+ "eval_steps_per_second": 7.558,
537
+ "step": 60000
538
+ },
539
+ {
540
+ "epoch": 3.470049490869788,
541
+ "grad_norm": 0.8708947896957397,
542
+ "learning_rate": 0.0002447920814608339,
543
+ "loss": 0.8256,
544
+ "step": 61000
545
+ },
546
+ {
547
+ "epoch": 3.5269355480971614,
548
+ "grad_norm": 0.9808185696601868,
549
+ "learning_rate": 0.00023569031230445418,
550
+ "loss": 0.8298,
551
+ "step": 62000
552
+ },
553
+ {
554
+ "epoch": 3.583821605324535,
555
+ "grad_norm": 0.8228833079338074,
556
+ "learning_rate": 0.0002265885431480744,
557
+ "loss": 0.827,
558
+ "step": 63000
559
+ },
560
+ {
561
+ "epoch": 3.6407076625519084,
562
+ "grad_norm": 0.9581019878387451,
563
+ "learning_rate": 0.00021748677399169463,
564
+ "loss": 0.8275,
565
+ "step": 64000
566
+ },
567
+ {
568
+ "epoch": 3.697593719779282,
569
+ "grad_norm": 0.8560314178466797,
570
+ "learning_rate": 0.00020838500483531488,
571
+ "loss": 0.8145,
572
+ "step": 65000
573
+ },
574
+ {
575
+ "epoch": 3.697593719779282,
576
+ "eval_accuracy": 0.769172,
577
+ "eval_loss": 0.9042648673057556,
578
+ "eval_runtime": 129.2138,
579
+ "eval_samples_per_second": 1934.778,
580
+ "eval_steps_per_second": 7.561,
581
+ "step": 65000
582
+ },
583
+ {
584
+ "epoch": 3.754479777006656,
585
+ "grad_norm": 0.8918451070785522,
586
+ "learning_rate": 0.0001992832356789351,
587
+ "loss": 0.819,
588
+ "step": 66000
589
+ },
590
+ {
591
+ "epoch": 3.8113658342340293,
592
+ "grad_norm": 1.0977294445037842,
593
+ "learning_rate": 0.00019018146652255533,
594
+ "loss": 0.8122,
595
+ "step": 67000
596
+ },
597
+ {
598
+ "epoch": 3.868251891461403,
599
+ "grad_norm": 0.7856444716453552,
600
+ "learning_rate": 0.00018107969736617555,
601
+ "loss": 0.8225,
602
+ "step": 68000
603
+ },
604
+ {
605
+ "epoch": 3.9251379486887763,
606
+ "grad_norm": 0.9270259141921997,
607
+ "learning_rate": 0.00017197792820979578,
608
+ "loss": 0.8158,
609
+ "step": 69000
610
+ },
611
+ {
612
+ "epoch": 3.98202400591615,
613
+ "grad_norm": 1.082774043083191,
614
+ "learning_rate": 0.00016287615905341603,
615
+ "loss": 0.8156,
616
+ "step": 70000
617
+ },
618
+ {
619
+ "epoch": 3.98202400591615,
620
+ "eval_accuracy": 0.770764,
621
+ "eval_loss": 0.8961142301559448,
622
+ "eval_runtime": 138.0555,
623
+ "eval_samples_per_second": 1810.866,
624
+ "eval_steps_per_second": 7.077,
625
+ "step": 70000
626
+ },
627
+ {
628
+ "epoch": 4.038910063143524,
629
+ "grad_norm": 0.909858226776123,
630
+ "learning_rate": 0.00015377438989703626,
631
+ "loss": 0.7785,
632
+ "step": 71000
633
+ },
634
+ {
635
+ "epoch": 4.095796120370897,
636
+ "grad_norm": 0.931280791759491,
637
+ "learning_rate": 0.00014467262074065645,
638
+ "loss": 0.7637,
639
+ "step": 72000
640
+ },
641
+ {
642
+ "epoch": 4.152682177598271,
643
+ "grad_norm": 0.94422847032547,
644
+ "learning_rate": 0.0001355708515842767,
645
+ "loss": 0.7612,
646
+ "step": 73000
647
+ },
648
+ {
649
+ "epoch": 4.209568234825644,
650
+ "grad_norm": 0.9250127077102661,
651
+ "learning_rate": 0.00012646908242789693,
652
+ "loss": 0.7616,
653
+ "step": 74000
654
+ },
655
+ {
656
+ "epoch": 4.266454292053018,
657
+ "grad_norm": 0.8467296957969666,
658
+ "learning_rate": 0.00011736731327151716,
659
+ "loss": 0.7557,
660
+ "step": 75000
661
+ },
662
+ {
663
+ "epoch": 4.266454292053018,
664
+ "eval_accuracy": 0.77204,
665
+ "eval_loss": 0.9022773504257202,
666
+ "eval_runtime": 144.5432,
667
+ "eval_samples_per_second": 1729.587,
668
+ "eval_steps_per_second": 6.759,
669
+ "step": 75000
670
+ },
671
+ {
672
+ "epoch": 4.323340349280391,
673
+ "grad_norm": 0.8985564708709717,
674
+ "learning_rate": 0.00010826554411513738,
675
+ "loss": 0.7604,
676
+ "step": 76000
677
+ },
678
+ {
679
+ "epoch": 4.380226406507765,
680
+ "grad_norm": 0.8618564605712891,
681
+ "learning_rate": 9.916377495875762e-05,
682
+ "loss": 0.7632,
683
+ "step": 77000
684
+ },
685
+ {
686
+ "epoch": 4.437112463735138,
687
+ "grad_norm": 0.9467126727104187,
688
+ "learning_rate": 9.006200580237784e-05,
689
+ "loss": 0.7614,
690
+ "step": 78000
691
+ },
692
+ {
693
+ "epoch": 4.493998520962512,
694
+ "grad_norm": 1.0163730382919312,
695
+ "learning_rate": 8.096023664599807e-05,
696
+ "loss": 0.7575,
697
+ "step": 79000
698
+ },
699
+ {
700
+ "epoch": 4.550884578189885,
701
+ "grad_norm": 1.1194038391113281,
702
+ "learning_rate": 7.18584674896183e-05,
703
+ "loss": 0.7595,
704
+ "step": 80000
705
+ },
706
+ {
707
+ "epoch": 4.550884578189885,
708
+ "eval_accuracy": 0.772256,
709
+ "eval_loss": 0.897346019744873,
710
+ "eval_runtime": 136.9434,
711
+ "eval_samples_per_second": 1825.571,
712
+ "eval_steps_per_second": 7.134,
713
+ "step": 80000
714
+ },
715
+ {
716
+ "epoch": 4.607770635417259,
717
+ "grad_norm": 1.0589629411697388,
718
+ "learning_rate": 6.275669833323853e-05,
719
+ "loss": 0.7548,
720
+ "step": 81000
721
+ },
722
+ {
723
+ "epoch": 4.664656692644633,
724
+ "grad_norm": 0.8540852665901184,
725
+ "learning_rate": 5.365492917685876e-05,
726
+ "loss": 0.7601,
727
+ "step": 82000
728
+ },
729
+ {
730
+ "epoch": 4.721542749872007,
731
+ "grad_norm": 1.127475380897522,
732
+ "learning_rate": 4.455316002047898e-05,
733
+ "loss": 0.7554,
734
+ "step": 83000
735
+ },
736
+ {
737
+ "epoch": 4.77842880709938,
738
+ "grad_norm": 0.9464063048362732,
739
+ "learning_rate": 3.545139086409921e-05,
740
+ "loss": 0.756,
741
+ "step": 84000
742
+ },
743
+ {
744
+ "epoch": 4.835314864326754,
745
+ "grad_norm": 0.9705914855003357,
746
+ "learning_rate": 2.634962170771944e-05,
747
+ "loss": 0.7581,
748
+ "step": 85000
749
+ },
750
+ {
751
+ "epoch": 4.835314864326754,
752
+ "eval_accuracy": 0.773724,
753
+ "eval_loss": 0.8925997018814087,
754
+ "eval_runtime": 138.7415,
755
+ "eval_samples_per_second": 1801.913,
756
+ "eval_steps_per_second": 7.042,
757
+ "step": 85000
758
+ },
759
+ {
760
+ "epoch": 4.892200921554127,
761
+ "grad_norm": 0.8879310488700867,
762
+ "learning_rate": 1.7247852551339668e-05,
763
+ "loss": 0.758,
764
+ "step": 86000
765
+ },
766
+ {
767
+ "epoch": 4.949086978781501,
768
+ "grad_norm": 1.2024400234222412,
769
+ "learning_rate": 8.146083394959896e-06,
770
+ "loss": 0.751,
771
+ "step": 87000
772
+ },
773
+ {
774
+ "epoch": 5.0,
775
+ "step": 87895,
776
+ "total_flos": 1.93274424e+18,
777
+ "train_loss": 0.9357909288237652,
778
+ "train_runtime": 45635.435,
779
+ "train_samples_per_second": 493.038,
780
+ "train_steps_per_second": 1.926
781
+ }
782
+ ],
783
+ "logging_steps": 1000,
784
+ "max_steps": 87895,
785
+ "num_input_tokens_seen": 0,
786
+ "num_train_epochs": 5,
787
+ "save_steps": 5000,
788
+ "total_flos": 1.93274424e+18,
789
+ "train_batch_size": 256,
790
+ "trial_name": null,
791
+ "trial_params": null
792
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09269e0c37bdf9c229beefd43c807681f37512212896aa362181dedc28d01416
3
+ size 4984