{ "best_metric": 0.9985501993475897, "best_model_checkpoint": "Smart_Tour_Luxor_v1.0\\checkpoint-194", "epoch": 4.0, "eval_steps": 500, "global_step": 776, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "grad_norm": 1.1416014432907104, "learning_rate": 6.41025641025641e-06, "loss": 2.9726, "step": 10 }, { "epoch": 0.1, "grad_norm": 1.0776972770690918, "learning_rate": 1.282051282051282e-05, "loss": 2.9053, "step": 20 }, { "epoch": 0.15, "grad_norm": 1.0615094900131226, "learning_rate": 1.923076923076923e-05, "loss": 2.7576, "step": 30 }, { "epoch": 0.21, "grad_norm": 1.0457894802093506, "learning_rate": 2.564102564102564e-05, "loss": 2.5866, "step": 40 }, { "epoch": 0.26, "grad_norm": 1.1404309272766113, "learning_rate": 3.205128205128206e-05, "loss": 2.3687, "step": 50 }, { "epoch": 0.31, "grad_norm": 1.1770880222320557, "learning_rate": 3.846153846153846e-05, "loss": 2.1344, "step": 60 }, { "epoch": 0.36, "grad_norm": 1.2791804075241089, "learning_rate": 4.4871794871794874e-05, "loss": 1.8869, "step": 70 }, { "epoch": 0.41, "grad_norm": 1.2911182641983032, "learning_rate": 4.98567335243553e-05, "loss": 1.5984, "step": 80 }, { "epoch": 0.46, "grad_norm": 1.3716331720352173, "learning_rate": 4.914040114613181e-05, "loss": 1.3587, "step": 90 }, { "epoch": 0.52, "grad_norm": 1.3185632228851318, "learning_rate": 4.842406876790831e-05, "loss": 1.1518, "step": 100 }, { "epoch": 0.57, "grad_norm": 1.4818017482757568, "learning_rate": 4.7707736389684815e-05, "loss": 0.9694, "step": 110 }, { "epoch": 0.62, "grad_norm": 1.235606074333191, "learning_rate": 4.699140401146132e-05, "loss": 0.8449, "step": 120 }, { "epoch": 0.67, "grad_norm": 1.0605517625808716, "learning_rate": 4.627507163323783e-05, "loss": 0.7504, "step": 130 }, { "epoch": 0.72, "grad_norm": 1.0399278402328491, "learning_rate": 4.555873925501433e-05, "loss": 0.6532, "step": 140 }, { "epoch": 0.77, "grad_norm": 1.2140040397644043, "learning_rate": 4.4842406876790833e-05, "loss": 0.5724, "step": 150 }, { "epoch": 0.82, "grad_norm": 0.8598456382751465, "learning_rate": 4.412607449856734e-05, "loss": 0.5135, "step": 160 }, { "epoch": 0.88, "grad_norm": 0.7887383103370667, "learning_rate": 4.3409742120343846e-05, "loss": 0.4758, "step": 170 }, { "epoch": 0.93, "grad_norm": 0.7831109762191772, "learning_rate": 4.2693409742120346e-05, "loss": 0.4344, "step": 180 }, { "epoch": 0.98, "grad_norm": 0.6504438519477844, "learning_rate": 4.197707736389685e-05, "loss": 0.4048, "step": 190 }, { "epoch": 1.0, "eval_accuracy": 0.9985501993475897, "eval_loss": 0.3498220443725586, "eval_runtime": 330.0181, "eval_samples_per_second": 8.36, "eval_steps_per_second": 0.264, "step": 194 }, { "epoch": 1.03, "grad_norm": 0.8069166541099548, "learning_rate": 4.126074498567336e-05, "loss": 0.3732, "step": 200 }, { "epoch": 1.08, "grad_norm": 0.8006480932235718, "learning_rate": 4.054441260744986e-05, "loss": 0.3454, "step": 210 }, { "epoch": 1.13, "grad_norm": 0.695347011089325, "learning_rate": 3.982808022922636e-05, "loss": 0.3199, "step": 220 }, { "epoch": 1.19, "grad_norm": 0.5613417029380798, "learning_rate": 3.9111747851002864e-05, "loss": 0.2974, "step": 230 }, { "epoch": 1.24, "grad_norm": 0.48088908195495605, "learning_rate": 3.839541547277937e-05, "loss": 0.3018, "step": 240 }, { "epoch": 1.29, "grad_norm": 0.43887242674827576, "learning_rate": 3.767908309455588e-05, "loss": 0.2825, "step": 250 }, { "epoch": 1.34, "grad_norm": 0.630588173866272, "learning_rate": 3.6962750716332376e-05, "loss": 0.2695, "step": 260 }, { "epoch": 1.39, "grad_norm": 0.4198044538497925, "learning_rate": 3.624641833810888e-05, "loss": 0.2483, "step": 270 }, { "epoch": 1.44, "grad_norm": 0.9144806265830994, "learning_rate": 3.553008595988539e-05, "loss": 0.2452, "step": 280 }, { "epoch": 1.49, "grad_norm": 0.8064547181129456, "learning_rate": 3.4813753581661896e-05, "loss": 0.2453, "step": 290 }, { "epoch": 1.55, "grad_norm": 0.6866409182548523, "learning_rate": 3.4097421203438395e-05, "loss": 0.2348, "step": 300 }, { "epoch": 1.6, "grad_norm": 0.5553216934204102, "learning_rate": 3.33810888252149e-05, "loss": 0.2193, "step": 310 }, { "epoch": 1.65, "grad_norm": 0.4838345944881439, "learning_rate": 3.266475644699141e-05, "loss": 0.2216, "step": 320 }, { "epoch": 1.7, "grad_norm": 0.6289733648300171, "learning_rate": 3.1948424068767914e-05, "loss": 0.202, "step": 330 }, { "epoch": 1.75, "grad_norm": 0.7053207159042358, "learning_rate": 3.1232091690544414e-05, "loss": 0.1981, "step": 340 }, { "epoch": 1.8, "grad_norm": 0.30541422963142395, "learning_rate": 3.0515759312320917e-05, "loss": 0.202, "step": 350 }, { "epoch": 1.86, "grad_norm": 0.3764614760875702, "learning_rate": 2.9799426934097423e-05, "loss": 0.191, "step": 360 }, { "epoch": 1.91, "grad_norm": 0.3376723825931549, "learning_rate": 2.9083094555873923e-05, "loss": 0.1861, "step": 370 }, { "epoch": 1.96, "grad_norm": 0.33649709820747375, "learning_rate": 2.836676217765043e-05, "loss": 0.1711, "step": 380 }, { "epoch": 2.0, "eval_accuracy": 0.9978252990213845, "eval_loss": 0.16818779706954956, "eval_runtime": 316.359, "eval_samples_per_second": 8.721, "eval_steps_per_second": 0.275, "step": 388 }, { "epoch": 2.01, "grad_norm": 0.4789816737174988, "learning_rate": 2.7650429799426936e-05, "loss": 0.1692, "step": 390 }, { "epoch": 2.06, "grad_norm": 0.37141987681388855, "learning_rate": 2.6934097421203442e-05, "loss": 0.168, "step": 400 }, { "epoch": 2.11, "grad_norm": 0.5811685919761658, "learning_rate": 2.6217765042979942e-05, "loss": 0.1614, "step": 410 }, { "epoch": 2.16, "grad_norm": 0.5884858965873718, "learning_rate": 2.5501432664756448e-05, "loss": 0.169, "step": 420 }, { "epoch": 2.22, "grad_norm": 0.7159774899482727, "learning_rate": 2.4785100286532954e-05, "loss": 0.1584, "step": 430 }, { "epoch": 2.27, "grad_norm": 0.32133427262306213, "learning_rate": 2.4068767908309457e-05, "loss": 0.1605, "step": 440 }, { "epoch": 2.32, "grad_norm": 1.0729812383651733, "learning_rate": 2.335243553008596e-05, "loss": 0.1539, "step": 450 }, { "epoch": 2.37, "grad_norm": 0.3316652178764343, "learning_rate": 2.2636103151862463e-05, "loss": 0.1433, "step": 460 }, { "epoch": 2.42, "grad_norm": 0.6638622879981995, "learning_rate": 2.191977077363897e-05, "loss": 0.1494, "step": 470 }, { "epoch": 2.47, "grad_norm": 0.7581809163093567, "learning_rate": 2.1203438395415473e-05, "loss": 0.1536, "step": 480 }, { "epoch": 2.53, "grad_norm": 0.2173503339290619, "learning_rate": 2.048710601719198e-05, "loss": 0.1483, "step": 490 }, { "epoch": 2.58, "grad_norm": 0.8919398188591003, "learning_rate": 1.9770773638968482e-05, "loss": 0.1439, "step": 500 }, { "epoch": 2.63, "grad_norm": 0.8781760334968567, "learning_rate": 1.905444126074499e-05, "loss": 0.1364, "step": 510 }, { "epoch": 2.68, "grad_norm": 0.26618167757987976, "learning_rate": 1.833810888252149e-05, "loss": 0.1307, "step": 520 }, { "epoch": 2.73, "grad_norm": 0.31138116121292114, "learning_rate": 1.7621776504297995e-05, "loss": 0.1308, "step": 530 }, { "epoch": 2.78, "grad_norm": 0.8110724091529846, "learning_rate": 1.6905444126074498e-05, "loss": 0.1405, "step": 540 }, { "epoch": 2.84, "grad_norm": 0.2559654712677002, "learning_rate": 1.6189111747851004e-05, "loss": 0.1315, "step": 550 }, { "epoch": 2.89, "grad_norm": 0.2344350814819336, "learning_rate": 1.5472779369627507e-05, "loss": 0.1298, "step": 560 }, { "epoch": 2.94, "grad_norm": 0.298566073179245, "learning_rate": 1.4756446991404013e-05, "loss": 0.1316, "step": 570 }, { "epoch": 2.99, "grad_norm": 0.25082945823669434, "learning_rate": 1.4040114613180516e-05, "loss": 0.1307, "step": 580 }, { "epoch": 3.0, "eval_accuracy": 0.9985501993475897, "eval_loss": 0.11476617306470871, "eval_runtime": 376.2622, "eval_samples_per_second": 7.333, "eval_steps_per_second": 0.231, "step": 582 }, { "epoch": 3.04, "grad_norm": 0.21889334917068481, "learning_rate": 1.3323782234957023e-05, "loss": 0.1195, "step": 590 }, { "epoch": 3.09, "grad_norm": 0.3546507954597473, "learning_rate": 1.2607449856733524e-05, "loss": 0.1333, "step": 600 }, { "epoch": 3.14, "grad_norm": 0.2458142638206482, "learning_rate": 1.1891117478510029e-05, "loss": 0.1176, "step": 610 }, { "epoch": 3.2, "grad_norm": 0.45963388681411743, "learning_rate": 1.1174785100286533e-05, "loss": 0.1342, "step": 620 }, { "epoch": 3.25, "grad_norm": 0.2312440127134323, "learning_rate": 1.0458452722063038e-05, "loss": 0.1207, "step": 630 }, { "epoch": 3.3, "grad_norm": 0.21415020525455475, "learning_rate": 9.742120343839543e-06, "loss": 0.1294, "step": 640 }, { "epoch": 3.35, "grad_norm": 0.1821221262216568, "learning_rate": 9.025787965616046e-06, "loss": 0.1262, "step": 650 }, { "epoch": 3.4, "grad_norm": 0.18438111245632172, "learning_rate": 8.30945558739255e-06, "loss": 0.108, "step": 660 }, { "epoch": 3.45, "grad_norm": 0.2131635844707489, "learning_rate": 7.593123209169055e-06, "loss": 0.1138, "step": 670 }, { "epoch": 3.51, "grad_norm": 1.0909886360168457, "learning_rate": 6.876790830945559e-06, "loss": 0.1179, "step": 680 }, { "epoch": 3.56, "grad_norm": 0.6753919124603271, "learning_rate": 6.160458452722064e-06, "loss": 0.1118, "step": 690 }, { "epoch": 3.61, "grad_norm": 0.20348551869392395, "learning_rate": 5.4441260744985674e-06, "loss": 0.1019, "step": 700 }, { "epoch": 3.66, "grad_norm": 0.22448669373989105, "learning_rate": 4.727793696275072e-06, "loss": 0.1093, "step": 710 }, { "epoch": 3.71, "grad_norm": 0.6886144280433655, "learning_rate": 4.011461318051577e-06, "loss": 0.1067, "step": 720 }, { "epoch": 3.76, "grad_norm": 0.19005030393600464, "learning_rate": 3.2951289398280802e-06, "loss": 0.1237, "step": 730 }, { "epoch": 3.81, "grad_norm": 0.2179213911294937, "learning_rate": 2.578796561604585e-06, "loss": 0.1187, "step": 740 }, { "epoch": 3.87, "grad_norm": 0.47610709071159363, "learning_rate": 1.862464183381089e-06, "loss": 0.1068, "step": 750 }, { "epoch": 3.92, "grad_norm": 0.34112074971199036, "learning_rate": 1.1461318051575932e-06, "loss": 0.1204, "step": 760 }, { "epoch": 3.97, "grad_norm": 0.24934335052967072, "learning_rate": 4.2979942693409743e-07, "loss": 0.115, "step": 770 }, { "epoch": 4.0, "eval_accuracy": 0.9985501993475897, "eval_loss": 0.10186277329921722, "eval_runtime": 304.0711, "eval_samples_per_second": 9.074, "eval_steps_per_second": 0.286, "step": 776 }, { "epoch": 4.0, "step": 776, "total_flos": 7.69643703046162e+18, "train_loss": 0.4810273270938814, "train_runtime": 53143.3355, "train_samples_per_second": 1.869, "train_steps_per_second": 0.015 } ], "logging_steps": 10, "max_steps": 776, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "total_flos": 7.69643703046162e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }