{
  "best_metric": 0.9985501993475897,
  "best_model_checkpoint": "Smart_Tour_Luxor_v1.0\\checkpoint-194",
  "epoch": 4.0,
  "eval_steps": 500,
  "global_step": 776,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05,
      "grad_norm": 1.1416014432907104,
      "learning_rate": 6.41025641025641e-06,
      "loss": 2.9726,
      "step": 10
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0776972770690918,
      "learning_rate": 1.282051282051282e-05,
      "loss": 2.9053,
      "step": 20
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0615094900131226,
      "learning_rate": 1.923076923076923e-05,
      "loss": 2.7576,
      "step": 30
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0457894802093506,
      "learning_rate": 2.564102564102564e-05,
      "loss": 2.5866,
      "step": 40
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1404309272766113,
      "learning_rate": 3.205128205128206e-05,
      "loss": 2.3687,
      "step": 50
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1770880222320557,
      "learning_rate": 3.846153846153846e-05,
      "loss": 2.1344,
      "step": 60
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2791804075241089,
      "learning_rate": 4.4871794871794874e-05,
      "loss": 1.8869,
      "step": 70
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2911182641983032,
      "learning_rate": 4.98567335243553e-05,
      "loss": 1.5984,
      "step": 80
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3716331720352173,
      "learning_rate": 4.914040114613181e-05,
      "loss": 1.3587,
      "step": 90
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3185632228851318,
      "learning_rate": 4.842406876790831e-05,
      "loss": 1.1518,
      "step": 100
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4818017482757568,
      "learning_rate": 4.7707736389684815e-05,
      "loss": 0.9694,
      "step": 110
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.235606074333191,
      "learning_rate": 4.699140401146132e-05,
      "loss": 0.8449,
      "step": 120
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0605517625808716,
      "learning_rate": 4.627507163323783e-05,
      "loss": 0.7504,
      "step": 130
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0399278402328491,
      "learning_rate": 4.555873925501433e-05,
      "loss": 0.6532,
      "step": 140
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2140040397644043,
      "learning_rate": 4.4842406876790833e-05,
      "loss": 0.5724,
      "step": 150
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.8598456382751465,
      "learning_rate": 4.412607449856734e-05,
      "loss": 0.5135,
      "step": 160
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.7887383103370667,
      "learning_rate": 4.3409742120343846e-05,
      "loss": 0.4758,
      "step": 170
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.7831109762191772,
      "learning_rate": 4.2693409742120346e-05,
      "loss": 0.4344,
      "step": 180
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.6504438519477844,
      "learning_rate": 4.197707736389685e-05,
      "loss": 0.4048,
      "step": 190
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.9985501993475897,
      "eval_loss": 0.3498220443725586,
      "eval_runtime": 330.0181,
      "eval_samples_per_second": 8.36,
      "eval_steps_per_second": 0.264,
      "step": 194
    },
    {
      "epoch": 1.03,
      "grad_norm": 0.8069166541099548,
      "learning_rate": 4.126074498567336e-05,
      "loss": 0.3732,
      "step": 200
    },
    {
      "epoch": 1.08,
      "grad_norm": 0.8006480932235718,
      "learning_rate": 4.054441260744986e-05,
      "loss": 0.3454,
      "step": 210
    },
    {
      "epoch": 1.13,
      "grad_norm": 0.695347011089325,
      "learning_rate": 3.982808022922636e-05,
      "loss": 0.3199,
      "step": 220
    },
    {
      "epoch": 1.19,
      "grad_norm": 0.5613417029380798,
      "learning_rate": 3.9111747851002864e-05,
      "loss": 0.2974,
      "step": 230
    },
    {
      "epoch": 1.24,
      "grad_norm": 0.48088908195495605,
      "learning_rate": 3.839541547277937e-05,
      "loss": 0.3018,
      "step": 240
    },
    {
      "epoch": 1.29,
      "grad_norm": 0.43887242674827576,
      "learning_rate": 3.767908309455588e-05,
      "loss": 0.2825,
      "step": 250
    },
    {
      "epoch": 1.34,
      "grad_norm": 0.630588173866272,
      "learning_rate": 3.6962750716332376e-05,
      "loss": 0.2695,
      "step": 260
    },
    {
      "epoch": 1.39,
      "grad_norm": 0.4198044538497925,
      "learning_rate": 3.624641833810888e-05,
      "loss": 0.2483,
      "step": 270
    },
    {
      "epoch": 1.44,
      "grad_norm": 0.9144806265830994,
      "learning_rate": 3.553008595988539e-05,
      "loss": 0.2452,
      "step": 280
    },
    {
      "epoch": 1.49,
      "grad_norm": 0.8064547181129456,
      "learning_rate": 3.4813753581661896e-05,
      "loss": 0.2453,
      "step": 290
    },
    {
      "epoch": 1.55,
      "grad_norm": 0.6866409182548523,
      "learning_rate": 3.4097421203438395e-05,
      "loss": 0.2348,
      "step": 300
    },
    {
      "epoch": 1.6,
      "grad_norm": 0.5553216934204102,
      "learning_rate": 3.33810888252149e-05,
      "loss": 0.2193,
      "step": 310
    },
    {
      "epoch": 1.65,
      "grad_norm": 0.4838345944881439,
      "learning_rate": 3.266475644699141e-05,
      "loss": 0.2216,
      "step": 320
    },
    {
      "epoch": 1.7,
      "grad_norm": 0.6289733648300171,
      "learning_rate": 3.1948424068767914e-05,
      "loss": 0.202,
      "step": 330
    },
    {
      "epoch": 1.75,
      "grad_norm": 0.7053207159042358,
      "learning_rate": 3.1232091690544414e-05,
      "loss": 0.1981,
      "step": 340
    },
    {
      "epoch": 1.8,
      "grad_norm": 0.30541422963142395,
      "learning_rate": 3.0515759312320917e-05,
      "loss": 0.202,
      "step": 350
    },
    {
      "epoch": 1.86,
      "grad_norm": 0.3764614760875702,
      "learning_rate": 2.9799426934097423e-05,
      "loss": 0.191,
      "step": 360
    },
    {
      "epoch": 1.91,
      "grad_norm": 0.3376723825931549,
      "learning_rate": 2.9083094555873923e-05,
      "loss": 0.1861,
      "step": 370
    },
    {
      "epoch": 1.96,
      "grad_norm": 0.33649709820747375,
      "learning_rate": 2.836676217765043e-05,
      "loss": 0.1711,
      "step": 380
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.9978252990213845,
      "eval_loss": 0.16818779706954956,
      "eval_runtime": 316.359,
      "eval_samples_per_second": 8.721,
      "eval_steps_per_second": 0.275,
      "step": 388
    },
    {
      "epoch": 2.01,
      "grad_norm": 0.4789816737174988,
      "learning_rate": 2.7650429799426936e-05,
      "loss": 0.1692,
      "step": 390
    },
    {
      "epoch": 2.06,
      "grad_norm": 0.37141987681388855,
      "learning_rate": 2.6934097421203442e-05,
      "loss": 0.168,
      "step": 400
    },
    {
      "epoch": 2.11,
      "grad_norm": 0.5811685919761658,
      "learning_rate": 2.6217765042979942e-05,
      "loss": 0.1614,
      "step": 410
    },
    {
      "epoch": 2.16,
      "grad_norm": 0.5884858965873718,
      "learning_rate": 2.5501432664756448e-05,
      "loss": 0.169,
      "step": 420
    },
    {
      "epoch": 2.22,
      "grad_norm": 0.7159774899482727,
      "learning_rate": 2.4785100286532954e-05,
      "loss": 0.1584,
      "step": 430
    },
    {
      "epoch": 2.27,
      "grad_norm": 0.32133427262306213,
      "learning_rate": 2.4068767908309457e-05,
      "loss": 0.1605,
      "step": 440
    },
    {
      "epoch": 2.32,
      "grad_norm": 1.0729812383651733,
      "learning_rate": 2.335243553008596e-05,
      "loss": 0.1539,
      "step": 450
    },
    {
      "epoch": 2.37,
      "grad_norm": 0.3316652178764343,
      "learning_rate": 2.2636103151862463e-05,
      "loss": 0.1433,
      "step": 460
    },
    {
      "epoch": 2.42,
      "grad_norm": 0.6638622879981995,
      "learning_rate": 2.191977077363897e-05,
      "loss": 0.1494,
      "step": 470
    },
    {
      "epoch": 2.47,
      "grad_norm": 0.7581809163093567,
      "learning_rate": 2.1203438395415473e-05,
      "loss": 0.1536,
      "step": 480
    },
    {
      "epoch": 2.53,
      "grad_norm": 0.2173503339290619,
      "learning_rate": 2.048710601719198e-05,
      "loss": 0.1483,
      "step": 490
    },
    {
      "epoch": 2.58,
      "grad_norm": 0.8919398188591003,
      "learning_rate": 1.9770773638968482e-05,
      "loss": 0.1439,
      "step": 500
    },
    {
      "epoch": 2.63,
      "grad_norm": 0.8781760334968567,
      "learning_rate": 1.905444126074499e-05,
      "loss": 0.1364,
      "step": 510
    },
    {
      "epoch": 2.68,
      "grad_norm": 0.26618167757987976,
      "learning_rate": 1.833810888252149e-05,
      "loss": 0.1307,
      "step": 520
    },
    {
      "epoch": 2.73,
      "grad_norm": 0.31138116121292114,
      "learning_rate": 1.7621776504297995e-05,
      "loss": 0.1308,
      "step": 530
    },
    {
      "epoch": 2.78,
      "grad_norm": 0.8110724091529846,
      "learning_rate": 1.6905444126074498e-05,
      "loss": 0.1405,
      "step": 540
    },
    {
      "epoch": 2.84,
      "grad_norm": 0.2559654712677002,
      "learning_rate": 1.6189111747851004e-05,
      "loss": 0.1315,
      "step": 550
    },
    {
      "epoch": 2.89,
      "grad_norm": 0.2344350814819336,
      "learning_rate": 1.5472779369627507e-05,
      "loss": 0.1298,
      "step": 560
    },
    {
      "epoch": 2.94,
      "grad_norm": 0.298566073179245,
      "learning_rate": 1.4756446991404013e-05,
      "loss": 0.1316,
      "step": 570
    },
    {
      "epoch": 2.99,
      "grad_norm": 0.25082945823669434,
      "learning_rate": 1.4040114613180516e-05,
      "loss": 0.1307,
      "step": 580
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.9985501993475897,
      "eval_loss": 0.11476617306470871,
      "eval_runtime": 376.2622,
      "eval_samples_per_second": 7.333,
      "eval_steps_per_second": 0.231,
      "step": 582
    },
    {
      "epoch": 3.04,
      "grad_norm": 0.21889334917068481,
      "learning_rate": 1.3323782234957023e-05,
      "loss": 0.1195,
      "step": 590
    },
    {
      "epoch": 3.09,
      "grad_norm": 0.3546507954597473,
      "learning_rate": 1.2607449856733524e-05,
      "loss": 0.1333,
      "step": 600
    },
    {
      "epoch": 3.14,
      "grad_norm": 0.2458142638206482,
      "learning_rate": 1.1891117478510029e-05,
      "loss": 0.1176,
      "step": 610
    },
    {
      "epoch": 3.2,
      "grad_norm": 0.45963388681411743,
      "learning_rate": 1.1174785100286533e-05,
      "loss": 0.1342,
      "step": 620
    },
    {
      "epoch": 3.25,
      "grad_norm": 0.2312440127134323,
      "learning_rate": 1.0458452722063038e-05,
      "loss": 0.1207,
      "step": 630
    },
    {
      "epoch": 3.3,
      "grad_norm": 0.21415020525455475,
      "learning_rate": 9.742120343839543e-06,
      "loss": 0.1294,
      "step": 640
    },
    {
      "epoch": 3.35,
      "grad_norm": 0.1821221262216568,
      "learning_rate": 9.025787965616046e-06,
      "loss": 0.1262,
      "step": 650
    },
    {
      "epoch": 3.4,
      "grad_norm": 0.18438111245632172,
      "learning_rate": 8.30945558739255e-06,
      "loss": 0.108,
      "step": 660
    },
    {
      "epoch": 3.45,
      "grad_norm": 0.2131635844707489,
      "learning_rate": 7.593123209169055e-06,
      "loss": 0.1138,
      "step": 670
    },
    {
      "epoch": 3.51,
      "grad_norm": 1.0909886360168457,
      "learning_rate": 6.876790830945559e-06,
      "loss": 0.1179,
      "step": 680
    },
    {
      "epoch": 3.56,
      "grad_norm": 0.6753919124603271,
      "learning_rate": 6.160458452722064e-06,
      "loss": 0.1118,
      "step": 690
    },
    {
      "epoch": 3.61,
      "grad_norm": 0.20348551869392395,
      "learning_rate": 5.4441260744985674e-06,
      "loss": 0.1019,
      "step": 700
    },
    {
      "epoch": 3.66,
      "grad_norm": 0.22448669373989105,
      "learning_rate": 4.727793696275072e-06,
      "loss": 0.1093,
      "step": 710
    },
    {
      "epoch": 3.71,
      "grad_norm": 0.6886144280433655,
      "learning_rate": 4.011461318051577e-06,
      "loss": 0.1067,
      "step": 720
    },
    {
      "epoch": 3.76,
      "grad_norm": 0.19005030393600464,
      "learning_rate": 3.2951289398280802e-06,
      "loss": 0.1237,
      "step": 730
    },
    {
      "epoch": 3.81,
      "grad_norm": 0.2179213911294937,
      "learning_rate": 2.578796561604585e-06,
      "loss": 0.1187,
      "step": 740
    },
    {
      "epoch": 3.87,
      "grad_norm": 0.47610709071159363,
      "learning_rate": 1.862464183381089e-06,
      "loss": 0.1068,
      "step": 750
    },
    {
      "epoch": 3.92,
      "grad_norm": 0.34112074971199036,
      "learning_rate": 1.1461318051575932e-06,
      "loss": 0.1204,
      "step": 760
    },
    {
      "epoch": 3.97,
      "grad_norm": 0.24934335052967072,
      "learning_rate": 4.2979942693409743e-07,
      "loss": 0.115,
      "step": 770
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.9985501993475897,
      "eval_loss": 0.10186277329921722,
      "eval_runtime": 304.0711,
      "eval_samples_per_second": 9.074,
      "eval_steps_per_second": 0.286,
      "step": 776
    },
    {
      "epoch": 4.0,
      "step": 776,
      "total_flos": 7.69643703046162e+18,
      "train_loss": 0.4810273270938814,
      "train_runtime": 53143.3355,
      "train_samples_per_second": 1.869,
      "train_steps_per_second": 0.015
    }
  ],
  "logging_steps": 10,
  "max_steps": 776,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 500,
  "total_flos": 7.69643703046162e+18,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}