{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9454545454545453, "eval_steps": 500, "global_step": 81, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 2.0000000000000003e-06, "loss": 1.1464, "step": 1 }, { "epoch": 0.07, "learning_rate": 4.000000000000001e-06, "loss": 1.1231, "step": 2 }, { "epoch": 0.11, "learning_rate": 6e-06, "loss": 1.0741, "step": 3 }, { "epoch": 0.15, "learning_rate": 8.000000000000001e-06, "loss": 1.0219, "step": 4 }, { "epoch": 0.18, "learning_rate": 1e-05, "loss": 1.0006, "step": 5 }, { "epoch": 0.22, "learning_rate": 1.2e-05, "loss": 0.8375, "step": 6 }, { "epoch": 0.25, "learning_rate": 1.4e-05, "loss": 0.7492, "step": 7 }, { "epoch": 0.29, "learning_rate": 1.6000000000000003e-05, "loss": 0.6305, "step": 8 }, { "epoch": 0.33, "learning_rate": 1.8e-05, "loss": 0.5905, "step": 9 }, { "epoch": 0.36, "learning_rate": 2e-05, "loss": 0.5033, "step": 10 }, { "epoch": 0.4, "learning_rate": 1.9990212265199738e-05, "loss": 0.4194, "step": 11 }, { "epoch": 0.44, "learning_rate": 1.996086822074945e-05, "loss": 0.3283, "step": 12 }, { "epoch": 0.47, "learning_rate": 1.9912025308994146e-05, "loss": 0.2703, "step": 13 }, { "epoch": 0.51, "learning_rate": 1.9843779142227258e-05, "loss": 0.2523, "step": 14 }, { "epoch": 0.55, "learning_rate": 1.975626331552507e-05, "loss": 0.2421, "step": 15 }, { "epoch": 0.58, "learning_rate": 1.96496491452281e-05, "loss": 0.2092, "step": 16 }, { "epoch": 0.62, "learning_rate": 1.9524145333581315e-05, "loss": 0.1988, "step": 17 }, { "epoch": 0.65, "learning_rate": 1.9379997560189677e-05, "loss": 0.1909, "step": 18 }, { "epoch": 0.69, "learning_rate": 1.9217488001088784e-05, "loss": 0.1854, "step": 19 }, { "epoch": 0.73, "learning_rate": 1.903693477637204e-05, "loss": 0.1908, "step": 20 }, { "epoch": 0.76, "learning_rate": 1.883869132745561e-05, "loss": 0.1873, "step": 21 }, { "epoch": 0.8, "learning_rate": 1.862314572520028e-05, "loss": 0.1706, "step": 22 }, { "epoch": 0.84, "learning_rate": 1.8390719910244487e-05, "loss": 0.175, "step": 23 }, { "epoch": 0.87, "learning_rate": 1.8141868867035745e-05, "loss": 0.1628, "step": 24 }, { "epoch": 0.91, "learning_rate": 1.7877079733177185e-05, "loss": 0.1604, "step": 25 }, { "epoch": 0.95, "learning_rate": 1.759687084583285e-05, "loss": 0.1537, "step": 26 }, { "epoch": 0.98, "learning_rate": 1.7301790727058344e-05, "loss": 0.1493, "step": 27 }, { "epoch": 1.02, "learning_rate": 1.6992417010043144e-05, "loss": 0.1396, "step": 28 }, { "epoch": 1.05, "learning_rate": 1.666935530836651e-05, "loss": 0.1569, "step": 29 }, { "epoch": 1.09, "learning_rate": 1.6333238030480473e-05, "loss": 0.151, "step": 30 }, { "epoch": 1.13, "learning_rate": 1.5984723141740578e-05, "loss": 0.1426, "step": 31 }, { "epoch": 1.16, "learning_rate": 1.562449287640781e-05, "loss": 0.1285, "step": 32 }, { "epoch": 1.2, "learning_rate": 1.5253252402142989e-05, "loss": 0.1379, "step": 33 }, { "epoch": 1.24, "learning_rate": 1.4871728439607967e-05, "loss": 0.1275, "step": 34 }, { "epoch": 1.27, "learning_rate": 1.4480667839875786e-05, "loss": 0.1377, "step": 35 }, { "epoch": 1.31, "learning_rate": 1.408083612243465e-05, "loss": 0.1316, "step": 36 }, { "epoch": 1.35, "learning_rate": 1.367301597664757e-05, "loss": 0.1338, "step": 37 }, { "epoch": 1.38, "learning_rate": 1.3258005729601178e-05, "loss": 0.1201, "step": 38 }, { "epoch": 1.42, "learning_rate": 1.2836617783342968e-05, "loss": 0.1174, "step": 39 }, { "epoch": 1.45, "learning_rate": 1.2409677024566145e-05, "loss": 0.1283, "step": 40 }, { "epoch": 1.49, "learning_rate": 1.1978019209855174e-05, "loss": 0.1332, "step": 41 }, { "epoch": 1.53, "learning_rate": 1.1542489329653024e-05, "loss": 0.1157, "step": 42 }, { "epoch": 1.56, "learning_rate": 1.11039399541527e-05, "loss": 0.1339, "step": 43 }, { "epoch": 1.6, "learning_rate": 1.066322956435104e-05, "loss": 0.1149, "step": 44 }, { "epoch": 1.64, "learning_rate": 1.022122087153187e-05, "loss": 0.1129, "step": 45 }, { "epoch": 1.67, "learning_rate": 9.778779128468133e-06, "loss": 0.1161, "step": 46 }, { "epoch": 1.71, "learning_rate": 9.336770435648963e-06, "loss": 0.1238, "step": 47 }, { "epoch": 1.75, "learning_rate": 8.896060045847305e-06, "loss": 0.1132, "step": 48 }, { "epoch": 1.78, "learning_rate": 8.457510670346976e-06, "loss": 0.1172, "step": 49 }, { "epoch": 1.82, "learning_rate": 8.021980790144828e-06, "loss": 0.1228, "step": 50 }, { "epoch": 1.85, "learning_rate": 7.590322975433857e-06, "loss": 0.1177, "step": 51 }, { "epoch": 1.89, "learning_rate": 7.163382216657033e-06, "loss": 0.1318, "step": 52 }, { "epoch": 1.93, "learning_rate": 6.741994270398826e-06, "loss": 0.111, "step": 53 }, { "epoch": 1.96, "learning_rate": 6.326984023352435e-06, "loss": 0.113, "step": 54 }, { "epoch": 2.0, "learning_rate": 5.919163877565351e-06, "loss": 0.1157, "step": 55 }, { "epoch": 2.04, "learning_rate": 5.519332160124215e-06, "loss": 0.1044, "step": 56 }, { "epoch": 2.07, "learning_rate": 5.128271560392037e-06, "loss": 0.1079, "step": 57 }, { "epoch": 2.11, "learning_rate": 4.746747597857014e-06, "loss": 0.1172, "step": 58 }, { "epoch": 2.15, "learning_rate": 4.375507123592194e-06, "loss": 0.1111, "step": 59 }, { "epoch": 2.18, "learning_rate": 4.015276858259427e-06, "loss": 0.1158, "step": 60 }, { "epoch": 2.22, "learning_rate": 3.6667619695195287e-06, "loss": 0.114, "step": 61 }, { "epoch": 2.25, "learning_rate": 3.330644691633492e-06, "loss": 0.1063, "step": 62 }, { "epoch": 2.29, "learning_rate": 3.00758298995686e-06, "loss": 0.1101, "step": 63 }, { "epoch": 2.33, "learning_rate": 2.698209272941659e-06, "loss": 0.1105, "step": 64 }, { "epoch": 2.36, "learning_rate": 2.403129154167153e-06, "loss": 0.1094, "step": 65 }, { "epoch": 2.4, "learning_rate": 2.1229202668228197e-06, "loss": 0.1153, "step": 66 }, { "epoch": 2.44, "learning_rate": 1.8581311329642592e-06, "loss": 0.1258, "step": 67 }, { "epoch": 2.47, "learning_rate": 1.609280089755515e-06, "loss": 0.1177, "step": 68 }, { "epoch": 2.51, "learning_rate": 1.3768542747997215e-06, "loss": 0.1179, "step": 69 }, { "epoch": 2.55, "learning_rate": 1.161308672544389e-06, "loss": 0.1188, "step": 70 }, { "epoch": 2.58, "learning_rate": 9.630652236279626e-07, "loss": 0.108, "step": 71 }, { "epoch": 2.62, "learning_rate": 7.825119989112173e-07, "loss": 0.1122, "step": 72 }, { "epoch": 2.65, "learning_rate": 6.200024398103255e-07, "loss": 0.1125, "step": 73 }, { "epoch": 2.69, "learning_rate": 4.7585466641868696e-07, "loss": 0.1204, "step": 74 }, { "epoch": 2.73, "learning_rate": 3.5035085477190143e-07, "loss": 0.1185, "step": 75 }, { "epoch": 2.76, "learning_rate": 2.4373668447493225e-07, "loss": 0.1059, "step": 76 }, { "epoch": 2.8, "learning_rate": 1.562208577727442e-07, "loss": 0.1061, "step": 77 }, { "epoch": 2.84, "learning_rate": 8.797469100585432e-08, "loss": 0.1147, "step": 78 }, { "epoch": 2.87, "learning_rate": 3.913177925055189e-08, "loss": 0.1114, "step": 79 }, { "epoch": 2.91, "learning_rate": 9.78773480026396e-09, "loss": 0.1119, "step": 80 }, { "epoch": 2.95, "learning_rate": 0.0, "loss": 0.1178, "step": 81 }, { "epoch": 2.95, "step": 81, "total_flos": 5.979626147309158e+16, "train_loss": 0.23224136665647413, "train_runtime": 1011.6456, "train_samples_per_second": 20.758, "train_steps_per_second": 0.08 } ], "logging_steps": 1.0, "max_steps": 81, "num_train_epochs": 3, "save_steps": 100, "total_flos": 5.979626147309158e+16, "trial_name": null, "trial_params": null }