{ "best_metric": 0.24638938903808594, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.27155465037338766, "eval_steps": 50, "global_step": 75, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0036207286716451684, "grad_norm": 0.4402937889099121, "learning_rate": 5e-06, "loss": 0.4127, "step": 1 }, { "epoch": 0.0036207286716451684, "eval_loss": 0.39411357045173645, "eval_runtime": 233.1042, "eval_samples_per_second": 3.994, "eval_steps_per_second": 1.0, "step": 1 }, { "epoch": 0.007241457343290337, "grad_norm": 0.43906018137931824, "learning_rate": 1e-05, "loss": 0.3877, "step": 2 }, { "epoch": 0.010862186014935505, "grad_norm": 0.43620216846466064, "learning_rate": 1.5e-05, "loss": 0.3505, "step": 3 }, { "epoch": 0.014482914686580674, "grad_norm": 0.4520662724971771, "learning_rate": 2e-05, "loss": 0.4246, "step": 4 }, { "epoch": 0.018103643358225844, "grad_norm": 0.3887633979320526, "learning_rate": 2.5e-05, "loss": 0.3654, "step": 5 }, { "epoch": 0.02172437202987101, "grad_norm": 0.32681211829185486, "learning_rate": 3e-05, "loss": 0.3817, "step": 6 }, { "epoch": 0.02534510070151618, "grad_norm": 0.30532073974609375, "learning_rate": 3.5e-05, "loss": 0.3417, "step": 7 }, { "epoch": 0.028965829373161348, "grad_norm": 0.2958042621612549, "learning_rate": 4e-05, "loss": 0.3413, "step": 8 }, { "epoch": 0.032586558044806514, "grad_norm": 0.25813692808151245, "learning_rate": 4.5e-05, "loss": 0.3232, "step": 9 }, { "epoch": 0.03620728671645169, "grad_norm": 0.27644163370132446, "learning_rate": 5e-05, "loss": 0.3526, "step": 10 }, { "epoch": 0.039828015388096855, "grad_norm": 0.25497642159461975, "learning_rate": 5.500000000000001e-05, "loss": 0.2976, "step": 11 }, { "epoch": 0.04344874405974202, "grad_norm": 0.26926350593566895, "learning_rate": 6e-05, "loss": 0.2951, "step": 12 }, { "epoch": 0.047069472731387195, "grad_norm": 0.2823635935783386, "learning_rate": 6.500000000000001e-05, "loss": 0.3029, "step": 13 }, { "epoch": 0.05069020140303236, "grad_norm": 0.19785167276859283, "learning_rate": 7e-05, "loss": 0.2589, "step": 14 }, { "epoch": 0.05431093007467753, "grad_norm": 0.19602355360984802, "learning_rate": 7.500000000000001e-05, "loss": 0.2614, "step": 15 }, { "epoch": 0.057931658746322695, "grad_norm": 0.18251068890094757, "learning_rate": 8e-05, "loss": 0.275, "step": 16 }, { "epoch": 0.06155238741796787, "grad_norm": 0.21483223140239716, "learning_rate": 8.5e-05, "loss": 0.3278, "step": 17 }, { "epoch": 0.06517311608961303, "grad_norm": 0.19363826513290405, "learning_rate": 9e-05, "loss": 0.2633, "step": 18 }, { "epoch": 0.0687938447612582, "grad_norm": 0.17659173905849457, "learning_rate": 9.5e-05, "loss": 0.2504, "step": 19 }, { "epoch": 0.07241457343290338, "grad_norm": 0.20334695279598236, "learning_rate": 0.0001, "loss": 0.3023, "step": 20 }, { "epoch": 0.07603530210454854, "grad_norm": 0.18668325245380402, "learning_rate": 9.991845519630678e-05, "loss": 0.2935, "step": 21 }, { "epoch": 0.07965603077619371, "grad_norm": 0.1616354137659073, "learning_rate": 9.967408676742751e-05, "loss": 0.256, "step": 22 }, { "epoch": 0.08327675944783888, "grad_norm": 0.16258352994918823, "learning_rate": 9.926769179238466e-05, "loss": 0.2442, "step": 23 }, { "epoch": 0.08689748811948404, "grad_norm": 0.17701072990894318, "learning_rate": 9.870059584711668e-05, "loss": 0.2841, "step": 24 }, { "epoch": 0.09051821679112922, "grad_norm": 0.16547025740146637, "learning_rate": 9.797464868072488e-05, "loss": 0.2709, "step": 25 }, { "epoch": 0.09413894546277439, "grad_norm": 0.17125892639160156, "learning_rate": 9.709221818197624e-05, "loss": 0.2884, "step": 26 }, { "epoch": 0.09775967413441955, "grad_norm": 0.16323907673358917, "learning_rate": 9.60561826557425e-05, "loss": 0.2505, "step": 27 }, { "epoch": 0.10138040280606472, "grad_norm": 0.16210584342479706, "learning_rate": 9.486992143456792e-05, "loss": 0.2708, "step": 28 }, { "epoch": 0.10500113147770988, "grad_norm": 0.1565285623073578, "learning_rate": 9.353730385598887e-05, "loss": 0.2688, "step": 29 }, { "epoch": 0.10862186014935506, "grad_norm": 0.16366994380950928, "learning_rate": 9.206267664155907e-05, "loss": 0.2684, "step": 30 }, { "epoch": 0.11224258882100023, "grad_norm": 0.16400501132011414, "learning_rate": 9.045084971874738e-05, "loss": 0.2653, "step": 31 }, { "epoch": 0.11586331749264539, "grad_norm": 0.1745917946100235, "learning_rate": 8.870708053195413e-05, "loss": 0.292, "step": 32 }, { "epoch": 0.11948404616429056, "grad_norm": 0.16044358909130096, "learning_rate": 8.683705689382024e-05, "loss": 0.244, "step": 33 }, { "epoch": 0.12310477483593574, "grad_norm": 0.14699938893318176, "learning_rate": 8.484687843276469e-05, "loss": 0.2351, "step": 34 }, { "epoch": 0.1267255035075809, "grad_norm": 0.16715097427368164, "learning_rate": 8.274303669726426e-05, "loss": 0.2797, "step": 35 }, { "epoch": 0.13034623217922606, "grad_norm": 0.16253520548343658, "learning_rate": 8.053239398177191e-05, "loss": 0.2663, "step": 36 }, { "epoch": 0.13396696085087123, "grad_norm": 0.15796582400798798, "learning_rate": 7.822216094333847e-05, "loss": 0.2547, "step": 37 }, { "epoch": 0.1375876895225164, "grad_norm": 0.1650385856628418, "learning_rate": 7.58198730819481e-05, "loss": 0.2821, "step": 38 }, { "epoch": 0.14120841819416158, "grad_norm": 0.14839529991149902, "learning_rate": 7.333336616128369e-05, "loss": 0.2406, "step": 39 }, { "epoch": 0.14482914686580675, "grad_norm": 0.14774177968502045, "learning_rate": 7.077075065009433e-05, "loss": 0.2531, "step": 40 }, { "epoch": 0.14844987553745193, "grad_norm": 0.16159765422344208, "learning_rate": 6.814038526753205e-05, "loss": 0.2816, "step": 41 }, { "epoch": 0.15207060420909707, "grad_norm": 0.15454764664173126, "learning_rate": 6.545084971874738e-05, "loss": 0.2643, "step": 42 }, { "epoch": 0.15569133288074224, "grad_norm": 0.15375515818595886, "learning_rate": 6.271091670967436e-05, "loss": 0.2524, "step": 43 }, { "epoch": 0.15931206155238742, "grad_norm": 0.15904226899147034, "learning_rate": 5.992952333228728e-05, "loss": 0.2604, "step": 44 }, { "epoch": 0.1629327902240326, "grad_norm": 0.14393344521522522, "learning_rate": 5.7115741913664264e-05, "loss": 0.243, "step": 45 }, { "epoch": 0.16655351889567777, "grad_norm": 0.14154766499996185, "learning_rate": 5.427875042394199e-05, "loss": 0.2498, "step": 46 }, { "epoch": 0.1701742475673229, "grad_norm": 0.15560780465602875, "learning_rate": 5.142780253968481e-05, "loss": 0.2599, "step": 47 }, { "epoch": 0.17379497623896809, "grad_norm": 0.16247297823429108, "learning_rate": 4.85721974603152e-05, "loss": 0.2789, "step": 48 }, { "epoch": 0.17741570491061326, "grad_norm": 0.15790365636348724, "learning_rate": 4.5721249576058027e-05, "loss": 0.2529, "step": 49 }, { "epoch": 0.18103643358225843, "grad_norm": 0.1570451706647873, "learning_rate": 4.288425808633575e-05, "loss": 0.2401, "step": 50 }, { "epoch": 0.18103643358225843, "eval_loss": 0.24638938903808594, "eval_runtime": 233.4655, "eval_samples_per_second": 3.988, "eval_steps_per_second": 0.998, "step": 50 }, { "epoch": 0.1846571622539036, "grad_norm": 0.1534356325864792, "learning_rate": 4.007047666771274e-05, "loss": 0.2336, "step": 51 }, { "epoch": 0.18827789092554878, "grad_norm": 0.1455630511045456, "learning_rate": 3.728908329032567e-05, "loss": 0.2266, "step": 52 }, { "epoch": 0.19189861959719393, "grad_norm": 0.15997833013534546, "learning_rate": 3.4549150281252636e-05, "loss": 0.2716, "step": 53 }, { "epoch": 0.1955193482688391, "grad_norm": 0.16611649096012115, "learning_rate": 3.1859614732467954e-05, "loss": 0.2594, "step": 54 }, { "epoch": 0.19914007694048427, "grad_norm": 0.14760734140872955, "learning_rate": 2.9229249349905684e-05, "loss": 0.2265, "step": 55 }, { "epoch": 0.20276080561212945, "grad_norm": 0.14981094002723694, "learning_rate": 2.6666633838716314e-05, "loss": 0.2602, "step": 56 }, { "epoch": 0.20638153428377462, "grad_norm": 0.15253275632858276, "learning_rate": 2.418012691805191e-05, "loss": 0.2395, "step": 57 }, { "epoch": 0.21000226295541977, "grad_norm": 0.14788293838500977, "learning_rate": 2.1777839056661554e-05, "loss": 0.2445, "step": 58 }, { "epoch": 0.21362299162706494, "grad_norm": 0.14573223888874054, "learning_rate": 1.946760601822809e-05, "loss": 0.2352, "step": 59 }, { "epoch": 0.2172437202987101, "grad_norm": 0.1522066593170166, "learning_rate": 1.725696330273575e-05, "loss": 0.2466, "step": 60 }, { "epoch": 0.2208644489703553, "grad_norm": 0.15221665799617767, "learning_rate": 1.5153121567235335e-05, "loss": 0.2667, "step": 61 }, { "epoch": 0.22448517764200046, "grad_norm": 0.15091469883918762, "learning_rate": 1.3162943106179749e-05, "loss": 0.2431, "step": 62 }, { "epoch": 0.22810590631364563, "grad_norm": 0.14665627479553223, "learning_rate": 1.1292919468045877e-05, "loss": 0.2503, "step": 63 }, { "epoch": 0.23172663498529078, "grad_norm": 0.1365693211555481, "learning_rate": 9.549150281252633e-06, "loss": 0.243, "step": 64 }, { "epoch": 0.23534736365693595, "grad_norm": 0.14656876027584076, "learning_rate": 7.937323358440935e-06, "loss": 0.2269, "step": 65 }, { "epoch": 0.23896809232858113, "grad_norm": 0.150408074259758, "learning_rate": 6.462696144011149e-06, "loss": 0.2334, "step": 66 }, { "epoch": 0.2425888210002263, "grad_norm": 0.1671358346939087, "learning_rate": 5.13007856543209e-06, "loss": 0.2789, "step": 67 }, { "epoch": 0.24620954967187147, "grad_norm": 0.15173529088497162, "learning_rate": 3.9438173442575e-06, "loss": 0.2539, "step": 68 }, { "epoch": 0.24983027834351662, "grad_norm": 0.13886642456054688, "learning_rate": 2.9077818180237693e-06, "loss": 0.2293, "step": 69 }, { "epoch": 0.2534510070151618, "grad_norm": 0.14482015371322632, "learning_rate": 2.0253513192751373e-06, "loss": 0.2467, "step": 70 }, { "epoch": 0.25707173568680697, "grad_norm": 0.14723734557628632, "learning_rate": 1.2994041528833266e-06, "loss": 0.2377, "step": 71 }, { "epoch": 0.2606924643584521, "grad_norm": 0.16239996254444122, "learning_rate": 7.323082076153509e-07, "loss": 0.2581, "step": 72 }, { "epoch": 0.2643131930300973, "grad_norm": 0.14651106297969818, "learning_rate": 3.2591323257248893e-07, "loss": 0.2307, "step": 73 }, { "epoch": 0.26793392170174246, "grad_norm": 0.15978926420211792, "learning_rate": 8.15448036932176e-08, "loss": 0.2613, "step": 74 }, { "epoch": 0.27155465037338766, "grad_norm": 0.1479509025812149, "learning_rate": 0.0, "loss": 0.2537, "step": 75 } ], "logging_steps": 1, "max_steps": 75, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.815322950521324e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }