{ "best_metric": 0.19424556195735931, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.02146037877568539, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00010730189387842696, "eval_loss": 1.2311546802520752, "eval_runtime": 231.2461, "eval_samples_per_second": 16.969, "eval_steps_per_second": 4.242, "step": 1 }, { "epoch": 0.0010730189387842695, "grad_norm": 11.073667526245117, "learning_rate": 4.2000000000000004e-05, "loss": 0.8668, "step": 10 }, { "epoch": 0.002146037877568539, "grad_norm": 4.785333633422852, "learning_rate": 8.400000000000001e-05, "loss": 0.2644, "step": 20 }, { "epoch": 0.003219056816352809, "grad_norm": 7.615594387054443, "learning_rate": 0.000126, "loss": 0.2329, "step": 30 }, { "epoch": 0.004292075755137078, "grad_norm": 12.958745002746582, "learning_rate": 0.00016800000000000002, "loss": 0.2349, "step": 40 }, { "epoch": 0.005365094693921347, "grad_norm": 12.965938568115234, "learning_rate": 0.00021, "loss": 0.2444, "step": 50 }, { "epoch": 0.005365094693921347, "eval_loss": 0.19424556195735931, "eval_runtime": 231.1605, "eval_samples_per_second": 16.975, "eval_steps_per_second": 4.244, "step": 50 }, { "epoch": 0.006438113632705618, "grad_norm": 7.702192783355713, "learning_rate": 0.00020974422527728155, "loss": 0.4518, "step": 60 }, { "epoch": 0.007511132571489887, "grad_norm": 7.632840156555176, "learning_rate": 0.0002089781472178649, "loss": 0.253, "step": 70 }, { "epoch": 0.008584151510274156, "grad_norm": 16.95334243774414, "learning_rate": 0.0002077054980770496, "loss": 0.2136, "step": 80 }, { "epoch": 0.009657170449058426, "grad_norm": 3.675330638885498, "learning_rate": 0.00020593247807352348, "loss": 0.2567, "step": 90 }, { "epoch": 0.010730189387842695, "grad_norm": 28.563945770263672, "learning_rate": 0.00020366772518252038, "loss": 0.6538, "step": 100 }, { "epoch": 0.010730189387842695, "eval_loss": 0.9097299575805664, "eval_runtime": 230.7518, "eval_samples_per_second": 17.005, "eval_steps_per_second": 4.251, "step": 100 }, { "epoch": 0.011803208326626964, "grad_norm": 6.406527996063232, "learning_rate": 0.0002009222730524731, "loss": 0.7567, "step": 110 }, { "epoch": 0.012876227265411235, "grad_norm": 20.16675567626953, "learning_rate": 0.00019770949725018733, "loss": 0.3424, "step": 120 }, { "epoch": 0.013949246204195504, "grad_norm": 63.08838653564453, "learning_rate": 0.00019404505009642473, "loss": 0.4155, "step": 130 }, { "epoch": 0.015022265142979774, "grad_norm": 7.741647720336914, "learning_rate": 0.0001899467844093695, "loss": 0.3971, "step": 140 }, { "epoch": 0.016095284081764045, "grad_norm": 160.4084930419922, "learning_rate": 0.00018543466652749268, "loss": 0.5994, "step": 150 }, { "epoch": 0.016095284081764045, "eval_loss": 0.6185657382011414, "eval_runtime": 230.7161, "eval_samples_per_second": 17.008, "eval_steps_per_second": 4.252, "step": 150 }, { "epoch": 0.017168303020548312, "grad_norm": 7.015183925628662, "learning_rate": 0.00018053067903555837, "loss": 1.3173, "step": 160 }, { "epoch": 0.018241321959332583, "grad_norm": 12.889235496520996, "learning_rate": 0.00017525871366768012, "loss": 0.228, "step": 170 }, { "epoch": 0.01931434089811685, "grad_norm": 8.948780059814453, "learning_rate": 0.00016964445490919413, "loss": 0.3793, "step": 180 }, { "epoch": 0.020387359836901122, "grad_norm": 10.446616172790527, "learning_rate": 0.00016371525486442843, "loss": 0.3429, "step": 190 }, { "epoch": 0.02146037877568539, "grad_norm": 33.06787872314453, "learning_rate": 0.0001575, "loss": 0.3998, "step": 200 }, { "epoch": 0.02146037877568539, "eval_loss": 0.20314748585224152, "eval_runtime": 230.9297, "eval_samples_per_second": 16.992, "eval_steps_per_second": 4.248, "step": 200 } ], "logging_steps": 10, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 3 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.5865033455960064e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }