{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.177777777777778, "eval_steps": 10, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.4444444444444444, "grad_norm": 1.9879343509674072, "learning_rate": 0.0002, "loss": 2.2707, "mean_token_accuracy": 0.5294100181649257, "step": 5, "timestamp_in_seconds": 1739766577.8310535 }, { "epoch": 0.8888888888888888, "grad_norm": 1.009445309638977, "learning_rate": 0.00019047619047619048, "loss": 2.0272, "mean_token_accuracy": 0.5268474393109208, "step": 10, "timestamp_in_seconds": 1739766593.809831 }, { "contract_score": 0.5466966775887976, "epoch": 0.8888888888888888, "eval_loss": 1.8623384237289429, "eval_mean_token_accuracy": 0.5099889012208657, "eval_runtime": 1.2647, "eval_samples_per_second": 7.907, "eval_steps_per_second": 1.581, "step": 10, "timestamp_in_seconds": 1739766613.2455263 }, { "epoch": 1.2666666666666666, "grad_norm": 0.8467594981193542, "learning_rate": 0.00018095238095238095, "loss": 1.8597, "mean_token_accuracy": 0.5747608879576899, "step": 15, "timestamp_in_seconds": 1739766627.466834 }, { "epoch": 1.7111111111111112, "grad_norm": 1.00863516330719, "learning_rate": 0.00017142857142857143, "loss": 1.5547, "mean_token_accuracy": 0.609207512410729, "step": 20, "timestamp_in_seconds": 1739766643.4715047 }, { "contract_score": 0.517531061316395, "epoch": 1.7111111111111112, "eval_loss": 1.909725546836853, "eval_mean_token_accuracy": 0.5060673325934147, "eval_runtime": 1.2694, "eval_samples_per_second": 7.878, "eval_steps_per_second": 1.576, "step": 20, "timestamp_in_seconds": 1739766662.6723397 }, { "epoch": 2.088888888888889, "grad_norm": 1.0704385042190552, "learning_rate": 0.00016190476190476192, "loss": 1.4427, "mean_token_accuracy": 0.6196816753107063, "step": 25, "timestamp_in_seconds": 1739766676.269455 }, { "epoch": 2.533333333333333, "grad_norm": 1.5208722352981567, "learning_rate": 0.00015238095238095237, "loss": 1.1007, "mean_token_accuracy": 0.690360556370996, "step": 30, "timestamp_in_seconds": 1739766691.5902622 }, { "contract_score": 0.5104592473887242, "epoch": 2.533333333333333, "eval_loss": 2.0625336170196533, "eval_mean_token_accuracy": 0.48486866444691085, "eval_runtime": 1.284, "eval_samples_per_second": 7.788, "eval_steps_per_second": 1.558, "step": 30, "timestamp_in_seconds": 1739766712.605668 }, { "epoch": 2.977777777777778, "grad_norm": 2.2304763793945312, "learning_rate": 0.00014285714285714287, "loss": 1.0283, "mean_token_accuracy": 0.7108713038621913, "step": 35, "timestamp_in_seconds": 1739766729.188164 }, { "epoch": 3.3555555555555556, "grad_norm": 1.914406180381775, "learning_rate": 0.00013333333333333334, "loss": 0.5798, "mean_token_accuracy": 0.8472582884142912, "step": 40, "timestamp_in_seconds": 1739766742.8015592 }, { "contract_score": 0.5328695250138283, "epoch": 3.3555555555555556, "eval_loss": 2.535480499267578, "eval_mean_token_accuracy": 0.49589345172031074, "eval_runtime": 1.2706, "eval_samples_per_second": 7.87, "eval_steps_per_second": 1.574, "step": 40, "timestamp_in_seconds": 1739766762.4698246 }, { "epoch": 3.8, "grad_norm": 3.442265272140503, "learning_rate": 0.0001238095238095238, "loss": 0.4816, "mean_token_accuracy": 0.8550400784948758, "step": 45, "timestamp_in_seconds": 1739766778.3630936 }, { "epoch": 4.177777777777778, "grad_norm": 1.0488659143447876, "learning_rate": 0.00011428571428571428, "loss": 0.2982, "mean_token_accuracy": 0.9270337260594542, "step": 50, "timestamp_in_seconds": 1739766791.7478123 }, { "epoch": 4.177777777777778, "eval_loss": 2.858454704284668, "eval_mean_token_accuracy": 0.49822419533851275, "eval_runtime": 1.2915, "eval_samples_per_second": 7.743, "eval_steps_per_second": 1.549, "step": 50 } ], "logging_steps": 5, "max_steps": 110, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 10, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7860274034343936.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }