{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9980636237897649, "eval_steps": 23, "global_step": 451, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05089903181189488, "grad_norm": 0.7468199729919434, "learning_rate": 0.001825741858350554, "loss": 1.4222, "step": 23 }, { "epoch": 0.10179806362378976, "grad_norm": 0.33098104596138, "learning_rate": 0.0008451542547285166, "loss": 1.2833, "step": 46 }, { "epoch": 0.15269709543568466, "grad_norm": 0.3552784323692322, "learning_rate": 0.0006262242910851496, "loss": 1.095, "step": 69 }, { "epoch": 0.20359612724757953, "grad_norm": 0.32930678129196167, "learning_rate": 0.0005198752449100363, "loss": 1.0648, "step": 92 }, { "epoch": 0.2544951590594744, "grad_norm": 0.32375234365463257, "learning_rate": 0.0004540766091864998, "loss": 1.0184, "step": 115 }, { "epoch": 0.3053941908713693, "grad_norm": 0.37444040179252625, "learning_rate": 0.0004082482904638631, "loss": 1.0037, "step": 138 }, { "epoch": 0.3562932226832642, "grad_norm": 0.398503839969635, "learning_rate": 0.00037397879600338285, "loss": 0.9613, "step": 161 }, { "epoch": 0.40719225449515906, "grad_norm": 0.36333534121513367, "learning_rate": 0.00034710506725031166, "loss": 0.9395, "step": 184 }, { "epoch": 0.4580912863070539, "grad_norm": 0.3362521231174469, "learning_rate": 0.0003253000243161777, "loss": 0.929, "step": 207 }, { "epoch": 0.5089903181189488, "grad_norm": 0.3286592960357666, "learning_rate": 0.0003071475584169756, "loss": 0.9067, "step": 230 }, { "epoch": 0.5598893499308437, "grad_norm": 0.37335479259490967, "learning_rate": 0.0002917299829957891, "loss": 0.8955, "step": 253 }, { "epoch": 0.6107883817427386, "grad_norm": 0.3964427411556244, "learning_rate": 0.0002784230231948523, "loss": 0.8665, "step": 276 }, { "epoch": 0.6616874135546335, "grad_norm": 0.39572906494140625, "learning_rate": 0.0002667852642561041, "loss": 0.8622, "step": 299 }, { "epoch": 0.7125864453665284, "grad_norm": 0.4889402389526367, "learning_rate": 0.0002564945880212886, "loss": 0.8716, "step": 322 }, { "epoch": 0.7634854771784232, "grad_norm": 0.3482915163040161, "learning_rate": 0.00024730968341474897, "loss": 0.8326, "step": 345 }, { "epoch": 0.8143845089903181, "grad_norm": 0.4124608337879181, "learning_rate": 0.00023904572186687873, "loss": 0.8199, "step": 368 }, { "epoch": 0.865283540802213, "grad_norm": 0.3616255819797516, "learning_rate": 0.00023155842232374464, "loss": 0.8103, "step": 391 }, { "epoch": 0.9161825726141078, "grad_norm": 0.42308276891708374, "learning_rate": 0.00022473328748774736, "loss": 0.8067, "step": 414 }, { "epoch": 0.9670816044260028, "grad_norm": 0.4363687038421631, "learning_rate": 0.00021847813825958586, "loss": 0.8163, "step": 437 }, { "epoch": 0.9980636237897649, "step": 451, "total_flos": 4.580565166436909e+18, "train_loss": 0.9513344627261955, "train_runtime": 3338.9072, "train_samples_per_second": 17.323, "train_steps_per_second": 0.135 } ], "logging_steps": 23, "max_steps": 451, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 23, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.580565166436909e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }