{ "best_metric": 0.3607921898365021, "best_model_checkpoint": "bert_tiny_lda_50_v1_book_sst2/checkpoint-264", "epoch": 6.0, "eval_steps": 500, "global_step": 1584, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 9.187464714050293, "learning_rate": 4.9e-05, "loss": 0.3652, "step": 264 }, { "epoch": 1.0, "eval_accuracy": 0.8532110091743119, "eval_loss": 0.3607921898365021, "eval_runtime": 0.2732, "eval_samples_per_second": 3191.99, "eval_steps_per_second": 14.642, "step": 264 }, { "epoch": 2.0, "grad_norm": 7.1309661865234375, "learning_rate": 4.8e-05, "loss": 0.2205, "step": 528 }, { "epoch": 2.0, "eval_accuracy": 0.8635321100917431, "eval_loss": 0.37138402462005615, "eval_runtime": 0.2803, "eval_samples_per_second": 3110.469, "eval_steps_per_second": 14.268, "step": 528 }, { "epoch": 3.0, "grad_norm": 9.429926872253418, "learning_rate": 4.7e-05, "loss": 0.1577, "step": 792 }, { "epoch": 3.0, "eval_accuracy": 0.8497706422018348, "eval_loss": 0.4074440002441406, "eval_runtime": 0.291, "eval_samples_per_second": 2996.383, "eval_steps_per_second": 13.745, "step": 792 }, { "epoch": 4.0, "grad_norm": 8.587671279907227, "learning_rate": 4.600000000000001e-05, "loss": 0.1198, "step": 1056 }, { "epoch": 4.0, "eval_accuracy": 0.8658256880733946, "eval_loss": 0.38508209586143494, "eval_runtime": 0.3002, "eval_samples_per_second": 2904.627, "eval_steps_per_second": 13.324, "step": 1056 }, { "epoch": 5.0, "grad_norm": 4.722463607788086, "learning_rate": 4.5e-05, "loss": 0.0932, "step": 1320 }, { "epoch": 5.0, "eval_accuracy": 0.8704128440366973, "eval_loss": 0.4103902280330658, "eval_runtime": 0.2754, "eval_samples_per_second": 3166.269, "eval_steps_per_second": 14.524, "step": 1320 }, { "epoch": 6.0, "grad_norm": 7.038421154022217, "learning_rate": 4.4000000000000006e-05, "loss": 0.0733, "step": 1584 }, { "epoch": 6.0, "eval_accuracy": 0.8772935779816514, "eval_loss": 0.419316828250885, "eval_runtime": 0.3016, "eval_samples_per_second": 2891.226, "eval_steps_per_second": 13.263, "step": 1584 }, { "epoch": 6.0, "step": 1584, "total_flos": 1.0596790573590528e+16, "train_loss": 0.1716307449822474, "train_runtime": 226.4089, "train_samples_per_second": 14873.314, "train_steps_per_second": 58.302 } ], "logging_steps": 1, "max_steps": 13200, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 5 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.0596790573590528e+16, "train_batch_size": 256, "trial_name": null, "trial_params": null }