{ "best_metric": 1.0962358713150024, "best_model_checkpoint": "distilbert_lda_50_v1_mnli/checkpoint-13806", "epoch": 14.0, "eval_steps": 500, "global_step": 21476, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 0.02029508352279663, "learning_rate": 0.00098, "loss": 1.1024, "step": 1534 }, { "epoch": 1.0, "eval_accuracy": 0.3273560876209883, "eval_loss": 1.0992883443832397, "eval_runtime": 4.1317, "eval_samples_per_second": 2375.563, "eval_steps_per_second": 9.439, "step": 1534 }, { "epoch": 2.0, "grad_norm": 0.11782866716384888, "learning_rate": 0.00096, "loss": 1.0986, "step": 3068 }, { "epoch": 2.0, "eval_accuracy": 0.3544574630667346, "eval_loss": 1.0987216234207153, "eval_runtime": 4.138, "eval_samples_per_second": 2371.933, "eval_steps_per_second": 9.425, "step": 3068 }, { "epoch": 3.0, "grad_norm": 0.038195185363292694, "learning_rate": 0.00094, "loss": 1.0987, "step": 4602 }, { "epoch": 3.0, "eval_accuracy": 0.3273560876209883, "eval_loss": 1.0989333391189575, "eval_runtime": 4.1583, "eval_samples_per_second": 2360.317, "eval_steps_per_second": 9.379, "step": 4602 }, { "epoch": 4.0, "grad_norm": 0.062206000089645386, "learning_rate": 0.00092, "loss": 1.0986, "step": 6136 }, { "epoch": 4.0, "eval_accuracy": 0.31818644931227713, "eval_loss": 1.10163414478302, "eval_runtime": 4.0968, "eval_samples_per_second": 2395.767, "eval_steps_per_second": 9.52, "step": 6136 }, { "epoch": 5.0, "grad_norm": 0.07343439757823944, "learning_rate": 0.0009000000000000001, "loss": 1.0985, "step": 7670 }, { "epoch": 5.0, "eval_accuracy": 0.3544574630667346, "eval_loss": 1.0988649129867554, "eval_runtime": 4.2047, "eval_samples_per_second": 2334.293, "eval_steps_per_second": 9.275, "step": 7670 }, { "epoch": 6.0, "grad_norm": 0.047856856137514114, "learning_rate": 0.00088, "loss": 1.0987, "step": 9204 }, { "epoch": 6.0, "eval_accuracy": 0.3544574630667346, "eval_loss": 1.0988649129867554, "eval_runtime": 4.1454, "eval_samples_per_second": 2367.71, "eval_steps_per_second": 9.408, "step": 9204 }, { "epoch": 7.0, "grad_norm": 0.0791020318865776, "learning_rate": 0.00086, "loss": 1.0985, "step": 10738 }, { "epoch": 7.0, "eval_accuracy": 0.31818644931227713, "eval_loss": 1.0968025922775269, "eval_runtime": 4.0855, "eval_samples_per_second": 2402.388, "eval_steps_per_second": 9.546, "step": 10738 }, { "epoch": 8.0, "grad_norm": 0.016667626798152924, "learning_rate": 0.00084, "loss": 1.0986, "step": 12272 }, { "epoch": 8.0, "eval_accuracy": 0.31818644931227713, "eval_loss": 1.0990766286849976, "eval_runtime": 4.2035, "eval_samples_per_second": 2334.953, "eval_steps_per_second": 9.278, "step": 12272 }, { "epoch": 9.0, "grad_norm": 0.006651591043919325, "learning_rate": 0.00082, "loss": 1.0988, "step": 13806 }, { "epoch": 9.0, "eval_accuracy": 0.3273560876209883, "eval_loss": 1.0962358713150024, "eval_runtime": 4.1757, "eval_samples_per_second": 2350.501, "eval_steps_per_second": 9.34, "step": 13806 }, { "epoch": 10.0, "grad_norm": 0.045499037951231, "learning_rate": 0.0008, "loss": 1.0986, "step": 15340 }, { "epoch": 10.0, "eval_accuracy": 0.3273560876209883, "eval_loss": 1.0992168188095093, "eval_runtime": 4.1657, "eval_samples_per_second": 2356.14, "eval_steps_per_second": 9.362, "step": 15340 }, { "epoch": 11.0, "grad_norm": 0.009605828672647476, "learning_rate": 0.0007800000000000001, "loss": 1.0986, "step": 16874 }, { "epoch": 11.0, "eval_accuracy": 0.3273560876209883, "eval_loss": 1.0990049839019775, "eval_runtime": 4.099, "eval_samples_per_second": 2394.463, "eval_steps_per_second": 9.514, "step": 16874 }, { "epoch": 12.0, "grad_norm": 0.03646823763847351, "learning_rate": 0.00076, "loss": 1.0984, "step": 18408 }, { "epoch": 12.0, "eval_accuracy": 0.31818644931227713, "eval_loss": 1.0990766286849976, "eval_runtime": 4.3346, "eval_samples_per_second": 2264.34, "eval_steps_per_second": 8.997, "step": 18408 }, { "epoch": 13.0, "grad_norm": 0.09090559184551239, "learning_rate": 0.00074, "loss": 1.0985, "step": 19942 }, { "epoch": 13.0, "eval_accuracy": 0.31818644931227713, "eval_loss": 1.0965192317962646, "eval_runtime": 4.2309, "eval_samples_per_second": 2319.831, "eval_steps_per_second": 9.218, "step": 19942 }, { "epoch": 14.0, "grad_norm": 0.03515646234154701, "learning_rate": 0.0007199999999999999, "loss": 1.0988, "step": 21476 }, { "epoch": 14.0, "eval_accuracy": 0.3273560876209883, "eval_loss": 1.0987216234207153, "eval_runtime": 4.0634, "eval_samples_per_second": 2415.463, "eval_steps_per_second": 9.598, "step": 21476 }, { "epoch": 14.0, "step": 21476, "total_flos": 3.6414798065552794e+17, "train_loss": 1.098874776140008, "train_runtime": 5133.237, "train_samples_per_second": 3825.091, "train_steps_per_second": 14.942 } ], "logging_steps": 1, "max_steps": 76700, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 5 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.6414798065552794e+17, "train_batch_size": 256, "trial_name": null, "trial_params": null }