{ "best_metric": 0.6863997578620911, "best_model_checkpoint": "bert_base_lda_wnli/checkpoint-39", "epoch": 18.0, "eval_steps": 500, "global_step": 54, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 18.621891021728516, "learning_rate": 0.00098, "loss": 1.1638, "step": 3 }, { "epoch": 1.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 1.4607518911361694, "eval_runtime": 0.056, "eval_samples_per_second": 1268.435, "eval_steps_per_second": 17.865, "step": 3 }, { "epoch": 2.0, "grad_norm": 6.636824131011963, "learning_rate": 0.00096, "loss": 1.0198, "step": 6 }, { "epoch": 2.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 1.109704852104187, "eval_runtime": 0.0609, "eval_samples_per_second": 1164.911, "eval_steps_per_second": 16.407, "step": 6 }, { "epoch": 3.0, "grad_norm": 2.0930373668670654, "learning_rate": 0.00094, "loss": 1.1474, "step": 9 }, { "epoch": 3.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.8995377421379089, "eval_runtime": 0.0517, "eval_samples_per_second": 1372.362, "eval_steps_per_second": 19.329, "step": 9 }, { "epoch": 4.0, "grad_norm": 3.016080617904663, "learning_rate": 0.00092, "loss": 0.8846, "step": 12 }, { "epoch": 4.0, "eval_accuracy": 0.43661971830985913, "eval_loss": 0.820120096206665, "eval_runtime": 0.0532, "eval_samples_per_second": 1335.808, "eval_steps_per_second": 18.814, "step": 12 }, { "epoch": 5.0, "grad_norm": 2.1296448707580566, "learning_rate": 0.0009000000000000001, "loss": 0.7886, "step": 15 }, { "epoch": 5.0, "eval_accuracy": 0.43661971830985913, "eval_loss": 0.6994389295578003, "eval_runtime": 0.0527, "eval_samples_per_second": 1348.431, "eval_steps_per_second": 18.992, "step": 15 }, { "epoch": 6.0, "grad_norm": 2.4899051189422607, "learning_rate": 0.00088, "loss": 0.738, "step": 18 }, { "epoch": 6.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.708736777305603, "eval_runtime": 0.0518, "eval_samples_per_second": 1369.427, "eval_steps_per_second": 19.288, "step": 18 }, { "epoch": 7.0, "grad_norm": 0.7382462024688721, "learning_rate": 0.00086, "loss": 0.7195, "step": 21 }, { "epoch": 7.0, "eval_accuracy": 0.43661971830985913, "eval_loss": 0.7214460372924805, "eval_runtime": 0.0517, "eval_samples_per_second": 1372.273, "eval_steps_per_second": 19.328, "step": 21 }, { "epoch": 8.0, "grad_norm": 0.9765424132347107, "learning_rate": 0.00084, "loss": 0.7036, "step": 24 }, { "epoch": 8.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.693111777305603, "eval_runtime": 0.0536, "eval_samples_per_second": 1325.279, "eval_steps_per_second": 18.666, "step": 24 }, { "epoch": 9.0, "grad_norm": 0.2757832407951355, "learning_rate": 0.00082, "loss": 0.6935, "step": 27 }, { "epoch": 9.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.689645528793335, "eval_runtime": 0.0517, "eval_samples_per_second": 1372.33, "eval_steps_per_second": 19.329, "step": 27 }, { "epoch": 10.0, "grad_norm": 0.41503846645355225, "learning_rate": 0.0008, "loss": 0.6941, "step": 30 }, { "epoch": 10.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.6926164627075195, "eval_runtime": 0.0542, "eval_samples_per_second": 1310.789, "eval_steps_per_second": 18.462, "step": 30 }, { "epoch": 11.0, "grad_norm": 0.1713160276412964, "learning_rate": 0.0007800000000000001, "loss": 0.6949, "step": 33 }, { "epoch": 11.0, "eval_accuracy": 0.43661971830985913, "eval_loss": 0.693606972694397, "eval_runtime": 0.0536, "eval_samples_per_second": 1325.279, "eval_steps_per_second": 18.666, "step": 33 }, { "epoch": 12.0, "grad_norm": 0.817528247833252, "learning_rate": 0.00076, "loss": 0.6959, "step": 36 }, { "epoch": 12.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.6911312341690063, "eval_runtime": 0.0527, "eval_samples_per_second": 1346.784, "eval_steps_per_second": 18.969, "step": 36 }, { "epoch": 13.0, "grad_norm": 0.3534504175186157, "learning_rate": 0.00074, "loss": 0.6927, "step": 39 }, { "epoch": 13.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.6863997578620911, "eval_runtime": 0.0533, "eval_samples_per_second": 1330.984, "eval_steps_per_second": 18.746, "step": 39 }, { "epoch": 14.0, "grad_norm": 0.8052321076393127, "learning_rate": 0.0007199999999999999, "loss": 0.6928, "step": 42 }, { "epoch": 14.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.6893155574798584, "eval_runtime": 0.0524, "eval_samples_per_second": 1353.85, "eval_steps_per_second": 19.068, "step": 42 }, { "epoch": 15.0, "grad_norm": 0.29218751192092896, "learning_rate": 0.0007, "loss": 0.6958, "step": 45 }, { "epoch": 15.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.689645528793335, "eval_runtime": 0.0518, "eval_samples_per_second": 1371.091, "eval_steps_per_second": 19.311, "step": 45 }, { "epoch": 16.0, "grad_norm": 0.19813981652259827, "learning_rate": 0.00068, "loss": 0.6936, "step": 48 }, { "epoch": 16.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.6911312341690063, "eval_runtime": 0.0523, "eval_samples_per_second": 1358.408, "eval_steps_per_second": 19.133, "step": 48 }, { "epoch": 17.0, "grad_norm": 0.6377934217453003, "learning_rate": 0.00066, "loss": 0.6955, "step": 51 }, { "epoch": 17.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.6911312341690063, "eval_runtime": 0.0549, "eval_samples_per_second": 1292.678, "eval_steps_per_second": 18.207, "step": 51 }, { "epoch": 18.0, "grad_norm": 0.3209940493106842, "learning_rate": 0.00064, "loss": 0.6939, "step": 54 }, { "epoch": 18.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.6906359791755676, "eval_runtime": 0.1325, "eval_samples_per_second": 535.893, "eval_steps_per_second": 7.548, "step": 54 }, { "epoch": 18.0, "step": 54, "total_flos": 1503679681382400.0, "train_loss": 0.7837636250036734, "train_runtime": 61.4069, "train_samples_per_second": 517.043, "train_steps_per_second": 2.443 } ], "logging_steps": 1, "max_steps": 150, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 5 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1503679681382400.0, "train_batch_size": 256, "trial_name": null, "trial_params": null }