{ "best_metric": 0.6848593354225159, "best_model_checkpoint": "bert_base_lda_5_wnli/checkpoint-30", "epoch": 15.0, "eval_steps": 500, "global_step": 45, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 8.482614517211914, "learning_rate": 0.0009666666666666667, "loss": 1.121, "step": 3 }, { "epoch": 1.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.820504903793335, "eval_runtime": 0.0522, "eval_samples_per_second": 1359.977, "eval_steps_per_second": 19.155, "step": 3 }, { "epoch": 2.0, "grad_norm": 15.61330509185791, "learning_rate": 0.0009333333333333333, "loss": 1.6034, "step": 6 }, { "epoch": 2.0, "eval_accuracy": 0.43661971830985913, "eval_loss": 1.7292718887329102, "eval_runtime": 0.0552, "eval_samples_per_second": 1286.613, "eval_steps_per_second": 18.121, "step": 6 }, { "epoch": 3.0, "grad_norm": 0.5773879885673523, "learning_rate": 0.0009000000000000001, "loss": 0.9483, "step": 9 }, { "epoch": 3.0, "eval_accuracy": 0.43661971830985913, "eval_loss": 0.764909565448761, "eval_runtime": 0.0521, "eval_samples_per_second": 1364.026, "eval_steps_per_second": 19.212, "step": 9 }, { "epoch": 4.0, "grad_norm": 4.563255786895752, "learning_rate": 0.0008666666666666667, "loss": 0.7514, "step": 12 }, { "epoch": 4.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.7557218074798584, "eval_runtime": 0.0521, "eval_samples_per_second": 1362.466, "eval_steps_per_second": 19.19, "step": 12 }, { "epoch": 5.0, "grad_norm": 0.8208609223365784, "learning_rate": 0.0008333333333333334, "loss": 0.746, "step": 15 }, { "epoch": 5.0, "eval_accuracy": 0.43661971830985913, "eval_loss": 0.8104642033576965, "eval_runtime": 0.052, "eval_samples_per_second": 1366.58, "eval_steps_per_second": 19.248, "step": 15 }, { "epoch": 6.0, "grad_norm": 3.4645087718963623, "learning_rate": 0.0008, "loss": 0.7896, "step": 18 }, { "epoch": 6.0, "eval_accuracy": 0.43661971830985913, "eval_loss": 0.7383360266685486, "eval_runtime": 0.0519, "eval_samples_per_second": 1366.75, "eval_steps_per_second": 19.25, "step": 18 }, { "epoch": 7.0, "grad_norm": 0.9648402333259583, "learning_rate": 0.0007666666666666667, "loss": 0.7573, "step": 21 }, { "epoch": 7.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.685299277305603, "eval_runtime": 0.053, "eval_samples_per_second": 1338.925, "eval_steps_per_second": 18.858, "step": 21 }, { "epoch": 8.0, "grad_norm": 0.858669102191925, "learning_rate": 0.0007333333333333333, "loss": 0.6951, "step": 24 }, { "epoch": 8.0, "eval_accuracy": 0.43661971830985913, "eval_loss": 0.8345620036125183, "eval_runtime": 0.0526, "eval_samples_per_second": 1349.873, "eval_steps_per_second": 19.012, "step": 24 }, { "epoch": 9.0, "grad_norm": 0.8825590014457703, "learning_rate": 0.0007, "loss": 0.746, "step": 27 }, { "epoch": 9.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.6906359791755676, "eval_runtime": 0.0518, "eval_samples_per_second": 1371.445, "eval_steps_per_second": 19.316, "step": 27 }, { "epoch": 10.0, "grad_norm": 1.12335205078125, "learning_rate": 0.0006666666666666666, "loss": 0.6992, "step": 30 }, { "epoch": 10.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.6848593354225159, "eval_runtime": 0.0528, "eval_samples_per_second": 1345.421, "eval_steps_per_second": 18.95, "step": 30 }, { "epoch": 11.0, "grad_norm": 0.29337310791015625, "learning_rate": 0.0006333333333333333, "loss": 0.6942, "step": 33 }, { "epoch": 11.0, "eval_accuracy": 0.43661971830985913, "eval_loss": 0.700924277305603, "eval_runtime": 0.0523, "eval_samples_per_second": 1358.042, "eval_steps_per_second": 19.127, "step": 33 }, { "epoch": 12.0, "grad_norm": 1.3950302600860596, "learning_rate": 0.0006, "loss": 0.7, "step": 36 }, { "epoch": 12.0, "eval_accuracy": 0.43661971830985913, "eval_loss": 0.6950923204421997, "eval_runtime": 0.0545, "eval_samples_per_second": 1303.189, "eval_steps_per_second": 18.355, "step": 36 }, { "epoch": 13.0, "grad_norm": 0.581301748752594, "learning_rate": 0.0005666666666666667, "loss": 0.6976, "step": 39 }, { "epoch": 13.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.685354471206665, "eval_runtime": 0.0529, "eval_samples_per_second": 1342.534, "eval_steps_per_second": 18.909, "step": 39 }, { "epoch": 14.0, "grad_norm": 0.8408365845680237, "learning_rate": 0.0005333333333333334, "loss": 0.6999, "step": 42 }, { "epoch": 14.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.6901406645774841, "eval_runtime": 0.053, "eval_samples_per_second": 1340.63, "eval_steps_per_second": 18.882, "step": 42 }, { "epoch": 15.0, "grad_norm": 0.37376636266708374, "learning_rate": 0.0005, "loss": 0.6948, "step": 45 }, { "epoch": 15.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.6926164627075195, "eval_runtime": 0.0535, "eval_samples_per_second": 1327.152, "eval_steps_per_second": 18.692, "step": 45 }, { "epoch": 15.0, "step": 45, "total_flos": 1253066401152000.0, "train_loss": 0.8229163063897027, "train_runtime": 48.1937, "train_samples_per_second": 395.28, "train_steps_per_second": 1.867 } ], "logging_steps": 1, "max_steps": 90, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 5 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1253066401152000.0, "train_batch_size": 256, "trial_name": null, "trial_params": null }