{
  "best_metric": 0.6863997578620911,
  "best_model_checkpoint": "bert_base_lda_wnli/checkpoint-39",
  "epoch": 18.0,
  "eval_steps": 500,
  "global_step": 54,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "grad_norm": 18.621891021728516,
      "learning_rate": 0.00098,
      "loss": 1.1638,
      "step": 3
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.5633802816901409,
      "eval_loss": 1.4607518911361694,
      "eval_runtime": 0.056,
      "eval_samples_per_second": 1268.435,
      "eval_steps_per_second": 17.865,
      "step": 3
    },
    {
      "epoch": 2.0,
      "grad_norm": 6.636824131011963,
      "learning_rate": 0.00096,
      "loss": 1.0198,
      "step": 6
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.5633802816901409,
      "eval_loss": 1.109704852104187,
      "eval_runtime": 0.0609,
      "eval_samples_per_second": 1164.911,
      "eval_steps_per_second": 16.407,
      "step": 6
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.0930373668670654,
      "learning_rate": 0.00094,
      "loss": 1.1474,
      "step": 9
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.5633802816901409,
      "eval_loss": 0.8995377421379089,
      "eval_runtime": 0.0517,
      "eval_samples_per_second": 1372.362,
      "eval_steps_per_second": 19.329,
      "step": 9
    },
    {
      "epoch": 4.0,
      "grad_norm": 3.016080617904663,
      "learning_rate": 0.00092,
      "loss": 0.8846,
      "step": 12
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.43661971830985913,
      "eval_loss": 0.820120096206665,
      "eval_runtime": 0.0532,
      "eval_samples_per_second": 1335.808,
      "eval_steps_per_second": 18.814,
      "step": 12
    },
    {
      "epoch": 5.0,
      "grad_norm": 2.1296448707580566,
      "learning_rate": 0.0009000000000000001,
      "loss": 0.7886,
      "step": 15
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.43661971830985913,
      "eval_loss": 0.6994389295578003,
      "eval_runtime": 0.0527,
      "eval_samples_per_second": 1348.431,
      "eval_steps_per_second": 18.992,
      "step": 15
    },
    {
      "epoch": 6.0,
      "grad_norm": 2.4899051189422607,
      "learning_rate": 0.00088,
      "loss": 0.738,
      "step": 18
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.5633802816901409,
      "eval_loss": 0.708736777305603,
      "eval_runtime": 0.0518,
      "eval_samples_per_second": 1369.427,
      "eval_steps_per_second": 19.288,
      "step": 18
    },
    {
      "epoch": 7.0,
      "grad_norm": 0.7382462024688721,
      "learning_rate": 0.00086,
      "loss": 0.7195,
      "step": 21
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.43661971830985913,
      "eval_loss": 0.7214460372924805,
      "eval_runtime": 0.0517,
      "eval_samples_per_second": 1372.273,
      "eval_steps_per_second": 19.328,
      "step": 21
    },
    {
      "epoch": 8.0,
      "grad_norm": 0.9765424132347107,
      "learning_rate": 0.00084,
      "loss": 0.7036,
      "step": 24
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.5633802816901409,
      "eval_loss": 0.693111777305603,
      "eval_runtime": 0.0536,
      "eval_samples_per_second": 1325.279,
      "eval_steps_per_second": 18.666,
      "step": 24
    },
    {
      "epoch": 9.0,
      "grad_norm": 0.2757832407951355,
      "learning_rate": 0.00082,
      "loss": 0.6935,
      "step": 27
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.5633802816901409,
      "eval_loss": 0.689645528793335,
      "eval_runtime": 0.0517,
      "eval_samples_per_second": 1372.33,
      "eval_steps_per_second": 19.329,
      "step": 27
    },
    {
      "epoch": 10.0,
      "grad_norm": 0.41503846645355225,
      "learning_rate": 0.0008,
      "loss": 0.6941,
      "step": 30
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.5633802816901409,
      "eval_loss": 0.6926164627075195,
      "eval_runtime": 0.0542,
      "eval_samples_per_second": 1310.789,
      "eval_steps_per_second": 18.462,
      "step": 30
    },
    {
      "epoch": 11.0,
      "grad_norm": 0.1713160276412964,
      "learning_rate": 0.0007800000000000001,
      "loss": 0.6949,
      "step": 33
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.43661971830985913,
      "eval_loss": 0.693606972694397,
      "eval_runtime": 0.0536,
      "eval_samples_per_second": 1325.279,
      "eval_steps_per_second": 18.666,
      "step": 33
    },
    {
      "epoch": 12.0,
      "grad_norm": 0.817528247833252,
      "learning_rate": 0.00076,
      "loss": 0.6959,
      "step": 36
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.5633802816901409,
      "eval_loss": 0.6911312341690063,
      "eval_runtime": 0.0527,
      "eval_samples_per_second": 1346.784,
      "eval_steps_per_second": 18.969,
      "step": 36
    },
    {
      "epoch": 13.0,
      "grad_norm": 0.3534504175186157,
      "learning_rate": 0.00074,
      "loss": 0.6927,
      "step": 39
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.5633802816901409,
      "eval_loss": 0.6863997578620911,
      "eval_runtime": 0.0533,
      "eval_samples_per_second": 1330.984,
      "eval_steps_per_second": 18.746,
      "step": 39
    },
    {
      "epoch": 14.0,
      "grad_norm": 0.8052321076393127,
      "learning_rate": 0.0007199999999999999,
      "loss": 0.6928,
      "step": 42
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.5633802816901409,
      "eval_loss": 0.6893155574798584,
      "eval_runtime": 0.0524,
      "eval_samples_per_second": 1353.85,
      "eval_steps_per_second": 19.068,
      "step": 42
    },
    {
      "epoch": 15.0,
      "grad_norm": 0.29218751192092896,
      "learning_rate": 0.0007,
      "loss": 0.6958,
      "step": 45
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.5633802816901409,
      "eval_loss": 0.689645528793335,
      "eval_runtime": 0.0518,
      "eval_samples_per_second": 1371.091,
      "eval_steps_per_second": 19.311,
      "step": 45
    },
    {
      "epoch": 16.0,
      "grad_norm": 0.19813981652259827,
      "learning_rate": 0.00068,
      "loss": 0.6936,
      "step": 48
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.5633802816901409,
      "eval_loss": 0.6911312341690063,
      "eval_runtime": 0.0523,
      "eval_samples_per_second": 1358.408,
      "eval_steps_per_second": 19.133,
      "step": 48
    },
    {
      "epoch": 17.0,
      "grad_norm": 0.6377934217453003,
      "learning_rate": 0.00066,
      "loss": 0.6955,
      "step": 51
    },
    {
      "epoch": 17.0,
      "eval_accuracy": 0.5633802816901409,
      "eval_loss": 0.6911312341690063,
      "eval_runtime": 0.0549,
      "eval_samples_per_second": 1292.678,
      "eval_steps_per_second": 18.207,
      "step": 51
    },
    {
      "epoch": 18.0,
      "grad_norm": 0.3209940493106842,
      "learning_rate": 0.00064,
      "loss": 0.6939,
      "step": 54
    },
    {
      "epoch": 18.0,
      "eval_accuracy": 0.5633802816901409,
      "eval_loss": 0.6906359791755676,
      "eval_runtime": 0.1325,
      "eval_samples_per_second": 535.893,
      "eval_steps_per_second": 7.548,
      "step": 54
    },
    {
      "epoch": 18.0,
      "step": 54,
      "total_flos": 1503679681382400.0,
      "train_loss": 0.7837636250036734,
      "train_runtime": 61.4069,
      "train_samples_per_second": 517.043,
      "train_steps_per_second": 2.443
    }
  ],
  "logging_steps": 1,
  "max_steps": 150,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 50,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 5
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1503679681382400.0,
  "train_batch_size": 256,
  "trial_name": null,
  "trial_params": null
}