|
{ |
|
"best_metric": 0.7929836748871136, |
|
"best_model_checkpoint": "logs/indian_build_rr/roberta-base/seed_1/checkpoint-992", |
|
"epoch": 11.0, |
|
"eval_steps": 500, |
|
"global_step": 1364, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7155262243834665, |
|
"eval_loss": 0.9702894687652588, |
|
"eval_macro-f1": 0.3565778389617827, |
|
"eval_micro-f1": 0.7155262243834665, |
|
"eval_precision-macro": 0.5485369419220343, |
|
"eval_precision-micro": 0.7155262243834665, |
|
"eval_recall-macro": 0.34472385514524134, |
|
"eval_recall-micro": 0.7155262243834665, |
|
"eval_runtime": 3.534, |
|
"eval_samples_per_second": 8.489, |
|
"eval_steps_per_second": 4.244, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7353247655435915, |
|
"eval_loss": 0.8005266189575195, |
|
"eval_macro-f1": 0.5080224547196512, |
|
"eval_micro-f1": 0.7353247655435915, |
|
"eval_precision-macro": 0.5180988726198199, |
|
"eval_precision-micro": 0.7353247655435915, |
|
"eval_recall-macro": 0.5222058583864981, |
|
"eval_recall-micro": 0.7353247655435915, |
|
"eval_runtime": 3.5582, |
|
"eval_samples_per_second": 8.431, |
|
"eval_steps_per_second": 4.216, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7453977075373394, |
|
"eval_loss": 0.815595269203186, |
|
"eval_macro-f1": 0.5288442820595285, |
|
"eval_micro-f1": 0.7453977075373394, |
|
"eval_precision-macro": 0.5625997012224085, |
|
"eval_precision-micro": 0.7453977075373394, |
|
"eval_recall-macro": 0.532191762651922, |
|
"eval_recall-micro": 0.7453977075373394, |
|
"eval_runtime": 3.3982, |
|
"eval_samples_per_second": 8.828, |
|
"eval_steps_per_second": 4.414, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7704063911080237, |
|
"eval_loss": 0.7056049108505249, |
|
"eval_macro-f1": 0.5179788731015686, |
|
"eval_micro-f1": 0.7704063911080237, |
|
"eval_precision-macro": 0.5880911415103544, |
|
"eval_precision-micro": 0.7704063911080237, |
|
"eval_recall-macro": 0.5197063822818007, |
|
"eval_recall-micro": 0.7704063911080237, |
|
"eval_runtime": 3.325, |
|
"eval_samples_per_second": 9.023, |
|
"eval_steps_per_second": 4.511, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 4.032258064516129, |
|
"grad_norm": 7.048013210296631, |
|
"learning_rate": 2.398790322580645e-05, |
|
"loss": 1.0549, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7641542202153525, |
|
"eval_loss": 0.7525667548179626, |
|
"eval_macro-f1": 0.5774815867474451, |
|
"eval_micro-f1": 0.7641542202153525, |
|
"eval_precision-macro": 0.5877505487951785, |
|
"eval_precision-micro": 0.7641542202153525, |
|
"eval_recall-macro": 0.5905806919233985, |
|
"eval_recall-micro": 0.7641542202153525, |
|
"eval_runtime": 3.7943, |
|
"eval_samples_per_second": 7.907, |
|
"eval_steps_per_second": 3.953, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7811740187565127, |
|
"eval_loss": 0.7093824148178101, |
|
"eval_macro-f1": 0.564925492252011, |
|
"eval_micro-f1": 0.7811740187565127, |
|
"eval_precision-macro": 0.6335954751289583, |
|
"eval_precision-micro": 0.7811740187565127, |
|
"eval_recall-macro": 0.5394598039562246, |
|
"eval_recall-micro": 0.7811740187565127, |
|
"eval_runtime": 3.6662, |
|
"eval_samples_per_second": 8.183, |
|
"eval_steps_per_second": 4.091, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.780826675929142, |
|
"eval_loss": 0.7391286492347717, |
|
"eval_macro-f1": 0.5535439959165813, |
|
"eval_micro-f1": 0.780826675929142, |
|
"eval_precision-macro": 0.6475047138793736, |
|
"eval_precision-micro": 0.780826675929142, |
|
"eval_recall-macro": 0.5338983669485645, |
|
"eval_recall-micro": 0.780826675929142, |
|
"eval_runtime": 3.3578, |
|
"eval_samples_per_second": 8.934, |
|
"eval_steps_per_second": 4.467, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7929836748871136, |
|
"eval_loss": 0.7354127168655396, |
|
"eval_macro-f1": 0.5881256788610278, |
|
"eval_micro-f1": 0.7929836748871136, |
|
"eval_precision-macro": 0.616862114635611, |
|
"eval_precision-micro": 0.7929836748871136, |
|
"eval_recall-macro": 0.5756480546409108, |
|
"eval_recall-micro": 0.7929836748871136, |
|
"eval_runtime": 3.5028, |
|
"eval_samples_per_second": 8.565, |
|
"eval_steps_per_second": 4.282, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 8.064516129032258, |
|
"grad_norm": 4.143438339233398, |
|
"learning_rate": 1.7951612903225806e-05, |
|
"loss": 0.545, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7804793331017714, |
|
"eval_loss": 0.8143337965011597, |
|
"eval_macro-f1": 0.5927580984411855, |
|
"eval_micro-f1": 0.7804793331017714, |
|
"eval_precision-macro": 0.5950513529543718, |
|
"eval_precision-micro": 0.7804793331017714, |
|
"eval_recall-macro": 0.5963301506624595, |
|
"eval_recall-micro": 0.7804793331017714, |
|
"eval_runtime": 3.3219, |
|
"eval_samples_per_second": 9.031, |
|
"eval_steps_per_second": 4.515, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7794373046196597, |
|
"eval_loss": 0.8351579904556274, |
|
"eval_macro-f1": 0.5917556551043053, |
|
"eval_micro-f1": 0.7794373046196597, |
|
"eval_precision-macro": 0.602885463862158, |
|
"eval_precision-micro": 0.7794373046196597, |
|
"eval_recall-macro": 0.5915247045666512, |
|
"eval_recall-micro": 0.7794373046196597, |
|
"eval_runtime": 3.5978, |
|
"eval_samples_per_second": 8.339, |
|
"eval_steps_per_second": 4.169, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.7870788468218132, |
|
"eval_loss": 0.8609802722930908, |
|
"eval_macro-f1": 0.5742187338887501, |
|
"eval_micro-f1": 0.7870788468218132, |
|
"eval_precision-macro": 0.60147377967397, |
|
"eval_precision-micro": 0.7870788468218132, |
|
"eval_recall-macro": 0.5642213023272796, |
|
"eval_recall-micro": 0.7870788468218132, |
|
"eval_runtime": 24.3263, |
|
"eval_samples_per_second": 1.233, |
|
"eval_steps_per_second": 0.617, |
|
"step": 1364 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"step": 1364, |
|
"total_flos": 5.301416432939827e+16, |
|
"train_loss": 0.6726446291568342, |
|
"train_runtime": 895.6751, |
|
"train_samples_per_second": 5.515, |
|
"train_steps_per_second": 2.769 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2480, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.301416432939827e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|