|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 10560, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.8086087107658386, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.4434, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9455927655768658, |
|
"eval_f1": 0.7605633802816901, |
|
"eval_loss": 0.16302905976772308, |
|
"eval_precision": 0.679945054945055, |
|
"eval_recall": 0.862870424171993, |
|
"eval_runtime": 4.5797, |
|
"eval_samples_per_second": 204.163, |
|
"eval_steps_per_second": 3.275, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.7114465832710266, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.1462, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9566729603497963, |
|
"eval_f1": 0.8073836276083467, |
|
"eval_loss": 0.12938551604747772, |
|
"eval_precision": 0.7481408031730292, |
|
"eval_recall": 0.8768158047646717, |
|
"eval_runtime": 4.6537, |
|
"eval_samples_per_second": 200.915, |
|
"eval_steps_per_second": 3.223, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 1.316323161125183, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.1183, |
|
"step": 1584 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9568717082381, |
|
"eval_f1": 0.8116639914392724, |
|
"eval_loss": 0.13784636557102203, |
|
"eval_precision": 0.7521070897372335, |
|
"eval_recall": 0.8814642649622313, |
|
"eval_runtime": 4.903, |
|
"eval_samples_per_second": 190.701, |
|
"eval_steps_per_second": 3.059, |
|
"step": 1584 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 1.4418916702270508, |
|
"learning_rate": 4e-05, |
|
"loss": 0.1012, |
|
"step": 2112 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9596541786743515, |
|
"eval_f1": 0.8231144872490505, |
|
"eval_loss": 0.1358918398618698, |
|
"eval_precision": 0.7720101781170483, |
|
"eval_recall": 0.8814642649622313, |
|
"eval_runtime": 4.7436, |
|
"eval_samples_per_second": 197.109, |
|
"eval_steps_per_second": 3.162, |
|
"step": 2112 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.0950379371643066, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.0884, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9622379012222995, |
|
"eval_f1": 0.8348926802421575, |
|
"eval_loss": 0.12661471962928772, |
|
"eval_precision": 0.7929952953476216, |
|
"eval_recall": 0.8814642649622313, |
|
"eval_runtime": 4.6673, |
|
"eval_samples_per_second": 200.331, |
|
"eval_steps_per_second": 3.214, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 1.270456314086914, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.0793, |
|
"step": 3168 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9610454138924774, |
|
"eval_f1": 0.8404432132963989, |
|
"eval_loss": 0.1408655196428299, |
|
"eval_precision": 0.803070407623081, |
|
"eval_recall": 0.8814642649622313, |
|
"eval_runtime": 4.6946, |
|
"eval_samples_per_second": 199.164, |
|
"eval_steps_per_second": 3.195, |
|
"step": 3168 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 1.6711246967315674, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.072, |
|
"step": 3696 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9588591871211368, |
|
"eval_f1": 0.8222222222222223, |
|
"eval_loss": 0.1545909196138382, |
|
"eval_precision": 0.7704418486541391, |
|
"eval_recall": 0.8814642649622313, |
|
"eval_runtime": 4.6637, |
|
"eval_samples_per_second": 200.485, |
|
"eval_steps_per_second": 3.216, |
|
"step": 3696 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.5584707260131836, |
|
"learning_rate": 3e-05, |
|
"loss": 0.067, |
|
"step": 4224 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9608466660041737, |
|
"eval_f1": 0.8334258745141587, |
|
"eval_loss": 0.14326535165309906, |
|
"eval_precision": 0.797979797979798, |
|
"eval_recall": 0.8721673445671121, |
|
"eval_runtime": 4.6647, |
|
"eval_samples_per_second": 200.444, |
|
"eval_steps_per_second": 3.216, |
|
"step": 4224 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 0.5288811326026917, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.0607, |
|
"step": 4752 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9598529265626553, |
|
"eval_f1": 0.8312328767123287, |
|
"eval_loss": 0.1468406319618225, |
|
"eval_precision": 0.7864178330741317, |
|
"eval_recall": 0.8814642649622313, |
|
"eval_runtime": 4.6583, |
|
"eval_samples_per_second": 200.719, |
|
"eval_steps_per_second": 3.22, |
|
"step": 4752 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 1.041274070739746, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.0562, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9612441617807811, |
|
"eval_f1": 0.8267029972752045, |
|
"eval_loss": 0.14973483979701996, |
|
"eval_precision": 0.7783478707029245, |
|
"eval_recall": 0.8814642649622313, |
|
"eval_runtime": 4.6532, |
|
"eval_samples_per_second": 200.937, |
|
"eval_steps_per_second": 3.224, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 1.7796343564987183, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.0506, |
|
"step": 5808 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.9594554307860479, |
|
"eval_f1": 0.8332413031474324, |
|
"eval_loss": 0.1600087583065033, |
|
"eval_precision": 0.793792740662809, |
|
"eval_recall": 0.8768158047646717, |
|
"eval_runtime": 4.6756, |
|
"eval_samples_per_second": 199.975, |
|
"eval_steps_per_second": 3.208, |
|
"step": 5808 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 1.6123548746109009, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0483, |
|
"step": 6336 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.9608466660041737, |
|
"eval_f1": 0.8328721638074155, |
|
"eval_loss": 0.15964019298553467, |
|
"eval_precision": 0.7950343370311674, |
|
"eval_recall": 0.8744915746658919, |
|
"eval_runtime": 4.6719, |
|
"eval_samples_per_second": 200.134, |
|
"eval_steps_per_second": 3.211, |
|
"step": 6336 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 0.9525193572044373, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.0443, |
|
"step": 6864 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.96064791811587, |
|
"eval_f1": 0.8237547892720306, |
|
"eval_loss": 0.15955425798892975, |
|
"eval_precision": 0.7785825142265907, |
|
"eval_recall": 0.8744915746658919, |
|
"eval_runtime": 4.6803, |
|
"eval_samples_per_second": 199.774, |
|
"eval_steps_per_second": 3.205, |
|
"step": 6864 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 0.5356388688087463, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.0421, |
|
"step": 7392 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.9612441617807811, |
|
"eval_f1": 0.8350857775318208, |
|
"eval_loss": 0.16503094136714935, |
|
"eval_precision": 0.7971473851030111, |
|
"eval_recall": 0.8768158047646717, |
|
"eval_runtime": 4.671, |
|
"eval_samples_per_second": 200.172, |
|
"eval_steps_per_second": 3.211, |
|
"step": 7392 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 0.6785407662391663, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.0395, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.9602504223392626, |
|
"eval_f1": 0.8284449363586054, |
|
"eval_loss": 0.16934077441692352, |
|
"eval_precision": 0.7908082408874801, |
|
"eval_recall": 0.8698431144683324, |
|
"eval_runtime": 4.6607, |
|
"eval_samples_per_second": 200.612, |
|
"eval_steps_per_second": 3.218, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 1.843337893486023, |
|
"learning_rate": 1e-05, |
|
"loss": 0.0375, |
|
"step": 8448 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.9594554307860479, |
|
"eval_f1": 0.8336088154269972, |
|
"eval_loss": 0.17250221967697144, |
|
"eval_precision": 0.7925615505500262, |
|
"eval_recall": 0.8791400348634515, |
|
"eval_runtime": 4.6872, |
|
"eval_samples_per_second": 199.478, |
|
"eval_steps_per_second": 3.2, |
|
"step": 8448 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 0.6304071545600891, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.0358, |
|
"step": 8976 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.9612441617807811, |
|
"eval_f1": 0.8321289605336298, |
|
"eval_loss": 0.17892615497112274, |
|
"eval_precision": 0.7975492807671817, |
|
"eval_recall": 0.8698431144683324, |
|
"eval_runtime": 4.6497, |
|
"eval_samples_per_second": 201.086, |
|
"eval_steps_per_second": 3.226, |
|
"step": 8976 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 1.1330559253692627, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0339, |
|
"step": 9504 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.960051674450959, |
|
"eval_f1": 0.8225895316804408, |
|
"eval_loss": 0.17817425727844238, |
|
"eval_precision": 0.7820848611838659, |
|
"eval_recall": 0.8675188843695526, |
|
"eval_runtime": 4.6927, |
|
"eval_samples_per_second": 199.247, |
|
"eval_steps_per_second": 3.196, |
|
"step": 9504 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 0.08901867270469666, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.0327, |
|
"step": 10032 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.9620391533339958, |
|
"eval_f1": 0.833983286908078, |
|
"eval_loss": 0.17433172464370728, |
|
"eval_precision": 0.8009630818619583, |
|
"eval_recall": 0.8698431144683324, |
|
"eval_runtime": 4.6614, |
|
"eval_samples_per_second": 200.583, |
|
"eval_steps_per_second": 3.218, |
|
"step": 10032 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 1.3878382444381714, |
|
"learning_rate": 0.0, |
|
"loss": 0.0327, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.9608466660041737, |
|
"eval_f1": 0.8321289605336298, |
|
"eval_loss": 0.17778323590755463, |
|
"eval_precision": 0.7975492807671817, |
|
"eval_recall": 0.8698431144683324, |
|
"eval_runtime": 4.9618, |
|
"eval_samples_per_second": 188.439, |
|
"eval_steps_per_second": 3.023, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 10560, |
|
"total_flos": 5062583230111038.0, |
|
"train_loss": 0.08150525255636736, |
|
"train_runtime": 1343.1683, |
|
"train_samples_per_second": 125.628, |
|
"train_steps_per_second": 7.862 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 10560, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 5062583230111038.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|