|
{ |
|
"best_metric": 1.2118161916732788, |
|
"best_model_checkpoint": "../experiments/attribute_minig_mslacerda/run-9/checkpoint-264", |
|
"epoch": 12.0, |
|
"eval_steps": 500, |
|
"global_step": 264, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.012012012012012012, |
|
"eval_f1": 0.0031978072179077205, |
|
"eval_loss": 4.749619007110596, |
|
"eval_precision": 0.002532561505065123, |
|
"eval_recall": 0.004337050805452293, |
|
"eval_runtime": 0.3642, |
|
"eval_samples_per_second": 799.034, |
|
"eval_steps_per_second": 101.595, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.03333333333333333, |
|
"eval_f1": 0.003970223325062034, |
|
"eval_loss": 4.679495334625244, |
|
"eval_precision": 0.0033112582781456954, |
|
"eval_recall": 0.004956629491945477, |
|
"eval_runtime": 0.8425, |
|
"eval_samples_per_second": 345.396, |
|
"eval_steps_per_second": 43.916, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.05795795795795796, |
|
"eval_f1": 0.0071546505228398454, |
|
"eval_loss": 4.5262579917907715, |
|
"eval_precision": 0.006435643564356435, |
|
"eval_recall": 0.0080545229244114, |
|
"eval_runtime": 0.361, |
|
"eval_samples_per_second": 806.114, |
|
"eval_steps_per_second": 102.496, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.12612612612612611, |
|
"eval_f1": 0.023522129371711546, |
|
"eval_loss": 4.221945762634277, |
|
"eval_precision": 0.02350030921459493, |
|
"eval_recall": 0.023543990086741014, |
|
"eval_runtime": 0.3669, |
|
"eval_samples_per_second": 793.061, |
|
"eval_steps_per_second": 100.836, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.23153153153153153, |
|
"eval_f1": 0.057900244157656086, |
|
"eval_loss": 3.788745880126953, |
|
"eval_precision": 0.06624102154828412, |
|
"eval_recall": 0.051425030978934326, |
|
"eval_runtime": 0.3736, |
|
"eval_samples_per_second": 778.936, |
|
"eval_steps_per_second": 99.04, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.345045045045045, |
|
"eval_f1": 0.11658518239939826, |
|
"eval_loss": 3.2482411861419678, |
|
"eval_precision": 0.14832535885167464, |
|
"eval_recall": 0.09603469640644362, |
|
"eval_runtime": 0.3627, |
|
"eval_samples_per_second": 802.223, |
|
"eval_steps_per_second": 102.001, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.44114114114114117, |
|
"eval_f1": 0.18552200800291016, |
|
"eval_loss": 2.7049763202667236, |
|
"eval_precision": 0.22466960352422907, |
|
"eval_recall": 0.1579925650557621, |
|
"eval_runtime": 0.4418, |
|
"eval_samples_per_second": 658.601, |
|
"eval_steps_per_second": 83.74, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.5042042042042042, |
|
"eval_f1": 0.23968705547652916, |
|
"eval_loss": 2.2730276584625244, |
|
"eval_precision": 0.28130217028380633, |
|
"eval_recall": 0.20879801734820322, |
|
"eval_runtime": 0.3655, |
|
"eval_samples_per_second": 796.135, |
|
"eval_steps_per_second": 101.227, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.5591591591591591, |
|
"eval_f1": 0.3004824259131633, |
|
"eval_loss": 1.9435880184173584, |
|
"eval_precision": 0.3385093167701863, |
|
"eval_recall": 0.2701363073110285, |
|
"eval_runtime": 0.3718, |
|
"eval_samples_per_second": 782.782, |
|
"eval_steps_per_second": 99.529, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.6198198198198198, |
|
"eval_f1": 0.37284768211920527, |
|
"eval_loss": 1.6517927646636963, |
|
"eval_precision": 0.40042674253200566, |
|
"eval_recall": 0.34882280049566294, |
|
"eval_runtime": 0.3649, |
|
"eval_samples_per_second": 797.37, |
|
"eval_steps_per_second": 101.384, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.6642642642642642, |
|
"eval_f1": 0.44087782509007534, |
|
"eval_loss": 1.4077345132827759, |
|
"eval_precision": 0.467685892981237, |
|
"eval_recall": 0.41697645600991323, |
|
"eval_runtime": 0.3583, |
|
"eval_samples_per_second": 812.191, |
|
"eval_steps_per_second": 103.268, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7249249249249249, |
|
"eval_f1": 0.5125080179602309, |
|
"eval_loss": 1.2118161916732788, |
|
"eval_precision": 0.53125, |
|
"eval_recall": 0.4950433705080545, |
|
"eval_runtime": 0.3636, |
|
"eval_samples_per_second": 800.329, |
|
"eval_steps_per_second": 101.76, |
|
"step": 264 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 264, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 12, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": { |
|
"learning_rate": 3.901404269366272e-05, |
|
"num_train_epochs": 12, |
|
"per_device_train_batch_size": 64, |
|
"warmup_steps": 368, |
|
"weight_decay": 0.16924253041207893 |
|
} |
|
} |
|
|