|
{ |
|
"best_metric": 0.1553138643503189, |
|
"best_model_checkpoint": "speech-multiclassifier-run-2/checkpoint-6384", |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 9576, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.15664160401002505, |
|
"grad_norm": 11.20281982421875, |
|
"learning_rate": 1.8955722639933167e-05, |
|
"loss": 0.1599, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3132832080200501, |
|
"grad_norm": 16.948055267333984, |
|
"learning_rate": 1.7911445279866332e-05, |
|
"loss": 0.2132, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.4699248120300752, |
|
"grad_norm": 15.359735488891602, |
|
"learning_rate": 1.68671679197995e-05, |
|
"loss": 0.2101, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.6265664160401002, |
|
"grad_norm": 0.13909225165843964, |
|
"learning_rate": 1.5822890559732666e-05, |
|
"loss": 0.1868, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.7832080200501254, |
|
"grad_norm": 1.7620573043823242, |
|
"learning_rate": 1.4778613199665832e-05, |
|
"loss": 0.1824, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.9398496240601504, |
|
"grad_norm": 2.3290231227874756, |
|
"learning_rate": 1.3734335839598997e-05, |
|
"loss": 0.1678, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.15620405972003937, |
|
"eval_overall-accuracy": 0.9531360112755461, |
|
"eval_overall-f1": 0.9531360112755461, |
|
"eval_overall-precision": 0.9531360112755461, |
|
"eval_overall-recall": 0.9531360112755461, |
|
"eval_runtime": 5.7139, |
|
"eval_samples_per_second": 496.687, |
|
"eval_steps_per_second": 31.152, |
|
"step": 3192 |
|
}, |
|
{ |
|
"epoch": 1.0964912280701755, |
|
"grad_norm": 0.4430443048477173, |
|
"learning_rate": 1.2690058479532166e-05, |
|
"loss": 0.1655, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.2531328320802004, |
|
"grad_norm": 5.565830707550049, |
|
"learning_rate": 1.1645781119465331e-05, |
|
"loss": 0.1499, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.4097744360902256, |
|
"grad_norm": 0.9018378257751465, |
|
"learning_rate": 1.0601503759398497e-05, |
|
"loss": 0.1443, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.5664160401002505, |
|
"grad_norm": 0.3591144382953644, |
|
"learning_rate": 9.557226399331662e-06, |
|
"loss": 0.1399, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.7230576441102756, |
|
"grad_norm": 0.4499484598636627, |
|
"learning_rate": 8.512949039264829e-06, |
|
"loss": 0.1559, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.8796992481203008, |
|
"grad_norm": 8.649208068847656, |
|
"learning_rate": 7.468671679197995e-06, |
|
"loss": 0.1522, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.1553138643503189, |
|
"eval_overall-accuracy": 0.9531360112755461, |
|
"eval_overall-f1": 0.9531360112755461, |
|
"eval_overall-precision": 0.9531360112755461, |
|
"eval_overall-recall": 0.9531360112755461, |
|
"eval_runtime": 5.7234, |
|
"eval_samples_per_second": 495.858, |
|
"eval_steps_per_second": 31.1, |
|
"step": 6384 |
|
}, |
|
{ |
|
"epoch": 2.036340852130326, |
|
"grad_norm": 1.2995306253433228, |
|
"learning_rate": 6.424394319131162e-06, |
|
"loss": 0.1398, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.192982456140351, |
|
"grad_norm": 0.18944260478019714, |
|
"learning_rate": 5.380116959064328e-06, |
|
"loss": 0.1164, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.3496240601503757, |
|
"grad_norm": 4.408690929412842, |
|
"learning_rate": 4.335839598997494e-06, |
|
"loss": 0.1154, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.506265664160401, |
|
"grad_norm": 0.7239732146263123, |
|
"learning_rate": 3.29156223893066e-06, |
|
"loss": 0.1208, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.662907268170426, |
|
"grad_norm": 6.512196063995361, |
|
"learning_rate": 2.2472848788638263e-06, |
|
"loss": 0.1115, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.819548872180451, |
|
"grad_norm": 1.97010338306427, |
|
"learning_rate": 1.2030075187969925e-06, |
|
"loss": 0.1169, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.9761904761904763, |
|
"grad_norm": 1.1876590251922607, |
|
"learning_rate": 1.5873015873015874e-07, |
|
"loss": 0.1157, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.16947361826896667, |
|
"eval_overall-accuracy": 0.9534883720930233, |
|
"eval_overall-f1": 0.9534883720930233, |
|
"eval_overall-precision": 0.9534883720930233, |
|
"eval_overall-recall": 0.9534883720930233, |
|
"eval_runtime": 5.6944, |
|
"eval_samples_per_second": 498.388, |
|
"eval_steps_per_second": 31.259, |
|
"step": 9576 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 9576, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2641161396896352.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|