alexgrigore's picture
End of training
ad35605 verified
raw
history blame contribute delete
No virus
6.91 kB
{
"best_metric": 0.7875,
"best_model_checkpoint": "videomae-base-finetuned-gesturePhasev2/checkpoint-95",
"epoch": 4.125,
"eval_steps": 500,
"global_step": 237,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.026595744680851064,
"grad_norm": 9.288494110107422,
"learning_rate": 6.578947368421053e-07,
"loss": 1.4443,
"step": 10
},
{
"epoch": 0.05319148936170213,
"grad_norm": 10.334871292114258,
"learning_rate": 1.3157894736842106e-06,
"loss": 1.418,
"step": 20
},
{
"epoch": 0.0797872340425532,
"grad_norm": 7.731433391571045,
"learning_rate": 1.973684210526316e-06,
"loss": 1.3362,
"step": 30
},
{
"epoch": 0.10638297872340426,
"grad_norm": 10.242834091186523,
"learning_rate": 2.631578947368421e-06,
"loss": 1.1665,
"step": 40
},
{
"epoch": 0.125,
"eval_accuracy": 0.775,
"eval_loss": 1.0642327070236206,
"eval_runtime": 15.1556,
"eval_samples_per_second": 10.557,
"eval_steps_per_second": 1.32,
"step": 47
},
{
"epoch": 1.0066489361702127,
"grad_norm": 5.333619594573975,
"learning_rate": 3.289473684210527e-06,
"loss": 1.0034,
"step": 50
},
{
"epoch": 1.0332446808510638,
"grad_norm": 4.666859149932861,
"learning_rate": 3.947368421052632e-06,
"loss": 0.8562,
"step": 60
},
{
"epoch": 1.059840425531915,
"grad_norm": 4.381842136383057,
"learning_rate": 4.605263157894737e-06,
"loss": 0.7196,
"step": 70
},
{
"epoch": 1.086436170212766,
"grad_norm": 4.5117340087890625,
"learning_rate": 4.997807075247147e-06,
"loss": 0.7522,
"step": 80
},
{
"epoch": 1.113031914893617,
"grad_norm": 3.7782816886901855,
"learning_rate": 4.973180832407471e-06,
"loss": 0.7316,
"step": 90
},
{
"epoch": 1.1263297872340425,
"eval_accuracy": 0.7875,
"eval_loss": 0.7825992703437805,
"eval_runtime": 11.6767,
"eval_samples_per_second": 13.703,
"eval_steps_per_second": 1.713,
"step": 95
},
{
"epoch": 2.0132978723404253,
"grad_norm": 3.1058461666107178,
"learning_rate": 4.921457902821578e-06,
"loss": 0.785,
"step": 100
},
{
"epoch": 2.0398936170212765,
"grad_norm": 3.0555734634399414,
"learning_rate": 4.84320497372973e-06,
"loss": 0.7795,
"step": 110
},
{
"epoch": 2.0664893617021276,
"grad_norm": 3.8933866024017334,
"learning_rate": 4.7392794005985324e-06,
"loss": 0.774,
"step": 120
},
{
"epoch": 2.0930851063829787,
"grad_norm": 4.3550591468811035,
"learning_rate": 4.610819813755038e-06,
"loss": 0.7218,
"step": 130
},
{
"epoch": 2.11968085106383,
"grad_norm": 4.537750720977783,
"learning_rate": 4.4592336433146e-06,
"loss": 0.7259,
"step": 140
},
{
"epoch": 2.125,
"eval_accuracy": 0.7875,
"eval_loss": 0.8042387962341309,
"eval_runtime": 10.5863,
"eval_samples_per_second": 15.114,
"eval_steps_per_second": 1.889,
"step": 142
},
{
"epoch": 3.0199468085106385,
"grad_norm": 3.3319995403289795,
"learning_rate": 4.286181699082008e-06,
"loss": 0.6222,
"step": 150
},
{
"epoch": 3.046542553191489,
"grad_norm": 4.209911823272705,
"learning_rate": 4.093559974371725e-06,
"loss": 0.7533,
"step": 160
},
{
"epoch": 3.0731382978723403,
"grad_norm": 2.9892210960388184,
"learning_rate": 3.88347887310836e-06,
"loss": 0.8408,
"step": 170
},
{
"epoch": 3.0997340425531914,
"grad_norm": 4.7717814445495605,
"learning_rate": 3.658240087799655e-06,
"loss": 0.7287,
"step": 180
},
{
"epoch": 3.1263297872340425,
"grad_norm": 5.496540546417236,
"learning_rate": 3.4203113817116955e-06,
"loss": 0.6643,
"step": 190
},
{
"epoch": 3.1263297872340425,
"eval_accuracy": 0.7875,
"eval_loss": 0.8022773861885071,
"eval_runtime": 10.4825,
"eval_samples_per_second": 15.264,
"eval_steps_per_second": 1.908,
"step": 190
},
{
"epoch": 4.026595744680851,
"grad_norm": 3.0813848972320557,
"learning_rate": 3.1722995515381644e-06,
"loss": 0.6386,
"step": 200
},
{
"epoch": 4.053191489361702,
"grad_norm": 3.0726428031921387,
"learning_rate": 2.9169218667902562e-06,
"loss": 0.6708,
"step": 210
},
{
"epoch": 4.079787234042553,
"grad_norm": 5.47495174407959,
"learning_rate": 2.6569762988232838e-06,
"loss": 0.8216,
"step": 220
},
{
"epoch": 4.1063829787234045,
"grad_norm": 3.801624059677124,
"learning_rate": 2.3953108656770018e-06,
"loss": 0.761,
"step": 230
},
{
"epoch": 4.125,
"eval_accuracy": 0.7875,
"eval_loss": 0.8077355623245239,
"eval_runtime": 16.5222,
"eval_samples_per_second": 9.684,
"eval_steps_per_second": 1.21,
"step": 237
},
{
"epoch": 4.125,
"step": 237,
"total_flos": 4.691516857081528e+18,
"train_loss": 0.8600665486814604,
"train_runtime": 867.0351,
"train_samples_per_second": 6.939,
"train_steps_per_second": 0.434
},
{
"epoch": 4.125,
"eval_accuracy": 0.7633136094674556,
"eval_loss": 0.8172227144241333,
"eval_runtime": 60.5871,
"eval_samples_per_second": 2.789,
"eval_steps_per_second": 0.363,
"step": 237
},
{
"epoch": 4.125,
"eval_accuracy": 0.7633136094674556,
"eval_loss": 0.8172227144241333,
"eval_runtime": 11.0103,
"eval_samples_per_second": 15.349,
"eval_steps_per_second": 1.998,
"step": 237
}
],
"logging_steps": 10,
"max_steps": 376,
"num_input_tokens_seen": 0,
"num_train_epochs": 9223372036854775807,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4.691516857081528e+18,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}