|
{ |
|
"best_metric": 1.0, |
|
"best_model_checkpoint": "videomae-base-finetuned-bekhoaxe/checkpoint-231", |
|
"epoch": 3.25, |
|
"eval_steps": 500, |
|
"global_step": 308, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 4.9310173988342285, |
|
"learning_rate": 1.6129032258064517e-05, |
|
"loss": 0.7032, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 6.229051113128662, |
|
"learning_rate": 3.2258064516129034e-05, |
|
"loss": 0.5738, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 9.958233833312988, |
|
"learning_rate": 4.8387096774193554e-05, |
|
"loss": 0.3415, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 7.560524940490723, |
|
"learning_rate": 4.837545126353791e-05, |
|
"loss": 0.4689, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 5.026527404785156, |
|
"learning_rate": 4.657039711191336e-05, |
|
"loss": 0.2069, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.358181893825531, |
|
"learning_rate": 4.4765342960288806e-05, |
|
"loss": 0.0961, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.05433480069041252, |
|
"learning_rate": 4.296028880866426e-05, |
|
"loss": 0.2172, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_accuracy": 0.9915254237288136, |
|
"eval_loss": 0.013951542787253857, |
|
"eval_runtime": 38.5113, |
|
"eval_samples_per_second": 3.064, |
|
"eval_steps_per_second": 0.779, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 1.3378506898880005, |
|
"learning_rate": 4.115523465703972e-05, |
|
"loss": 0.3363, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 0.04731794446706772, |
|
"learning_rate": 3.935018050541516e-05, |
|
"loss": 0.195, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 0.04140738397836685, |
|
"learning_rate": 3.754512635379062e-05, |
|
"loss": 0.0049, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 0.08140890300273895, |
|
"learning_rate": 3.574007220216607e-05, |
|
"loss": 0.2396, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 0.09178142994642258, |
|
"learning_rate": 3.3935018050541516e-05, |
|
"loss": 0.0746, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 0.031925346702337265, |
|
"learning_rate": 3.212996389891697e-05, |
|
"loss": 0.3279, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 0.11948370933532715, |
|
"learning_rate": 3.032490974729242e-05, |
|
"loss": 0.0021, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 0.05676674470305443, |
|
"learning_rate": 2.851985559566787e-05, |
|
"loss": 0.1826, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_accuracy": 0.9830508474576272, |
|
"eval_loss": 0.042561955749988556, |
|
"eval_runtime": 38.1931, |
|
"eval_samples_per_second": 3.09, |
|
"eval_steps_per_second": 0.785, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 0.11792542040348053, |
|
"learning_rate": 2.6714801444043324e-05, |
|
"loss": 0.1442, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 0.10978111624717712, |
|
"learning_rate": 2.4909747292418774e-05, |
|
"loss": 0.0912, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 0.016077380627393723, |
|
"learning_rate": 2.3104693140794227e-05, |
|
"loss": 0.1236, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"grad_norm": 0.01301741786301136, |
|
"learning_rate": 2.1299638989169676e-05, |
|
"loss": 0.1501, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"grad_norm": 0.026157772168517113, |
|
"learning_rate": 1.9494584837545125e-05, |
|
"loss": 0.18, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"grad_norm": 0.04890529438853264, |
|
"learning_rate": 1.768953068592058e-05, |
|
"loss": 0.0038, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"grad_norm": 0.038881637156009674, |
|
"learning_rate": 1.588447653429603e-05, |
|
"loss": 0.0019, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"grad_norm": 0.01524051371961832, |
|
"learning_rate": 1.407942238267148e-05, |
|
"loss": 0.1427, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.0019937974866479635, |
|
"eval_runtime": 38.3352, |
|
"eval_samples_per_second": 3.078, |
|
"eval_steps_per_second": 0.783, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"grad_norm": 0.01709669642150402, |
|
"learning_rate": 1.2274368231046932e-05, |
|
"loss": 0.0012, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"grad_norm": 0.1900138109922409, |
|
"learning_rate": 1.0469314079422383e-05, |
|
"loss": 0.1073, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"grad_norm": 0.06225905194878578, |
|
"learning_rate": 8.664259927797834e-06, |
|
"loss": 0.0029, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"grad_norm": 0.015592777170240879, |
|
"learning_rate": 6.859205776173286e-06, |
|
"loss": 0.0009, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"grad_norm": 0.016208168119192123, |
|
"learning_rate": 5.054151624548736e-06, |
|
"loss": 0.0036, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"grad_norm": 0.007616001646965742, |
|
"learning_rate": 3.2490974729241876e-06, |
|
"loss": 0.0008, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"grad_norm": 0.008425813168287277, |
|
"learning_rate": 1.4440433212996392e-06, |
|
"loss": 0.2853, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.0012017178814858198, |
|
"eval_runtime": 38.094, |
|
"eval_samples_per_second": 3.098, |
|
"eval_steps_per_second": 0.788, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"step": 308, |
|
"total_flos": 1.5351515742291886e+18, |
|
"train_loss": 0.16999648642641577, |
|
"train_runtime": 584.5872, |
|
"train_samples_per_second": 2.107, |
|
"train_steps_per_second": 0.527 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"eval_accuracy": 0.9761904761904762, |
|
"eval_loss": 0.09526590257883072, |
|
"eval_runtime": 122.5253, |
|
"eval_samples_per_second": 1.714, |
|
"eval_steps_per_second": 0.433, |
|
"step": 308 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 308, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 500, |
|
"total_flos": 1.5351515742291886e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|