videomae-base-finetuned-bekhoaxe / trainer_state.json
ninhnguyendx779's picture
End of training
8ebab4f verified
raw
history blame contribute delete
No virus
6.77 kB
{
"best_metric": 1.0,
"best_model_checkpoint": "videomae-base-finetuned-bekhoaxe/checkpoint-231",
"epoch": 3.25,
"eval_steps": 500,
"global_step": 308,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"grad_norm": 4.9310173988342285,
"learning_rate": 1.6129032258064517e-05,
"loss": 0.7032,
"step": 10
},
{
"epoch": 0.06,
"grad_norm": 6.229051113128662,
"learning_rate": 3.2258064516129034e-05,
"loss": 0.5738,
"step": 20
},
{
"epoch": 0.1,
"grad_norm": 9.958233833312988,
"learning_rate": 4.8387096774193554e-05,
"loss": 0.3415,
"step": 30
},
{
"epoch": 0.13,
"grad_norm": 7.560524940490723,
"learning_rate": 4.837545126353791e-05,
"loss": 0.4689,
"step": 40
},
{
"epoch": 0.16,
"grad_norm": 5.026527404785156,
"learning_rate": 4.657039711191336e-05,
"loss": 0.2069,
"step": 50
},
{
"epoch": 0.19,
"grad_norm": 0.358181893825531,
"learning_rate": 4.4765342960288806e-05,
"loss": 0.0961,
"step": 60
},
{
"epoch": 0.23,
"grad_norm": 0.05433480069041252,
"learning_rate": 4.296028880866426e-05,
"loss": 0.2172,
"step": 70
},
{
"epoch": 0.25,
"eval_accuracy": 0.9915254237288136,
"eval_loss": 0.013951542787253857,
"eval_runtime": 38.5113,
"eval_samples_per_second": 3.064,
"eval_steps_per_second": 0.779,
"step": 77
},
{
"epoch": 1.01,
"grad_norm": 1.3378506898880005,
"learning_rate": 4.115523465703972e-05,
"loss": 0.3363,
"step": 80
},
{
"epoch": 1.04,
"grad_norm": 0.04731794446706772,
"learning_rate": 3.935018050541516e-05,
"loss": 0.195,
"step": 90
},
{
"epoch": 1.07,
"grad_norm": 0.04140738397836685,
"learning_rate": 3.754512635379062e-05,
"loss": 0.0049,
"step": 100
},
{
"epoch": 1.11,
"grad_norm": 0.08140890300273895,
"learning_rate": 3.574007220216607e-05,
"loss": 0.2396,
"step": 110
},
{
"epoch": 1.14,
"grad_norm": 0.09178142994642258,
"learning_rate": 3.3935018050541516e-05,
"loss": 0.0746,
"step": 120
},
{
"epoch": 1.17,
"grad_norm": 0.031925346702337265,
"learning_rate": 3.212996389891697e-05,
"loss": 0.3279,
"step": 130
},
{
"epoch": 1.2,
"grad_norm": 0.11948370933532715,
"learning_rate": 3.032490974729242e-05,
"loss": 0.0021,
"step": 140
},
{
"epoch": 1.24,
"grad_norm": 0.05676674470305443,
"learning_rate": 2.851985559566787e-05,
"loss": 0.1826,
"step": 150
},
{
"epoch": 1.25,
"eval_accuracy": 0.9830508474576272,
"eval_loss": 0.042561955749988556,
"eval_runtime": 38.1931,
"eval_samples_per_second": 3.09,
"eval_steps_per_second": 0.785,
"step": 154
},
{
"epoch": 2.02,
"grad_norm": 0.11792542040348053,
"learning_rate": 2.6714801444043324e-05,
"loss": 0.1442,
"step": 160
},
{
"epoch": 2.05,
"grad_norm": 0.10978111624717712,
"learning_rate": 2.4909747292418774e-05,
"loss": 0.0912,
"step": 170
},
{
"epoch": 2.08,
"grad_norm": 0.016077380627393723,
"learning_rate": 2.3104693140794227e-05,
"loss": 0.1236,
"step": 180
},
{
"epoch": 2.12,
"grad_norm": 0.01301741786301136,
"learning_rate": 2.1299638989169676e-05,
"loss": 0.1501,
"step": 190
},
{
"epoch": 2.15,
"grad_norm": 0.026157772168517113,
"learning_rate": 1.9494584837545125e-05,
"loss": 0.18,
"step": 200
},
{
"epoch": 2.18,
"grad_norm": 0.04890529438853264,
"learning_rate": 1.768953068592058e-05,
"loss": 0.0038,
"step": 210
},
{
"epoch": 2.21,
"grad_norm": 0.038881637156009674,
"learning_rate": 1.588447653429603e-05,
"loss": 0.0019,
"step": 220
},
{
"epoch": 2.25,
"grad_norm": 0.01524051371961832,
"learning_rate": 1.407942238267148e-05,
"loss": 0.1427,
"step": 230
},
{
"epoch": 2.25,
"eval_accuracy": 1.0,
"eval_loss": 0.0019937974866479635,
"eval_runtime": 38.3352,
"eval_samples_per_second": 3.078,
"eval_steps_per_second": 0.783,
"step": 231
},
{
"epoch": 3.03,
"grad_norm": 0.01709669642150402,
"learning_rate": 1.2274368231046932e-05,
"loss": 0.0012,
"step": 240
},
{
"epoch": 3.06,
"grad_norm": 0.1900138109922409,
"learning_rate": 1.0469314079422383e-05,
"loss": 0.1073,
"step": 250
},
{
"epoch": 3.09,
"grad_norm": 0.06225905194878578,
"learning_rate": 8.664259927797834e-06,
"loss": 0.0029,
"step": 260
},
{
"epoch": 3.13,
"grad_norm": 0.015592777170240879,
"learning_rate": 6.859205776173286e-06,
"loss": 0.0009,
"step": 270
},
{
"epoch": 3.16,
"grad_norm": 0.016208168119192123,
"learning_rate": 5.054151624548736e-06,
"loss": 0.0036,
"step": 280
},
{
"epoch": 3.19,
"grad_norm": 0.007616001646965742,
"learning_rate": 3.2490974729241876e-06,
"loss": 0.0008,
"step": 290
},
{
"epoch": 3.22,
"grad_norm": 0.008425813168287277,
"learning_rate": 1.4440433212996392e-06,
"loss": 0.2853,
"step": 300
},
{
"epoch": 3.25,
"eval_accuracy": 1.0,
"eval_loss": 0.0012017178814858198,
"eval_runtime": 38.094,
"eval_samples_per_second": 3.098,
"eval_steps_per_second": 0.788,
"step": 308
},
{
"epoch": 3.25,
"step": 308,
"total_flos": 1.5351515742291886e+18,
"train_loss": 0.16999648642641577,
"train_runtime": 584.5872,
"train_samples_per_second": 2.107,
"train_steps_per_second": 0.527
},
{
"epoch": 3.25,
"eval_accuracy": 0.9761904761904762,
"eval_loss": 0.09526590257883072,
"eval_runtime": 122.5253,
"eval_samples_per_second": 1.714,
"eval_steps_per_second": 0.433,
"step": 308
}
],
"logging_steps": 10,
"max_steps": 308,
"num_input_tokens_seen": 0,
"num_train_epochs": 9223372036854775807,
"save_steps": 500,
"total_flos": 1.5351515742291886e+18,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}