Video-ChatGPT-7B / trainer_state.json
mmaaz60's picture
Upload pretrained weights file
095de7a
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"global_step": 8823,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"learning_rate": 7.5471698113207555e-06,
"loss": 6.6425,
"step": 100
},
{
"epoch": 0.07,
"learning_rate": 1.5094339622641511e-05,
"loss": 4.8785,
"step": 200
},
{
"epoch": 0.1,
"learning_rate": 1.9999174617418052e-05,
"loss": 2.0576,
"step": 300
},
{
"epoch": 0.14,
"learning_rate": 1.9987722672633802e-05,
"loss": 1.7927,
"step": 400
},
{
"epoch": 0.17,
"learning_rate": 1.9962812964571567e-05,
"loss": 1.7317,
"step": 500
},
{
"epoch": 0.2,
"learning_rate": 1.9924479057334537e-05,
"loss": 1.7022,
"step": 600
},
{
"epoch": 0.24,
"learning_rate": 1.9872772603202818e-05,
"loss": 1.6741,
"step": 700
},
{
"epoch": 0.27,
"learning_rate": 1.9807763273035574e-05,
"loss": 1.6608,
"step": 800
},
{
"epoch": 0.31,
"learning_rate": 1.9729538662394363e-05,
"loss": 1.6597,
"step": 900
},
{
"epoch": 0.34,
"learning_rate": 1.9638204173514217e-05,
"loss": 1.6598,
"step": 1000
},
{
"epoch": 0.37,
"learning_rate": 1.953388287328142e-05,
"loss": 1.6501,
"step": 1100
},
{
"epoch": 0.41,
"learning_rate": 1.9416715327409453e-05,
"loss": 1.637,
"step": 1200
},
{
"epoch": 0.44,
"learning_rate": 1.9286859411036396e-05,
"loss": 1.6317,
"step": 1300
},
{
"epoch": 0.48,
"learning_rate": 1.914449009599919e-05,
"loss": 1.6292,
"step": 1400
},
{
"epoch": 0.51,
"learning_rate": 1.898979921507119e-05,
"loss": 1.6279,
"step": 1500
},
{
"epoch": 0.54,
"learning_rate": 1.8822995203480823e-05,
"loss": 1.6225,
"step": 1600
},
{
"epoch": 0.58,
"learning_rate": 1.86443028180596e-05,
"loss": 1.6234,
"step": 1700
},
{
"epoch": 0.61,
"learning_rate": 1.8453962834397847e-05,
"loss": 1.6073,
"step": 1800
},
{
"epoch": 0.65,
"learning_rate": 1.8252231722416328e-05,
"loss": 1.6119,
"step": 1900
},
{
"epoch": 0.68,
"learning_rate": 1.8039381300790812e-05,
"loss": 1.5936,
"step": 2000
},
{
"epoch": 0.71,
"learning_rate": 1.781569837069528e-05,
"loss": 1.6175,
"step": 2100
},
{
"epoch": 0.75,
"learning_rate": 1.758148432935723e-05,
"loss": 1.6093,
"step": 2200
},
{
"epoch": 0.78,
"learning_rate": 1.7337054763945823e-05,
"loss": 1.6081,
"step": 2300
},
{
"epoch": 0.82,
"learning_rate": 1.7082739026340097e-05,
"loss": 1.6064,
"step": 2400
},
{
"epoch": 0.85,
"learning_rate": 1.6818879789350134e-05,
"loss": 1.6016,
"step": 2500
},
{
"epoch": 0.88,
"learning_rate": 1.6545832584989235e-05,
"loss": 1.6062,
"step": 2600
},
{
"epoch": 0.92,
"learning_rate": 1.6263965325419206e-05,
"loss": 1.6042,
"step": 2700
},
{
"epoch": 0.95,
"learning_rate": 1.5973657807214245e-05,
"loss": 1.5967,
"step": 2800
},
{
"epoch": 0.99,
"learning_rate": 1.5675301199611413e-05,
"loss": 1.6095,
"step": 2900
},
{
"epoch": 1.02,
"learning_rate": 1.536929751743723e-05,
"loss": 1.5958,
"step": 3000
},
{
"epoch": 1.05,
"learning_rate": 1.5056059079420575e-05,
"loss": 1.6012,
"step": 3100
},
{
"epoch": 1.09,
"learning_rate": 1.4736007952621852e-05,
"loss": 1.5872,
"step": 3200
},
{
"epoch": 1.12,
"learning_rate": 1.4409575383726852e-05,
"loss": 1.5967,
"step": 3300
},
{
"epoch": 1.16,
"learning_rate": 1.4077201217971817e-05,
"loss": 1.5913,
"step": 3400
},
{
"epoch": 1.19,
"learning_rate": 1.3739333306482481e-05,
"loss": 1.5902,
"step": 3500
},
{
"epoch": 1.22,
"learning_rate": 1.3396426902825753e-05,
"loss": 1.5908,
"step": 3600
},
{
"epoch": 1.26,
"learning_rate": 1.3048944049587138e-05,
"loss": 1.5883,
"step": 3700
},
{
"epoch": 1.29,
"learning_rate": 1.2697352955800396e-05,
"loss": 1.5907,
"step": 3800
},
{
"epoch": 1.33,
"learning_rate": 1.2342127366068364e-05,
"loss": 1.5864,
"step": 3900
},
{
"epoch": 1.36,
"learning_rate": 1.1983745922224985e-05,
"loss": 1.5912,
"step": 4000
},
{
"epoch": 1.39,
"learning_rate": 1.1622691518398636e-05,
"loss": 1.59,
"step": 4100
},
{
"epoch": 1.43,
"learning_rate": 1.1259450650345798e-05,
"loss": 1.5837,
"step": 4200
},
{
"epoch": 1.46,
"learning_rate": 1.0894512759931785e-05,
"loss": 1.5805,
"step": 4300
},
{
"epoch": 1.5,
"learning_rate": 1.0528369575641793e-05,
"loss": 1.5916,
"step": 4400
},
{
"epoch": 1.53,
"learning_rate": 1.0161514450010882e-05,
"loss": 1.5975,
"step": 4500
},
{
"epoch": 1.56,
"learning_rate": 9.794441694865673e-06,
"loss": 1.5885,
"step": 4600
},
{
"epoch": 1.6,
"learning_rate": 9.427645915273446e-06,
"loss": 1.5849,
"step": 4700
},
{
"epoch": 1.63,
"learning_rate": 9.061621343096156e-06,
"loss": 1.5754,
"step": 4800
},
{
"epoch": 1.67,
"learning_rate": 8.696861171047268e-06,
"loss": 1.5906,
"step": 4900
},
{
"epoch": 1.7,
"learning_rate": 8.33385688814881e-06,
"loss": 1.5837,
"step": 5000
},
{
"epoch": 1.73,
"learning_rate": 7.97309761748402e-06,
"loss": 1.5807,
"step": 5100
},
{
"epoch": 1.77,
"learning_rate": 7.615069457137927e-06,
"loss": 1.5907,
"step": 5200
},
{
"epoch": 1.8,
"learning_rate": 7.260254825213902e-06,
"loss": 1.5826,
"step": 5300
},
{
"epoch": 1.84,
"learning_rate": 6.909131809808755e-06,
"loss": 1.5796,
"step": 5400
},
{
"epoch": 1.87,
"learning_rate": 6.562173524822188e-06,
"loss": 1.5814,
"step": 5500
},
{
"epoch": 1.9,
"learning_rate": 6.219847472468641e-06,
"loss": 1.5861,
"step": 5600
},
{
"epoch": 1.94,
"learning_rate": 5.882614913350499e-06,
"loss": 1.5744,
"step": 5700
},
{
"epoch": 1.97,
"learning_rate": 5.550930244941448e-06,
"loss": 1.5797,
"step": 5800
},
{
"epoch": 2.01,
"learning_rate": 5.2252403893173835e-06,
"loss": 1.5857,
"step": 5900
},
{
"epoch": 2.04,
"learning_rate": 4.9059841909599456e-06,
"loss": 1.5728,
"step": 6000
},
{
"epoch": 2.07,
"learning_rate": 4.593591825444028e-06,
"loss": 1.5701,
"step": 6100
},
{
"epoch": 2.11,
"learning_rate": 4.288484219806016e-06,
"loss": 1.5846,
"step": 6200
},
{
"epoch": 2.14,
"learning_rate": 3.991072485373858e-06,
"loss": 1.5716,
"step": 6300
},
{
"epoch": 2.18,
"learning_rate": 3.7017573638230296e-06,
"loss": 1.5798,
"step": 6400
},
{
"epoch": 2.21,
"learning_rate": 3.420928687204965e-06,
"loss": 1.5739,
"step": 6500
},
{
"epoch": 2.24,
"learning_rate": 3.1489648526753913e-06,
"loss": 1.5845,
"step": 6600
},
{
"epoch": 2.28,
"learning_rate": 2.8862323126304427e-06,
"loss": 1.5808,
"step": 6700
},
{
"epoch": 2.31,
"learning_rate": 2.6330850809374685e-06,
"loss": 1.5704,
"step": 6800
},
{
"epoch": 2.35,
"learning_rate": 2.389864255925913e-06,
"loss": 1.5769,
"step": 6900
},
{
"epoch": 2.38,
"learning_rate": 2.1568975607809895e-06,
"loss": 1.5765,
"step": 7000
},
{
"epoch": 2.41,
"learning_rate": 1.934498901959424e-06,
"loss": 1.5665,
"step": 7100
},
{
"epoch": 2.45,
"learning_rate": 1.722967946222277e-06,
"loss": 1.5822,
"step": 7200
},
{
"epoch": 2.48,
"learning_rate": 1.5225897168548032e-06,
"loss": 1.5931,
"step": 7300
},
{
"epoch": 2.52,
"learning_rate": 1.3336342096173239e-06,
"loss": 1.5855,
"step": 7400
},
{
"epoch": 2.55,
"learning_rate": 1.1563560289446819e-06,
"loss": 1.5736,
"step": 7500
},
{
"epoch": 2.58,
"learning_rate": 9.909940448844412e-07,
"loss": 1.5942,
"step": 7600
},
{
"epoch": 2.62,
"learning_rate": 8.377710712360631e-07,
"loss": 1.5792,
"step": 7700
},
{
"epoch": 2.65,
"learning_rate": 6.968935653247766e-07,
"loss": 1.5715,
"step": 7800
},
{
"epoch": 2.69,
"learning_rate": 5.685513498146533e-07,
"loss": 1.5777,
"step": 7900
},
{
"epoch": 2.72,
"learning_rate": 4.529173569357459e-07,
"loss": 1.5795,
"step": 8000
},
{
"epoch": 2.75,
"learning_rate": 3.5014739546990087e-07,
"loss": 1.5799,
"step": 8100
},
{
"epoch": 2.79,
"learning_rate": 2.603799408092389e-07,
"loss": 1.5743,
"step": 8200
},
{
"epoch": 2.82,
"learning_rate": 1.8373594837017505e-07,
"loss": 1.5779,
"step": 8300
},
{
"epoch": 2.86,
"learning_rate": 1.2031869061438494e-07,
"loss": 1.5689,
"step": 8400
},
{
"epoch": 2.89,
"learning_rate": 7.02136178963242e-08,
"loss": 1.5854,
"step": 8500
},
{
"epoch": 2.92,
"learning_rate": 3.3488243324814044e-08,
"loss": 1.5654,
"step": 8600
},
{
"epoch": 2.96,
"learning_rate": 1.0192051793809221e-08,
"loss": 1.5818,
"step": 8700
},
{
"epoch": 2.99,
"learning_rate": 3.5643330492995953e-10,
"loss": 1.5853,
"step": 8800
},
{
"epoch": 3.0,
"step": 8823,
"total_flos": 6.175868485067866e+18,
"train_loss": 1.6989395520347432,
"train_runtime": 13309.6743,
"train_samples_per_second": 21.208,
"train_steps_per_second": 0.663
}
],
"max_steps": 8823,
"num_train_epochs": 3,
"total_flos": 6.175868485067866e+18,
"trial_name": null,
"trial_params": null
}