dima806's picture
Upload folder using huggingface_hub
a60ae20 verified
raw
history blame
15 kB
{
"best_metric": 2.5461041927337646,
"best_model_checkpoint": "car_models_image_detection/checkpoint-32650",
"epoch": 10.0,
"eval_steps": 500,
"global_step": 32650,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.15313935681470137,
"grad_norm": 1.442165732383728,
"learning_rate": 8.875766871165644e-06,
"loss": 5.7501,
"step": 500
},
{
"epoch": 0.30627871362940273,
"grad_norm": 1.5294591188430786,
"learning_rate": 8.737730061349693e-06,
"loss": 5.66,
"step": 1000
},
{
"epoch": 0.45941807044410415,
"grad_norm": 1.7971255779266357,
"learning_rate": 8.599693251533743e-06,
"loss": 5.5686,
"step": 1500
},
{
"epoch": 0.6125574272588055,
"grad_norm": 1.7858527898788452,
"learning_rate": 8.461656441717792e-06,
"loss": 5.4714,
"step": 2000
},
{
"epoch": 0.7656967840735069,
"grad_norm": 1.6970139741897583,
"learning_rate": 8.32361963190184e-06,
"loss": 5.3715,
"step": 2500
},
{
"epoch": 0.9188361408882083,
"grad_norm": 1.9722312688827515,
"learning_rate": 8.18558282208589e-06,
"loss": 5.2844,
"step": 3000
},
{
"epoch": 1.0,
"eval_accuracy": 0.20365025344993468,
"eval_loss": 5.212795734405518,
"eval_model_preparation_time": 0.0035,
"eval_runtime": 948.361,
"eval_samples_per_second": 73.431,
"eval_steps_per_second": 9.179,
"step": 3265
},
{
"epoch": 1.0719754977029097,
"grad_norm": 2.317599296569824,
"learning_rate": 8.047546012269938e-06,
"loss": 5.1866,
"step": 3500
},
{
"epoch": 1.225114854517611,
"grad_norm": 1.872455358505249,
"learning_rate": 7.909509202453989e-06,
"loss": 5.1003,
"step": 4000
},
{
"epoch": 1.3782542113323124,
"grad_norm": 2.192427396774292,
"learning_rate": 7.771472392638037e-06,
"loss": 5.0184,
"step": 4500
},
{
"epoch": 1.5313935681470139,
"grad_norm": 2.1170101165771484,
"learning_rate": 7.633435582822086e-06,
"loss": 4.9414,
"step": 5000
},
{
"epoch": 1.6845329249617151,
"grad_norm": 2.908296823501587,
"learning_rate": 7.495398773006135e-06,
"loss": 4.863,
"step": 5500
},
{
"epoch": 1.8376722817764164,
"grad_norm": 3.4065804481506348,
"learning_rate": 7.3573619631901846e-06,
"loss": 4.779,
"step": 6000
},
{
"epoch": 1.9908116385911179,
"grad_norm": 2.6088645458221436,
"learning_rate": 7.219325153374233e-06,
"loss": 4.7011,
"step": 6500
},
{
"epoch": 2.0,
"eval_accuracy": 0.31753758669710935,
"eval_loss": 4.691849231719971,
"eval_model_preparation_time": 0.0035,
"eval_runtime": 713.5751,
"eval_samples_per_second": 97.592,
"eval_steps_per_second": 12.199,
"step": 6530
},
{
"epoch": 2.1439509954058193,
"grad_norm": 2.4476678371429443,
"learning_rate": 7.081288343558283e-06,
"loss": 4.5978,
"step": 7000
},
{
"epoch": 2.2970903522205206,
"grad_norm": 3.1015167236328125,
"learning_rate": 6.9432515337423315e-06,
"loss": 4.53,
"step": 7500
},
{
"epoch": 2.450229709035222,
"grad_norm": 3.4957149028778076,
"learning_rate": 6.80521472392638e-06,
"loss": 4.437,
"step": 8000
},
{
"epoch": 2.6033690658499236,
"grad_norm": 3.372995138168335,
"learning_rate": 6.66717791411043e-06,
"loss": 4.3886,
"step": 8500
},
{
"epoch": 2.756508422664625,
"grad_norm": 2.965057849884033,
"learning_rate": 6.529141104294479e-06,
"loss": 4.3013,
"step": 9000
},
{
"epoch": 2.909647779479326,
"grad_norm": 3.4717111587524414,
"learning_rate": 6.391104294478528e-06,
"loss": 4.2275,
"step": 9500
},
{
"epoch": 3.0,
"eval_accuracy": 0.3973635462887175,
"eval_loss": 4.212672710418701,
"eval_model_preparation_time": 0.0035,
"eval_runtime": 702.2225,
"eval_samples_per_second": 99.169,
"eval_steps_per_second": 12.396,
"step": 9795
},
{
"epoch": 3.0627871362940278,
"grad_norm": 3.6480553150177,
"learning_rate": 6.253067484662577e-06,
"loss": 4.1497,
"step": 10000
},
{
"epoch": 3.215926493108729,
"grad_norm": 2.944525718688965,
"learning_rate": 6.1150306748466255e-06,
"loss": 4.0626,
"step": 10500
},
{
"epoch": 3.3690658499234303,
"grad_norm": 3.6789698600769043,
"learning_rate": 5.976993865030675e-06,
"loss": 3.9861,
"step": 11000
},
{
"epoch": 3.522205206738132,
"grad_norm": 3.7942678928375244,
"learning_rate": 5.838957055214725e-06,
"loss": 3.9302,
"step": 11500
},
{
"epoch": 3.6753445635528332,
"grad_norm": 2.889557123184204,
"learning_rate": 5.700920245398773e-06,
"loss": 3.8667,
"step": 12000
},
{
"epoch": 3.8284839203675345,
"grad_norm": 4.875265121459961,
"learning_rate": 5.562883435582822e-06,
"loss": 3.7844,
"step": 12500
},
{
"epoch": 3.9816232771822357,
"grad_norm": 4.279688835144043,
"learning_rate": 5.424846625766871e-06,
"loss": 3.7406,
"step": 13000
},
{
"epoch": 4.0,
"eval_accuracy": 0.4593833914904005,
"eval_loss": 3.774059772491455,
"eval_model_preparation_time": 0.0035,
"eval_runtime": 705.2879,
"eval_samples_per_second": 98.738,
"eval_steps_per_second": 12.342,
"step": 13060
},
{
"epoch": 4.134762633996937,
"grad_norm": 4.041619777679443,
"learning_rate": 5.286809815950921e-06,
"loss": 3.6478,
"step": 13500
},
{
"epoch": 4.287901990811639,
"grad_norm": 5.805275917053223,
"learning_rate": 5.14877300613497e-06,
"loss": 3.58,
"step": 14000
},
{
"epoch": 4.44104134762634,
"grad_norm": 3.922706365585327,
"learning_rate": 5.010736196319019e-06,
"loss": 3.543,
"step": 14500
},
{
"epoch": 4.594180704441041,
"grad_norm": 3.550119638442993,
"learning_rate": 4.872699386503067e-06,
"loss": 3.4574,
"step": 15000
},
{
"epoch": 4.747320061255743,
"grad_norm": 5.960729122161865,
"learning_rate": 4.734662576687116e-06,
"loss": 3.4116,
"step": 15500
},
{
"epoch": 4.900459418070444,
"grad_norm": 4.563659191131592,
"learning_rate": 4.5966257668711664e-06,
"loss": 3.3544,
"step": 16000
},
{
"epoch": 5.0,
"eval_accuracy": 0.5196800643317682,
"eval_loss": 3.398108959197998,
"eval_model_preparation_time": 0.0035,
"eval_runtime": 700.4362,
"eval_samples_per_second": 99.422,
"eval_steps_per_second": 12.428,
"step": 16325
},
{
"epoch": 5.053598774885145,
"grad_norm": 5.071287631988525,
"learning_rate": 4.458588957055215e-06,
"loss": 3.2835,
"step": 16500
},
{
"epoch": 5.206738131699847,
"grad_norm": 5.6899003982543945,
"learning_rate": 4.320552147239264e-06,
"loss": 3.2203,
"step": 17000
},
{
"epoch": 5.359877488514548,
"grad_norm": 5.455111980438232,
"learning_rate": 4.1825153374233126e-06,
"loss": 3.1822,
"step": 17500
},
{
"epoch": 5.51301684532925,
"grad_norm": 7.294569969177246,
"learning_rate": 4.044478527607362e-06,
"loss": 3.1201,
"step": 18000
},
{
"epoch": 5.666156202143951,
"grad_norm": 4.797060012817383,
"learning_rate": 3.906441717791411e-06,
"loss": 3.0764,
"step": 18500
},
{
"epoch": 5.819295558958652,
"grad_norm": 9.175103187561035,
"learning_rate": 3.7684049079754604e-06,
"loss": 3.0317,
"step": 19000
},
{
"epoch": 5.972434915773354,
"grad_norm": 5.574986934661865,
"learning_rate": 3.630368098159509e-06,
"loss": 3.0015,
"step": 19500
},
{
"epoch": 6.0,
"eval_accuracy": 0.5613951952210686,
"eval_loss": 3.0899131298065186,
"eval_model_preparation_time": 0.0035,
"eval_runtime": 705.6276,
"eval_samples_per_second": 98.691,
"eval_steps_per_second": 12.337,
"step": 19590
},
{
"epoch": 6.1255742725880555,
"grad_norm": 6.0056352615356445,
"learning_rate": 3.4923312883435583e-06,
"loss": 2.921,
"step": 20000
},
{
"epoch": 6.278713629402756,
"grad_norm": 12.546520233154297,
"learning_rate": 3.3542944785276074e-06,
"loss": 2.8971,
"step": 20500
},
{
"epoch": 6.431852986217458,
"grad_norm": 4.947099208831787,
"learning_rate": 3.2162576687116565e-06,
"loss": 2.8553,
"step": 21000
},
{
"epoch": 6.584992343032159,
"grad_norm": 8.054845809936523,
"learning_rate": 3.0782208588957057e-06,
"loss": 2.8151,
"step": 21500
},
{
"epoch": 6.738131699846861,
"grad_norm": 6.752479076385498,
"learning_rate": 2.940184049079755e-06,
"loss": 2.7747,
"step": 22000
},
{
"epoch": 6.891271056661562,
"grad_norm": 6.941985607147217,
"learning_rate": 2.8021472392638035e-06,
"loss": 2.7401,
"step": 22500
},
{
"epoch": 7.0,
"eval_accuracy": 0.5933313229655797,
"eval_loss": 2.853682279586792,
"eval_model_preparation_time": 0.0035,
"eval_runtime": 697.9759,
"eval_samples_per_second": 99.773,
"eval_steps_per_second": 12.472,
"step": 22855
},
{
"epoch": 7.044410413476263,
"grad_norm": 4.710619926452637,
"learning_rate": 2.664110429447853e-06,
"loss": 2.7003,
"step": 23000
},
{
"epoch": 7.197549770290965,
"grad_norm": 5.312741756439209,
"learning_rate": 2.5260736196319018e-06,
"loss": 2.6404,
"step": 23500
},
{
"epoch": 7.3506891271056665,
"grad_norm": 6.6971845626831055,
"learning_rate": 2.3880368098159513e-06,
"loss": 2.6377,
"step": 24000
},
{
"epoch": 7.503828483920367,
"grad_norm": 6.931722164154053,
"learning_rate": 2.25e-06,
"loss": 2.5981,
"step": 24500
},
{
"epoch": 7.656967840735069,
"grad_norm": 7.1908040046691895,
"learning_rate": 2.111963190184049e-06,
"loss": 2.5702,
"step": 25000
},
{
"epoch": 7.810107197549771,
"grad_norm": 5.63743257522583,
"learning_rate": 1.9739263803680983e-06,
"loss": 2.5676,
"step": 25500
},
{
"epoch": 7.9632465543644715,
"grad_norm": 6.103708267211914,
"learning_rate": 1.8358895705521473e-06,
"loss": 2.5566,
"step": 26000
},
{
"epoch": 8.0,
"eval_accuracy": 0.6183460417294907,
"eval_loss": 2.6820664405822754,
"eval_model_preparation_time": 0.0035,
"eval_runtime": 747.6734,
"eval_samples_per_second": 93.141,
"eval_steps_per_second": 11.643,
"step": 26120
},
{
"epoch": 8.116385911179172,
"grad_norm": 7.664525508880615,
"learning_rate": 1.6978527607361964e-06,
"loss": 2.4933,
"step": 26500
},
{
"epoch": 8.269525267993874,
"grad_norm": 8.328133583068848,
"learning_rate": 1.5598159509202455e-06,
"loss": 2.4762,
"step": 27000
},
{
"epoch": 8.422664624808576,
"grad_norm": 8.003011703491211,
"learning_rate": 1.4217791411042944e-06,
"loss": 2.4585,
"step": 27500
},
{
"epoch": 8.575803981623277,
"grad_norm": 6.738711833953857,
"learning_rate": 1.2837423312883436e-06,
"loss": 2.4542,
"step": 28000
},
{
"epoch": 8.728943338437979,
"grad_norm": 6.110630512237549,
"learning_rate": 1.1457055214723925e-06,
"loss": 2.4222,
"step": 28500
},
{
"epoch": 8.88208269525268,
"grad_norm": 5.14086389541626,
"learning_rate": 1.0076687116564419e-06,
"loss": 2.4216,
"step": 29000
},
{
"epoch": 9.0,
"eval_accuracy": 0.6327058113987851,
"eval_loss": 2.581773042678833,
"eval_model_preparation_time": 0.0035,
"eval_runtime": 754.0093,
"eval_samples_per_second": 92.358,
"eval_steps_per_second": 11.545,
"step": 29385
},
{
"epoch": 9.03522205206738,
"grad_norm": 7.1302971839904785,
"learning_rate": 8.696319018404908e-07,
"loss": 2.4115,
"step": 29500
},
{
"epoch": 9.188361408882082,
"grad_norm": 5.568242073059082,
"learning_rate": 7.315950920245399e-07,
"loss": 2.3616,
"step": 30000
},
{
"epoch": 9.341500765696784,
"grad_norm": 4.705562591552734,
"learning_rate": 5.93558282208589e-07,
"loss": 2.3858,
"step": 30500
},
{
"epoch": 9.494640122511486,
"grad_norm": 6.591212749481201,
"learning_rate": 4.5552147239263803e-07,
"loss": 2.3935,
"step": 31000
},
{
"epoch": 9.647779479326188,
"grad_norm": 7.563518524169922,
"learning_rate": 3.174846625766871e-07,
"loss": 2.3611,
"step": 31500
},
{
"epoch": 9.800918836140887,
"grad_norm": 7.720861434936523,
"learning_rate": 1.794478527607362e-07,
"loss": 2.3667,
"step": 32000
},
{
"epoch": 9.95405819295559,
"grad_norm": 5.801350116729736,
"learning_rate": 4.141104294478528e-08,
"loss": 2.3529,
"step": 32500
},
{
"epoch": 10.0,
"eval_accuracy": 0.6372434986142822,
"eval_loss": 2.5461041927337646,
"eval_model_preparation_time": 0.0035,
"eval_runtime": 700.2631,
"eval_samples_per_second": 99.447,
"eval_steps_per_second": 12.431,
"step": 32650
}
],
"logging_steps": 500,
"max_steps": 32650,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 8.117946756716657e+19,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}