VILA-2.7b / trainer_state.json
klldmofashi's picture
Upload files with huggingface_hub
b401d23 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 527,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"grad_norm": 18.569529999247134,
"learning_rate": 6.25e-06,
"loss": 2.2344,
"step": 1
},
{
"epoch": 0.0,
"grad_norm": 17.774880014109563,
"learning_rate": 1.25e-05,
"loss": 2.2188,
"step": 2
},
{
"epoch": 0.01,
"grad_norm": 16.126910180369833,
"learning_rate": 1.8750000000000002e-05,
"loss": 2.0938,
"step": 3
},
{
"epoch": 0.01,
"grad_norm": 4.004205399706303,
"learning_rate": 2.5e-05,
"loss": 1.5625,
"step": 4
},
{
"epoch": 0.01,
"grad_norm": 4.475863116578534,
"learning_rate": 3.125e-05,
"loss": 1.5547,
"step": 5
},
{
"epoch": 0.01,
"grad_norm": 3.9418457877357183,
"learning_rate": 3.7500000000000003e-05,
"loss": 1.5312,
"step": 6
},
{
"epoch": 0.01,
"grad_norm": 3.35497476295258,
"learning_rate": 4.375e-05,
"loss": 1.5312,
"step": 7
},
{
"epoch": 0.02,
"grad_norm": 2.0238486012287824,
"learning_rate": 5e-05,
"loss": 1.4453,
"step": 8
},
{
"epoch": 0.02,
"grad_norm": 1.3025920158121342,
"learning_rate": 5.6250000000000005e-05,
"loss": 1.4062,
"step": 9
},
{
"epoch": 0.02,
"grad_norm": 2.1578221590331648,
"learning_rate": 6.25e-05,
"loss": 1.4141,
"step": 10
},
{
"epoch": 0.02,
"grad_norm": 1.639048247445056,
"learning_rate": 6.875e-05,
"loss": 1.3906,
"step": 11
},
{
"epoch": 0.02,
"grad_norm": 1.0758955455274364,
"learning_rate": 7.500000000000001e-05,
"loss": 1.3672,
"step": 12
},
{
"epoch": 0.02,
"grad_norm": 0.9988841624058318,
"learning_rate": 8.125000000000001e-05,
"loss": 1.3516,
"step": 13
},
{
"epoch": 0.03,
"grad_norm": 1.036582323851064,
"learning_rate": 8.75e-05,
"loss": 1.3438,
"step": 14
},
{
"epoch": 0.03,
"grad_norm": 1.0040586950026587,
"learning_rate": 9.375e-05,
"loss": 1.3438,
"step": 15
},
{
"epoch": 0.03,
"grad_norm": 0.8112402128458829,
"learning_rate": 0.0001,
"loss": 1.3047,
"step": 16
},
{
"epoch": 0.03,
"grad_norm": 0.806608090114202,
"learning_rate": 9.999905507663936e-05,
"loss": 1.3203,
"step": 17
},
{
"epoch": 0.03,
"grad_norm": 0.8164159045230494,
"learning_rate": 9.99962203422726e-05,
"loss": 1.2812,
"step": 18
},
{
"epoch": 0.04,
"grad_norm": 0.7335139492736167,
"learning_rate": 9.9991495904044e-05,
"loss": 1.2969,
"step": 19
},
{
"epoch": 0.04,
"grad_norm": 0.799772371827826,
"learning_rate": 9.998488194052287e-05,
"loss": 1.2734,
"step": 20
},
{
"epoch": 0.04,
"grad_norm": 0.6718755777667486,
"learning_rate": 9.997637870169672e-05,
"loss": 1.2734,
"step": 21
},
{
"epoch": 0.04,
"grad_norm": 0.7599864750693303,
"learning_rate": 9.996598650896192e-05,
"loss": 1.2578,
"step": 22
},
{
"epoch": 0.04,
"grad_norm": 0.6886726902695142,
"learning_rate": 9.995370575511151e-05,
"loss": 1.2656,
"step": 23
},
{
"epoch": 0.05,
"grad_norm": 0.6823417918062775,
"learning_rate": 9.993953690432031e-05,
"loss": 1.2734,
"step": 24
},
{
"epoch": 0.05,
"grad_norm": 0.6382222948594751,
"learning_rate": 9.99234804921275e-05,
"loss": 1.2422,
"step": 25
},
{
"epoch": 0.05,
"grad_norm": 0.6452753693126838,
"learning_rate": 9.990553712541617e-05,
"loss": 1.2422,
"step": 26
},
{
"epoch": 0.05,
"grad_norm": 0.5904936283655213,
"learning_rate": 9.988570748239062e-05,
"loss": 1.2266,
"step": 27
},
{
"epoch": 0.05,
"grad_norm": 0.5640365175871925,
"learning_rate": 9.986399231255056e-05,
"loss": 1.2031,
"step": 28
},
{
"epoch": 0.06,
"grad_norm": 0.514358456822939,
"learning_rate": 9.984039243666283e-05,
"loss": 1.2344,
"step": 29
},
{
"epoch": 0.06,
"grad_norm": 0.636209974460906,
"learning_rate": 9.981490874673039e-05,
"loss": 1.1953,
"step": 30
},
{
"epoch": 0.06,
"grad_norm": 0.5479990507502773,
"learning_rate": 9.978754220595861e-05,
"loss": 1.2031,
"step": 31
},
{
"epoch": 0.06,
"grad_norm": 0.5561261900343224,
"learning_rate": 9.975829384871884e-05,
"loss": 1.2344,
"step": 32
},
{
"epoch": 0.06,
"grad_norm": 0.4948569883144301,
"learning_rate": 9.97271647805093e-05,
"loss": 1.2109,
"step": 33
},
{
"epoch": 0.06,
"grad_norm": 0.5167119119909488,
"learning_rate": 9.969415617791336e-05,
"loss": 1.2266,
"step": 34
},
{
"epoch": 0.07,
"grad_norm": 0.508354119180698,
"learning_rate": 9.965926928855499e-05,
"loss": 1.2109,
"step": 35
},
{
"epoch": 0.07,
"grad_norm": 0.5326133584880987,
"learning_rate": 9.962250543105167e-05,
"loss": 1.1797,
"step": 36
},
{
"epoch": 0.07,
"grad_norm": 0.47996354720772333,
"learning_rate": 9.95838659949645e-05,
"loss": 1.1719,
"step": 37
},
{
"epoch": 0.07,
"grad_norm": 0.4584403899961059,
"learning_rate": 9.954335244074574e-05,
"loss": 1.1797,
"step": 38
},
{
"epoch": 0.07,
"grad_norm": 0.478024134726985,
"learning_rate": 9.950096629968352e-05,
"loss": 1.1953,
"step": 39
},
{
"epoch": 0.08,
"grad_norm": 0.4831225145909792,
"learning_rate": 9.945670917384403e-05,
"loss": 1.1797,
"step": 40
},
{
"epoch": 0.08,
"grad_norm": 0.47054504799219404,
"learning_rate": 9.941058273601096e-05,
"loss": 1.1719,
"step": 41
},
{
"epoch": 0.08,
"grad_norm": 0.4940788339187542,
"learning_rate": 9.936258872962228e-05,
"loss": 1.1719,
"step": 42
},
{
"epoch": 0.08,
"grad_norm": 0.44655071198353546,
"learning_rate": 9.931272896870426e-05,
"loss": 1.1953,
"step": 43
},
{
"epoch": 0.08,
"grad_norm": 0.4620978351861828,
"learning_rate": 9.926100533780303e-05,
"loss": 1.1797,
"step": 44
},
{
"epoch": 0.09,
"grad_norm": 0.4777738095085538,
"learning_rate": 9.920741979191331e-05,
"loss": 1.1641,
"step": 45
},
{
"epoch": 0.09,
"grad_norm": 0.46687195896903444,
"learning_rate": 9.91519743564044e-05,
"loss": 1.1562,
"step": 46
},
{
"epoch": 0.09,
"grad_norm": 0.4572029233269296,
"learning_rate": 9.909467112694384e-05,
"loss": 1.1719,
"step": 47
},
{
"epoch": 0.09,
"grad_norm": 0.4379619610466461,
"learning_rate": 9.903551226941801e-05,
"loss": 1.1562,
"step": 48
},
{
"epoch": 0.09,
"grad_norm": 0.4625672515990539,
"learning_rate": 9.897450001985039e-05,
"loss": 1.1406,
"step": 49
},
{
"epoch": 0.09,
"grad_norm": 0.45763509639124433,
"learning_rate": 9.891163668431695e-05,
"loss": 1.1719,
"step": 50
},
{
"epoch": 0.1,
"grad_norm": 0.4759593297993926,
"learning_rate": 9.88469246388591e-05,
"loss": 1.1484,
"step": 51
},
{
"epoch": 0.1,
"grad_norm": 0.47690674058291993,
"learning_rate": 9.878036632939374e-05,
"loss": 1.2031,
"step": 52
},
{
"epoch": 0.1,
"grad_norm": 0.44532349751989236,
"learning_rate": 9.871196427162092e-05,
"loss": 1.1172,
"step": 53
},
{
"epoch": 0.1,
"grad_norm": 0.452106382176349,
"learning_rate": 9.86417210509288e-05,
"loss": 1.1094,
"step": 54
},
{
"epoch": 0.1,
"grad_norm": 0.452753191760464,
"learning_rate": 9.85696393222957e-05,
"loss": 1.1953,
"step": 55
},
{
"epoch": 0.11,
"grad_norm": 0.45441530517528445,
"learning_rate": 9.849572181019007e-05,
"loss": 1.1406,
"step": 56
},
{
"epoch": 0.11,
"grad_norm": 0.5007896403646297,
"learning_rate": 9.84199713084672e-05,
"loss": 1.1406,
"step": 57
},
{
"epoch": 0.11,
"grad_norm": 0.4608644722591853,
"learning_rate": 9.834239068026387e-05,
"loss": 1.1328,
"step": 58
},
{
"epoch": 0.11,
"grad_norm": 0.4968111745587068,
"learning_rate": 9.826298285789002e-05,
"loss": 1.1328,
"step": 59
},
{
"epoch": 0.11,
"grad_norm": 0.44573050581792,
"learning_rate": 9.818175084271786e-05,
"loss": 1.125,
"step": 60
},
{
"epoch": 0.12,
"grad_norm": 0.46589557608902915,
"learning_rate": 9.809869770506856e-05,
"loss": 1.1328,
"step": 61
},
{
"epoch": 0.12,
"grad_norm": 0.45678602237538535,
"learning_rate": 9.80138265840961e-05,
"loss": 1.1328,
"step": 62
},
{
"epoch": 0.12,
"grad_norm": 0.43959375518292026,
"learning_rate": 9.792714068766872e-05,
"loss": 1.125,
"step": 63
},
{
"epoch": 0.12,
"grad_norm": 0.41984631355693813,
"learning_rate": 9.783864329224752e-05,
"loss": 1.1406,
"step": 64
},
{
"epoch": 0.12,
"grad_norm": 0.43389373514903795,
"learning_rate": 9.774833774276278e-05,
"loss": 1.1172,
"step": 65
},
{
"epoch": 0.13,
"grad_norm": 0.4336892341975021,
"learning_rate": 9.765622745248739e-05,
"loss": 1.1406,
"step": 66
},
{
"epoch": 0.13,
"grad_norm": 0.4410380396458464,
"learning_rate": 9.7562315902908e-05,
"loss": 1.1172,
"step": 67
},
{
"epoch": 0.13,
"grad_norm": 0.5141753129119535,
"learning_rate": 9.746660664359326e-05,
"loss": 1.1172,
"step": 68
},
{
"epoch": 0.13,
"grad_norm": 0.46159702650365997,
"learning_rate": 9.736910329205978e-05,
"loss": 1.1328,
"step": 69
},
{
"epoch": 0.13,
"grad_norm": 0.45575998013176866,
"learning_rate": 9.726980953363536e-05,
"loss": 1.1328,
"step": 70
},
{
"epoch": 0.13,
"grad_norm": 0.4458400134106401,
"learning_rate": 9.716872912131964e-05,
"loss": 1.1172,
"step": 71
},
{
"epoch": 0.14,
"grad_norm": 0.4403442516042718,
"learning_rate": 9.706586587564237e-05,
"loss": 1.1406,
"step": 72
},
{
"epoch": 0.14,
"grad_norm": 0.4513799624319814,
"learning_rate": 9.696122368451886e-05,
"loss": 1.1406,
"step": 73
},
{
"epoch": 0.14,
"grad_norm": 0.4348698465314392,
"learning_rate": 9.685480650310318e-05,
"loss": 1.1641,
"step": 74
},
{
"epoch": 0.14,
"grad_norm": 0.4498345537005005,
"learning_rate": 9.674661835363858e-05,
"loss": 1.125,
"step": 75
},
{
"epoch": 0.14,
"grad_norm": 0.43144309981302603,
"learning_rate": 9.663666332530541e-05,
"loss": 1.1172,
"step": 76
},
{
"epoch": 0.15,
"grad_norm": 0.4399921408069473,
"learning_rate": 9.652494557406666e-05,