{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.6,
"eval_steps": 500,
"global_step": 10000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.016,
"grad_norm": 1.2292424440383911,
"learning_rate": 9.949748743718594e-05,
"loss": 2.6316,
"step": 100
},
{
"epoch": 0.032,
"grad_norm": 1.361342191696167,
"learning_rate": 9.84924623115578e-05,
"loss": 1.9567,
"step": 200
},
{
"epoch": 0.048,
"grad_norm": 1.3794206380844116,
"learning_rate": 9.748743718592965e-05,
"loss": 1.8415,
"step": 300
},
{
"epoch": 0.064,
"grad_norm": 1.2263352870941162,
"learning_rate": 9.64824120603015e-05,
"loss": 1.8072,
"step": 400
},
{
"epoch": 0.08,
"grad_norm": 1.1767858266830444,
"learning_rate": 9.547738693467337e-05,
"loss": 1.7898,
"step": 500
},
{
"epoch": 0.096,
"grad_norm": 1.173898696899414,
"learning_rate": 9.447236180904523e-05,
"loss": 1.7314,
"step": 600
},
{
"epoch": 0.112,
"grad_norm": 1.2088936567306519,
"learning_rate": 9.34673366834171e-05,
"loss": 1.7011,
"step": 700
},
{
"epoch": 0.128,
"grad_norm": 0.9866878986358643,
"learning_rate": 9.246231155778895e-05,
"loss": 1.6663,
"step": 800
},
{
"epoch": 0.144,
"grad_norm": 1.2384347915649414,
"learning_rate": 9.14572864321608e-05,
"loss": 1.6507,
"step": 900
},
{
"epoch": 0.16,
"grad_norm": 1.065297245979309,
"learning_rate": 9.045226130653267e-05,
"loss": 1.6705,
"step": 1000
},
{
"epoch": 0.176,
"grad_norm": 1.1226449012756348,
"learning_rate": 8.944723618090453e-05,
"loss": 1.6577,
"step": 1100
},
{
"epoch": 0.192,
"grad_norm": 0.9142518639564514,
"learning_rate": 8.84422110552764e-05,
"loss": 1.612,
"step": 1200
},
{
"epoch": 0.208,
"grad_norm": 1.1804460287094116,
"learning_rate": 8.743718592964825e-05,
"loss": 1.6083,
"step": 1300
},
{
"epoch": 0.224,
"grad_norm": 1.1100006103515625,
"learning_rate": 8.64321608040201e-05,
"loss": 1.6242,
"step": 1400
},
{
"epoch": 0.24,
"grad_norm": 1.1566694974899292,
"learning_rate": 8.542713567839196e-05,
"loss": 1.6111,
"step": 1500
},
{
"epoch": 0.256,
"grad_norm": 1.094859004020691,
"learning_rate": 8.442211055276383e-05,
"loss": 1.5816,
"step": 1600
},
{
"epoch": 0.272,
"grad_norm": 1.2286021709442139,
"learning_rate": 8.341708542713568e-05,
"loss": 1.578,
"step": 1700
},
{
"epoch": 0.288,
"grad_norm": 1.0682488679885864,
"learning_rate": 8.241206030150754e-05,
"loss": 1.5795,
"step": 1800
},
{
"epoch": 0.304,
"grad_norm": 1.1403608322143555,
"learning_rate": 8.14070351758794e-05,
"loss": 1.5676,
"step": 1900
},
{
"epoch": 0.32,
"grad_norm": 1.0942330360412598,
"learning_rate": 8.040201005025126e-05,
"loss": 1.5749,
"step": 2000
},
{
"epoch": 0.336,
"grad_norm": 1.060088872909546,
"learning_rate": 7.939698492462313e-05,
"loss": 1.5184,
"step": 2100
},
{
"epoch": 0.352,
"grad_norm": 1.085312008857727,
"learning_rate": 7.839195979899498e-05,
"loss": 1.5339,
"step": 2200
},
{
"epoch": 0.368,
"grad_norm": 1.303536295890808,
"learning_rate": 7.738693467336684e-05,
"loss": 1.515,
"step": 2300
},
{
"epoch": 0.384,
"grad_norm": 0.9337490797042847,
"learning_rate": 7.638190954773869e-05,
"loss": 1.5243,
"step": 2400
},
{
"epoch": 0.4,
"grad_norm": 1.2959569692611694,
"learning_rate": 7.537688442211056e-05,
"loss": 1.4846,
"step": 2500
},
{
"epoch": 0.416,
"grad_norm": 1.1419408321380615,
"learning_rate": 7.437185929648241e-05,
"loss": 1.5158,
"step": 2600
},
{
"epoch": 0.432,
"grad_norm": 0.9983295202255249,
"learning_rate": 7.336683417085427e-05,
"loss": 1.4873,
"step": 2700
},
{
"epoch": 0.448,
"grad_norm": 1.1773889064788818,
"learning_rate": 7.236180904522614e-05,
"loss": 1.4894,
"step": 2800
},
{
"epoch": 0.464,
"grad_norm": 1.2258810997009277,
"learning_rate": 7.135678391959799e-05,
"loss": 1.5015,
"step": 2900
},
{
"epoch": 0.48,
"grad_norm": 1.1287764310836792,
"learning_rate": 7.035175879396985e-05,
"loss": 1.5166,
"step": 3000
},
{
"epoch": 0.496,
"grad_norm": 1.1293085813522339,
"learning_rate": 6.93467336683417e-05,
"loss": 1.5117,
"step": 3100
},
{
"epoch": 0.512,
"grad_norm": 1.0602566003799438,
"learning_rate": 6.834170854271357e-05,
"loss": 1.455,
"step": 3200
},
{
"epoch": 0.528,
"grad_norm": 1.2482367753982544,
"learning_rate": 6.733668341708544e-05,
"loss": 1.4356,
"step": 3300
},
{
"epoch": 0.544,
"grad_norm": 1.35064697265625,
"learning_rate": 6.633165829145729e-05,
"loss": 1.4528,
"step": 3400
},
{
"epoch": 0.56,
"grad_norm": 1.065523386001587,
"learning_rate": 6.532663316582915e-05,
"loss": 1.4706,
"step": 3500
},
{
"epoch": 0.576,
"grad_norm": 1.4030505418777466,
"learning_rate": 6.4321608040201e-05,
"loss": 1.4325,
"step": 3600
},
{
"epoch": 0.592,
"grad_norm": 1.1023573875427246,
"learning_rate": 6.331658291457287e-05,
"loss": 1.455,
"step": 3700
},
{
"epoch": 0.608,
"grad_norm": 1.179084062576294,
"learning_rate": 6.231155778894473e-05,
"loss": 1.4552,
"step": 3800
},
{
"epoch": 0.624,
"grad_norm": 1.0885223150253296,
"learning_rate": 6.130653266331658e-05,
"loss": 1.4178,
"step": 3900
},
{
"epoch": 0.64,
"grad_norm": 1.3725833892822266,
"learning_rate": 6.030150753768844e-05,
"loss": 1.456,
"step": 4000
},
{
"epoch": 0.656,
"grad_norm": 1.1671427488327026,
"learning_rate": 5.929648241206031e-05,
"loss": 1.4552,
"step": 4100
},
{
"epoch": 0.672,
"grad_norm": 1.0521718263626099,
"learning_rate": 5.829145728643216e-05,
"loss": 1.4236,
"step": 4200
},
{
"epoch": 0.688,
"grad_norm": 1.1262151002883911,
"learning_rate": 5.728643216080403e-05,
"loss": 1.456,
"step": 4300
},
{
"epoch": 0.704,
"grad_norm": 1.090331792831421,
"learning_rate": 5.628140703517588e-05,
"loss": 1.4021,
"step": 4400
},
{
"epoch": 0.72,
"grad_norm": 1.1581507921218872,
"learning_rate": 5.527638190954774e-05,
"loss": 1.4708,
"step": 4500
},
{
"epoch": 0.736,
"grad_norm": 1.1916351318359375,
"learning_rate": 5.4271356783919604e-05,
"loss": 1.4283,
"step": 4600
},
{
"epoch": 0.752,
"grad_norm": 1.2623261213302612,
"learning_rate": 5.3266331658291455e-05,
"loss": 1.4593,
"step": 4700
},
{
"epoch": 0.768,
"grad_norm": 1.2002214193344116,
"learning_rate": 5.226130653266332e-05,
"loss": 1.4387,
"step": 4800
},
{
"epoch": 0.784,
"grad_norm": 1.0627392530441284,
"learning_rate": 5.125628140703518e-05,
"loss": 1.4313,
"step": 4900
},
{
"epoch": 0.8,
"grad_norm": 1.2739390134811401,
"learning_rate": 5.0251256281407036e-05,
"loss": 1.4024,
"step": 5000
},
{
"epoch": 0.816,
"grad_norm": 1.3108317852020264,
"learning_rate": 4.92462311557789e-05,
"loss": 1.4385,
"step": 5100
},
{
"epoch": 0.832,
"grad_norm": 1.4682525396347046,
"learning_rate": 4.824120603015075e-05,
"loss": 1.4015,
"step": 5200
},
{
"epoch": 0.848,
"grad_norm": 1.301832675933838,
"learning_rate": 4.723618090452262e-05,
"loss": 1.3995,
"step": 5300
},
{
"epoch": 0.864,
"grad_norm": 1.3100578784942627,
"learning_rate": 4.6231155778894475e-05,
"loss": 1.4203,
"step": 5400
},
{
"epoch": 0.88,
"grad_norm": 1.2472883462905884,
"learning_rate": 4.522613065326633e-05,
"loss": 1.3984,
"step": 5500
},
{
"epoch": 0.896,
"grad_norm": 1.1501699686050415,
"learning_rate": 4.42211055276382e-05,
"loss": 1.4177,
"step": 5600
},
{
"epoch": 0.912,
"grad_norm": 1.306634783744812,
"learning_rate": 4.321608040201005e-05,
"loss": 1.4013,
"step": 5700
},
{
"epoch": 0.928,
"grad_norm": 1.199546217918396,
"learning_rate": 4.2211055276381914e-05,
"loss": 1.3998,
"step": 5800
},
{
"epoch": 0.944,
"grad_norm": 1.4669443368911743,
"learning_rate": 4.120603015075377e-05,
"loss": 1.3858,
"step": 5900
},
{
"epoch": 0.96,
"grad_norm": 1.1618568897247314,
"learning_rate": 4.020100502512563e-05,
"loss": 1.3952,
"step": 6000
},
{
"epoch": 0.976,
"grad_norm": 1.3658894300460815,
"learning_rate": 3.919597989949749e-05,
"loss": 1.34,
"step": 6100
},
{
"epoch": 0.992,
"grad_norm": 1.1548917293548584,
"learning_rate": 3.8190954773869346e-05,
"loss": 1.3753,
"step": 6200
},
{
"epoch": 1.008,
"grad_norm": 1.250981092453003,
"learning_rate": 3.7185929648241204e-05,
"loss": 1.363,
"step": 6300
},
{
"epoch": 1.024,
"grad_norm": 1.1988142728805542,
"learning_rate": 3.618090452261307e-05,
"loss": 1.2739,
"step": 6400
},
{
"epoch": 1.04,
"grad_norm": 1.3094350099563599,
"learning_rate": 3.517587939698493e-05,
"loss": 1.3268,
"step": 6500
},
{
"epoch": 1.056,
"grad_norm": 1.4513778686523438,
"learning_rate": 3.4170854271356785e-05,
"loss": 1.3114,
"step": 6600
},
{
"epoch": 1.072,
"grad_norm": 1.2981783151626587,
"learning_rate": 3.3165829145728643e-05,
"loss": 1.2866,
"step": 6700
},
{
"epoch": 1.088,
"grad_norm": 1.350372314453125,
"learning_rate": 3.21608040201005e-05,
"loss": 1.2909,
"step": 6800
},
{
"epoch": 1.104,
"grad_norm": 1.1077184677124023,
"learning_rate": 3.1155778894472366e-05,
"loss": 1.2278,
"step": 6900
},
{
"epoch": 1.12,
"grad_norm": 1.3056607246398926,
"learning_rate": 3.015075376884422e-05,
"loss": 1.2573,
"step": 7000
},
{
"epoch": 1.1360000000000001,
"grad_norm": 1.38368558883667,
"learning_rate": 2.914572864321608e-05,
"loss": 1.3041,
"step": 7100
},
{
"epoch": 1.152,
"grad_norm": 1.7526077032089233,
"learning_rate": 2.814070351758794e-05,
"loss": 1.3056,
"step": 7200
},
{
"epoch": 1.168,
"grad_norm": 1.1916877031326294,
"learning_rate": 2.7135678391959802e-05,
"loss": 1.2359,
"step": 7300
},
{
"epoch": 1.184,
"grad_norm": 1.326968789100647,
"learning_rate": 2.613065326633166e-05,
"loss": 1.2529,
"step": 7400
},
{
"epoch": 1.2,
"grad_norm": 1.502866506576538,
"learning_rate": 2.5125628140703518e-05,
"loss": 1.3043,
"step": 7500
},
{
"epoch": 1.216,
"grad_norm": 1.7037489414215088,
"learning_rate": 2.4120603015075376e-05,
"loss": 1.3254,
"step": 7600
},
{
"epoch": 1.232,
"grad_norm": 1.3369475603103638,
"learning_rate": 2.3115577889447238e-05,
"loss": 1.3274,
"step": 7700
},
{
"epoch": 1.248,
"grad_norm": 1.3407210111618042,
"learning_rate": 2.21105527638191e-05,
"loss": 1.2879,
"step": 7800
},
{
"epoch": 1.264,
"grad_norm": 1.5996978282928467,
"learning_rate": 2.1105527638190957e-05,
"loss": 1.2853,
"step": 7900
},
{
"epoch": 1.28,
"grad_norm": 1.3061344623565674,
"learning_rate": 2.0100502512562815e-05,
"loss": 1.274,
"step": 8000
},
{
"epoch": 1.296,
"grad_norm": 1.335577130317688,
"learning_rate": 1.9105527638190956e-05,
"loss": 1.2482,
"step": 8100
},
{
"epoch": 1.312,
"grad_norm": 1.632110834121704,
"learning_rate": 1.8100502512562814e-05,
"loss": 1.2849,
"step": 8200
},
{
"epoch": 1.328,
"grad_norm": 1.457372784614563,
"learning_rate": 1.7095477386934675e-05,
"loss": 1.27,
"step": 8300
},
{
"epoch": 1.3439999999999999,
"grad_norm": 1.3104965686798096,
"learning_rate": 1.6090452261306533e-05,
"loss": 1.2698,
"step": 8400
},
{
"epoch": 1.3599999999999999,
"grad_norm": 1.350401520729065,
"learning_rate": 1.5085427135678393e-05,
"loss": 1.2337,
"step": 8500
},
{
"epoch": 1.376,
"grad_norm": 1.3079415559768677,
"learning_rate": 1.4080402010050253e-05,
"loss": 1.2904,
"step": 8600
},
{
"epoch": 1.392,
"grad_norm": 1.3506203889846802,
"learning_rate": 1.3075376884422111e-05,
"loss": 1.2847,
"step": 8700
},
{
"epoch": 1.408,
"grad_norm": 1.4178451299667358,
"learning_rate": 1.2070351758793969e-05,
"loss": 1.2713,
"step": 8800
},
{
"epoch": 1.424,
"grad_norm": 1.2672168016433716,
"learning_rate": 1.106532663316583e-05,
"loss": 1.2634,
"step": 8900
},
{
"epoch": 1.44,
"grad_norm": 1.4467307329177856,
"learning_rate": 1.0070351758793971e-05,
"loss": 1.2868,
"step": 9000
},
{
"epoch": 1.456,
"grad_norm": 1.5032036304473877,
"learning_rate": 9.06532663316583e-06,
"loss": 1.2323,
"step": 9100
},
{
"epoch": 1.472,
"grad_norm": 1.1872940063476562,
"learning_rate": 8.060301507537689e-06,
"loss": 1.2868,
"step": 9200
},
{
"epoch": 1.488,
"grad_norm": 1.6626771688461304,
"learning_rate": 7.055276381909548e-06,
"loss": 1.27,
"step": 9300
},
{
"epoch": 1.504,
"grad_norm": 1.3130452632904053,
"learning_rate": 6.050251256281407e-06,
"loss": 1.2542,
"step": 9400
},
{
"epoch": 1.52,
"grad_norm": 1.4746296405792236,
"learning_rate": 5.045226130653267e-06,
"loss": 1.257,
"step": 9500
},
{
"epoch": 1.536,
"grad_norm": 1.3648103475570679,
"learning_rate": 4.0402010050251256e-06,
"loss": 1.2487,
"step": 9600
},
{
"epoch": 1.552,
"grad_norm": 1.3191380500793457,
"learning_rate": 3.035175879396985e-06,
"loss": 1.2557,
"step": 9700
},
{
"epoch": 1.568,
"grad_norm": 1.806413173675537,
"learning_rate": 2.0301507537688442e-06,
"loss": 1.2323,
"step": 9800
},
{
"epoch": 1.584,
"grad_norm": 1.6092606782913208,
"learning_rate": 1.0251256281407035e-06,
"loss": 1.2321,
"step": 9900
},
{
"epoch": 1.6,
"grad_norm": 1.3367202281951904,
"learning_rate": 2.0100502512562817e-08,
"loss": 1.2361,
"step": 10000
}
],
"logging_steps": 100,
"max_steps": 10000,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 7.995709021001564e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}