yotta-embeddings / trainer_state.json
djovak's picture
add model v2, finetuned with triplets
d407180
raw
history blame contribute delete
No virus
12.3 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.13123359580052493,
"eval_steps": 500,
"global_step": 100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 0.0,
"loss": 4.396,
"step": 1
},
{
"epoch": 0.0,
"learning_rate": 3.826919265136599e-07,
"loss": 4.21,
"step": 2
},
{
"epoch": 0.0,
"learning_rate": 6.065523528528873e-07,
"loss": 4.3296,
"step": 3
},
{
"epoch": 0.01,
"learning_rate": 7.653838530273198e-07,
"loss": 4.334,
"step": 4
},
{
"epoch": 0.01,
"learning_rate": 8.885831358586367e-07,
"loss": 4.3884,
"step": 5
},
{
"epoch": 0.01,
"learning_rate": 9.892442793665471e-07,
"loss": 4.32,
"step": 6
},
{
"epoch": 0.01,
"learning_rate": 1.07435206352983e-06,
"loss": 4.2667,
"step": 7
},
{
"epoch": 0.01,
"learning_rate": 1.1480757795409794e-06,
"loss": 4.2037,
"step": 8
},
{
"epoch": 0.01,
"learning_rate": 1.2131047057057746e-06,
"loss": 4.4414,
"step": 9
},
{
"epoch": 0.01,
"learning_rate": 1.2712750623722968e-06,
"loss": 4.1682,
"step": 10
},
{
"epoch": 0.01,
"learning_rate": 1.3238965507785759e-06,
"loss": 4.2204,
"step": 11
},
{
"epoch": 0.02,
"learning_rate": 1.371936205880207e-06,
"loss": 4.2052,
"step": 12
},
{
"epoch": 0.02,
"learning_rate": 1.4161284046830791e-06,
"loss": 4.1135,
"step": 13
},
{
"epoch": 0.02,
"learning_rate": 1.4570439900434897e-06,
"loss": 3.9597,
"step": 14
},
{
"epoch": 0.02,
"learning_rate": 1.495135488711524e-06,
"loss": 3.8066,
"step": 15
},
{
"epoch": 0.02,
"learning_rate": 1.5307677060546395e-06,
"loss": 4.2143,
"step": 16
},
{
"epoch": 0.02,
"learning_rate": 1.5642390292710904e-06,
"loss": 4.0728,
"step": 17
},
{
"epoch": 0.02,
"learning_rate": 1.5957966322194344e-06,
"loss": 4.3082,
"step": 18
},
{
"epoch": 0.02,
"learning_rate": 1.6256475638101063e-06,
"loss": 4.2929,
"step": 19
},
{
"epoch": 0.03,
"learning_rate": 1.6539669888859565e-06,
"loss": 3.6756,
"step": 20
},
{
"epoch": 0.03,
"learning_rate": 1.6809044163827172e-06,
"loss": 4.0849,
"step": 21
},
{
"epoch": 0.03,
"learning_rate": 1.706588477292236e-06,
"loss": 4.216,
"step": 22
},
{
"epoch": 0.03,
"learning_rate": 1.7311306396673579e-06,
"loss": 4.128,
"step": 23
},
{
"epoch": 0.03,
"learning_rate": 1.7546281323938668e-06,
"loss": 3.9854,
"step": 24
},
{
"epoch": 0.03,
"learning_rate": 1.7771662717172735e-06,
"loss": 4.1697,
"step": 25
},
{
"epoch": 0.03,
"learning_rate": 1.7988203311967389e-06,
"loss": 4.0979,
"step": 26
},
{
"epoch": 0.04,
"learning_rate": 1.8196570585586616e-06,
"loss": 3.9488,
"step": 27
},
{
"epoch": 0.04,
"learning_rate": 1.8397359165571497e-06,
"loss": 4.0495,
"step": 28
},
{
"epoch": 0.04,
"learning_rate": 1.859110105992117e-06,
"loss": 4.101,
"step": 29
},
{
"epoch": 0.04,
"learning_rate": 1.8778274152251838e-06,
"loss": 3.8262,
"step": 30
},
{
"epoch": 0.04,
"learning_rate": 1.8959309303488189e-06,
"loss": 3.9416,
"step": 31
},
{
"epoch": 0.04,
"learning_rate": 1.913459632568299e-06,
"loss": 3.7671,
"step": 32
},
{
"epoch": 0.04,
"learning_rate": 1.9304489036314633e-06,
"loss": 4.0306,
"step": 33
},
{
"epoch": 0.04,
"learning_rate": 1.9469309557847503e-06,
"loss": 3.9613,
"step": 34
},
{
"epoch": 0.05,
"learning_rate": 1.962935199388467e-06,
"loss": 3.7291,
"step": 35
},
{
"epoch": 0.05,
"learning_rate": 1.9784885587330943e-06,
"loss": 3.793,
"step": 36
},
{
"epoch": 0.05,
"learning_rate": 1.993615744575612e-06,
"loss": 3.6288,
"step": 37
},
{
"epoch": 0.05,
"learning_rate": 2.0083394903237663e-06,
"loss": 3.9299,
"step": 38
},
{
"epoch": 0.05,
"learning_rate": 2.022680757535966e-06,
"loss": 3.8319,
"step": 39
},
{
"epoch": 0.05,
"learning_rate": 2.0366589153996162e-06,
"loss": 3.7277,
"step": 40
},
{
"epoch": 0.05,
"learning_rate": 2.050291898044415e-06,
"loss": 3.7845,
"step": 41
},
{
"epoch": 0.06,
"learning_rate": 2.063596342896377e-06,
"loss": 3.5937,
"step": 42
},
{
"epoch": 0.06,
"learning_rate": 2.076587712750118e-06,
"loss": 3.5983,
"step": 43
},
{
"epoch": 0.06,
"learning_rate": 2.0892804038058957e-06,
"loss": 3.7483,
"step": 44
},
{
"epoch": 0.06,
"learning_rate": 2.101687841564411e-06,
"loss": 3.6843,
"step": 45
},
{
"epoch": 0.06,
"learning_rate": 2.113822566181018e-06,
"loss": 3.7431,
"step": 46
},
{
"epoch": 0.06,
"learning_rate": 2.1256963086398127e-06,
"loss": 3.5885,
"step": 47
},
{
"epoch": 0.06,
"learning_rate": 2.1373200589075267e-06,
"loss": 3.6473,
"step": 48
},
{
"epoch": 0.06,
"learning_rate": 2.14870412705966e-06,
"loss": 3.3018,
"step": 49
},
{
"epoch": 0.07,
"learning_rate": 2.159858198230933e-06,
"loss": 3.5044,
"step": 50
},
{
"epoch": 0.07,
"learning_rate": 2.170791382123977e-06,
"loss": 3.7595,
"step": 51
},
{
"epoch": 0.07,
"learning_rate": 2.181512257710399e-06,
"loss": 3.5821,
"step": 52
},
{
"epoch": 0.07,
"learning_rate": 2.192028913673789e-06,
"loss": 3.752,
"step": 53
},
{
"epoch": 0.07,
"learning_rate": 2.2023489850723216e-06,
"loss": 3.5635,
"step": 54
},
{
"epoch": 0.07,
"learning_rate": 2.2124796866372125e-06,
"loss": 3.5492,
"step": 55
},
{
"epoch": 0.07,
"learning_rate": 2.22242784307081e-06,
"loss": 3.4708,
"step": 56
},
{
"epoch": 0.07,
"learning_rate": 2.2321999166629935e-06,
"loss": 3.4797,
"step": 57
},
{
"epoch": 0.08,
"learning_rate": 2.2418020325057765e-06,
"loss": 3.5858,
"step": 58
},
{
"epoch": 0.08,
"learning_rate": 2.251240001552474e-06,
"loss": 3.5595,
"step": 59
},
{
"epoch": 0.08,
"learning_rate": 2.2605193417388435e-06,
"loss": 3.5123,
"step": 60
},
{
"epoch": 0.08,
"learning_rate": 2.269645297358435e-06,
"loss": 3.4325,
"step": 61
},
{
"epoch": 0.08,
"learning_rate": 2.278622856862479e-06,
"loss": 3.4439,
"step": 62
},
{
"epoch": 0.08,
"learning_rate": 2.2874567692356042e-06,
"loss": 3.4072,
"step": 63
},
{
"epoch": 0.08,
"learning_rate": 2.2961515590819588e-06,
"loss": 3.6176,
"step": 64
},
{
"epoch": 0.09,
"learning_rate": 2.3047115405417158e-06,
"loss": 3.5513,
"step": 65
},
{
"epoch": 0.09,
"learning_rate": 2.313140830145123e-06,
"loss": 3.5387,
"step": 66
},
{
"epoch": 0.09,
"learning_rate": 2.3214433586999722e-06,
"loss": 3.3643,
"step": 67
},
{
"epoch": 0.09,
"learning_rate": 2.32962288229841e-06,
"loss": 3.329,
"step": 68
},
{
"epoch": 0.09,
"learning_rate": 2.3376829925202453e-06,
"loss": 3.3631,
"step": 69
},
{
"epoch": 0.09,
"learning_rate": 2.3456271259021266e-06,
"loss": 3.505,
"step": 70
},
{
"epoch": 0.09,
"learning_rate": 2.3534585727350773e-06,
"loss": 3.3134,
"step": 71
},
{
"epoch": 0.09,
"learning_rate": 2.361180485246754e-06,
"loss": 3.4492,
"step": 72
},
{
"epoch": 0.1,
"learning_rate": 2.3687958852193585e-06,
"loss": 3.2279,
"step": 73
},
{
"epoch": 0.1,
"learning_rate": 2.376307671089272e-06,
"loss": 3.3059,
"step": 74
},
{
"epoch": 0.1,
"learning_rate": 2.3837186245701603e-06,
"loss": 3.4335,
"step": 75
},
{
"epoch": 0.1,
"learning_rate": 2.391031416837426e-06,
"loss": 3.3363,
"step": 76
},
{
"epoch": 0.1,
"learning_rate": 2.398248614308406e-06,
"loss": 3.2824,
"step": 77
},
{
"epoch": 0.1,
"learning_rate": 2.4053726840496263e-06,
"loss": 3.3852,
"step": 78
},
{
"epoch": 0.1,
"learning_rate": 2.4124059988396154e-06,
"loss": 3.5303,
"step": 79
},
{
"epoch": 0.1,
"learning_rate": 2.419350841913276e-06,
"loss": 3.3088,
"step": 80
},
{
"epoch": 0.11,
"learning_rate": 2.4262094114115493e-06,
"loss": 3.3774,
"step": 81
},
{
"epoch": 0.11,
"learning_rate": 2.4329838245580745e-06,
"loss": 3.3502,
"step": 82
},
{
"epoch": 0.11,
"learning_rate": 2.4396761215827013e-06,
"loss": 3.3218,
"step": 83
},
{
"epoch": 0.11,
"learning_rate": 2.4462882694100367e-06,
"loss": 3.4322,
"step": 84
},
{
"epoch": 0.11,
"learning_rate": 2.452822165129727e-06,
"loss": 3.297,
"step": 85
},
{
"epoch": 0.11,
"learning_rate": 2.4592796392637773e-06,
"loss": 3.297,
"step": 86
},
{
"epoch": 0.11,
"learning_rate": 2.4656624588450042e-06,
"loss": 3.3604,
"step": 87
},
{
"epoch": 0.12,
"learning_rate": 2.471972330319556e-06,
"loss": 3.2638,
"step": 88
},
{
"epoch": 0.12,
"learning_rate": 2.478210902285443e-06,
"loss": 3.186,
"step": 89
},
{
"epoch": 0.12,
"learning_rate": 2.484379768078071e-06,
"loss": 3.3819,
"step": 90
},
{
"epoch": 0.12,
"learning_rate": 2.490480468212909e-06,
"loss": 3.362,
"step": 91
},
{
"epoch": 0.12,
"learning_rate": 2.4965144926946778e-06,
"loss": 3.3608,
"step": 92
},
{
"epoch": 0.12,
"learning_rate": 2.5024832832017063e-06,
"loss": 3.2796,
"step": 93
},
{
"epoch": 0.12,
"learning_rate": 2.5083882351534724e-06,
"loss": 3.2159,
"step": 94
},
{
"epoch": 0.12,
"learning_rate": 2.514230699668743e-06,
"loss": 3.2004,
"step": 95
},
{
"epoch": 0.13,
"learning_rate": 2.5200119854211865e-06,
"loss": 3.1361,
"step": 96
},
{
"epoch": 0.13,
"learning_rate": 2.5257333603988366e-06,
"loss": 3.354,
"step": 97
},
{
"epoch": 0.13,
"learning_rate": 2.5313960535733198e-06,
"loss": 3.1423,
"step": 98
},
{
"epoch": 0.13,
"learning_rate": 2.5370012564843503e-06,
"loss": 3.2997,
"step": 99
},
{
"epoch": 0.13,
"learning_rate": 2.5425501247445936e-06,
"loss": 3.3779,
"step": 100
}
],
"logging_steps": 1.0,
"max_steps": 762,
"num_train_epochs": 1,
"save_steps": 50,
"total_flos": 0.0,
"trial_name": null,
"trial_params": null
}