food-recipe-generation / trainer_state.json
Shresthadev403's picture
End of training
ecc733f verified
raw
history blame
12.1 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 27.888001784832113,
"eval_steps": 50000,
"global_step": 1750000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.8,
"learning_rate": 4.9920355054102726e-05,
"loss": 1.1412,
"step": 50000
},
{
"epoch": 0.8,
"eval_loss": 1.011365294456482,
"eval_runtime": 2006.5534,
"eval_samples_per_second": 111.193,
"eval_steps_per_second": 1.738,
"step": 50000
},
{
"epoch": 1.59,
"learning_rate": 4.992035346050262e-05,
"loss": 1.0123,
"step": 100000
},
{
"epoch": 1.59,
"eval_loss": 0.9693423509597778,
"eval_runtime": 1995.4615,
"eval_samples_per_second": 111.811,
"eval_steps_per_second": 1.747,
"step": 100000
},
{
"epoch": 2.39,
"learning_rate": 4.992034708610221e-05,
"loss": 0.9754,
"step": 150000
},
{
"epoch": 2.39,
"eval_loss": 0.9472731351852417,
"eval_runtime": 1997.2157,
"eval_samples_per_second": 111.713,
"eval_steps_per_second": 1.746,
"step": 150000
},
{
"epoch": 3.19,
"learning_rate": 4.992034549250211e-05,
"loss": 0.9539,
"step": 200000
},
{
"epoch": 3.19,
"eval_loss": 0.9325647354125977,
"eval_runtime": 2015.1208,
"eval_samples_per_second": 110.72,
"eval_steps_per_second": 1.73,
"step": 200000
},
{
"epoch": 3.98,
"learning_rate": 4.992035027330242e-05,
"loss": 0.9387,
"step": 250000
},
{
"epoch": 3.98,
"eval_loss": 0.9212433099746704,
"eval_runtime": 2011.4269,
"eval_samples_per_second": 110.924,
"eval_steps_per_second": 1.734,
"step": 250000
},
{
"epoch": 4.78,
"learning_rate": 4.992035027330242e-05,
"loss": 0.9243,
"step": 300000
},
{
"epoch": 4.78,
"eval_loss": 0.9138051271438599,
"eval_runtime": 2011.3973,
"eval_samples_per_second": 110.925,
"eval_steps_per_second": 1.734,
"step": 300000
},
{
"epoch": 5.58,
"learning_rate": 4.992035186690252e-05,
"loss": 0.9144,
"step": 350000
},
{
"epoch": 5.58,
"eval_loss": 0.9093130826950073,
"eval_runtime": 1998.4573,
"eval_samples_per_second": 111.644,
"eval_steps_per_second": 1.745,
"step": 350000
},
{
"epoch": 6.37,
"learning_rate": 4.9920355054102726e-05,
"loss": 0.906,
"step": 400000
},
{
"epoch": 6.37,
"eval_loss": 0.9041373133659363,
"eval_runtime": 1998.7351,
"eval_samples_per_second": 111.628,
"eval_steps_per_second": 1.745,
"step": 400000
},
{
"epoch": 7.17,
"learning_rate": 4.9920355054102726e-05,
"loss": 0.8994,
"step": 450000
},
{
"epoch": 7.17,
"eval_loss": 0.9003444910049438,
"eval_runtime": 1982.6092,
"eval_samples_per_second": 112.536,
"eval_steps_per_second": 1.759,
"step": 450000
},
{
"epoch": 7.97,
"learning_rate": 4.992035186690252e-05,
"loss": 0.8933,
"step": 500000
},
{
"epoch": 7.97,
"eval_loss": 0.8956149220466614,
"eval_runtime": 2002.7479,
"eval_samples_per_second": 111.404,
"eval_steps_per_second": 1.741,
"step": 500000
},
{
"epoch": 8.76,
"learning_rate": 4.9920355054102726e-05,
"loss": 0.8856,
"step": 550000
},
{
"epoch": 8.76,
"eval_loss": 0.8930546045303345,
"eval_runtime": 1996.5839,
"eval_samples_per_second": 111.748,
"eval_steps_per_second": 1.746,
"step": 550000
},
{
"epoch": 9.56,
"learning_rate": 4.992035346050262e-05,
"loss": 0.8802,
"step": 600000
},
{
"epoch": 9.56,
"eval_loss": 0.89084392786026,
"eval_runtime": 1991.4984,
"eval_samples_per_second": 112.034,
"eval_steps_per_second": 1.751,
"step": 600000
},
{
"epoch": 10.36,
"learning_rate": 4.992035346050262e-05,
"loss": 0.8763,
"step": 650000
},
{
"epoch": 10.36,
"eval_loss": 0.8895950317382812,
"eval_runtime": 2006.0064,
"eval_samples_per_second": 111.223,
"eval_steps_per_second": 1.738,
"step": 650000
},
{
"epoch": 11.16,
"learning_rate": 4.992035186690252e-05,
"loss": 0.8725,
"step": 700000
},
{
"epoch": 11.16,
"eval_loss": 0.8886296153068542,
"eval_runtime": 2027.9335,
"eval_samples_per_second": 110.021,
"eval_steps_per_second": 1.719,
"step": 700000
},
{
"epoch": 11.95,
"learning_rate": 4.992035186690252e-05,
"loss": 0.8688,
"step": 750000
},
{
"epoch": 11.95,
"eval_loss": 0.885003924369812,
"eval_runtime": 1989.8507,
"eval_samples_per_second": 112.127,
"eval_steps_per_second": 1.752,
"step": 750000
},
{
"epoch": 12.75,
"learning_rate": 4.992035186690252e-05,
"loss": 0.8628,
"step": 800000
},
{
"epoch": 12.75,
"eval_loss": 0.8833887577056885,
"eval_runtime": 2010.1701,
"eval_samples_per_second": 110.993,
"eval_steps_per_second": 1.735,
"step": 800000
},
{
"epoch": 13.55,
"learning_rate": 4.9920348679702315e-05,
"loss": 0.8599,
"step": 850000
},
{
"epoch": 13.55,
"eval_loss": 0.883805513381958,
"eval_runtime": 2005.9725,
"eval_samples_per_second": 111.225,
"eval_steps_per_second": 1.738,
"step": 850000
},
{
"epoch": 14.34,
"learning_rate": 4.9920348679702315e-05,
"loss": 0.8572,
"step": 900000
},
{
"epoch": 14.34,
"eval_loss": 0.8837567567825317,
"eval_runtime": 2005.535,
"eval_samples_per_second": 111.25,
"eval_steps_per_second": 1.739,
"step": 900000
},
{
"epoch": 15.14,
"learning_rate": 4.9920348679702315e-05,
"loss": 0.8548,
"step": 950000
},
{
"epoch": 15.14,
"eval_loss": 0.8825677037239075,
"eval_runtime": 1984.894,
"eval_samples_per_second": 112.407,
"eval_steps_per_second": 1.757,
"step": 950000
},
{
"epoch": 15.94,
"learning_rate": 4.992034549250211e-05,
"loss": 0.8502,
"step": 1000000
},
{
"epoch": 15.94,
"eval_loss": 0.8808427453041077,
"eval_runtime": 2006.4913,
"eval_samples_per_second": 111.197,
"eval_steps_per_second": 1.738,
"step": 1000000
},
{
"epoch": 16.73,
"learning_rate": 4.992034708610221e-05,
"loss": 0.8471,
"step": 1050000
},
{
"epoch": 16.73,
"eval_loss": 0.8812766075134277,
"eval_runtime": 1998.6292,
"eval_samples_per_second": 111.634,
"eval_steps_per_second": 1.745,
"step": 1050000
},
{
"epoch": 17.53,
"learning_rate": 4.992034708610221e-05,
"loss": 0.8427,
"step": 1100000
},
{
"epoch": 17.53,
"eval_loss": 0.8817498683929443,
"eval_runtime": 1994.6872,
"eval_samples_per_second": 111.855,
"eval_steps_per_second": 1.748,
"step": 1100000
},
{
"epoch": 18.33,
"learning_rate": 4.992034549250211e-05,
"loss": 0.841,
"step": 1150000
},
{
"epoch": 18.33,
"eval_loss": 0.8802331686019897,
"eval_runtime": 1993.2844,
"eval_samples_per_second": 111.933,
"eval_steps_per_second": 1.749,
"step": 1150000
},
{
"epoch": 19.12,
"learning_rate": 4.992034549250211e-05,
"loss": 0.8399,
"step": 1200000
},
{
"epoch": 19.12,
"eval_loss": 0.8813353180885315,
"eval_runtime": 2003.661,
"eval_samples_per_second": 111.354,
"eval_steps_per_second": 1.74,
"step": 1200000
},
{
"epoch": 19.92,
"learning_rate": 4.9920348679702315e-05,
"loss": 0.8382,
"step": 1250000
},
{
"epoch": 19.92,
"eval_loss": 0.8779821991920471,
"eval_runtime": 1999.9414,
"eval_samples_per_second": 111.561,
"eval_steps_per_second": 1.744,
"step": 1250000
},
{
"epoch": 20.72,
"learning_rate": 4.992035186690252e-05,
"loss": 0.8356,
"step": 1300000
},
{
"epoch": 20.72,
"eval_loss": 0.878333330154419,
"eval_runtime": 1991.7989,
"eval_samples_per_second": 112.017,
"eval_steps_per_second": 1.751,
"step": 1300000
},
{
"epoch": 21.51,
"learning_rate": 4.992034708610221e-05,
"loss": 0.8311,
"step": 1350000
},
{
"epoch": 21.51,
"eval_loss": 0.8799993991851807,
"eval_runtime": 2009.116,
"eval_samples_per_second": 111.051,
"eval_steps_per_second": 1.736,
"step": 1350000
},
{
"epoch": 22.31,
"learning_rate": 4.992035027330242e-05,
"loss": 0.8297,
"step": 1400000
},
{
"epoch": 22.31,
"eval_loss": 0.8792157769203186,
"eval_runtime": 1985.6931,
"eval_samples_per_second": 112.361,
"eval_steps_per_second": 1.756,
"step": 1400000
},
{
"epoch": 23.11,
"learning_rate": 4.9920348679702315e-05,
"loss": 0.8294,
"step": 1450000
},
{
"epoch": 23.11,
"eval_loss": 0.8799900412559509,
"eval_runtime": 2021.1167,
"eval_samples_per_second": 110.392,
"eval_steps_per_second": 1.725,
"step": 1450000
},
{
"epoch": 23.9,
"learning_rate": 4.9920348679702315e-05,
"loss": 0.8277,
"step": 1500000
},
{
"epoch": 23.9,
"eval_loss": 0.8771235346794128,
"eval_runtime": 1995.2079,
"eval_samples_per_second": 111.825,
"eval_steps_per_second": 1.748,
"step": 1500000
},
{
"epoch": 24.7,
"learning_rate": 4.9920348679702315e-05,
"loss": 0.8254,
"step": 1550000
},
{
"epoch": 24.7,
"eval_loss": 0.8784825205802917,
"eval_runtime": 1994.3392,
"eval_samples_per_second": 111.874,
"eval_steps_per_second": 1.748,
"step": 1550000
},
{
"epoch": 25.5,
"learning_rate": 4.992034708610221e-05,
"loss": 0.821,
"step": 1600000
},
{
"epoch": 25.5,
"eval_loss": 0.8786540627479553,
"eval_runtime": 1998.0285,
"eval_samples_per_second": 111.668,
"eval_steps_per_second": 1.745,
"step": 1600000
},
{
"epoch": 26.29,
"learning_rate": 4.9920348679702315e-05,
"loss": 0.8201,
"step": 1650000
},
{
"epoch": 26.29,
"eval_loss": 0.8791316151618958,
"eval_runtime": 2008.615,
"eval_samples_per_second": 111.079,
"eval_steps_per_second": 1.736,
"step": 1650000
},
{
"epoch": 27.09,
"learning_rate": 4.9920348679702315e-05,
"loss": 0.8204,
"step": 1700000
},
{
"epoch": 27.09,
"eval_loss": 0.8804346323013306,
"eval_runtime": 2001.1642,
"eval_samples_per_second": 111.493,
"eval_steps_per_second": 1.742,
"step": 1700000
},
{
"epoch": 27.89,
"learning_rate": 4.992035186690252e-05,
"loss": 0.8188,
"step": 1750000
},
{
"epoch": 27.89,
"eval_loss": 0.875482976436615,
"eval_runtime": 2011.2204,
"eval_samples_per_second": 110.935,
"eval_steps_per_second": 1.734,
"step": 1750000
}
],
"logging_steps": 50000,
"max_steps": 31375500,
"num_input_tokens_seen": 0,
"num_train_epochs": 500,
"save_steps": 50000,
"total_flos": 3.65807962939392e+18,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}