yesj1234's picture
Upload folder using huggingface_hub
e818adf
raw
history blame
No virus
6.48 kB
{
"best_metric": 1.1251049041748047,
"best_model_checkpoint": "./mbartLarge_koja_mid2_run1/checkpoint-22708",
"epoch": 2.0,
"eval_steps": 500,
"global_step": 22708,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04,
"learning_rate": 4.977981328166285e-05,
"loss": 1.9241,
"step": 500
},
{
"epoch": 0.09,
"learning_rate": 4.95596265633257e-05,
"loss": 1.6494,
"step": 1000
},
{
"epoch": 0.13,
"learning_rate": 4.933943984498855e-05,
"loss": 1.5434,
"step": 1500
},
{
"epoch": 0.18,
"learning_rate": 4.91192531266514e-05,
"loss": 1.4836,
"step": 2000
},
{
"epoch": 0.22,
"learning_rate": 4.889906640831425e-05,
"loss": 1.4499,
"step": 2500
},
{
"epoch": 0.26,
"learning_rate": 4.86788796899771e-05,
"loss": 1.3934,
"step": 3000
},
{
"epoch": 0.31,
"learning_rate": 4.845869297163995e-05,
"loss": 1.3857,
"step": 3500
},
{
"epoch": 0.35,
"learning_rate": 4.82385062533028e-05,
"loss": 1.3396,
"step": 4000
},
{
"epoch": 0.4,
"learning_rate": 4.801831953496565e-05,
"loss": 1.3322,
"step": 4500
},
{
"epoch": 0.44,
"learning_rate": 4.77981328166285e-05,
"loss": 1.3106,
"step": 5000
},
{
"epoch": 0.48,
"learning_rate": 4.757794609829135e-05,
"loss": 1.2924,
"step": 5500
},
{
"epoch": 0.53,
"learning_rate": 4.73577593799542e-05,
"loss": 1.2834,
"step": 6000
},
{
"epoch": 0.57,
"learning_rate": 4.713757266161705e-05,
"loss": 1.2791,
"step": 6500
},
{
"epoch": 0.62,
"learning_rate": 4.69173859432799e-05,
"loss": 1.255,
"step": 7000
},
{
"epoch": 0.66,
"learning_rate": 4.669719922494275e-05,
"loss": 1.2573,
"step": 7500
},
{
"epoch": 0.7,
"learning_rate": 4.64770125066056e-05,
"loss": 1.2382,
"step": 8000
},
{
"epoch": 0.75,
"learning_rate": 4.625682578826845e-05,
"loss": 1.223,
"step": 8500
},
{
"epoch": 0.79,
"learning_rate": 4.6036639069931303e-05,
"loss": 1.2145,
"step": 9000
},
{
"epoch": 0.84,
"learning_rate": 4.5816452351594153e-05,
"loss": 1.219,
"step": 9500
},
{
"epoch": 0.88,
"learning_rate": 4.5596265633257004e-05,
"loss": 1.2125,
"step": 10000
},
{
"epoch": 0.92,
"learning_rate": 4.5376078914919854e-05,
"loss": 1.1996,
"step": 10500
},
{
"epoch": 0.97,
"learning_rate": 4.5155892196582704e-05,
"loss": 1.1823,
"step": 11000
},
{
"epoch": 1.0,
"eval_bleu": 29.4501,
"eval_gen_len": 18.8118,
"eval_loss": 1.1695398092269897,
"eval_runtime": 1716.1373,
"eval_samples_per_second": 13.231,
"eval_steps_per_second": 0.827,
"step": 11354
},
{
"epoch": 1.01,
"learning_rate": 4.4935705478245554e-05,
"loss": 1.1596,
"step": 11500
},
{
"epoch": 1.06,
"learning_rate": 4.4715518759908404e-05,
"loss": 1.0777,
"step": 12000
},
{
"epoch": 1.1,
"learning_rate": 4.4495332041571254e-05,
"loss": 1.0658,
"step": 12500
},
{
"epoch": 1.14,
"learning_rate": 4.427514532323411e-05,
"loss": 1.0296,
"step": 13000
},
{
"epoch": 1.19,
"learning_rate": 4.4054958604896954e-05,
"loss": 1.0269,
"step": 13500
},
{
"epoch": 1.23,
"learning_rate": 4.3834771886559804e-05,
"loss": 1.0192,
"step": 14000
},
{
"epoch": 1.28,
"learning_rate": 4.3614585168222654e-05,
"loss": 0.9946,
"step": 14500
},
{
"epoch": 1.32,
"learning_rate": 4.3394398449885504e-05,
"loss": 0.9996,
"step": 15000
},
{
"epoch": 1.37,
"learning_rate": 4.3174211731548354e-05,
"loss": 0.975,
"step": 15500
},
{
"epoch": 1.41,
"learning_rate": 4.295402501321121e-05,
"loss": 0.9874,
"step": 16000
},
{
"epoch": 1.45,
"learning_rate": 4.2733838294874054e-05,
"loss": 0.9683,
"step": 16500
},
{
"epoch": 1.5,
"learning_rate": 4.2513651576536904e-05,
"loss": 0.9675,
"step": 17000
},
{
"epoch": 1.54,
"learning_rate": 4.2293464858199754e-05,
"loss": 0.969,
"step": 17500
},
{
"epoch": 1.59,
"learning_rate": 4.2073278139862604e-05,
"loss": 0.9548,
"step": 18000
},
{
"epoch": 1.63,
"learning_rate": 4.1853091421525454e-05,
"loss": 0.9563,
"step": 18500
},
{
"epoch": 1.67,
"learning_rate": 4.163290470318831e-05,
"loss": 0.9536,
"step": 19000
},
{
"epoch": 1.72,
"learning_rate": 4.1412717984851155e-05,
"loss": 0.941,
"step": 19500
},
{
"epoch": 1.76,
"learning_rate": 4.1192531266514005e-05,
"loss": 0.9285,
"step": 20000
},
{
"epoch": 1.81,
"learning_rate": 4.0972344548176855e-05,
"loss": 0.9303,
"step": 20500
},
{
"epoch": 1.85,
"learning_rate": 4.0752157829839705e-05,
"loss": 0.9428,
"step": 21000
},
{
"epoch": 1.89,
"learning_rate": 4.0531971111502555e-05,
"loss": 0.9359,
"step": 21500
},
{
"epoch": 1.94,
"learning_rate": 4.031178439316541e-05,
"loss": 0.9302,
"step": 22000
},
{
"epoch": 1.98,
"learning_rate": 4.0091597674828255e-05,
"loss": 0.9207,
"step": 22500
},
{
"epoch": 2.0,
"eval_bleu": 30.842,
"eval_gen_len": 18.0892,
"eval_loss": 1.1251049041748047,
"eval_runtime": 1535.7063,
"eval_samples_per_second": 14.786,
"eval_steps_per_second": 0.925,
"step": 22708
}
],
"logging_steps": 500,
"max_steps": 113540,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 7.874829468219474e+17,
"trial_name": null,
"trial_params": null
}