TinyLlama-1.1B-Chat-rust-cpp-encodings
/
LORAs
/tinyllama-encoder_1e-4
/checkpoint-742
/trainer_state.json
{ | |
"best_metric": null, | |
"best_model_checkpoint": null, | |
"epoch": 1.0, | |
"eval_steps": 500, | |
"global_step": 742, | |
"is_hyper_param_search": false, | |
"is_local_process_zero": true, | |
"is_world_process_zero": true, | |
"log_history": [ | |
{ | |
"epoch": 0.01, | |
"learning_rate": 9.999875511956753e-05, | |
"loss": 1.7561, | |
"step": 10 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 9.999502054025923e-05, | |
"loss": 1.7194, | |
"step": 20 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 9.998879644803927e-05, | |
"loss": 1.7705, | |
"step": 30 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 9.998008315283767e-05, | |
"loss": 1.5094, | |
"step": 40 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 9.996888108853486e-05, | |
"loss": 1.5201, | |
"step": 50 | |
}, | |
{ | |
"epoch": 0.08, | |
"learning_rate": 9.995519081294009e-05, | |
"loss": 1.637, | |
"step": 60 | |
}, | |
{ | |
"epoch": 0.09, | |
"learning_rate": 9.993901300776359e-05, | |
"loss": 1.6338, | |
"step": 70 | |
}, | |
{ | |
"epoch": 0.11, | |
"learning_rate": 9.992034847858267e-05, | |
"loss": 1.5496, | |
"step": 80 | |
}, | |
{ | |
"epoch": 0.12, | |
"learning_rate": 9.989919815480165e-05, | |
"loss": 1.5669, | |
"step": 90 | |
}, | |
{ | |
"epoch": 0.13, | |
"learning_rate": 9.987556308960549e-05, | |
"loss": 1.5622, | |
"step": 100 | |
}, | |
{ | |
"epoch": 0.15, | |
"learning_rate": 9.984944445990735e-05, | |
"loss": 1.5836, | |
"step": 110 | |
}, | |
{ | |
"epoch": 0.16, | |
"learning_rate": 9.982084356629015e-05, | |
"loss": 1.5877, | |
"step": 120 | |
}, | |
{ | |
"epoch": 0.18, | |
"learning_rate": 9.978976183294153e-05, | |
"loss": 1.5465, | |
"step": 130 | |
}, | |
{ | |
"epoch": 0.19, | |
"learning_rate": 9.97562008075832e-05, | |
"loss": 1.4834, | |
"step": 140 | |
}, | |
{ | |
"epoch": 0.2, | |
"learning_rate": 9.97201621613937e-05, | |
"loss": 1.4956, | |
"step": 150 | |
}, | |
{ | |
"epoch": 0.22, | |
"learning_rate": 9.968164768892526e-05, | |
"loss": 1.3877, | |
"step": 160 | |
}, | |
{ | |
"epoch": 0.23, | |
"learning_rate": 9.964065930801438e-05, | |
"loss": 1.3466, | |
"step": 170 | |
}, | |
{ | |
"epoch": 0.24, | |
"learning_rate": 9.95971990596864e-05, | |
"loss": 1.4023, | |
"step": 180 | |
}, | |
{ | |
"epoch": 0.26, | |
"learning_rate": 9.955126910805386e-05, | |
"loss": 1.351, | |
"step": 190 | |
}, | |
{ | |
"epoch": 0.27, | |
"learning_rate": 9.950287174020863e-05, | |
"loss": 1.424, | |
"step": 200 | |
}, | |
{ | |
"epoch": 0.28, | |
"learning_rate": 9.94520093661082e-05, | |
"loss": 1.5038, | |
"step": 210 | |
}, | |
{ | |
"epoch": 0.3, | |
"learning_rate": 9.939868451845554e-05, | |
"loss": 1.3436, | |
"step": 220 | |
}, | |
{ | |
"epoch": 0.31, | |
"learning_rate": 9.934289985257299e-05, | |
"loss": 1.3446, | |
"step": 230 | |
}, | |
{ | |
"epoch": 0.32, | |
"learning_rate": 9.928465814627014e-05, | |
"loss": 1.4063, | |
"step": 240 | |
}, | |
{ | |
"epoch": 0.34, | |
"learning_rate": 9.922396229970541e-05, | |
"loss": 1.3803, | |
"step": 250 | |
}, | |
{ | |
"epoch": 0.35, | |
"learning_rate": 9.916081533524167e-05, | |
"loss": 1.2975, | |
"step": 260 | |
}, | |
{ | |
"epoch": 0.36, | |
"learning_rate": 9.909522039729571e-05, | |
"loss": 1.3689, | |
"step": 270 | |
}, | |
{ | |
"epoch": 0.38, | |
"learning_rate": 9.902718075218176e-05, | |
"loss": 1.4446, | |
"step": 280 | |
}, | |
{ | |
"epoch": 0.39, | |
"learning_rate": 9.895669978794869e-05, | |
"loss": 1.237, | |
"step": 290 | |
}, | |
{ | |
"epoch": 0.4, | |
"learning_rate": 9.888378101421147e-05, | |
"loss": 1.333, | |
"step": 300 | |
}, | |
{ | |
"epoch": 0.42, | |
"learning_rate": 9.880842806197625e-05, | |
"loss": 1.3933, | |
"step": 310 | |
}, | |
{ | |
"epoch": 0.43, | |
"learning_rate": 9.873064468345969e-05, | |
"loss": 1.3735, | |
"step": 320 | |
}, | |
{ | |
"epoch": 0.44, | |
"learning_rate": 9.865043475190201e-05, | |
"loss": 1.172, | |
"step": 330 | |
}, | |
{ | |
"epoch": 0.46, | |
"learning_rate": 9.856780226137419e-05, | |
"loss": 1.2538, | |
"step": 340 | |
}, | |
{ | |
"epoch": 0.47, | |
"learning_rate": 9.848275132657903e-05, | |
"loss": 1.2588, | |
"step": 350 | |
}, | |
{ | |
"epoch": 0.49, | |
"learning_rate": 9.839528618264633e-05, | |
"loss": 1.3531, | |
"step": 360 | |
}, | |
{ | |
"epoch": 0.5, | |
"learning_rate": 9.830541118492193e-05, | |
"loss": 1.4203, | |
"step": 370 | |
}, | |
{ | |
"epoch": 0.51, | |
"learning_rate": 9.821313080875088e-05, | |
"loss": 1.4292, | |
"step": 380 | |
}, | |
{ | |
"epoch": 0.53, | |
"learning_rate": 9.811844964925454e-05, | |
"loss": 1.3098, | |
"step": 390 | |
}, | |
{ | |
"epoch": 0.54, | |
"learning_rate": 9.802137242110185e-05, | |
"loss": 1.2093, | |
"step": 400 | |
}, | |
{ | |
"epoch": 0.55, | |
"learning_rate": 9.792190395827447e-05, | |
"loss": 1.2182, | |
"step": 410 | |
}, | |
{ | |
"epoch": 0.57, | |
"learning_rate": 9.782004921382612e-05, | |
"loss": 1.3148, | |
"step": 420 | |
}, | |
{ | |
"epoch": 0.58, | |
"learning_rate": 9.771581325963594e-05, | |
"loss": 1.4185, | |
"step": 430 | |
}, | |
{ | |
"epoch": 0.59, | |
"learning_rate": 9.760920128615591e-05, | |
"loss": 1.409, | |
"step": 440 | |
}, | |
{ | |
"epoch": 0.61, | |
"learning_rate": 9.750021860215241e-05, | |
"loss": 1.2089, | |
"step": 450 | |
}, | |
{ | |
"epoch": 0.62, | |
"learning_rate": 9.738887063444188e-05, | |
"loss": 1.3114, | |
"step": 460 | |
}, | |
{ | |
"epoch": 0.63, | |
"learning_rate": 9.727516292762058e-05, | |
"loss": 1.2528, | |
"step": 470 | |
}, | |
{ | |
"epoch": 0.65, | |
"learning_rate": 9.715910114378845e-05, | |
"loss": 1.1718, | |
"step": 480 | |
}, | |
{ | |
"epoch": 0.66, | |
"learning_rate": 9.704069106226727e-05, | |
"loss": 1.2825, | |
"step": 490 | |
}, | |
{ | |
"epoch": 0.67, | |
"learning_rate": 9.691993857931277e-05, | |
"loss": 1.1629, | |
"step": 500 | |
}, | |
{ | |
"epoch": 0.69, | |
"learning_rate": 9.679684970782106e-05, | |
"loss": 1.2463, | |
"step": 510 | |
}, | |
{ | |
"epoch": 0.7, | |
"learning_rate": 9.667143057702926e-05, | |
"loss": 1.3297, | |
"step": 520 | |
}, | |
{ | |
"epoch": 0.71, | |
"learning_rate": 9.654368743221022e-05, | |
"loss": 1.1995, | |
"step": 530 | |
}, | |
{ | |
"epoch": 0.73, | |
"learning_rate": 9.641362663436161e-05, | |
"loss": 1.1135, | |
"step": 540 | |
}, | |
{ | |
"epoch": 0.74, | |
"learning_rate": 9.628125465988913e-05, | |
"loss": 1.1311, | |
"step": 550 | |
}, | |
{ | |
"epoch": 0.75, | |
"learning_rate": 9.614657810028403e-05, | |
"loss": 1.2915, | |
"step": 560 | |
}, | |
{ | |
"epoch": 0.77, | |
"learning_rate": 9.60096036617948e-05, | |
"loss": 1.1898, | |
"step": 570 | |
}, | |
{ | |
"epoch": 0.78, | |
"learning_rate": 9.587033816509341e-05, | |
"loss": 1.211, | |
"step": 580 | |
}, | |
{ | |
"epoch": 0.8, | |
"learning_rate": 9.572878854493553e-05, | |
"loss": 1.1389, | |
"step": 590 | |
}, | |
{ | |
"epoch": 0.81, | |
"learning_rate": 9.558496184981525e-05, | |
"loss": 1.0623, | |
"step": 600 | |
}, | |
{ | |
"epoch": 0.82, | |
"learning_rate": 9.543886524161409e-05, | |
"loss": 1.1475, | |
"step": 610 | |
}, | |
{ | |
"epoch": 0.84, | |
"learning_rate": 9.529050599524443e-05, | |
"loss": 1.0662, | |
"step": 620 | |
}, | |
{ | |
"epoch": 0.85, | |
"learning_rate": 9.513989149828718e-05, | |
"loss": 1.2403, | |
"step": 630 | |
}, | |
{ | |
"epoch": 0.86, | |
"learning_rate": 9.498702925062393e-05, | |
"loss": 1.3078, | |
"step": 640 | |
}, | |
{ | |
"epoch": 0.88, | |
"learning_rate": 9.48319268640635e-05, | |
"loss": 1.2456, | |
"step": 650 | |
}, | |
{ | |
"epoch": 0.89, | |
"learning_rate": 9.467459206196298e-05, | |
"loss": 1.1248, | |
"step": 660 | |
}, | |
{ | |
"epoch": 0.9, | |
"learning_rate": 9.451503267884299e-05, | |
"loss": 1.0899, | |
"step": 670 | |
}, | |
{ | |
"epoch": 0.92, | |
"learning_rate": 9.435325665999771e-05, | |
"loss": 1.3189, | |
"step": 680 | |
}, | |
{ | |
"epoch": 0.93, | |
"learning_rate": 9.418927206109913e-05, | |
"loss": 0.9791, | |
"step": 690 | |
}, | |
{ | |
"epoch": 0.94, | |
"learning_rate": 9.402308704779599e-05, | |
"loss": 1.112, | |
"step": 700 | |
}, | |
{ | |
"epoch": 0.96, | |
"learning_rate": 9.385470989530716e-05, | |
"loss": 1.0378, | |
"step": 710 | |
}, | |
{ | |
"epoch": 0.97, | |
"learning_rate": 9.368414898800952e-05, | |
"loss": 1.2503, | |
"step": 720 | |
}, | |
{ | |
"epoch": 0.98, | |
"learning_rate": 9.35114128190205e-05, | |
"loss": 1.0507, | |
"step": 730 | |
}, | |
{ | |
"epoch": 1.0, | |
"learning_rate": 9.333650998977518e-05, | |
"loss": 1.0279, | |
"step": 740 | |
} | |
], | |
"logging_steps": 10, | |
"max_steps": 4452, | |
"num_train_epochs": 6, | |
"save_steps": 500, | |
"total_flos": 3877680160604160.0, | |
"trial_name": null, | |
"trial_params": null | |
} | |