machine-translation / llama-factory / saves / Llama3.1-70B-Chinese-Chat / checkpoint-420 / trainer_state.json
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 5.989304812834225,
  "eval_steps": 70,
  "global_step": 420,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.07130124777183601,
      "grad_norm": 1.4087971448898315,
      "learning_rate": 1.1904761904761905e-05,
      "loss": 2.5781,
      "step": 5
    },
    {
      "epoch": 0.14260249554367202,
      "grad_norm": 1.1262208223342896,
      "learning_rate": 2.380952380952381e-05,
      "loss": 2.5765,
      "step": 10
    },
    {
      "epoch": 0.21390374331550802,
      "grad_norm": 1.2945618629455566,
      "learning_rate": 3.571428571428572e-05,
      "loss": 2.342,
      "step": 15
    },
    {
      "epoch": 0.28520499108734404,
      "grad_norm": 0.7618772387504578,
      "learning_rate": 4.761904761904762e-05,
      "loss": 1.9415,
      "step": 20
    },
    {
      "epoch": 0.35650623885918004,
      "grad_norm": 0.7050806879997253,
      "learning_rate": 5.9523809523809524e-05,
      "loss": 1.6927,
      "step": 25
    },
    {
      "epoch": 0.42780748663101603,
      "grad_norm": 0.9037391543388367,
      "learning_rate": 7.142857142857143e-05,
      "loss": 1.6323,
      "step": 30
    },
    {
      "epoch": 0.49910873440285203,
      "grad_norm": 0.8459873795509338,
      "learning_rate": 8.333333333333334e-05,
      "loss": 1.556,
      "step": 35
    },
    {
      "epoch": 0.5704099821746881,
      "grad_norm": 0.7082933783531189,
      "learning_rate": 9.523809523809524e-05,
      "loss": 1.4586,
      "step": 40
    },
    {
      "epoch": 0.6417112299465241,
      "grad_norm": 0.625400722026825,
      "learning_rate": 9.998445910004082e-05,
      "loss": 1.4528,
      "step": 45
    },
    {
      "epoch": 0.7130124777183601,
      "grad_norm": 2.810605764389038,
      "learning_rate": 9.988952191691925e-05,
      "loss": 1.4471,
      "step": 50
    },
    {
      "epoch": 0.7843137254901961,
      "grad_norm": 2.9866161346435547,
      "learning_rate": 9.97084451044556e-05,
      "loss": 1.4675,
      "step": 55
    },
    {
      "epoch": 0.8556149732620321,
      "grad_norm": 3.7440223693847656,
      "learning_rate": 9.944154131125642e-05,
      "loss": 1.4057,
      "step": 60
    },
    {
      "epoch": 0.9269162210338681,
      "grad_norm": 0.6605167984962463,
      "learning_rate": 9.90892713754483e-05,
      "loss": 1.4725,
      "step": 65
    },
    {
      "epoch": 0.9982174688057041,
      "grad_norm": 0.5990611910820007,
      "learning_rate": 9.865224352899119e-05,
      "loss": 1.4367,
      "step": 70
    },
    {
      "epoch": 0.9982174688057041,
      "eval_loss": 1.3730539083480835,
      "eval_runtime": 7.9538,
      "eval_samples_per_second": 5.783,
      "eval_steps_per_second": 1.509,
      "step": 70
    },
    {
      "epoch": 1.0695187165775402,
      "grad_norm": 0.7159441113471985,
      "learning_rate": 9.81312123475006e-05,
      "loss": 1.3834,
      "step": 75
    },
    {
      "epoch": 1.1408199643493762,
      "grad_norm": 0.5568172335624695,
      "learning_rate": 9.752707744739145e-05,
      "loss": 1.3219,
      "step": 80
    },
    {
      "epoch": 1.2121212121212122,
      "grad_norm": 0.6985956430435181,
      "learning_rate": 9.684088193259355e-05,
      "loss": 1.2813,
      "step": 85
    },
    {
      "epoch": 1.2834224598930482,
      "grad_norm": 0.6956667304039001,
      "learning_rate": 9.607381059352038e-05,
      "loss": 1.3074,
      "step": 90
    },
    {
      "epoch": 1.3547237076648841,
      "grad_norm": 0.6513645648956299,
      "learning_rate": 9.522718786140097e-05,
      "loss": 1.2976,
      "step": 95
    },
    {
      "epoch": 1.4260249554367201,
      "grad_norm": 0.8437972068786621,
      "learning_rate": 9.430247552150673e-05,
      "loss": 1.2588,
      "step": 100
    },
    {
      "epoch": 1.4973262032085561,
      "grad_norm": 0.7591404318809509,
      "learning_rate": 9.330127018922194e-05,
      "loss": 1.2601,
      "step": 105
    },
    {
      "epoch": 1.5686274509803921,
      "grad_norm": 0.830049991607666,
      "learning_rate": 9.22253005533154e-05,
      "loss": 1.2587,
      "step": 110
    },
    {
      "epoch": 1.6399286987522281,
      "grad_norm": 0.9140297174453735,
      "learning_rate": 9.107642439117321e-05,
      "loss": 1.2856,
      "step": 115
    },
    {
      "epoch": 1.7112299465240641,
      "grad_norm": 0.9149733185768127,
      "learning_rate": 8.985662536114613e-05,
      "loss": 1.2615,
      "step": 120
    },
    {
      "epoch": 1.7825311942959001,
      "grad_norm": 0.7160300612449646,
      "learning_rate": 8.856800957755e-05,
      "loss": 1.2669,
      "step": 125
    },
    {
      "epoch": 1.8538324420677363,
      "grad_norm": 0.9131708145141602,
      "learning_rate": 8.721280197423258e-05,
      "loss": 1.2372,
      "step": 130
    },
    {
      "epoch": 1.9251336898395723,
      "grad_norm": 0.8047693967819214,
      "learning_rate": 8.579334246298593e-05,
      "loss": 1.2928,
      "step": 135
    },
    {
      "epoch": 1.9964349376114083,
      "grad_norm": 0.7446454167366028,
      "learning_rate": 8.43120818934367e-05,
      "loss": 1.2601,
      "step": 140
    },
    {
      "epoch": 1.9964349376114083,
      "eval_loss": 1.3131123781204224,
      "eval_runtime": 7.9209,
      "eval_samples_per_second": 5.807,
      "eval_steps_per_second": 1.515,
      "step": 140
    },
    {
      "epoch": 2.0677361853832443,
      "grad_norm": 0.8385710716247559,
      "learning_rate": 8.27715778213905e-05,
      "loss": 0.9909,
      "step": 145
    },
    {
      "epoch": 2.1390374331550803,
      "grad_norm": 1.6324430704116821,
      "learning_rate": 8.117449009293668e-05,
      "loss": 0.9616,
      "step": 150
    },
    {
      "epoch": 2.2103386809269163,
      "grad_norm": 1.1424412727355957,
      "learning_rate": 7.952357625193749e-05,
      "loss": 0.8888,
      "step": 155
    },
    {
      "epoch": 2.2816399286987523,
      "grad_norm": 1.1564297676086426,
      "learning_rate": 7.782168677883206e-05,
      "loss": 0.9144,
      "step": 160
    },
    {
      "epoch": 2.3529411764705883,
      "grad_norm": 1.4812451601028442,
      "learning_rate": 7.60717601689749e-05,
      "loss": 0.9302,
      "step": 165
    },
    {
      "epoch": 2.4242424242424243,
      "grad_norm": 1.4938651323318481,
      "learning_rate": 7.427681785900761e-05,
      "loss": 0.9106,
      "step": 170
    },
    {
      "epoch": 2.4955436720142603,
      "grad_norm": 1.4754260778427124,
      "learning_rate": 7.243995901002312e-05,
      "loss": 0.9016,
      "step": 175
    },
    {
      "epoch": 2.5668449197860963,
      "grad_norm": 1.40531325340271,
      "learning_rate": 7.056435515653059e-05,
      "loss": 0.8809,
      "step": 180
    },
    {
      "epoch": 2.6381461675579323,
      "grad_norm": 1.495160698890686,
      "learning_rate": 6.86532447304597e-05,
      "loss": 0.8862,
      "step": 185
    },
    {
      "epoch": 2.7094474153297683,
      "grad_norm": 1.7604504823684692,
      "learning_rate": 6.670992746965938e-05,
      "loss": 0.8497,
      "step": 190
    },
    {
      "epoch": 2.7807486631016043,
      "grad_norm": 1.8376922607421875,
      "learning_rate": 6.473775872054521e-05,
      "loss": 0.8764,
      "step": 195
    },
    {
      "epoch": 2.8520499108734403,
      "grad_norm": 1.4825749397277832,
      "learning_rate": 6.274014364473274e-05,
      "loss": 0.862,
      "step": 200
    },
    {
      "epoch": 2.9233511586452763,
      "grad_norm": 1.5822840929031372,
      "learning_rate": 6.072053133965938e-05,
      "loss": 0.8981,
      "step": 205
    },
    {
      "epoch": 2.9946524064171123,
      "grad_norm": 1.6038023233413696,
      "learning_rate": 5.868240888334653e-05,
      "loss": 0.8929,
      "step": 210
    },
    {
      "epoch": 2.9946524064171123,
      "eval_loss": 1.4369069337844849,
      "eval_runtime": 7.8778,
      "eval_samples_per_second": 5.839,
      "eval_steps_per_second": 1.523,
      "step": 210
    },
    {
      "epoch": 3.0659536541889483,
      "grad_norm": 1.7548197507858276,
      "learning_rate": 5.6629295313583974e-05,
      "loss": 0.5184,
      "step": 215
    },
    {
      "epoch": 3.1372549019607843,
      "grad_norm": 3.564553737640381,
      "learning_rate": 5.456473555193242e-05,
      "loss": 0.4442,
      "step": 220
    },
    {
      "epoch": 3.2085561497326203,
      "grad_norm": 1.992806077003479,
      "learning_rate": 5.249229428303486e-05,
      "loss": 0.4308,
      "step": 225
    },
    {
      "epoch": 3.2798573975044563,
      "grad_norm": 1.7995655536651611,
      "learning_rate": 5.041554979980486e-05,
      "loss": 0.4337,
      "step": 230
    },
    {
      "epoch": 3.3511586452762923,
      "grad_norm": 2.152865409851074,
      "learning_rate": 4.8338087825118675e-05,
      "loss": 0.46,
      "step": 235
    },
    {
      "epoch": 3.4224598930481283,
      "grad_norm": 2.166696786880493,
      "learning_rate": 4.626349532067879e-05,
      "loss": 0.4018,
      "step": 240
    },
    {
      "epoch": 3.4937611408199643,
      "grad_norm": 2.4105169773101807,
      "learning_rate": 4.4195354293738484e-05,
      "loss": 0.4261,
      "step": 245
    },
    {
      "epoch": 3.5650623885918002,
      "grad_norm": 2.031759262084961,
      "learning_rate": 4.213723561238074e-05,
      "loss": 0.4336,
      "step": 250
    },
    {
      "epoch": 3.6363636363636362,
      "grad_norm": 2.1313636302948,
      "learning_rate": 4.0092692840030134e-05,
      "loss": 0.4168,
      "step": 255
    },
    {
      "epoch": 3.7076648841354722,
      "grad_norm": 2.2295522689819336,
      "learning_rate": 3.806525609984312e-05,
      "loss": 0.3951,
      "step": 260
    },
    {
      "epoch": 3.7789661319073082,
      "grad_norm": 2.494436264038086,
      "learning_rate": 3.6058425979570485e-05,
      "loss": 0.4217,
      "step": 265
    },
    {
      "epoch": 3.8502673796791442,
      "grad_norm": 2.1299421787261963,
      "learning_rate": 3.4075667487415785e-05,
      "loss": 0.3893,
      "step": 270
    },
    {
      "epoch": 3.9215686274509802,
      "grad_norm": 2.3325648307800293,
      "learning_rate": 3.212040406932569e-05,
      "loss": 0.4139,
      "step": 275
    },
    {
      "epoch": 3.9928698752228167,
      "grad_norm": 2.421335458755493,
      "learning_rate": 3.019601169804216e-05,
      "loss": 0.383,
      "step": 280
    },
    {
      "epoch": 3.9928698752228167,
      "eval_loss": 1.7250484228134155,
      "eval_runtime": 7.9394,
      "eval_samples_per_second": 5.794,
      "eval_steps_per_second": 1.511,
      "step": 280
    },
    {
      "epoch": 4.064171122994653,
      "grad_norm": 1.449973225593567,
      "learning_rate": 2.8305813044122097e-05,
      "loss": 0.1879,
      "step": 285
    },
    {
      "epoch": 4.135472370766489,
      "grad_norm": 2.451524257659912,
      "learning_rate": 2.645307173898901e-05,
      "loss": 0.1716,
      "step": 290
    },
    {
      "epoch": 4.206773618538325,
      "grad_norm": 2.600327730178833,
      "learning_rate": 2.464098673992205e-05,
      "loss": 0.1615,
      "step": 295
    },
    {
      "epoch": 4.278074866310161,
      "grad_norm": 1.9811835289001465,
      "learning_rate": 2.2872686806712035e-05,
      "loss": 0.1722,
      "step": 300
    },
    {
      "epoch": 4.349376114081997,
      "grad_norm": 2.220919132232666,
      "learning_rate": 2.115122509952085e-05,
      "loss": 0.1524,
      "step": 305
    },
    {
      "epoch": 4.420677361853833,
      "grad_norm": 1.6138368844985962,
      "learning_rate": 1.947957390727185e-05,
      "loss": 0.1375,
      "step": 310
    },
    {
      "epoch": 4.491978609625669,
      "grad_norm": 1.8483514785766602,
      "learning_rate": 1.7860619515673033e-05,
      "loss": 0.1338,
      "step": 315
    },
    {
      "epoch": 4.563279857397505,
      "grad_norm": 2.0787084102630615,
      "learning_rate": 1.629715722373423e-05,
      "loss": 0.1552,
      "step": 320
    },
    {
      "epoch": 4.634581105169341,
      "grad_norm": 2.408238649368286,
      "learning_rate": 1.4791886517382413e-05,
      "loss": 0.1533,
      "step": 325
    },
    {
      "epoch": 4.705882352941177,
      "grad_norm": 2.4509053230285645,
      "learning_rate": 1.3347406408508695e-05,
      "loss": 0.1482,
      "step": 330
    },
    {
      "epoch": 4.777183600713013,
      "grad_norm": 2.2968966960906982,
      "learning_rate": 1.1966210947494583e-05,
      "loss": 0.136,
      "step": 335
    },
    {
      "epoch": 4.848484848484849,
      "grad_norm": 2.0114998817443848,
      "learning_rate": 1.0650684916965559e-05,
      "loss": 0.1371,
      "step": 340
    },
    {
      "epoch": 4.919786096256685,
      "grad_norm": 2.2050883769989014,
      "learning_rate": 9.403099714207175e-06,
      "loss": 0.1613,
      "step": 345
    },
    {
      "epoch": 4.991087344028521,
      "grad_norm": 1.6016651391983032,
      "learning_rate": 8.225609429353187e-06,
      "loss": 0.1431,
      "step": 350
    },
    {
      "epoch": 4.991087344028521,
      "eval_loss": 2.0896739959716797,
      "eval_runtime": 7.9077,
      "eval_samples_per_second": 5.817,
      "eval_steps_per_second": 1.518,
      "step": 350
    },
    {
      "epoch": 5.062388591800357,
      "grad_norm": 0.9385374188423157,
      "learning_rate": 7.1202471261170245e-06,
      "loss": 0.0821,
      "step": 355
    },
    {
      "epoch": 5.133689839572193,
      "grad_norm": 1.2368322610855103,
      "learning_rate": 6.088921331488568e-06,
      "loss": 0.0605,
      "step": 360
    },
    {
      "epoch": 5.204991087344029,
      "grad_norm": 1.4328949451446533,
      "learning_rate": 5.133412740456806e-06,
      "loss": 0.0635,
      "step": 365
    },
    {
      "epoch": 5.276292335115865,
      "grad_norm": 1.5870749950408936,
      "learning_rate": 4.255371141448272e-06,
      "loss": 0.0708,
      "step": 370
    },
    {
      "epoch": 5.347593582887701,
      "grad_norm": 1.4723175764083862,
      "learning_rate": 3.4563125677897932e-06,
      "loss": 0.06,
      "step": 375
    },
    {
      "epoch": 5.418894830659537,
      "grad_norm": 1.732033610343933,
      "learning_rate": 2.737616680113758e-06,
      "loss": 0.0521,
      "step": 380
    },
    {
      "epoch": 5.490196078431373,
      "grad_norm": 1.3570785522460938,
      "learning_rate": 2.100524384225555e-06,
      "loss": 0.0774,
      "step": 385
    },
    {
      "epoch": 5.561497326203209,
      "grad_norm": 1.214942455291748,
      "learning_rate": 1.5461356885461075e-06,
      "loss": 0.0699,
      "step": 390
    },
    {
      "epoch": 5.632798573975045,
      "grad_norm": 1.4315098524093628,
      "learning_rate": 1.0754078048289374e-06,
      "loss": 0.0616,
      "step": 395
    },
    {
      "epoch": 5.704099821746881,
      "grad_norm": 1.8974279165267944,
      "learning_rate": 6.891534954310885e-07,
      "loss": 0.054,
      "step": 400
    },
    {
      "epoch": 5.775401069518717,
      "grad_norm": 1.6353405714035034,
      "learning_rate": 3.8803966999139684e-07,
      "loss": 0.0711,
      "step": 405
    },
    {
      "epoch": 5.846702317290553,
      "grad_norm": 1.0998703241348267,
      "learning_rate": 1.725862339392259e-07,
      "loss": 0.0504,
      "step": 410
    },
    {
      "epoch": 5.918003565062389,
      "grad_norm": 1.5217969417572021,
      "learning_rate": 4.316519082179227e-08,
      "loss": 0.0629,
      "step": 415
    },
    {
      "epoch": 5.989304812834225,
      "grad_norm": 1.7096792459487915,
      "learning_rate": 0.0,
      "loss": 0.0691,
      "step": 420
    },
    {
      "epoch": 5.989304812834225,
      "eval_loss": 2.507080078125,
      "eval_runtime": 7.9749,
      "eval_samples_per_second": 5.768,
      "eval_steps_per_second": 1.505,
      "step": 420
    }
  ],
  "logging_steps": 5,
  "max_steps": 420,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 6,
  "save_steps": 70,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.8114394260190003e+18,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}
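
For reference, the log above can be reduced to a quick summary with a short Python script. The sketch below is illustrative only: the PATH value is an assumption taken from the checkpoint path in the header, and the script relies solely on the standard library. Entries in log_history that carry a "loss" key are training steps, while those carrying "eval_loss" are the per-epoch evaluations.

import json

# Hypothetical relative path, taken from the breadcrumb above; adjust as needed.
PATH = "saves/Llama3.1-70B-Chinese-Chat/checkpoint-420/trainer_state.json"

with open(PATH) as f:
    state = json.load(f)

# log_history interleaves training records ("loss") and evaluation
# records ("eval_loss"); split them by which key is present.
train_log = [r for r in state["log_history"] if "loss" in r]
eval_log = [r for r in state["log_history"] if "eval_loss" in r]

print(f"{'step':>6}  {'epoch':>7}  {'train loss':>10}")
for r in train_log:
    print(f"{r['step']:>6}  {r['epoch']:>7.3f}  {r['loss']:>10.4f}")

print(f"\n{'step':>6}  {'eval loss':>9}")
for r in eval_log:
    print(f"{r['step']:>6}  {r['eval_loss']:>9.4f}")

Run against this file, the summary makes the trend easy to read off: training loss falls from 2.5781 at step 5 to 0.0691 at step 420, while eval_loss climbs from 1.3731 at step 70 to 2.5071 at step 420.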