| { |
| "best_metric": 0.03272353485226631, |
| "best_model_checkpoint": "./codellama_sql_model_forestry/checkpoint-400", |
| "epoch": 2.5006257822277846, |
| "eval_steps": 100, |
| "global_step": 500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.05006257822277847, |
| "grad_norm": 7.185255527496338, |
| "learning_rate": 0.0001979899497487437, |
| "loss": 32.456, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.10012515644555695, |
| "grad_norm": 0.1251622438430786, |
| "learning_rate": 0.0001946398659966499, |
| "loss": 0.4541, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.15018773466833543, |
| "grad_norm": 0.20880717039108276, |
| "learning_rate": 0.0001912897822445561, |
| "loss": 0.3029, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.2002503128911139, |
| "grad_norm": 0.16143666207790375, |
| "learning_rate": 0.0001879396984924623, |
| "loss": 0.2429, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.2503128911138924, |
| "grad_norm": 0.13474377989768982, |
| "learning_rate": 0.0001845896147403685, |
| "loss": 0.2332, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.30037546933667086, |
| "grad_norm": 0.17662972211837769, |
| "learning_rate": 0.0001812395309882747, |
| "loss": 0.2085, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.3504380475594493, |
| "grad_norm": 0.17377126216888428, |
| "learning_rate": 0.0001778894472361809, |
| "loss": 0.1961, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.4005006257822278, |
| "grad_norm": 0.1938253790140152, |
| "learning_rate": 0.0001745393634840871, |
| "loss": 0.2032, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.45056320400500627, |
| "grad_norm": 0.1416705697774887, |
| "learning_rate": 0.0001711892797319933, |
| "loss": 0.198, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.5006257822277848, |
| "grad_norm": 0.16024866700172424, |
| "learning_rate": 0.0001678391959798995, |
| "loss": 0.185, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.5006257822277848, |
| "eval_loss": 0.042159534990787506, |
| "eval_runtime": 125.0369, |
| "eval_samples_per_second": 0.344, |
| "eval_steps_per_second": 0.344, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.5506883604505632, |
| "grad_norm": 0.1869831383228302, |
| "learning_rate": 0.0001644891122278057, |
| "loss": 0.1921, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.6007509386733417, |
| "grad_norm": 0.15546876192092896, |
| "learning_rate": 0.0001611390284757119, |
| "loss": 0.1741, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.6508135168961201, |
| "grad_norm": 0.1712736338376999, |
| "learning_rate": 0.0001577889447236181, |
| "loss": 0.174, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.7008760951188986, |
| "grad_norm": 0.1676545888185501, |
| "learning_rate": 0.0001544388609715243, |
| "loss": 0.1571, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.7509386733416771, |
| "grad_norm": 0.17751628160476685, |
| "learning_rate": 0.00015108877721943048, |
| "loss": 0.1778, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.8010012515644556, |
| "grad_norm": 0.1387346237897873, |
| "learning_rate": 0.00014773869346733668, |
| "loss": 0.1532, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.851063829787234, |
| "grad_norm": 0.17603643238544464, |
| "learning_rate": 0.00014438860971524288, |
| "loss": 0.1577, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.9011264080100125, |
| "grad_norm": 0.1522763967514038, |
| "learning_rate": 0.00014103852596314908, |
| "loss": 0.1501, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.951188986232791, |
| "grad_norm": 0.14780306816101074, |
| "learning_rate": 0.00013768844221105528, |
| "loss": 0.1499, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.11175049841403961, |
| "learning_rate": 0.00013433835845896147, |
| "loss": 0.1509, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 0.037981580942869186, |
| "eval_runtime": 125.2439, |
| "eval_samples_per_second": 0.343, |
| "eval_steps_per_second": 0.343, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.0500625782227784, |
| "grad_norm": 0.15822191536426544, |
| "learning_rate": 0.00013098827470686767, |
| "loss": 0.1307, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.100125156445557, |
| "grad_norm": 0.19546131789684296, |
| "learning_rate": 0.00012763819095477387, |
| "loss": 0.1189, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.1501877346683353, |
| "grad_norm": 0.17685039341449738, |
| "learning_rate": 0.00012428810720268007, |
| "loss": 0.1296, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.200250312891114, |
| "grad_norm": 0.17825502157211304, |
| "learning_rate": 0.00012093802345058627, |
| "loss": 0.1291, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.2503128911138923, |
| "grad_norm": 0.19515497982501984, |
| "learning_rate": 0.00011758793969849247, |
| "loss": 0.1283, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.300375469336671, |
| "grad_norm": 0.22185975313186646, |
| "learning_rate": 0.00011423785594639866, |
| "loss": 0.1358, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.3504380475594493, |
| "grad_norm": 0.18015995621681213, |
| "learning_rate": 0.00011088777219430486, |
| "loss": 0.1284, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.400500625782228, |
| "grad_norm": 0.2559189796447754, |
| "learning_rate": 0.00010753768844221106, |
| "loss": 0.1188, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.4505632040050063, |
| "grad_norm": 0.17111122608184814, |
| "learning_rate": 0.00010418760469011726, |
| "loss": 0.1243, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.5006257822277846, |
| "grad_norm": 0.19879887998104095, |
| "learning_rate": 0.00010083752093802346, |
| "loss": 0.1364, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.5006257822277846, |
| "eval_loss": 0.03517143055796623, |
| "eval_runtime": 124.8471, |
| "eval_samples_per_second": 0.344, |
| "eval_steps_per_second": 0.344, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.5506883604505632, |
| "grad_norm": 0.21239443123340607, |
| "learning_rate": 9.748743718592965e-05, |
| "loss": 0.1198, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.6007509386733418, |
| "grad_norm": 0.21676813066005707, |
| "learning_rate": 9.413735343383585e-05, |
| "loss": 0.1309, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.65081351689612, |
| "grad_norm": 0.2111431062221527, |
| "learning_rate": 9.078726968174205e-05, |
| "loss": 0.123, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.7008760951188986, |
| "grad_norm": 0.17868830263614655, |
| "learning_rate": 8.743718592964825e-05, |
| "loss": 0.1133, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.7509386733416772, |
| "grad_norm": 0.20656318962574005, |
| "learning_rate": 8.408710217755445e-05, |
| "loss": 0.119, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.8010012515644556, |
| "grad_norm": 0.25555238127708435, |
| "learning_rate": 8.073701842546064e-05, |
| "loss": 0.1142, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.851063829787234, |
| "grad_norm": 0.17207714915275574, |
| "learning_rate": 7.738693467336684e-05, |
| "loss": 0.1085, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.9011264080100125, |
| "grad_norm": 0.16670793294906616, |
| "learning_rate": 7.403685092127304e-05, |
| "loss": 0.11, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.9511889862327911, |
| "grad_norm": 0.20777879655361176, |
| "learning_rate": 7.068676716917924e-05, |
| "loss": 0.1094, |
| "step": 390 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.20488321781158447, |
| "learning_rate": 6.733668341708544e-05, |
| "loss": 0.1182, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 0.03272353485226631, |
| "eval_runtime": 125.3359, |
| "eval_samples_per_second": 0.343, |
| "eval_steps_per_second": 0.343, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.0500625782227786, |
| "grad_norm": 0.19323211908340454, |
| "learning_rate": 6.398659966499163e-05, |
| "loss": 0.0897, |
| "step": 410 |
| }, |
| { |
| "epoch": 2.1001251564455568, |
| "grad_norm": 0.21595624089241028, |
| "learning_rate": 6.063651591289783e-05, |
| "loss": 0.0761, |
| "step": 420 |
| }, |
| { |
| "epoch": 2.1501877346683353, |
| "grad_norm": 0.28912344574928284, |
| "learning_rate": 5.728643216080403e-05, |
| "loss": 0.0819, |
| "step": 430 |
| }, |
| { |
| "epoch": 2.200250312891114, |
| "grad_norm": 0.21744051575660706, |
| "learning_rate": 5.393634840871022e-05, |
| "loss": 0.0793, |
| "step": 440 |
| }, |
| { |
| "epoch": 2.2503128911138925, |
| "grad_norm": 0.2404051125049591, |
| "learning_rate": 5.058626465661642e-05, |
| "loss": 0.0793, |
| "step": 450 |
| }, |
| { |
| "epoch": 2.3003754693366707, |
| "grad_norm": 0.2571396827697754, |
| "learning_rate": 4.723618090452262e-05, |
| "loss": 0.0841, |
| "step": 460 |
| }, |
| { |
| "epoch": 2.3504380475594493, |
| "grad_norm": 0.19428133964538574, |
| "learning_rate": 4.3886097152428815e-05, |
| "loss": 0.0795, |
| "step": 470 |
| }, |
| { |
| "epoch": 2.400500625782228, |
| "grad_norm": 0.2305491715669632, |
| "learning_rate": 4.053601340033501e-05, |
| "loss": 0.0835, |
| "step": 480 |
| }, |
| { |
| "epoch": 2.4505632040050065, |
| "grad_norm": 0.25262993574142456, |
| "learning_rate": 3.7185929648241204e-05, |
| "loss": 0.0803, |
| "step": 490 |
| }, |
| { |
| "epoch": 2.5006257822277846, |
| "grad_norm": 0.22605903446674347, |
| "learning_rate": 3.38358458961474e-05, |
| "loss": 0.0852, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.5006257822277846, |
| "eval_loss": 0.03292727842926979, |
| "eval_runtime": 125.3579, |
| "eval_samples_per_second": 0.343, |
| "eval_steps_per_second": 0.343, |
| "step": 500 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 597, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.632027949274235e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|