{ "best_metric": 0.45609042048454285, "best_model_checkpoint": "Phi-3.5-mini-instruct_text_to_sql\\checkpoint-1500", "epoch": 2.235469448584203, "eval_steps": 50, "global_step": 1500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07451564828614009, "grad_norm": 0.05202275142073631, "learning_rate": 0.0002, "loss": 0.8765, "step": 50 }, { "epoch": 0.07451564828614009, "eval_loss": 0.5696993470191956, "eval_runtime": 27.4333, "eval_samples_per_second": 4.593, "eval_steps_per_second": 0.583, "step": 50 }, { "epoch": 0.14903129657228018, "grad_norm": 0.038158852607011795, "learning_rate": 0.0001996800092633612, "loss": 0.5364, "step": 100 }, { "epoch": 0.14903129657228018, "eval_loss": 0.5185158848762512, "eval_runtime": 26.2689, "eval_samples_per_second": 4.797, "eval_steps_per_second": 0.609, "step": 100 }, { "epoch": 0.22354694485842028, "grad_norm": 0.12953701615333557, "learning_rate": 0.00019872208493487546, "loss": 0.5099, "step": 150 }, { "epoch": 0.22354694485842028, "eval_loss": 0.5075405240058899, "eval_runtime": 26.9551, "eval_samples_per_second": 4.674, "eval_steps_per_second": 0.594, "step": 150 }, { "epoch": 0.29806259314456035, "grad_norm": 0.036496683955192566, "learning_rate": 0.0001971323575527731, "loss": 0.5012, "step": 200 }, { "epoch": 0.29806259314456035, "eval_loss": 0.49924516677856445, "eval_runtime": 26.2588, "eval_samples_per_second": 4.798, "eval_steps_per_second": 0.609, "step": 200 }, { "epoch": 0.37257824143070045, "grad_norm": 0.047366924583911896, "learning_rate": 0.0001949210010777752, "loss": 0.4915, "step": 250 }, { "epoch": 0.37257824143070045, "eval_loss": 0.49416425824165344, "eval_runtime": 26.2568, "eval_samples_per_second": 4.799, "eval_steps_per_second": 0.609, "step": 250 }, { "epoch": 0.44709388971684055, "grad_norm": 0.035322971642017365, "learning_rate": 0.00019210216778162994, "loss": 0.4908, "step": 300 }, { "epoch": 0.44709388971684055, "eval_loss": 0.48849040269851685, "eval_runtime": 26.2674, "eval_samples_per_second": 4.797, "eval_steps_per_second": 0.609, "step": 300 }, { "epoch": 0.5216095380029806, "grad_norm": 0.04145614430308342, "learning_rate": 0.0001886938976751951, "loss": 0.486, "step": 350 }, { "epoch": 0.5216095380029806, "eval_loss": 0.48535990715026855, "eval_runtime": 26.2577, "eval_samples_per_second": 4.799, "eval_steps_per_second": 0.609, "step": 350 }, { "epoch": 0.5961251862891207, "grad_norm": 0.04975809529423714, "learning_rate": 0.00018471800305571129, "loss": 0.4867, "step": 400 }, { "epoch": 0.5961251862891207, "eval_loss": 0.4834245443344116, "eval_runtime": 26.1969, "eval_samples_per_second": 4.81, "eval_steps_per_second": 0.611, "step": 400 }, { "epoch": 0.6706408345752608, "grad_norm": 0.05116498842835426, "learning_rate": 0.00018019992891214008, "loss": 0.4792, "step": 450 }, { "epoch": 0.6706408345752608, "eval_loss": 0.4825398325920105, "eval_runtime": 26.1757, "eval_samples_per_second": 4.814, "eval_steps_per_second": 0.611, "step": 450 }, { "epoch": 0.7451564828614009, "grad_norm": 0.039921361953020096, "learning_rate": 0.00017516859008194938, "loss": 0.4781, "step": 500 }, { "epoch": 0.7451564828614009, "eval_loss": 0.4772830307483673, "eval_runtime": 26.1948, "eval_samples_per_second": 4.81, "eval_steps_per_second": 0.611, "step": 500 }, { "epoch": 0.819672131147541, "grad_norm": 0.04263721778988838, "learning_rate": 0.00016965618620151017, "loss": 0.4779, "step": 550 }, { "epoch": 0.819672131147541, "eval_loss": 0.4743432402610779, "eval_runtime": 26.2176, "eval_samples_per_second": 4.806, "eval_steps_per_second": 0.61, "step": 550 }, { "epoch": 0.8941877794336811, "grad_norm": 0.04496975615620613, "learning_rate": 0.00016369799563438958, "loss": 0.4738, "step": 600 }, { "epoch": 0.8941877794336811, "eval_loss": 0.47231313586235046, "eval_runtime": 26.1874, "eval_samples_per_second": 4.811, "eval_steps_per_second": 0.611, "step": 600 }, { "epoch": 0.9687034277198212, "grad_norm": 0.052979908883571625, "learning_rate": 0.00015733214969635968, "loss": 0.4727, "step": 650 }, { "epoch": 0.9687034277198212, "eval_loss": 0.47076416015625, "eval_runtime": 26.1834, "eval_samples_per_second": 4.812, "eval_steps_per_second": 0.611, "step": 650 }, { "epoch": 1.0432190760059612, "grad_norm": 0.037279509007930756, "learning_rate": 0.00015059938862204127, "loss": 0.4623, "step": 700 }, { "epoch": 1.0432190760059612, "eval_loss": 0.47042036056518555, "eval_runtime": 26.1969, "eval_samples_per_second": 4.81, "eval_steps_per_second": 0.611, "step": 700 }, { "epoch": 1.1177347242921014, "grad_norm": 0.044078536331653595, "learning_rate": 0.00014354280083495006, "loss": 0.4578, "step": 750 }, { "epoch": 1.1177347242921014, "eval_loss": 0.4691551625728607, "eval_runtime": 26.1904, "eval_samples_per_second": 4.811, "eval_steps_per_second": 0.611, "step": 750 }, { "epoch": 1.1922503725782414, "grad_norm": 0.03993101418018341, "learning_rate": 0.000136207547189569, "loss": 0.459, "step": 800 }, { "epoch": 1.1922503725782414, "eval_loss": 0.4670138359069824, "eval_runtime": 26.8468, "eval_samples_per_second": 4.693, "eval_steps_per_second": 0.596, "step": 800 }, { "epoch": 1.2667660208643814, "grad_norm": 0.050710201263427734, "learning_rate": 0.00012864057195024643, "loss": 0.4572, "step": 850 }, { "epoch": 1.2667660208643814, "eval_loss": 0.4661361575126648, "eval_runtime": 26.2737, "eval_samples_per_second": 4.796, "eval_steps_per_second": 0.609, "step": 850 }, { "epoch": 1.3412816691505216, "grad_norm": 0.03542506694793701, "learning_rate": 0.00012089030235660155, "loss": 0.4543, "step": 900 }, { "epoch": 1.3412816691505216, "eval_loss": 0.4644615650177002, "eval_runtime": 26.2789, "eval_samples_per_second": 4.795, "eval_steps_per_second": 0.609, "step": 900 }, { "epoch": 1.4157973174366618, "grad_norm": 0.03939700126647949, "learning_rate": 0.00011300633869816275, "loss": 0.456, "step": 950 }, { "epoch": 1.4157973174366618, "eval_loss": 0.46352747082710266, "eval_runtime": 26.4386, "eval_samples_per_second": 4.766, "eval_steps_per_second": 0.605, "step": 950 }, { "epoch": 1.4903129657228018, "grad_norm": 0.03840464726090431, "learning_rate": 0.00010503913688170396, "loss": 0.457, "step": 1000 }, { "epoch": 1.4903129657228018, "eval_loss": 0.46207883954048157, "eval_runtime": 26.2982, "eval_samples_per_second": 4.791, "eval_steps_per_second": 0.608, "step": 1000 }, { "epoch": 1.5648286140089418, "grad_norm": 0.04269999638199806, "learning_rate": 9.703968552278915e-05, "loss": 0.4459, "step": 1050 }, { "epoch": 1.5648286140089418, "eval_loss": 0.4622659385204315, "eval_runtime": 26.1338, "eval_samples_per_second": 4.783, "eval_steps_per_second": 0.612, "step": 1050 }, { "epoch": 1.639344262295082, "grad_norm": 0.03784380853176117, "learning_rate": 8.905917962807927e-05, "loss": 0.4491, "step": 1100 }, { "epoch": 1.639344262295082, "eval_loss": 0.46070215106010437, "eval_runtime": 26.2581, "eval_samples_per_second": 4.76, "eval_steps_per_second": 0.609, "step": 1100 }, { "epoch": 1.713859910581222, "grad_norm": 0.06169111654162407, "learning_rate": 8.114869295677425e-05, "loss": 0.4469, "step": 1150 }, { "epoch": 1.713859910581222, "eval_loss": 0.45989012718200684, "eval_runtime": 26.0654, "eval_samples_per_second": 4.796, "eval_steps_per_second": 0.614, "step": 1150 }, { "epoch": 1.788375558867362, "grad_norm": 0.04420630633831024, "learning_rate": 7.335885115801656e-05, "loss": 0.4436, "step": 1200 }, { "epoch": 1.788375558867362, "eval_loss": 0.4588496685028076, "eval_runtime": 26.1244, "eval_samples_per_second": 4.785, "eval_steps_per_second": 0.612, "step": 1200 }, { "epoch": 1.8628912071535022, "grad_norm": 0.04250326752662659, "learning_rate": 6.573950777611587e-05, "loss": 0.4437, "step": 1250 }, { "epoch": 1.8628912071535022, "eval_loss": 0.4583075940608978, "eval_runtime": 26.1348, "eval_samples_per_second": 4.783, "eval_steps_per_second": 0.612, "step": 1250 }, { "epoch": 1.9374068554396424, "grad_norm": 0.04410432279109955, "learning_rate": 5.833942519710177e-05, "loss": 0.443, "step": 1300 }, { "epoch": 1.9374068554396424, "eval_loss": 0.45751953125, "eval_runtime": 26.366, "eval_samples_per_second": 4.741, "eval_steps_per_second": 0.607, "step": 1300 }, { "epoch": 2.0119225037257826, "grad_norm": 0.04371315613389015, "learning_rate": 5.1205962578487155e-05, "loss": 0.4411, "step": 1350 }, { "epoch": 2.0119225037257826, "eval_loss": 0.45691409707069397, "eval_runtime": 26.3933, "eval_samples_per_second": 4.736, "eval_steps_per_second": 0.606, "step": 1350 }, { "epoch": 2.0864381520119224, "grad_norm": 0.051224980503320694, "learning_rate": 4.4384772759434425e-05, "loss": 0.4348, "step": 1400 }, { "epoch": 2.0864381520119224, "eval_loss": 0.4565175175666809, "eval_runtime": 26.0441, "eval_samples_per_second": 4.8, "eval_steps_per_second": 0.614, "step": 1400 }, { "epoch": 2.1609538002980626, "grad_norm": 0.043918948620557785, "learning_rate": 3.7919510091042566e-05, "loss": 0.4347, "step": 1450 }, { "epoch": 2.1609538002980626, "eval_loss": 0.45600426197052, "eval_runtime": 26.8649, "eval_samples_per_second": 4.653, "eval_steps_per_second": 0.596, "step": 1450 }, { "epoch": 2.235469448584203, "grad_norm": 0.04387347772717476, "learning_rate": 3.185155105658798e-05, "loss": 0.4345, "step": 1500 }, { "epoch": 2.235469448584203, "eval_loss": 0.45609042048454285, "eval_runtime": 26.3484, "eval_samples_per_second": 4.744, "eval_steps_per_second": 0.607, "step": 1500 } ], "logging_steps": 50, "max_steps": 2013, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5.710634159009956e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }