{ "best_metric": 0.6586877107620239, "best_model_checkpoint": "saves/Gemma-7B/lora/train_1/checkpoint-100", "epoch": 0.21875854525567404, "eval_steps": 10, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.010937927262783703, "grad_norm": 16.269868850708008, "learning_rate": 0.00029999015487222375, "loss": 3.1586, "step": 10 }, { "epoch": 0.010937927262783703, "eval_loss": 0.7906885147094727, "eval_runtime": 170.0021, "eval_samples_per_second": 27.717, "eval_steps_per_second": 0.871, "step": 10 }, { "epoch": 0.021875854525567406, "grad_norm": 1.8665393590927124, "learning_rate": 0.00029996062078124905, "loss": 0.6717, "step": 20 }, { "epoch": 0.021875854525567406, "eval_loss": 0.7608615159988403, "eval_runtime": 168.7111, "eval_samples_per_second": 27.929, "eval_steps_per_second": 0.877, "step": 20 }, { "epoch": 0.03281378178835111, "grad_norm": 2.046764373779297, "learning_rate": 0.0002999114016039678, "loss": 0.5741, "step": 30 }, { "epoch": 0.03281378178835111, "eval_loss": 0.7403788566589355, "eval_runtime": 169.3669, "eval_samples_per_second": 27.821, "eval_steps_per_second": 0.874, "step": 30 }, { "epoch": 0.04375170905113481, "grad_norm": 0.8763797879219055, "learning_rate": 0.00029984250380130117, "loss": 0.5809, "step": 40 }, { "epoch": 0.04375170905113481, "eval_loss": 0.7739446759223938, "eval_runtime": 169.4581, "eval_samples_per_second": 27.806, "eval_steps_per_second": 0.873, "step": 40 }, { "epoch": 0.05468963631391851, "grad_norm": 1.452701449394226, "learning_rate": 0.0002997539364173515, "loss": 0.5313, "step": 50 }, { "epoch": 0.05468963631391851, "eval_loss": 0.7001951932907104, "eval_runtime": 169.6448, "eval_samples_per_second": 27.776, "eval_steps_per_second": 0.872, "step": 50 }, { "epoch": 0.06562756357670221, "grad_norm": 2.0603904724121094, "learning_rate": 0.00029964571107821494, "loss": 0.4879, "step": 60 }, { "epoch": 0.06562756357670221, "eval_loss": 0.7159304022789001, "eval_runtime": 169.4832, "eval_samples_per_second": 27.802, "eval_steps_per_second": 0.873, "step": 60 }, { "epoch": 0.07656549083948591, "grad_norm": 1.048985242843628, "learning_rate": 0.00029951784199045534, "loss": 0.4665, "step": 70 }, { "epoch": 0.07656549083948591, "eval_loss": 0.7063425779342651, "eval_runtime": 169.1768, "eval_samples_per_second": 27.853, "eval_steps_per_second": 0.875, "step": 70 }, { "epoch": 0.08750341810226962, "grad_norm": 0.8847436904907227, "learning_rate": 0.0002993703459392396, "loss": 0.4509, "step": 80 }, { "epoch": 0.08750341810226962, "eval_loss": 0.6991614103317261, "eval_runtime": 169.2261, "eval_samples_per_second": 27.844, "eval_steps_per_second": 0.875, "step": 80 }, { "epoch": 0.09844134536505332, "grad_norm": 1.09526789188385, "learning_rate": 0.00029920324228613376, "loss": 0.4542, "step": 90 }, { "epoch": 0.09844134536505332, "eval_loss": 0.6915357708930969, "eval_runtime": 169.6804, "eval_samples_per_second": 27.77, "eval_steps_per_second": 0.872, "step": 90 }, { "epoch": 0.10937927262783702, "grad_norm": 0.9263470768928528, "learning_rate": 0.0002990165529665622, "loss": 0.4188, "step": 100 }, { "epoch": 0.10937927262783702, "eval_loss": 0.6586877107620239, "eval_runtime": 169.6821, "eval_samples_per_second": 27.77, "eval_steps_per_second": 0.872, "step": 100 }, { "epoch": 0.12031719989062073, "grad_norm": 1.2452396154403687, "learning_rate": 0.0002988103024869277, "loss": 0.4131, "step": 110 }, { "epoch": 0.12031719989062073, "eval_loss": 0.6637363433837891, "eval_runtime": 169.6991, "eval_samples_per_second": 27.767, "eval_steps_per_second": 0.872, "step": 110 }, { "epoch": 0.13125512715340443, "grad_norm": 0.8261873722076416, "learning_rate": 0.00029858451792139453, "loss": 0.4137, "step": 120 }, { "epoch": 0.13125512715340443, "eval_loss": 0.6902170181274414, "eval_runtime": 169.6732, "eval_samples_per_second": 27.771, "eval_steps_per_second": 0.872, "step": 120 }, { "epoch": 0.14219305441618812, "grad_norm": 0.6665583848953247, "learning_rate": 0.0002983392289083346, "loss": 0.4087, "step": 130 }, { "epoch": 0.14219305441618812, "eval_loss": 0.6948944330215454, "eval_runtime": 169.5204, "eval_samples_per_second": 27.796, "eval_steps_per_second": 0.873, "step": 130 }, { "epoch": 0.15313098167897182, "grad_norm": 0.841665506362915, "learning_rate": 0.0002980744676464371, "loss": 0.3968, "step": 140 }, { "epoch": 0.15313098167897182, "eval_loss": 0.6712561845779419, "eval_runtime": 169.6453, "eval_samples_per_second": 27.776, "eval_steps_per_second": 0.872, "step": 140 }, { "epoch": 0.16406890894175555, "grad_norm": 1.1116673946380615, "learning_rate": 0.0002977902688904813, "loss": 0.4048, "step": 150 }, { "epoch": 0.16406890894175555, "eval_loss": 0.6878468990325928, "eval_runtime": 169.3026, "eval_samples_per_second": 27.832, "eval_steps_per_second": 0.874, "step": 150 }, { "epoch": 0.17500683620453925, "grad_norm": 0.7878606915473938, "learning_rate": 0.00029748666994677467, "loss": 0.3953, "step": 160 }, { "epoch": 0.17500683620453925, "eval_loss": 0.6906653642654419, "eval_runtime": 168.3921, "eval_samples_per_second": 27.982, "eval_steps_per_second": 0.879, "step": 160 }, { "epoch": 0.18594476346732294, "grad_norm": 0.9028112888336182, "learning_rate": 0.00029716371066825593, "loss": 0.3873, "step": 170 }, { "epoch": 0.18594476346732294, "eval_loss": 0.6937726140022278, "eval_runtime": 169.1341, "eval_samples_per_second": 27.86, "eval_steps_per_second": 0.875, "step": 170 }, { "epoch": 0.19688269073010664, "grad_norm": 0.797646701335907, "learning_rate": 0.0002968214334492632, "loss": 0.3821, "step": 180 }, { "epoch": 0.19688269073010664, "eval_loss": 0.684823751449585, "eval_runtime": 169.2007, "eval_samples_per_second": 27.849, "eval_steps_per_second": 0.875, "step": 180 }, { "epoch": 0.20782061799289034, "grad_norm": 0.7943875193595886, "learning_rate": 0.00029645988321996917, "loss": 0.394, "step": 190 }, { "epoch": 0.20782061799289034, "eval_loss": 0.7038875222206116, "eval_runtime": 169.1579, "eval_samples_per_second": 27.856, "eval_steps_per_second": 0.875, "step": 190 }, { "epoch": 0.21875854525567404, "grad_norm": 0.797392725944519, "learning_rate": 0.00029607910744048336, "loss": 0.3893, "step": 200 }, { "epoch": 0.21875854525567404, "eval_loss": 0.6831381916999817, "eval_runtime": 169.2814, "eval_samples_per_second": 27.835, "eval_steps_per_second": 0.874, "step": 200 }, { "epoch": 0.21875854525567404, "step": 200, "total_flos": 3.6702510649442304e+17, "train_loss": 0.5889949607849121, "train_runtime": 10495.9143, "train_samples_per_second": 66.895, "train_steps_per_second": 0.261 } ], "logging_steps": 10, "max_steps": 2742, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 10, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.6702510649442304e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }