{ "best_metric": 2.358070135116577, "best_model_checkpoint": "./Qwen2-4B-Chat-hinglish-sft/checkpoint-275", "epoch": 1.0923535253227408, "eval_steps": 25, "global_step": 275, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "grad_norm": 1.1830133199691772, "learning_rate": 0.00019999629591162656, "loss": 4.7862, "step": 25 }, { "epoch": 0.1, "eval_loss": 4.098670482635498, "eval_runtime": 4.1063, "eval_samples_per_second": 2.679, "eval_steps_per_second": 2.679, "step": 25 }, { "epoch": 0.2, "grad_norm": 1.0619295835494995, "learning_rate": 0.00019932568492674144, "loss": 2.9161, "step": 50 }, { "epoch": 0.2, "eval_loss": 3.1406357288360596, "eval_runtime": 4.0777, "eval_samples_per_second": 2.698, "eval_steps_per_second": 2.698, "step": 50 }, { "epoch": 0.3, "grad_norm": 1.1028335094451904, "learning_rate": 0.0001975064532257195, "loss": 2.4282, "step": 75 }, { "epoch": 0.3, "eval_loss": 2.8003995418548584, "eval_runtime": 4.076, "eval_samples_per_second": 2.699, "eval_steps_per_second": 2.699, "step": 75 }, { "epoch": 0.4, "grad_norm": 0.9855464100837708, "learning_rate": 0.00019455963874271426, "loss": 2.2815, "step": 100 }, { "epoch": 0.4, "eval_loss": 2.6393470764160156, "eval_runtime": 4.084, "eval_samples_per_second": 2.693, "eval_steps_per_second": 2.693, "step": 100 }, { "epoch": 0.5, "grad_norm": 1.154259204864502, "learning_rate": 0.00019051931898913976, "loss": 2.1485, "step": 125 }, { "epoch": 0.5, "eval_loss": 2.5862808227539062, "eval_runtime": 4.0324, "eval_samples_per_second": 2.728, "eval_steps_per_second": 2.728, "step": 125 }, { "epoch": 0.6, "grad_norm": 1.2115691900253296, "learning_rate": 0.0001854322169749827, "loss": 2.0884, "step": 150 }, { "epoch": 0.6, "eval_loss": 2.491671323776245, "eval_runtime": 4.0837, "eval_samples_per_second": 2.694, "eval_steps_per_second": 2.694, "step": 150 }, { "epoch": 0.7, "grad_norm": 1.0713191032409668, "learning_rate": 0.00017935716089521474, "loss": 2.1055, "step": 175 }, { "epoch": 0.7, "eval_loss": 2.4541499614715576, "eval_runtime": 4.0493, "eval_samples_per_second": 2.717, "eval_steps_per_second": 2.717, "step": 175 }, { "epoch": 0.79, "grad_norm": 1.1404445171356201, "learning_rate": 0.00017236440382959126, "loss": 2.1012, "step": 200 }, { "epoch": 0.79, "eval_loss": 2.42814040184021, "eval_runtime": 4.0309, "eval_samples_per_second": 2.729, "eval_steps_per_second": 2.729, "step": 200 }, { "epoch": 0.89, "grad_norm": 1.1801480054855347, "learning_rate": 0.00016453481132295506, "loss": 1.9956, "step": 225 }, { "epoch": 0.89, "eval_loss": 2.3967058658599854, "eval_runtime": 4.0145, "eval_samples_per_second": 2.74, "eval_steps_per_second": 2.74, "step": 225 }, { "epoch": 0.99, "grad_norm": 1.2651640176773071, "learning_rate": 0.00015595892624101765, "loss": 1.9931, "step": 250 }, { "epoch": 0.99, "eval_loss": 2.3804991245269775, "eval_runtime": 4.0509, "eval_samples_per_second": 2.715, "eval_steps_per_second": 2.715, "step": 250 }, { "epoch": 1.09, "grad_norm": 1.2018358707427979, "learning_rate": 0.00014673592171580025, "loss": 1.7605, "step": 275 }, { "epoch": 1.09, "eval_loss": 2.358070135116577, "eval_runtime": 4.0302, "eval_samples_per_second": 2.729, "eval_steps_per_second": 2.729, "step": 275 } ], "logging_steps": 25, "max_steps": 753, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 25, "total_flos": 1972985693091840.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }