{
  "best_metric": 0.19687849283218384,
  "best_model_checkpoint": "autotrain-y5fnj-qjq2l/checkpoint-66",
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 66,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.045454545454545456,
      "grad_norm": 10.229093551635742,
      "learning_rate": 7.142857142857143e-06,
      "loss": 0.2103,
      "step": 1
    },
    {
      "epoch": 0.09090909090909091,
      "grad_norm": 22.845165252685547,
      "learning_rate": 1.4285714285714285e-05,
      "loss": 1.0428,
      "step": 2
    },
    {
      "epoch": 0.13636363636363635,
      "grad_norm": 4.096180438995361,
      "learning_rate": 2.1428571428571428e-05,
      "loss": 0.1254,
      "step": 3
    },
    {
      "epoch": 0.18181818181818182,
      "grad_norm": 17.625638961791992,
      "learning_rate": 2.857142857142857e-05,
      "loss": 0.934,
      "step": 4
    },
    {
      "epoch": 0.22727272727272727,
      "grad_norm": 12.085053443908691,
      "learning_rate": 3.571428571428572e-05,
      "loss": 0.8395,
      "step": 5
    },
    {
      "epoch": 0.2727272727272727,
      "grad_norm": 18.615018844604492,
      "learning_rate": 4.2857142857142856e-05,
      "loss": 0.5229,
      "step": 6
    },
    {
      "epoch": 0.3181818181818182,
      "grad_norm": 18.63450050354004,
      "learning_rate": 5e-05,
      "loss": 0.5533,
      "step": 7
    },
    {
      "epoch": 0.36363636363636365,
      "grad_norm": 17.580198287963867,
      "learning_rate": 4.915254237288136e-05,
      "loss": 0.8831,
      "step": 8
    },
    {
      "epoch": 0.4090909090909091,
      "grad_norm": 8.56959056854248,
      "learning_rate": 4.8305084745762714e-05,
      "loss": 0.0529,
      "step": 9
    },
    {
      "epoch": 0.45454545454545453,
      "grad_norm": 16.458581924438477,
      "learning_rate": 4.745762711864407e-05,
      "loss": 0.676,
      "step": 10
    },
    {
      "epoch": 0.5,
      "grad_norm": 18.144201278686523,
      "learning_rate": 4.6610169491525425e-05,
      "loss": 0.6531,
      "step": 11
    },
    {
      "epoch": 0.5454545454545454,
      "grad_norm": 5.568901062011719,
      "learning_rate": 4.5762711864406784e-05,
      "loss": 0.1753,
      "step": 12
    },
    {
      "epoch": 0.5909090909090909,
      "grad_norm": 8.597810745239258,
      "learning_rate": 4.491525423728814e-05,
      "loss": 0.5251,
      "step": 13
    },
    {
      "epoch": 0.6363636363636364,
      "grad_norm": 6.5929694175720215,
      "learning_rate": 4.4067796610169495e-05,
      "loss": 0.1306,
      "step": 14
    },
    {
      "epoch": 0.6818181818181818,
      "grad_norm": 5.372793674468994,
      "learning_rate": 4.3220338983050854e-05,
      "loss": 0.3919,
      "step": 15
    },
    {
      "epoch": 0.7272727272727273,
      "grad_norm": 12.74032974243164,
      "learning_rate": 4.2372881355932206e-05,
      "loss": 0.6617,
      "step": 16
    },
    {
      "epoch": 0.7727272727272727,
      "grad_norm": 4.323923110961914,
      "learning_rate": 4.152542372881356e-05,
      "loss": 0.5658,
      "step": 17
    },
    {
      "epoch": 0.8181818181818182,
      "grad_norm": 10.929731369018555,
      "learning_rate": 4.067796610169492e-05,
      "loss": 0.7611,
      "step": 18
    },
    {
      "epoch": 0.8636363636363636,
      "grad_norm": 7.1887006759643555,
      "learning_rate": 3.983050847457627e-05,
      "loss": 0.1684,
      "step": 19
    },
    {
      "epoch": 0.9090909090909091,
      "grad_norm": 5.684032440185547,
      "learning_rate": 3.898305084745763e-05,
      "loss": 0.5265,
      "step": 20
    },
    {
      "epoch": 0.9545454545454546,
      "grad_norm": 13.379312515258789,
      "learning_rate": 3.813559322033898e-05,
      "loss": 0.9614,
      "step": 21
    },
    {
      "epoch": 1.0,
      "grad_norm": 7.999670028686523,
      "learning_rate": 3.728813559322034e-05,
      "loss": 0.5465,
      "step": 22
    },
    {
      "epoch": 1.0,
      "eval_explained_variance": 0.03604179620742798,
      "eval_loss": 0.2765257656574249,
      "eval_mae": 0.449463814496994,
      "eval_mse": 0.2765257656574249,
      "eval_r2": -0.037507448771466834,
      "eval_rmse": 0.525857150554657,
      "eval_runtime": 0.6632,
      "eval_samples_per_second": 66.345,
      "eval_steps_per_second": 4.524,
      "step": 22
    },
    {
      "epoch": 1.0454545454545454,
      "grad_norm": 7.3439531326293945,
      "learning_rate": 3.644067796610169e-05,
      "loss": 0.3011,
      "step": 23
    },
    {
      "epoch": 1.0909090909090908,
      "grad_norm": 5.934049129486084,
      "learning_rate": 3.559322033898305e-05,
      "loss": 0.4443,
      "step": 24
    },
    {
      "epoch": 1.1363636363636362,
      "grad_norm": 13.472294807434082,
      "learning_rate": 3.474576271186441e-05,
      "loss": 1.0194,
      "step": 25
    },
    {
      "epoch": 1.1818181818181819,
      "grad_norm": 3.9795279502868652,
      "learning_rate": 3.389830508474576e-05,
      "loss": 0.4727,
      "step": 26
    },
    {
      "epoch": 1.2272727272727273,
      "grad_norm": 14.359926223754883,
      "learning_rate": 3.305084745762712e-05,
      "loss": 0.147,
      "step": 27
    },
    {
      "epoch": 1.2727272727272727,
      "grad_norm": 12.326262474060059,
      "learning_rate": 3.2203389830508473e-05,
      "loss": 0.8002,
      "step": 28
    },
    {
      "epoch": 1.3181818181818181,
      "grad_norm": 9.748495101928711,
      "learning_rate": 3.135593220338983e-05,
      "loss": 0.7462,
      "step": 29
    },
    {
      "epoch": 1.3636363636363638,
      "grad_norm": 7.03469705581665,
      "learning_rate": 3.050847457627119e-05,
      "loss": 0.1601,
      "step": 30
    },
    {
      "epoch": 1.4090909090909092,
      "grad_norm": 83.44405364990234,
      "learning_rate": 2.9661016949152544e-05,
      "loss": 1.3339,
      "step": 31
    },
    {
      "epoch": 1.4545454545454546,
      "grad_norm": 10.450496673583984,
      "learning_rate": 2.88135593220339e-05,
      "loss": 0.5066,
      "step": 32
    },
    {
      "epoch": 1.5,
      "grad_norm": 17.28018569946289,
      "learning_rate": 2.7966101694915255e-05,
      "loss": 0.1735,
      "step": 33
    },
    {
      "epoch": 1.5454545454545454,
      "grad_norm": 9.559082984924316,
      "learning_rate": 2.711864406779661e-05,
      "loss": 0.138,
      "step": 34
    },
    {
      "epoch": 1.5909090909090908,
      "grad_norm": 4.343846797943115,
      "learning_rate": 2.627118644067797e-05,
      "loss": 0.3694,
      "step": 35
    },
    {
      "epoch": 1.6363636363636362,
      "grad_norm": 3.035878896713257,
      "learning_rate": 2.5423728813559322e-05,
      "loss": 0.172,
      "step": 36
    },
    {
      "epoch": 1.6818181818181817,
      "grad_norm": 8.587645530700684,
      "learning_rate": 2.457627118644068e-05,
      "loss": 0.6394,
      "step": 37
    },
    {
      "epoch": 1.7272727272727273,
      "grad_norm": 4.444578647613525,
      "learning_rate": 2.3728813559322036e-05,
      "loss": 0.5123,
      "step": 38
    },
    {
      "epoch": 1.7727272727272727,
      "grad_norm": 13.137991905212402,
      "learning_rate": 2.2881355932203392e-05,
      "loss": 0.8485,
      "step": 39
    },
    {
      "epoch": 1.8181818181818183,
      "grad_norm": 3.9930429458618164,
      "learning_rate": 2.2033898305084748e-05,
      "loss": 0.4104,
      "step": 40
    },
    {
      "epoch": 1.8636363636363638,
      "grad_norm": 12.926916122436523,
      "learning_rate": 2.1186440677966103e-05,
      "loss": 0.133,
      "step": 41
    },
    {
      "epoch": 1.9090909090909092,
      "grad_norm": 340.0639953613281,
      "learning_rate": 2.033898305084746e-05,
      "loss": 0.2554,
      "step": 42
    },
    {
      "epoch": 1.9545454545454546,
      "grad_norm": 3.31027889251709,
      "learning_rate": 1.9491525423728814e-05,
      "loss": 0.1767,
      "step": 43
    },
    {
      "epoch": 2.0,
      "grad_norm": 4.212459564208984,
      "learning_rate": 1.864406779661017e-05,
      "loss": 0.3367,
      "step": 44
    },
    {
      "epoch": 2.0,
      "eval_explained_variance": 0.19774705171585083,
      "eval_loss": 0.23613856732845306,
      "eval_mae": 0.4021577537059784,
      "eval_mse": 0.23613858222961426,
      "eval_r2": 0.11402264383641758,
      "eval_rmse": 0.48594093322753906,
      "eval_runtime": 0.1551,
      "eval_samples_per_second": 283.767,
      "eval_steps_per_second": 19.348,
      "step": 44
    },
    {
      "epoch": 2.0454545454545454,
      "grad_norm": 12.102192878723145,
      "learning_rate": 1.7796610169491526e-05,
      "loss": 0.1745,
      "step": 45
    },
    {
      "epoch": 2.090909090909091,
      "grad_norm": 9.342068672180176,
      "learning_rate": 1.694915254237288e-05,
      "loss": 0.7482,
      "step": 46
    },
    {
      "epoch": 2.1363636363636362,
      "grad_norm": 9.230262756347656,
      "learning_rate": 1.6101694915254237e-05,
      "loss": 0.5335,
      "step": 47
    },
    {
      "epoch": 2.1818181818181817,
      "grad_norm": 9.847858428955078,
      "learning_rate": 1.5254237288135596e-05,
      "loss": 0.7199,
      "step": 48
    },
    {
      "epoch": 2.227272727272727,
      "grad_norm": 15.958962440490723,
      "learning_rate": 1.440677966101695e-05,
      "loss": 0.1243,
      "step": 49
    },
    {
      "epoch": 2.2727272727272725,
      "grad_norm": 4.7927680015563965,
      "learning_rate": 1.3559322033898305e-05,
      "loss": 0.3531,
      "step": 50
    },
    {
      "epoch": 2.3181818181818183,
      "grad_norm": 9.752751350402832,
      "learning_rate": 1.2711864406779661e-05,
      "loss": 0.5788,
      "step": 51
    },
    {
      "epoch": 2.3636363636363638,
      "grad_norm": 57.86676025390625,
      "learning_rate": 1.1864406779661018e-05,
      "loss": 0.5495,
      "step": 52
    },
    {
      "epoch": 2.409090909090909,
      "grad_norm": 14.14869499206543,
      "learning_rate": 1.1016949152542374e-05,
      "loss": 0.3548,
      "step": 53
    },
    {
      "epoch": 2.4545454545454546,
      "grad_norm": 11.623541831970215,
      "learning_rate": 1.016949152542373e-05,
      "loss": 0.1877,
      "step": 54
    },
    {
      "epoch": 2.5,
      "grad_norm": 18.557632446289062,
      "learning_rate": 9.322033898305085e-06,
      "loss": 0.1552,
      "step": 55
    },
    {
      "epoch": 2.5454545454545454,
      "grad_norm": 12.741007804870605,
      "learning_rate": 8.47457627118644e-06,
      "loss": 0.2064,
      "step": 56
    },
    {
      "epoch": 2.590909090909091,
      "grad_norm": 14.985441207885742,
      "learning_rate": 7.627118644067798e-06,
      "loss": 0.5428,
      "step": 57
    },
    {
      "epoch": 2.6363636363636362,
      "grad_norm": 6.256850719451904,
      "learning_rate": 6.779661016949153e-06,
      "loss": 0.4934,
      "step": 58
    },
    {
      "epoch": 2.6818181818181817,
      "grad_norm": 13.29955005645752,
      "learning_rate": 5.932203389830509e-06,
      "loss": 0.1479,
      "step": 59
    },
    {
      "epoch": 2.7272727272727275,
      "grad_norm": 11.591996192932129,
      "learning_rate": 5.084745762711865e-06,
      "loss": 0.4157,
      "step": 60
    },
    {
      "epoch": 2.7727272727272725,
      "grad_norm": 6.75338077545166,
      "learning_rate": 4.23728813559322e-06,
      "loss": 0.3989,
      "step": 61
    },
    {
      "epoch": 2.8181818181818183,
      "grad_norm": 20.437047958374023,
      "learning_rate": 3.3898305084745763e-06,
      "loss": 1.0043,
      "step": 62
    },
    {
      "epoch": 2.8636363636363638,
      "grad_norm": 9.836654663085938,
      "learning_rate": 2.5423728813559323e-06,
      "loss": 0.0952,
      "step": 63
    },
    {
      "epoch": 2.909090909090909,
      "grad_norm": 8.069482803344727,
      "learning_rate": 1.6949152542372882e-06,
      "loss": 0.2437,
      "step": 64
    },
    {
      "epoch": 2.9545454545454546,
      "grad_norm": 4.515679359436035,
      "learning_rate": 8.474576271186441e-07,
      "loss": 0.0323,
      "step": 65
    },
    {
      "epoch": 3.0,
      "grad_norm": 5.198877334594727,
      "learning_rate": 0.0,
      "loss": 0.4612,
      "step": 66
    },
    {
      "epoch": 3.0,
      "eval_explained_variance": 0.26253634691238403,
      "eval_loss": 0.19687849283218384,
      "eval_mae": 0.2855921685695648,
      "eval_mse": 0.19687849283218384,
      "eval_r2": 0.2613240842619654,
      "eval_rmse": 0.4437099099159241,
      "eval_runtime": 0.1522,
      "eval_samples_per_second": 289.029,
      "eval_steps_per_second": 19.706,
      "step": 66
    }
  ],
  "logging_steps": 1,
  "max_steps": 66,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "total_flos": 34730347474944.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}