| { |
| "best_global_step": 150, |
| "best_metric": 0.9, |
| "best_model_checkpoint": "./models/punctuation/decoder_model_simple/checkpoint-150", |
| "epoch": 3.0, |
| "eval_steps": 50, |
| "global_step": 321, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.09411764705882353, |
| "grad_norm": 1.3681155443191528, |
| "learning_rate": 2.7272727272727273e-05, |
| "loss": 0.7202, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.18823529411764706, |
| "grad_norm": 0.7711533308029175, |
| "learning_rate": 5.757575757575758e-05, |
| "loss": 0.3185, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.2823529411764706, |
| "grad_norm": 1.2256836891174316, |
| "learning_rate": 8.787878787878789e-05, |
| "loss": 0.1827, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.3764705882352941, |
| "grad_norm": 0.888201892375946, |
| "learning_rate": 9.989294616193017e-05, |
| "loss": 0.1346, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.47058823529411764, |
| "grad_norm": 0.853918731212616, |
| "learning_rate": 9.924038765061042e-05, |
| "loss": 0.0721, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.47058823529411764, |
| "eval_loss": 0.04328128695487976, |
| "eval_model_preparation_time": 0.0162, |
| "eval_runtime": 2.2686, |
| "eval_samples_per_second": 44.081, |
| "eval_steps_per_second": 22.04, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.5647058823529412, |
| "grad_norm": 0.48631730675697327, |
| "learning_rate": 9.800249271929645e-05, |
| "loss": 0.042, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.6588235294117647, |
| "grad_norm": 0.5516029596328735, |
| "learning_rate": 9.619397662556435e-05, |
| "loss": 0.0218, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.7529411764705882, |
| "grad_norm": 0.28095027804374695, |
| "learning_rate": 9.38363377853754e-05, |
| "loss": 0.0127, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.8470588235294118, |
| "grad_norm": 0.8756083846092224, |
| "learning_rate": 9.09576022144496e-05, |
| "loss": 0.0124, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.9411764705882353, |
| "grad_norm": 0.5514245629310608, |
| "learning_rate": 8.759199037394887e-05, |
| "loss": 0.0119, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.9411764705882353, |
| "eval_loss": 0.004549662582576275, |
| "eval_model_preparation_time": 0.0162, |
| "eval_runtime": 2.2779, |
| "eval_samples_per_second": 43.9, |
| "eval_steps_per_second": 21.95, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.0282352941176471, |
| "grad_norm": 0.48209479451179504, |
| "learning_rate": 8.377951038078302e-05, |
| "loss": 0.0074, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.1223529411764706, |
| "grad_norm": 0.22237619757652283, |
| "learning_rate": 7.956548241817912e-05, |
| "loss": 0.0033, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.2164705882352942, |
| "grad_norm": 0.22323866188526154, |
| "learning_rate": 7.500000000000001e-05, |
| "loss": 0.0019, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.3105882352941176, |
| "grad_norm": 0.044407621026039124, |
| "learning_rate": 7.013733449293687e-05, |
| "loss": 0.0022, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.4047058823529412, |
| "grad_norm": 0.1491803228855133, |
| "learning_rate": 6.503528997521366e-05, |
| "loss": 0.0026, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.4047058823529412, |
| "eval_loss": 0.00037357292603701353, |
| "eval_model_preparation_time": 0.0162, |
| "eval_runtime": 2.317, |
| "eval_samples_per_second": 43.159, |
| "eval_steps_per_second": 21.579, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.4988235294117647, |
| "grad_norm": 0.014182478189468384, |
| "learning_rate": 5.9754516100806423e-05, |
| "loss": 0.0012, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.592941176470588, |
| "grad_norm": 0.015145699493587017, |
| "learning_rate": 5.435778713738292e-05, |
| "loss": 0.0003, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.6870588235294117, |
| "grad_norm": 0.011491155251860619, |
| "learning_rate": 4.890925574827195e-05, |
| "loss": 0.0039, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.7811764705882354, |
| "grad_norm": 0.16684198379516602, |
| "learning_rate": 4.347369038899744e-05, |
| "loss": 0.0021, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.8752941176470588, |
| "grad_norm": 0.006741759832948446, |
| "learning_rate": 3.8115705383691355e-05, |
| "loss": 0.0009, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.8752941176470588, |
| "eval_loss": 0.0005264483625069261, |
| "eval_model_preparation_time": 0.0162, |
| "eval_runtime": 2.2881, |
| "eval_samples_per_second": 43.705, |
| "eval_steps_per_second": 21.852, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.9694117647058822, |
| "grad_norm": 0.0708838701248169, |
| "learning_rate": 3.289899283371657e-05, |
| "loss": 0.0012, |
| "step": 210 |
| }, |
| { |
| "epoch": 2.0564705882352943, |
| "grad_norm": 0.0013810750097036362, |
| "learning_rate": 2.7885565489049946e-05, |
| "loss": 0.0005, |
| "step": 220 |
| }, |
| { |
| "epoch": 2.1505882352941175, |
| "grad_norm": 0.0015096160350367427, |
| "learning_rate": 2.3135019582658802e-05, |
| "loss": 0.0004, |
| "step": 230 |
| }, |
| { |
| "epoch": 2.244705882352941, |
| "grad_norm": 0.00760283600538969, |
| "learning_rate": 1.8703826390797048e-05, |
| "loss": 0.0002, |
| "step": 240 |
| }, |
| { |
| "epoch": 2.3388235294117647, |
| "grad_norm": 0.00949388649314642, |
| "learning_rate": 1.4644660940672627e-05, |
| "loss": 0.0002, |
| "step": 250 |
| }, |
| { |
| "epoch": 2.3388235294117647, |
| "eval_loss": 0.00012819873518310487, |
| "eval_model_preparation_time": 0.0162, |
| "eval_runtime": 2.2912, |
| "eval_samples_per_second": 43.646, |
| "eval_steps_per_second": 21.823, |
| "step": 250 |
| }, |
| { |
| "epoch": 2.4329411764705884, |
| "grad_norm": 0.0011758297914639115, |
| "learning_rate": 1.100577584535592e-05, |
| "loss": 0.0002, |
| "step": 260 |
| }, |
| { |
| "epoch": 2.527058823529412, |
| "grad_norm": 0.002146691083908081, |
| "learning_rate": 7.830427709355725e-06, |
| "loss": 0.0003, |
| "step": 270 |
| }, |
| { |
| "epoch": 2.621176470588235, |
| "grad_norm": 0.003603292629122734, |
| "learning_rate": 5.156362923365588e-06, |
| "loss": 0.0004, |
| "step": 280 |
| }, |
| { |
| "epoch": 2.715294117647059, |
| "grad_norm": 0.005790353287011385, |
| "learning_rate": 3.0153689607045845e-06, |
| "loss": 0.0003, |
| "step": 290 |
| }, |
| { |
| "epoch": 2.8094117647058825, |
| "grad_norm": 0.012438272126019001, |
| "learning_rate": 1.4328965093369283e-06, |
| "loss": 0.0001, |
| "step": 300 |
| }, |
| { |
| "epoch": 2.8094117647058825, |
| "eval_loss": 8.123814041027799e-05, |
| "eval_model_preparation_time": 0.0162, |
| "eval_runtime": 2.3021, |
| "eval_samples_per_second": 43.439, |
| "eval_steps_per_second": 21.72, |
| "step": 300 |
| }, |
| { |
| "epoch": 2.9035294117647057, |
| "grad_norm": 0.0017391764558851719, |
| "learning_rate": 4.277569313094809e-07, |
| "loss": 0.0001, |
| "step": 310 |
| }, |
| { |
| "epoch": 2.9976470588235293, |
| "grad_norm": 0.0015454388922080398, |
| "learning_rate": 1.189864600454338e-08, |
| "loss": 0.0001, |
| "step": 320 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 321, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.146462931369165e+16, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|