{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.021114864864864864,
  "eval_steps": 25,
  "global_step": 25,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0008445945945945946,
      "grad_norm": 7.727255344390869,
      "learning_rate": 2e-05,
      "loss": 8.372,
      "step": 1
    },
    {
      "epoch": 0.0008445945945945946,
      "eval_loss": 8.063016891479492,
      "eval_runtime": 125.6054,
      "eval_samples_per_second": 3.973,
      "eval_steps_per_second": 1.99,
      "step": 1
    },
    {
      "epoch": 0.0016891891891891893,
      "grad_norm": 9.669200897216797,
      "learning_rate": 4e-05,
      "loss": 8.046,
      "step": 2
    },
    {
      "epoch": 0.002533783783783784,
      "grad_norm": 9.18079948425293,
      "learning_rate": 6e-05,
      "loss": 8.0321,
      "step": 3
    },
    {
      "epoch": 0.0033783783783783786,
      "grad_norm": 9.363329887390137,
      "learning_rate": 8e-05,
      "loss": 7.0182,
      "step": 4
    },
    {
      "epoch": 0.004222972972972973,
      "grad_norm": 9.816231727600098,
      "learning_rate": 0.0001,
      "loss": 7.7225,
      "step": 5
    },
    {
      "epoch": 0.005067567567567568,
      "grad_norm": 9.099088668823242,
      "learning_rate": 0.00012,
      "loss": 7.875,
      "step": 6
    },
    {
      "epoch": 0.0059121621621621625,
      "grad_norm": 9.296707153320312,
      "learning_rate": 0.00014,
      "loss": 5.6422,
      "step": 7
    },
    {
      "epoch": 0.006756756756756757,
      "grad_norm": 9.570911407470703,
      "learning_rate": 0.00016,
      "loss": 3.8105,
      "step": 8
    },
    {
      "epoch": 0.007601351351351352,
      "grad_norm": 8.131881713867188,
      "learning_rate": 0.00018,
      "loss": 2.0885,
      "step": 9
    },
    {
      "epoch": 0.008445945945945946,
      "grad_norm": 5.208433628082275,
      "learning_rate": 0.0002,
      "loss": 1.0562,
      "step": 10
    },
    {
      "epoch": 0.009290540540540541,
      "grad_norm": 10.91145133972168,
      "learning_rate": 0.0001999390827019096,
      "loss": 2.9782,
      "step": 11
    },
    {
      "epoch": 0.010135135135135136,
      "grad_norm": 17.846271514892578,
      "learning_rate": 0.00019975640502598244,
      "loss": 3.6074,
      "step": 12
    },
    {
      "epoch": 0.01097972972972973,
      "grad_norm": 8.735333442687988,
      "learning_rate": 0.00019945218953682734,
      "loss": 0.805,
      "step": 13
    },
    {
      "epoch": 0.011824324324324325,
      "grad_norm": 6.351312637329102,
      "learning_rate": 0.00019902680687415705,
      "loss": 1.1463,
      "step": 14
    },
    {
      "epoch": 0.01266891891891892,
      "grad_norm": 2.7280004024505615,
      "learning_rate": 0.00019848077530122083,
      "loss": 0.2077,
      "step": 15
    },
    {
      "epoch": 0.013513513513513514,
      "grad_norm": 5.975780487060547,
      "learning_rate": 0.00019781476007338058,
      "loss": 1.1476,
      "step": 16
    },
    {
      "epoch": 0.014358108108108109,
      "grad_norm": 5.841843605041504,
      "learning_rate": 0.00019702957262759965,
      "loss": 1.0156,
      "step": 17
    },
    {
      "epoch": 0.015202702702702704,
      "grad_norm": 6.997043132781982,
      "learning_rate": 0.0001961261695938319,
      "loss": 0.6625,
      "step": 18
    },
    {
      "epoch": 0.016047297297297296,
      "grad_norm": 7.254495620727539,
      "learning_rate": 0.00019510565162951537,
      "loss": 0.8055,
      "step": 19
    },
    {
      "epoch": 0.016891891891891893,
      "grad_norm": 5.706070423126221,
      "learning_rate": 0.00019396926207859084,
      "loss": 0.3771,
      "step": 20
    },
    {
      "epoch": 0.017736486486486486,
      "grad_norm": 3.621236801147461,
      "learning_rate": 0.00019271838545667876,
      "loss": 0.2341,
      "step": 21
    },
    {
      "epoch": 0.018581081081081082,
      "grad_norm": 1.3116642236709595,
      "learning_rate": 0.0001913545457642601,
      "loss": 0.0388,
      "step": 22
    },
    {
      "epoch": 0.019425675675675675,
      "grad_norm": 6.483520030975342,
      "learning_rate": 0.0001898794046299167,
      "loss": 0.7181,
      "step": 23
    },
    {
      "epoch": 0.02027027027027027,
      "grad_norm": 16.79340171813965,
      "learning_rate": 0.00018829475928589271,
      "loss": 1.0507,
      "step": 24
    },
    {
      "epoch": 0.021114864864864864,
      "grad_norm": 3.9302544593811035,
      "learning_rate": 0.00018660254037844388,
      "loss": 0.8626,
      "step": 25
    },
    {
      "epoch": 0.021114864864864864,
      "eval_loss": 0.4980570375919342,
      "eval_runtime": 126.9443,
      "eval_samples_per_second": 3.931,
      "eval_steps_per_second": 1.969,
      "step": 25
    }
  ],
  "logging_steps": 1,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 25,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3.3054705844224e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}