{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.05714285714285714,
  "eval_steps": 500,
  "global_step": 400,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0014285714285714286,
      "grad_norm": 0.8723945021629333,
      "learning_rate": 0.0002,
      "loss": 1.4591,
      "step": 10
    },
    {
      "epoch": 0.002857142857142857,
      "grad_norm": 0.6812328100204468,
      "learning_rate": 0.0002,
      "loss": 1.335,
      "step": 20
    },
    {
      "epoch": 0.004285714285714286,
      "grad_norm": 0.4533853232860565,
      "learning_rate": 0.0002,
      "loss": 1.2956,
      "step": 30
    },
    {
      "epoch": 0.005714285714285714,
      "grad_norm": 0.5219816565513611,
      "learning_rate": 0.0002,
      "loss": 1.177,
      "step": 40
    },
    {
      "epoch": 0.007142857142857143,
      "grad_norm": 0.6963663697242737,
      "learning_rate": 0.0002,
      "loss": 1.1449,
      "step": 50
    },
    {
      "epoch": 0.008571428571428572,
      "grad_norm": 0.59157794713974,
      "learning_rate": 0.0002,
      "loss": 1.1329,
      "step": 60
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.6530716419219971,
      "learning_rate": 0.0002,
      "loss": 1.0745,
      "step": 70
    },
    {
      "epoch": 0.011428571428571429,
      "grad_norm": 0.5379722714424133,
      "learning_rate": 0.0002,
      "loss": 1.0786,
      "step": 80
    },
    {
      "epoch": 0.012857142857142857,
      "grad_norm": 0.6812428832054138,
      "learning_rate": 0.0002,
      "loss": 1.1211,
      "step": 90
    },
    {
      "epoch": 0.014285714285714285,
      "grad_norm": 0.7551608681678772,
      "learning_rate": 0.0002,
      "loss": 0.936,
      "step": 100
    },
    {
      "epoch": 0.015714285714285715,
      "grad_norm": 0.6422017812728882,
      "learning_rate": 0.0002,
      "loss": 1.1515,
      "step": 110
    },
    {
      "epoch": 0.017142857142857144,
      "grad_norm": 0.4808028042316437,
      "learning_rate": 0.0002,
      "loss": 1.068,
      "step": 120
    },
    {
      "epoch": 0.018571428571428572,
      "grad_norm": 0.4988093078136444,
      "learning_rate": 0.0002,
      "loss": 1.0567,
      "step": 130
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.5651537179946899,
      "learning_rate": 0.0002,
      "loss": 0.9769,
      "step": 140
    },
    {
      "epoch": 0.02142857142857143,
      "grad_norm": 0.7912259101867676,
      "learning_rate": 0.0002,
      "loss": 0.8673,
      "step": 150
    },
    {
      "epoch": 0.022857142857142857,
      "grad_norm": 0.6468149423599243,
      "learning_rate": 0.0002,
      "loss": 1.1511,
      "step": 160
    },
    {
      "epoch": 0.024285714285714285,
      "grad_norm": 0.5371859669685364,
      "learning_rate": 0.0002,
      "loss": 1.0232,
      "step": 170
    },
    {
      "epoch": 0.025714285714285714,
      "grad_norm": 0.5603945255279541,
      "learning_rate": 0.0002,
      "loss": 1.0743,
      "step": 180
    },
    {
      "epoch": 0.027142857142857142,
      "grad_norm": 0.558032751083374,
      "learning_rate": 0.0002,
      "loss": 1.0368,
      "step": 190
    },
    {
      "epoch": 0.02857142857142857,
      "grad_norm": 0.6991172432899475,
      "learning_rate": 0.0002,
      "loss": 0.9307,
      "step": 200
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.4212726652622223,
      "learning_rate": 0.0002,
      "loss": 0.9608,
      "step": 210
    },
    {
      "epoch": 0.03142857142857143,
      "grad_norm": 0.5686449408531189,
      "learning_rate": 0.0002,
      "loss": 1.0988,
      "step": 220
    },
    {
      "epoch": 0.032857142857142856,
      "grad_norm": 0.5314635038375854,
      "learning_rate": 0.0002,
      "loss": 0.9674,
      "step": 230
    },
    {
      "epoch": 0.03428571428571429,
      "grad_norm": 1.0691275596618652,
      "learning_rate": 0.0002,
      "loss": 0.9617,
      "step": 240
    },
    {
      "epoch": 0.03571428571428571,
      "grad_norm": 0.3752906918525696,
      "learning_rate": 0.0002,
      "loss": 0.8711,
      "step": 250
    },
    {
      "epoch": 0.037142857142857144,
      "grad_norm": 0.668821394443512,
      "learning_rate": 0.0002,
      "loss": 0.8694,
      "step": 260
    },
    {
      "epoch": 0.03857142857142857,
      "grad_norm": 0.7953740358352661,
      "learning_rate": 0.0002,
      "loss": 0.9395,
      "step": 270
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.4894189238548279,
      "learning_rate": 0.0002,
      "loss": 1.0876,
      "step": 280
    },
    {
      "epoch": 0.041428571428571426,
      "grad_norm": 0.6148614287376404,
      "learning_rate": 0.0002,
      "loss": 1.0807,
      "step": 290
    },
    {
      "epoch": 0.04285714285714286,
      "grad_norm": 0.5681003332138062,
      "learning_rate": 0.0002,
      "loss": 0.9068,
      "step": 300
    },
    {
      "epoch": 0.04428571428571428,
      "grad_norm": 0.6531659960746765,
      "learning_rate": 0.0002,
      "loss": 0.9016,
      "step": 310
    },
    {
      "epoch": 0.045714285714285714,
      "grad_norm": 0.46772944927215576,
      "learning_rate": 0.0002,
      "loss": 0.9432,
      "step": 320
    },
    {
      "epoch": 0.047142857142857146,
      "grad_norm": 0.9201459884643555,
      "learning_rate": 0.0002,
      "loss": 0.9689,
      "step": 330
    },
    {
      "epoch": 0.04857142857142857,
      "grad_norm": 0.4148743748664856,
      "learning_rate": 0.0002,
      "loss": 0.9866,
      "step": 340
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0026582479476929,
      "learning_rate": 0.0002,
      "loss": 1.0142,
      "step": 350
    },
    {
      "epoch": 0.05142857142857143,
      "grad_norm": 0.5541752576828003,
      "learning_rate": 0.0002,
      "loss": 1.1917,
      "step": 360
    },
    {
      "epoch": 0.05285714285714286,
      "grad_norm": 0.367127001285553,
      "learning_rate": 0.0002,
      "loss": 0.9925,
      "step": 370
    },
    {
      "epoch": 0.054285714285714284,
      "grad_norm": 0.7337833046913147,
      "learning_rate": 0.0002,
      "loss": 0.9636,
      "step": 380
    },
    {
      "epoch": 0.055714285714285716,
      "grad_norm": 0.8890251517295837,
      "learning_rate": 0.0002,
      "loss": 0.8888,
      "step": 390
    },
    {
      "epoch": 0.05714285714285714,
      "grad_norm": 0.9325958490371704,
      "learning_rate": 0.0002,
      "loss": 0.746,
      "step": 400
    }
  ],
  "logging_steps": 10,
  "max_steps": 400,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 10,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3002167578036480.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}