| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 8.615384615384615, |
| "eval_steps": 500, |
| "global_step": 60, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.15384615384615385, |
| "grad_norm": 4.412626190006247, |
| "learning_rate": 8.333333333333333e-07, |
| "loss": 1.2032, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.3076923076923077, |
| "grad_norm": 4.441970068628214, |
| "learning_rate": 1.6666666666666667e-06, |
| "loss": 1.2132, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.46153846153846156, |
| "grad_norm": 4.581997128609873, |
| "learning_rate": 2.5e-06, |
| "loss": 1.2137, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.6153846153846154, |
| "grad_norm": 4.304844304249193, |
| "learning_rate": 3.3333333333333333e-06, |
| "loss": 1.1822, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.7692307692307693, |
| "grad_norm": 3.8791643255654202, |
| "learning_rate": 4.166666666666667e-06, |
| "loss": 1.2146, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.9230769230769231, |
| "grad_norm": 3.0048985534972137, |
| "learning_rate": 5e-06, |
| "loss": 1.1544, |
| "step": 6 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 3.0048985534972137, |
| "learning_rate": 4.995770395678171e-06, |
| "loss": 0.5766, |
| "step": 7 |
| }, |
| { |
| "epoch": 1.1538461538461537, |
| "grad_norm": 2.7977743386578084, |
| "learning_rate": 4.983095894354858e-06, |
| "loss": 1.1065, |
| "step": 8 |
| }, |
| { |
| "epoch": 1.3076923076923077, |
| "grad_norm": 2.952566520549763, |
| "learning_rate": 4.962019382530521e-06, |
| "loss": 1.0679, |
| "step": 9 |
| }, |
| { |
| "epoch": 1.4615384615384617, |
| "grad_norm": 2.6181710127008944, |
| "learning_rate": 4.93261217644956e-06, |
| "loss": 1.0155, |
| "step": 10 |
| }, |
| { |
| "epoch": 1.6153846153846154, |
| "grad_norm": 2.559248743534276, |
| "learning_rate": 4.894973780788722e-06, |
| "loss": 1.0063, |
| "step": 11 |
| }, |
| { |
| "epoch": 1.7692307692307692, |
| "grad_norm": 2.022076331439615, |
| "learning_rate": 4.849231551964771e-06, |
| "loss": 0.9441, |
| "step": 12 |
| }, |
| { |
| "epoch": 1.9230769230769231, |
| "grad_norm": 2.4659223307375835, |
| "learning_rate": 4.7955402672006855e-06, |
| "loss": 0.9214, |
| "step": 13 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 2.3269412931921942, |
| "learning_rate": 4.734081600808531e-06, |
| "loss": 0.4572, |
| "step": 14 |
| }, |
| { |
| "epoch": 2.1538461538461537, |
| "grad_norm": 1.8941456630880233, |
| "learning_rate": 4.665063509461098e-06, |
| "loss": 0.8955, |
| "step": 15 |
| }, |
| { |
| "epoch": 2.3076923076923075, |
| "grad_norm": 1.647028999939705, |
| "learning_rate": 4.588719528532342e-06, |
| "loss": 0.8693, |
| "step": 16 |
| }, |
| { |
| "epoch": 2.4615384615384617, |
| "grad_norm": 1.3563259284626916, |
| "learning_rate": 4.50530798188761e-06, |
| "loss": 0.841, |
| "step": 17 |
| }, |
| { |
| "epoch": 2.6153846153846154, |
| "grad_norm": 1.251613461595753, |
| "learning_rate": 4.415111107797445e-06, |
| "loss": 0.8152, |
| "step": 18 |
| }, |
| { |
| "epoch": 2.769230769230769, |
| "grad_norm": 1.4767406480770795, |
| "learning_rate": 4.318434103932622e-06, |
| "loss": 0.7768, |
| "step": 19 |
| }, |
| { |
| "epoch": 2.9230769230769234, |
| "grad_norm": 1.5508389570529124, |
| "learning_rate": 4.215604094671835e-06, |
| "loss": 0.7846, |
| "step": 20 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 1.5508389570529124, |
| "learning_rate": 4.106969024216348e-06, |
| "loss": 0.3778, |
| "step": 21 |
| }, |
| { |
| "epoch": 3.1538461538461537, |
| "grad_norm": 1.491365044443786, |
| "learning_rate": 3.992896479256966e-06, |
| "loss": 0.7495, |
| "step": 22 |
| }, |
| { |
| "epoch": 3.3076923076923075, |
| "grad_norm": 1.2365324578204933, |
| "learning_rate": 3.8737724451770155e-06, |
| "loss": 0.7175, |
| "step": 23 |
| }, |
| { |
| "epoch": 3.4615384615384617, |
| "grad_norm": 1.1370658509132563, |
| "learning_rate": 3.7500000000000005e-06, |
| "loss": 0.7281, |
| "step": 24 |
| }, |
| { |
| "epoch": 3.6153846153846154, |
| "grad_norm": 7.279215244130937, |
| "learning_rate": 3.621997950501156e-06, |
| "loss": 0.7187, |
| "step": 25 |
| }, |
| { |
| "epoch": 3.769230769230769, |
| "grad_norm": 1.3074060847565636, |
| "learning_rate": 3.4901994150978926e-06, |
| "loss": 0.7123, |
| "step": 26 |
| }, |
| { |
| "epoch": 3.9230769230769234, |
| "grad_norm": 1.3207588352162958, |
| "learning_rate": 3.3550503583141726e-06, |
| "loss": 0.7025, |
| "step": 27 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 1.1690670792295927, |
| "learning_rate": 3.217008081777726e-06, |
| "loss": 0.3284, |
| "step": 28 |
| }, |
| { |
| "epoch": 4.153846153846154, |
| "grad_norm": 1.1528609376029573, |
| "learning_rate": 3.0765396768561005e-06, |
| "loss": 0.6758, |
| "step": 29 |
| }, |
| { |
| "epoch": 4.3076923076923075, |
| "grad_norm": 1.4549662024813634, |
| "learning_rate": 2.9341204441673267e-06, |
| "loss": 0.6794, |
| "step": 30 |
| }, |
| { |
| "epoch": 4.461538461538462, |
| "grad_norm": 0.9855903276876635, |
| "learning_rate": 2.7902322853130758e-06, |
| "loss": 0.6446, |
| "step": 31 |
| }, |
| { |
| "epoch": 4.615384615384615, |
| "grad_norm": 0.9679470488380136, |
| "learning_rate": 2.6453620722761897e-06, |
| "loss": 0.67, |
| "step": 32 |
| }, |
| { |
| "epoch": 4.769230769230769, |
| "grad_norm": 1.7352648364409022, |
| "learning_rate": 2.5e-06, |
| "loss": 0.636, |
| "step": 33 |
| }, |
| { |
| "epoch": 4.923076923076923, |
| "grad_norm": 0.9877574746856996, |
| "learning_rate": 2.3546379277238107e-06, |
| "loss": 0.6559, |
| "step": 34 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.9877574746856996, |
| "learning_rate": 2.2097677146869242e-06, |
| "loss": 0.3249, |
| "step": 35 |
| }, |
| { |
| "epoch": 5.153846153846154, |
| "grad_norm": 1.3800661026691763, |
| "learning_rate": 2.0658795558326745e-06, |
| "loss": 0.6344, |
| "step": 36 |
| }, |
| { |
| "epoch": 5.3076923076923075, |
| "grad_norm": 1.383753298521261, |
| "learning_rate": 1.9234603231439e-06, |
| "loss": 0.6334, |
| "step": 37 |
| }, |
| { |
| "epoch": 5.461538461538462, |
| "grad_norm": 1.043144309237299, |
| "learning_rate": 1.7829919182222752e-06, |
| "loss": 0.6159, |
| "step": 38 |
| }, |
| { |
| "epoch": 5.615384615384615, |
| "grad_norm": 0.915191752421227, |
| "learning_rate": 1.6449496416858285e-06, |
| "loss": 0.6194, |
| "step": 39 |
| }, |
| { |
| "epoch": 5.769230769230769, |
| "grad_norm": 0.9290867173246218, |
| "learning_rate": 1.509800584902108e-06, |
| "loss": 0.6215, |
| "step": 40 |
| }, |
| { |
| "epoch": 5.923076923076923, |
| "grad_norm": 0.877279653470722, |
| "learning_rate": 1.3780020494988447e-06, |
| "loss": 0.6164, |
| "step": 41 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 0.9625404590011445, |
| "learning_rate": 1.2500000000000007e-06, |
| "loss": 0.306, |
| "step": 42 |
| }, |
| { |
| "epoch": 6.153846153846154, |
| "grad_norm": 0.9447763225617137, |
| "learning_rate": 1.1262275548229852e-06, |
| "loss": 0.6098, |
| "step": 43 |
| }, |
| { |
| "epoch": 6.3076923076923075, |
| "grad_norm": 0.9026707226926396, |
| "learning_rate": 1.0071035207430352e-06, |
| "loss": 0.5961, |
| "step": 44 |
| }, |
| { |
| "epoch": 6.461538461538462, |
| "grad_norm": 0.8125670239271661, |
| "learning_rate": 8.930309757836517e-07, |
| "loss": 0.5896, |
| "step": 45 |
| }, |
| { |
| "epoch": 6.615384615384615, |
| "grad_norm": 0.8184346090402826, |
| "learning_rate": 7.843959053281663e-07, |
| "loss": 0.6085, |
| "step": 46 |
| }, |
| { |
| "epoch": 6.769230769230769, |
| "grad_norm": 0.8180073728960267, |
| "learning_rate": 6.815658960673782e-07, |
| "loss": 0.6024, |
| "step": 47 |
| }, |
| { |
| "epoch": 6.923076923076923, |
| "grad_norm": 0.797015085470038, |
| "learning_rate": 5.848888922025553e-07, |
| "loss": 0.5954, |
| "step": 48 |
| }, |
| { |
| "epoch": 7.0, |
| "grad_norm": 0.797015085470038, |
| "learning_rate": 4.946920181123904e-07, |
| "loss": 0.2997, |
| "step": 49 |
| }, |
| { |
| "epoch": 7.153846153846154, |
| "grad_norm": 1.0365029227676532, |
| "learning_rate": 4.1128047146765936e-07, |
| "loss": 0.5838, |
| "step": 50 |
| }, |
| { |
| "epoch": 7.3076923076923075, |
| "grad_norm": 0.8010913969163229, |
| "learning_rate": 3.3493649053890325e-07, |
| "loss": 0.5993, |
| "step": 51 |
| }, |
| { |
| "epoch": 7.461538461538462, |
| "grad_norm": 0.8047853994156616, |
| "learning_rate": 2.6591839919146963e-07, |
| "loss": 0.5908, |
| "step": 52 |
| }, |
| { |
| "epoch": 7.615384615384615, |
| "grad_norm": 0.7895795633351214, |
| "learning_rate": 2.044597327993153e-07, |
| "loss": 0.5756, |
| "step": 53 |
| }, |
| { |
| "epoch": 7.769230769230769, |
| "grad_norm": 0.8891963315989966, |
| "learning_rate": 1.507684480352292e-07, |
| "loss": 0.6084, |
| "step": 54 |
| }, |
| { |
| "epoch": 7.923076923076923, |
| "grad_norm": 0.8233725761566618, |
| "learning_rate": 1.0502621921127776e-07, |
| "loss": 0.5859, |
| "step": 55 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 0.9193007935270117, |
| "learning_rate": 6.738782355044048e-08, |
| "loss": 0.2892, |
| "step": 56 |
| }, |
| { |
| "epoch": 8.153846153846153, |
| "grad_norm": 0.8092902005307483, |
| "learning_rate": 3.798061746947995e-08, |
| "loss": 0.586, |
| "step": 57 |
| }, |
| { |
| "epoch": 8.307692307692308, |
| "grad_norm": 0.7679863614372926, |
| "learning_rate": 1.6904105645142443e-08, |
| "loss": 0.5827, |
| "step": 58 |
| }, |
| { |
| "epoch": 8.461538461538462, |
| "grad_norm": 1.6098807513131712, |
| "learning_rate": 4.229604321829561e-09, |
| "loss": 0.5837, |
| "step": 59 |
| }, |
| { |
| "epoch": 8.615384615384615, |
| "grad_norm": 0.8372544817321463, |
| "learning_rate": 0.0, |
| "loss": 0.5892, |
| "step": 60 |
| }, |
| { |
| "epoch": 8.615384615384615, |
| "step": 60, |
| "total_flos": 24955762507776.0, |
| "train_loss": 0.0, |
| "train_runtime": 0.003, |
| "train_samples_per_second": 1345694.74, |
| "train_steps_per_second": 19985.565 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 60, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 24955762507776.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|