| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.956175298804781, | |
| "eval_steps": 500, | |
| "global_step": 186, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.01593625498007968, | |
| "grad_norm": 4.5625, | |
| "learning_rate": 0.0, | |
| "loss": 1.4053, | |
| "memory/device_reserved (GiB)": 61.34, | |
| "memory/max_active (GiB)": 49.6, | |
| "memory/max_allocated (GiB)": 49.6, | |
| "step": 1, | |
| "tokens_per_second_per_gpu": 4706.79, | |
| "total_tokens": 180518 | |
| }, | |
| { | |
| "epoch": 0.03187250996015936, | |
| "grad_norm": 4.34375, | |
| "learning_rate": 1.111111111111111e-06, | |
| "loss": 1.3369, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 2, | |
| "tokens_per_second_per_gpu": 5826.72, | |
| "total_tokens": 363757 | |
| }, | |
| { | |
| "epoch": 0.04780876494023904, | |
| "grad_norm": 4.15625, | |
| "learning_rate": 2.222222222222222e-06, | |
| "loss": 1.3623, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 3, | |
| "tokens_per_second_per_gpu": 5939.96, | |
| "total_tokens": 558043 | |
| }, | |
| { | |
| "epoch": 0.06374501992031872, | |
| "grad_norm": 4.34375, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 1.3643, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 4, | |
| "tokens_per_second_per_gpu": 5941.47, | |
| "total_tokens": 743276 | |
| }, | |
| { | |
| "epoch": 0.0796812749003984, | |
| "grad_norm": 3.90625, | |
| "learning_rate": 4.444444444444444e-06, | |
| "loss": 1.2998, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 5, | |
| "tokens_per_second_per_gpu": 5380.38, | |
| "total_tokens": 929761 | |
| }, | |
| { | |
| "epoch": 0.09561752988047809, | |
| "grad_norm": 3.546875, | |
| "learning_rate": 5.555555555555557e-06, | |
| "loss": 1.3018, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 6, | |
| "tokens_per_second_per_gpu": 5949.69, | |
| "total_tokens": 1118316 | |
| }, | |
| { | |
| "epoch": 0.11155378486055777, | |
| "grad_norm": 3.171875, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 1.2793, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 7, | |
| "tokens_per_second_per_gpu": 5785.23, | |
| "total_tokens": 1301615 | |
| }, | |
| { | |
| "epoch": 0.12749003984063745, | |
| "grad_norm": 2.96875, | |
| "learning_rate": 7.77777777777778e-06, | |
| "loss": 1.3115, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 8, | |
| "tokens_per_second_per_gpu": 5941.21, | |
| "total_tokens": 1490474 | |
| }, | |
| { | |
| "epoch": 0.14342629482071714, | |
| "grad_norm": 2.296875, | |
| "learning_rate": 8.888888888888888e-06, | |
| "loss": 1.2588, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 9, | |
| "tokens_per_second_per_gpu": 5534.49, | |
| "total_tokens": 1667576 | |
| }, | |
| { | |
| "epoch": 0.1593625498007968, | |
| "grad_norm": 1.5625, | |
| "learning_rate": 1e-05, | |
| "loss": 1.1992, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 10, | |
| "tokens_per_second_per_gpu": 6154.87, | |
| "total_tokens": 1857807 | |
| }, | |
| { | |
| "epoch": 0.1752988047808765, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 1.1111111111111113e-05, | |
| "loss": 1.1436, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 11, | |
| "tokens_per_second_per_gpu": 5715.6, | |
| "total_tokens": 2041489 | |
| }, | |
| { | |
| "epoch": 0.19123505976095617, | |
| "grad_norm": 0.9765625, | |
| "learning_rate": 1.2222222222222224e-05, | |
| "loss": 1.2402, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 12, | |
| "tokens_per_second_per_gpu": 5749.37, | |
| "total_tokens": 2216014 | |
| }, | |
| { | |
| "epoch": 0.20717131474103587, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 1.3333333333333333e-05, | |
| "loss": 1.2051, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 13, | |
| "tokens_per_second_per_gpu": 5748.94, | |
| "total_tokens": 2397131 | |
| }, | |
| { | |
| "epoch": 0.22310756972111553, | |
| "grad_norm": 0.921875, | |
| "learning_rate": 1.4444444444444446e-05, | |
| "loss": 1.1211, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 14, | |
| "tokens_per_second_per_gpu": 6171.1, | |
| "total_tokens": 2590472 | |
| }, | |
| { | |
| "epoch": 0.23904382470119523, | |
| "grad_norm": 0.8984375, | |
| "learning_rate": 1.555555555555556e-05, | |
| "loss": 1.1777, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 15, | |
| "tokens_per_second_per_gpu": 6160.5, | |
| "total_tokens": 2780711 | |
| }, | |
| { | |
| "epoch": 0.2549800796812749, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 1.1025, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 16, | |
| "tokens_per_second_per_gpu": 5706.58, | |
| "total_tokens": 2968588 | |
| }, | |
| { | |
| "epoch": 0.27091633466135456, | |
| "grad_norm": 0.65234375, | |
| "learning_rate": 1.7777777777777777e-05, | |
| "loss": 1.2041, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 17, | |
| "tokens_per_second_per_gpu": 5569.19, | |
| "total_tokens": 3148691 | |
| }, | |
| { | |
| "epoch": 0.2868525896414343, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 1.888888888888889e-05, | |
| "loss": 1.168, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 18, | |
| "tokens_per_second_per_gpu": 5894.91, | |
| "total_tokens": 3332398 | |
| }, | |
| { | |
| "epoch": 0.30278884462151395, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 2e-05, | |
| "loss": 1.0977, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 19, | |
| "tokens_per_second_per_gpu": 6092.09, | |
| "total_tokens": 3526610 | |
| }, | |
| { | |
| "epoch": 0.3187250996015936, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 1.9998251609127465e-05, | |
| "loss": 1.1372, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 20, | |
| "tokens_per_second_per_gpu": 5971.19, | |
| "total_tokens": 3711042 | |
| }, | |
| { | |
| "epoch": 0.3346613545816733, | |
| "grad_norm": 0.5078125, | |
| "learning_rate": 1.9993007047883988e-05, | |
| "loss": 1.0659, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 21, | |
| "tokens_per_second_per_gpu": 5750.71, | |
| "total_tokens": 3890841 | |
| }, | |
| { | |
| "epoch": 0.350597609561753, | |
| "grad_norm": 0.50390625, | |
| "learning_rate": 1.998426815017817e-05, | |
| "loss": 1.124, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 22, | |
| "tokens_per_second_per_gpu": 5968.21, | |
| "total_tokens": 4074024 | |
| }, | |
| { | |
| "epoch": 0.3665338645418327, | |
| "grad_norm": 0.4609375, | |
| "learning_rate": 1.9972037971811802e-05, | |
| "loss": 1.064, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 23, | |
| "tokens_per_second_per_gpu": 5672.93, | |
| "total_tokens": 4261426 | |
| }, | |
| { | |
| "epoch": 0.38247011952191234, | |
| "grad_norm": 0.458984375, | |
| "learning_rate": 1.9956320789411338e-05, | |
| "loss": 1.0977, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 24, | |
| "tokens_per_second_per_gpu": 5947.63, | |
| "total_tokens": 4448221 | |
| }, | |
| { | |
| "epoch": 0.398406374501992, | |
| "grad_norm": 0.400390625, | |
| "learning_rate": 1.9937122098932428e-05, | |
| "loss": 0.9438, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 25, | |
| "tokens_per_second_per_gpu": 5830.3, | |
| "total_tokens": 4643418 | |
| }, | |
| { | |
| "epoch": 0.41434262948207173, | |
| "grad_norm": 0.451171875, | |
| "learning_rate": 1.9914448613738107e-05, | |
| "loss": 1.0786, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 26, | |
| "tokens_per_second_per_gpu": 5753.23, | |
| "total_tokens": 4826564 | |
| }, | |
| { | |
| "epoch": 0.4302788844621514, | |
| "grad_norm": 0.41796875, | |
| "learning_rate": 1.9888308262251286e-05, | |
| "loss": 1.1084, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 27, | |
| "tokens_per_second_per_gpu": 5786.21, | |
| "total_tokens": 5008617 | |
| }, | |
| { | |
| "epoch": 0.44621513944223107, | |
| "grad_norm": 0.392578125, | |
| "learning_rate": 1.985871018518236e-05, | |
| "loss": 1.0488, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 28, | |
| "tokens_per_second_per_gpu": 5935.98, | |
| "total_tokens": 5194550 | |
| }, | |
| { | |
| "epoch": 0.46215139442231074, | |
| "grad_norm": 0.37109375, | |
| "learning_rate": 1.9825664732332886e-05, | |
| "loss": 1.0894, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 29, | |
| "tokens_per_second_per_gpu": 5927.93, | |
| "total_tokens": 5380376 | |
| }, | |
| { | |
| "epoch": 0.47808764940239046, | |
| "grad_norm": 0.35546875, | |
| "learning_rate": 1.9789183458976485e-05, | |
| "loss": 1.0869, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 30, | |
| "tokens_per_second_per_gpu": 6097.05, | |
| "total_tokens": 5567310 | |
| }, | |
| { | |
| "epoch": 0.4940239043824701, | |
| "grad_norm": 0.37109375, | |
| "learning_rate": 1.9749279121818235e-05, | |
| "loss": 1.0181, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 31, | |
| "tokens_per_second_per_gpu": 6055.18, | |
| "total_tokens": 5750982 | |
| }, | |
| { | |
| "epoch": 0.5099601593625498, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 1.970596567453391e-05, | |
| "loss": 1.0552, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 32, | |
| "tokens_per_second_per_gpu": 6008.39, | |
| "total_tokens": 5937332 | |
| }, | |
| { | |
| "epoch": 0.5258964143426295, | |
| "grad_norm": 0.376953125, | |
| "learning_rate": 1.9659258262890683e-05, | |
| "loss": 1.0439, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 33, | |
| "tokens_per_second_per_gpu": 6030.07, | |
| "total_tokens": 6120851 | |
| }, | |
| { | |
| "epoch": 0.5418326693227091, | |
| "grad_norm": 0.369140625, | |
| "learning_rate": 1.9609173219450998e-05, | |
| "loss": 1.0835, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 34, | |
| "tokens_per_second_per_gpu": 5726.88, | |
| "total_tokens": 6297402 | |
| }, | |
| { | |
| "epoch": 0.5577689243027888, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 1.955572805786141e-05, | |
| "loss": 1.1074, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 35, | |
| "tokens_per_second_per_gpu": 5816.12, | |
| "total_tokens": 6480316 | |
| }, | |
| { | |
| "epoch": 0.5737051792828686, | |
| "grad_norm": 0.357421875, | |
| "learning_rate": 1.9498941466728462e-05, | |
| "loss": 1.0391, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 36, | |
| "tokens_per_second_per_gpu": 5765.66, | |
| "total_tokens": 6665052 | |
| }, | |
| { | |
| "epoch": 0.5896414342629482, | |
| "grad_norm": 0.345703125, | |
| "learning_rate": 1.9438833303083677e-05, | |
| "loss": 1.0371, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 37, | |
| "tokens_per_second_per_gpu": 5749.09, | |
| "total_tokens": 6849283 | |
| }, | |
| { | |
| "epoch": 0.6055776892430279, | |
| "grad_norm": 0.34375, | |
| "learning_rate": 1.9375424585439994e-05, | |
| "loss": 1.0503, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 38, | |
| "tokens_per_second_per_gpu": 5927.3, | |
| "total_tokens": 7032513 | |
| }, | |
| { | |
| "epoch": 0.6215139442231076, | |
| "grad_norm": 0.330078125, | |
| "learning_rate": 1.9308737486442045e-05, | |
| "loss": 1.0479, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 39, | |
| "tokens_per_second_per_gpu": 5905.92, | |
| "total_tokens": 7214561 | |
| }, | |
| { | |
| "epoch": 0.6374501992031872, | |
| "grad_norm": 0.3359375, | |
| "learning_rate": 1.9238795325112867e-05, | |
| "loss": 1.0098, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 40, | |
| "tokens_per_second_per_gpu": 5853.3, | |
| "total_tokens": 7400854 | |
| }, | |
| { | |
| "epoch": 0.6533864541832669, | |
| "grad_norm": 0.36328125, | |
| "learning_rate": 1.9165622558699763e-05, | |
| "loss": 1.106, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 41, | |
| "tokens_per_second_per_gpu": 5556.45, | |
| "total_tokens": 7577263 | |
| }, | |
| { | |
| "epoch": 0.6693227091633466, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 1.908924477412211e-05, | |
| "loss": 1.0498, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 42, | |
| "tokens_per_second_per_gpu": 5928.26, | |
| "total_tokens": 7763586 | |
| }, | |
| { | |
| "epoch": 0.6852589641434262, | |
| "grad_norm": 0.32421875, | |
| "learning_rate": 1.900968867902419e-05, | |
| "loss": 1.0171, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 43, | |
| "tokens_per_second_per_gpu": 6102.36, | |
| "total_tokens": 7953595 | |
| }, | |
| { | |
| "epoch": 0.701195219123506, | |
| "grad_norm": 0.36328125, | |
| "learning_rate": 1.8926982092436117e-05, | |
| "loss": 1.0688, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 44, | |
| "tokens_per_second_per_gpu": 6058.46, | |
| "total_tokens": 8135608 | |
| }, | |
| { | |
| "epoch": 0.7171314741035857, | |
| "grad_norm": 0.359375, | |
| "learning_rate": 1.8841153935046098e-05, | |
| "loss": 0.978, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 45, | |
| "tokens_per_second_per_gpu": 5806.45, | |
| "total_tokens": 8328038 | |
| }, | |
| { | |
| "epoch": 0.7330677290836654, | |
| "grad_norm": 0.333984375, | |
| "learning_rate": 1.8752234219087538e-05, | |
| "loss": 1.0435, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 46, | |
| "tokens_per_second_per_gpu": 5940.85, | |
| "total_tokens": 8517629 | |
| }, | |
| { | |
| "epoch": 0.749003984063745, | |
| "grad_norm": 0.400390625, | |
| "learning_rate": 1.866025403784439e-05, | |
| "loss": 1.0317, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 47, | |
| "tokens_per_second_per_gpu": 5929.8, | |
| "total_tokens": 8700619 | |
| }, | |
| { | |
| "epoch": 0.7649402390438247, | |
| "grad_norm": 0.328125, | |
| "learning_rate": 1.8565245554778516e-05, | |
| "loss": 0.9819, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 48, | |
| "tokens_per_second_per_gpu": 5991.89, | |
| "total_tokens": 8886726 | |
| }, | |
| { | |
| "epoch": 0.7808764940239044, | |
| "grad_norm": 0.34765625, | |
| "learning_rate": 1.8467241992282842e-05, | |
| "loss": 1.0396, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 49, | |
| "tokens_per_second_per_gpu": 5941.59, | |
| "total_tokens": 9074210 | |
| }, | |
| { | |
| "epoch": 0.796812749003984, | |
| "grad_norm": 0.345703125, | |
| "learning_rate": 1.83662776200642e-05, | |
| "loss": 1.0703, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 50, | |
| "tokens_per_second_per_gpu": 5856.71, | |
| "total_tokens": 9253264 | |
| }, | |
| { | |
| "epoch": 0.8127490039840638, | |
| "grad_norm": 0.33203125, | |
| "learning_rate": 1.826238774315995e-05, | |
| "loss": 1.0078, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 51, | |
| "tokens_per_second_per_gpu": 5883.97, | |
| "total_tokens": 9437019 | |
| }, | |
| { | |
| "epoch": 0.8286852589641435, | |
| "grad_norm": 0.326171875, | |
| "learning_rate": 1.8155608689592604e-05, | |
| "loss": 1.0352, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 52, | |
| "tokens_per_second_per_gpu": 6284.45, | |
| "total_tokens": 9624777 | |
| }, | |
| { | |
| "epoch": 0.8446215139442231, | |
| "grad_norm": 0.34375, | |
| "learning_rate": 1.8045977797666685e-05, | |
| "loss": 1.0015, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 53, | |
| "tokens_per_second_per_gpu": 6227.78, | |
| "total_tokens": 9816093 | |
| }, | |
| { | |
| "epoch": 0.8605577689243028, | |
| "grad_norm": 0.32421875, | |
| "learning_rate": 1.7933533402912354e-05, | |
| "loss": 1.0205, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 54, | |
| "tokens_per_second_per_gpu": 5562.75, | |
| "total_tokens": 10003875 | |
| }, | |
| { | |
| "epoch": 0.8764940239043825, | |
| "grad_norm": 0.3125, | |
| "learning_rate": 1.78183148246803e-05, | |
| "loss": 0.9985, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 55, | |
| "tokens_per_second_per_gpu": 6029.45, | |
| "total_tokens": 10195261 | |
| }, | |
| { | |
| "epoch": 0.8924302788844621, | |
| "grad_norm": 0.328125, | |
| "learning_rate": 1.7700362352392632e-05, | |
| "loss": 1.0151, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 56, | |
| "tokens_per_second_per_gpu": 5824.93, | |
| "total_tokens": 10378607 | |
| }, | |
| { | |
| "epoch": 0.9083665338645418, | |
| "grad_norm": 0.345703125, | |
| "learning_rate": 1.757971723145453e-05, | |
| "loss": 1.0737, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 57, | |
| "tokens_per_second_per_gpu": 5758.69, | |
| "total_tokens": 10565102 | |
| }, | |
| { | |
| "epoch": 0.9243027888446215, | |
| "grad_norm": 0.330078125, | |
| "learning_rate": 1.7456421648831658e-05, | |
| "loss": 1.0444, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 58, | |
| "tokens_per_second_per_gpu": 5699.09, | |
| "total_tokens": 10743645 | |
| }, | |
| { | |
| "epoch": 0.9402390438247012, | |
| "grad_norm": 0.337890625, | |
| "learning_rate": 1.7330518718298263e-05, | |
| "loss": 0.998, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 59, | |
| "tokens_per_second_per_gpu": 5772.72, | |
| "total_tokens": 10926325 | |
| }, | |
| { | |
| "epoch": 0.9561752988047809, | |
| "grad_norm": 0.361328125, | |
| "learning_rate": 1.7202052465361268e-05, | |
| "loss": 1.0659, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 60, | |
| "tokens_per_second_per_gpu": 5781.99, | |
| "total_tokens": 11105741 | |
| }, | |
| { | |
| "epoch": 0.9721115537848606, | |
| "grad_norm": 0.326171875, | |
| "learning_rate": 1.7071067811865477e-05, | |
| "loss": 1.0024, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 61, | |
| "tokens_per_second_per_gpu": 5416.25, | |
| "total_tokens": 11283752 | |
| }, | |
| { | |
| "epoch": 0.9880478087649402, | |
| "grad_norm": 0.314453125, | |
| "learning_rate": 1.693761056028542e-05, | |
| "loss": 0.9429, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 62, | |
| "tokens_per_second_per_gpu": 6080.81, | |
| "total_tokens": 11476891 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 1.6801727377709195e-05, | |
| "loss": 0.8979, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 63, | |
| "tokens_per_second_per_gpu": 4586.33, | |
| "total_tokens": 11600559 | |
| }, | |
| { | |
| "epoch": 1.0159362549800797, | |
| "grad_norm": 0.33203125, | |
| "learning_rate": 1.6663465779520042e-05, | |
| "loss": 1.0391, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 64, | |
| "tokens_per_second_per_gpu": 5765.65, | |
| "total_tokens": 11781077 | |
| }, | |
| { | |
| "epoch": 1.0318725099601593, | |
| "grad_norm": 0.328125, | |
| "learning_rate": 1.6522874112781213e-05, | |
| "loss": 0.9893, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 65, | |
| "tokens_per_second_per_gpu": 5812.65, | |
| "total_tokens": 11964316 | |
| }, | |
| { | |
| "epoch": 1.047808764940239, | |
| "grad_norm": 0.33203125, | |
| "learning_rate": 1.6380001539330088e-05, | |
| "loss": 1.019, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 66, | |
| "tokens_per_second_per_gpu": 5958.35, | |
| "total_tokens": 12158602 | |
| }, | |
| { | |
| "epoch": 1.0637450199203187, | |
| "grad_norm": 0.318359375, | |
| "learning_rate": 1.6234898018587336e-05, | |
| "loss": 1.0098, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 67, | |
| "tokens_per_second_per_gpu": 5947.9, | |
| "total_tokens": 12343835 | |
| }, | |
| { | |
| "epoch": 1.0796812749003983, | |
| "grad_norm": 0.31640625, | |
| "learning_rate": 1.608761429008721e-05, | |
| "loss": 0.959, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 68, | |
| "tokens_per_second_per_gpu": 5410.16, | |
| "total_tokens": 12530320 | |
| }, | |
| { | |
| "epoch": 1.095617529880478, | |
| "grad_norm": 0.337890625, | |
| "learning_rate": 1.5938201855735017e-05, | |
| "loss": 0.998, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 69, | |
| "tokens_per_second_per_gpu": 5950.0, | |
| "total_tokens": 12718875 | |
| }, | |
| { | |
| "epoch": 1.1115537848605577, | |
| "grad_norm": 0.31640625, | |
| "learning_rate": 1.578671296179806e-05, | |
| "loss": 0.9834, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 70, | |
| "tokens_per_second_per_gpu": 5806.97, | |
| "total_tokens": 12902174 | |
| }, | |
| { | |
| "epoch": 1.1274900398406373, | |
| "grad_norm": 0.322265625, | |
| "learning_rate": 1.563320058063622e-05, | |
| "loss": 1.02, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 71, | |
| "tokens_per_second_per_gpu": 5964.93, | |
| "total_tokens": 13091033 | |
| }, | |
| { | |
| "epoch": 1.1434262948207172, | |
| "grad_norm": 0.328125, | |
| "learning_rate": 1.5477718392178716e-05, | |
| "loss": 1.001, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 72, | |
| "tokens_per_second_per_gpu": 5543.95, | |
| "total_tokens": 13268135 | |
| }, | |
| { | |
| "epoch": 1.159362549800797, | |
| "grad_norm": 0.322265625, | |
| "learning_rate": 1.5320320765153367e-05, | |
| "loss": 0.9868, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 73, | |
| "tokens_per_second_per_gpu": 6161.46, | |
| "total_tokens": 13458366 | |
| }, | |
| { | |
| "epoch": 1.1752988047808766, | |
| "grad_norm": 0.33203125, | |
| "learning_rate": 1.5161062738075068e-05, | |
| "loss": 0.9404, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 74, | |
| "tokens_per_second_per_gpu": 5781.1, | |
| "total_tokens": 13642048 | |
| }, | |
| { | |
| "epoch": 1.1912350597609562, | |
| "grad_norm": 0.423828125, | |
| "learning_rate": 1.5000000000000002e-05, | |
| "loss": 1.0273, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 75, | |
| "tokens_per_second_per_gpu": 5754.51, | |
| "total_tokens": 13816573 | |
| }, | |
| { | |
| "epoch": 1.207171314741036, | |
| "grad_norm": 0.376953125, | |
| "learning_rate": 1.4837188871052399e-05, | |
| "loss": 0.999, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 76, | |
| "tokens_per_second_per_gpu": 5745.16, | |
| "total_tokens": 13997690 | |
| }, | |
| { | |
| "epoch": 1.2231075697211156, | |
| "grad_norm": 0.30859375, | |
| "learning_rate": 1.4672686282730622e-05, | |
| "loss": 0.9365, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 77, | |
| "tokens_per_second_per_gpu": 6187.18, | |
| "total_tokens": 14191031 | |
| }, | |
| { | |
| "epoch": 1.2390438247011952, | |
| "grad_norm": 0.310546875, | |
| "learning_rate": 1.4506549757999456e-05, | |
| "loss": 0.9932, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 78, | |
| "tokens_per_second_per_gpu": 6189.26, | |
| "total_tokens": 14381270 | |
| }, | |
| { | |
| "epoch": 1.254980079681275, | |
| "grad_norm": 0.361328125, | |
| "learning_rate": 1.4338837391175582e-05, | |
| "loss": 0.9253, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 79, | |
| "tokens_per_second_per_gpu": 5694.92, | |
| "total_tokens": 14569147 | |
| }, | |
| { | |
| "epoch": 1.2709163346613546, | |
| "grad_norm": 0.349609375, | |
| "learning_rate": 1.4169607827613284e-05, | |
| "loss": 1.0249, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 80, | |
| "tokens_per_second_per_gpu": 5574.13, | |
| "total_tokens": 14749250 | |
| }, | |
| { | |
| "epoch": 1.2868525896414342, | |
| "grad_norm": 0.33984375, | |
| "learning_rate": 1.3998920243197408e-05, | |
| "loss": 1.0044, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 81, | |
| "tokens_per_second_per_gpu": 5892.86, | |
| "total_tokens": 14932957 | |
| }, | |
| { | |
| "epoch": 1.302788844621514, | |
| "grad_norm": 0.31640625, | |
| "learning_rate": 1.3826834323650899e-05, | |
| "loss": 0.9443, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 82, | |
| "tokens_per_second_per_gpu": 6084.92, | |
| "total_tokens": 15127169 | |
| }, | |
| { | |
| "epoch": 1.3187250996015936, | |
| "grad_norm": 0.328125, | |
| "learning_rate": 1.3653410243663953e-05, | |
| "loss": 0.9878, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 83, | |
| "tokens_per_second_per_gpu": 5984.25, | |
| "total_tokens": 15311601 | |
| }, | |
| { | |
| "epoch": 1.3346613545816732, | |
| "grad_norm": 0.32421875, | |
| "learning_rate": 1.3478708645852272e-05, | |
| "loss": 0.9248, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 84, | |
| "tokens_per_second_per_gpu": 5744.73, | |
| "total_tokens": 15491400 | |
| }, | |
| { | |
| "epoch": 1.3505976095617531, | |
| "grad_norm": 0.33203125, | |
| "learning_rate": 1.3302790619551673e-05, | |
| "loss": 0.9824, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 85, | |
| "tokens_per_second_per_gpu": 6009.5, | |
| "total_tokens": 15674583 | |
| }, | |
| { | |
| "epoch": 1.3665338645418328, | |
| "grad_norm": 0.314453125, | |
| "learning_rate": 1.3125717679456447e-05, | |
| "loss": 0.9404, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 86, | |
| "tokens_per_second_per_gpu": 5690.82, | |
| "total_tokens": 15861985 | |
| }, | |
| { | |
| "epoch": 1.3824701195219125, | |
| "grad_norm": 0.34765625, | |
| "learning_rate": 1.2947551744109044e-05, | |
| "loss": 0.9731, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 87, | |
| "tokens_per_second_per_gpu": 5962.66, | |
| "total_tokens": 16048780 | |
| }, | |
| { | |
| "epoch": 1.3984063745019921, | |
| "grad_norm": 0.318359375, | |
| "learning_rate": 1.2768355114248493e-05, | |
| "loss": 0.8406, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 88, | |
| "tokens_per_second_per_gpu": 5796.13, | |
| "total_tokens": 16243977 | |
| }, | |
| { | |
| "epoch": 1.4143426294820718, | |
| "grad_norm": 0.337890625, | |
| "learning_rate": 1.2588190451025209e-05, | |
| "loss": 0.9692, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 89, | |
| "tokens_per_second_per_gpu": 5748.01, | |
| "total_tokens": 16427123 | |
| }, | |
| { | |
| "epoch": 1.4302788844621515, | |
| "grad_norm": 0.345703125, | |
| "learning_rate": 1.2407120754089733e-05, | |
| "loss": 0.998, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 90, | |
| "tokens_per_second_per_gpu": 5897.24, | |
| "total_tokens": 16609176 | |
| }, | |
| { | |
| "epoch": 1.4462151394422311, | |
| "grad_norm": 0.33203125, | |
| "learning_rate": 1.2225209339563144e-05, | |
| "loss": 0.9507, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 91, | |
| "tokens_per_second_per_gpu": 5936.81, | |
| "total_tokens": 16795109 | |
| }, | |
| { | |
| "epoch": 1.4621513944223108, | |
| "grad_norm": 0.328125, | |
| "learning_rate": 1.2042519817896805e-05, | |
| "loss": 0.9912, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 92, | |
| "tokens_per_second_per_gpu": 5949.43, | |
| "total_tokens": 16980935 | |
| }, | |
| { | |
| "epoch": 1.4780876494023905, | |
| "grad_norm": 0.333984375, | |
| "learning_rate": 1.1859116071629148e-05, | |
| "loss": 0.9888, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 93, | |
| "tokens_per_second_per_gpu": 6095.07, | |
| "total_tokens": 17167869 | |
| }, | |
| { | |
| "epoch": 1.4940239043824701, | |
| "grad_norm": 0.322265625, | |
| "learning_rate": 1.1675062233047365e-05, | |
| "loss": 0.9219, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 94, | |
| "tokens_per_second_per_gpu": 6067.77, | |
| "total_tokens": 17351541 | |
| }, | |
| { | |
| "epoch": 1.5099601593625498, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 1.1490422661761744e-05, | |
| "loss": 0.9648, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 95, | |
| "tokens_per_second_per_gpu": 6008.67, | |
| "total_tokens": 17537891 | |
| }, | |
| { | |
| "epoch": 1.5258964143426295, | |
| "grad_norm": 0.328125, | |
| "learning_rate": 1.130526192220052e-05, | |
| "loss": 0.9556, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 96, | |
| "tokens_per_second_per_gpu": 5955.34, | |
| "total_tokens": 17721410 | |
| }, | |
| { | |
| "epoch": 1.5418326693227091, | |
| "grad_norm": 0.3359375, | |
| "learning_rate": 1.1119644761033079e-05, | |
| "loss": 0.9951, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 97, | |
| "tokens_per_second_per_gpu": 5732.24, | |
| "total_tokens": 17897961 | |
| }, | |
| { | |
| "epoch": 1.5577689243027888, | |
| "grad_norm": 0.330078125, | |
| "learning_rate": 1.0933636084529507e-05, | |
| "loss": 1.02, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 98, | |
| "tokens_per_second_per_gpu": 5813.99, | |
| "total_tokens": 18080875 | |
| }, | |
| { | |
| "epoch": 1.5737051792828685, | |
| "grad_norm": 0.330078125, | |
| "learning_rate": 1.0747300935864245e-05, | |
| "loss": 0.958, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 99, | |
| "tokens_per_second_per_gpu": 5769.52, | |
| "total_tokens": 18265611 | |
| }, | |
| { | |
| "epoch": 1.5896414342629481, | |
| "grad_norm": 0.326171875, | |
| "learning_rate": 1.0560704472371919e-05, | |
| "loss": 0.9561, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 100, | |
| "tokens_per_second_per_gpu": 5726.38, | |
| "total_tokens": 18449842 | |
| }, | |
| { | |
| "epoch": 1.6055776892430278, | |
| "grad_norm": 0.326171875, | |
| "learning_rate": 1.037391194276326e-05, | |
| "loss": 0.9707, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 101, | |
| "tokens_per_second_per_gpu": 5944.93, | |
| "total_tokens": 18633072 | |
| }, | |
| { | |
| "epoch": 1.6215139442231075, | |
| "grad_norm": 0.3203125, | |
| "learning_rate": 1.0186988664309023e-05, | |
| "loss": 0.9707, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 102, | |
| "tokens_per_second_per_gpu": 5897.0, | |
| "total_tokens": 18815120 | |
| }, | |
| { | |
| "epoch": 1.6374501992031871, | |
| "grad_norm": 0.328125, | |
| "learning_rate": 1e-05, | |
| "loss": 0.9385, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 103, | |
| "tokens_per_second_per_gpu": 5841.82, | |
| "total_tokens": 19001413 | |
| }, | |
| { | |
| "epoch": 1.6533864541832668, | |
| "grad_norm": 0.341796875, | |
| "learning_rate": 9.81301133569098e-06, | |
| "loss": 1.0303, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 104, | |
| "tokens_per_second_per_gpu": 5567.16, | |
| "total_tokens": 19177822 | |
| }, | |
| { | |
| "epoch": 1.6693227091633465, | |
| "grad_norm": 0.330078125, | |
| "learning_rate": 9.626088057236745e-06, | |
| "loss": 0.9814, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 105, | |
| "tokens_per_second_per_gpu": 5850.82, | |
| "total_tokens": 19364145 | |
| }, | |
| { | |
| "epoch": 1.6852589641434261, | |
| "grad_norm": 0.31640625, | |
| "learning_rate": 9.439295527628083e-06, | |
| "loss": 0.9531, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 106, | |
| "tokens_per_second_per_gpu": 6142.09, | |
| "total_tokens": 19554154 | |
| }, | |
| { | |
| "epoch": 1.701195219123506, | |
| "grad_norm": 0.333984375, | |
| "learning_rate": 9.252699064135759e-06, | |
| "loss": 0.998, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 107, | |
| "tokens_per_second_per_gpu": 6059.46, | |
| "total_tokens": 19736167 | |
| }, | |
| { | |
| "epoch": 1.7171314741035857, | |
| "grad_norm": 0.33203125, | |
| "learning_rate": 9.066363915470494e-06, | |
| "loss": 0.9204, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 108, | |
| "tokens_per_second_per_gpu": 5807.52, | |
| "total_tokens": 19928597 | |
| }, | |
| { | |
| "epoch": 1.7330677290836654, | |
| "grad_norm": 0.3515625, | |
| "learning_rate": 8.880355238966923e-06, | |
| "loss": 0.978, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 109, | |
| "tokens_per_second_per_gpu": 5987.33, | |
| "total_tokens": 20118188 | |
| }, | |
| { | |
| "epoch": 1.749003984063745, | |
| "grad_norm": 0.3359375, | |
| "learning_rate": 8.694738077799487e-06, | |
| "loss": 0.9702, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 110, | |
| "tokens_per_second_per_gpu": 5888.18, | |
| "total_tokens": 20301178 | |
| }, | |
| { | |
| "epoch": 1.7649402390438247, | |
| "grad_norm": 0.357421875, | |
| "learning_rate": 8.509577338238255e-06, | |
| "loss": 0.9253, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 111, | |
| "tokens_per_second_per_gpu": 5972.68, | |
| "total_tokens": 20487285 | |
| }, | |
| { | |
| "epoch": 1.7808764940239044, | |
| "grad_norm": 0.337890625, | |
| "learning_rate": 8.324937766952638e-06, | |
| "loss": 0.9814, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 112, | |
| "tokens_per_second_per_gpu": 5932.16, | |
| "total_tokens": 20674769 | |
| }, | |
| { | |
| "epoch": 1.796812749003984, | |
| "grad_norm": 0.341796875, | |
| "learning_rate": 8.140883928370855e-06, | |
| "loss": 1.0088, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 113, | |
| "tokens_per_second_per_gpu": 5830.81, | |
| "total_tokens": 20853823 | |
| }, | |
| { | |
| "epoch": 1.812749003984064, | |
| "grad_norm": 0.322265625, | |
| "learning_rate": 7.957480182103198e-06, | |
| "loss": 0.9487, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 114, | |
| "tokens_per_second_per_gpu": 5865.22, | |
| "total_tokens": 21037578 | |
| }, | |
| { | |
| "epoch": 1.8286852589641436, | |
| "grad_norm": 0.328125, | |
| "learning_rate": 7.774790660436857e-06, | |
| "loss": 0.9819, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 115, | |
| "tokens_per_second_per_gpu": 6252.84, | |
| "total_tokens": 21225336 | |
| }, | |
| { | |
| "epoch": 1.8446215139442232, | |
| "grad_norm": 0.33203125, | |
| "learning_rate": 7.592879245910273e-06, | |
| "loss": 0.9482, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 116, | |
| "tokens_per_second_per_gpu": 6223.23, | |
| "total_tokens": 21416652 | |
| }, | |
| { | |
| "epoch": 1.860557768924303, | |
| "grad_norm": 0.322265625, | |
| "learning_rate": 7.411809548974792e-06, | |
| "loss": 0.9697, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 117, | |
| "tokens_per_second_per_gpu": 5561.76, | |
| "total_tokens": 21604434 | |
| }, | |
| { | |
| "epoch": 1.8764940239043826, | |
| "grad_norm": 0.30859375, | |
| "learning_rate": 7.2316448857515076e-06, | |
| "loss": 0.9468, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 118, | |
| "tokens_per_second_per_gpu": 6026.88, | |
| "total_tokens": 21795820 | |
| }, | |
| { | |
| "epoch": 1.8924302788844622, | |
| "grad_norm": 0.32421875, | |
| "learning_rate": 7.052448255890958e-06, | |
| "loss": 0.9624, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 119, | |
| "tokens_per_second_per_gpu": 5817.1, | |
| "total_tokens": 21979166 | |
| }, | |
| { | |
| "epoch": 1.908366533864542, | |
| "grad_norm": 0.33984375, | |
| "learning_rate": 6.874282320543557e-06, | |
| "loss": 1.022, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 120, | |
| "tokens_per_second_per_gpu": 5653.71, | |
| "total_tokens": 22165661 | |
| }, | |
| { | |
| "epoch": 1.9243027888446216, | |
| "grad_norm": 0.32421875, | |
| "learning_rate": 6.697209380448333e-06, | |
| "loss": 0.9961, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 121, | |
| "tokens_per_second_per_gpu": 5699.1, | |
| "total_tokens": 22344204 | |
| }, | |
| { | |
| "epoch": 1.9402390438247012, | |
| "grad_norm": 0.33203125, | |
| "learning_rate": 6.521291354147727e-06, | |
| "loss": 0.9521, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 122, | |
| "tokens_per_second_per_gpu": 5765.79, | |
| "total_tokens": 22526884 | |
| }, | |
| { | |
| "epoch": 1.956175298804781, | |
| "grad_norm": 0.349609375, | |
| "learning_rate": 6.34658975633605e-06, | |
| "loss": 1.0171, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 123, | |
| "tokens_per_second_per_gpu": 5780.38, | |
| "total_tokens": 22706300 | |
| }, | |
| { | |
| "epoch": 1.9721115537848606, | |
| "grad_norm": 0.318359375, | |
| "learning_rate": 6.173165676349103e-06, | |
| "loss": 0.957, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 124, | |
| "tokens_per_second_per_gpu": 5399.07, | |
| "total_tokens": 22884311 | |
| }, | |
| { | |
| "epoch": 1.9880478087649402, | |
| "grad_norm": 0.357421875, | |
| "learning_rate": 6.001079756802592e-06, | |
| "loss": 0.9028, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 125, | |
| "tokens_per_second_per_gpu": 5850.46, | |
| "total_tokens": 23077450 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.39453125, | |
| "learning_rate": 5.830392172386723e-06, | |
| "loss": 0.8589, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 126, | |
| "tokens_per_second_per_gpu": 4564.2, | |
| "total_tokens": 23201118 | |
| }, | |
| { | |
| "epoch": 2.0159362549800797, | |
| "grad_norm": 0.32421875, | |
| "learning_rate": 5.66116260882442e-06, | |
| "loss": 0.9985, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 127, | |
| "tokens_per_second_per_gpu": 5832.26, | |
| "total_tokens": 23381636 | |
| }, | |
| { | |
| "epoch": 2.0318725099601593, | |
| "grad_norm": 0.328125, | |
| "learning_rate": 5.493450242000546e-06, | |
| "loss": 0.9521, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 128, | |
| "tokens_per_second_per_gpu": 5774.6, | |
| "total_tokens": 23564875 | |
| }, | |
| { | |
| "epoch": 2.047808764940239, | |
| "grad_norm": 0.328125, | |
| "learning_rate": 5.32731371726938e-06, | |
| "loss": 0.98, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 129, | |
| "tokens_per_second_per_gpu": 5972.38, | |
| "total_tokens": 23759161 | |
| }, | |
| { | |
| "epoch": 2.0637450199203187, | |
| "grad_norm": 0.328125, | |
| "learning_rate": 5.1628111289476025e-06, | |
| "loss": 0.9746, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 130, | |
| "tokens_per_second_per_gpu": 5919.54, | |
| "total_tokens": 23944394 | |
| }, | |
| { | |
| "epoch": 2.0796812749003983, | |
| "grad_norm": 0.31640625, | |
| "learning_rate": 5.000000000000003e-06, | |
| "loss": 0.9229, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 131, | |
| "tokens_per_second_per_gpu": 5414.05, | |
| "total_tokens": 24130879 | |
| }, | |
| { | |
| "epoch": 2.095617529880478, | |
| "grad_norm": 0.33203125, | |
| "learning_rate": 4.838937261924933e-06, | |
| "loss": 0.9639, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 132, | |
| "tokens_per_second_per_gpu": 5968.88, | |
| "total_tokens": 24319434 | |
| }, | |
| { | |
| "epoch": 2.1115537848605577, | |
| "grad_norm": 0.31640625, | |
| "learning_rate": 4.679679234846636e-06, | |
| "loss": 0.9502, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 133, | |
| "tokens_per_second_per_gpu": 5802.86, | |
| "total_tokens": 24502733 | |
| }, | |
| { | |
| "epoch": 2.1274900398406373, | |
| "grad_norm": 0.318359375, | |
| "learning_rate": 4.522281607821288e-06, | |
| "loss": 0.9854, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 134, | |
| "tokens_per_second_per_gpu": 5970.96, | |
| "total_tokens": 24691592 | |
| }, | |
| { | |
| "epoch": 2.143426294820717, | |
| "grad_norm": 0.373046875, | |
| "learning_rate": 4.3667994193637794e-06, | |
| "loss": 0.9683, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 135, | |
| "tokens_per_second_per_gpu": 5528.1, | |
| "total_tokens": 24868694 | |
| }, | |
| { | |
| "epoch": 2.1593625498007967, | |
| "grad_norm": 0.318359375, | |
| "learning_rate": 4.213287038201943e-06, | |
| "loss": 0.9561, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 136, | |
| "tokens_per_second_per_gpu": 6105.7, | |
| "total_tokens": 25058925 | |
| }, | |
| { | |
| "epoch": 2.1752988047808763, | |
| "grad_norm": 0.322265625, | |
| "learning_rate": 4.061798144264986e-06, | |
| "loss": 0.9116, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 137, | |
| "tokens_per_second_per_gpu": 5771.48, | |
| "total_tokens": 25242607 | |
| }, | |
| { | |
| "epoch": 2.191235059760956, | |
| "grad_norm": 0.3359375, | |
| "learning_rate": 3.912385709912794e-06, | |
| "loss": 0.9966, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 138, | |
| "tokens_per_second_per_gpu": 5723.02, | |
| "total_tokens": 25417132 | |
| }, | |
| { | |
| "epoch": 2.2071713147410357, | |
| "grad_norm": 0.318359375, | |
| "learning_rate": 3.7651019814126656e-06, | |
| "loss": 0.9712, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 139, | |
| "tokens_per_second_per_gpu": 5739.14, | |
| "total_tokens": 25598249 | |
| }, | |
| { | |
| "epoch": 2.2231075697211153, | |
| "grad_norm": 0.306640625, | |
| "learning_rate": 3.619998460669916e-06, | |
| "loss": 0.9106, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 140, | |
| "tokens_per_second_per_gpu": 6168.5, | |
| "total_tokens": 25791590 | |
| }, | |
| { | |
| "epoch": 2.239043824701195, | |
| "grad_norm": 0.31640625, | |
| "learning_rate": 3.4771258872187917e-06, | |
| "loss": 0.9673, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 141, | |
| "tokens_per_second_per_gpu": 6156.05, | |
| "total_tokens": 25981829 | |
| }, | |
| { | |
| "epoch": 2.2549800796812747, | |
| "grad_norm": 0.33203125, | |
| "learning_rate": 3.3365342204799613e-06, | |
| "loss": 0.9019, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 142, | |
| "tokens_per_second_per_gpu": 5766.33, | |
| "total_tokens": 26169706 | |
| }, | |
| { | |
| "epoch": 2.2709163346613543, | |
| "grad_norm": 0.50390625, | |
| "learning_rate": 3.1982726222908046e-06, | |
| "loss": 0.9995, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 143, | |
| "tokens_per_second_per_gpu": 5566.19, | |
| "total_tokens": 26349809 | |
| }, | |
| { | |
| "epoch": 2.2868525896414345, | |
| "grad_norm": 0.359375, | |
| "learning_rate": 3.0623894397145837e-06, | |
| "loss": 0.9805, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 144, | |
| "tokens_per_second_per_gpu": 5897.52, | |
| "total_tokens": 26533516 | |
| }, | |
| { | |
| "epoch": 2.302788844621514, | |
| "grad_norm": 0.375, | |
| "learning_rate": 2.9289321881345257e-06, | |
| "loss": 0.9219, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 145, | |
| "tokens_per_second_per_gpu": 6065.68, | |
| "total_tokens": 26727728 | |
| }, | |
| { | |
| "epoch": 2.318725099601594, | |
| "grad_norm": 0.3359375, | |
| "learning_rate": 2.7979475346387363e-06, | |
| "loss": 0.9639, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 146, | |
| "tokens_per_second_per_gpu": 5976.74, | |
| "total_tokens": 26912160 | |
| }, | |
| { | |
| "epoch": 2.3346613545816735, | |
| "grad_norm": 0.34765625, | |
| "learning_rate": 2.669481281701739e-06, | |
| "loss": 0.9038, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 147, | |
| "tokens_per_second_per_gpu": 5726.82, | |
| "total_tokens": 27091959 | |
| }, | |
| { | |
| "epoch": 2.350597609561753, | |
| "grad_norm": 0.341796875, | |
| "learning_rate": 2.5435783511683444e-06, | |
| "loss": 0.9614, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 148, | |
| "tokens_per_second_per_gpu": 5973.36, | |
| "total_tokens": 27275142 | |
| }, | |
| { | |
| "epoch": 2.366533864541833, | |
| "grad_norm": 0.33203125, | |
| "learning_rate": 2.420282768545469e-06, | |
| "loss": 0.9219, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 149, | |
| "tokens_per_second_per_gpu": 5654.39, | |
| "total_tokens": 27462544 | |
| }, | |
| { | |
| "epoch": 2.3824701195219125, | |
| "grad_norm": 0.322265625, | |
| "learning_rate": 2.2996376476073724e-06, | |
| "loss": 0.9526, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 150, | |
| "tokens_per_second_per_gpu": 5952.85, | |
| "total_tokens": 27649339 | |
| }, | |
| { | |
| "epoch": 2.398406374501992, | |
| "grad_norm": 0.3203125, | |
| "learning_rate": 2.1816851753197023e-06, | |
| "loss": 0.8235, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 151, | |
| "tokens_per_second_per_gpu": 5837.21, | |
| "total_tokens": 27844536 | |
| }, | |
| { | |
| "epoch": 2.414342629482072, | |
| "grad_norm": 0.333984375, | |
| "learning_rate": 2.0664665970876496e-06, | |
| "loss": 0.9521, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 152, | |
| "tokens_per_second_per_gpu": 5754.33, | |
| "total_tokens": 28027682 | |
| }, | |
| { | |
| "epoch": 2.4302788844621515, | |
| "grad_norm": 0.326171875, | |
| "learning_rate": 1.9540222023333165e-06, | |
| "loss": 0.9805, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 153, | |
| "tokens_per_second_per_gpu": 5902.39, | |
| "total_tokens": 28209735 | |
| }, | |
| { | |
| "epoch": 2.446215139442231, | |
| "grad_norm": 0.32421875, | |
| "learning_rate": 1.8443913104073984e-06, | |
| "loss": 0.9321, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 154, | |
| "tokens_per_second_per_gpu": 5930.45, | |
| "total_tokens": 28395668 | |
| }, | |
| { | |
| "epoch": 2.462151394422311, | |
| "grad_norm": 0.3203125, | |
| "learning_rate": 1.7376122568400533e-06, | |
| "loss": 0.9756, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 155, | |
| "tokens_per_second_per_gpu": 5945.66, | |
| "total_tokens": 28581494 | |
| }, | |
| { | |
| "epoch": 2.4780876494023905, | |
| "grad_norm": 0.322265625, | |
| "learning_rate": 1.6337223799358025e-06, | |
| "loss": 0.9736, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 156, | |
| "tokens_per_second_per_gpu": 6107.11, | |
| "total_tokens": 28768428 | |
| }, | |
| { | |
| "epoch": 2.49402390438247, | |
| "grad_norm": 0.31640625, | |
| "learning_rate": 1.5327580077171589e-06, | |
| "loss": 0.9067, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 157, | |
| "tokens_per_second_per_gpu": 6059.61, | |
| "total_tokens": 28952100 | |
| }, | |
| { | |
| "epoch": 2.50996015936255, | |
| "grad_norm": 0.326171875, | |
| "learning_rate": 1.4347544452214869e-06, | |
| "loss": 0.9512, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 158, | |
| "tokens_per_second_per_gpu": 6010.47, | |
| "total_tokens": 29138450 | |
| }, | |
| { | |
| "epoch": 2.5258964143426295, | |
| "grad_norm": 0.376953125, | |
| "learning_rate": 1.339745962155613e-06, | |
| "loss": 0.9409, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 159, | |
| "tokens_per_second_per_gpu": 6038.89, | |
| "total_tokens": 29321969 | |
| }, | |
| { | |
| "epoch": 2.541832669322709, | |
| "grad_norm": 0.330078125, | |
| "learning_rate": 1.2477657809124632e-06, | |
| "loss": 0.9824, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 160, | |
| "tokens_per_second_per_gpu": 5740.71, | |
| "total_tokens": 29498520 | |
| }, | |
| { | |
| "epoch": 2.557768924302789, | |
| "grad_norm": 0.328125, | |
| "learning_rate": 1.1588460649539036e-06, | |
| "loss": 1.0068, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 161, | |
| "tokens_per_second_per_gpu": 5804.04, | |
| "total_tokens": 29681434 | |
| }, | |
| { | |
| "epoch": 2.5737051792828685, | |
| "grad_norm": 0.337890625, | |
| "learning_rate": 1.073017907563887e-06, | |
| "loss": 0.9453, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 162, | |
| "tokens_per_second_per_gpu": 5741.44, | |
| "total_tokens": 29866170 | |
| }, | |
| { | |
| "epoch": 2.589641434262948, | |
| "grad_norm": 0.3203125, | |
| "learning_rate": 9.903113209758098e-07, | |
| "loss": 0.9443, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 163, | |
| "tokens_per_second_per_gpu": 5756.27, | |
| "total_tokens": 30050401 | |
| }, | |
| { | |
| "epoch": 2.605577689243028, | |
| "grad_norm": 0.32421875, | |
| "learning_rate": 9.107552258778907e-07, | |
| "loss": 0.9585, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 164, | |
| "tokens_per_second_per_gpu": 5959.88, | |
| "total_tokens": 30233631 | |
| }, | |
| { | |
| "epoch": 2.6215139442231075, | |
| "grad_norm": 0.3203125, | |
| "learning_rate": 8.343774413002382e-07, | |
| "loss": 0.9604, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 165, | |
| "tokens_per_second_per_gpu": 5899.57, | |
| "total_tokens": 30415679 | |
| }, | |
| { | |
| "epoch": 2.637450199203187, | |
| "grad_norm": 0.326171875, | |
| "learning_rate": 7.612046748871327e-07, | |
| "loss": 0.9277, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 166, | |
| "tokens_per_second_per_gpu": 5845.3, | |
| "total_tokens": 30601972 | |
| }, | |
| { | |
| "epoch": 2.653386454183267, | |
| "grad_norm": 0.33984375, | |
| "learning_rate": 6.912625135579587e-07, | |
| "loss": 1.022, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 167, | |
| "tokens_per_second_per_gpu": 5574.11, | |
| "total_tokens": 30778381 | |
| }, | |
| { | |
| "epoch": 2.6693227091633465, | |
| "grad_norm": 0.31640625, | |
| "learning_rate": 6.245754145600091e-07, | |
| "loss": 0.9707, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 168, | |
| "tokens_per_second_per_gpu": 5930.11, | |
| "total_tokens": 30964704 | |
| }, | |
| { | |
| "epoch": 2.685258964143426, | |
| "grad_norm": 0.318359375, | |
| "learning_rate": 5.611666969163243e-07, | |
| "loss": 0.9448, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 169, | |
| "tokens_per_second_per_gpu": 6128.63, | |
| "total_tokens": 31154713 | |
| }, | |
| { | |
| "epoch": 2.7011952191235062, | |
| "grad_norm": 0.333984375, | |
| "learning_rate": 5.010585332715401e-07, | |
| "loss": 0.9883, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 170, | |
| "tokens_per_second_per_gpu": 6064.77, | |
| "total_tokens": 31336726 | |
| }, | |
| { | |
| "epoch": 2.717131474103586, | |
| "grad_norm": 0.328125, | |
| "learning_rate": 4.4427194213859216e-07, | |
| "loss": 0.9131, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 171, | |
| "tokens_per_second_per_gpu": 5798.36, | |
| "total_tokens": 31529156 | |
| }, | |
| { | |
| "epoch": 2.7330677290836656, | |
| "grad_norm": 0.318359375, | |
| "learning_rate": 3.908267805490051e-07, | |
| "loss": 0.9697, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 172, | |
| "tokens_per_second_per_gpu": 5977.29, | |
| "total_tokens": 31718747 | |
| }, | |
| { | |
| "epoch": 2.7490039840637452, | |
| "grad_norm": 0.328125, | |
| "learning_rate": 3.4074173710931804e-07, | |
| "loss": 0.9619, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 173, | |
| "tokens_per_second_per_gpu": 5934.4, | |
| "total_tokens": 31901737 | |
| }, | |
| { | |
| "epoch": 2.764940239043825, | |
| "grad_norm": 0.322265625, | |
| "learning_rate": 2.940343254660905e-07, | |
| "loss": 0.9185, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 174, | |
| "tokens_per_second_per_gpu": 5978.89, | |
| "total_tokens": 32087844 | |
| }, | |
| { | |
| "epoch": 2.7808764940239046, | |
| "grad_norm": 0.328125, | |
| "learning_rate": 2.507208781817638e-07, | |
| "loss": 0.9751, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 175, | |
| "tokens_per_second_per_gpu": 5946.29, | |
| "total_tokens": 32275328 | |
| }, | |
| { | |
| "epoch": 2.7968127490039842, | |
| "grad_norm": 0.337890625, | |
| "learning_rate": 2.1081654102351634e-07, | |
| "loss": 1.0015, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 176, | |
| "tokens_per_second_per_gpu": 5871.96, | |
| "total_tokens": 32454382 | |
| }, | |
| { | |
| "epoch": 2.812749003984064, | |
| "grad_norm": 0.318359375, | |
| "learning_rate": 1.7433526766711727e-07, | |
| "loss": 0.9429, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 177, | |
| "tokens_per_second_per_gpu": 5872.24, | |
| "total_tokens": 32638137 | |
| }, | |
| { | |
| "epoch": 2.8286852589641436, | |
| "grad_norm": 0.3203125, | |
| "learning_rate": 1.4128981481764115e-07, | |
| "loss": 0.9746, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 178, | |
| "tokens_per_second_per_gpu": 6173.8, | |
| "total_tokens": 32825895 | |
| }, | |
| { | |
| "epoch": 2.8446215139442232, | |
| "grad_norm": 0.322265625, | |
| "learning_rate": 1.1169173774871478e-07, | |
| "loss": 0.9434, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 179, | |
| "tokens_per_second_per_gpu": 6232.85, | |
| "total_tokens": 33017211 | |
| }, | |
| { | |
| "epoch": 2.860557768924303, | |
| "grad_norm": 0.3203125, | |
| "learning_rate": 8.555138626189619e-08, | |
| "loss": 0.9644, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 180, | |
| "tokens_per_second_per_gpu": 5578.64, | |
| "total_tokens": 33204993 | |
| }, | |
| { | |
| "epoch": 2.8764940239043826, | |
| "grad_norm": 0.310546875, | |
| "learning_rate": 6.287790106757396e-08, | |
| "loss": 0.9429, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 181, | |
| "tokens_per_second_per_gpu": 6034.66, | |
| "total_tokens": 33396379 | |
| }, | |
| { | |
| "epoch": 2.8924302788844622, | |
| "grad_norm": 0.318359375, | |
| "learning_rate": 4.367921058866187e-08, | |
| "loss": 0.959, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 182, | |
| "tokens_per_second_per_gpu": 5841.56, | |
| "total_tokens": 33579725 | |
| }, | |
| { | |
| "epoch": 2.908366533864542, | |
| "grad_norm": 0.33984375, | |
| "learning_rate": 2.796202818819871e-08, | |
| "loss": 1.0166, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 183, | |
| "tokens_per_second_per_gpu": 5736.29, | |
| "total_tokens": 33766220 | |
| }, | |
| { | |
| "epoch": 2.9243027888446216, | |
| "grad_norm": 0.322265625, | |
| "learning_rate": 1.5731849821833955e-08, | |
| "loss": 0.9907, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 184, | |
| "tokens_per_second_per_gpu": 5704.78, | |
| "total_tokens": 33944763 | |
| }, | |
| { | |
| "epoch": 2.9402390438247012, | |
| "grad_norm": 0.326171875, | |
| "learning_rate": 6.992952116013918e-09, | |
| "loss": 0.9478, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 185, | |
| "tokens_per_second_per_gpu": 5773.31, | |
| "total_tokens": 34127443 | |
| }, | |
| { | |
| "epoch": 2.956175298804781, | |
| "grad_norm": 0.349609375, | |
| "learning_rate": 1.7483908725357546e-09, | |
| "loss": 1.0122, | |
| "memory/device_reserved (GiB)": 76.38, | |
| "memory/max_active (GiB)": 64.91, | |
| "memory/max_allocated (GiB)": 64.91, | |
| "step": 186, | |
| "tokens_per_second_per_gpu": 5785.44, | |
| "total_tokens": 34306859 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 186, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 62, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.2082055574021734e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |