diff --git "a/training.log" "b/training.log" new file mode 100644--- /dev/null +++ "b/training.log" @@ -0,0 +1,5836 @@ +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version! + warnings.warn( +[2023-04-21 23:35:43,549] [WARNING] [runner.py:190:fetch_hostfile] Unable to find hostfile, will proceed with training with local resources only. +[2023-04-21 23:35:44,216] [INFO] [runner.py:540:main] cmd = /home/AdamG012/.conda/envs/py39/bin/python -u -m deepspeed.launcher.launch --world_info=eyJsb2NhbGhvc3QiOiBbMCwgMSwgMiwgMywgNCwgNSwgNiwgN119 --master_addr=127.0.0.1 --master_port=12346 --enable_each_rank_log=None main.py --data_path Dahoas/rm-static --data_split 2,4,4 --actor_model_name_or_path ../step1_supervised_finetuning/output/ --critic_model_name_or_path ../step2_reward_model_finetuning/output/ --num_padding_at_beginning 1 --per_device_train_batch_size 4 --per_device_mini_train_batch_size 4 --generation_batch_numbers 1 --ppo_epochs 1 --max_answer_seq_len 256 --max_prompt_seq_len 256 --actor_learning_rate 9.65e-6 --critic_learning_rate 5e-6 --num_train_epochs 1 --lr_scheduler_type cosine --gradient_accumulation_steps 1 --disable_actor_dropout --num_warmup_steps 100 --deepspeed --seed 1234 --enable_hybrid_engine --actor_zero_stage 2 --critic_zero_stage 2 --enable_ema --output_dir ./output +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version! + warnings.warn( +[2023-04-21 23:35:47,889] [INFO] [launch.py:229:main] WORLD INFO DICT: {'localhost': [0, 1, 2, 3, 4, 5, 6, 7]} +[2023-04-21 23:35:47,889] [INFO] [launch.py:235:main] nnodes=1, num_local_procs=8, node_rank=0 +[2023-04-21 23:35:47,889] [INFO] [launch.py:246:main] global_rank_mapping=defaultdict(, {'localhost': [0, 1, 2, 3, 4, 5, 6, 7]}) +[2023-04-21 23:35:47,889] [INFO] [launch.py:247:main] dist_world_size=8 +[2023-04-21 23:35:47,889] [INFO] [launch.py:249:main] Setting CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version! + warnings.warn( +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version! + warnings.warn( +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version! + warnings.warn( +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version! + warnings.warn( +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version! + warnings.warn( +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version! + warnings.warn( +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version! + warnings.warn( +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version! + warnings.warn( +[2023-04-21 23:35:59,562] [INFO] [comm.py:586:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +Found cached dataset parquet (/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec) +Found cached dataset parquet (/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec) + 0%| | 0/2 [00:00 +[2023-04-21 23:36:57,099] [INFO] [logging.py:96:log_dist] [Rank 0] Creating torch.float16 ZeRO stage 2 optimizer +[2023-04-21 23:36:57,099] [INFO] [stage_1_and_2.py:133:__init__] Reduce bucket size 500,000,000 +[2023-04-21 23:36:57,099] [INFO] [stage_1_and_2.py:134:__init__] Allgather bucket size 500,000,000 +[2023-04-21 23:36:57,099] [INFO] [stage_1_and_2.py:135:__init__] CPU Offload: False +[2023-04-21 23:36:57,099] [INFO] [stage_1_and_2.py:136:__init__] Round robin gradient partitioning: False +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Emitting ninja build file /home/AdamG012/.cache/torch_extensions/py39_cu113/utils/build.ninja... +Building extension module utils... +Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +ninja: no work to do. +Loading extension module utils... +Time to load utils op: 0.5721704959869385 seconds +Loading extension module utils... +Loading extension module utils... +Loading extension module utils... +Loading extension module utils... +Loading extension module utils... +Loading extension module utils... +Loading extension module utils... +Time to load utils op: 0.6046469211578369 seconds +Time to load utils op: 0.60459303855896 seconds +Time to load utils op: 0.6040091514587402 seconds +Time to load utils op: 0.6041994094848633 secondsTime to load utils op: 0.6039597988128662 seconds + +Time to load utils op: 0.6039996147155762 seconds +Time to load utils op: 0.6041290760040283 seconds +Rank: 2 partition count [8, 8] and sizes[(164401920, False), (67840, False)] +Rank: 3 partition count [8, 8] and sizes[(164401920, False), (67840, False)] +Rank: 1 partition count [8, 8] and sizes[(164401920, False), (67840, False)] +Rank: 4 partition count [8, 8] and sizes[(164401920, False), (67840, False)] +Rank: 6 partition count [8, 8] and sizes[(164401920, False), (67840, False)] +Rank: 0 partition count [8, 8] and sizes[(164401920, False), (67840, False)] +Rank: 7 partition count [8, 8] and sizes[(164401920, False), (67840, False)] +Rank: 5 partition count [8, 8] and sizes[(164401920, False), (67840, False)] +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0013031959533691406 seconds +Time to load utils op: 0.0010409355163574219 seconds +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Time to load utils op: 0.0012707710266113281 seconds +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0009152889251708984 seconds +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0009307861328125 seconds +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0007898807525634766 seconds +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0012035369873046875 seconds +[2023-04-21 23:37:08,253] [INFO] [utils.py:785:see_memory_usage] Before initializing optimizer states +[2023-04-21 23:37:08,253] [INFO] [utils.py:786:see_memory_usage] MA 3.06 GB Max_MA 3.06 GB CA 3.07 GB Max_CA 3 GB +[2023-04-21 23:37:08,254] [INFO] [utils.py:793:see_memory_usage] CPU Virtual Memory: used = 63.06 GB, percent = 6.3% +[2023-04-21 23:37:08,444] [INFO] [utils.py:785:see_memory_usage] After initializing optimizer states +[2023-04-21 23:37:08,445] [INFO] [utils.py:786:see_memory_usage] MA 4.29 GB Max_MA 4.91 GB CA 4.91 GB Max_CA 5 GB +[2023-04-21 23:37:08,445] [INFO] [utils.py:793:see_memory_usage] CPU Virtual Memory: used = 63.06 GB, percent = 6.3% +[2023-04-21 23:37:08,446] [INFO] [stage_1_and_2.py:489:__init__] optimizer state initialized +[2023-04-21 23:37:08,634] [INFO] [utils.py:785:see_memory_usage] After initializing ZeRO optimizer +[2023-04-21 23:37:08,634] [INFO] [utils.py:786:see_memory_usage] MA 4.29 GB Max_MA 4.29 GB CA 4.91 GB Max_CA 5 GB +[2023-04-21 23:37:08,635] [INFO] [utils.py:793:see_memory_usage] CPU Virtual Memory: used = 63.06 GB, percent = 6.3% +[2023-04-21 23:37:08,636] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Final Optimizer = FusedAdam +[2023-04-21 23:37:08,637] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed using client LR scheduler +[2023-04-21 23:37:08,637] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed LR Scheduler = +[2023-04-21 23:37:08,637] [INFO] [logging.py:96:log_dist] [Rank 0] step=0, skipped=0, lr=[0.0, 0.0], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:37:08,637] [INFO] [config.py:953:print] DeepSpeedEngine configuration: +[2023-04-21 23:37:08,638] [INFO] [config.py:957:print] activation_checkpointing_config { + "partition_activations": false, + "contiguous_memory_optimization": false, + "cpu_checkpointing": false, + "number_checkpoints": null, + "synchronize_checkpoint_boundary": false, + "profile": false +} +[2023-04-21 23:37:08,638] [INFO] [config.py:957:print] aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True} +[2023-04-21 23:37:08,638] [INFO] [config.py:957:print] amp_enabled .................. False +[2023-04-21 23:37:08,638] [INFO] [config.py:957:print] amp_params ................... False +[2023-04-21 23:37:08,638] [INFO] [config.py:957:print] autotuning_config ............ { + "enabled": false, + "start_step": null, + "end_step": null, + "metric_path": null, + "arg_mappings": null, + "metric": "throughput", + "model_info": null, + "results_dir": "autotuning_results", + "exps_dir": "autotuning_exps", + "overwrite": true, + "fast": true, + "start_profile_step": 3, + "end_profile_step": 5, + "tuner_type": "gridsearch", + "tuner_early_stopping": 5, + "tuner_num_trials": 50, + "model_info_path": null, + "mp_size": 1, + "max_train_batch_size": null, + "min_train_batch_size": 1, + "max_train_micro_batch_size_per_gpu": 1.024000e+03, + "min_train_micro_batch_size_per_gpu": 1, + "num_tuning_micro_batch_sizes": 3 +} +[2023-04-21 23:37:08,638] [INFO] [config.py:957:print] bfloat16_enabled ............. False +[2023-04-21 23:37:08,638] [INFO] [config.py:957:print] checkpoint_parallel_write_pipeline False +[2023-04-21 23:37:08,638] [INFO] [config.py:957:print] checkpoint_tag_validation_enabled True +[2023-04-21 23:37:08,638] [INFO] [config.py:957:print] checkpoint_tag_validation_fail False +[2023-04-21 23:37:08,638] [INFO] [config.py:957:print] comms_config ................. +[2023-04-21 23:37:08,638] [INFO] [config.py:957:print] communication_data_type ...... None +[2023-04-21 23:37:08,638] [INFO] [config.py:957:print] compression_config ........... {'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}} +[2023-04-21 23:37:08,638] [INFO] [config.py:957:print] curriculum_enabled_legacy .... False +[2023-04-21 23:37:08,638] [INFO] [config.py:957:print] curriculum_params_legacy ..... False +[2023-04-21 23:37:08,638] [INFO] [config.py:957:print] data_efficiency_config ....... {'enabled': False, 'seed': 1234, 'data_sampling': {'enabled': False, 'num_epochs': 1000, 'num_workers': 0, 'curriculum_learning': {'enabled': False}}, 'data_routing': {'enabled': False, 'random_ltd': {'enabled': False, 'layer_token_lr_schedule': {'enabled': False}}}} +[2023-04-21 23:37:08,638] [INFO] [config.py:957:print] data_efficiency_enabled ...... False +[2023-04-21 23:37:08,638] [INFO] [config.py:957:print] dataloader_drop_last ......... False +[2023-04-21 23:37:08,638] [INFO] [config.py:957:print] disable_allgather ............ False +[2023-04-21 23:37:08,638] [INFO] [config.py:957:print] dump_state ................... False +[2023-04-21 23:37:08,638] [INFO] [config.py:957:print] dynamic_loss_scale_args ...... {'init_scale': 65536, 'scale_window': 100, 'delayed_shift': 2, 'min_scale': 1} +[2023-04-21 23:37:08,638] [INFO] [config.py:957:print] eigenvalue_enabled ........... False +[2023-04-21 23:37:08,638] [INFO] [config.py:957:print] eigenvalue_gas_boundary_resolution 1 +[2023-04-21 23:37:08,638] [INFO] [config.py:957:print] eigenvalue_layer_name ........ bert.encoder.layer +[2023-04-21 23:37:08,638] [INFO] [config.py:957:print] eigenvalue_layer_num ......... 0 +[2023-04-21 23:37:08,638] [INFO] [config.py:957:print] eigenvalue_max_iter .......... 100 +[2023-04-21 23:37:08,638] [INFO] [config.py:957:print] eigenvalue_stability ......... 1e-06 +[2023-04-21 23:37:08,638] [INFO] [config.py:957:print] eigenvalue_tol ............... 0.01 +[2023-04-21 23:37:08,638] [INFO] [config.py:957:print] eigenvalue_verbose ........... False +[2023-04-21 23:37:08,638] [INFO] [config.py:957:print] elasticity_enabled ........... False +[2023-04-21 23:37:08,639] [INFO] [config.py:957:print] flops_profiler_config ........ { + "enabled": false, + "profile_step": 1, + "module_depth": -1, + "top_modules": 1, + "detailed": true, + "output_file": null +} +[2023-04-21 23:37:08,639] [INFO] [config.py:957:print] fp16_auto_cast ............... False +[2023-04-21 23:37:08,639] [INFO] [config.py:957:print] fp16_enabled ................. True +[2023-04-21 23:37:08,639] [INFO] [config.py:957:print] fp16_master_weights_and_gradients False +[2023-04-21 23:37:08,639] [INFO] [config.py:957:print] global_rank .................. 0 +[2023-04-21 23:37:08,639] [INFO] [config.py:957:print] grad_accum_dtype ............. None +[2023-04-21 23:37:08,639] [INFO] [config.py:957:print] gradient_accumulation_steps .. 1 +[2023-04-21 23:37:08,639] [INFO] [config.py:957:print] gradient_clipping ............ 1.0 +[2023-04-21 23:37:08,639] [INFO] [config.py:957:print] gradient_predivide_factor .... 1.0 +[2023-04-21 23:37:08,639] [INFO] [config.py:957:print] hybrid_engine ................ enabled=True max_out_tokens=512 inference_tp_size=1 release_inference_cache=False pin_parameters=True tp_gather_partition_size=8 +[2023-04-21 23:37:08,639] [INFO] [config.py:957:print] initial_dynamic_scale ........ 65536 +[2023-04-21 23:37:08,639] [INFO] [config.py:957:print] load_universal_checkpoint .... False +[2023-04-21 23:37:08,639] [INFO] [config.py:957:print] loss_scale ................... 0 +[2023-04-21 23:37:08,639] [INFO] [config.py:957:print] memory_breakdown ............. False +[2023-04-21 23:37:08,639] [INFO] [config.py:957:print] monitor_config ............... tensorboard=TensorBoardConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') wandb=WandbConfig(enabled=False, group=None, team=None, project='deepspeed') csv_monitor=CSVConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') enabled=False +[2023-04-21 23:37:08,639] [INFO] [config.py:957:print] nebula_config ................ { + "enabled": false, + "persistent_storage_path": null, + "persistent_time_interval": 100, + "num_of_version_in_retention": 2, + "enable_nebula_load": true, + "load_path": null +} +[2023-04-21 23:37:08,639] [INFO] [config.py:957:print] optimizer_legacy_fusion ...... False +[2023-04-21 23:37:08,639] [INFO] [config.py:957:print] optimizer_name ............... None +[2023-04-21 23:37:08,639] [INFO] [config.py:957:print] optimizer_params ............. None +[2023-04-21 23:37:08,639] [INFO] [config.py:957:print] pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0} +[2023-04-21 23:37:08,639] [INFO] [config.py:957:print] pld_enabled .................. False +[2023-04-21 23:37:08,639] [INFO] [config.py:957:print] pld_params ................... False +[2023-04-21 23:37:08,639] [INFO] [config.py:957:print] prescale_gradients ........... False +[2023-04-21 23:37:08,639] [INFO] [config.py:957:print] scheduler_name ............... None +[2023-04-21 23:37:08,639] [INFO] [config.py:957:print] scheduler_params ............. None +[2023-04-21 23:37:08,639] [INFO] [config.py:957:print] sparse_attention ............. None +[2023-04-21 23:37:08,639] [INFO] [config.py:957:print] sparse_gradients_enabled ..... False +[2023-04-21 23:37:08,639] [INFO] [config.py:957:print] steps_per_print .............. 10 +[2023-04-21 23:37:08,639] [INFO] [config.py:957:print] train_batch_size ............. 32 +[2023-04-21 23:37:08,639] [INFO] [config.py:957:print] train_micro_batch_size_per_gpu 4 +[2023-04-21 23:37:08,639] [INFO] [config.py:957:print] use_node_local_storage ....... False +[2023-04-21 23:37:08,639] [INFO] [config.py:957:print] wall_clock_breakdown ......... False +[2023-04-21 23:37:08,639] [INFO] [config.py:957:print] world_size ................... 8 +[2023-04-21 23:37:08,639] [INFO] [config.py:957:print] zero_allow_untested_optimizer False +[2023-04-21 23:37:08,639] [INFO] [config.py:957:print] zero_config .................. stage=2 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=500,000,000 allgather_partitions=True allgather_bucket_size=500,000,000 overlap_comm=False load_from_fp32_weights=True elastic_checkpoint=False offload_param=DeepSpeedZeroOffloadParamConfig(device='none', nvme_path=None, buffer_count=5, buffer_size=100,000,000, max_in_cpu=1,000,000,000, pin_memory=False) offload_optimizer=DeepSpeedZeroOffloadOptimizerConfig(device='none', nvme_path=None, buffer_count=4, pin_memory=False, pipeline=False, pipeline_read=False, pipeline_write=False, fast_init=False) sub_group_size=1,000,000,000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=30000000 param_persistence_threshold=10000 model_persistence_threshold=sys.maxsize max_live_parameters=30000000 max_reuse_distance=1,000,000,000 gather_16bit_weights_on_model_save=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=False memory_efficient_linear=False +[2023-04-21 23:37:08,639] [INFO] [config.py:957:print] zero_enabled ................. True +[2023-04-21 23:37:08,639] [INFO] [config.py:957:print] zero_force_ds_cpu_optimizer .. True +[2023-04-21 23:37:08,639] [INFO] [config.py:957:print] zero_optimization_stage ...... 2 +[2023-04-21 23:37:08,640] [INFO] [config.py:943:print_user_config] json = { + "train_batch_size": 32, + "train_micro_batch_size_per_gpu": 4, + "steps_per_print": 10, + "zero_optimization": { + "stage": 2, + "offload_param": { + "device": "none" + }, + "offload_optimizer": { + "device": "none" + }, + "stage3_param_persistence_threshold": 1.000000e+04, + "stage3_max_live_parameters": 3.000000e+07, + "stage3_prefetch_bucket_size": 3.000000e+07, + "memory_efficient_linear": false + }, + "fp16": { + "enabled": true, + "loss_scale_window": 100 + }, + "gradient_clipping": 1.0, + "prescale_gradients": false, + "wall_clock_breakdown": false, + "hybrid_engine": { + "enabled": true, + "max_out_tokens": 512, + "inference_tp_size": 1, + "release_inference_cache": false, + "pin_parameters": true, + "tp_gather_partition_size": 8 + } +} +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0008935928344726562 seconds +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Detected CUDA files, patching ldflags +Emitting ninja build file /home/AdamG012/.cache/torch_extensions/py39_cu113/transformer_inference/build.ninja... +Building extension module transformer_inference... +Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +ninja: no work to do. +Loading extension module transformer_inference... +Loading extension module transformer_inference... +Time to load transformer_inference op: 0.695580244064331 seconds +Time to load transformer_inference op: 0.6461379528045654 seconds +[2023-04-21 23:37:09,607] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed-Inference config: {'layer_id': 0, 'hidden_size': 2048, 'intermediate_size': 8192, 'heads': 32, 'num_hidden_layers': -1, 'fp16': True, 'pre_layer_norm': True, 'local_rank': -1, 'stochastic_mode': False, 'epsilon': 1e-05, 'mp_size': 1, 'q_int8': False, 'scale_attention': True, 'triangular_masking': True, 'local_attention': False, 'window_size': 1, 'rotary_dim': -1, 'rotate_half': False, 'rotate_every_two': True, 'return_tuple': True, 'mlp_after_attn': True, 'mlp_act_func_type': , 'specialized_mode': False, 'training_mp_size': 1, 'bigscience_bloom': False, 'max_out_tokens': 512, 'min_out_tokens': 512, 'scale_attn_by_inverse_layer_idx': False, 'enable_qkv_quantization': False, 'use_mup': False, 'return_single_tuple': False, 'set_empty_params': True, 'transposed_mode': True} +Loading extension module transformer_inference... +Loading extension module transformer_inference... +Time to load transformer_inference op: 0.7190039157867432 seconds +Time to load transformer_inference op: 0.7204358577728271 seconds +Loading extension module transformer_inference... +Time to load transformer_inference op: 0.7205994129180908 seconds +Loading extension module transformer_inference... +Time to load transformer_inference op: 0.722914457321167 seconds +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Loading extension module transformer_inference... +Loading extension module transformer_inference... +Time to load transformer_inference op: 0.7355391979217529 seconds +Time to load transformer_inference op: 0.657106876373291 seconds +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module transformer_inference, skipping build step... +Loading extension module transformer_inference... +Time to load transformer_inference op: 0.11342453956604004 seconds +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module transformer_inference, skipping build step... +Loading extension module transformer_inference... +Time to load transformer_inference op: 0.11234903335571289 seconds +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module transformer_inference, skipping build step... +Loading extension module transformer_inference... +Time to load transformer_inference op: 0.11516976356506348 seconds +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module transformer_inference, skipping build step... +Loading extension module transformer_inference... +Time to load transformer_inference op: 0.11846494674682617 seconds +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module transformer_inference, skipping build step... +Loading extension module transformer_inference... +Time to load transformer_inference op: 0.13445663452148438 seconds +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module transformer_inference, skipping build step... +Loading extension module transformer_inference... +Time to load transformer_inference op: 0.13429594039916992 seconds +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module transformer_inference, skipping build step... +Loading extension module transformer_inference... +Time to load transformer_inference op: 0.12044620513916016 seconds +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +No modifications detected for re-loaded extension module transformer_inference, skipping build step... +Loading extension module transformer_inference... +Time to load transformer_inference op: 0.15472984313964844 seconds +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module transformer_inference, skipping build step... +Loading extension module transformer_inference... +Time to load transformer_inference op: 0.11486577987670898 seconds +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module transformer_inference, skipping build step... +Loading extension module transformer_inference... +Time to load transformer_inference op: 0.11190509796142578 seconds +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +No modifications detected for re-loaded extension module transformer_inference, skipping build step... +Loading extension module transformer_inference... +Time to load transformer_inference op: 0.1143035888671875 seconds +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +No modifications detected for re-loaded extension module transformer_inference, skipping build step... +Loading extension module transformer_inference... +Time to load transformer_inference op: 0.11675357818603516 seconds +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module transformer_inference, skipping build step... +Loading extension module transformer_inference... +Time to load transformer_inference op: 0.12160348892211914 seconds +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module transformer_inference, skipping build step... +Loading extension module transformer_inference... +Time to load transformer_inference op: 0.1185293197631836 seconds +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module transformer_inference, skipping build step... +Loading extension module transformer_inference... +Time to load transformer_inference op: 0.13269996643066406 seconds +******************[end] Initialized Actor Model [end] (duration: 40.32s)****************** +*************************[start] Initializing Ref Model [start] ************************** +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module transformer_inference, skipping build step... +Loading extension module transformer_inference... +Time to load transformer_inference op: 0.16194510459899902 seconds +[2023-04-21 23:37:27,313] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed info: version=0.9.0, git-hash=unknown, git-branch=unknown +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0013289451599121094 seconds +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0012705326080322266 seconds +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0012392997741699219 seconds +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0012006759643554688 seconds +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0012307167053222656 seconds +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0012030601501464844 seconds +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0012555122375488281 seconds +[2023-04-21 23:37:35,898] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False +[2023-04-21 23:37:35,899] [INFO] [config.py:953:print] DeepSpeedEngine configuration: +[2023-04-21 23:37:35,900] [INFO] [config.py:957:print] activation_checkpointing_config { + "partition_activations": false, + "contiguous_memory_optimization": false, + "cpu_checkpointing": false, + "number_checkpoints": null, + "synchronize_checkpoint_boundary": false, + "profile": false +} +[2023-04-21 23:37:35,900] [INFO] [config.py:957:print] aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True} +[2023-04-21 23:37:35,900] [INFO] [config.py:957:print] amp_enabled .................. False +[2023-04-21 23:37:35,900] [INFO] [config.py:957:print] amp_params ................... False +[2023-04-21 23:37:35,900] [INFO] [config.py:957:print] autotuning_config ............ { + "enabled": false, + "start_step": null, + "end_step": null, + "metric_path": null, + "arg_mappings": null, + "metric": "throughput", + "model_info": null, + "results_dir": "autotuning_results", + "exps_dir": "autotuning_exps", + "overwrite": true, + "fast": true, + "start_profile_step": 3, + "end_profile_step": 5, + "tuner_type": "gridsearch", + "tuner_early_stopping": 5, + "tuner_num_trials": 50, + "model_info_path": null, + "mp_size": 1, + "max_train_batch_size": null, + "min_train_batch_size": 1, + "max_train_micro_batch_size_per_gpu": 1.024000e+03, + "min_train_micro_batch_size_per_gpu": 1, + "num_tuning_micro_batch_sizes": 3 +} +[2023-04-21 23:37:35,900] [INFO] [config.py:957:print] bfloat16_enabled ............. False +[2023-04-21 23:37:35,900] [INFO] [config.py:957:print] checkpoint_parallel_write_pipeline False +[2023-04-21 23:37:35,900] [INFO] [config.py:957:print] checkpoint_tag_validation_enabled True +[2023-04-21 23:37:35,900] [INFO] [config.py:957:print] checkpoint_tag_validation_fail False +[2023-04-21 23:37:35,900] [INFO] [config.py:957:print] comms_config ................. +[2023-04-21 23:37:35,900] [INFO] [config.py:957:print] communication_data_type ...... None +[2023-04-21 23:37:35,900] [INFO] [config.py:957:print] compression_config ........... {'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}} +[2023-04-21 23:37:35,900] [INFO] [config.py:957:print] curriculum_enabled_legacy .... False +[2023-04-21 23:37:35,900] [INFO] [config.py:957:print] curriculum_params_legacy ..... False +[2023-04-21 23:37:35,900] [INFO] [config.py:957:print] data_efficiency_config ....... {'enabled': False, 'seed': 1234, 'data_sampling': {'enabled': False, 'num_epochs': 1000, 'num_workers': 0, 'curriculum_learning': {'enabled': False}}, 'data_routing': {'enabled': False, 'random_ltd': {'enabled': False, 'layer_token_lr_schedule': {'enabled': False}}}} +[2023-04-21 23:37:35,900] [INFO] [config.py:957:print] data_efficiency_enabled ...... False +[2023-04-21 23:37:35,900] [INFO] [config.py:957:print] dataloader_drop_last ......... False +[2023-04-21 23:37:35,900] [INFO] [config.py:957:print] disable_allgather ............ False +[2023-04-21 23:37:35,900] [INFO] [config.py:957:print] dump_state ................... False +[2023-04-21 23:37:35,900] [INFO] [config.py:957:print] dynamic_loss_scale_args ...... None +[2023-04-21 23:37:35,900] [INFO] [config.py:957:print] eigenvalue_enabled ........... False +[2023-04-21 23:37:35,900] [INFO] [config.py:957:print] eigenvalue_gas_boundary_resolution 1 +[2023-04-21 23:37:35,900] [INFO] [config.py:957:print] eigenvalue_layer_name ........ bert.encoder.layer +[2023-04-21 23:37:35,901] [INFO] [config.py:957:print] eigenvalue_layer_num ......... 0 +[2023-04-21 23:37:35,901] [INFO] [config.py:957:print] eigenvalue_max_iter .......... 100 +[2023-04-21 23:37:35,901] [INFO] [config.py:957:print] eigenvalue_stability ......... 1e-06 +[2023-04-21 23:37:35,901] [INFO] [config.py:957:print] eigenvalue_tol ............... 0.01 +[2023-04-21 23:37:35,901] [INFO] [config.py:957:print] eigenvalue_verbose ........... False +[2023-04-21 23:37:35,901] [INFO] [config.py:957:print] elasticity_enabled ........... False +[2023-04-21 23:37:35,901] [INFO] [config.py:957:print] flops_profiler_config ........ { + "enabled": false, + "profile_step": 1, + "module_depth": -1, + "top_modules": 1, + "detailed": true, + "output_file": null +} +[2023-04-21 23:37:35,901] [INFO] [config.py:957:print] fp16_auto_cast ............... False +[2023-04-21 23:37:35,901] [INFO] [config.py:957:print] fp16_enabled ................. True +[2023-04-21 23:37:35,901] [INFO] [config.py:957:print] fp16_master_weights_and_gradients False +[2023-04-21 23:37:35,901] [INFO] [config.py:957:print] global_rank .................. 0 +[2023-04-21 23:37:35,901] [INFO] [config.py:957:print] grad_accum_dtype ............. None +[2023-04-21 23:37:35,901] [INFO] [config.py:957:print] gradient_accumulation_steps .. 1 +[2023-04-21 23:37:35,901] [INFO] [config.py:957:print] gradient_clipping ............ 1.0 +[2023-04-21 23:37:35,901] [INFO] [config.py:957:print] gradient_predivide_factor .... 1.0 +[2023-04-21 23:37:35,901] [INFO] [config.py:957:print] hybrid_engine ................ enabled=False max_out_tokens=512 inference_tp_size=1 release_inference_cache=False pin_parameters=True tp_gather_partition_size=8 +[2023-04-21 23:37:35,901] [INFO] [config.py:957:print] initial_dynamic_scale ........ 65536 +[2023-04-21 23:37:35,901] [INFO] [config.py:957:print] load_universal_checkpoint .... False +[2023-04-21 23:37:35,901] [INFO] [config.py:957:print] loss_scale ................... 0 +[2023-04-21 23:37:35,901] [INFO] [config.py:957:print] memory_breakdown ............. False +[2023-04-21 23:37:35,901] [INFO] [config.py:957:print] monitor_config ............... tensorboard=TensorBoardConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') wandb=WandbConfig(enabled=False, group=None, team=None, project='deepspeed') csv_monitor=CSVConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') enabled=False +[2023-04-21 23:37:35,901] [INFO] [config.py:957:print] nebula_config ................ { + "enabled": false, + "persistent_storage_path": null, + "persistent_time_interval": 100, + "num_of_version_in_retention": 2, + "enable_nebula_load": true, + "load_path": null +} +[2023-04-21 23:37:35,901] [INFO] [config.py:957:print] optimizer_legacy_fusion ...... False +[2023-04-21 23:37:35,901] [INFO] [config.py:957:print] optimizer_name ............... None +[2023-04-21 23:37:35,901] [INFO] [config.py:957:print] optimizer_params ............. None +[2023-04-21 23:37:35,901] [INFO] [config.py:957:print] pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0} +[2023-04-21 23:37:35,901] [INFO] [config.py:957:print] pld_enabled .................. False +[2023-04-21 23:37:35,901] [INFO] [config.py:957:print] pld_params ................... False +[2023-04-21 23:37:35,901] [INFO] [config.py:957:print] prescale_gradients ........... False +[2023-04-21 23:37:35,901] [INFO] [config.py:957:print] scheduler_name ............... None +[2023-04-21 23:37:35,901] [INFO] [config.py:957:print] scheduler_params ............. None +[2023-04-21 23:37:35,901] [INFO] [config.py:957:print] sparse_attention ............. None +[2023-04-21 23:37:35,901] [INFO] [config.py:957:print] sparse_gradients_enabled ..... False +[2023-04-21 23:37:35,902] [INFO] [config.py:957:print] steps_per_print .............. 10 +[2023-04-21 23:37:35,902] [INFO] [config.py:957:print] train_batch_size ............. 32 +[2023-04-21 23:37:35,902] [INFO] [config.py:957:print] train_micro_batch_size_per_gpu 4 +[2023-04-21 23:37:35,902] [INFO] [config.py:957:print] use_node_local_storage ....... False +[2023-04-21 23:37:35,902] [INFO] [config.py:957:print] wall_clock_breakdown ......... False +[2023-04-21 23:37:35,902] [INFO] [config.py:957:print] world_size ................... 8 +[2023-04-21 23:37:35,902] [INFO] [config.py:957:print] zero_allow_untested_optimizer False +[2023-04-21 23:37:35,902] [INFO] [config.py:957:print] zero_config .................. stage=0 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=500,000,000 allgather_partitions=True allgather_bucket_size=500,000,000 overlap_comm=False load_from_fp32_weights=True elastic_checkpoint=False offload_param=DeepSpeedZeroOffloadParamConfig(device='none', nvme_path=None, buffer_count=5, buffer_size=100,000,000, max_in_cpu=1,000,000,000, pin_memory=False) offload_optimizer=None sub_group_size=1,000,000,000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=50,000,000 param_persistence_threshold=10000 model_persistence_threshold=sys.maxsize max_live_parameters=1,000,000,000 max_reuse_distance=1,000,000,000 gather_16bit_weights_on_model_save=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=False memory_efficient_linear=False +[2023-04-21 23:37:35,902] [INFO] [config.py:957:print] zero_enabled ................. False +[2023-04-21 23:37:35,902] [INFO] [config.py:957:print] zero_force_ds_cpu_optimizer .. True +[2023-04-21 23:37:35,902] [INFO] [config.py:957:print] zero_optimization_stage ...... 0 +[2023-04-21 23:37:35,902] [INFO] [config.py:943:print_user_config] json = { + "train_batch_size": 32, + "train_micro_batch_size_per_gpu": 4, + "steps_per_print": 10, + "zero_optimization": { + "stage": 0, + "stage3_param_persistence_threshold": 1.000000e+04, + "offload_param": { + "device": "none" + }, + "memory_efficient_linear": false + }, + "fp16": { + "enabled": true + }, + "gradient_clipping": 1.0, + "prescale_gradients": false, + "wall_clock_breakdown": false +} +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0016591548919677734 seconds +*******************[end] Initialized Ref Model [end] (duration: 25.47s)******************* +*************************[start] Initializing EMA Model [start] ************************** +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0015752315521240234 seconds +[2023-04-21 23:37:53,908] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed info: version=0.9.0, git-hash=unknown, git-branch=unknown +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0014162063598632812 seconds +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.011510372161865234 seconds +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0012710094451904297 seconds +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0012392997741699219 seconds +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.001157522201538086 seconds +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module fused_adam, skipping build step... +Loading extension module fused_adam... +Time to load fused_adam op: 0.0017504692077636719 seconds +[2023-04-21 23:37:59,716] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False +[2023-04-21 23:37:59,717] [INFO] [config.py:953:print] DeepSpeedEngine configuration: +[2023-04-21 23:37:59,717] [INFO] [config.py:957:print] activation_checkpointing_config { + "partition_activations": false, + "contiguous_memory_optimization": false, + "cpu_checkpointing": false, + "number_checkpoints": null, + "synchronize_checkpoint_boundary": false, + "profile": false +} +[2023-04-21 23:37:59,718] [INFO] [config.py:957:print] aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True} +[2023-04-21 23:37:59,718] [INFO] [config.py:957:print] amp_enabled .................. False +[2023-04-21 23:37:59,718] [INFO] [config.py:957:print] amp_params ................... False +[2023-04-21 23:37:59,718] [INFO] [config.py:957:print] autotuning_config ............ { + "enabled": false, + "start_step": null, + "end_step": null, + "metric_path": null, + "arg_mappings": null, + "metric": "throughput", + "model_info": null, + "results_dir": "autotuning_results", + "exps_dir": "autotuning_exps", + "overwrite": true, + "fast": true, + "start_profile_step": 3, + "end_profile_step": 5, + "tuner_type": "gridsearch", + "tuner_early_stopping": 5, + "tuner_num_trials": 50, + "model_info_path": null, + "mp_size": 1, + "max_train_batch_size": null, + "min_train_batch_size": 1, + "max_train_micro_batch_size_per_gpu": 1.024000e+03, + "min_train_micro_batch_size_per_gpu": 1, + "num_tuning_micro_batch_sizes": 3 +} +[2023-04-21 23:37:59,718] [INFO] [config.py:957:print] bfloat16_enabled ............. False +[2023-04-21 23:37:59,718] [INFO] [config.py:957:print] checkpoint_parallel_write_pipeline False +[2023-04-21 23:37:59,718] [INFO] [config.py:957:print] checkpoint_tag_validation_enabled True +[2023-04-21 23:37:59,718] [INFO] [config.py:957:print] checkpoint_tag_validation_fail False +[2023-04-21 23:37:59,718] [INFO] [config.py:957:print] comms_config ................. +[2023-04-21 23:37:59,718] [INFO] [config.py:957:print] communication_data_type ...... None +[2023-04-21 23:37:59,718] [INFO] [config.py:957:print] compression_config ........... {'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}} +[2023-04-21 23:37:59,718] [INFO] [config.py:957:print] curriculum_enabled_legacy .... False +[2023-04-21 23:37:59,718] [INFO] [config.py:957:print] curriculum_params_legacy ..... False +[2023-04-21 23:37:59,718] [INFO] [config.py:957:print] data_efficiency_config ....... {'enabled': False, 'seed': 1234, 'data_sampling': {'enabled': False, 'num_epochs': 1000, 'num_workers': 0, 'curriculum_learning': {'enabled': False}}, 'data_routing': {'enabled': False, 'random_ltd': {'enabled': False, 'layer_token_lr_schedule': {'enabled': False}}}} +[2023-04-21 23:37:59,718] [INFO] [config.py:957:print] data_efficiency_enabled ...... False +[2023-04-21 23:37:59,718] [INFO] [config.py:957:print] dataloader_drop_last ......... False +[2023-04-21 23:37:59,718] [INFO] [config.py:957:print] disable_allgather ............ False +[2023-04-21 23:37:59,718] [INFO] [config.py:957:print] dump_state ................... False +[2023-04-21 23:37:59,718] [INFO] [config.py:957:print] dynamic_loss_scale_args ...... None +[2023-04-21 23:37:59,718] [INFO] [config.py:957:print] eigenvalue_enabled ........... False +[2023-04-21 23:37:59,718] [INFO] [config.py:957:print] eigenvalue_gas_boundary_resolution 1 +[2023-04-21 23:37:59,718] [INFO] [config.py:957:print] eigenvalue_layer_name ........ bert.encoder.layer +[2023-04-21 23:37:59,718] [INFO] [config.py:957:print] eigenvalue_layer_num ......... 0 +[2023-04-21 23:37:59,718] [INFO] [config.py:957:print] eigenvalue_max_iter .......... 100 +[2023-04-21 23:37:59,718] [INFO] [config.py:957:print] eigenvalue_stability ......... 1e-06 +[2023-04-21 23:37:59,718] [INFO] [config.py:957:print] eigenvalue_tol ............... 0.01 +[2023-04-21 23:37:59,718] [INFO] [config.py:957:print] eigenvalue_verbose ........... False +[2023-04-21 23:37:59,718] [INFO] [config.py:957:print] elasticity_enabled ........... False +[2023-04-21 23:37:59,718] [INFO] [config.py:957:print] flops_profiler_config ........ { + "enabled": false, + "profile_step": 1, + "module_depth": -1, + "top_modules": 1, + "detailed": true, + "output_file": null +} +[2023-04-21 23:37:59,718] [INFO] [config.py:957:print] fp16_auto_cast ............... False +[2023-04-21 23:37:59,718] [INFO] [config.py:957:print] fp16_enabled ................. True +[2023-04-21 23:37:59,718] [INFO] [config.py:957:print] fp16_master_weights_and_gradients False +[2023-04-21 23:37:59,719] [INFO] [config.py:957:print] global_rank .................. 0 +[2023-04-21 23:37:59,719] [INFO] [config.py:957:print] grad_accum_dtype ............. None +[2023-04-21 23:37:59,719] [INFO] [config.py:957:print] gradient_accumulation_steps .. 1 +[2023-04-21 23:37:59,719] [INFO] [config.py:957:print] gradient_clipping ............ 1.0 +[2023-04-21 23:37:59,719] [INFO] [config.py:957:print] gradient_predivide_factor .... 1.0 +[2023-04-21 23:37:59,719] [INFO] [config.py:957:print] hybrid_engine ................ enabled=False max_out_tokens=512 inference_tp_size=1 release_inference_cache=False pin_parameters=True tp_gather_partition_size=8 +[2023-04-21 23:37:59,719] [INFO] [config.py:957:print] initial_dynamic_scale ........ 65536 +[2023-04-21 23:37:59,719] [INFO] [config.py:957:print] load_universal_checkpoint .... False +[2023-04-21 23:37:59,719] [INFO] [config.py:957:print] loss_scale ................... 0 +[2023-04-21 23:37:59,719] [INFO] [config.py:957:print] memory_breakdown ............. False +[2023-04-21 23:37:59,719] [INFO] [config.py:957:print] monitor_config ............... tensorboard=TensorBoardConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') wandb=WandbConfig(enabled=False, group=None, team=None, project='deepspeed') csv_monitor=CSVConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') enabled=False +[2023-04-21 23:37:59,719] [INFO] [config.py:957:print] nebula_config ................ { + "enabled": false, + "persistent_storage_path": null, + "persistent_time_interval": 100, + "num_of_version_in_retention": 2, + "enable_nebula_load": true, + "load_path": null +} +[2023-04-21 23:37:59,719] [INFO] [config.py:957:print] optimizer_legacy_fusion ...... False +[2023-04-21 23:37:59,719] [INFO] [config.py:957:print] optimizer_name ............... None +[2023-04-21 23:37:59,719] [INFO] [config.py:957:print] optimizer_params ............. None +[2023-04-21 23:37:59,719] [INFO] [config.py:957:print] pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0} +[2023-04-21 23:37:59,719] [INFO] [config.py:957:print] pld_enabled .................. False +[2023-04-21 23:37:59,719] [INFO] [config.py:957:print] pld_params ................... False +[2023-04-21 23:37:59,719] [INFO] [config.py:957:print] prescale_gradients ........... False +[2023-04-21 23:37:59,719] [INFO] [config.py:957:print] scheduler_name ............... None +[2023-04-21 23:37:59,719] [INFO] [config.py:957:print] scheduler_params ............. None +[2023-04-21 23:37:59,719] [INFO] [config.py:957:print] sparse_attention ............. None +[2023-04-21 23:37:59,719] [INFO] [config.py:957:print] sparse_gradients_enabled ..... False +[2023-04-21 23:37:59,719] [INFO] [config.py:957:print] steps_per_print .............. 10 +[2023-04-21 23:37:59,719] [INFO] [config.py:957:print] train_batch_size ............. 32 +[2023-04-21 23:37:59,719] [INFO] [config.py:957:print] train_micro_batch_size_per_gpu 4 +[2023-04-21 23:37:59,719] [INFO] [config.py:957:print] use_node_local_storage ....... False +[2023-04-21 23:37:59,719] [INFO] [config.py:957:print] wall_clock_breakdown ......... False +[2023-04-21 23:37:59,719] [INFO] [config.py:957:print] world_size ................... 8 +[2023-04-21 23:37:59,719] [INFO] [config.py:957:print] zero_allow_untested_optimizer False +[2023-04-21 23:37:59,719] [INFO] [config.py:957:print] zero_config .................. stage=0 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=500,000,000 allgather_partitions=True allgather_bucket_size=500,000,000 overlap_comm=False load_from_fp32_weights=True elastic_checkpoint=False offload_param=DeepSpeedZeroOffloadParamConfig(device='none', nvme_path=None, buffer_count=5, buffer_size=100,000,000, max_in_cpu=1,000,000,000, pin_memory=False) offload_optimizer=None sub_group_size=1,000,000,000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=50,000,000 param_persistence_threshold=10000 model_persistence_threshold=sys.maxsize max_live_parameters=1,000,000,000 max_reuse_distance=1,000,000,000 gather_16bit_weights_on_model_save=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=False memory_efficient_linear=False +[2023-04-21 23:37:59,719] [INFO] [config.py:957:print] zero_enabled ................. False +[2023-04-21 23:37:59,719] [INFO] [config.py:957:print] zero_force_ds_cpu_optimizer .. True +[2023-04-21 23:37:59,719] [INFO] [config.py:957:print] zero_optimization_stage ...... 0 +[2023-04-21 23:37:59,720] [INFO] [config.py:943:print_user_config] json = { + "train_batch_size": 32, + "train_micro_batch_size_per_gpu": 4, + "steps_per_print": 10, + "zero_optimization": { + "stage": 0, + "stage3_param_persistence_threshold": 1.000000e+04, + "offload_param": { + "device": "none" + }, + "memory_efficient_linear": false + }, + "fp16": { + "enabled": true + }, + "gradient_clipping": 1.0, + "prescale_gradients": false, + "wall_clock_breakdown": false +} +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0014257431030273438 seconds +*******************[end] Initialized EMA Model [end] (duration: 23.82s)******************* +************************[start] Initializing Critic Model [start] ************************ +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0015196800231933594 seconds +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0009224414825439453 seconds +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module fused_adam, skipping build step... +Loading extension module fused_adam... +Time to load fused_adam op: 0.006412982940673828 seconds +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module fused_adam, skipping build step... +Loading extension module fused_adam... +Time to load fused_adam op: 0.0021407604217529297 seconds +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module fused_adam, skipping build step... +Loading extension module fused_adam... +Time to load fused_adam op: 0.0017254352569580078 seconds +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module fused_adam, skipping build step... +Loading extension module fused_adam... +Time to load fused_adam op: 0.002038240432739258 seconds +[2023-04-21 23:38:09,597] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed info: version=0.9.0, git-hash=unknown, git-branch=unknown +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module fused_adam, skipping build step... +Loading extension module fused_adam... +Time to load fused_adam op: 0.002299070358276367 seconds +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module fused_adam, skipping build step... +Loading extension module fused_adam... +Time to load fused_adam op: 0.0019500255584716797 seconds +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module fused_adam, skipping build step... +Loading extension module fused_adam... +Time to load fused_adam op: 0.0018029212951660156 seconds +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.002105236053466797 seconds +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.001987457275390625 seconds +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0010986328125 seconds +[2023-04-21 23:38:13,959] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False +[2023-04-21 23:38:13,961] [INFO] [logging.py:96:log_dist] [Rank 0] Removing param_group that has no 'params' in the client Optimizer +[2023-04-21 23:38:13,961] [INFO] [logging.py:96:log_dist] [Rank 0] Using client Optimizer as basic optimizer +[2023-04-21 23:38:13,976] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Basic Optimizer = FusedAdam +[2023-04-21 23:38:13,976] [INFO] [utils.py:51:is_zero_supported_optimizer] Checking ZeRO support for optimizer=FusedAdam type= +[2023-04-21 23:38:13,976] [INFO] [logging.py:96:log_dist] [Rank 0] Creating torch.float16 ZeRO stage 2 optimizer +[2023-04-21 23:38:13,977] [INFO] [stage_1_and_2.py:133:__init__] Reduce bucket size 500,000,000 +[2023-04-21 23:38:13,977] [INFO] [stage_1_and_2.py:134:__init__] Allgather bucket size 500,000,000 +[2023-04-21 23:38:13,977] [INFO] [stage_1_and_2.py:135:__init__] CPU Offload: False +[2023-04-21 23:38:13,977] [INFO] [stage_1_and_2.py:136:__init__] Round robin gradient partitioning: False +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0016143321990966797 seconds +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.004909992218017578 seconds +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.001100301742553711 seconds +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.00106048583984375 seconds +Rank: 4 partition count [8, 8] and sizes[(41365824, False), (33792, False)] +Rank: 1 partition count [8, 8] and sizes[(41365824, False), (33792, False)] +Rank: 7 partition count [8, 8] and sizes[(41365824, False), (33792, False)] +Rank: 2 partition count [8, 8] and sizes[(41365824, False), (33792, False)] +Rank: 6 partition count [8, 8] and sizes[(41365824, False), (33792, False)] +Rank: 0 partition count [8, 8] and sizes[(41365824, False), (33792, False)] +Rank: 3 partition count [8, 8] and sizes[(41365824, False), (33792, False)] +Rank: 5 partition count [8, 8] and sizes[(41365824, False), (33792, False)] +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Time to load utils op: 0.0012159347534179688 seconds +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils...Time to load utils op: 0.001161813735961914 seconds + +Time to load utils op: 0.0007848739624023438 seconds +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Time to load utils op: 0.0008122920989990234 seconds +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0008475780487060547 seconds +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0010933876037597656 seconds +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0012607574462890625 seconds +[2023-04-21 23:38:23,773] [INFO] [utils.py:785:see_memory_usage] Before initializing optimizer states +[2023-04-21 23:38:23,774] [INFO] [utils.py:786:see_memory_usage] MA 10.55 GB Max_MA 10.55 GB CA 10.94 GB Max_CA 11 GB +[2023-04-21 23:38:23,774] [INFO] [utils.py:793:see_memory_usage] CPU Virtual Memory: used = 89.44 GB, percent = 8.9% +[2023-04-21 23:38:23,968] [INFO] [utils.py:785:see_memory_usage] After initializing optimizer states +[2023-04-21 23:38:23,968] [INFO] [utils.py:786:see_memory_usage] MA 10.86 GB Max_MA 11.01 GB CA 11.41 GB Max_CA 11 GB +[2023-04-21 23:38:23,969] [INFO] [utils.py:793:see_memory_usage] CPU Virtual Memory: used = 89.93 GB, percent = 8.9% +[2023-04-21 23:38:23,969] [INFO] [stage_1_and_2.py:489:__init__] optimizer state initialized +[2023-04-21 23:38:24,165] [INFO] [utils.py:785:see_memory_usage] After initializing ZeRO optimizer +[2023-04-21 23:38:24,166] [INFO] [utils.py:786:see_memory_usage] MA 10.86 GB Max_MA 10.86 GB CA 11.41 GB Max_CA 11 GB +[2023-04-21 23:38:24,166] [INFO] [utils.py:793:see_memory_usage] CPU Virtual Memory: used = 90.43 GB, percent = 9.0% +[2023-04-21 23:38:24,167] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Final Optimizer = FusedAdam +[2023-04-21 23:38:24,168] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed using client LR scheduler +[2023-04-21 23:38:24,168] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed LR Scheduler = +[2023-04-21 23:38:24,168] [INFO] [logging.py:96:log_dist] [Rank 0] step=0, skipped=0, lr=[0.0, 0.0], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:38:24,168] [INFO] [config.py:953:print] DeepSpeedEngine configuration: +[2023-04-21 23:38:24,168] [INFO] [config.py:957:print] activation_checkpointing_config { + "partition_activations": false, + "contiguous_memory_optimization": false, + "cpu_checkpointing": false, + "number_checkpoints": null, + "synchronize_checkpoint_boundary": false, + "profile": false +} +[2023-04-21 23:38:24,168] [INFO] [config.py:957:print] aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True} +[2023-04-21 23:38:24,168] [INFO] [config.py:957:print] amp_enabled .................. False +[2023-04-21 23:38:24,169] [INFO] [config.py:957:print] amp_params ................... False +[2023-04-21 23:38:24,169] [INFO] [config.py:957:print] autotuning_config ............ { + "enabled": false, + "start_step": null, + "end_step": null, + "metric_path": null, + "arg_mappings": null, + "metric": "throughput", + "model_info": null, + "results_dir": "autotuning_results", + "exps_dir": "autotuning_exps", + "overwrite": true, + "fast": true, + "start_profile_step": 3, + "end_profile_step": 5, + "tuner_type": "gridsearch", + "tuner_early_stopping": 5, + "tuner_num_trials": 50, + "model_info_path": null, + "mp_size": 1, + "max_train_batch_size": null, + "min_train_batch_size": 1, + "max_train_micro_batch_size_per_gpu": 1.024000e+03, + "min_train_micro_batch_size_per_gpu": 1, + "num_tuning_micro_batch_sizes": 3 +} +[2023-04-21 23:38:24,169] [INFO] [config.py:957:print] bfloat16_enabled ............. False +[2023-04-21 23:38:24,169] [INFO] [config.py:957:print] checkpoint_parallel_write_pipeline False +[2023-04-21 23:38:24,169] [INFO] [config.py:957:print] checkpoint_tag_validation_enabled True +[2023-04-21 23:38:24,169] [INFO] [config.py:957:print] checkpoint_tag_validation_fail False +[2023-04-21 23:38:24,169] [INFO] [config.py:957:print] comms_config ................. +[2023-04-21 23:38:24,169] [INFO] [config.py:957:print] communication_data_type ...... None +[2023-04-21 23:38:24,169] [INFO] [config.py:957:print] compression_config ........... {'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}} +[2023-04-21 23:38:24,169] [INFO] [config.py:957:print] curriculum_enabled_legacy .... False +[2023-04-21 23:38:24,169] [INFO] [config.py:957:print] curriculum_params_legacy ..... False +[2023-04-21 23:38:24,169] [INFO] [config.py:957:print] data_efficiency_config ....... {'enabled': False, 'seed': 1234, 'data_sampling': {'enabled': False, 'num_epochs': 1000, 'num_workers': 0, 'curriculum_learning': {'enabled': False}}, 'data_routing': {'enabled': False, 'random_ltd': {'enabled': False, 'layer_token_lr_schedule': {'enabled': False}}}} +[2023-04-21 23:38:24,169] [INFO] [config.py:957:print] data_efficiency_enabled ...... False +[2023-04-21 23:38:24,169] [INFO] [config.py:957:print] dataloader_drop_last ......... False +[2023-04-21 23:38:24,169] [INFO] [config.py:957:print] disable_allgather ............ False +[2023-04-21 23:38:24,169] [INFO] [config.py:957:print] dump_state ................... False +[2023-04-21 23:38:24,169] [INFO] [config.py:957:print] dynamic_loss_scale_args ...... {'init_scale': 65536, 'scale_window': 100, 'delayed_shift': 2, 'min_scale': 1} +[2023-04-21 23:38:24,169] [INFO] [config.py:957:print] eigenvalue_enabled ........... False +[2023-04-21 23:38:24,169] [INFO] [config.py:957:print] eigenvalue_gas_boundary_resolution 1 +[2023-04-21 23:38:24,169] [INFO] [config.py:957:print] eigenvalue_layer_name ........ bert.encoder.layer +[2023-04-21 23:38:24,169] [INFO] [config.py:957:print] eigenvalue_layer_num ......... 0 +[2023-04-21 23:38:24,169] [INFO] [config.py:957:print] eigenvalue_max_iter .......... 100 +[2023-04-21 23:38:24,169] [INFO] [config.py:957:print] eigenvalue_stability ......... 1e-06 +[2023-04-21 23:38:24,169] [INFO] [config.py:957:print] eigenvalue_tol ............... 0.01 +[2023-04-21 23:38:24,169] [INFO] [config.py:957:print] eigenvalue_verbose ........... False +[2023-04-21 23:38:24,169] [INFO] [config.py:957:print] elasticity_enabled ........... False +[2023-04-21 23:38:24,169] [INFO] [config.py:957:print] flops_profiler_config ........ { + "enabled": false, + "profile_step": 1, + "module_depth": -1, + "top_modules": 1, + "detailed": true, + "output_file": null +} +[2023-04-21 23:38:24,169] [INFO] [config.py:957:print] fp16_auto_cast ............... False +[2023-04-21 23:38:24,169] [INFO] [config.py:957:print] fp16_enabled ................. True +[2023-04-21 23:38:24,169] [INFO] [config.py:957:print] fp16_master_weights_and_gradients False +[2023-04-21 23:38:24,169] [INFO] [config.py:957:print] global_rank .................. 0 +[2023-04-21 23:38:24,169] [INFO] [config.py:957:print] grad_accum_dtype ............. None +[2023-04-21 23:38:24,169] [INFO] [config.py:957:print] gradient_accumulation_steps .. 1 +[2023-04-21 23:38:24,169] [INFO] [config.py:957:print] gradient_clipping ............ 1.0 +[2023-04-21 23:38:24,169] [INFO] [config.py:957:print] gradient_predivide_factor .... 1.0 +[2023-04-21 23:38:24,170] [INFO] [config.py:957:print] hybrid_engine ................ enabled=False max_out_tokens=512 inference_tp_size=1 release_inference_cache=False pin_parameters=True tp_gather_partition_size=8 +[2023-04-21 23:38:24,170] [INFO] [config.py:957:print] initial_dynamic_scale ........ 65536 +[2023-04-21 23:38:24,170] [INFO] [config.py:957:print] load_universal_checkpoint .... False +[2023-04-21 23:38:24,170] [INFO] [config.py:957:print] loss_scale ................... 0 +[2023-04-21 23:38:24,170] [INFO] [config.py:957:print] memory_breakdown ............. False +[2023-04-21 23:38:24,170] [INFO] [config.py:957:print] monitor_config ............... tensorboard=TensorBoardConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') wandb=WandbConfig(enabled=False, group=None, team=None, project='deepspeed') csv_monitor=CSVConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') enabled=False +[2023-04-21 23:38:24,170] [INFO] [config.py:957:print] nebula_config ................ { + "enabled": false, + "persistent_storage_path": null, + "persistent_time_interval": 100, + "num_of_version_in_retention": 2, + "enable_nebula_load": true, + "load_path": null +} +[2023-04-21 23:38:24,170] [INFO] [config.py:957:print] optimizer_legacy_fusion ...... False +[2023-04-21 23:38:24,170] [INFO] [config.py:957:print] optimizer_name ............... None +[2023-04-21 23:38:24,170] [INFO] [config.py:957:print] optimizer_params ............. None +[2023-04-21 23:38:24,170] [INFO] [config.py:957:print] pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0} +[2023-04-21 23:38:24,170] [INFO] [config.py:957:print] pld_enabled .................. False +[2023-04-21 23:38:24,170] [INFO] [config.py:957:print] pld_params ................... False +[2023-04-21 23:38:24,170] [INFO] [config.py:957:print] prescale_gradients ........... False +[2023-04-21 23:38:24,170] [INFO] [config.py:957:print] scheduler_name ............... None +[2023-04-21 23:38:24,170] [INFO] [config.py:957:print] scheduler_params ............. None +[2023-04-21 23:38:24,170] [INFO] [config.py:957:print] sparse_attention ............. None +[2023-04-21 23:38:24,170] [INFO] [config.py:957:print] sparse_gradients_enabled ..... False +[2023-04-21 23:38:24,170] [INFO] [config.py:957:print] steps_per_print .............. 10 +[2023-04-21 23:38:24,170] [INFO] [config.py:957:print] train_batch_size ............. 32 +[2023-04-21 23:38:24,170] [INFO] [config.py:957:print] train_micro_batch_size_per_gpu 4 +[2023-04-21 23:38:24,170] [INFO] [config.py:957:print] use_node_local_storage ....... False +[2023-04-21 23:38:24,170] [INFO] [config.py:957:print] wall_clock_breakdown ......... False +[2023-04-21 23:38:24,170] [INFO] [config.py:957:print] world_size ................... 8 +[2023-04-21 23:38:24,170] [INFO] [config.py:957:print] zero_allow_untested_optimizer False +[2023-04-21 23:38:24,170] [INFO] [config.py:957:print] zero_config .................. stage=2 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=500,000,000 allgather_partitions=True allgather_bucket_size=500,000,000 overlap_comm=False load_from_fp32_weights=True elastic_checkpoint=False offload_param=DeepSpeedZeroOffloadParamConfig(device='none', nvme_path=None, buffer_count=5, buffer_size=100,000,000, max_in_cpu=1,000,000,000, pin_memory=False) offload_optimizer=DeepSpeedZeroOffloadOptimizerConfig(device='none', nvme_path=None, buffer_count=4, pin_memory=False, pipeline=False, pipeline_read=False, pipeline_write=False, fast_init=False) sub_group_size=1,000,000,000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=30000000 param_persistence_threshold=10000 model_persistence_threshold=sys.maxsize max_live_parameters=30000000 max_reuse_distance=1,000,000,000 gather_16bit_weights_on_model_save=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=False memory_efficient_linear=False +[2023-04-21 23:38:24,170] [INFO] [config.py:957:print] zero_enabled ................. True +[2023-04-21 23:38:24,170] [INFO] [config.py:957:print] zero_force_ds_cpu_optimizer .. True +[2023-04-21 23:38:24,170] [INFO] [config.py:957:print] zero_optimization_stage ...... 2 +[2023-04-21 23:38:24,170] [INFO] [config.py:943:print_user_config] json = { + "train_batch_size": 32, + "train_micro_batch_size_per_gpu": 4, + "steps_per_print": 10, + "zero_optimization": { + "stage": 2, + "offload_param": { + "device": "none" + }, + "offload_optimizer": { + "device": "none" + }, + "stage3_param_persistence_threshold": 1.000000e+04, + "stage3_max_live_parameters": 3.000000e+07, + "stage3_prefetch_bucket_size": 3.000000e+07, + "memory_efficient_linear": false + }, + "fp16": { + "enabled": true, + "loss_scale_window": 100 + }, + "gradient_clipping": 1.0, + "prescale_gradients": false, + "wall_clock_breakdown": false, + "hybrid_engine": { + "enabled": false, + "max_out_tokens": 512, + "inference_tp_size": 1, + "release_inference_cache": false, + "pin_parameters": true, + "tp_gather_partition_size": 8 + } +} +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0009183883666992188 seconds +*****************[end] Initialized Critic Model [end] (duration: 24.45s)****************** +************************[start] Initializing Reward Model [start] ************************ +[2023-04-21 23:38:34,778] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed info: version=0.9.0, git-hash=unknown, git-branch=unknown +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0014276504516601562 seconds +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0013539791107177734 seconds +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0012841224670410156 seconds +[2023-04-21 23:38:41,712] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False +[2023-04-21 23:38:41,713] [INFO] [config.py:953:print] DeepSpeedEngine configuration: +[2023-04-21 23:38:41,713] [INFO] [config.py:957:print] activation_checkpointing_config { + "partition_activations": false, + "contiguous_memory_optimization": false, + "cpu_checkpointing": false, + "number_checkpoints": null, + "synchronize_checkpoint_boundary": false, + "profile": false +} +[2023-04-21 23:38:41,713] [INFO] [config.py:957:print] aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True} +[2023-04-21 23:38:41,713] [INFO] [config.py:957:print] amp_enabled .................. False +[2023-04-21 23:38:41,713] [INFO] [config.py:957:print] amp_params ................... False +[2023-04-21 23:38:41,714] [INFO] [config.py:957:print] autotuning_config ............ { + "enabled": false, + "start_step": null, + "end_step": null, + "metric_path": null, + "arg_mappings": null, + "metric": "throughput", + "model_info": null, + "results_dir": "autotuning_results", + "exps_dir": "autotuning_exps", + "overwrite": true, + "fast": true, + "start_profile_step": 3, + "end_profile_step": 5, + "tuner_type": "gridsearch", + "tuner_early_stopping": 5, + "tuner_num_trials": 50, + "model_info_path": null, + "mp_size": 1, + "max_train_batch_size": null, + "min_train_batch_size": 1, + "max_train_micro_batch_size_per_gpu": 1.024000e+03, + "min_train_micro_batch_size_per_gpu": 1, + "num_tuning_micro_batch_sizes": 3 +} +[2023-04-21 23:38:41,714] [INFO] [config.py:957:print] bfloat16_enabled ............. False +[2023-04-21 23:38:41,714] [INFO] [config.py:957:print] checkpoint_parallel_write_pipeline False +[2023-04-21 23:38:41,714] [INFO] [config.py:957:print] checkpoint_tag_validation_enabled True +[2023-04-21 23:38:41,714] [INFO] [config.py:957:print] checkpoint_tag_validation_fail False +[2023-04-21 23:38:41,714] [INFO] [config.py:957:print] comms_config ................. +[2023-04-21 23:38:41,714] [INFO] [config.py:957:print] communication_data_type ...... None +[2023-04-21 23:38:41,714] [INFO] [config.py:957:print] compression_config ........... {'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}} +[2023-04-21 23:38:41,714] [INFO] [config.py:957:print] curriculum_enabled_legacy .... False +[2023-04-21 23:38:41,714] [INFO] [config.py:957:print] curriculum_params_legacy ..... False +[2023-04-21 23:38:41,714] [INFO] [config.py:957:print] data_efficiency_config ....... {'enabled': False, 'seed': 1234, 'data_sampling': {'enabled': False, 'num_epochs': 1000, 'num_workers': 0, 'curriculum_learning': {'enabled': False}}, 'data_routing': {'enabled': False, 'random_ltd': {'enabled': False, 'layer_token_lr_schedule': {'enabled': False}}}} +[2023-04-21 23:38:41,714] [INFO] [config.py:957:print] data_efficiency_enabled ...... False +[2023-04-21 23:38:41,714] [INFO] [config.py:957:print] dataloader_drop_last ......... False +[2023-04-21 23:38:41,714] [INFO] [config.py:957:print] disable_allgather ............ False +[2023-04-21 23:38:41,714] [INFO] [config.py:957:print] dump_state ................... False +[2023-04-21 23:38:41,714] [INFO] [config.py:957:print] dynamic_loss_scale_args ...... None +[2023-04-21 23:38:41,714] [INFO] [config.py:957:print] eigenvalue_enabled ........... False +[2023-04-21 23:38:41,714] [INFO] [config.py:957:print] eigenvalue_gas_boundary_resolution 1 +[2023-04-21 23:38:41,714] [INFO] [config.py:957:print] eigenvalue_layer_name ........ bert.encoder.layer +[2023-04-21 23:38:41,714] [INFO] [config.py:957:print] eigenvalue_layer_num ......... 0 +[2023-04-21 23:38:41,714] [INFO] [config.py:957:print] eigenvalue_max_iter .......... 100 +[2023-04-21 23:38:41,714] [INFO] [config.py:957:print] eigenvalue_stability ......... 1e-06 +[2023-04-21 23:38:41,714] [INFO] [config.py:957:print] eigenvalue_tol ............... 0.01 +[2023-04-21 23:38:41,714] [INFO] [config.py:957:print] eigenvalue_verbose ........... False +[2023-04-21 23:38:41,714] [INFO] [config.py:957:print] elasticity_enabled ........... False +[2023-04-21 23:38:41,714] [INFO] [config.py:957:print] flops_profiler_config ........ { + "enabled": false, + "profile_step": 1, + "module_depth": -1, + "top_modules": 1, + "detailed": true, + "output_file": null +} +[2023-04-21 23:38:41,714] [INFO] [config.py:957:print] fp16_auto_cast ............... False +[2023-04-21 23:38:41,714] [INFO] [config.py:957:print] fp16_enabled ................. True +[2023-04-21 23:38:41,714] [INFO] [config.py:957:print] fp16_master_weights_and_gradients False +[2023-04-21 23:38:41,714] [INFO] [config.py:957:print] global_rank .................. 0 +[2023-04-21 23:38:41,714] [INFO] [config.py:957:print] grad_accum_dtype ............. None +[2023-04-21 23:38:41,714] [INFO] [config.py:957:print] gradient_accumulation_steps .. 1 +[2023-04-21 23:38:41,715] [INFO] [config.py:957:print] gradient_clipping ............ 1.0 +[2023-04-21 23:38:41,715] [INFO] [config.py:957:print] gradient_predivide_factor .... 1.0 +[2023-04-21 23:38:41,715] [INFO] [config.py:957:print] hybrid_engine ................ enabled=False max_out_tokens=512 inference_tp_size=1 release_inference_cache=False pin_parameters=True tp_gather_partition_size=8 +[2023-04-21 23:38:41,715] [INFO] [config.py:957:print] initial_dynamic_scale ........ 65536 +[2023-04-21 23:38:41,715] [INFO] [config.py:957:print] load_universal_checkpoint .... False +[2023-04-21 23:38:41,715] [INFO] [config.py:957:print] loss_scale ................... 0 +[2023-04-21 23:38:41,715] [INFO] [config.py:957:print] memory_breakdown ............. False +[2023-04-21 23:38:41,715] [INFO] [config.py:957:print] monitor_config ............... tensorboard=TensorBoardConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') wandb=WandbConfig(enabled=False, group=None, team=None, project='deepspeed') csv_monitor=CSVConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') enabled=False +[2023-04-21 23:38:41,715] [INFO] [config.py:957:print] nebula_config ................ { + "enabled": false, + "persistent_storage_path": null, + "persistent_time_interval": 100, + "num_of_version_in_retention": 2, + "enable_nebula_load": true, + "load_path": null +} +[2023-04-21 23:38:41,715] [INFO] [config.py:957:print] optimizer_legacy_fusion ...... False +[2023-04-21 23:38:41,715] [INFO] [config.py:957:print] optimizer_name ............... None +[2023-04-21 23:38:41,715] [INFO] [config.py:957:print] optimizer_params ............. None +[2023-04-21 23:38:41,715] [INFO] [config.py:957:print] pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0} +[2023-04-21 23:38:41,715] [INFO] [config.py:957:print] pld_enabled .................. False +[2023-04-21 23:38:41,715] [INFO] [config.py:957:print] pld_params ................... False +[2023-04-21 23:38:41,715] [INFO] [config.py:957:print] prescale_gradients ........... False +[2023-04-21 23:38:41,715] [INFO] [config.py:957:print] scheduler_name ............... None +[2023-04-21 23:38:41,715] [INFO] [config.py:957:print] scheduler_params ............. None +[2023-04-21 23:38:41,715] [INFO] [config.py:957:print] sparse_attention ............. None +[2023-04-21 23:38:41,715] [INFO] [config.py:957:print] sparse_gradients_enabled ..... False +[2023-04-21 23:38:41,715] [INFO] [config.py:957:print] steps_per_print .............. 10 +[2023-04-21 23:38:41,715] [INFO] [config.py:957:print] train_batch_size ............. 32 +[2023-04-21 23:38:41,715] [INFO] [config.py:957:print] train_micro_batch_size_per_gpu 4 +[2023-04-21 23:38:41,715] [INFO] [config.py:957:print] use_node_local_storage ....... False +[2023-04-21 23:38:41,715] [INFO] [config.py:957:print] wall_clock_breakdown ......... False +[2023-04-21 23:38:41,715] [INFO] [config.py:957:print] world_size ................... 8 +[2023-04-21 23:38:41,715] [INFO] [config.py:957:print] zero_allow_untested_optimizer False +[2023-04-21 23:38:41,715] [INFO] [config.py:957:print] zero_config .................. stage=0 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=500,000,000 allgather_partitions=True allgather_bucket_size=500,000,000 overlap_comm=False load_from_fp32_weights=True elastic_checkpoint=False offload_param=DeepSpeedZeroOffloadParamConfig(device='none', nvme_path=None, buffer_count=5, buffer_size=100,000,000, max_in_cpu=1,000,000,000, pin_memory=False) offload_optimizer=None sub_group_size=1,000,000,000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=50,000,000 param_persistence_threshold=10000 model_persistence_threshold=sys.maxsize max_live_parameters=1,000,000,000 max_reuse_distance=1,000,000,000 gather_16bit_weights_on_model_save=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=False memory_efficient_linear=False +[2023-04-21 23:38:41,715] [INFO] [config.py:957:print] zero_enabled ................. False +[2023-04-21 23:38:41,715] [INFO] [config.py:957:print] zero_force_ds_cpu_optimizer .. True +[2023-04-21 23:38:41,715] [INFO] [config.py:957:print] zero_optimization_stage ...... 0 +[2023-04-21 23:38:41,715] [INFO] [config.py:943:print_user_config] json = { + "train_batch_size": 32, + "train_micro_batch_size_per_gpu": 4, + "steps_per_print": 10, + "zero_optimization": { + "stage": 0, + "stage3_param_persistence_threshold": 1.000000e+04, + "offload_param": { + "device": "none" + }, + "memory_efficient_linear": false + }, + "fp16": { + "enabled": true + }, + "gradient_clipping": 1.0, + "prescale_gradients": false, + "wall_clock_breakdown": false +} +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0010828971862792969 seconds +*****************[end] Initialized Reward Model [end] (duration: 17.55s)****************** +***** Running training ***** +Beginning of Epoch 1/1, Total Generation Batches 954 +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0012369155883789062 seconds +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0009238719940185547 seconds +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0014069080352783203 seconds +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.001169443130493164 seconds +------------------------------------------------------ +Free memory : 25.666199 (GigaBytes) +Total memory: 39.586121 (GigaBytes) +Requested memory: 0.515625 (GigaBytes) +Setting maximum total tokens (input + output) to 512 +WorkSpace: 0x7fa71e000000 +------------------------------------------------------ +[2023-04-21 23:38:45,582] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +[2023-04-21 23:38:45,666] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 0|ppo_ep: 1|act_loss: 0.231689453125|cri_loss: 0.1451416015625|unsuper_loss: 0.0 +average reward score: -3.01171875 +------------------------------------------------------------------------------------- +|E2E latency=3.51s |Gather latency=0.00s (0.00%) |Generate time=2.68s (76.22%) |Training time=0.74s (20.94%) |Others=0.10 (2.84%)|CurSamplesPerSec=9.12 |AvgSamplesPerSec=9.12 +[2023-04-21 23:38:47,689] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +[2023-04-21 23:38:47,773] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 1|ppo_ep: 1|act_loss: 0.09918212890625|cri_loss: 0.07025146484375|unsuper_loss: 0.0 +average reward score: -3.490234375 +------------------------------------------------------------------------------------- +|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.63s (77.24%) |Training time=0.38s (18.05%) |Others=0.10 (4.71%)|CurSamplesPerSec=15.19 |AvgSamplesPerSec=11.39 +[2023-04-21 23:38:49,800] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384 +[2023-04-21 23:38:49,886] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384 +epoch: 0|step: 2|ppo_ep: 1|act_loss: 0.02325439453125|cri_loss: 0.027374267578125|unsuper_loss: 0.0 +average reward score: -3.171875 +------------------------------------------------------------------------------------- +|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.63s (77.07%) |Training time=0.38s (18.14%) |Others=0.10 (4.79%)|CurSamplesPerSec=15.15 |AvgSamplesPerSec=12.42 +epoch: 0|step: 3|ppo_ep: 1|act_loss: 0.25341796875|cri_loss: 0.15380859375|unsuper_loss: 0.0 +average reward score: -2.859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.01%) |Training time=0.43s (19.89%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=12.94 +epoch: 0|step: 4|ppo_ep: 1|act_loss: -0.09210205078125|cri_loss: 0.01593017578125|unsuper_loss: 0.0 +average reward score: -2.75 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.62%) |Training time=0.41s (19.26%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=13.28 +[2023-04-21 23:38:56,233] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192 +epoch: 0|step: 5|ppo_ep: 1|act_loss: 0.1788330078125|cri_loss: 0.107421875|unsuper_loss: 0.0 +average reward score: -3.3203125 +------------------------------------------------------------------------------------- +|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.81%) |Training time=0.38s (18.02%) |Others=0.11 (5.17%)|CurSamplesPerSec=15.06 |AvgSamplesPerSec=13.55 +[2023-04-21 23:38:58,358] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096 +epoch: 0|step: 6|ppo_ep: 1|act_loss: 0.1064453125|cri_loss: 0.06024169921875|unsuper_loss: 0.0 +average reward score: -3.80078125 +------------------------------------------------------------------------------------- +|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.57%) |Training time=0.39s (18.15%) |Others=0.11 (5.28%)|CurSamplesPerSec=15.06 |AvgSamplesPerSec=13.75 +epoch: 0|step: 7|ppo_ep: 1|act_loss: 0.1566162109375|cri_loss: 0.09619140625|unsuper_loss: 0.0 +average reward score: -3.58984375 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.82s (77.86%) |Training time=0.41s (17.43%) |Others=0.11 (4.72%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=13.74 +epoch: 0|step: 8|ppo_ep: 1|act_loss: 0.1761474609375|cri_loss: 0.1156005859375|unsuper_loss: 0.0 +average reward score: -3.322265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.67%) |Training time=0.41s (19.15%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=13.86 +[2023-04-21 23:39:04,994] [INFO] [logging.py:96:log_dist] [Rank 0] step=10, skipped=5, lr=[4.825e-07, 4.825e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:39:05,012] [INFO] [timer.py:199:stop] epoch=0/micro_step=10/global_step=10, RunningAvgSamplesPerSec=133.058818951636, CurrSamplesPerSec=130.0445968859305, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:39:05,104] [INFO] [logging.py:96:log_dist] [Rank 0] step=10, skipped=3, lr=[3.5000000000000004e-07, 3.5000000000000004e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 9|ppo_ep: 1|act_loss: 0.084228515625|cri_loss: 0.0743408203125|unsuper_loss: 0.0 +average reward score: -3.283203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.07%) |Training time=0.41s (18.88%) |Others=0.11 (5.05%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=13.94 +epoch: 0|step: 10|ppo_ep: 1|act_loss: 0.1031494140625|cri_loss: 0.065673828125|unsuper_loss: 0.0 +average reward score: -3.314453125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.91%) |Training time=0.41s (18.88%) |Others=0.11 (5.21%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.02 +epoch: 0|step: 11|ppo_ep: 1|act_loss: 0.2239990234375|cri_loss: 0.1685791015625|unsuper_loss: 0.0 +average reward score: -3.419921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.91%) |Training time=0.41s (18.99%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.09 +[2023-04-21 23:39:11,553] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192 +epoch: 0|step: 12|ppo_ep: 1|act_loss: -0.024169921875|cri_loss: 0.102783203125|unsuper_loss: 0.0 +average reward score: -3.212890625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.22%) |Training time=0.41s (18.96%) |Others=0.10 (4.82%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.15 +epoch: 0|step: 13|ppo_ep: 1|act_loss: -0.0860595703125|cri_loss: -0.029083251953125|unsuper_loss: 0.0 +average reward score: -2.947265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.39%) |Training time=0.40s (18.51%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.20 +epoch: 0|step: 14|ppo_ep: 1|act_loss: -0.11102294921875|cri_loss: -0.04864501953125|unsuper_loss: 0.0 +average reward score: -3.197265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.89%) |Training time=0.41s (18.90%) |Others=0.11 (5.22%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.24 +epoch: 0|step: 15|ppo_ep: 1|act_loss: -0.282958984375|cri_loss: -0.09857177734375|unsuper_loss: 0.0 +average reward score: -2.9296875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.79%) |Training time=0.41s (19.12%) |Others=0.11 (5.09%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.28 +epoch: 0|step: 16|ppo_ep: 1|act_loss: 0.0048980712890625|cri_loss: 0.016021728515625|unsuper_loss: 0.0 +average reward score: -3.5546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.90%) |Training time=0.41s (18.93%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.31 +epoch: 0|step: 17|ppo_ep: 1|act_loss: -0.2861328125|cri_loss: -0.10845947265625|unsuper_loss: 0.0 +average reward score: -3.591796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.94%) |Training time=0.41s (18.95%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.34 +epoch: 0|step: 18|ppo_ep: 1|act_loss: -0.137939453125|cri_loss: -0.045623779296875|unsuper_loss: 0.0 +average reward score: -2.94921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.62%) |Training time=0.41s (19.21%) |Others=0.11 (5.17%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.37 +[2023-04-21 23:39:26,503] [INFO] [logging.py:96:log_dist] [Rank 0] step=20, skipped=5, lr=[1.4475000000000001e-06, 1.4475000000000001e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:39:26,521] [INFO] [timer.py:199:stop] epoch=0/micro_step=20/global_step=20, RunningAvgSamplesPerSec=131.81286093572152, CurrSamplesPerSec=129.76547456758612, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:39:26,614] [INFO] [logging.py:96:log_dist] [Rank 0] step=20, skipped=4, lr=[8.000000000000001e-07, 8.000000000000001e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 19|ppo_ep: 1|act_loss: 0.454345703125|cri_loss: 0.290771484375|unsuper_loss: 0.0 +average reward score: -2.740234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.81%) |Training time=0.41s (19.04%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.39 +[2023-04-21 23:39:28,643] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 4096, reducing to 2048 +epoch: 0|step: 20|ppo_ep: 1|act_loss: 0.38330078125|cri_loss: 0.247802734375|unsuper_loss: 0.0 +average reward score: -3.064453125 +------------------------------------------------------------------------------------- +|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.93%) |Training time=0.38s (17.78%) |Others=0.11 (5.29%)|CurSamplesPerSec=15.09 |AvgSamplesPerSec=14.43 +epoch: 0|step: 21|ppo_ep: 1|act_loss: 0.45361328125|cri_loss: 0.27490234375|unsuper_loss: 0.0 +average reward score: -3.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.67%) |Training time=0.41s (19.10%) |Others=0.11 (5.22%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45 +epoch: 0|step: 22|ppo_ep: 1|act_loss: 0.40283203125|cri_loss: 0.27197265625|unsuper_loss: 0.0 +average reward score: -2.87109375 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.81s (77.82%) |Training time=0.40s (17.36%) |Others=0.11 (4.82%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.41 +epoch: 0|step: 23|ppo_ep: 1|act_loss: -0.0345458984375|cri_loss: -0.0041961669921875|unsuper_loss: 0.0 +average reward score: -2.962890625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.95%) |Training time=0.41s (18.92%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.42 +epoch: 0|step: 24|ppo_ep: 1|act_loss: 0.2421875|cri_loss: 0.1334228515625|unsuper_loss: 0.0 +average reward score: -3.490234375 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.73s (76.80%) |Training time=0.41s (18.25%) |Others=0.11 (4.95%)|CurSamplesPerSec=14.17 |AvgSamplesPerSec=14.41 +epoch: 0|step: 25|ppo_ep: 1|act_loss: 0.144775390625|cri_loss: 0.0914306640625|unsuper_loss: 0.0 +average reward score: -2.775390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.80%) |Training time=0.41s (19.07%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.43 +epoch: 0|step: 26|ppo_ep: 1|act_loss: -0.08734130859375|cri_loss: -0.032958984375|unsuper_loss: 0.0 +average reward score: -2.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.96%) |Training time=0.40s (18.85%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45 +epoch: 0|step: 27|ppo_ep: 1|act_loss: 0.010284423828125|cri_loss: 0.01959228515625|unsuper_loss: 0.0 +average reward score: -2.69140625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.83%) |Training time=0.41s (18.98%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.47 +epoch: 0|step: 28|ppo_ep: 1|act_loss: -0.266357421875|cri_loss: -0.11358642578125|unsuper_loss: 0.0 +average reward score: -2.99609375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.69%) |Training time=0.41s (19.10%) |Others=0.11 (5.21%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.48 +[2023-04-21 23:39:48,289] [INFO] [logging.py:96:log_dist] [Rank 0] step=30, skipped=6, lr=[2.316e-06, 2.316e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:39:48,307] [INFO] [timer.py:199:stop] epoch=0/micro_step=30/global_step=30, RunningAvgSamplesPerSec=131.6430024413581, CurrSamplesPerSec=127.75158169262765, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:39:48,401] [INFO] [logging.py:96:log_dist] [Rank 0] step=30, skipped=4, lr=[1.3e-06, 1.3e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 29|ppo_ep: 1|act_loss: -0.201171875|cri_loss: -0.0841064453125|unsuper_loss: 0.0 +average reward score: -2.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.59%) |Training time=0.41s (19.23%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.49 +epoch: 0|step: 30|ppo_ep: 1|act_loss: -0.0819091796875|cri_loss: -0.03509521484375|unsuper_loss: 0.0 +average reward score: -2.072265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.48%) |Training time=0.42s (19.31%) |Others=0.11 (5.21%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.50 +epoch: 0|step: 31|ppo_ep: 1|act_loss: -0.12060546875|cri_loss: -0.043487548828125|unsuper_loss: 0.0 +average reward score: -2.43359375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.02%) |Training time=0.40s (18.85%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52 +epoch: 0|step: 32|ppo_ep: 1|act_loss: -0.11376953125|cri_loss: -0.01654052734375|unsuper_loss: 0.0 +average reward score: -1.240234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.67%) |Training time=0.41s (19.15%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.53 +epoch: 0|step: 33|ppo_ep: 1|act_loss: -0.05419921875|cri_loss: -0.012725830078125|unsuper_loss: 0.0 +average reward score: -1.427734375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.76%) |Training time=0.41s (19.12%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.54 +epoch: 0|step: 34|ppo_ep: 1|act_loss: 0.1273193359375|cri_loss: 0.07110595703125|unsuper_loss: 0.0 +average reward score: -2.94921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.73%) |Training time=0.41s (19.04%) |Others=0.11 (5.23%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55 +epoch: 0|step: 35|ppo_ep: 1|act_loss: 0.1138916015625|cri_loss: 0.06243896484375|unsuper_loss: 0.0 +average reward score: -2.654296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.82%) |Training time=0.41s (19.06%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56 +[2023-04-21 23:40:03,431] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096 +epoch: 0|step: 36|ppo_ep: 1|act_loss: 0.059783935546875|cri_loss: 0.037017822265625|unsuper_loss: 0.0 +average reward score: -2.91796875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (76.05%) |Training time=0.41s (19.15%) |Others=0.10 (4.80%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.57 +epoch: 0|step: 37|ppo_ep: 1|act_loss: 0.019195556640625|cri_loss: 0.0211334228515625|unsuper_loss: 0.0 +average reward score: -2.318359375 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.64s (73.69%) |Training time=0.47s (20.84%) |Others=0.12 (5.47%)|CurSamplesPerSec=14.34 |AvgSamplesPerSec=14.56 +epoch: 0|step: 38|ppo_ep: 1|act_loss: -0.214599609375|cri_loss: -0.0810546875|unsuper_loss: 0.0 +average reward score: -1.4873046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.85%) |Training time=0.41s (18.95%) |Others=0.11 (5.21%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.57 +[2023-04-21 23:40:09,846] [INFO] [logging.py:96:log_dist] [Rank 0] step=40, skipped=6, lr=[3.2810000000000004e-06, 3.2810000000000004e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:40:09,864] [INFO] [timer.py:199:stop] epoch=0/micro_step=40/global_step=40, RunningAvgSamplesPerSec=130.979460749, CurrSamplesPerSec=132.2262681750161, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:40:09,957] [INFO] [logging.py:96:log_dist] [Rank 0] step=40, skipped=5, lr=[1.75e-06, 1.75e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 39|ppo_ep: 1|act_loss: -0.10284423828125|cri_loss: -0.02862548828125|unsuper_loss: 0.0 +average reward score: -1.96875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.94%) |Training time=0.41s (18.86%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.58 +epoch: 0|step: 40|ppo_ep: 1|act_loss: 0.0841064453125|cri_loss: 0.051300048828125|unsuper_loss: 0.0 +average reward score: -2.287109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.72%) |Training time=0.41s (19.09%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.59 +epoch: 0|step: 41|ppo_ep: 1|act_loss: 0.241943359375|cri_loss: 0.13330078125|unsuper_loss: 0.0 +average reward score: -1.4775390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.82%) |Training time=0.41s (19.06%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.59 +epoch: 0|step: 42|ppo_ep: 1|act_loss: 0.169677734375|cri_loss: 0.1021728515625|unsuper_loss: 0.0 +average reward score: -2.404296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.75%) |Training time=0.41s (19.05%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.60 +epoch: 0|step: 43|ppo_ep: 1|act_loss: 0.1094970703125|cri_loss: 0.0714111328125|unsuper_loss: 0.0 +average reward score: -2.298828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.87%) |Training time=0.41s (19.02%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.61 +epoch: 0|step: 44|ppo_ep: 1|act_loss: 0.214111328125|cri_loss: 0.1400146484375|unsuper_loss: 0.0 +average reward score: -1.5478515625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.86%) |Training time=0.41s (18.92%) |Others=0.11 (5.22%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.61 +epoch: 0|step: 45|ppo_ep: 1|act_loss: 0.0830078125|cri_loss: 0.06219482421875|unsuper_loss: 0.0 +average reward score: -1.7490234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.77%) |Training time=0.41s (19.12%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.62 +epoch: 0|step: 46|ppo_ep: 1|act_loss: -0.0323486328125|cri_loss: 0.013916015625|unsuper_loss: 0.0 +average reward score: -2.064453125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.74%) |Training time=0.41s (19.08%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.63 +epoch: 0|step: 47|ppo_ep: 1|act_loss: 0.03997802734375|cri_loss: 0.06707763671875|unsuper_loss: 0.0 +average reward score: -2.509765625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.96%) |Training time=0.41s (18.93%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.63 +epoch: 0|step: 48|ppo_ep: 1|act_loss: -0.6728515625|cri_loss: -0.2484130859375|unsuper_loss: 0.0 +average reward score: -1.5966796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.24%) |Training time=0.40s (18.56%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.64 +[2023-04-21 23:40:31,329] [INFO] [logging.py:96:log_dist] [Rank 0] step=50, skipped=6, lr=[4.2460000000000005e-06, 4.2460000000000005e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:40:31,347] [INFO] [timer.py:199:stop] epoch=0/micro_step=50/global_step=50, RunningAvgSamplesPerSec=130.87985010288241, CurrSamplesPerSec=129.97923512190965, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:40:31,439] [INFO] [logging.py:96:log_dist] [Rank 0] step=50, skipped=5, lr=[2.25e-06, 2.25e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 49|ppo_ep: 1|act_loss: 0.01155853271484375|cri_loss: 0.0257568359375|unsuper_loss: 0.0 +average reward score: -1.796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.87%) |Training time=0.41s (19.03%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.64 +epoch: 0|step: 50|ppo_ep: 1|act_loss: -0.01409912109375|cri_loss: 0.05267333984375|unsuper_loss: 0.0 +average reward score: -1.521484375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.83%) |Training time=0.41s (18.98%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.65 +epoch: 0|step: 51|ppo_ep: 1|act_loss: 0.54052734375|cri_loss: 0.328125|unsuper_loss: 0.0 +average reward score: -1.8671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.57%) |Training time=0.41s (19.27%) |Others=0.11 (5.16%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.65 +epoch: 0|step: 52|ppo_ep: 1|act_loss: 0.22802734375|cri_loss: 0.130615234375|unsuper_loss: 0.0 +average reward score: -1.7666015625 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.04%) |Training time=0.42s (18.61%) |Others=0.19 (8.35%)|CurSamplesPerSec=14.32 |AvgSamplesPerSec=14.64 +epoch: 0|step: 53|ppo_ep: 1|act_loss: -0.07568359375|cri_loss: -0.014739990234375|unsuper_loss: 0.0 +average reward score: -1.5986328125 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.67s (75.98%) |Training time=0.41s (18.83%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.54 |AvgSamplesPerSec=14.64 +epoch: 0|step: 54|ppo_ep: 1|act_loss: -0.41357421875|cri_loss: -0.1488037109375|unsuper_loss: 0.0 +average reward score: -1.4091796875 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.73s (76.86%) |Training time=0.41s (18.17%) |Others=0.11 (4.97%)|CurSamplesPerSec=14.21 |AvgSamplesPerSec=14.63 +epoch: 0|step: 55|ppo_ep: 1|act_loss: -0.0655517578125|cri_loss: 0.0006103515625|unsuper_loss: 0.0 +average reward score: -0.7197265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.82%) |Training time=0.41s (19.03%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.64 +epoch: 0|step: 56|ppo_ep: 1|act_loss: 0.1314697265625|cri_loss: 0.0850830078125|unsuper_loss: 0.0 +average reward score: -1.974609375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.49%) |Training time=0.41s (19.31%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.64 +epoch: 0|step: 57|ppo_ep: 1|act_loss: -0.0340576171875|cri_loss: 0.039306640625|unsuper_loss: 0.0 +average reward score: -1.9287109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.33%) |Training time=0.42s (19.48%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.65 +epoch: 0|step: 58|ppo_ep: 1|act_loss: 0.24560546875|cri_loss: 0.1434326171875|unsuper_loss: 0.0 +average reward score: -1.46484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.21%) |Training time=0.42s (19.61%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.65 +[2023-04-21 23:40:53,049] [INFO] [logging.py:96:log_dist] [Rank 0] step=60, skipped=6, lr=[5.211000000000001e-06, 5.211000000000001e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:40:53,067] [INFO] [timer.py:199:stop] epoch=0/micro_step=60/global_step=60, RunningAvgSamplesPerSec=130.3054528113133, CurrSamplesPerSec=124.08667168987976, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:40:53,159] [INFO] [logging.py:96:log_dist] [Rank 0] step=60, skipped=5, lr=[2.7500000000000004e-06, 2.7500000000000004e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 59|ppo_ep: 1|act_loss: 0.232421875|cri_loss: 0.18115234375|unsuper_loss: 0.0 +average reward score: -1.7568359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.26%) |Training time=0.42s (19.60%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.66 +epoch: 0|step: 60|ppo_ep: 1|act_loss: -0.14697265625|cri_loss: -0.02044677734375|unsuper_loss: 0.0 +average reward score: -2.09375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.45%) |Training time=0.41s (19.32%) |Others=0.11 (5.23%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.66 +epoch: 0|step: 61|ppo_ep: 1|act_loss: 0.0006103515625|cri_loss: 0.0204925537109375|unsuper_loss: 0.0 +average reward score: -1.90234375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.42%) |Training time=0.42s (19.45%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.66 +epoch: 0|step: 62|ppo_ep: 1|act_loss: -0.245361328125|cri_loss: -0.07476806640625|unsuper_loss: 0.0 +average reward score: -1.95703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.13%) |Training time=0.42s (19.65%) |Others=0.11 (5.23%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.67 +epoch: 0|step: 63|ppo_ep: 1|act_loss: 0.01123046875|cri_loss: 0.072021484375|unsuper_loss: 0.0 +average reward score: -0.85986328125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.73%) |Training time=0.41s (19.12%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.67 +epoch: 0|step: 64|ppo_ep: 1|act_loss: -0.28564453125|cri_loss: -0.0963134765625|unsuper_loss: 0.0 +average reward score: -1.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.40%) |Training time=0.42s (19.40%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.68 +[2023-04-21 23:41:05,906] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 2048, reducing to 1024 +[2023-04-21 23:41:05,991] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 4096, reducing to 2048 +epoch: 0|step: 65|ppo_ep: 1|act_loss: 1.3173828125|cri_loss: 0.8857421875|unsuper_loss: 0.0 +average reward score: -1.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.62s (76.57%) |Training time=0.39s (18.56%) |Others=0.10 (4.87%)|CurSamplesPerSec=15.15 |AvgSamplesPerSec=14.68 +epoch: 0|step: 66|ppo_ep: 1|act_loss: 1.689453125|cri_loss: 1.10546875|unsuper_loss: 0.0 +average reward score: -1.59765625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.31%) |Training time=0.42s (19.49%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.69 +epoch: 0|step: 67|ppo_ep: 1|act_loss: 0.88232421875|cri_loss: 0.5634765625|unsuper_loss: 0.0 +average reward score: -1.56640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.33%) |Training time=0.42s (19.55%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.69 +epoch: 0|step: 68|ppo_ep: 1|act_loss: 2.35546875|cri_loss: 1.568359375|unsuper_loss: 0.0 +average reward score: -1.7041015625 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.24%) |Training time=0.41s (18.50%) |Others=0.18 (8.26%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.69 +[2023-04-21 23:41:14,535] [INFO] [logging.py:96:log_dist] [Rank 0] step=70, skipped=7, lr=[6.079500000000001e-06, 6.079500000000001e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:41:14,554] [INFO] [timer.py:199:stop] epoch=0/micro_step=70/global_step=70, RunningAvgSamplesPerSec=129.97624011610526, CurrSamplesPerSec=129.91268155851395, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:41:14,646] [INFO] [logging.py:96:log_dist] [Rank 0] step=70, skipped=6, lr=[3.2000000000000003e-06, 3.2000000000000003e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 69|ppo_ep: 1|act_loss: 0.44384765625|cri_loss: 0.298828125|unsuper_loss: 0.0 +average reward score: -0.90087890625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.54%) |Training time=0.41s (19.01%) |Others=0.12 (5.45%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.69 +epoch: 0|step: 70|ppo_ep: 1|act_loss: -0.609375|cri_loss: -0.156494140625|unsuper_loss: 0.0 +average reward score: -0.9306640625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.73%) |Training time=0.41s (19.01%) |Others=0.11 (5.26%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.69 +epoch: 0|step: 71|ppo_ep: 1|act_loss: 0.193603515625|cri_loss: 0.189453125|unsuper_loss: 0.0 +average reward score: -1.419921875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.67%) |Training time=0.41s (19.18%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.70 +epoch: 0|step: 72|ppo_ep: 1|act_loss: 0.293701171875|cri_loss: 0.239501953125|unsuper_loss: 0.0 +average reward score: -1.36328125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.33%) |Training time=0.42s (19.47%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.70 +epoch: 0|step: 73|ppo_ep: 1|act_loss: 0.0489501953125|cri_loss: 0.048370361328125|unsuper_loss: 0.0 +average reward score: -0.7578125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.58%) |Training time=0.41s (19.28%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.70 +epoch: 0|step: 74|ppo_ep: 1|act_loss: -0.166259765625|cri_loss: -0.0423583984375|unsuper_loss: 0.0 +average reward score: -0.61279296875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.49%) |Training time=0.41s (19.31%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.71 +epoch: 0|step: 75|ppo_ep: 1|act_loss: -0.0986328125|cri_loss: -0.0015869140625|unsuper_loss: 0.0 +average reward score: -0.86376953125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.65%) |Training time=0.41s (19.12%) |Others=0.11 (5.23%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.71 +epoch: 0|step: 76|ppo_ep: 1|act_loss: -0.13525390625|cri_loss: -0.022216796875|unsuper_loss: 0.0 +average reward score: -1.052734375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.45%) |Training time=0.41s (19.35%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.71 +epoch: 0|step: 77|ppo_ep: 1|act_loss: 0.55126953125|cri_loss: 0.3369140625|unsuper_loss: 0.0 +average reward score: -1.037109375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.63%) |Training time=0.41s (19.23%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.72 +epoch: 0|step: 78|ppo_ep: 1|act_loss: 0.9658203125|cri_loss: 0.6396484375|unsuper_loss: 0.0 +average reward score: -1.6171875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.64%) |Training time=0.41s (19.15%) |Others=0.11 (5.21%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.72 +[2023-04-21 23:41:35,943] [INFO] [logging.py:96:log_dist] [Rank 0] step=80, skipped=7, lr=[7.0445e-06, 7.0445e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:41:35,962] [INFO] [timer.py:199:stop] epoch=0/micro_step=80/global_step=80, RunningAvgSamplesPerSec=129.79324436520156, CurrSamplesPerSec=128.96262979834717, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:41:36,055] [INFO] [logging.py:96:log_dist] [Rank 0] step=80, skipped=6, lr=[3.7e-06, 3.7e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 79|ppo_ep: 1|act_loss: 0.73046875|cri_loss: 0.47705078125|unsuper_loss: 0.0 +average reward score: -1.7333984375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.64%) |Training time=0.41s (19.20%) |Others=0.11 (5.17%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.72 +epoch: 0|step: 80|ppo_ep: 1|act_loss: 0.408203125|cri_loss: 0.258544921875|unsuper_loss: 0.0 +average reward score: -1.3046875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.57%) |Training time=0.41s (19.21%) |Others=0.11 (5.22%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.72 +epoch: 0|step: 81|ppo_ep: 1|act_loss: 0.377685546875|cri_loss: 0.221435546875|unsuper_loss: 0.0 +average reward score: -1.1103515625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.70%) |Training time=0.41s (19.16%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.73 +epoch: 0|step: 82|ppo_ep: 1|act_loss: 0.0911865234375|cri_loss: 0.067138671875|unsuper_loss: 0.0 +average reward score: -1.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.20%) |Training time=0.42s (19.60%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.73 +epoch: 0|step: 83|ppo_ep: 1|act_loss: -0.3154296875|cri_loss: -0.1004638671875|unsuper_loss: 0.0 +average reward score: -1.1083984375 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.82s (77.55%) |Training time=0.42s (17.74%) |Others=0.11 (4.71%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.71 +epoch: 0|step: 84|ppo_ep: 1|act_loss: -0.142822265625|cri_loss: -0.01373291015625|unsuper_loss: 0.0 +average reward score: -1.3310546875 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.73s (76.85%) |Training time=0.41s (18.19%) |Others=0.11 (4.96%)|CurSamplesPerSec=14.20 |AvgSamplesPerSec=14.71 +epoch: 0|step: 85|ppo_ep: 1|act_loss: 0.0579833984375|cri_loss: 0.054473876953125|unsuper_loss: 0.0 +average reward score: -1.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.82%) |Training time=0.41s (19.04%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.71 +epoch: 0|step: 86|ppo_ep: 1|act_loss: 0.6943359375|cri_loss: 0.4111328125|unsuper_loss: 0.0 +average reward score: -1.646484375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.56%) |Training time=0.41s (19.19%) |Others=0.11 (5.24%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.71 +epoch: 0|step: 87|ppo_ep: 1|act_loss: 0.63916015625|cri_loss: 0.377197265625|unsuper_loss: 0.0 +average reward score: -1.8642578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.71%) |Training time=0.41s (19.17%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.72 +epoch: 0|step: 88|ppo_ep: 1|act_loss: 0.033294677734375|cri_loss: 0.0440673828125|unsuper_loss: 0.0 +average reward score: -1.29296875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.58%) |Training time=0.41s (19.22%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.72 +[2023-04-21 23:41:57,701] [INFO] [logging.py:96:log_dist] [Rank 0] step=90, skipped=7, lr=[8.0095e-06, 8.0095e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:41:57,719] [INFO] [timer.py:199:stop] epoch=0/micro_step=90/global_step=90, RunningAvgSamplesPerSec=129.58072305568248, CurrSamplesPerSec=126.23191816135801, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:41:57,812] [INFO] [logging.py:96:log_dist] [Rank 0] step=90, skipped=6, lr=[4.2000000000000004e-06, 4.2000000000000004e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 89|ppo_ep: 1|act_loss: -0.26611328125|cri_loss: -0.1024169921875|unsuper_loss: 0.0 +average reward score: -1.63671875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.45%) |Training time=0.42s (19.43%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.72 +epoch: 0|step: 90|ppo_ep: 1|act_loss: -0.6201171875|cri_loss: -0.181396484375|unsuper_loss: 0.0 +average reward score: -1.1005859375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.52%) |Training time=0.41s (19.28%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.72 +epoch: 0|step: 91|ppo_ep: 1|act_loss: 0.00115966796875|cri_loss: 0.0325927734375|unsuper_loss: 0.0 +average reward score: -1.5341796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.55%) |Training time=0.41s (19.32%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.72 +epoch: 0|step: 92|ppo_ep: 1|act_loss: 1.02734375|cri_loss: 0.625|unsuper_loss: 0.0 +average reward score: -1.9716796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.94%) |Training time=0.43s (19.88%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.72 +epoch: 0|step: 93|ppo_ep: 1|act_loss: 0.95703125|cri_loss: 0.6015625|unsuper_loss: 0.0 +average reward score: -1.1279296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.26%) |Training time=0.42s (19.62%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.73 +epoch: 0|step: 94|ppo_ep: 1|act_loss: 0.30322265625|cri_loss: 0.19775390625|unsuper_loss: 0.0 +average reward score: -1.9072265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.30%) |Training time=0.42s (19.49%) |Others=0.11 (5.21%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.73 +epoch: 0|step: 95|ppo_ep: 1|act_loss: -0.740234375|cri_loss: -0.242919921875|unsuper_loss: 0.0 +average reward score: -1.71484375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.56%) |Training time=0.41s (19.30%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.73 +epoch: 0|step: 96|ppo_ep: 1|act_loss: -0.53125|cri_loss: -0.1573486328125|unsuper_loss: 0.0 +average reward score: -1.275390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.36%) |Training time=0.42s (19.44%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.73 +epoch: 0|step: 97|ppo_ep: 1|act_loss: -0.0743408203125|cri_loss: 0.00048828125|unsuper_loss: 0.0 +average reward score: -1.3505859375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.36%) |Training time=0.42s (19.52%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.73 +epoch: 0|step: 98|ppo_ep: 1|act_loss: 0.43701171875|cri_loss: 0.259765625|unsuper_loss: 0.0 +average reward score: -1.8330078125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.35%) |Training time=0.42s (19.45%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.74 +[2023-04-21 23:42:19,339] [INFO] [logging.py:96:log_dist] [Rank 0] step=100, skipped=7, lr=[8.974500000000002e-06, 8.974500000000002e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:42:19,358] [INFO] [timer.py:199:stop] epoch=0/micro_step=100/global_step=100, RunningAvgSamplesPerSec=129.23506099236363, CurrSamplesPerSec=127.73638388183566, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:42:19,450] [INFO] [logging.py:96:log_dist] [Rank 0] step=100, skipped=6, lr=[4.7e-06, 4.7e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 99|ppo_ep: 1|act_loss: 0.5625|cri_loss: 0.34423828125|unsuper_loss: 0.0 +average reward score: -1.734375 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.79s (77.38%) |Training time=0.41s (17.87%) |Others=0.11 (4.74%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.73 +epoch: 0|step: 100|ppo_ep: 1|act_loss: 0.50439453125|cri_loss: 0.302978515625|unsuper_loss: 0.0 +average reward score: -2.001953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.34%) |Training time=0.42s (19.47%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.73 +epoch: 0|step: 101|ppo_ep: 1|act_loss: 0.120361328125|cri_loss: 0.0860595703125|unsuper_loss: 0.0 +average reward score: -1.330078125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.49%) |Training time=0.41s (19.31%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.73 +epoch: 0|step: 102|ppo_ep: 1|act_loss: 0.60986328125|cri_loss: 0.358154296875|unsuper_loss: 0.0 +average reward score: -1.5244140625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.45%) |Training time=0.41s (19.33%) |Others=0.11 (5.22%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.73 +epoch: 0|step: 103|ppo_ep: 1|act_loss: 0.50732421875|cri_loss: 0.30029296875|unsuper_loss: 0.0 +average reward score: -1.869140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.51%) |Training time=0.42s (19.40%) |Others=0.11 (5.09%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.73 +epoch: 0|step: 104|ppo_ep: 1|act_loss: -0.0291748046875|cri_loss: 0.01934814453125|unsuper_loss: 0.0 +average reward score: -1.810546875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.45%) |Training time=0.41s (19.34%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.73 +epoch: 0|step: 105|ppo_ep: 1|act_loss: 0.12359619140625|cri_loss: 0.0972900390625|unsuper_loss: 0.0 +average reward score: -1.439453125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.48%) |Training time=0.42s (19.37%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.74 +epoch: 0|step: 106|ppo_ep: 1|act_loss: -0.090087890625|cri_loss: -0.0128173828125|unsuper_loss: 0.0 +average reward score: -2.216796875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.37%) |Training time=0.42s (19.44%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.74 +epoch: 0|step: 107|ppo_ep: 1|act_loss: 0.38232421875|cri_loss: 0.24658203125|unsuper_loss: 0.0 +average reward score: -2.146484375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.59%) |Training time=0.41s (19.28%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.74 +epoch: 0|step: 108|ppo_ep: 1|act_loss: 0.6181640625|cri_loss: 0.38037109375|unsuper_loss: 0.0 +average reward score: -2.04296875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.43%) |Training time=0.42s (19.38%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.74 +[2023-04-21 23:42:40,808] [INFO] [logging.py:96:log_dist] [Rank 0] step=110, skipped=7, lr=[9.649706174538074e-06, 9.649706174538074e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:42:40,827] [INFO] [timer.py:199:stop] epoch=0/micro_step=110/global_step=110, RunningAvgSamplesPerSec=128.96985847860012, CurrSamplesPerSec=126.8304863240554, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:42:40,919] [INFO] [logging.py:96:log_dist] [Rank 0] step=110, skipped=6, lr=[4.999729351164122e-06, 4.999729351164122e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 109|ppo_ep: 1|act_loss: -0.252197265625|cri_loss: -0.064697265625|unsuper_loss: 0.0 +average reward score: -2.1015625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.49%) |Training time=0.42s (19.37%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.74 +epoch: 0|step: 110|ppo_ep: 1|act_loss: -0.040130615234375|cri_loss: 0.00396728515625|unsuper_loss: 0.0 +average reward score: -1.96484375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.41%) |Training time=0.42s (19.40%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.74 +epoch: 0|step: 111|ppo_ep: 1|act_loss: 0.931640625|cri_loss: 0.5419921875|unsuper_loss: 0.0 +average reward score: -2.484375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.55%) |Training time=0.41s (19.32%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.75 +epoch: 0|step: 112|ppo_ep: 1|act_loss: 0.477294921875|cri_loss: 0.284912109375|unsuper_loss: 0.0 +average reward score: -2.404296875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.01%) |Training time=0.41s (18.79%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.75 +epoch: 0|step: 113|ppo_ep: 1|act_loss: 0.552734375|cri_loss: 0.347412109375|unsuper_loss: 0.0 +average reward score: -2.2421875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.24%) |Training time=0.41s (18.71%) |Others=0.11 (5.06%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.75 +epoch: 0|step: 114|ppo_ep: 1|act_loss: -0.68310546875|cri_loss: -0.2467041015625|unsuper_loss: 0.0 +average reward score: -2.908203125 +------------------------------------------------------------------------------------- +|E2E latency=2.40s |Gather latency=0.00s (0.00%) |Generate time=1.88s (78.30%) |Training time=0.41s (17.04%) |Others=0.11 (4.67%)|CurSamplesPerSec=13.34 |AvgSamplesPerSec=14.73 +epoch: 0|step: 115|ppo_ep: 1|act_loss: 0.0360107421875|cri_loss: 0.07562255859375|unsuper_loss: 0.0 +average reward score: -2.796875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.52%) |Training time=0.41s (19.35%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.73 +epoch: 0|step: 116|ppo_ep: 1|act_loss: 0.1025390625|cri_loss: 0.0999755859375|unsuper_loss: 0.0 +average reward score: -2.912109375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.56%) |Training time=0.41s (19.25%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.74 +epoch: 0|step: 117|ppo_ep: 1|act_loss: 0.80419921875|cri_loss: 0.489013671875|unsuper_loss: 0.0 +average reward score: -2.669921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.61%) |Training time=0.41s (19.27%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.74 +epoch: 0|step: 118|ppo_ep: 1|act_loss: 1.23828125|cri_loss: 0.736328125|unsuper_loss: 0.0 +average reward score: -2.92578125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.48%) |Training time=0.41s (19.33%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.74 +[2023-04-21 23:43:02,558] [INFO] [logging.py:96:log_dist] [Rank 0] step=120, skipped=7, lr=[9.644483606235295e-06, 9.644483606235295e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:43:02,577] [INFO] [timer.py:199:stop] epoch=0/micro_step=120/global_step=120, RunningAvgSamplesPerSec=128.92096849256106, CurrSamplesPerSec=127.93716465015905, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:43:02,669] [INFO] [logging.py:96:log_dist] [Rank 0] step=120, skipped=6, lr=[4.996685224712077e-06, 4.996685224712077e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 119|ppo_ep: 1|act_loss: 0.65234375|cri_loss: 0.39990234375|unsuper_loss: 0.0 +average reward score: -2.9765625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.63%) |Training time=0.41s (19.26%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.74 +epoch: 0|step: 120|ppo_ep: 1|act_loss: -0.394775390625|cri_loss: -0.135009765625|unsuper_loss: 0.0 +average reward score: -3.55859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.73%) |Training time=0.41s (19.07%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.74 +epoch: 0|step: 121|ppo_ep: 1|act_loss: -0.919921875|cri_loss: -0.265869140625|unsuper_loss: 0.0 +average reward score: -3.49609375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.60%) |Training time=0.41s (19.26%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.74 +epoch: 0|step: 122|ppo_ep: 1|act_loss: -0.70068359375|cri_loss: -0.210693359375|unsuper_loss: 0.0 +average reward score: -3.69921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.40%) |Training time=0.42s (19.40%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.74 +epoch: 0|step: 123|ppo_ep: 1|act_loss: 0.251953125|cri_loss: 0.165771484375|unsuper_loss: 0.0 +average reward score: -3.43359375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.35%) |Training time=0.42s (19.44%) |Others=0.11 (5.21%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.75 +epoch: 0|step: 124|ppo_ep: 1|act_loss: 1.0546875|cri_loss: 0.6337890625|unsuper_loss: 0.0 +average reward score: -3.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.75%) |Training time=0.41s (19.02%) |Others=0.11 (5.24%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.75 +epoch: 0|step: 125|ppo_ep: 1|act_loss: 1.173828125|cri_loss: 0.6923828125|unsuper_loss: 0.0 +average reward score: -3.865234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.34%) |Training time=0.42s (19.54%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.75 +epoch: 0|step: 126|ppo_ep: 1|act_loss: 0.9951171875|cri_loss: 0.59619140625|unsuper_loss: 0.0 +average reward score: -3.61328125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.41%) |Training time=0.42s (19.41%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.75 +epoch: 0|step: 127|ppo_ep: 1|act_loss: 0.256103515625|cri_loss: 0.2021484375|unsuper_loss: 0.0 +average reward score: -3.833984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.66%) |Training time=0.41s (19.22%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.75 +epoch: 0|step: 128|ppo_ep: 1|act_loss: -0.39599609375|cri_loss: -0.1229248046875|unsuper_loss: 0.0 +average reward score: -3.7109375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.61%) |Training time=0.41s (19.18%) |Others=0.11 (5.21%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.75 +[2023-04-21 23:43:24,046] [INFO] [logging.py:96:log_dist] [Rank 0] step=130, skipped=7, lr=[9.632739717588912e-06, 9.632739717588912e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:43:24,064] [INFO] [timer.py:199:stop] epoch=0/micro_step=130/global_step=130, RunningAvgSamplesPerSec=128.88130010143965, CurrSamplesPerSec=137.54423533149213, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:43:24,157] [INFO] [logging.py:96:log_dist] [Rank 0] step=130, skipped=6, lr=[4.99026279355402e-06, 4.99026279355402e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 129|ppo_ep: 1|act_loss: 0.17138671875|cri_loss: 0.1422119140625|unsuper_loss: 0.0 +average reward score: -3.646484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.85%) |Training time=0.44s (20.09%) |Others=0.11 (5.06%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.75 +epoch: 0|step: 130|ppo_ep: 1|act_loss: 0.3515625|cri_loss: 0.251953125|unsuper_loss: 0.0 +average reward score: -3.71484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.30%) |Training time=0.42s (19.51%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.75 +epoch: 0|step: 131|ppo_ep: 1|act_loss: 1.07421875|cri_loss: 0.6396484375|unsuper_loss: 0.0 +average reward score: -3.5 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.56%) |Training time=0.41s (19.30%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.75 +epoch: 0|step: 132|ppo_ep: 1|act_loss: 0.6865234375|cri_loss: 0.406005859375|unsuper_loss: 0.0 +average reward score: -4.16015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.40%) |Training time=0.42s (19.40%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.76 +epoch: 0|step: 133|ppo_ep: 1|act_loss: 0.328369140625|cri_loss: 0.204833984375|unsuper_loss: 0.0 +average reward score: -3.5546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.45%) |Training time=0.42s (19.41%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.76 +epoch: 0|step: 134|ppo_ep: 1|act_loss: -0.388427734375|cri_loss: -0.111572265625|unsuper_loss: 0.0 +average reward score: -3.6015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.32%) |Training time=0.42s (19.49%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.76 +epoch: 0|step: 135|ppo_ep: 1|act_loss: -0.2587890625|cri_loss: -0.0894775390625|unsuper_loss: 0.0 +average reward score: -4.28515625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.32%) |Training time=0.42s (19.56%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.76 +epoch: 0|step: 136|ppo_ep: 1|act_loss: 0.365478515625|cri_loss: 0.2197265625|unsuper_loss: 0.0 +average reward score: -3.84765625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.59%) |Training time=0.41s (19.20%) |Others=0.11 (5.21%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.76 +epoch: 0|step: 137|ppo_ep: 1|act_loss: 0.2919921875|cri_loss: 0.183837890625|unsuper_loss: 0.0 +average reward score: -3.9296875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.61%) |Training time=0.41s (19.27%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.76 +epoch: 0|step: 138|ppo_ep: 1|act_loss: -0.05572509765625|cri_loss: -0.000396728515625|unsuper_loss: 0.0 +average reward score: -4.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.39%) |Training time=0.42s (19.38%) |Others=0.11 (5.23%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.76 +[2023-04-21 23:43:45,499] [INFO] [logging.py:96:log_dist] [Rank 0] step=140, skipped=7, lr=[9.61449039944247e-06, 9.61449039944247e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:43:45,517] [INFO] [timer.py:199:stop] epoch=0/micro_step=140/global_step=140, RunningAvgSamplesPerSec=128.69974239896064, CurrSamplesPerSec=127.29890520329567, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:43:45,610] [INFO] [logging.py:96:log_dist] [Rank 0] step=140, skipped=6, lr=[4.980470747984265e-06, 4.980470747984265e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 139|ppo_ep: 1|act_loss: -0.386474609375|cri_loss: -0.15283203125|unsuper_loss: 0.0 +average reward score: -3.962890625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.53%) |Training time=0.41s (19.34%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.76 +epoch: 0|step: 140|ppo_ep: 1|act_loss: -0.3408203125|cri_loss: -0.1376953125|unsuper_loss: 0.0 +average reward score: -4.34765625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.44%) |Training time=0.42s (19.38%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.76 +epoch: 0|step: 141|ppo_ep: 1|act_loss: 0.11871337890625|cri_loss: 0.0736083984375|unsuper_loss: 0.0 +average reward score: -4.19921875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.65%) |Training time=0.41s (19.21%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.77 +epoch: 0|step: 142|ppo_ep: 1|act_loss: 0.28564453125|cri_loss: 0.182373046875|unsuper_loss: 0.0 +average reward score: -4.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.75%) |Training time=0.41s (19.05%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.77 +epoch: 0|step: 143|ppo_ep: 1|act_loss: -0.1136474609375|cri_loss: -0.02484130859375|unsuper_loss: 0.0 +average reward score: -4.28515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.19%) |Training time=0.41s (18.65%) |Others=0.11 (5.16%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.77 +epoch: 0|step: 144|ppo_ep: 1|act_loss: 0.39404296875|cri_loss: 0.25|unsuper_loss: 0.0 +average reward score: -4.37109375 +------------------------------------------------------------------------------------- +|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.73s (73.73%) |Training time=0.43s (18.30%) |Others=0.19 (7.97%)|CurSamplesPerSec=13.64 |AvgSamplesPerSec=14.76 +epoch: 0|step: 145|ppo_ep: 1|act_loss: -0.01904296875|cri_loss: 0.034942626953125|unsuper_loss: 0.0 +average reward score: -4.44921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.48%) |Training time=0.41s (18.73%) |Others=0.13 (5.79%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.76 +epoch: 0|step: 146|ppo_ep: 1|act_loss: -0.2066650390625|cri_loss: -0.078125|unsuper_loss: 0.0 +average reward score: -4.71875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.89%) |Training time=0.40s (18.90%) |Others=0.11 (5.21%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.76 +epoch: 0|step: 147|ppo_ep: 1|act_loss: 0.1812744140625|cri_loss: 0.15380859375|unsuper_loss: 0.0 +average reward score: -4.43359375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.86%) |Training time=0.41s (19.00%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.76 +epoch: 0|step: 148|ppo_ep: 1|act_loss: -0.1953125|cri_loss: -0.05718994140625|unsuper_loss: 0.0 +average reward score: -4.39453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.06%) |Training time=0.40s (18.74%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.76 +[2023-04-21 23:44:07,214] [INFO] [logging.py:96:log_dist] [Rank 0] step=150, skipped=7, lr=[9.589760345240206e-06, 9.589760345240206e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:44:07,233] [INFO] [timer.py:199:stop] epoch=0/micro_step=150/global_step=150, RunningAvgSamplesPerSec=128.76410862507046, CurrSamplesPerSec=131.15845699069212, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:44:07,325] [INFO] [logging.py:96:log_dist] [Rank 0] step=150, skipped=6, lr=[4.967322337776272e-06, 4.967322337776272e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 149|ppo_ep: 1|act_loss: 0.422119140625|cri_loss: 0.282958984375|unsuper_loss: 0.0 +average reward score: -4.41015625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.84%) |Training time=0.41s (18.98%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.76 +epoch: 0|step: 150|ppo_ep: 1|act_loss: -0.370849609375|cri_loss: -0.1490478515625|unsuper_loss: 0.0 +average reward score: -4.79296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.47%) |Training time=0.42s (19.34%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.76 +epoch: 0|step: 151|ppo_ep: 1|act_loss: -0.1011962890625|cri_loss: 0.0057373046875|unsuper_loss: 0.0 +average reward score: -3.849609375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.79%) |Training time=0.41s (18.97%) |Others=0.11 (5.24%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.76 +epoch: 0|step: 152|ppo_ep: 1|act_loss: 0.1781005859375|cri_loss: 0.1380615234375|unsuper_loss: 0.0 +average reward score: -4.3828125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.62%) |Training time=0.41s (19.19%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.77 +epoch: 0|step: 153|ppo_ep: 1|act_loss: 0.027191162109375|cri_loss: 0.043060302734375|unsuper_loss: 0.0 +average reward score: -4.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.66%) |Training time=0.41s (19.19%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.77 +epoch: 0|step: 154|ppo_ep: 1|act_loss: 0.3134765625|cri_loss: 0.208251953125|unsuper_loss: 0.0 +average reward score: -4.02734375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.59%) |Training time=0.41s (19.20%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.77 +epoch: 0|step: 155|ppo_ep: 1|act_loss: 0.072509765625|cri_loss: 0.06378173828125|unsuper_loss: 0.0 +average reward score: -4.1015625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.57%) |Training time=0.41s (19.22%) |Others=0.11 (5.21%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.77 +epoch: 0|step: 156|ppo_ep: 1|act_loss: 0.433837890625|cri_loss: 0.249755859375|unsuper_loss: 0.0 +average reward score: -4.13671875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.87%) |Training time=0.40s (18.89%) |Others=0.11 (5.25%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.77 +epoch: 0|step: 157|ppo_ep: 1|act_loss: -0.005615234375|cri_loss: 0.020538330078125|unsuper_loss: 0.0 +average reward score: -3.845703125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.74%) |Training time=0.41s (19.12%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.77 +epoch: 0|step: 158|ppo_ep: 1|act_loss: -0.234619140625|cri_loss: -0.06939697265625|unsuper_loss: 0.0 +average reward score: -4.21484375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.71%) |Training time=0.41s (19.08%) |Others=0.11 (5.21%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.77 +[2023-04-21 23:44:28,700] [INFO] [logging.py:96:log_dist] [Rank 0] step=160, skipped=7, lr=[9.558583017613959e-06, 9.558583017613959e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:44:28,714] [INFO] [timer.py:199:stop] epoch=0/micro_step=160/global_step=160, RunningAvgSamplesPerSec=128.69180454066543, CurrSamplesPerSec=112.99300832350458, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:44:28,809] [INFO] [logging.py:96:log_dist] [Rank 0] step=160, skipped=6, lr=[4.950835354254168e-06, 4.950835354254168e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 159|ppo_ep: 1|act_loss: 0.468505859375|cri_loss: 0.272705078125|unsuper_loss: 0.0 +average reward score: -3.94140625 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.60%) |Training time=0.47s (21.32%) |Others=0.11 (5.09%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.77 +epoch: 0|step: 160|ppo_ep: 1|act_loss: 0.4345703125|cri_loss: 0.247802734375|unsuper_loss: 0.0 +average reward score: -3.84765625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.50%) |Training time=0.41s (19.30%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.77 +epoch: 0|step: 161|ppo_ep: 1|act_loss: 0.0673828125|cri_loss: 0.03875732421875|unsuper_loss: 0.0 +average reward score: -3.806640625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.74%) |Training time=0.41s (19.03%) |Others=0.11 (5.23%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.77 +epoch: 0|step: 162|ppo_ep: 1|act_loss: 0.009613037109375|cri_loss: 0.032318115234375|unsuper_loss: 0.0 +average reward score: -3.70703125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.52%) |Training time=0.41s (19.26%) |Others=0.11 (5.22%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.77 +epoch: 0|step: 163|ppo_ep: 1|act_loss: 0.33349609375|cri_loss: 0.194580078125|unsuper_loss: 0.0 +average reward score: -3.75 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.30%) |Training time=0.42s (19.57%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.77 +epoch: 0|step: 164|ppo_ep: 1|act_loss: 0.18798828125|cri_loss: 0.1229248046875|unsuper_loss: 0.0 +average reward score: -3.94140625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.46%) |Training time=0.41s (19.34%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.77 +epoch: 0|step: 165|ppo_ep: 1|act_loss: 0.05999755859375|cri_loss: 0.0836181640625|unsuper_loss: 0.0 +average reward score: -3.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.38%) |Training time=0.42s (19.48%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.78 +epoch: 0|step: 166|ppo_ep: 1|act_loss: -0.01239013671875|cri_loss: 0.04156494140625|unsuper_loss: 0.0 +average reward score: -3.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.44%) |Training time=0.41s (19.34%) |Others=0.11 (5.22%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.78 +epoch: 0|step: 167|ppo_ep: 1|act_loss: -0.429443359375|cri_loss: -0.096923828125|unsuper_loss: 0.0 +average reward score: -3.73046875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.57%) |Training time=0.41s (19.29%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.78 +epoch: 0|step: 168|ppo_ep: 1|act_loss: -0.4609375|cri_loss: -0.14404296875|unsuper_loss: 0.0 +average reward score: -3.419921875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.47%) |Training time=0.41s (19.32%) |Others=0.11 (5.21%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.78 +[2023-04-21 23:44:50,130] [INFO] [logging.py:96:log_dist] [Rank 0] step=170, skipped=7, lr=[9.521000603104346e-06, 9.521000603104346e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:44:50,149] [INFO] [timer.py:199:stop] epoch=0/micro_step=170/global_step=170, RunningAvgSamplesPerSec=128.60437074368264, CurrSamplesPerSec=127.17719830200122, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:44:50,241] [INFO] [logging.py:96:log_dist] [Rank 0] step=170, skipped=6, lr=[4.931032106219029e-06, 4.931032106219029e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 169|ppo_ep: 1|act_loss: 0.5595703125|cri_loss: 0.408447265625|unsuper_loss: 0.0 +average reward score: -3.689453125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.50%) |Training time=0.41s (19.37%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.78 +epoch: 0|step: 170|ppo_ep: 1|act_loss: -0.053466796875|cri_loss: 0.0736083984375|unsuper_loss: 0.0 +average reward score: -3.265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.38%) |Training time=0.42s (19.42%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.78 +epoch: 0|step: 171|ppo_ep: 1|act_loss: -0.260986328125|cri_loss: -0.1085205078125|unsuper_loss: 0.0 +average reward score: -3.47265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.96%) |Training time=0.40s (18.89%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.78 +epoch: 0|step: 172|ppo_ep: 1|act_loss: 0.1719970703125|cri_loss: 0.160888671875|unsuper_loss: 0.0 +average reward score: -3.751953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.73%) |Training time=0.41s (19.07%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.78 +epoch: 0|step: 173|ppo_ep: 1|act_loss: 0.1490478515625|cri_loss: 0.10223388671875|unsuper_loss: 0.0 +average reward score: -3.56640625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.31%) |Training time=0.41s (18.63%) |Others=0.11 (5.06%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.78 +epoch: 0|step: 174|ppo_ep: 1|act_loss: 0.10699462890625|cri_loss: 0.09259033203125|unsuper_loss: 0.0 +average reward score: -3.125 +------------------------------------------------------------------------------------- +|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.73s (76.95%) |Training time=0.41s (18.06%) |Others=0.11 (4.99%)|CurSamplesPerSec=14.25 |AvgSamplesPerSec=14.78 +epoch: 0|step: 175|ppo_ep: 1|act_loss: 0.077392578125|cri_loss: 0.059234619140625|unsuper_loss: 0.0 +average reward score: -2.970703125 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.79s (77.47%) |Training time=0.41s (17.77%) |Others=0.11 (4.76%)|CurSamplesPerSec=13.82 |AvgSamplesPerSec=14.77 +epoch: 0|step: 176|ppo_ep: 1|act_loss: -0.00567626953125|cri_loss: 0.0218963623046875|unsuper_loss: 0.0 +average reward score: -3.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.10%) |Training time=0.40s (18.73%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.77 +epoch: 0|step: 177|ppo_ep: 1|act_loss: -0.052734375|cri_loss: 0.01092529296875|unsuper_loss: 0.0 +average reward score: -3.2734375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.85%) |Training time=0.41s (19.00%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.77 +epoch: 0|step: 178|ppo_ep: 1|act_loss: 0.16015625|cri_loss: 0.0904541015625|unsuper_loss: 0.0 +average reward score: -3.587890625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.76%) |Training time=0.41s (19.04%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.77 +[2023-04-21 23:45:11,884] [INFO] [logging.py:96:log_dist] [Rank 0] step=180, skipped=7, lr=[9.47706395507748e-06, 9.47706395507748e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:45:11,903] [INFO] [timer.py:199:stop] epoch=0/micro_step=180/global_step=180, RunningAvgSamplesPerSec=128.7054493106549, CurrSamplesPerSec=129.51103786201853, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:45:11,995] [INFO] [logging.py:96:log_dist] [Rank 0] step=180, skipped=6, lr=[4.907939389762475e-06, 4.907939389762475e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 179|ppo_ep: 1|act_loss: 0.210693359375|cri_loss: 0.1358642578125|unsuper_loss: 0.0 +average reward score: -3.1171875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.72%) |Training time=0.41s (19.15%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.78 +epoch: 0|step: 180|ppo_ep: 1|act_loss: 0.0718994140625|cri_loss: 0.0391845703125|unsuper_loss: 0.0 +average reward score: -3.412109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.43%) |Training time=0.42s (19.38%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.78 +epoch: 0|step: 181|ppo_ep: 1|act_loss: 0.176513671875|cri_loss: 0.0987548828125|unsuper_loss: 0.0 +average reward score: -3.40625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.88%) |Training time=0.41s (18.99%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.78 +epoch: 0|step: 182|ppo_ep: 1|act_loss: 0.27294921875|cri_loss: 0.1507568359375|unsuper_loss: 0.0 +average reward score: -3.998046875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.61%) |Training time=0.41s (19.20%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.78 +epoch: 0|step: 183|ppo_ep: 1|act_loss: 0.1353759765625|cri_loss: 0.085205078125|unsuper_loss: 0.0 +average reward score: -3.275390625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.60%) |Training time=0.41s (19.20%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.78 +epoch: 0|step: 184|ppo_ep: 1|act_loss: -0.13037109375|cri_loss: -0.0225830078125|unsuper_loss: 0.0 +average reward score: -3.07421875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.59%) |Training time=0.41s (19.22%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.78 +epoch: 0|step: 185|ppo_ep: 1|act_loss: -0.154052734375|cri_loss: -0.0269775390625|unsuper_loss: 0.0 +average reward score: -3.068359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.49%) |Training time=0.42s (19.38%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.78 +epoch: 0|step: 186|ppo_ep: 1|act_loss: -0.08251953125|cri_loss: -0.023284912109375|unsuper_loss: 0.0 +average reward score: -3.43359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.60%) |Training time=0.41s (19.01%) |Others=0.12 (5.39%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.78 +epoch: 0|step: 187|ppo_ep: 1|act_loss: -0.1881103515625|cri_loss: -0.0428466796875|unsuper_loss: 0.0 +average reward score: -3.095703125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.70%) |Training time=0.41s (19.17%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.78 +epoch: 0|step: 188|ppo_ep: 1|act_loss: -0.1611328125|cri_loss: -0.051971435546875|unsuper_loss: 0.0 +average reward score: -3.404296875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.56%) |Training time=0.41s (19.25%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.78 +[2023-04-21 23:45:33,325] [INFO] [logging.py:96:log_dist] [Rank 0] step=190, skipped=7, lr=[9.426832524914468e-06, 9.426832524914468e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:45:33,344] [INFO] [timer.py:199:stop] epoch=0/micro_step=190/global_step=190, RunningAvgSamplesPerSec=128.70097836160144, CurrSamplesPerSec=129.35488963398944, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:45:33,436] [INFO] [logging.py:96:log_dist] [Rank 0] step=190, skipped=6, lr=[4.881588452008457e-06, 4.881588452008457e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 189|ppo_ep: 1|act_loss: -0.0740966796875|cri_loss: -0.0230712890625|unsuper_loss: 0.0 +average reward score: -3.45703125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.71%) |Training time=0.41s (19.17%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.78 +epoch: 0|step: 190|ppo_ep: 1|act_loss: 0.0145263671875|cri_loss: 0.0230865478515625|unsuper_loss: 0.0 +average reward score: -3.482421875 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.79s (77.50%) |Training time=0.41s (17.68%) |Others=0.11 (4.83%)|CurSamplesPerSec=13.85 |AvgSamplesPerSec=14.78 +epoch: 0|step: 191|ppo_ep: 1|act_loss: -0.037109375|cri_loss: -0.017303466796875|unsuper_loss: 0.0 +average reward score: -3.125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.77%) |Training time=0.41s (19.01%) |Others=0.11 (5.22%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.78 +epoch: 0|step: 192|ppo_ep: 1|act_loss: 0.0064239501953125|cri_loss: 0.00643157958984375|unsuper_loss: 0.0 +average reward score: -3.06640625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.53%) |Training time=0.41s (19.28%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.78 +epoch: 0|step: 193|ppo_ep: 1|act_loss: 0.1124267578125|cri_loss: 0.06622314453125|unsuper_loss: 0.0 +average reward score: -3.09765625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.51%) |Training time=0.41s (19.30%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.78 +epoch: 0|step: 194|ppo_ep: 1|act_loss: 0.178466796875|cri_loss: 0.109130859375|unsuper_loss: 0.0 +average reward score: -3.4453125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.58%) |Training time=0.41s (19.22%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.78 +epoch: 0|step: 195|ppo_ep: 1|act_loss: 0.2174072265625|cri_loss: 0.133056640625|unsuper_loss: 0.0 +average reward score: -3.458984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.38%) |Training time=0.42s (19.48%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.78 +epoch: 0|step: 196|ppo_ep: 1|act_loss: 0.23974609375|cri_loss: 0.1435546875|unsuper_loss: 0.0 +average reward score: -3.2734375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.46%) |Training time=0.41s (19.35%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.78 +epoch: 0|step: 197|ppo_ep: 1|act_loss: 0.0567626953125|cri_loss: 0.032012939453125|unsuper_loss: 0.0 +average reward score: -3.1875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.68%) |Training time=0.41s (19.19%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.78 +epoch: 0|step: 198|ppo_ep: 1|act_loss: 0.14697265625|cri_loss: 0.0833740234375|unsuper_loss: 0.0 +average reward score: -3.265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.59%) |Training time=0.41s (19.22%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.78 +[2023-04-21 23:45:54,917] [INFO] [logging.py:96:log_dist] [Rank 0] step=200, skipped=7, lr=[9.370374281566792e-06, 9.370374281566792e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:45:54,936] [INFO] [timer.py:199:stop] epoch=0/micro_step=200/global_step=200, RunningAvgSamplesPerSec=128.6778333771853, CurrSamplesPerSec=127.40669356852047, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:45:55,028] [INFO] [logging.py:96:log_dist] [Rank 0] step=200, skipped=6, lr=[4.852014948832268e-06, 4.852014948832268e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 199|ppo_ep: 1|act_loss: -0.042144775390625|cri_loss: -0.01373291015625|unsuper_loss: 0.0 +average reward score: -3.23828125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.54%) |Training time=0.41s (19.33%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.79 +epoch: 0|step: 200|ppo_ep: 1|act_loss: -0.0169677734375|cri_loss: -0.00698089599609375|unsuper_loss: 0.0 +average reward score: -2.87890625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.57%) |Training time=0.41s (19.23%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.79 +epoch: 0|step: 201|ppo_ep: 1|act_loss: -0.0305633544921875|cri_loss: -0.0092926025390625|unsuper_loss: 0.0 +average reward score: -3.1171875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.60%) |Training time=0.41s (19.26%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.79 +epoch: 0|step: 202|ppo_ep: 1|act_loss: -0.02667236328125|cri_loss: -0.00907135009765625|unsuper_loss: 0.0 +average reward score: -3.25390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.61%) |Training time=0.41s (19.19%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.79 +epoch: 0|step: 203|ppo_ep: 1|act_loss: -0.062408447265625|cri_loss: -0.026336669921875|unsuper_loss: 0.0 +average reward score: -3.404296875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.28%) |Training time=0.41s (18.66%) |Others=0.11 (5.06%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.79 +epoch: 0|step: 204|ppo_ep: 1|act_loss: -0.08294677734375|cri_loss: -0.0308380126953125|unsuper_loss: 0.0 +average reward score: -3.1015625 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.75s (77.00%) |Training time=0.41s (18.07%) |Others=0.11 (4.93%)|CurSamplesPerSec=14.10 |AvgSamplesPerSec=14.78 +epoch: 0|step: 205|ppo_ep: 1|act_loss: 0.029876708984375|cri_loss: 0.0192718505859375|unsuper_loss: 0.0 +average reward score: -3.23046875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.76%) |Training time=0.41s (19.12%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.78 +epoch: 0|step: 206|ppo_ep: 1|act_loss: 0.0419921875|cri_loss: 0.023468017578125|unsuper_loss: 0.0 +average reward score: -3.029296875 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.67%) |Training time=0.41s (17.51%) |Others=0.11 (4.82%)|CurSamplesPerSec=13.84 |AvgSamplesPerSec=14.78 +epoch: 0|step: 207|ppo_ep: 1|act_loss: 0.0136260986328125|cri_loss: 0.00995635986328125|unsuper_loss: 0.0 +average reward score: -2.93359375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.92%) |Training time=0.41s (18.97%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.78 +epoch: 0|step: 208|ppo_ep: 1|act_loss: 0.1396484375|cri_loss: 0.07757568359375|unsuper_loss: 0.0 +average reward score: -2.998046875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.80%) |Training time=0.41s (19.00%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.78 +[2023-04-21 23:46:16,693] [INFO] [logging.py:96:log_dist] [Rank 0] step=210, skipped=7, lr=[9.30776561958644e-06, 9.30776561958644e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:46:16,711] [INFO] [timer.py:199:stop] epoch=0/micro_step=210/global_step=210, RunningAvgSamplesPerSec=128.72842312256287, CurrSamplesPerSec=129.26693370599412, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:46:16,804] [INFO] [logging.py:96:log_dist] [Rank 0] step=210, skipped=6, lr=[4.819258896614014e-06, 4.819258896614014e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 209|ppo_ep: 1|act_loss: 0.133056640625|cri_loss: 0.07379150390625|unsuper_loss: 0.0 +average reward score: -3.14453125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.79%) |Training time=0.41s (19.12%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.78 +epoch: 0|step: 210|ppo_ep: 1|act_loss: 0.1053466796875|cri_loss: 0.05902099609375|unsuper_loss: 0.0 +average reward score: -3.078125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.74%) |Training time=0.41s (19.06%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.78 +epoch: 0|step: 211|ppo_ep: 1|act_loss: 0.021881103515625|cri_loss: 0.01165771484375|unsuper_loss: 0.0 +average reward score: -3.03515625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.87%) |Training time=0.41s (19.00%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.78 +epoch: 0|step: 212|ppo_ep: 1|act_loss: -0.00565338134765625|cri_loss: -0.0013027191162109375|unsuper_loss: 0.0 +average reward score: -3.228515625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.83%) |Training time=0.41s (18.98%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.78 +epoch: 0|step: 213|ppo_ep: 1|act_loss: -0.017486572265625|cri_loss: -0.0062713623046875|unsuper_loss: 0.0 +average reward score: -3.412109375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.87%) |Training time=0.41s (18.94%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.78 +epoch: 0|step: 214|ppo_ep: 1|act_loss: -0.03656005859375|cri_loss: -0.01617431640625|unsuper_loss: 0.0 +average reward score: -3.298828125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.80%) |Training time=0.41s (19.01%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.78 +epoch: 0|step: 215|ppo_ep: 1|act_loss: -0.12060546875|cri_loss: -0.0494384765625|unsuper_loss: 0.0 +average reward score: -3.34375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.96%) |Training time=0.40s (18.81%) |Others=0.11 (5.23%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.79 +epoch: 0|step: 216|ppo_ep: 1|act_loss: 0.0006313323974609375|cri_loss: 0.00394439697265625|unsuper_loss: 0.0 +average reward score: -3.203125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.91%) |Training time=0.41s (18.90%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.79 +epoch: 0|step: 217|ppo_ep: 1|act_loss: -0.06268310546875|cri_loss: -0.0294189453125|unsuper_loss: 0.0 +average reward score: -3.28515625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.73%) |Training time=0.41s (19.08%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.79 +epoch: 0|step: 218|ppo_ep: 1|act_loss: -0.043365478515625|cri_loss: -0.0163421630859375|unsuper_loss: 0.0 +average reward score: -3.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.91%) |Training time=0.40s (18.90%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.79 +[2023-04-21 23:46:38,128] [INFO] [logging.py:96:log_dist] [Rank 0] step=220, skipped=7, lr=[9.239091255755212e-06, 9.239091255755212e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:46:38,147] [INFO] [timer.py:199:stop] epoch=0/micro_step=220/global_step=220, RunningAvgSamplesPerSec=128.83310579299808, CurrSamplesPerSec=130.09917831217157, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:46:38,239] [INFO] [logging.py:96:log_dist] [Rank 0] step=220, skipped=6, lr=[4.783364618091804e-06, 4.783364618091804e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 219|ppo_ep: 1|act_loss: -0.01348876953125|cri_loss: -0.00437164306640625|unsuper_loss: 0.0 +average reward score: -3.30078125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.80%) |Training time=0.41s (19.08%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.79 +epoch: 0|step: 220|ppo_ep: 1|act_loss: 0.0184326171875|cri_loss: 0.0106353759765625|unsuper_loss: 0.0 +average reward score: -2.9375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.80%) |Training time=0.41s (18.99%) |Others=0.11 (5.21%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.79 +epoch: 0|step: 221|ppo_ep: 1|act_loss: 0.07666015625|cri_loss: 0.0467529296875|unsuper_loss: 0.0 +average reward score: -3.046875 +------------------------------------------------------------------------------------- +|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.72s (76.54%) |Training time=0.41s (18.48%) |Others=0.11 (4.99%)|CurSamplesPerSec=14.28 |AvgSamplesPerSec=14.79 +epoch: 0|step: 222|ppo_ep: 1|act_loss: 0.0616455078125|cri_loss: 0.033416748046875|unsuper_loss: 0.0 +average reward score: -3.279296875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.73%) |Training time=0.41s (19.08%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.79 +epoch: 0|step: 223|ppo_ep: 1|act_loss: 0.0288848876953125|cri_loss: 0.0172119140625|unsuper_loss: 0.0 +average reward score: -3.076171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.71%) |Training time=0.41s (19.12%) |Others=0.11 (5.17%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.79 +epoch: 0|step: 224|ppo_ep: 1|act_loss: 0.043487548828125|cri_loss: 0.024383544921875|unsuper_loss: 0.0 +average reward score: -3.6484375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.91%) |Training time=0.40s (18.90%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.79 +epoch: 0|step: 225|ppo_ep: 1|act_loss: 0.0137481689453125|cri_loss: 0.00763702392578125|unsuper_loss: 0.0 +average reward score: -3.30078125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.84%) |Training time=0.41s (18.95%) |Others=0.11 (5.21%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.79 +epoch: 0|step: 226|ppo_ep: 1|act_loss: -0.028289794921875|cri_loss: -0.01092529296875|unsuper_loss: 0.0 +average reward score: -3.255859375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.72%) |Training time=0.41s (19.10%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79 +epoch: 0|step: 227|ppo_ep: 1|act_loss: -0.014556884765625|cri_loss: -0.006732940673828125|unsuper_loss: 0.0 +average reward score: -3.125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.00%) |Training time=0.40s (18.89%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.79 +epoch: 0|step: 228|ppo_ep: 1|act_loss: -0.014495849609375|cri_loss: -0.003444671630859375|unsuper_loss: 0.0 +average reward score: -3.138671875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.83%) |Training time=0.41s (18.99%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.79 +[2023-04-21 23:46:59,663] [INFO] [logging.py:96:log_dist] [Rank 0] step=230, skipped=7, lr=[9.16444411445309e-06, 9.16444411445309e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:46:59,682] [INFO] [timer.py:199:stop] epoch=0/micro_step=230/global_step=230, RunningAvgSamplesPerSec=128.95552365421398, CurrSamplesPerSec=131.04000781059312, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:46:59,774] [INFO] [logging.py:96:log_dist] [Rank 0] step=230, skipped=6, lr=[4.74438068238795e-06, 4.74438068238795e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 229|ppo_ep: 1|act_loss: 0.048187255859375|cri_loss: 0.0258636474609375|unsuper_loss: 0.0 +average reward score: -2.927734375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.89%) |Training time=0.41s (19.00%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.79 +epoch: 0|step: 230|ppo_ep: 1|act_loss: 0.04071044921875|cri_loss: 0.0237884521484375|unsuper_loss: 0.0 +average reward score: -3.255859375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.62%) |Training time=0.41s (19.19%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.79 +epoch: 0|step: 231|ppo_ep: 1|act_loss: -0.0203094482421875|cri_loss: -0.0077056884765625|unsuper_loss: 0.0 +average reward score: -3.017578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.85%) |Training time=0.41s (19.03%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79 +epoch: 0|step: 232|ppo_ep: 1|act_loss: 0.045654296875|cri_loss: 0.0247802734375|unsuper_loss: 0.0 +average reward score: -2.765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.97%) |Training time=0.41s (18.88%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.79 +epoch: 0|step: 233|ppo_ep: 1|act_loss: 0.061920166015625|cri_loss: 0.032867431640625|unsuper_loss: 0.0 +average reward score: -2.89453125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.23%) |Training time=0.41s (18.73%) |Others=0.11 (5.05%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.79 +epoch: 0|step: 234|ppo_ep: 1|act_loss: 0.034759521484375|cri_loss: 0.0200958251953125|unsuper_loss: 0.0 +average reward score: -2.947265625 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.73s (77.00%) |Training time=0.41s (18.04%) |Others=0.11 (4.97%)|CurSamplesPerSec=14.22 |AvgSamplesPerSec=14.79 +epoch: 0|step: 235|ppo_ep: 1|act_loss: 0.0184326171875|cri_loss: 0.0116729736328125|unsuper_loss: 0.0 +average reward score: -3.146484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.95%) |Training time=0.41s (18.92%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.79 +epoch: 0|step: 236|ppo_ep: 1|act_loss: -0.003536224365234375|cri_loss: -7.43865966796875e-05|unsuper_loss: 0.0 +average reward score: -3.07421875 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.77%) |Training time=0.40s (17.42%) |Others=0.11 (4.81%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.78 +epoch: 0|step: 237|ppo_ep: 1|act_loss: 0.0286865234375|cri_loss: 0.01538848876953125|unsuper_loss: 0.0 +average reward score: -3.251953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.93%) |Training time=0.41s (18.95%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.78 +epoch: 0|step: 238|ppo_ep: 1|act_loss: 0.00986480712890625|cri_loss: 0.005401611328125|unsuper_loss: 0.0 +average reward score: -3.1875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.92%) |Training time=0.41s (18.89%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.79 +[2023-04-21 23:47:21,463] [INFO] [logging.py:96:log_dist] [Rank 0] step=240, skipped=7, lr=[9.083925201920767e-06, 9.083925201920767e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:47:21,482] [INFO] [timer.py:199:stop] epoch=0/micro_step=240/global_step=240, RunningAvgSamplesPerSec=129.020879201406, CurrSamplesPerSec=129.08467586425778, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:47:21,574] [INFO] [logging.py:96:log_dist] [Rank 0] step=240, skipped=6, lr=[4.702359839289306e-06, 4.702359839289306e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 239|ppo_ep: 1|act_loss: 0.01123046875|cri_loss: 0.00597381591796875|unsuper_loss: 0.0 +average reward score: -2.947265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.74%) |Training time=0.41s (19.15%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.79 +epoch: 0|step: 240|ppo_ep: 1|act_loss: -0.01959228515625|cri_loss: -0.00701141357421875|unsuper_loss: 0.0 +average reward score: -2.98046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.31%) |Training time=0.42s (19.53%) |Others=0.11 (5.16%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.79 +epoch: 0|step: 241|ppo_ep: 1|act_loss: 0.024871826171875|cri_loss: 0.01422119140625|unsuper_loss: 0.0 +average reward score: -3.484375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.61%) |Training time=0.41s (19.26%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.79 +epoch: 0|step: 242|ppo_ep: 1|act_loss: -0.00615692138671875|cri_loss: -0.001445770263671875|unsuper_loss: 0.0 +average reward score: -3.1171875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.53%) |Training time=0.42s (19.34%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.79 +epoch: 0|step: 243|ppo_ep: 1|act_loss: 0.0120086669921875|cri_loss: 0.008575439453125|unsuper_loss: 0.0 +average reward score: -3.212890625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.40%) |Training time=0.42s (19.40%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79 +epoch: 0|step: 244|ppo_ep: 1|act_loss: -0.0282745361328125|cri_loss: -0.01092529296875|unsuper_loss: 0.0 +average reward score: -3.13671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.33%) |Training time=0.42s (19.49%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.79 +epoch: 0|step: 245|ppo_ep: 1|act_loss: 0.00045680999755859375|cri_loss: 0.0008997917175292969|unsuper_loss: 0.0 +average reward score: -3.115234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.35%) |Training time=0.42s (19.53%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.79 +epoch: 0|step: 246|ppo_ep: 1|act_loss: 0.00936126708984375|cri_loss: 0.0063629150390625|unsuper_loss: 0.0 +average reward score: -3.408203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.45%) |Training time=0.42s (19.37%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.79 +epoch: 0|step: 247|ppo_ep: 1|act_loss: 0.04052734375|cri_loss: 0.0243377685546875|unsuper_loss: 0.0 +average reward score: -3.361328125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.60%) |Training time=0.41s (19.28%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79 +epoch: 0|step: 248|ppo_ep: 1|act_loss: 0.0257568359375|cri_loss: 0.0141754150390625|unsuper_loss: 0.0 +average reward score: -3.10546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.43%) |Training time=0.42s (19.39%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79 +[2023-04-21 23:47:42,957] [INFO] [logging.py:96:log_dist] [Rank 0] step=250, skipped=7, lr=[8.9976434695865e-06, 8.9976434695865e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:47:42,976] [INFO] [timer.py:199:stop] epoch=0/micro_step=250/global_step=250, RunningAvgSamplesPerSec=128.88742633520977, CurrSamplesPerSec=125.36975737337413, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:47:43,068] [INFO] [logging.py:96:log_dist] [Rank 0] step=250, skipped=6, lr=[4.657358947870691e-06, 4.657358947870691e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 249|ppo_ep: 1|act_loss: 0.06134033203125|cri_loss: 0.03369140625|unsuper_loss: 0.0 +average reward score: -3.513671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.40%) |Training time=0.42s (19.48%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79 +epoch: 0|step: 250|ppo_ep: 1|act_loss: 0.021209716796875|cri_loss: 0.011810302734375|unsuper_loss: 0.0 +average reward score: -3.30859375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.25%) |Training time=0.42s (19.59%) |Others=0.11 (5.16%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.79 +epoch: 0|step: 251|ppo_ep: 1|act_loss: -0.01953125|cri_loss: -0.0036773681640625|unsuper_loss: 0.0 +average reward score: -2.99609375 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.64%) |Training time=0.41s (17.60%) |Others=0.11 (4.75%)|CurSamplesPerSec=13.82 |AvgSamplesPerSec=14.79 +epoch: 0|step: 252|ppo_ep: 1|act_loss: -0.0347900390625|cri_loss: -0.0166778564453125|unsuper_loss: 0.0 +average reward score: -3.17578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.49%) |Training time=0.41s (19.31%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.79 +epoch: 0|step: 253|ppo_ep: 1|act_loss: 0.0095672607421875|cri_loss: 0.005504608154296875|unsuper_loss: 0.0 +average reward score: -3.162109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.48%) |Training time=0.42s (19.40%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.79 +epoch: 0|step: 254|ppo_ep: 1|act_loss: -0.015533447265625|cri_loss: -0.00714874267578125|unsuper_loss: 0.0 +average reward score: -3.15625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.59%) |Training time=0.41s (19.20%) |Others=0.11 (5.21%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.79 +epoch: 0|step: 255|ppo_ep: 1|act_loss: 0.0496826171875|cri_loss: 0.0277099609375|unsuper_loss: 0.0 +average reward score: -3.283203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.48%) |Training time=0.42s (19.40%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.79 +epoch: 0|step: 256|ppo_ep: 1|act_loss: 0.0184783935546875|cri_loss: 0.01080322265625|unsuper_loss: 0.0 +average reward score: -3.18359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.33%) |Training time=0.42s (19.49%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.79 +epoch: 0|step: 257|ppo_ep: 1|act_loss: 0.01824951171875|cri_loss: 0.009918212890625|unsuper_loss: 0.0 +average reward score: -3.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.77%) |Training time=0.41s (19.11%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.79 +epoch: 0|step: 258|ppo_ep: 1|act_loss: 0.034423828125|cri_loss: 0.02008056640625|unsuper_loss: 0.0 +average reward score: -3.064453125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.45%) |Training time=0.42s (19.35%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79 +[2023-04-21 23:48:04,600] [INFO] [logging.py:96:log_dist] [Rank 0] step=260, skipped=7, lr=[8.905715666642176e-06, 8.905715666642176e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:48:04,619] [INFO] [timer.py:199:stop] epoch=0/micro_step=260/global_step=260, RunningAvgSamplesPerSec=128.81736360064403, CurrSamplesPerSec=128.79951864899283, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:48:04,711] [INFO] [logging.py:96:log_dist] [Rank 0] step=260, skipped=6, lr=[4.609438899557964e-06, 4.609438899557964e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 259|ppo_ep: 1|act_loss: -0.0223846435546875|cri_loss: -0.009674072265625|unsuper_loss: 0.0 +average reward score: -2.919921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.72%) |Training time=0.41s (19.17%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79 +epoch: 0|step: 260|ppo_ep: 1|act_loss: 0.025238037109375|cri_loss: 0.01513671875|unsuper_loss: 0.0 +average reward score: -3.04296875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.51%) |Training time=0.42s (19.33%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.79 +epoch: 0|step: 261|ppo_ep: 1|act_loss: 0.0604248046875|cri_loss: 0.0311431884765625|unsuper_loss: 0.0 +average reward score: -3.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.44%) |Training time=0.42s (19.44%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.79 +epoch: 0|step: 262|ppo_ep: 1|act_loss: -0.00435638427734375|cri_loss: -0.0010166168212890625|unsuper_loss: 0.0 +average reward score: -3.255859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.76%) |Training time=0.41s (19.07%) |Others=0.11 (5.17%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.79 +epoch: 0|step: 263|ppo_ep: 1|act_loss: 0.0341796875|cri_loss: 0.0180206298828125|unsuper_loss: 0.0 +average reward score: -3.05078125 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.66s (74.21%) |Training time=0.42s (18.90%) |Others=0.15 (6.89%)|CurSamplesPerSec=14.32 |AvgSamplesPerSec=14.79 +epoch: 0|step: 264|ppo_ep: 1|act_loss: -0.040283203125|cri_loss: -0.0096435546875|unsuper_loss: 0.0 +average reward score: -3.310546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.29%) |Training time=0.42s (19.49%) |Others=0.11 (5.22%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.79 +epoch: 0|step: 265|ppo_ep: 1|act_loss: 0.0830078125|cri_loss: 0.044769287109375|unsuper_loss: 0.0 +average reward score: -3.2421875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.29%) |Training time=0.42s (19.60%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.79 +epoch: 0|step: 266|ppo_ep: 1|act_loss: 0.024688720703125|cri_loss: 0.0130157470703125|unsuper_loss: 0.0 +average reward score: -2.9296875 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.79s (77.45%) |Training time=0.41s (17.74%) |Others=0.11 (4.81%)|CurSamplesPerSec=13.84 |AvgSamplesPerSec=14.79 +epoch: 0|step: 267|ppo_ep: 1|act_loss: 0.033172607421875|cri_loss: 0.0172576904296875|unsuper_loss: 0.0 +average reward score: -3.1015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.38%) |Training time=0.42s (19.48%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.79 +epoch: 0|step: 268|ppo_ep: 1|act_loss: 0.0128936767578125|cri_loss: 0.00716400146484375|unsuper_loss: 0.0 +average reward score: -3.265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.36%) |Training time=0.42s (19.46%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79 +[2023-04-21 23:48:26,362] [INFO] [logging.py:96:log_dist] [Rank 0] step=270, skipped=7, lr=[8.808266182068123e-06, 8.808266182068123e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:48:26,381] [INFO] [timer.py:199:stop] epoch=0/micro_step=270/global_step=270, RunningAvgSamplesPerSec=128.70210285554833, CurrSamplesPerSec=127.35821456360428, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:48:26,473] [INFO] [logging.py:96:log_dist] [Rank 0] step=270, skipped=6, lr=[4.558664535734864e-06, 4.558664535734864e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 269|ppo_ep: 1|act_loss: -0.032958984375|cri_loss: -0.0159454345703125|unsuper_loss: 0.0 +average reward score: -3.48828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.58%) |Training time=0.41s (19.31%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79 +epoch: 0|step: 270|ppo_ep: 1|act_loss: -0.0345458984375|cri_loss: -0.0165863037109375|unsuper_loss: 0.0 +average reward score: -3.115234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.14%) |Training time=0.42s (19.69%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.79 +epoch: 0|step: 271|ppo_ep: 1|act_loss: -0.04461669921875|cri_loss: -0.021514892578125|unsuper_loss: 0.0 +average reward score: -3.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.40%) |Training time=0.42s (19.42%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79 +epoch: 0|step: 272|ppo_ep: 1|act_loss: 0.0128631591796875|cri_loss: 0.007404327392578125|unsuper_loss: 0.0 +average reward score: -3.1171875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.43%) |Training time=0.42s (19.38%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.79 +epoch: 0|step: 273|ppo_ep: 1|act_loss: -0.004085540771484375|cri_loss: -0.0012903213500976562|unsuper_loss: 0.0 +average reward score: -2.76953125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.45%) |Training time=0.42s (19.41%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.79 +epoch: 0|step: 274|ppo_ep: 1|act_loss: 0.032745361328125|cri_loss: 0.0178070068359375|unsuper_loss: 0.0 +average reward score: -3.0625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.72%) |Training time=0.41s (19.06%) |Others=0.11 (5.22%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79 +epoch: 0|step: 275|ppo_ep: 1|act_loss: -0.018463134765625|cri_loss: -0.008392333984375|unsuper_loss: 0.0 +average reward score: -2.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.00%) |Training time=0.43s (19.83%) |Others=0.11 (5.17%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.79 +epoch: 0|step: 276|ppo_ep: 1|act_loss: 0.002796173095703125|cri_loss: 0.0027217864990234375|unsuper_loss: 0.0 +average reward score: -3.08984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.36%) |Training time=0.42s (19.45%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.79 +epoch: 0|step: 277|ppo_ep: 1|act_loss: 0.0311737060546875|cri_loss: 0.01837158203125|unsuper_loss: 0.0 +average reward score: -3.36328125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.37%) |Training time=0.42s (19.44%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79 +epoch: 0|step: 278|ppo_ep: 1|act_loss: 0.010223388671875|cri_loss: 0.006526947021484375|unsuper_loss: 0.0 +average reward score: -3.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.23%) |Training time=0.42s (19.59%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.79 +[2023-04-21 23:48:47,839] [INFO] [logging.py:96:log_dist] [Rank 0] step=280, skipped=7, lr=[8.705426876320388e-06, 8.705426876320388e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:48:47,857] [INFO] [timer.py:199:stop] epoch=0/micro_step=280/global_step=280, RunningAvgSamplesPerSec=128.57407445120526, CurrSamplesPerSec=125.00824554262898, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:48:47,950] [INFO] [logging.py:96:log_dist] [Rank 0] step=280, skipped=6, lr=[4.5051045600050906e-06, 4.5051045600050906e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 279|ppo_ep: 1|act_loss: 0.0294342041015625|cri_loss: 0.016387939453125|unsuper_loss: 0.0 +average reward score: -3.021484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.38%) |Training time=0.42s (19.50%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.79 +epoch: 0|step: 280|ppo_ep: 1|act_loss: -0.008270263671875|cri_loss: -0.002567291259765625|unsuper_loss: 0.0 +average reward score: -3.06640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.16%) |Training time=0.42s (19.66%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.79 +epoch: 0|step: 281|ppo_ep: 1|act_loss: -0.012481689453125|cri_loss: -0.005329132080078125|unsuper_loss: 0.0 +average reward score: -3.12890625 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.73s (75.07%) |Training time=0.46s (20.16%) |Others=0.11 (4.78%)|CurSamplesPerSec=13.88 |AvgSamplesPerSec=14.79 +epoch: 0|step: 282|ppo_ep: 1|act_loss: 0.0285797119140625|cri_loss: 0.0158538818359375|unsuper_loss: 0.0 +average reward score: -3.10546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.29%) |Training time=0.42s (19.53%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79 +epoch: 0|step: 283|ppo_ep: 1|act_loss: 0.0206298828125|cri_loss: 0.0119476318359375|unsuper_loss: 0.0 +average reward score: -3.171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.35%) |Training time=0.42s (19.53%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.79 +epoch: 0|step: 284|ppo_ep: 1|act_loss: 0.03485107421875|cri_loss: 0.01885986328125|unsuper_loss: 0.0 +average reward score: -2.890625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.44%) |Training time=0.42s (19.37%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.79 +epoch: 0|step: 285|ppo_ep: 1|act_loss: 0.02923583984375|cri_loss: 0.0161285400390625|unsuper_loss: 0.0 +average reward score: -3.009765625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.14%) |Training time=0.42s (19.74%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.79 +epoch: 0|step: 286|ppo_ep: 1|act_loss: -0.007232666015625|cri_loss: -0.0033016204833984375|unsuper_loss: 0.0 +average reward score: -3.35546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.21%) |Training time=0.42s (19.60%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.79 +epoch: 0|step: 287|ppo_ep: 1|act_loss: -0.016448974609375|cri_loss: -0.00637054443359375|unsuper_loss: 0.0 +average reward score: -3.037109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.18%) |Training time=0.42s (19.64%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.79 +epoch: 0|step: 288|ppo_ep: 1|act_loss: -0.0012378692626953125|cri_loss: 0.0008544921875|unsuper_loss: 0.0 +average reward score: -3.197265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.69%) |Training time=0.41s (19.15%) |Others=0.11 (5.16%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.79 +[2023-04-21 23:49:09,494] [INFO] [logging.py:96:log_dist] [Rank 0] step=290, skipped=7, lr=[8.597336902908245e-06, 8.597336902908245e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:49:09,512] [INFO] [timer.py:199:stop] epoch=0/micro_step=290/global_step=290, RunningAvgSamplesPerSec=128.3833988250307, CurrSamplesPerSec=127.5106669200076, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:49:09,605] [INFO] [logging.py:96:log_dist] [Rank 0] step=290, skipped=6, lr=[4.448831445228368e-06, 4.448831445228368e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 289|ppo_ep: 1|act_loss: -0.0286102294921875|cri_loss: -0.0133514404296875|unsuper_loss: 0.0 +average reward score: -3.240234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.60%) |Training time=0.41s (19.27%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.79 +epoch: 0|step: 290|ppo_ep: 1|act_loss: 0.0017309188842773438|cri_loss: 0.0014438629150390625|unsuper_loss: 0.0 +average reward score: -3.078125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.30%) |Training time=0.42s (19.52%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79 +epoch: 0|step: 291|ppo_ep: 1|act_loss: 0.024749755859375|cri_loss: 0.01300811767578125|unsuper_loss: 0.0 +average reward score: -3.166015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.28%) |Training time=0.42s (19.60%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.79 +epoch: 0|step: 292|ppo_ep: 1|act_loss: 0.0435791015625|cri_loss: 0.0221405029296875|unsuper_loss: 0.0 +average reward score: -3.3203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.50%) |Training time=0.41s (19.06%) |Others=0.12 (5.44%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.79 +epoch: 0|step: 293|ppo_ep: 1|act_loss: -0.0178680419921875|cri_loss: -0.0075836181640625|unsuper_loss: 0.0 +average reward score: -2.98046875 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.65s (74.49%) |Training time=0.45s (20.43%) |Others=0.11 (5.08%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.79 +epoch: 0|step: 294|ppo_ep: 1|act_loss: -0.00787353515625|cri_loss: -0.002841949462890625|unsuper_loss: 0.0 +average reward score: -3.28125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.30%) |Training time=0.42s (19.52%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.79 +epoch: 0|step: 295|ppo_ep: 1|act_loss: -0.031158447265625|cri_loss: -0.01505279541015625|unsuper_loss: 0.0 +average reward score: -3.064453125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.48%) |Training time=0.42s (19.40%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.79 +epoch: 0|step: 296|ppo_ep: 1|act_loss: -0.013824462890625|cri_loss: -0.00616455078125|unsuper_loss: 0.0 +average reward score: -3.05078125 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.62s (71.92%) |Training time=0.43s (19.05%) |Others=0.20 (9.03%)|CurSamplesPerSec=14.20 |AvgSamplesPerSec=14.79 +epoch: 0|step: 297|ppo_ep: 1|act_loss: 0.004711151123046875|cri_loss: 0.003017425537109375|unsuper_loss: 0.0 +average reward score: -3.26171875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.56%) |Training time=0.42s (19.33%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.79 +epoch: 0|step: 298|ppo_ep: 1|act_loss: 0.04693603515625|cri_loss: 0.024688720703125|unsuper_loss: 0.0 +average reward score: -2.8828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.48%) |Training time=0.41s (19.33%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79 +[2023-04-21 23:49:31,158] [INFO] [logging.py:96:log_dist] [Rank 0] step=300, skipped=7, lr=[8.484142520103355e-06, 8.484142520103355e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:49:31,176] [INFO] [timer.py:199:stop] epoch=0/micro_step=300/global_step=300, RunningAvgSamplesPerSec=128.25590216328837, CurrSamplesPerSec=127.5973400134045, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:49:31,269] [INFO] [logging.py:96:log_dist] [Rank 0] step=300, skipped=6, lr=[4.389921335456253e-06, 4.389921335456253e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 299|ppo_ep: 1|act_loss: 0.015655517578125|cri_loss: 0.008056640625|unsuper_loss: 0.0 +average reward score: -3.076171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.62%) |Training time=0.41s (19.27%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79 +epoch: 0|step: 300|ppo_ep: 1|act_loss: 0.003688812255859375|cri_loss: 0.0025196075439453125|unsuper_loss: 0.0 +average reward score: -3.318359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.60%) |Training time=0.41s (19.21%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.79 +epoch: 0|step: 301|ppo_ep: 1|act_loss: 0.01056671142578125|cri_loss: 0.006107330322265625|unsuper_loss: 0.0 +average reward score: -3.095703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.53%) |Training time=0.41s (19.29%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79 +epoch: 0|step: 302|ppo_ep: 1|act_loss: -0.020965576171875|cri_loss: -0.00943756103515625|unsuper_loss: 0.0 +average reward score: -3.16796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.61%) |Training time=0.41s (19.20%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79 +epoch: 0|step: 303|ppo_ep: 1|act_loss: -0.0183868408203125|cri_loss: -0.0089263916015625|unsuper_loss: 0.0 +average reward score: -3.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.63%) |Training time=0.41s (19.25%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.79 +epoch: 0|step: 304|ppo_ep: 1|act_loss: -0.04052734375|cri_loss: -0.016693115234375|unsuper_loss: 0.0 +average reward score: -3.220703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.58%) |Training time=0.41s (19.23%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.79 +epoch: 0|step: 305|ppo_ep: 1|act_loss: 0.0142059326171875|cri_loss: 0.0087127685546875|unsuper_loss: 0.0 +average reward score: -2.8828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.47%) |Training time=0.42s (19.42%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.79 +epoch: 0|step: 306|ppo_ep: 1|act_loss: 0.01409912109375|cri_loss: 0.0086669921875|unsuper_loss: 0.0 +average reward score: -3.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.51%) |Training time=0.41s (19.31%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79 +epoch: 0|step: 307|ppo_ep: 1|act_loss: -0.0184478759765625|cri_loss: -0.0086517333984375|unsuper_loss: 0.0 +average reward score: -3.12890625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.67%) |Training time=0.41s (19.14%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.79 +epoch: 0|step: 308|ppo_ep: 1|act_loss: 0.01383209228515625|cri_loss: 0.007350921630859375|unsuper_loss: 0.0 +average reward score: -3.076171875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.67%) |Training time=0.41s (19.13%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.79 +[2023-04-21 23:49:52,621] [INFO] [logging.py:96:log_dist] [Rank 0] step=310, skipped=7, lr=[8.36599689303536e-06, 8.36599689303536e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:49:52,639] [INFO] [timer.py:199:stop] epoch=0/micro_step=310/global_step=310, RunningAvgSamplesPerSec=128.2386799067045, CurrSamplesPerSec=127.45194167612301, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:49:52,732] [INFO] [logging.py:96:log_dist] [Rank 0] step=310, skipped=6, lr=[4.328453942900402e-06, 4.328453942900402e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 309|ppo_ep: 1|act_loss: -0.00490570068359375|cri_loss: -0.002193450927734375|unsuper_loss: 0.0 +average reward score: -3.23828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.62%) |Training time=0.41s (19.27%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.79 +epoch: 0|step: 310|ppo_ep: 1|act_loss: 0.032989501953125|cri_loss: 0.01708984375|unsuper_loss: 0.0 +average reward score: -3.41015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.45%) |Training time=0.42s (19.38%) |Others=0.11 (5.17%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79 +epoch: 0|step: 311|ppo_ep: 1|act_loss: 0.0374755859375|cri_loss: 0.02001953125|unsuper_loss: 0.0 +average reward score: -2.95703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.62%) |Training time=0.41s (19.25%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.79 +epoch: 0|step: 312|ppo_ep: 1|act_loss: 0.00566864013671875|cri_loss: 0.0037441253662109375|unsuper_loss: 0.0 +average reward score: -3.068359375 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.79s (77.58%) |Training time=0.41s (17.58%) |Others=0.11 (4.84%)|CurSamplesPerSec=13.88 |AvgSamplesPerSec=14.79 +epoch: 0|step: 313|ppo_ep: 1|act_loss: -0.030364990234375|cri_loss: -0.01445770263671875|unsuper_loss: 0.0 +average reward score: -2.875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.60%) |Training time=0.41s (19.27%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.79 +epoch: 0|step: 314|ppo_ep: 1|act_loss: 0.0321044921875|cri_loss: 0.0169677734375|unsuper_loss: 0.0 +average reward score: -3.283203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.57%) |Training time=0.41s (19.24%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79 +epoch: 0|step: 315|ppo_ep: 1|act_loss: 0.009674072265625|cri_loss: 0.00579071044921875|unsuper_loss: 0.0 +average reward score: -3.19921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.39%) |Training time=0.42s (19.49%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.79 +epoch: 0|step: 316|ppo_ep: 1|act_loss: 0.03387451171875|cri_loss: 0.0174560546875|unsuper_loss: 0.0 +average reward score: -3.3046875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.40%) |Training time=0.42s (19.41%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.79 +epoch: 0|step: 317|ppo_ep: 1|act_loss: -0.0147857666015625|cri_loss: -0.006465911865234375|unsuper_loss: 0.0 +average reward score: -3.1953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.38%) |Training time=0.42s (19.50%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.79 +epoch: 0|step: 318|ppo_ep: 1|act_loss: -0.0088348388671875|cri_loss: -0.00360870361328125|unsuper_loss: 0.0 +average reward score: -3.21875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.68%) |Training time=0.41s (19.11%) |Others=0.11 (5.21%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.79 +[2023-04-21 23:50:14,249] [INFO] [logging.py:96:log_dist] [Rank 0] step=320, skipped=7, lr=[8.243059886441706e-06, 8.243059886441706e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:50:14,267] [INFO] [timer.py:199:stop] epoch=0/micro_step=320/global_step=320, RunningAvgSamplesPerSec=128.20887438921002, CurrSamplesPerSec=127.5467764957208, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:50:14,360] [INFO] [logging.py:96:log_dist] [Rank 0] step=320, skipped=6, lr=[4.264512440072707e-06, 4.264512440072707e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 319|ppo_ep: 1|act_loss: -0.019866943359375|cri_loss: -0.00934600830078125|unsuper_loss: 0.0 +average reward score: -3.306640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.56%) |Training time=0.41s (19.28%) |Others=0.11 (5.16%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79 +epoch: 0|step: 320|ppo_ep: 1|act_loss: -0.01428985595703125|cri_loss: -0.006603240966796875|unsuper_loss: 0.0 +average reward score: -3.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.63%) |Training time=0.41s (19.18%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.79 +epoch: 0|step: 321|ppo_ep: 1|act_loss: 0.03375244140625|cri_loss: 0.0189971923828125|unsuper_loss: 0.0 +average reward score: -3.15234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.73%) |Training time=0.41s (19.15%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.79 +epoch: 0|step: 322|ppo_ep: 1|act_loss: 0.0269317626953125|cri_loss: 0.0141143798828125|unsuper_loss: 0.0 +average reward score: -3.333984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.19%) |Training time=0.42s (19.37%) |Others=0.12 (5.44%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.79 +epoch: 0|step: 323|ppo_ep: 1|act_loss: -0.024261474609375|cri_loss: -0.0097808837890625|unsuper_loss: 0.0 +average reward score: -2.896484375 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.66s (74.84%) |Training time=0.44s (19.74%) |Others=0.12 (5.42%)|CurSamplesPerSec=14.46 |AvgSamplesPerSec=14.79 +epoch: 0|step: 324|ppo_ep: 1|act_loss: -0.04193115234375|cri_loss: -0.018463134765625|unsuper_loss: 0.0 +average reward score: -2.994140625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.80%) |Training time=0.41s (19.01%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.79 +epoch: 0|step: 325|ppo_ep: 1|act_loss: -0.04461669921875|cri_loss: -0.015655517578125|unsuper_loss: 0.0 +average reward score: -3.23828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.49%) |Training time=0.42s (19.34%) |Others=0.11 (5.17%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.79 +epoch: 0|step: 326|ppo_ep: 1|act_loss: 0.02117919921875|cri_loss: 0.01165771484375|unsuper_loss: 0.0 +average reward score: -3.185546875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.69%) |Training time=0.41s (19.11%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.79 +epoch: 0|step: 327|ppo_ep: 1|act_loss: 0.041748046875|cri_loss: 0.0219573974609375|unsuper_loss: 0.0 +average reward score: -3.2421875 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.75%) |Training time=0.41s (17.50%) |Others=0.11 (4.75%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.79 +epoch: 0|step: 328|ppo_ep: 1|act_loss: 0.00531768798828125|cri_loss: 0.003467559814453125|unsuper_loss: 0.0 +average reward score: -2.93359375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.93%) |Training time=0.40s (18.86%) |Others=0.11 (5.21%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.79 +[2023-04-21 23:50:35,959] [INFO] [logging.py:96:log_dist] [Rank 0] step=330, skipped=7, lr=[8.11549784835209e-06, 8.11549784835209e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:50:35,977] [INFO] [timer.py:199:stop] epoch=0/micro_step=330/global_step=330, RunningAvgSamplesPerSec=128.2441069419638, CurrSamplesPerSec=128.31952120768858, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:50:36,069] [INFO] [logging.py:96:log_dist] [Rank 0] step=330, skipped=6, lr=[4.198183347243233e-06, 4.198183347243233e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 329|ppo_ep: 1|act_loss: -0.0521240234375|cri_loss: -0.0235595703125|unsuper_loss: 0.0 +average reward score: -2.904296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.70%) |Training time=0.41s (19.21%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79 +epoch: 0|step: 330|ppo_ep: 1|act_loss: -0.00556182861328125|cri_loss: -0.0016345977783203125|unsuper_loss: 0.0 +average reward score: -3.076171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.51%) |Training time=0.41s (19.30%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.79 +epoch: 0|step: 331|ppo_ep: 1|act_loss: 0.0175323486328125|cri_loss: 0.01015472412109375|unsuper_loss: 0.0 +average reward score: -3.19921875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.88%) |Training time=0.41s (19.00%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.79 +epoch: 0|step: 332|ppo_ep: 1|act_loss: -0.09765625|cri_loss: -0.032867431640625|unsuper_loss: 0.0 +average reward score: -2.998046875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.88%) |Training time=0.40s (18.92%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.79 +epoch: 0|step: 333|ppo_ep: 1|act_loss: -0.01300811767578125|cri_loss: -0.0055694580078125|unsuper_loss: 0.0 +average reward score: -3.279296875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.85%) |Training time=0.41s (19.03%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.79 +epoch: 0|step: 334|ppo_ep: 1|act_loss: 0.03717041015625|cri_loss: 0.01959228515625|unsuper_loss: 0.0 +average reward score: -2.9921875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.84%) |Training time=0.41s (18.97%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.79 +epoch: 0|step: 335|ppo_ep: 1|act_loss: -0.397216796875|cri_loss: -0.002685546875|unsuper_loss: 0.0 +average reward score: -3.240234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.66%) |Training time=0.41s (19.21%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79 +epoch: 0|step: 336|ppo_ep: 1|act_loss: 0.016204833984375|cri_loss: 0.0084686279296875|unsuper_loss: 0.0 +average reward score: -3.3359375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.88%) |Training time=0.40s (18.92%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.79 +epoch: 0|step: 337|ppo_ep: 1|act_loss: 0.00872039794921875|cri_loss: 0.005458831787109375|unsuper_loss: 0.0 +average reward score: -3.240234375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.87%) |Training time=0.41s (19.01%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.80 +epoch: 0|step: 338|ppo_ep: 1|act_loss: -0.020416259765625|cri_loss: -0.00981903076171875|unsuper_loss: 0.0 +average reward score: -3.111328125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.88%) |Training time=0.41s (18.91%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80 +[2023-04-21 23:50:57,389] [INFO] [logging.py:96:log_dist] [Rank 0] step=340, skipped=7, lr=[7.983483385000299e-06, 7.983483385000299e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:50:57,407] [INFO] [timer.py:199:stop] epoch=0/micro_step=340/global_step=340, RunningAvgSamplesPerSec=128.31040792702186, CurrSamplesPerSec=131.94731062605743, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:50:57,500] [INFO] [logging.py:96:log_dist] [Rank 0] step=340, skipped=6, lr=[4.129556415368261e-06, 4.129556415368261e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 339|ppo_ep: 1|act_loss: 0.0107269287109375|cri_loss: 0.006103515625|unsuper_loss: 0.0 +average reward score: -3.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.93%) |Training time=0.41s (18.94%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.80 +epoch: 0|step: 340|ppo_ep: 1|act_loss: -0.01557159423828125|cri_loss: -0.00743865966796875|unsuper_loss: 0.0 +average reward score: -3.150390625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.73%) |Training time=0.41s (19.08%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.80 +epoch: 0|step: 341|ppo_ep: 1|act_loss: 0.03826904296875|cri_loss: 0.01959228515625|unsuper_loss: 0.0 +average reward score: -3.291015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.69%) |Training time=0.41s (19.19%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.80 +epoch: 0|step: 342|ppo_ep: 1|act_loss: 0.034515380859375|cri_loss: 0.0180816650390625|unsuper_loss: 0.0 +average reward score: -3.01171875 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.84%) |Training time=0.40s (17.35%) |Others=0.11 (4.81%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.79 +epoch: 0|step: 343|ppo_ep: 1|act_loss: -0.0108795166015625|cri_loss: -0.004795074462890625|unsuper_loss: 0.0 +average reward score: -3.037109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.01%) |Training time=0.41s (18.89%) |Others=0.11 (5.09%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.79 +epoch: 0|step: 344|ppo_ep: 1|act_loss: -0.0290069580078125|cri_loss: -0.0127410888671875|unsuper_loss: 0.0 +average reward score: -3.07421875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.78%) |Training time=0.41s (19.03%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.79 +epoch: 0|step: 345|ppo_ep: 1|act_loss: -0.054351806640625|cri_loss: -0.0258941650390625|unsuper_loss: 0.0 +average reward score: -3.328125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.74%) |Training time=0.41s (19.13%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.79 +epoch: 0|step: 346|ppo_ep: 1|act_loss: 0.017852783203125|cri_loss: 0.01021575927734375|unsuper_loss: 0.0 +average reward score: -3.541015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.68%) |Training time=0.41s (19.13%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.79 +epoch: 0|step: 347|ppo_ep: 1|act_loss: 0.0250396728515625|cri_loss: 0.01320648193359375|unsuper_loss: 0.0 +average reward score: -3.064453125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.75%) |Training time=0.41s (19.06%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.80 +epoch: 0|step: 348|ppo_ep: 1|act_loss: 0.00946807861328125|cri_loss: 0.00527191162109375|unsuper_loss: 0.0 +average reward score: -3.248046875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.77%) |Training time=0.41s (19.02%) |Others=0.11 (5.21%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80 +[2023-04-21 23:51:19,031] [INFO] [logging.py:96:log_dist] [Rank 0] step=350, skipped=7, lr=[7.84719512726795e-06, 7.84719512726795e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:51:19,049] [INFO] [timer.py:199:stop] epoch=0/micro_step=350/global_step=350, RunningAvgSamplesPerSec=128.3622663332419, CurrSamplesPerSec=130.4942063169521, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:51:19,141] [INFO] [logging.py:96:log_dist] [Rank 0] step=350, skipped=6, lr=[4.058724504646834e-06, 4.058724504646834e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 349|ppo_ep: 1|act_loss: -0.01666259765625|cri_loss: -0.00795745849609375|unsuper_loss: 0.0 +average reward score: -3.275390625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.79%) |Training time=0.41s (19.04%) |Others=0.11 (5.17%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80 +epoch: 0|step: 350|ppo_ep: 1|act_loss: -0.00341796875|cri_loss: -0.0010671615600585938|unsuper_loss: 0.0 +average reward score: -3.029296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.66%) |Training time=0.41s (19.16%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.80 +epoch: 0|step: 351|ppo_ep: 1|act_loss: -0.00197601318359375|cri_loss: 0.0005855560302734375|unsuper_loss: 0.0 +average reward score: -3.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.84%) |Training time=0.41s (19.03%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.80 +epoch: 0|step: 352|ppo_ep: 1|act_loss: -0.046417236328125|cri_loss: -0.0222320556640625|unsuper_loss: 0.0 +average reward score: -3.353515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.28%) |Training time=0.42s (19.56%) |Others=0.11 (5.16%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.80 +epoch: 0|step: 353|ppo_ep: 1|act_loss: -0.042236328125|cri_loss: -0.0196533203125|unsuper_loss: 0.0 +average reward score: -3.1640625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.82%) |Training time=0.43s (19.69%) |Others=0.12 (5.49%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.80 +epoch: 0|step: 354|ppo_ep: 1|act_loss: 0.1561279296875|cri_loss: 0.10369873046875|unsuper_loss: 0.0 +average reward score: -2.791015625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.68%) |Training time=0.41s (19.12%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80 +epoch: 0|step: 355|ppo_ep: 1|act_loss: -0.10009765625|cri_loss: -0.047027587890625|unsuper_loss: 0.0 +average reward score: -3.47265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.86%) |Training time=0.41s (19.01%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80 +epoch: 0|step: 356|ppo_ep: 1|act_loss: -0.52490234375|cri_loss: -0.1448974609375|unsuper_loss: 0.0 +average reward score: -2.46484375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.96%) |Training time=0.40s (18.84%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.80 +epoch: 0|step: 357|ppo_ep: 1|act_loss: -0.1102294921875|cri_loss: -0.044647216796875|unsuper_loss: 0.0 +average reward score: -3.310546875 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.81s (77.96%) |Training time=0.40s (17.19%) |Others=0.11 (4.85%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.79 +epoch: 0|step: 358|ppo_ep: 1|act_loss: -0.145263671875|cri_loss: -0.0587158203125|unsuper_loss: 0.0 +average reward score: -2.7578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.70%) |Training time=0.41s (19.11%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.79 +[2023-04-21 23:51:40,706] [INFO] [logging.py:96:log_dist] [Rank 0] step=360, skipped=7, lr=[7.70681748897618e-06, 7.70681748897618e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:51:40,725] [INFO] [timer.py:199:stop] epoch=0/micro_step=360/global_step=360, RunningAvgSamplesPerSec=128.3869736994276, CurrSamplesPerSec=130.07156673863915, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:51:40,817] [INFO] [logging.py:96:log_dist] [Rank 0] step=360, skipped=6, lr=[3.985783458870134e-06, 3.985783458870134e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 359|ppo_ep: 1|act_loss: 0.08428955078125|cri_loss: 0.08966064453125|unsuper_loss: 0.0 +average reward score: -3.318359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.83%) |Training time=0.41s (19.06%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.79 +epoch: 0|step: 360|ppo_ep: 1|act_loss: 0.00537109375|cri_loss: 0.051544189453125|unsuper_loss: 0.0 +average reward score: -2.791015625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.65%) |Training time=0.41s (19.15%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.80 +epoch: 0|step: 361|ppo_ep: 1|act_loss: 0.467041015625|cri_loss: 0.276611328125|unsuper_loss: 0.0 +average reward score: -2.962890625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.74%) |Training time=0.41s (19.14%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.80 +epoch: 0|step: 362|ppo_ep: 1|act_loss: -0.0123291015625|cri_loss: -0.004428863525390625|unsuper_loss: 0.0 +average reward score: -2.564453125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.59%) |Training time=0.41s (19.23%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.80 +epoch: 0|step: 363|ppo_ep: 1|act_loss: 0.067138671875|cri_loss: 0.0399169921875|unsuper_loss: 0.0 +average reward score: -3.21875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.64%) |Training time=0.41s (19.17%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.80 +epoch: 0|step: 364|ppo_ep: 1|act_loss: 0.08740234375|cri_loss: 0.052581787109375|unsuper_loss: 0.0 +average reward score: -3.0 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.78%) |Training time=0.41s (19.04%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80 +epoch: 0|step: 365|ppo_ep: 1|act_loss: 0.0660400390625|cri_loss: 0.03924560546875|unsuper_loss: 0.0 +average reward score: -3.08203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.82%) |Training time=0.41s (19.05%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.80 +epoch: 0|step: 366|ppo_ep: 1|act_loss: 0.067138671875|cri_loss: 0.0382080078125|unsuper_loss: 0.0 +average reward score: -2.95703125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.70%) |Training time=0.41s (19.12%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.80 +epoch: 0|step: 367|ppo_ep: 1|act_loss: -0.007171630859375|cri_loss: 0.011077880859375|unsuper_loss: 0.0 +average reward score: -2.583984375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.15%) |Training time=0.40s (18.73%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.80 +epoch: 0|step: 368|ppo_ep: 1|act_loss: 0.05474853515625|cri_loss: 0.030670166015625|unsuper_loss: 0.0 +average reward score: -2.748046875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.77%) |Training time=0.41s (19.03%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80 +[2023-04-21 23:52:02,156] [INFO] [logging.py:96:log_dist] [Rank 0] step=370, skipped=7, lr=[7.56254041735236e-06, 7.56254041735236e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:52:02,175] [INFO] [timer.py:199:stop] epoch=0/micro_step=370/global_step=370, RunningAvgSamplesPerSec=128.42285901291487, CurrSamplesPerSec=128.94528427142566, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:52:02,267] [INFO] [logging.py:96:log_dist] [Rank 0] step=370, skipped=6, lr=[3.910831975733717e-06, 3.910831975733717e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 369|ppo_ep: 1|act_loss: -0.0069732666015625|cri_loss: 0.0111236572265625|unsuper_loss: 0.0 +average reward score: -3.09765625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.75%) |Training time=0.41s (19.15%) |Others=0.11 (5.09%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.80 +epoch: 0|step: 370|ppo_ep: 1|act_loss: 0.046630859375|cri_loss: 0.028472900390625|unsuper_loss: 0.0 +average reward score: -2.572265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.53%) |Training time=0.41s (19.29%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.80 +epoch: 0|step: 371|ppo_ep: 1|act_loss: 0.0626220703125|cri_loss: 0.036376953125|unsuper_loss: 0.0 +average reward score: -3.16796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.03%) |Training time=0.41s (18.80%) |Others=0.11 (5.17%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.80 +epoch: 0|step: 372|ppo_ep: 1|act_loss: -0.0172576904296875|cri_loss: 0.0005035400390625|unsuper_loss: 0.0 +average reward score: -2.599609375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.93%) |Training time=0.41s (18.93%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.80 +epoch: 0|step: 373|ppo_ep: 1|act_loss: 0.0159454345703125|cri_loss: 0.009796142578125|unsuper_loss: 0.0 +average reward score: -3.115234375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.04%) |Training time=0.46s (20.81%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.80 +epoch: 0|step: 374|ppo_ep: 1|act_loss: 0.05450439453125|cri_loss: 0.02813720703125|unsuper_loss: 0.0 +average reward score: -2.833984375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.74%) |Training time=0.41s (19.07%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.80 +epoch: 0|step: 375|ppo_ep: 1|act_loss: 0.49609375|cri_loss: 0.3818359375|unsuper_loss: 0.0 +average reward score: -2.49609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.74%) |Training time=0.41s (19.11%) |Others=0.11 (5.16%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.80 +epoch: 0|step: 376|ppo_ep: 1|act_loss: 0.0114898681640625|cri_loss: 0.00635528564453125|unsuper_loss: 0.0 +average reward score: -2.494140625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.87%) |Training time=0.41s (18.93%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80 +epoch: 0|step: 377|ppo_ep: 1|act_loss: -0.131103515625|cri_loss: -0.00457763671875|unsuper_loss: 0.0 +average reward score: -2.17578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.94%) |Training time=0.41s (18.95%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.80 +epoch: 0|step: 378|ppo_ep: 1|act_loss: -0.07830810546875|cri_loss: -0.0125732421875|unsuper_loss: 0.0 +average reward score: -2.91015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.66%) |Training time=0.41s (19.16%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.80 +[2023-04-21 23:52:23,689] [INFO] [logging.py:96:log_dist] [Rank 0] step=380, skipped=7, lr=[7.414559136009473e-06, 7.414559136009473e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:52:23,707] [INFO] [timer.py:199:stop] epoch=0/micro_step=380/global_step=380, RunningAvgSamplesPerSec=128.4265231794791, CurrSamplesPerSec=129.32609768130243, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:52:23,800] [INFO] [logging.py:96:log_dist] [Rank 0] step=380, skipped=6, lr=[3.833971473288084e-06, 3.833971473288084e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 379|ppo_ep: 1|act_loss: 0.0286102294921875|cri_loss: 0.016632080078125|unsuper_loss: 0.0 +average reward score: -2.91015625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.75%) |Training time=0.41s (19.13%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.80 +epoch: 0|step: 380|ppo_ep: 1|act_loss: 0.005611419677734375|cri_loss: 0.0057220458984375|unsuper_loss: 0.0 +average reward score: -2.734375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.57%) |Training time=0.41s (19.24%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.80 +epoch: 0|step: 381|ppo_ep: 1|act_loss: 0.0736083984375|cri_loss: 0.0499267578125|unsuper_loss: 0.0 +average reward score: -2.40234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.93%) |Training time=0.41s (18.99%) |Others=0.11 (5.08%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.80 +epoch: 0|step: 382|ppo_ep: 1|act_loss: -0.00743865966796875|cri_loss: 0.00220489501953125|unsuper_loss: 0.0 +average reward score: -2.951171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.57%) |Training time=0.42s (19.26%) |Others=0.11 (5.17%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.80 +epoch: 0|step: 383|ppo_ep: 1|act_loss: 0.1107177734375|cri_loss: 0.07763671875|unsuper_loss: 0.0 +average reward score: -2.509765625 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.34%) |Training time=0.43s (19.56%) |Others=0.11 (5.09%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.80 +epoch: 0|step: 384|ppo_ep: 1|act_loss: 0.1396484375|cri_loss: 0.0743408203125|unsuper_loss: 0.0 +average reward score: -2.666015625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.75%) |Training time=0.41s (19.12%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80 +epoch: 0|step: 385|ppo_ep: 1|act_loss: 0.2626953125|cri_loss: 0.140869140625|unsuper_loss: 0.0 +average reward score: -2.35546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.75%) |Training time=0.41s (19.14%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.80 +epoch: 0|step: 386|ppo_ep: 1|act_loss: 0.2802734375|cri_loss: 0.16455078125|unsuper_loss: 0.0 +average reward score: -2.08203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.70%) |Training time=0.41s (19.12%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.80 +epoch: 0|step: 387|ppo_ep: 1|act_loss: 0.134521484375|cri_loss: 0.08135986328125|unsuper_loss: 0.0 +average reward score: -2.234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.00%) |Training time=0.40s (18.87%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.80 +epoch: 0|step: 388|ppo_ep: 1|act_loss: 0.43701171875|cri_loss: 0.2939453125|unsuper_loss: 0.0 +average reward score: -2.03125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.72%) |Training time=0.41s (19.07%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.80 +[2023-04-21 23:52:45,396] [INFO] [logging.py:96:log_dist] [Rank 0] step=390, skipped=7, lr=[7.263073880785904e-06, 7.263073880785904e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:52:45,415] [INFO] [timer.py:199:stop] epoch=0/micro_step=390/global_step=390, RunningAvgSamplesPerSec=128.43768873407188, CurrSamplesPerSec=130.7252716427912, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:52:45,507] [INFO] [logging.py:96:log_dist] [Rank 0] step=390, skipped=6, lr=[3.7553059527082913e-06, 3.7553059527082913e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 389|ppo_ep: 1|act_loss: -0.04669189453125|cri_loss: -0.006866455078125|unsuper_loss: 0.0 +average reward score: -1.6533203125 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.65%) |Training time=0.41s (17.61%) |Others=0.11 (4.74%)|CurSamplesPerSec=13.82 |AvgSamplesPerSec=14.80 +epoch: 0|step: 390|ppo_ep: 1|act_loss: 0.0146636962890625|cri_loss: 0.017120361328125|unsuper_loss: 0.0 +average reward score: -2.138671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.58%) |Training time=0.41s (19.23%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.80 +epoch: 0|step: 391|ppo_ep: 1|act_loss: -0.08355712890625|cri_loss: 0.01141357421875|unsuper_loss: 0.0 +average reward score: -1.2939453125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.78%) |Training time=0.41s (19.10%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.80 +epoch: 0|step: 392|ppo_ep: 1|act_loss: 0.0709228515625|cri_loss: 0.086181640625|unsuper_loss: 0.0 +average reward score: -2.099609375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.84%) |Training time=0.41s (18.98%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80 +epoch: 0|step: 393|ppo_ep: 1|act_loss: -0.19091796875|cri_loss: -0.0277099609375|unsuper_loss: 0.0 +average reward score: -1.607421875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.89%) |Training time=0.41s (18.93%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80 +epoch: 0|step: 394|ppo_ep: 1|act_loss: -0.13671875|cri_loss: -0.04266357421875|unsuper_loss: 0.0 +average reward score: -2.095703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.80%) |Training time=0.41s (19.01%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.80 +epoch: 0|step: 395|ppo_ep: 1|act_loss: -0.0543212890625|cri_loss: -0.0179901123046875|unsuper_loss: 0.0 +average reward score: -2.056640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.75%) |Training time=0.41s (19.13%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.80 +epoch: 0|step: 396|ppo_ep: 1|act_loss: -0.08978271484375|cri_loss: -0.009521484375|unsuper_loss: 0.0 +average reward score: -1.8330078125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.86%) |Training time=0.41s (18.89%) |Others=0.11 (5.24%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80 +epoch: 0|step: 397|ppo_ep: 1|act_loss: 0.1724853515625|cri_loss: 0.1109619140625|unsuper_loss: 0.0 +average reward score: -2.03515625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.83%) |Training time=0.41s (19.03%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.80 +epoch: 0|step: 398|ppo_ep: 1|act_loss: 0.001979827880859375|cri_loss: 0.0027256011962890625|unsuper_loss: 0.0 +average reward score: -1.17578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.60%) |Training time=0.41s (19.22%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.80 +[2023-04-21 23:53:06,868] [INFO] [logging.py:96:log_dist] [Rank 0] step=400, skipped=7, lr=[7.108289628803138e-06, 7.108289628803138e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:53:06,886] [INFO] [timer.py:199:stop] epoch=0/micro_step=400/global_step=400, RunningAvgSamplesPerSec=128.47943609625108, CurrSamplesPerSec=132.42613482199326, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:53:06,978] [INFO] [logging.py:96:log_dist] [Rank 0] step=400, skipped=6, lr=[3.6749418575683005e-06, 3.6749418575683005e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 399|ppo_ep: 1|act_loss: -0.1566162109375|cri_loss: -0.063720703125|unsuper_loss: 0.0 +average reward score: -1.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.19%) |Training time=0.40s (18.74%) |Others=0.11 (5.07%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.80 +[2023-04-21 23:53:09,003] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, but hysteresis is 2. Reducing hysteresis to 1 +[2023-04-21 23:53:09,088] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 400|ppo_ep: 1|act_loss: 0.1396484375|cri_loss: 0.07928466796875|unsuper_loss: 0.0 +average reward score: -3.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.62s (76.99%) |Training time=0.38s (18.15%) |Others=0.10 (4.86%)|CurSamplesPerSec=15.18 |AvgSamplesPerSec=14.80 +epoch: 0|step: 401|ppo_ep: 1|act_loss: 0.01202392578125|cri_loss: 0.018402099609375|unsuper_loss: 0.0 +average reward score: -1.294921875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.91%) |Training time=0.41s (18.89%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.80 +epoch: 0|step: 402|ppo_ep: 1|act_loss: 0.045501708984375|cri_loss: 0.03936767578125|unsuper_loss: 0.0 +average reward score: -0.3251953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.72%) |Training time=0.41s (19.10%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.80 +epoch: 0|step: 403|ppo_ep: 1|act_loss: 0.0083465576171875|cri_loss: 0.013916015625|unsuper_loss: 0.0 +average reward score: -1.5771484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.84%) |Training time=0.41s (19.03%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.80 +epoch: 0|step: 404|ppo_ep: 1|act_loss: 0.258544921875|cri_loss: 0.1500244140625|unsuper_loss: 0.0 +average reward score: -1.0009765625 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.63%) |Training time=0.41s (17.58%) |Others=0.11 (4.79%)|CurSamplesPerSec=13.77 |AvgSamplesPerSec=14.80 +epoch: 0|step: 405|ppo_ep: 1|act_loss: 0.07568359375|cri_loss: 0.05047607421875|unsuper_loss: 0.0 +average reward score: -1.263671875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.78%) |Training time=0.41s (19.10%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.80 +epoch: 0|step: 406|ppo_ep: 1|act_loss: 0.085693359375|cri_loss: 0.05291748046875|unsuper_loss: 0.0 +average reward score: -0.6455078125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.86%) |Training time=0.41s (18.92%) |Others=0.11 (5.23%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.80 +epoch: 0|step: 407|ppo_ep: 1|act_loss: 0.181884765625|cri_loss: 0.09857177734375|unsuper_loss: 0.0 +average reward score: -2.736328125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.91%) |Training time=0.41s (18.98%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.80 +epoch: 0|step: 408|ppo_ep: 1|act_loss: 0.158447265625|cri_loss: 0.0875244140625|unsuper_loss: 0.0 +average reward score: -1.48046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.65%) |Training time=0.41s (19.17%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.80 +[2023-04-21 23:53:28,464] [INFO] [logging.py:96:log_dist] [Rank 0] step=410, skipped=8, lr=[6.966336175129223e-06, 6.966336175129223e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:53:28,483] [INFO] [timer.py:199:stop] epoch=0/micro_step=410/global_step=410, RunningAvgSamplesPerSec=128.54989718289136, CurrSamplesPerSec=128.8609771786822, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:53:28,575] [INFO] [logging.py:96:log_dist] [Rank 0] step=410, skipped=7, lr=[3.6012517207813124e-06, 3.6012517207813124e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 409|ppo_ep: 1|act_loss: 0.095703125|cri_loss: 0.05157470703125|unsuper_loss: 0.0 +average reward score: -2.11328125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.74%) |Training time=0.41s (19.16%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.80 +epoch: 0|step: 410|ppo_ep: 1|act_loss: 0.05682373046875|cri_loss: 0.037109375|unsuper_loss: 0.0 +average reward score: -2.6484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.60%) |Training time=0.41s (19.22%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.80 +epoch: 0|step: 411|ppo_ep: 1|act_loss: -0.013153076171875|cri_loss: -0.00165557861328125|unsuper_loss: 0.0 +average reward score: -1.658203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.70%) |Training time=0.41s (19.18%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.80 +epoch: 0|step: 412|ppo_ep: 1|act_loss: -0.0157012939453125|cri_loss: 0.0013580322265625|unsuper_loss: 0.0 +average reward score: -1.9765625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.27%) |Training time=0.42s (19.57%) |Others=0.11 (5.16%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.80 +epoch: 0|step: 413|ppo_ep: 1|act_loss: 0.03375244140625|cri_loss: 0.0190582275390625|unsuper_loss: 0.0 +average reward score: -2.056640625 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.68s (75.46%) |Training time=0.44s (19.59%) |Others=0.11 (4.95%)|CurSamplesPerSec=14.40 |AvgSamplesPerSec=14.80 +epoch: 0|step: 414|ppo_ep: 1|act_loss: -0.0297698974609375|cri_loss: -0.0040740966796875|unsuper_loss: 0.0 +average reward score: -0.94189453125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.77%) |Training time=0.41s (19.05%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.80 +epoch: 0|step: 415|ppo_ep: 1|act_loss: -0.07012939453125|cri_loss: -0.0224456787109375|unsuper_loss: 0.0 +average reward score: -1.14453125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.86%) |Training time=0.41s (19.02%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.80 +epoch: 0|step: 416|ppo_ep: 1|act_loss: 0.0183258056640625|cri_loss: 0.01177978515625|unsuper_loss: 0.0 +average reward score: -1.537109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.63%) |Training time=0.41s (19.21%) |Others=0.11 (5.16%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.80 +epoch: 0|step: 417|ppo_ep: 1|act_loss: 0.04827880859375|cri_loss: 0.031829833984375|unsuper_loss: 0.0 +average reward score: -1.6015625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.92%) |Training time=0.40s (18.88%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.80 +epoch: 0|step: 418|ppo_ep: 1|act_loss: 0.0028438568115234375|cri_loss: 0.00577545166015625|unsuper_loss: 0.0 +average reward score: -2.248046875 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.76%) |Training time=0.40s (17.36%) |Others=0.11 (4.88%)|CurSamplesPerSec=13.79 |AvgSamplesPerSec=14.80 +[2023-04-21 23:53:50,196] [INFO] [logging.py:96:log_dist] [Rank 0] step=420, skipped=8, lr=[6.805864300541598e-06, 6.805864300541598e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:53:50,214] [INFO] [timer.py:199:stop] epoch=0/micro_step=420/global_step=420, RunningAvgSamplesPerSec=128.56527804260318, CurrSamplesPerSec=128.33657606523457, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:53:50,307] [INFO] [logging.py:96:log_dist] [Rank 0] step=420, skipped=7, lr=[3.5179616991058513e-06, 3.5179616991058513e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 419|ppo_ep: 1|act_loss: 0.1199951171875|cri_loss: 0.06622314453125|unsuper_loss: 0.0 +average reward score: -1.939453125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.66%) |Training time=0.41s (19.22%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.80 +epoch: 0|step: 420|ppo_ep: 1|act_loss: 0.07080078125|cri_loss: 0.040679931640625|unsuper_loss: 0.0 +average reward score: -0.76171875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.56%) |Training time=0.41s (19.25%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80 +epoch: 0|step: 421|ppo_ep: 1|act_loss: 0.10882568359375|cri_loss: 0.05914306640625|unsuper_loss: 0.0 +average reward score: -1.1044921875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.64%) |Training time=0.41s (19.20%) |Others=0.11 (5.17%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.80 +epoch: 0|step: 422|ppo_ep: 1|act_loss: 0.1099853515625|cri_loss: 0.0634765625|unsuper_loss: 0.0 +average reward score: -1.3974609375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.73%) |Training time=0.41s (19.08%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.80 +epoch: 0|step: 423|ppo_ep: 1|act_loss: 0.1416015625|cri_loss: 0.07623291015625|unsuper_loss: 0.0 +average reward score: -1.330078125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.88%) |Training time=0.41s (18.99%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.80 +epoch: 0|step: 424|ppo_ep: 1|act_loss: -0.062744140625|cri_loss: -0.02197265625|unsuper_loss: 0.0 +average reward score: -2.21484375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.80%) |Training time=0.41s (19.06%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.80 +epoch: 0|step: 425|ppo_ep: 1|act_loss: -0.0972900390625|cri_loss: -0.0452880859375|unsuper_loss: 0.0 +average reward score: -0.423583984375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.77%) |Training time=0.41s (19.09%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.80 +epoch: 0|step: 426|ppo_ep: 1|act_loss: -0.034149169921875|cri_loss: -0.012603759765625|unsuper_loss: 0.0 +average reward score: -1.7529296875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.56%) |Training time=0.41s (19.32%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.80 +epoch: 0|step: 427|ppo_ep: 1|act_loss: 0.03985595703125|cri_loss: 0.031280517578125|unsuper_loss: 0.0 +average reward score: -1.69921875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.11%) |Training time=0.40s (18.77%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.80 +epoch: 0|step: 428|ppo_ep: 1|act_loss: -0.0450439453125|cri_loss: -0.0140380859375|unsuper_loss: 0.0 +average reward score: -1.677734375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.90%) |Training time=0.41s (18.97%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.80 +[2023-04-21 23:54:11,627] [INFO] [logging.py:96:log_dist] [Rank 0] step=430, skipped=8, lr=[6.642712086789769e-06, 6.642712086789769e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:54:11,646] [INFO] [timer.py:199:stop] epoch=0/micro_step=430/global_step=430, RunningAvgSamplesPerSec=128.5960552961456, CurrSamplesPerSec=130.29430586188687, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:54:11,738] [INFO] [logging.py:96:log_dist] [Rank 0] step=430, skipped=7, lr=[3.43329425717549e-06, 3.43329425717549e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 429|ppo_ep: 1|act_loss: -0.02001953125|cri_loss: -0.00357818603515625|unsuper_loss: 0.0 +average reward score: -1.1015625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.80%) |Training time=0.41s (19.08%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.80 +epoch: 0|step: 430|ppo_ep: 1|act_loss: -0.10736083984375|cri_loss: -0.04388427734375|unsuper_loss: 0.0 +average reward score: -0.8544921875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.64%) |Training time=0.41s (19.23%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.80 +epoch: 0|step: 431|ppo_ep: 1|act_loss: -0.02264404296875|cri_loss: -0.003387451171875|unsuper_loss: 0.0 +average reward score: -1.4892578125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.67%) |Training time=0.41s (19.20%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80 +epoch: 0|step: 432|ppo_ep: 1|act_loss: -0.01134490966796875|cri_loss: 1.52587890625e-05|unsuper_loss: 0.0 +average reward score: -1.4912109375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.76%) |Training time=0.41s (19.11%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.80 +epoch: 0|step: 433|ppo_ep: 1|act_loss: 0.04595947265625|cri_loss: 0.03076171875|unsuper_loss: 0.0 +average reward score: -0.72021484375 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.87%) |Training time=0.40s (17.38%) |Others=0.11 (4.76%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.80 +epoch: 0|step: 434|ppo_ep: 1|act_loss: 0.06781005859375|cri_loss: 0.0389404296875|unsuper_loss: 0.0 +average reward score: -0.701171875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.71%) |Training time=0.41s (19.17%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80 +epoch: 0|step: 435|ppo_ep: 1|act_loss: 0.159423828125|cri_loss: 0.0850830078125|unsuper_loss: 0.0 +average reward score: -1.384765625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.48%) |Training time=0.42s (19.41%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.80 +epoch: 0|step: 436|ppo_ep: 1|act_loss: 0.006256103515625|cri_loss: 0.0110015869140625|unsuper_loss: 0.0 +average reward score: -0.005615234375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.75%) |Training time=0.41s (19.13%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80 +epoch: 0|step: 437|ppo_ep: 1|act_loss: 0.0570068359375|cri_loss: 0.031219482421875|unsuper_loss: 0.0 +average reward score: -0.34619140625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.84%) |Training time=0.41s (19.03%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.80 +epoch: 0|step: 438|ppo_ep: 1|act_loss: 0.08782958984375|cri_loss: 0.05389404296875|unsuper_loss: 0.0 +average reward score: -0.9169921875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.90%) |Training time=0.41s (18.96%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.80 +[2023-04-21 23:54:33,227] [INFO] [logging.py:96:log_dist] [Rank 0] step=440, skipped=8, lr=[6.477100297743388e-06, 6.477100297743388e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:54:33,245] [INFO] [timer.py:199:stop] epoch=0/micro_step=440/global_step=440, RunningAvgSamplesPerSec=128.61663589271615, CurrSamplesPerSec=129.5595441496437, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:54:33,331] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192 +[2023-04-21 23:54:33,331] [INFO] [logging.py:96:log_dist] [Rank 0] step=440, skipped=8, lr=[3.356010516965486e-06, 3.356010516965486e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 439|ppo_ep: 1|act_loss: 0.0823974609375|cri_loss: 0.0489501953125|unsuper_loss: 0.0 +average reward score: -0.68359375 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.62s (76.07%) |Training time=0.41s (19.20%) |Others=0.10 (4.74%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.80 +epoch: 0|step: 440|ppo_ep: 1|act_loss: 0.05072021484375|cri_loss: 0.03387451171875|unsuper_loss: 0.0 +average reward score: -0.57275390625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.75%) |Training time=0.41s (19.11%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.80 +epoch: 0|step: 441|ppo_ep: 1|act_loss: -0.08514404296875|cri_loss: -0.037109375|unsuper_loss: 0.0 +average reward score: -0.2174072265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.88%) |Training time=0.41s (18.98%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.80 +epoch: 0|step: 442|ppo_ep: 1|act_loss: -0.02337646484375|cri_loss: 0.000213623046875|unsuper_loss: 0.0 +average reward score: -1.169921875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.43%) |Training time=0.41s (19.20%) |Others=0.12 (5.37%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.80 +epoch: 0|step: 443|ppo_ep: 1|act_loss: 0.06756591796875|cri_loss: 0.038299560546875|unsuper_loss: 0.0 +average reward score: -1.412109375 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.45%) |Training time=0.44s (19.66%) |Others=0.11 (4.90%)|CurSamplesPerSec=14.21 |AvgSamplesPerSec=14.80 +epoch: 0|step: 444|ppo_ep: 1|act_loss: 0.0401611328125|cri_loss: 0.0350341796875|unsuper_loss: 0.0 +average reward score: -0.250732421875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.97%) |Training time=0.40s (18.88%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.80 +epoch: 0|step: 445|ppo_ep: 1|act_loss: 0.0426025390625|cri_loss: 0.029022216796875|unsuper_loss: 0.0 +average reward score: -0.160888671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.78%) |Training time=0.41s (19.09%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.80 +epoch: 0|step: 446|ppo_ep: 1|act_loss: -0.05694580078125|cri_loss: -0.0224609375|unsuper_loss: 0.0 +average reward score: -1.337890625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.89%) |Training time=0.41s (18.98%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80 +epoch: 0|step: 447|ppo_ep: 1|act_loss: -0.225341796875|cri_loss: -0.06365966796875|unsuper_loss: 0.0 +average reward score: -0.393310546875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.01%) |Training time=0.40s (18.87%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80 +epoch: 0|step: 448|ppo_ep: 1|act_loss: -0.0220947265625|cri_loss: -0.00424957275390625|unsuper_loss: 0.0 +average reward score: -1.837890625 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.86%) |Training time=0.40s (17.37%) |Others=0.11 (4.77%)|CurSamplesPerSec=13.86 |AvgSamplesPerSec=14.80 +[2023-04-21 23:54:54,949] [INFO] [logging.py:96:log_dist] [Rank 0] step=450, skipped=8, lr=[6.309253025362775e-06, 6.309253025362775e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:54:54,967] [INFO] [timer.py:199:stop] epoch=0/micro_step=450/global_step=450, RunningAvgSamplesPerSec=128.63759509044516, CurrSamplesPerSec=130.90759846520874, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:54:55,059] [INFO] [logging.py:96:log_dist] [Rank 0] step=450, skipped=8, lr=[3.2690430183226814e-06, 3.2690430183226814e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 449|ppo_ep: 1|act_loss: 0.0267791748046875|cri_loss: 0.0187225341796875|unsuper_loss: 0.0 +average reward score: -0.71337890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.05%) |Training time=0.41s (18.87%) |Others=0.11 (5.08%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.80 +epoch: 0|step: 450|ppo_ep: 1|act_loss: 0.006755828857421875|cri_loss: 0.0121002197265625|unsuper_loss: 0.0 +average reward score: -0.953125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.91%) |Training time=0.41s (18.95%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.80 +epoch: 0|step: 451|ppo_ep: 1|act_loss: 0.0018024444580078125|cri_loss: 0.0056610107421875|unsuper_loss: 0.0 +average reward score: -0.9111328125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.88%) |Training time=0.41s (18.99%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.80 +epoch: 0|step: 452|ppo_ep: 1|act_loss: 0.0628662109375|cri_loss: 0.03326416015625|unsuper_loss: 0.0 +average reward score: -0.74658203125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.96%) |Training time=0.40s (18.91%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.80 +epoch: 0|step: 453|ppo_ep: 1|act_loss: 0.912109375|cri_loss: 0.58984375|unsuper_loss: 0.0 +average reward score: -0.396728515625 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.62s (76.04%) |Training time=0.40s (18.82%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.80 +epoch: 0|step: 454|ppo_ep: 1|act_loss: -0.08624267578125|cri_loss: -0.029388427734375|unsuper_loss: 0.0 +average reward score: -0.7763671875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.92%) |Training time=0.41s (18.93%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.80 +epoch: 0|step: 455|ppo_ep: 1|act_loss: -0.068359375|cri_loss: -0.0296630859375|unsuper_loss: 0.0 +average reward score: -0.63037109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.25%) |Training time=0.40s (18.65%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.80 +epoch: 0|step: 456|ppo_ep: 1|act_loss: -0.039031982421875|cri_loss: -0.0080718994140625|unsuper_loss: 0.0 +average reward score: -1.189453125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.04%) |Training time=0.40s (18.82%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.80 +epoch: 0|step: 457|ppo_ep: 1|act_loss: -0.0726318359375|cri_loss: -0.0270233154296875|unsuper_loss: 0.0 +average reward score: -1.064453125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.12%) |Training time=0.40s (18.75%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.80 +epoch: 0|step: 458|ppo_ep: 1|act_loss: -0.0672607421875|cri_loss: -0.027008056640625|unsuper_loss: 0.0 +average reward score: -1.244140625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.00%) |Training time=0.40s (18.86%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.80 +[2023-04-21 23:55:16,358] [INFO] [logging.py:96:log_dist] [Rank 0] step=460, skipped=8, lr=[6.139397386476498e-06, 6.139397386476498e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:55:16,377] [INFO] [timer.py:199:stop] epoch=0/micro_step=460/global_step=460, RunningAvgSamplesPerSec=128.72123352291797, CurrSamplesPerSec=131.59581302136237, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:55:16,469] [INFO] [logging.py:96:log_dist] [Rank 0] step=460, skipped=8, lr=[3.181034915272797e-06, 3.181034915272797e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 459|ppo_ep: 1|act_loss: 0.0132598876953125|cri_loss: 0.01461029052734375|unsuper_loss: 0.0 +average reward score: -0.372314453125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.93%) |Training time=0.41s (18.96%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.80 +epoch: 0|step: 460|ppo_ep: 1|act_loss: 0.023040771484375|cri_loss: 0.0181884765625|unsuper_loss: 0.0 +average reward score: -1.369140625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.89%) |Training time=0.41s (18.98%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.80 +epoch: 0|step: 461|ppo_ep: 1|act_loss: 0.0279541015625|cri_loss: 0.0188140869140625|unsuper_loss: 0.0 +average reward score: -1.083984375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.99%) |Training time=0.40s (18.88%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.81 +epoch: 0|step: 462|ppo_ep: 1|act_loss: 0.0711669921875|cri_loss: 0.041351318359375|unsuper_loss: 0.0 +average reward score: 0.066162109375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.87%) |Training time=0.41s (19.00%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 463|ppo_ep: 1|act_loss: 0.15478515625|cri_loss: 0.08837890625|unsuper_loss: 0.0 +average reward score: -0.317138671875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.89%) |Training time=0.41s (18.99%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 464|ppo_ep: 1|act_loss: 0.084228515625|cri_loss: 0.044769287109375|unsuper_loss: 0.0 +average reward score: -1.904296875 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.77%) |Training time=0.40s (17.46%) |Others=0.11 (4.77%)|CurSamplesPerSec=13.86 |AvgSamplesPerSec=14.80 +epoch: 0|step: 465|ppo_ep: 1|act_loss: 0.0687255859375|cri_loss: 0.0438232421875|unsuper_loss: 0.0 +average reward score: 0.1611328125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.28%) |Training time=0.40s (18.58%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80 +[2023-04-21 23:55:31,622] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096 +epoch: 0|step: 466|ppo_ep: 1|act_loss: 1.84765625|cri_loss: 1.236328125|unsuper_loss: 0.0 +average reward score: -1.001953125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.24%) |Training time=0.41s (19.01%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.80 +epoch: 0|step: 467|ppo_ep: 1|act_loss: -0.025054931640625|cri_loss: -0.00600433349609375|unsuper_loss: 0.0 +average reward score: -1.671875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.92%) |Training time=0.41s (18.95%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.80 +epoch: 0|step: 468|ppo_ep: 1|act_loss: 0.03363037109375|cri_loss: 0.0174407958984375|unsuper_loss: 0.0 +average reward score: -0.268310546875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.92%) |Training time=0.41s (18.96%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.80 +[2023-04-21 23:55:37,937] [INFO] [logging.py:96:log_dist] [Rank 0] step=470, skipped=8, lr=[5.967763215465968e-06, 5.967763215465968e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:55:37,955] [INFO] [timer.py:199:stop] epoch=0/micro_step=470/global_step=470, RunningAvgSamplesPerSec=128.79156542499263, CurrSamplesPerSec=132.30629889979465, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:55:38,048] [INFO] [logging.py:96:log_dist] [Rank 0] step=470, skipped=9, lr=[3.101036303152072e-06, 3.101036303152072e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 469|ppo_ep: 1|act_loss: -0.08905029296875|cri_loss: -0.01934814453125|unsuper_loss: 0.0 +average reward score: 0.006591796875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.01%) |Training time=0.40s (18.88%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80 +epoch: 0|step: 470|ppo_ep: 1|act_loss: 0.021240234375|cri_loss: 0.0150299072265625|unsuper_loss: 0.0 +average reward score: -1.2783203125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.04%) |Training time=0.40s (18.84%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 471|ppo_ep: 1|act_loss: -0.0386962890625|cri_loss: -0.015380859375|unsuper_loss: 0.0 +average reward score: -0.73095703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.94%) |Training time=0.41s (18.92%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 472|ppo_ep: 1|act_loss: -0.0802001953125|cri_loss: -0.0303192138671875|unsuper_loss: 0.0 +average reward score: -1.0439453125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.85%) |Training time=0.41s (19.03%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.81 +epoch: 0|step: 473|ppo_ep: 1|act_loss: 0.0972900390625|cri_loss: 0.06365966796875|unsuper_loss: 0.0 +average reward score: -1.13671875 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.74s (77.05%) |Training time=0.41s (18.07%) |Others=0.11 (4.88%)|CurSamplesPerSec=14.17 |AvgSamplesPerSec=14.80 +epoch: 0|step: 474|ppo_ep: 1|act_loss: -0.0191192626953125|cri_loss: 0.0012054443359375|unsuper_loss: 0.0 +average reward score: -1.75390625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.76%) |Training time=0.41s (19.11%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80 +epoch: 0|step: 475|ppo_ep: 1|act_loss: 0.326904296875|cri_loss: 0.1962890625|unsuper_loss: 0.0 +average reward score: -1.3583984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.79%) |Training time=0.41s (19.04%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.80 +epoch: 0|step: 476|ppo_ep: 1|act_loss: -0.06396484375|cri_loss: -0.026275634765625|unsuper_loss: 0.0 +average reward score: -0.96875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.74%) |Training time=0.41s (19.13%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.80 +epoch: 0|step: 477|ppo_ep: 1|act_loss: 0.092529296875|cri_loss: 0.066650390625|unsuper_loss: 0.0 +average reward score: -0.70263671875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.74%) |Training time=0.41s (19.12%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 478|ppo_ep: 1|act_loss: -0.096435546875|cri_loss: -0.0382080078125|unsuper_loss: 0.0 +average reward score: 0.263427734375 +------------------------------------------------------------------------------------- +|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.73s (75.51%) |Training time=0.45s (19.68%) |Others=0.11 (4.81%)|CurSamplesPerSec=13.94 |AvgSamplesPerSec=14.80 +[2023-04-21 23:55:59,655] [INFO] [logging.py:96:log_dist] [Rank 0] step=480, skipped=8, lr=[5.794582753272854e-06, 5.794582753272854e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:55:59,674] [INFO] [timer.py:199:stop] epoch=0/micro_step=480/global_step=480, RunningAvgSamplesPerSec=128.78813216201362, CurrSamplesPerSec=128.829807779064, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:55:59,766] [INFO] [logging.py:96:log_dist] [Rank 0] step=480, skipped=9, lr=[3.011380165315503e-06, 3.011380165315503e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 479|ppo_ep: 1|act_loss: -0.2470703125|cri_loss: -0.0894775390625|unsuper_loss: 0.0 +average reward score: -1.857421875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.71%) |Training time=0.41s (19.18%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.80 +epoch: 0|step: 480|ppo_ep: 1|act_loss: -0.152099609375|cri_loss: -0.05255126953125|unsuper_loss: 0.0 +average reward score: -0.74072265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.56%) |Training time=0.41s (19.30%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80 +epoch: 0|step: 481|ppo_ep: 1|act_loss: -0.06427001953125|cri_loss: -0.022613525390625|unsuper_loss: 0.0 +average reward score: -1.025390625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.77%) |Training time=0.41s (19.09%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.80 +epoch: 0|step: 482|ppo_ep: 1|act_loss: 0.171630859375|cri_loss: 0.1043701171875|unsuper_loss: 0.0 +average reward score: -1.021484375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.58%) |Training time=0.41s (19.29%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.80 +[2023-04-21 23:56:08,239] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096 +epoch: 0|step: 483|ppo_ep: 1|act_loss: 0.046478271484375|cri_loss: 0.03521728515625|unsuper_loss: 0.0 +average reward score: -1.263671875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.70%) |Training time=0.39s (18.14%) |Others=0.11 (5.16%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.80 +epoch: 0|step: 484|ppo_ep: 1|act_loss: -0.138427734375|cri_loss: -0.059814453125|unsuper_loss: 0.0 +average reward score: -0.9912109375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.56%) |Training time=0.41s (19.30%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80 +epoch: 0|step: 485|ppo_ep: 1|act_loss: 0.0533447265625|cri_loss: 0.0283355712890625|unsuper_loss: 0.0 +average reward score: -1.763671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.74%) |Training time=0.41s (19.14%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 486|ppo_ep: 1|act_loss: -0.0673828125|cri_loss: -0.023956298828125|unsuper_loss: 0.0 +average reward score: -1.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.75%) |Training time=0.41s (19.11%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 487|ppo_ep: 1|act_loss: -0.082275390625|cri_loss: -0.02606201171875|unsuper_loss: 0.0 +average reward score: -0.16162109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.53%) |Training time=0.42s (19.35%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81 +epoch: 0|step: 488|ppo_ep: 1|act_loss: 0.11376953125|cri_loss: 0.06549072265625|unsuper_loss: 0.0 +average reward score: -2.30078125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.71%) |Training time=0.41s (19.16%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +[2023-04-21 23:56:21,083] [INFO] [logging.py:96:log_dist] [Rank 0] step=490, skipped=9, lr=[5.637591896641978e-06, 5.637591896641978e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:56:21,102] [INFO] [timer.py:199:stop] epoch=0/micro_step=490/global_step=490, RunningAvgSamplesPerSec=128.80691152053797, CurrSamplesPerSec=129.65503757277642, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:56:21,194] [INFO] [logging.py:96:log_dist] [Rank 0] step=490, skipped=9, lr=[2.9210320707989525e-06, 2.9210320707989525e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 489|ppo_ep: 1|act_loss: 0.3994140625|cri_loss: 0.248779296875|unsuper_loss: 0.0 +average reward score: -1.482421875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.73%) |Training time=0.41s (19.14%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 490|ppo_ep: 1|act_loss: 0.0994873046875|cri_loss: 0.056243896484375|unsuper_loss: 0.0 +average reward score: -2.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.44%) |Training time=0.42s (19.43%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 491|ppo_ep: 1|act_loss: 0.0140380859375|cri_loss: 0.0095672607421875|unsuper_loss: 0.0 +average reward score: -0.3212890625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.68%) |Training time=0.41s (19.18%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 492|ppo_ep: 1|act_loss: 0.0175933837890625|cri_loss: 0.011749267578125|unsuper_loss: 0.0 +average reward score: -1.095703125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.51%) |Training time=0.42s (19.37%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 493|ppo_ep: 1|act_loss: 0.026275634765625|cri_loss: 0.01421356201171875|unsuper_loss: 0.0 +average reward score: -1.01171875 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.62s (72.77%) |Training time=0.41s (18.48%) |Others=0.19 (8.75%)|CurSamplesPerSec=14.38 |AvgSamplesPerSec=14.81 +epoch: 0|step: 494|ppo_ep: 1|act_loss: -0.0537109375|cri_loss: -0.0220489501953125|unsuper_loss: 0.0 +average reward score: -1.533203125 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.71s (76.70%) |Training time=0.41s (18.34%) |Others=0.11 (4.96%)|CurSamplesPerSec=14.39 |AvgSamplesPerSec=14.81 +epoch: 0|step: 495|ppo_ep: 1|act_loss: -0.1220703125|cri_loss: -0.0538330078125|unsuper_loss: 0.0 +average reward score: -1.3046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.41%) |Training time=0.42s (19.47%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.81 +epoch: 0|step: 496|ppo_ep: 1|act_loss: 0.113037109375|cri_loss: 0.06378173828125|unsuper_loss: 0.0 +average reward score: -0.7041015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.31%) |Training time=0.42s (19.55%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 497|ppo_ep: 1|act_loss: -0.0479736328125|cri_loss: -0.0192718505859375|unsuper_loss: 0.0 +average reward score: -1.0888671875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.63%) |Training time=0.41s (19.23%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 498|ppo_ep: 1|act_loss: -0.0472412109375|cri_loss: -0.0181121826171875|unsuper_loss: 0.0 +average reward score: -1.09375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.51%) |Training time=0.41s (19.36%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +[2023-04-21 23:56:42,686] [INFO] [logging.py:96:log_dist] [Rank 0] step=500, skipped=9, lr=[5.462120543134245e-06, 5.462120543134245e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:56:42,705] [INFO] [timer.py:199:stop] epoch=0/micro_step=500/global_step=500, RunningAvgSamplesPerSec=128.77564245686753, CurrSamplesPerSec=126.7796903470301, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:56:42,797] [INFO] [logging.py:96:log_dist] [Rank 0] step=500, skipped=9, lr=[2.830114271054013e-06, 2.830114271054013e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 499|ppo_ep: 1|act_loss: 0.13818359375|cri_loss: 0.07745361328125|unsuper_loss: 0.0 +average reward score: -0.95703125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.51%) |Training time=0.42s (19.37%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 500|ppo_ep: 1|act_loss: 0.0167694091796875|cri_loss: 0.0131988525390625|unsuper_loss: 0.0 +average reward score: -0.96826171875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.45%) |Training time=0.42s (19.42%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 501|ppo_ep: 1|act_loss: 0.0234832763671875|cri_loss: 0.0179595947265625|unsuper_loss: 0.0 +average reward score: -1.193359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.38%) |Training time=0.41s (19.23%) |Others=0.12 (5.39%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.81 +epoch: 0|step: 502|ppo_ep: 1|act_loss: -0.00209808349609375|cri_loss: 0.0024261474609375|unsuper_loss: 0.0 +average reward score: -0.5439453125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.43%) |Training time=0.42s (19.45%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 503|ppo_ep: 1|act_loss: 0.1025390625|cri_loss: 0.0540771484375|unsuper_loss: 0.0 +average reward score: 0.048095703125 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.75s (77.23%) |Training time=0.41s (17.91%) |Others=0.11 (4.86%)|CurSamplesPerSec=14.13 |AvgSamplesPerSec=14.81 +epoch: 0|step: 504|ppo_ep: 1|act_loss: 0.099609375|cri_loss: 0.0516357421875|unsuper_loss: 0.0 +average reward score: -0.701171875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.16%) |Training time=0.40s (18.72%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 505|ppo_ep: 1|act_loss: 0.077392578125|cri_loss: 0.03948974609375|unsuper_loss: 0.0 +average reward score: -1.853515625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.07%) |Training time=0.40s (18.81%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 506|ppo_ep: 1|act_loss: 0.029998779296875|cri_loss: 0.021331787109375|unsuper_loss: 0.0 +average reward score: -0.82861328125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.04%) |Training time=0.40s (18.84%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 507|ppo_ep: 1|act_loss: 0.04705810546875|cri_loss: 0.0357666015625|unsuper_loss: 0.0 +average reward score: -0.2374267578125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.98%) |Training time=0.41s (18.90%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 508|ppo_ep: 1|act_loss: -0.019744873046875|cri_loss: -0.00775146484375|unsuper_loss: 0.0 +average reward score: -1.1572265625 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.95%) |Training time=0.46s (20.18%) |Others=0.11 (4.87%)|CurSamplesPerSec=14.18 |AvgSamplesPerSec=14.81 +[2023-04-21 23:57:04,357] [INFO] [logging.py:96:log_dist] [Rank 0] step=510, skipped=9, lr=[5.2857870916303926e-06, 5.2857870916303926e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:57:04,376] [INFO] [timer.py:199:stop] epoch=0/micro_step=510/global_step=510, RunningAvgSamplesPerSec=128.8105377562441, CurrSamplesPerSec=134.4819851007229, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:57:04,468] [INFO] [logging.py:96:log_dist] [Rank 0] step=510, skipped=9, lr=[2.7387497884095297e-06, 2.7387497884095297e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 509|ppo_ep: 1|act_loss: 0.045013427734375|cri_loss: 0.0234375|unsuper_loss: 0.0 +average reward score: -0.90380859375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.14%) |Training time=0.40s (18.74%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.81 +epoch: 0|step: 510|ppo_ep: 1|act_loss: -0.018096923828125|cri_loss: -0.0037841796875|unsuper_loss: 0.0 +average reward score: -0.953125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.01%) |Training time=0.40s (18.85%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81 +epoch: 0|step: 511|ppo_ep: 1|act_loss: -0.02386474609375|cri_loss: -0.0094146728515625|unsuper_loss: 0.0 +average reward score: -1.75 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.09%) |Training time=0.41s (18.83%) |Others=0.11 (5.08%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.81 +epoch: 0|step: 512|ppo_ep: 1|act_loss: -0.01555633544921875|cri_loss: -0.003448486328125|unsuper_loss: 0.0 +average reward score: -1.1650390625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.98%) |Training time=0.40s (18.89%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 513|ppo_ep: 1|act_loss: -0.05865478515625|cri_loss: -0.0181121826171875|unsuper_loss: 0.0 +average reward score: 0.0504150390625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.12%) |Training time=0.40s (18.75%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81 +epoch: 0|step: 514|ppo_ep: 1|act_loss: 0.25244140625|cri_loss: 0.17333984375|unsuper_loss: 0.0 +average reward score: -1.330078125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.05%) |Training time=0.40s (18.83%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 515|ppo_ep: 1|act_loss: -0.027191162109375|cri_loss: -0.0131683349609375|unsuper_loss: 0.0 +average reward score: -1.1142578125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.99%) |Training time=0.40s (18.88%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 516|ppo_ep: 1|act_loss: -0.0731201171875|cri_loss: -0.03143310546875|unsuper_loss: 0.0 +average reward score: 0.60595703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.79%) |Training time=0.41s (19.10%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 517|ppo_ep: 1|act_loss: 0.055328369140625|cri_loss: 0.032196044921875|unsuper_loss: 0.0 +average reward score: -1.23828125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.10%) |Training time=0.40s (18.77%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81 +epoch: 0|step: 518|ppo_ep: 1|act_loss: 0.06329345703125|cri_loss: 0.0347900390625|unsuper_loss: 0.0 +average reward score: -1.052734375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.93%) |Training time=0.41s (18.95%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +[2023-04-21 23:57:25,811] [INFO] [logging.py:96:log_dist] [Rank 0] step=520, skipped=9, lr=[5.108830141743677e-06, 5.108830141743677e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:57:25,829] [INFO] [timer.py:199:stop] epoch=0/micro_step=520/global_step=520, RunningAvgSamplesPerSec=128.87020056334387, CurrSamplesPerSec=131.38493736546326, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:57:25,922] [INFO] [logging.py:96:log_dist] [Rank 0] step=520, skipped=9, lr=[2.647062249608123e-06, 2.647062249608123e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 519|ppo_ep: 1|act_loss: 0.0419921875|cri_loss: 0.025726318359375|unsuper_loss: 0.0 +average reward score: -0.285888671875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.95%) |Training time=0.41s (18.95%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 520|ppo_ep: 1|act_loss: 0.00290679931640625|cri_loss: 0.004146575927734375|unsuper_loss: 0.0 +average reward score: -1.4697265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.07%) |Training time=0.41s (18.85%) |Others=0.11 (5.08%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.81 +epoch: 0|step: 521|ppo_ep: 1|act_loss: 0.1004638671875|cri_loss: 0.05633544921875|unsuper_loss: 0.0 +average reward score: -1.466796875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.06%) |Training time=0.40s (18.80%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 522|ppo_ep: 1|act_loss: 0.041107177734375|cri_loss: 0.028961181640625|unsuper_loss: 0.0 +average reward score: -0.830078125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.01%) |Training time=0.40s (18.85%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 523|ppo_ep: 1|act_loss: -0.016326904296875|cri_loss: -0.002349853515625|unsuper_loss: 0.0 +average reward score: -0.36962890625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.93%) |Training time=0.41s (18.93%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 524|ppo_ep: 1|act_loss: 0.029144287109375|cri_loss: 0.0178985595703125|unsuper_loss: 0.0 +average reward score: -0.12548828125 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.81s (78.13%) |Training time=0.40s (17.11%) |Others=0.11 (4.76%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.81 +epoch: 0|step: 525|ppo_ep: 1|act_loss: 0.04315185546875|cri_loss: 0.022186279296875|unsuper_loss: 0.0 +average reward score: -1.3505859375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.89%) |Training time=0.41s (18.97%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.81 +epoch: 0|step: 526|ppo_ep: 1|act_loss: -0.0006580352783203125|cri_loss: 0.001468658447265625|unsuper_loss: 0.0 +average reward score: -1.166015625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.90%) |Training time=0.41s (18.98%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 527|ppo_ep: 1|act_loss: 0.50244140625|cri_loss: 0.341796875|unsuper_loss: 0.0 +average reward score: 0.2208251953125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.06%) |Training time=0.40s (18.80%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 528|ppo_ep: 1|act_loss: -0.14111328125|cri_loss: -0.06561279296875|unsuper_loss: 0.0 +average reward score: -0.4248046875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.93%) |Training time=0.41s (18.94%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +[2023-04-21 23:57:47,434] [INFO] [logging.py:96:log_dist] [Rank 0] step=530, skipped=9, lr=[4.93148913675299e-06, 4.93148913675299e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:57:47,453] [INFO] [timer.py:199:stop] epoch=0/micro_step=530/global_step=530, RunningAvgSamplesPerSec=128.93183578328944, CurrSamplesPerSec=131.0393681261014, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:57:47,545] [INFO] [logging.py:96:log_dist] [Rank 0] step=530, skipped=9, lr=[2.5551757185248656e-06, 2.5551757185248656e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 529|ppo_ep: 1|act_loss: -0.06744384765625|cri_loss: -0.027130126953125|unsuper_loss: 0.0 +average reward score: -1.453125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.89%) |Training time=0.41s (18.99%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 530|ppo_ep: 1|act_loss: -0.12005615234375|cri_loss: -0.0482177734375|unsuper_loss: 0.0 +average reward score: -0.04510498046875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.91%) |Training time=0.41s (18.96%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 531|ppo_ep: 1|act_loss: -0.0714111328125|cri_loss: -0.02630615234375|unsuper_loss: 0.0 +average reward score: -1.21875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.92%) |Training time=0.41s (18.96%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 532|ppo_ep: 1|act_loss: -0.00323486328125|cri_loss: 0.02301025390625|unsuper_loss: 0.0 +average reward score: -0.34423828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.05%) |Training time=0.42s (19.22%) |Others=0.12 (5.73%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.81 +epoch: 0|step: 533|ppo_ep: 1|act_loss: 0.04156494140625|cri_loss: 0.02337646484375|unsuper_loss: 0.0 +average reward score: -0.6787109375 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.73s (77.08%) |Training time=0.40s (18.02%) |Others=0.11 (4.91%)|CurSamplesPerSec=14.24 |AvgSamplesPerSec=14.81 +epoch: 0|step: 534|ppo_ep: 1|act_loss: -0.1334228515625|cri_loss: -0.061859130859375|unsuper_loss: 0.0 +average reward score: -0.400146484375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.91%) |Training time=0.41s (18.95%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 535|ppo_ep: 1|act_loss: 0.0897216796875|cri_loss: 0.05316162109375|unsuper_loss: 0.0 +average reward score: -0.697265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.66%) |Training time=0.41s (19.21%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 536|ppo_ep: 1|act_loss: -0.072998046875|cri_loss: -0.0137939453125|unsuper_loss: 0.0 +average reward score: -0.5869140625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.84%) |Training time=0.41s (19.03%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81 +epoch: 0|step: 537|ppo_ep: 1|act_loss: 0.0777587890625|cri_loss: 0.043487548828125|unsuper_loss: 0.0 +average reward score: -0.93505859375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.73%) |Training time=0.41s (19.14%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 538|ppo_ep: 1|act_loss: 0.0673828125|cri_loss: 0.035186767578125|unsuper_loss: 0.0 +average reward score: -0.671875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.73%) |Training time=0.41s (19.14%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +[2023-04-21 23:58:09,179] [INFO] [logging.py:96:log_dist] [Rank 0] step=540, skipped=9, lr=[4.754004039608327e-06, 4.754004039608327e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:58:09,198] [INFO] [timer.py:199:stop] epoch=0/micro_step=540/global_step=540, RunningAvgSamplesPerSec=128.9580352665631, CurrSamplesPerSec=133.35439166833916, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:58:09,290] [INFO] [logging.py:96:log_dist] [Rank 0] step=540, skipped=9, lr=[2.46321452829447e-06, 2.46321452829447e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 539|ppo_ep: 1|act_loss: 0.716796875|cri_loss: 0.46044921875|unsuper_loss: 0.0 +average reward score: -0.89599609375 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.82s (78.00%) |Training time=0.40s (17.31%) |Others=0.11 (4.70%)|CurSamplesPerSec=13.74 |AvgSamplesPerSec=14.81 +epoch: 0|step: 540|ppo_ep: 1|act_loss: -0.08453369140625|cri_loss: -0.0290374755859375|unsuper_loss: 0.0 +average reward score: -0.1602783203125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.74%) |Training time=0.41s (19.14%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 541|ppo_ep: 1|act_loss: 0.0238800048828125|cri_loss: 0.01302337646484375|unsuper_loss: 0.0 +average reward score: -0.55712890625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.71%) |Training time=0.41s (19.16%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 542|ppo_ep: 1|act_loss: 0.022369384765625|cri_loss: 0.011962890625|unsuper_loss: 0.0 +average reward score: -0.40478515625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.81%) |Training time=0.41s (19.06%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 543|ppo_ep: 1|act_loss: 0.000453948974609375|cri_loss: 0.0005359649658203125|unsuper_loss: 0.0 +average reward score: -0.52783203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.73%) |Training time=0.41s (19.13%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 544|ppo_ep: 1|act_loss: -0.10272216796875|cri_loss: -0.04510498046875|unsuper_loss: 0.0 +average reward score: 0.463134765625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.74%) |Training time=0.41s (19.12%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 545|ppo_ep: 1|act_loss: -0.06622314453125|cri_loss: -0.0285797119140625|unsuper_loss: 0.0 +average reward score: -0.488037109375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.87%) |Training time=0.41s (18.99%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81 +epoch: 0|step: 546|ppo_ep: 1|act_loss: 0.054931640625|cri_loss: 0.03485107421875|unsuper_loss: 0.0 +average reward score: -0.81201171875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.71%) |Training time=0.41s (19.17%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 547|ppo_ep: 1|act_loss: 0.05303955078125|cri_loss: 0.0277099609375|unsuper_loss: 0.0 +average reward score: -1.078125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.70%) |Training time=0.41s (19.17%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 548|ppo_ep: 1|act_loss: 0.01468658447265625|cri_loss: 0.00971221923828125|unsuper_loss: 0.0 +average reward score: -1.755859375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.71%) |Training time=0.41s (19.16%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +[2023-04-21 23:58:30,609] [INFO] [logging.py:96:log_dist] [Rank 0] step=550, skipped=9, lr=[4.576615008233078e-06, 4.576615008233078e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:58:30,627] [INFO] [timer.py:199:stop] epoch=0/micro_step=550/global_step=550, RunningAvgSamplesPerSec=128.96546878661252, CurrSamplesPerSec=129.5923189519664, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:58:30,720] [INFO] [logging.py:96:log_dist] [Rank 0] step=550, skipped=9, lr=[2.371303113074134e-06, 2.371303113074134e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 549|ppo_ep: 1|act_loss: -0.003719329833984375|cri_loss: 0.00133514404296875|unsuper_loss: 0.0 +average reward score: -0.8671875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.74%) |Training time=0.41s (19.15%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81 +epoch: 0|step: 550|ppo_ep: 1|act_loss: -0.0433349609375|cri_loss: -0.020294189453125|unsuper_loss: 0.0 +average reward score: -1.921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.50%) |Training time=0.42s (19.38%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 551|ppo_ep: 1|act_loss: 0.033416748046875|cri_loss: 0.02197265625|unsuper_loss: 0.0 +average reward score: -0.27392578125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.72%) |Training time=0.41s (19.15%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 552|ppo_ep: 1|act_loss: 0.0675048828125|cri_loss: 0.03875732421875|unsuper_loss: 0.0 +average reward score: -0.7763671875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.73%) |Training time=0.41s (19.15%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 553|ppo_ep: 1|act_loss: 0.0059967041015625|cri_loss: 0.0139923095703125|unsuper_loss: 0.0 +average reward score: -0.435546875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.90%) |Training time=0.41s (18.97%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81 +epoch: 0|step: 554|ppo_ep: 1|act_loss: 0.0028285980224609375|cri_loss: 0.004730224609375|unsuper_loss: 0.0 +average reward score: -0.368408203125 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.66s (73.26%) |Training time=0.50s (21.88%) |Others=0.11 (4.86%)|CurSamplesPerSec=14.13 |AvgSamplesPerSec=14.81 +epoch: 0|step: 555|ppo_ep: 1|act_loss: 0.0911865234375|cri_loss: 0.047454833984375|unsuper_loss: 0.0 +average reward score: -0.98876953125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.00%) |Training time=0.40s (18.88%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 556|ppo_ep: 1|act_loss: 0.09619140625|cri_loss: 0.05340576171875|unsuper_loss: 0.0 +average reward score: -0.49072265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.98%) |Training time=0.40s (18.89%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 557|ppo_ep: 1|act_loss: 0.01202392578125|cri_loss: 0.00739288330078125|unsuper_loss: 0.0 +average reward score: -1.9482421875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.99%) |Training time=0.40s (18.87%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 558|ppo_ep: 1|act_loss: 0.089111328125|cri_loss: 0.04901123046875|unsuper_loss: 0.0 +average reward score: -1.083984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.83%) |Training time=0.41s (19.05%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +[2023-04-21 23:58:52,166] [INFO] [logging.py:96:log_dist] [Rank 0] step=560, skipped=9, lr=[4.399562070562508e-06, 4.399562070562508e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:58:52,185] [INFO] [timer.py:199:stop] epoch=0/micro_step=560/global_step=560, RunningAvgSamplesPerSec=128.95737058219476, CurrSamplesPerSec=130.43333521215456, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:58:52,277] [INFO] [logging.py:96:log_dist] [Rank 0] step=560, skipped=9, lr=[2.279565839669693e-06, 2.279565839669693e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 559|ppo_ep: 1|act_loss: 0.047607421875|cri_loss: 0.025360107421875|unsuper_loss: 0.0 +average reward score: -1.916015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.88%) |Training time=0.41s (19.02%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 560|ppo_ep: 1|act_loss: 0.0312042236328125|cri_loss: 0.02032470703125|unsuper_loss: 0.0 +average reward score: -1.3662109375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.88%) |Training time=0.41s (19.00%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 561|ppo_ep: 1|act_loss: 0.07525634765625|cri_loss: 0.038787841796875|unsuper_loss: 0.0 +average reward score: -1.5234375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.01%) |Training time=0.40s (18.85%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 562|ppo_ep: 1|act_loss: 0.0253753662109375|cri_loss: 0.015106201171875|unsuper_loss: 0.0 +average reward score: -1.615234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.07%) |Training time=0.43s (19.85%) |Others=0.11 (5.08%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.81 +epoch: 0|step: 563|ppo_ep: 1|act_loss: -0.0050201416015625|cri_loss: 0.00833892822265625|unsuper_loss: 0.0 +average reward score: 0.74658203125 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.74s (77.30%) |Training time=0.40s (17.80%) |Others=0.11 (4.90%)|CurSamplesPerSec=14.21 |AvgSamplesPerSec=14.81 +epoch: 0|step: 564|ppo_ep: 1|act_loss: 0.0182342529296875|cri_loss: 0.0108184814453125|unsuper_loss: 0.0 +average reward score: -0.347900390625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.71%) |Training time=0.41s (19.16%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 565|ppo_ep: 1|act_loss: 0.01153564453125|cri_loss: 0.007289886474609375|unsuper_loss: 0.0 +average reward score: 0.007080078125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.84%) |Training time=0.41s (19.02%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81 +epoch: 0|step: 566|ppo_ep: 1|act_loss: -0.0457763671875|cri_loss: -0.02069091796875|unsuper_loss: 0.0 +average reward score: -0.7421875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.46%) |Training time=0.41s (19.08%) |Others=0.12 (5.46%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.81 +epoch: 0|step: 567|ppo_ep: 1|act_loss: 0.033111572265625|cri_loss: 0.0208587646484375|unsuper_loss: 0.0 +average reward score: -1.6044921875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.78%) |Training time=0.41s (19.09%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 568|ppo_ep: 1|act_loss: 0.07696533203125|cri_loss: 0.043701171875|unsuper_loss: 0.0 +average reward score: -0.55078125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.80%) |Training time=0.41s (19.07%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +[2023-04-21 23:59:13,907] [INFO] [logging.py:96:log_dist] [Rank 0] step=570, skipped=9, lr=[4.223084799758111e-06, 4.223084799758111e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:59:13,925] [INFO] [timer.py:199:stop] epoch=0/micro_step=570/global_step=570, RunningAvgSamplesPerSec=128.97926272081045, CurrSamplesPerSec=133.12384933005495, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:59:14,018] [INFO] [logging.py:96:log_dist] [Rank 0] step=570, skipped=9, lr=[2.1881268392529074e-06, 2.1881268392529074e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 569|ppo_ep: 1|act_loss: 0.08038330078125|cri_loss: 0.04248046875|unsuper_loss: 0.0 +average reward score: -0.7685546875 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.80%) |Training time=0.40s (17.45%) |Others=0.11 (4.75%)|CurSamplesPerSec=13.84 |AvgSamplesPerSec=14.81 +epoch: 0|step: 570|ppo_ep: 1|act_loss: 0.088623046875|cri_loss: 0.045562744140625|unsuper_loss: 0.0 +average reward score: -0.7060546875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.79%) |Training time=0.41s (19.08%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 571|ppo_ep: 1|act_loss: 0.008026123046875|cri_loss: 0.005352020263671875|unsuper_loss: 0.0 +average reward score: -1.9697265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.86%) |Training time=0.41s (19.01%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 572|ppo_ep: 1|act_loss: -0.0545654296875|cri_loss: -0.025299072265625|unsuper_loss: 0.0 +average reward score: -1.1875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.92%) |Training time=0.41s (18.94%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81 +epoch: 0|step: 573|ppo_ep: 1|act_loss: 0.0059967041015625|cri_loss: 0.009002685546875|unsuper_loss: 0.0 +average reward score: 0.2705078125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.13%) |Training time=0.40s (18.72%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.81 +epoch: 0|step: 574|ppo_ep: 1|act_loss: -0.1192626953125|cri_loss: -0.054412841796875|unsuper_loss: 0.0 +average reward score: -0.132568359375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.87%) |Training time=0.41s (19.00%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 575|ppo_ep: 1|act_loss: -0.015777587890625|cri_loss: -0.002105712890625|unsuper_loss: 0.0 +average reward score: -0.25244140625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.85%) |Training time=0.41s (19.03%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 576|ppo_ep: 1|act_loss: 0.136962890625|cri_loss: 0.0953369140625|unsuper_loss: 0.0 +average reward score: -1.44140625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.76%) |Training time=0.41s (19.11%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 577|ppo_ep: 1|act_loss: -0.00135040283203125|cri_loss: 0.005489349365234375|unsuper_loss: 0.0 +average reward score: -1.443359375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.01%) |Training time=0.40s (18.86%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 578|ppo_ep: 1|act_loss: 0.19091796875|cri_loss: 0.109130859375|unsuper_loss: 0.0 +average reward score: -1.5791015625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.83%) |Training time=0.41s (19.03%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +[2023-04-21 23:59:35,337] [INFO] [logging.py:96:log_dist] [Rank 0] step=580, skipped=9, lr=[4.047421990037345e-06, 4.047421990037345e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:59:35,355] [INFO] [timer.py:199:stop] epoch=0/micro_step=580/global_step=580, RunningAvgSamplesPerSec=129.0139638309541, CurrSamplesPerSec=131.0647044061024, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:59:35,448] [INFO] [logging.py:96:log_dist] [Rank 0] step=580, skipped=9, lr=[2.097109839397588e-06, 2.097109839397588e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 579|ppo_ep: 1|act_loss: -0.0176544189453125|cri_loss: 0.0020294189453125|unsuper_loss: 0.0 +average reward score: -0.6689453125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.88%) |Training time=0.41s (19.01%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 580|ppo_ep: 1|act_loss: -0.198486328125|cri_loss: -0.082763671875|unsuper_loss: 0.0 +average reward score: -1.0498046875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.94%) |Training time=0.41s (18.93%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 581|ppo_ep: 1|act_loss: 0.08685302734375|cri_loss: 0.061767578125|unsuper_loss: 0.0 +average reward score: -0.341796875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.11%) |Training time=0.40s (18.76%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81 +epoch: 0|step: 582|ppo_ep: 1|act_loss: -0.051513671875|cri_loss: -0.02471923828125|unsuper_loss: 0.0 +average reward score: -2.01953125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.06%) |Training time=0.40s (18.80%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 583|ppo_ep: 1|act_loss: 0.03472900390625|cri_loss: 0.024383544921875|unsuper_loss: 0.0 +average reward score: -0.962890625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.99%) |Training time=0.40s (18.89%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 584|ppo_ep: 1|act_loss: 0.0494384765625|cri_loss: 0.025604248046875|unsuper_loss: 0.0 +average reward score: -0.109130859375 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.85%) |Training time=0.40s (17.38%) |Others=0.11 (4.77%)|CurSamplesPerSec=13.88 |AvgSamplesPerSec=14.81 +epoch: 0|step: 585|ppo_ep: 1|act_loss: 0.0477294921875|cri_loss: 0.0253143310546875|unsuper_loss: 0.0 +average reward score: -1.673828125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.08%) |Training time=0.40s (18.77%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81 +epoch: 0|step: 586|ppo_ep: 1|act_loss: 0.0694580078125|cri_loss: 0.03692626953125|unsuper_loss: 0.0 +average reward score: -1.9365234375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.02%) |Training time=0.40s (18.85%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 587|ppo_ep: 1|act_loss: -0.0389404296875|cri_loss: -0.0133819580078125|unsuper_loss: 0.0 +average reward score: -0.0748291015625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.05%) |Training time=0.40s (18.81%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81 +epoch: 0|step: 588|ppo_ep: 1|act_loss: 0.03643798828125|cri_loss: 0.019317626953125|unsuper_loss: 0.0 +average reward score: -0.373779296875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.07%) |Training time=0.40s (18.79%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81 +[2023-04-21 23:59:56,914] [INFO] [logging.py:96:log_dist] [Rank 0] step=590, skipped=9, lr=[3.872811333557339e-06, 3.872811333557339e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-21 23:59:56,932] [INFO] [timer.py:199:stop] epoch=0/micro_step=590/global_step=590, RunningAvgSamplesPerSec=129.0828609652265, CurrSamplesPerSec=133.9209591545136, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-21 23:59:57,025] [INFO] [logging.py:96:log_dist] [Rank 0] step=590, skipped=9, lr=[2.0066379966618336e-06, 2.0066379966618336e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 589|ppo_ep: 1|act_loss: -0.04449462890625|cri_loss: -0.0187225341796875|unsuper_loss: 0.0 +average reward score: -0.70556640625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.07%) |Training time=0.40s (18.80%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.81 +epoch: 0|step: 590|ppo_ep: 1|act_loss: -0.0704345703125|cri_loss: -0.0311431884765625|unsuper_loss: 0.0 +average reward score: -1.0146484375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.93%) |Training time=0.41s (18.94%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81 +epoch: 0|step: 591|ppo_ep: 1|act_loss: 0.02191162109375|cri_loss: 0.01136016845703125|unsuper_loss: 0.0 +average reward score: 0.38037109375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.13%) |Training time=0.40s (18.74%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 592|ppo_ep: 1|act_loss: -0.055938720703125|cri_loss: -0.02142333984375|unsuper_loss: 0.0 +average reward score: -0.204345703125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.92%) |Training time=0.41s (18.97%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.81 +epoch: 0|step: 593|ppo_ep: 1|act_loss: -0.00064849853515625|cri_loss: 0.0021076202392578125|unsuper_loss: 0.0 +average reward score: -1.3173828125 +------------------------------------------------------------------------------------- +|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.73s (77.14%) |Training time=0.40s (17.96%) |Others=0.11 (4.90%)|CurSamplesPerSec=14.27 |AvgSamplesPerSec=14.81 +epoch: 0|step: 594|ppo_ep: 1|act_loss: 0.16748046875|cri_loss: 0.0906982421875|unsuper_loss: 0.0 +average reward score: -0.52734375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.96%) |Training time=0.40s (18.90%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 595|ppo_ep: 1|act_loss: 0.06268310546875|cri_loss: 0.035736083984375|unsuper_loss: 0.0 +average reward score: -0.80419921875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.12%) |Training time=0.40s (18.74%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 596|ppo_ep: 1|act_loss: 0.0067596435546875|cri_loss: 0.0061798095703125|unsuper_loss: 0.0 +average reward score: -0.931640625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.10%) |Training time=0.40s (18.76%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81 +epoch: 0|step: 597|ppo_ep: 1|act_loss: 0.08984375|cri_loss: 0.05364990234375|unsuper_loss: 0.0 +average reward score: 0.299560546875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.98%) |Training time=0.40s (18.88%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 598|ppo_ep: 1|act_loss: 0.0401611328125|cri_loss: 0.022216796875|unsuper_loss: 0.0 +average reward score: -0.8349609375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.02%) |Training time=0.40s (18.84%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +[2023-04-22 00:00:18,614] [INFO] [logging.py:96:log_dist] [Rank 0] step=600, skipped=9, lr=[3.699489098789857e-06, 3.699489098789857e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-22 00:00:18,633] [INFO] [timer.py:199:stop] epoch=0/micro_step=600/global_step=600, RunningAvgSamplesPerSec=129.141982278309, CurrSamplesPerSec=133.55582532966616, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-22 00:00:18,725] [INFO] [logging.py:96:log_dist] [Rank 0] step=600, skipped=9, lr=[1.916833729942931e-06, 1.916833729942931e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 599|ppo_ep: 1|act_loss: -0.0003910064697265625|cri_loss: 0.0005998611450195312|unsuper_loss: 0.0 +average reward score: -1.083984375 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.79s (77.79%) |Training time=0.40s (17.44%) |Others=0.11 (4.76%)|CurSamplesPerSec=13.88 |AvgSamplesPerSec=14.81 +epoch: 0|step: 600|ppo_ep: 1|act_loss: 0.0038242340087890625|cri_loss: 0.00618743896484375|unsuper_loss: 0.0 +average reward score: -0.68017578125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.00%) |Training time=0.40s (18.86%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 601|ppo_ep: 1|act_loss: -0.0005950927734375|cri_loss: 0.00839996337890625|unsuper_loss: 0.0 +average reward score: -0.7646484375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.91%) |Training time=0.41s (18.94%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.81 +epoch: 0|step: 602|ppo_ep: 1|act_loss: -0.06658935546875|cri_loss: -0.0243072509765625|unsuper_loss: 0.0 +average reward score: -0.162353515625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.77%) |Training time=0.41s (19.10%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 603|ppo_ep: 1|act_loss: 0.009246826171875|cri_loss: 0.00897216796875|unsuper_loss: 0.0 +average reward score: 0.247314453125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.76%) |Training time=0.41s (19.10%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 604|ppo_ep: 1|act_loss: 0.005096435546875|cri_loss: 0.01361846923828125|unsuper_loss: 0.0 +average reward score: -0.9736328125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.80%) |Training time=0.41s (19.07%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 605|ppo_ep: 1|act_loss: 0.03472900390625|cri_loss: 0.021392822265625|unsuper_loss: 0.0 +average reward score: -1.021484375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.94%) |Training time=0.40s (18.91%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.81 +epoch: 0|step: 606|ppo_ep: 1|act_loss: 0.022247314453125|cri_loss: 0.0121307373046875|unsuper_loss: 0.0 +average reward score: -0.0771484375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.86%) |Training time=0.41s (18.99%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81 +epoch: 0|step: 607|ppo_ep: 1|act_loss: -0.104248046875|cri_loss: -0.04425048828125|unsuper_loss: 0.0 +average reward score: 0.28271484375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.91%) |Training time=0.41s (18.91%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 608|ppo_ep: 1|act_loss: -0.04498291015625|cri_loss: -0.015960693359375|unsuper_loss: 0.0 +average reward score: -1.5947265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.73%) |Training time=0.41s (19.13%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +[2023-04-22 00:00:40,031] [INFO] [logging.py:96:log_dist] [Rank 0] step=610, skipped=9, lr=[3.5276898108226314e-06, 3.5276898108226314e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-22 00:00:40,049] [INFO] [timer.py:199:stop] epoch=0/micro_step=610/global_step=610, RunningAvgSamplesPerSec=129.1724633970564, CurrSamplesPerSec=131.91333396889732, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-22 00:00:40,142] [INFO] [logging.py:96:log_dist] [Rank 0] step=610, skipped=9, lr=[1.8278185548303789e-06, 1.8278185548303789e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 609|ppo_ep: 1|act_loss: -0.0159759521484375|cri_loss: -0.0038909912109375|unsuper_loss: 0.0 +average reward score: -0.2020263671875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.92%) |Training time=0.41s (18.96%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.81 +epoch: 0|step: 610|ppo_ep: 1|act_loss: -0.062408447265625|cri_loss: -0.0260009765625|unsuper_loss: 0.0 +average reward score: -0.194091796875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.81%) |Training time=0.41s (19.05%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 611|ppo_ep: 1|act_loss: -0.04132080078125|cri_loss: -0.0162200927734375|unsuper_loss: 0.0 +average reward score: -0.73681640625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.99%) |Training time=0.40s (18.86%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 612|ppo_ep: 1|act_loss: 0.0279541015625|cri_loss: 0.020477294921875|unsuper_loss: 0.0 +average reward score: -0.161376953125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.85%) |Training time=0.41s (19.00%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.81 +epoch: 0|step: 613|ppo_ep: 1|act_loss: 0.02978515625|cri_loss: 0.021728515625|unsuper_loss: 0.0 +average reward score: -1.361328125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.92%) |Training time=0.41s (18.95%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.81 +epoch: 0|step: 614|ppo_ep: 1|act_loss: 0.0789794921875|cri_loss: 0.04876708984375|unsuper_loss: 0.0 +average reward score: -1.2548828125 +------------------------------------------------------------------------------------- +|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.79s (77.74%) |Training time=0.40s (17.48%) |Others=0.11 (4.78%)|CurSamplesPerSec=13.89 |AvgSamplesPerSec=14.81 +epoch: 0|step: 615|ppo_ep: 1|act_loss: 0.032196044921875|cri_loss: 0.0184326171875|unsuper_loss: 0.0 +average reward score: -0.97265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.91%) |Training time=0.41s (18.95%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81 +epoch: 0|step: 616|ppo_ep: 1|act_loss: -0.04571533203125|cri_loss: -0.01983642578125|unsuper_loss: 0.0 +average reward score: -0.634765625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.65%) |Training time=0.41s (19.20%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 617|ppo_ep: 1|act_loss: -0.069580078125|cri_loss: -0.0295562744140625|unsuper_loss: 0.0 +average reward score: -0.221435546875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.82%) |Training time=0.41s (19.03%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81 +epoch: 0|step: 618|ppo_ep: 1|act_loss: 0.048736572265625|cri_loss: 0.02740478515625|unsuper_loss: 0.0 +average reward score: -2.14453125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.76%) |Training time=0.41s (19.10%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +[2023-04-22 00:01:01,606] [INFO] [logging.py:96:log_dist] [Rank 0] step=620, skipped=9, lr=[3.3576459340197268e-06, 3.3576459340197268e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-22 00:01:01,625] [INFO] [timer.py:199:stop] epoch=0/micro_step=620/global_step=620, RunningAvgSamplesPerSec=129.19904141078314, CurrSamplesPerSec=128.60686758116876, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-22 00:01:01,717] [INFO] [logging.py:96:log_dist] [Rank 0] step=620, skipped=9, lr=[1.7397129191812058e-06, 1.7397129191812058e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 619|ppo_ep: 1|act_loss: -0.07763671875|cri_loss: -0.032867431640625|unsuper_loss: 0.0 +average reward score: -0.50048828125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.65%) |Training time=0.41s (19.22%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 620|ppo_ep: 1|act_loss: 0.1051025390625|cri_loss: 0.05865478515625|unsuper_loss: 0.0 +average reward score: -0.99365234375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.77%) |Training time=0.41s (19.09%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 621|ppo_ep: 1|act_loss: -0.048065185546875|cri_loss: -0.0211639404296875|unsuper_loss: 0.0 +average reward score: 0.398681640625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.91%) |Training time=0.41s (18.95%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 622|ppo_ep: 1|act_loss: 0.07000732421875|cri_loss: 0.042724609375|unsuper_loss: 0.0 +average reward score: -1.185546875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.57%) |Training time=0.42s (19.35%) |Others=0.11 (5.08%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.81 +epoch: 0|step: 623|ppo_ep: 1|act_loss: 0.011199951171875|cri_loss: 0.01171875|unsuper_loss: 0.0 +average reward score: -1.3115234375 +------------------------------------------------------------------------------------- +|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.73s (77.09%) |Training time=0.40s (18.00%) |Others=0.11 (4.91%)|CurSamplesPerSec=14.28 |AvgSamplesPerSec=14.81 +epoch: 0|step: 624|ppo_ep: 1|act_loss: -0.017120361328125|cri_loss: -0.00676727294921875|unsuper_loss: 0.0 +average reward score: -1.40625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.59%) |Training time=0.41s (19.27%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.81 +epoch: 0|step: 625|ppo_ep: 1|act_loss: -0.02935791015625|cri_loss: -0.0076904296875|unsuper_loss: 0.0 +average reward score: -0.83349609375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.69%) |Training time=0.41s (19.16%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.81 +epoch: 0|step: 626|ppo_ep: 1|act_loss: 0.03070068359375|cri_loss: 0.0194854736328125|unsuper_loss: 0.0 +average reward score: 0.1630859375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.57%) |Training time=0.41s (19.29%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81 +epoch: 0|step: 627|ppo_ep: 1|act_loss: 0.02642822265625|cri_loss: 0.02081298828125|unsuper_loss: 0.0 +average reward score: -0.183349609375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.53%) |Training time=0.41s (19.33%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81 +epoch: 0|step: 628|ppo_ep: 1|act_loss: 0.00371551513671875|cri_loss: 0.006000518798828125|unsuper_loss: 0.0 +average reward score: -0.80078125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.71%) |Training time=0.41s (19.10%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.81 +[2023-04-22 00:01:23,305] [INFO] [logging.py:96:log_dist] [Rank 0] step=630, skipped=9, lr=[3.1895875574702854e-06, 3.1895875574702854e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-22 00:01:23,324] [INFO] [timer.py:199:stop] epoch=0/micro_step=630/global_step=630, RunningAvgSamplesPerSec=129.2124712085752, CurrSamplesPerSec=132.39765740367173, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-22 00:01:23,416] [INFO] [logging.py:96:log_dist] [Rank 0] step=630, skipped=9, lr=[1.6526360401400443e-06, 1.6526360401400443e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 629|ppo_ep: 1|act_loss: -0.08563232421875|cri_loss: -0.035675048828125|unsuper_loss: 0.0 +average reward score: -1.3447265625 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.79s (77.71%) |Training time=0.40s (17.54%) |Others=0.11 (4.76%)|CurSamplesPerSec=13.87 |AvgSamplesPerSec=14.81 +epoch: 0|step: 630|ppo_ep: 1|act_loss: -0.03173828125|cri_loss: -0.0081024169921875|unsuper_loss: 0.0 +average reward score: -0.296630859375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.60%) |Training time=0.41s (19.25%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.81 +epoch: 0|step: 631|ppo_ep: 1|act_loss: -0.0943603515625|cri_loss: -0.040283203125|unsuper_loss: 0.0 +average reward score: -0.37939453125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.43%) |Training time=0.42s (19.43%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 632|ppo_ep: 1|act_loss: -0.05322265625|cri_loss: -0.0235595703125|unsuper_loss: 0.0 +average reward score: -0.5087890625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.65%) |Training time=0.41s (19.21%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81 +epoch: 0|step: 633|ppo_ep: 1|act_loss: -0.08282470703125|cri_loss: -0.031707763671875|unsuper_loss: 0.0 +average reward score: -0.88720703125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.61%) |Training time=0.41s (19.24%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.81 +epoch: 0|step: 634|ppo_ep: 1|act_loss: -0.0310821533203125|cri_loss: -0.0131072998046875|unsuper_loss: 0.0 +average reward score: -0.9306640625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.62%) |Training time=0.41s (19.23%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81 +epoch: 0|step: 635|ppo_ep: 1|act_loss: -0.076904296875|cri_loss: -0.0321044921875|unsuper_loss: 0.0 +average reward score: -0.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.64%) |Training time=0.41s (19.23%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 636|ppo_ep: 1|act_loss: -0.0421142578125|cri_loss: -0.01532745361328125|unsuper_loss: 0.0 +average reward score: -1.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.88%) |Training time=0.41s (18.97%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 637|ppo_ep: 1|act_loss: 0.028472900390625|cri_loss: 0.020843505859375|unsuper_loss: 0.0 +average reward score: -0.171142578125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.66%) |Training time=0.41s (19.20%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 638|ppo_ep: 1|act_loss: -0.00150299072265625|cri_loss: 0.001399993896484375|unsuper_loss: 0.0 +average reward score: -0.461669921875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.63%) |Training time=0.41s (19.23%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +[2023-04-22 00:01:44,720] [INFO] [logging.py:96:log_dist] [Rank 0] step=640, skipped=9, lr=[3.0237420836513046e-06, 3.0237420836513046e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-22 00:01:44,739] [INFO] [timer.py:199:stop] epoch=0/micro_step=640/global_step=640, RunningAvgSamplesPerSec=129.20022608214921, CurrSamplesPerSec=127.59636959272474, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-22 00:01:44,832] [INFO] [logging.py:96:log_dist] [Rank 0] step=640, skipped=9, lr=[1.5667057428245102e-06, 1.5667057428245102e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 639|ppo_ep: 1|act_loss: 0.00522613525390625|cri_loss: 0.006351470947265625|unsuper_loss: 0.0 +average reward score: 0.156005859375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.58%) |Training time=0.41s (19.30%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 640|ppo_ep: 1|act_loss: 0.1121826171875|cri_loss: 0.060943603515625|unsuper_loss: 0.0 +average reward score: -0.46044921875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.79%) |Training time=0.41s (19.06%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 641|ppo_ep: 1|act_loss: 0.050201416015625|cri_loss: 0.03204345703125|unsuper_loss: 0.0 +average reward score: -0.8583984375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.77%) |Training time=0.41s (19.08%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.81 +epoch: 0|step: 642|ppo_ep: 1|act_loss: -0.024688720703125|cri_loss: -0.00608062744140625|unsuper_loss: 0.0 +average reward score: -1.376953125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.52%) |Training time=0.41s (19.35%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 643|ppo_ep: 1|act_loss: 0.06982421875|cri_loss: 0.036041259765625|unsuper_loss: 0.0 +average reward score: -0.19677734375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.80%) |Training time=0.41s (19.07%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 644|ppo_ep: 1|act_loss: -0.039306640625|cri_loss: -0.0154571533203125|unsuper_loss: 0.0 +average reward score: -0.955078125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.87%) |Training time=0.41s (19.00%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 645|ppo_ep: 1|act_loss: -0.0042266845703125|cri_loss: 0.003551483154296875|unsuper_loss: 0.0 +average reward score: 0.63671875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.19%) |Training time=0.46s (20.80%) |Others=0.11 (5.01%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.81 +epoch: 0|step: 646|ppo_ep: 1|act_loss: -0.017852783203125|cri_loss: -0.0043487548828125|unsuper_loss: 0.0 +average reward score: -0.84423828125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.86%) |Training time=0.41s (19.01%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 647|ppo_ep: 1|act_loss: 0.103759765625|cri_loss: 0.053558349609375|unsuper_loss: 0.0 +average reward score: -2.94140625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.97%) |Training time=0.40s (18.89%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81 +epoch: 0|step: 648|ppo_ep: 1|act_loss: 0.043792724609375|cri_loss: 0.0270233154296875|unsuper_loss: 0.0 +average reward score: -1.533203125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.92%) |Training time=0.41s (18.95%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +[2023-04-22 00:02:06,193] [INFO] [logging.py:96:log_dist] [Rank 0] step=650, skipped=9, lr=[2.860333920725707e-06, 2.860333920725707e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-22 00:02:06,211] [INFO] [timer.py:199:stop] epoch=0/micro_step=650/global_step=650, RunningAvgSamplesPerSec=129.208791284019, CurrSamplesPerSec=131.42469047796234, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-22 00:02:06,304] [INFO] [logging.py:96:log_dist] [Rank 0] step=650, skipped=9, lr=[1.482038300894149e-06, 1.482038300894149e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 649|ppo_ep: 1|act_loss: -0.0606689453125|cri_loss: -0.0254974365234375|unsuper_loss: 0.0 +average reward score: 0.1875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.90%) |Training time=0.41s (18.99%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81 +epoch: 0|step: 650|ppo_ep: 1|act_loss: 0.0689697265625|cri_loss: 0.039520263671875|unsuper_loss: 0.0 +average reward score: 0.5341796875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.83%) |Training time=0.41s (19.03%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 651|ppo_ep: 1|act_loss: -0.0679931640625|cri_loss: -0.027801513671875|unsuper_loss: 0.0 +average reward score: -0.249755859375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.93%) |Training time=0.41s (18.94%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 652|ppo_ep: 1|act_loss: 0.03314208984375|cri_loss: 0.0246429443359375|unsuper_loss: 0.0 +average reward score: -0.398193359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.38%) |Training time=0.41s (19.16%) |Others=0.12 (5.47%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.81 +epoch: 0|step: 653|ppo_ep: 1|act_loss: -0.047332763671875|cri_loss: -0.0184478759765625|unsuper_loss: 0.0 +average reward score: -0.6884765625 +------------------------------------------------------------------------------------- +|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.73s (76.99%) |Training time=0.41s (18.10%) |Others=0.11 (4.91%)|CurSamplesPerSec=14.26 |AvgSamplesPerSec=14.81 +epoch: 0|step: 654|ppo_ep: 1|act_loss: -0.01456451416015625|cri_loss: -0.00463104248046875|unsuper_loss: 0.0 +average reward score: -1.439453125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.82%) |Training time=0.41s (19.06%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 655|ppo_ep: 1|act_loss: 0.027130126953125|cri_loss: 0.01381683349609375|unsuper_loss: 0.0 +average reward score: -0.607421875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.80%) |Training time=0.41s (19.07%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 656|ppo_ep: 1|act_loss: 0.0105743408203125|cri_loss: 0.01410675048828125|unsuper_loss: 0.0 +average reward score: -1.7607421875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.80%) |Training time=0.41s (19.06%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 657|ppo_ep: 1|act_loss: -0.0599365234375|cri_loss: -0.0264739990234375|unsuper_loss: 0.0 +average reward score: 0.05181884765625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.03%) |Training time=0.40s (18.83%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81 +epoch: 0|step: 658|ppo_ep: 1|act_loss: 0.0421142578125|cri_loss: 0.025360107421875|unsuper_loss: 0.0 +average reward score: -0.494384765625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.89%) |Training time=0.41s (18.97%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +[2023-04-22 00:02:27,753] [INFO] [logging.py:96:log_dist] [Rank 0] step=660, skipped=9, lr=[2.6995841788920667e-06, 2.6995841788920667e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-22 00:02:27,771] [INFO] [timer.py:199:stop] epoch=0/micro_step=660/global_step=660, RunningAvgSamplesPerSec=129.23248095416145, CurrSamplesPerSec=129.5397872044721, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-22 00:02:27,864] [INFO] [logging.py:96:log_dist] [Rank 0] step=660, skipped=9, lr=[1.3987482792186873e-06, 1.3987482792186873e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 659|ppo_ep: 1|act_loss: -0.1041259765625|cri_loss: -0.048553466796875|unsuper_loss: 0.0 +average reward score: -0.80615234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.97%) |Training time=0.41s (18.96%) |Others=0.11 (5.06%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.81 +epoch: 0|step: 660|ppo_ep: 1|act_loss: -0.0287628173828125|cri_loss: -0.0077362060546875|unsuper_loss: 0.0 +average reward score: -0.56640625 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.65s (72.88%) |Training time=0.50s (22.27%) |Others=0.11 (4.85%)|CurSamplesPerSec=14.13 |AvgSamplesPerSec=14.81 +epoch: 0|step: 661|ppo_ep: 1|act_loss: 0.04998779296875|cri_loss: 0.031097412109375|unsuper_loss: 0.0 +average reward score: 0.41943359375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.00%) |Training time=0.40s (18.86%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.81 +epoch: 0|step: 662|ppo_ep: 1|act_loss: -0.1300048828125|cri_loss: -0.054443359375|unsuper_loss: 0.0 +average reward score: -0.875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.90%) |Training time=0.41s (18.97%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 663|ppo_ep: 1|act_loss: -0.0911865234375|cri_loss: -0.0396728515625|unsuper_loss: 0.0 +average reward score: -0.47216796875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.91%) |Training time=0.41s (18.95%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81 +epoch: 0|step: 664|ppo_ep: 1|act_loss: -0.007312774658203125|cri_loss: -0.00016021728515625|unsuper_loss: 0.0 +average reward score: -0.41162109375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.09%) |Training time=0.40s (18.75%) |Others=0.11 (5.16%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.81 +epoch: 0|step: 665|ppo_ep: 1|act_loss: 0.006931304931640625|cri_loss: 0.00670623779296875|unsuper_loss: 0.0 +average reward score: 0.76318359375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.82%) |Training time=0.41s (19.05%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 666|ppo_ep: 1|act_loss: 0.05279541015625|cri_loss: 0.030670166015625|unsuper_loss: 0.0 +average reward score: -1.1728515625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.91%) |Training time=0.41s (18.96%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 667|ppo_ep: 1|act_loss: -0.04705810546875|cri_loss: -0.018035888671875|unsuper_loss: 0.0 +average reward score: 0.5361328125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.90%) |Training time=0.41s (18.97%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 668|ppo_ep: 1|act_loss: 0.03460693359375|cri_loss: 0.0204620361328125|unsuper_loss: 0.0 +average reward score: -0.34765625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.84%) |Training time=0.41s (19.03%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +[2023-04-22 00:02:49,289] [INFO] [logging.py:96:log_dist] [Rank 0] step=670, skipped=9, lr=[2.5417103711968625e-06, 2.5417103711968625e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-22 00:02:49,307] [INFO] [timer.py:199:stop] epoch=0/micro_step=670/global_step=670, RunningAvgSamplesPerSec=129.22857543246127, CurrSamplesPerSec=129.82911444443477, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-22 00:02:49,400] [INFO] [logging.py:96:log_dist] [Rank 0] step=670, skipped=9, lr=[1.316948378858478e-06, 1.316948378858478e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 669|ppo_ep: 1|act_loss: 0.0263671875|cri_loss: 0.016326904296875|unsuper_loss: 0.0 +average reward score: 0.2373046875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.80%) |Training time=0.41s (19.09%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 670|ppo_ep: 1|act_loss: 0.0963134765625|cri_loss: 0.05322265625|unsuper_loss: 0.0 +average reward score: -0.88916015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.53%) |Training time=0.42s (19.35%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.81 +epoch: 0|step: 671|ppo_ep: 1|act_loss: 0.0141448974609375|cri_loss: 0.01035308837890625|unsuper_loss: 0.0 +average reward score: -1.5546875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.80%) |Training time=0.41s (19.08%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 672|ppo_ep: 1|act_loss: 0.0086212158203125|cri_loss: 0.0061492919921875|unsuper_loss: 0.0 +average reward score: -1.0087890625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.76%) |Training time=0.41s (19.12%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.82 +epoch: 0|step: 673|ppo_ep: 1|act_loss: 0.0135345458984375|cri_loss: 0.01085662841796875|unsuper_loss: 0.0 +average reward score: -0.50830078125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.81%) |Training time=0.41s (19.07%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.82 +epoch: 0|step: 674|ppo_ep: 1|act_loss: -0.02301025390625|cri_loss: -0.0066375732421875|unsuper_loss: 0.0 +average reward score: 0.37548828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.83%) |Training time=0.41s (19.04%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.82 +epoch: 0|step: 675|ppo_ep: 1|act_loss: -0.01021575927734375|cri_loss: 0.0|unsuper_loss: 0.0 +average reward score: 0.1141357421875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.69%) |Training time=0.41s (19.19%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.82 +epoch: 0|step: 676|ppo_ep: 1|act_loss: -0.041412353515625|cri_loss: -0.0190277099609375|unsuper_loss: 0.0 +average reward score: -0.9560546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.63%) |Training time=0.41s (19.26%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.82 +epoch: 0|step: 677|ppo_ep: 1|act_loss: 0.1141357421875|cri_loss: 0.06134033203125|unsuper_loss: 0.0 +average reward score: -2.740234375 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.80%) |Training time=0.40s (17.33%) |Others=0.11 (4.87%)|CurSamplesPerSec=13.82 |AvgSamplesPerSec=14.81 +epoch: 0|step: 678|ppo_ep: 1|act_loss: 0.0186309814453125|cri_loss: 0.0108795166015625|unsuper_loss: 0.0 +average reward score: -1.8671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.08%) |Training time=0.41s (18.83%) |Others=0.11 (5.09%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.81 +[2023-04-22 00:03:10,942] [INFO] [logging.py:96:log_dist] [Rank 0] step=680, skipped=9, lr=[2.386926119214098e-06, 2.386926119214098e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-22 00:03:10,960] [INFO] [timer.py:199:stop] epoch=0/micro_step=680/global_step=680, RunningAvgSamplesPerSec=129.2317742094129, CurrSamplesPerSec=129.0109740964417, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-22 00:03:11,052] [INFO] [logging.py:96:log_dist] [Rank 0] step=680, skipped=9, lr=[1.2367492845668901e-06, 1.2367492845668901e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 679|ppo_ep: 1|act_loss: 0.0460205078125|cri_loss: 0.030242919921875|unsuper_loss: 0.0 +average reward score: 0.059844970703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.75%) |Training time=0.41s (19.14%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 680|ppo_ep: 1|act_loss: 0.075439453125|cri_loss: 0.0390625|unsuper_loss: 0.0 +average reward score: -1.162109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.77%) |Training time=0.41s (19.11%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81 +epoch: 0|step: 681|ppo_ep: 1|act_loss: 0.0213470458984375|cri_loss: 0.016357421875|unsuper_loss: 0.0 +average reward score: -0.341552734375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.86%) |Training time=0.41s (19.01%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 682|ppo_ep: 1|act_loss: 0.05267333984375|cri_loss: 0.0274200439453125|unsuper_loss: 0.0 +average reward score: -1.1796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.78%) |Training time=0.41s (19.14%) |Others=0.11 (5.08%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.81 +epoch: 0|step: 683|ppo_ep: 1|act_loss: 0.0460205078125|cri_loss: 0.030517578125|unsuper_loss: 0.0 +average reward score: -1.4345703125 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.74s (77.13%) |Training time=0.40s (17.97%) |Others=0.11 (4.90%)|CurSamplesPerSec=14.22 |AvgSamplesPerSec=14.81 +epoch: 0|step: 684|ppo_ep: 1|act_loss: 0.03594970703125|cri_loss: 0.01953125|unsuper_loss: 0.0 +average reward score: -1.2958984375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.66%) |Training time=0.41s (19.22%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 685|ppo_ep: 1|act_loss: -0.0040435791015625|cri_loss: 0.002941131591796875|unsuper_loss: 0.0 +average reward score: -0.37841796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.55%) |Training time=0.41s (19.32%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 686|ppo_ep: 1|act_loss: -0.1070556640625|cri_loss: -0.04705810546875|unsuper_loss: 0.0 +average reward score: -0.7021484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.64%) |Training time=0.41s (19.24%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.81 +epoch: 0|step: 687|ppo_ep: 1|act_loss: 0.1385498046875|cri_loss: 0.0823974609375|unsuper_loss: 0.0 +average reward score: 0.66650390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.64%) |Training time=0.41s (19.23%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81 +epoch: 0|step: 688|ppo_ep: 1|act_loss: -0.055328369140625|cri_loss: -0.022857666015625|unsuper_loss: 0.0 +average reward score: -0.525390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.65%) |Training time=0.41s (19.24%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +[2023-04-22 00:03:32,527] [INFO] [logging.py:96:log_dist] [Rank 0] step=690, skipped=9, lr=[2.2354408639905283e-06, 2.2354408639905283e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-22 00:03:32,546] [INFO] [timer.py:199:stop] epoch=0/micro_step=690/global_step=690, RunningAvgSamplesPerSec=129.22223728355166, CurrSamplesPerSec=127.72447132669355, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-22 00:03:32,638] [INFO] [logging.py:96:log_dist] [Rank 0] step=690, skipped=9, lr=[1.158259515020999e-06, 1.158259515020999e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 689|ppo_ep: 1|act_loss: 0.058563232421875|cri_loss: 0.03790283203125|unsuper_loss: 0.0 +average reward score: -0.890625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.62%) |Training time=0.41s (19.27%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 690|ppo_ep: 1|act_loss: -0.01146697998046875|cri_loss: -0.001739501953125|unsuper_loss: 0.0 +average reward score: -0.029541015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.57%) |Training time=0.41s (19.30%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 691|ppo_ep: 1|act_loss: 0.06390380859375|cri_loss: 0.039794921875|unsuper_loss: 0.0 +average reward score: -1.775390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.59%) |Training time=0.41s (19.29%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81 +epoch: 0|step: 692|ppo_ep: 1|act_loss: 0.049163818359375|cri_loss: 0.02947998046875|unsuper_loss: 0.0 +average reward score: -0.57958984375 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.61%) |Training time=0.41s (17.63%) |Others=0.11 (4.76%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.81 +epoch: 0|step: 693|ppo_ep: 1|act_loss: -0.039581298828125|cri_loss: -0.018524169921875|unsuper_loss: 0.0 +average reward score: -1.0068359375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.67%) |Training time=0.41s (19.19%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 694|ppo_ep: 1|act_loss: 0.01068878173828125|cri_loss: 0.00983428955078125|unsuper_loss: 0.0 +average reward score: -0.206298828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.55%) |Training time=0.42s (19.34%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81 +epoch: 0|step: 695|ppo_ep: 1|act_loss: 0.01065826416015625|cri_loss: 0.01073455810546875|unsuper_loss: 0.0 +average reward score: -0.57568359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.51%) |Training time=0.42s (19.35%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.81 +epoch: 0|step: 696|ppo_ep: 1|act_loss: -0.0494384765625|cri_loss: -0.0189971923828125|unsuper_loss: 0.0 +average reward score: 0.024169921875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.69%) |Training time=0.41s (19.18%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 697|ppo_ep: 1|act_loss: 0.006175994873046875|cri_loss: 0.005123138427734375|unsuper_loss: 0.0 +average reward score: -0.35400390625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.64%) |Training time=0.41s (19.23%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 698|ppo_ep: 1|act_loss: 0.110107421875|cri_loss: 0.06536865234375|unsuper_loss: 0.0 +average reward score: -0.66943359375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.71%) |Training time=0.41s (19.16%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +[2023-04-22 00:03:54,159] [INFO] [logging.py:96:log_dist] [Rank 0] step=700, skipped=9, lr=[2.087459582647641e-06, 2.087459582647641e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-22 00:03:54,178] [INFO] [timer.py:199:stop] epoch=0/micro_step=700/global_step=700, RunningAvgSamplesPerSec=129.19963900934587, CurrSamplesPerSec=125.25439802307102, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-22 00:03:54,270] [INFO] [logging.py:96:log_dist] [Rank 0] step=700, skipped=9, lr=[1.0815852759832337e-06, 1.0815852759832337e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 699|ppo_ep: 1|act_loss: 0.0067901611328125|cri_loss: 0.00766754150390625|unsuper_loss: 0.0 +average reward score: -1.095703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.42%) |Training time=0.42s (19.48%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.81 +epoch: 0|step: 700|ppo_ep: 1|act_loss: 0.0031528472900390625|cri_loss: 0.00409698486328125|unsuper_loss: 0.0 +average reward score: -0.25732421875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.42%) |Training time=0.42s (19.45%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 701|ppo_ep: 1|act_loss: -0.05828857421875|cri_loss: -0.0258941650390625|unsuper_loss: 0.0 +average reward score: 0.0322265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.50%) |Training time=0.42s (19.39%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 702|ppo_ep: 1|act_loss: 0.029266357421875|cri_loss: 0.018768310546875|unsuper_loss: 0.0 +average reward score: -1.029296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.44%) |Training time=0.42s (19.43%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 703|ppo_ep: 1|act_loss: -0.07574462890625|cri_loss: -0.0323486328125|unsuper_loss: 0.0 +average reward score: -1.5322265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.50%) |Training time=0.42s (19.37%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 704|ppo_ep: 1|act_loss: 0.091064453125|cri_loss: 0.047698974609375|unsuper_loss: 0.0 +average reward score: -0.6513671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.36%) |Training time=0.42s (19.51%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +[2023-04-22 00:04:07,023] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, but hysteresis is 2. Reducing hysteresis to 1 +[2023-04-22 00:04:07,108] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 705|ppo_ep: 1|act_loss: 0.007541656494140625|cri_loss: 0.006694793701171875|unsuper_loss: 0.0 +average reward score: -0.25390625 +------------------------------------------------------------------------------------- +|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.62s (76.86%) |Training time=0.39s (18.34%) |Others=0.10 (4.79%)|CurSamplesPerSec=15.18 |AvgSamplesPerSec=14.82 +epoch: 0|step: 706|ppo_ep: 1|act_loss: -0.07000732421875|cri_loss: -0.02325439453125|unsuper_loss: 0.0 +average reward score: 0.00335693359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.81%) |Training time=0.41s (19.10%) |Others=0.11 (5.08%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.82 +[2023-04-22 00:04:11,406] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192 +epoch: 0|step: 707|ppo_ep: 1|act_loss: -0.03125|cri_loss: -0.01192474365234375|unsuper_loss: 0.0 +average reward score: 0.06884765625 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.90%) |Training time=0.41s (19.34%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.82 +epoch: 0|step: 708|ppo_ep: 1|act_loss: -0.0141754150390625|cri_loss: -0.00247955322265625|unsuper_loss: 0.0 +average reward score: -0.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.79s (77.33%) |Training time=0.41s (17.91%) |Others=0.11 (4.76%)|CurSamplesPerSec=13.82 |AvgSamplesPerSec=14.81 +[2023-04-22 00:04:15,753] [INFO] [logging.py:96:log_dist] [Rank 0] step=710, skipped=10, lr=[1.957437924380642e-06, 1.957437924380642e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-22 00:04:15,772] [INFO] [timer.py:199:stop] epoch=0/micro_step=710/global_step=710, RunningAvgSamplesPerSec=129.18221159566644, CurrSamplesPerSec=126.92775908472775, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-22 00:04:15,864] [INFO] [logging.py:96:log_dist] [Rank 0] step=710, skipped=11, lr=[1.0216228722853735e-06, 1.0216228722853735e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 709|ppo_ep: 1|act_loss: 0.0721435546875|cri_loss: 0.04412841796875|unsuper_loss: 0.0 +average reward score: -1.06640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.54%) |Training time=0.42s (19.35%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 710|ppo_ep: 1|act_loss: 0.086669921875|cri_loss: 0.0509033203125|unsuper_loss: 0.0 +average reward score: -0.283447265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.40%) |Training time=0.42s (19.48%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81 +epoch: 0|step: 711|ppo_ep: 1|act_loss: 0.01175689697265625|cri_loss: 0.01348876953125|unsuper_loss: 0.0 +average reward score: -0.767578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.81%) |Training time=0.41s (19.08%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.81 +epoch: 0|step: 712|ppo_ep: 1|act_loss: -0.018890380859375|cri_loss: -0.00286865234375|unsuper_loss: 0.0 +average reward score: 0.04656982421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.14%) |Training time=0.43s (19.73%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.81 +epoch: 0|step: 713|ppo_ep: 1|act_loss: -0.06451416015625|cri_loss: -0.026153564453125|unsuper_loss: 0.0 +average reward score: -0.8388671875 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.73s (76.72%) |Training time=0.41s (18.39%) |Others=0.11 (4.90%)|CurSamplesPerSec=14.21 |AvgSamplesPerSec=14.81 +epoch: 0|step: 714|ppo_ep: 1|act_loss: 0.06732177734375|cri_loss: 0.03741455078125|unsuper_loss: 0.0 +average reward score: -0.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.58%) |Training time=0.41s (19.28%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 715|ppo_ep: 1|act_loss: 0.041107177734375|cri_loss: 0.0279388427734375|unsuper_loss: 0.0 +average reward score: -0.82421875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.46%) |Training time=0.42s (19.41%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 716|ppo_ep: 1|act_loss: 0.09375|cri_loss: 0.052093505859375|unsuper_loss: 0.0 +average reward score: -2.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.50%) |Training time=0.42s (19.36%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 717|ppo_ep: 1|act_loss: -0.0201416015625|cri_loss: -0.00740814208984375|unsuper_loss: 0.0 +average reward score: -0.42138671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.50%) |Training time=0.42s (19.38%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 718|ppo_ep: 1|act_loss: -0.0660400390625|cri_loss: -0.0230865478515625|unsuper_loss: 0.0 +average reward score: -0.5458984375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.52%) |Training time=0.41s (19.28%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +[2023-04-22 00:04:37,352] [INFO] [logging.py:96:log_dist] [Rank 0] step=720, skipped=10, lr=[1.8166617060316112e-06, 1.8166617060316112e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-22 00:04:37,371] [INFO] [timer.py:199:stop] epoch=0/micro_step=720/global_step=720, RunningAvgSamplesPerSec=129.14318521309033, CurrSamplesPerSec=124.90552173273748, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-22 00:04:37,463] [INFO] [logging.py:96:log_dist] [Rank 0] step=720, skipped=11, lr=[9.48476295431443e-07, 9.48476295431443e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 719|ppo_ep: 1|act_loss: -0.054290771484375|cri_loss: -0.024017333984375|unsuper_loss: 0.0 +average reward score: -0.84033203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.40%) |Training time=0.42s (19.50%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.81 +epoch: 0|step: 720|ppo_ep: 1|act_loss: 0.05023193359375|cri_loss: 0.03253173828125|unsuper_loss: 0.0 +average reward score: -0.529296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.45%) |Training time=0.42s (19.42%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 721|ppo_ep: 1|act_loss: -0.054718017578125|cri_loss: -0.020660400390625|unsuper_loss: 0.0 +average reward score: -0.69482421875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.54%) |Training time=0.41s (19.34%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 722|ppo_ep: 1|act_loss: -0.01715087890625|cri_loss: -0.00696563720703125|unsuper_loss: 0.0 +average reward score: -1.119140625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.48%) |Training time=0.42s (19.40%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 723|ppo_ep: 1|act_loss: 0.02130126953125|cri_loss: 0.0194854736328125|unsuper_loss: 0.0 +average reward score: 0.0269775390625 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.79s (77.44%) |Training time=0.41s (17.81%) |Others=0.11 (4.76%)|CurSamplesPerSec=13.82 |AvgSamplesPerSec=14.81 +epoch: 0|step: 724|ppo_ep: 1|act_loss: 0.037017822265625|cri_loss: 0.019287109375|unsuper_loss: 0.0 +average reward score: -0.7197265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.55%) |Training time=0.41s (19.33%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 725|ppo_ep: 1|act_loss: -0.0596923828125|cri_loss: -0.023834228515625|unsuper_loss: 0.0 +average reward score: -0.12841796875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.50%) |Training time=0.42s (19.38%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 726|ppo_ep: 1|act_loss: 0.05340576171875|cri_loss: 0.03375244140625|unsuper_loss: 0.0 +average reward score: 0.2296142578125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.66%) |Training time=0.41s (19.21%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81 +epoch: 0|step: 727|ppo_ep: 1|act_loss: 0.025787353515625|cri_loss: 0.019775390625|unsuper_loss: 0.0 +average reward score: -0.9541015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.44%) |Training time=0.42s (19.43%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 728|ppo_ep: 1|act_loss: 0.05279541015625|cri_loss: 0.032928466796875|unsuper_loss: 0.0 +average reward score: -0.59716796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.38%) |Training time=0.42s (19.48%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +[2023-04-22 00:04:58,968] [INFO] [logging.py:96:log_dist] [Rank 0] step=730, skipped=10, lr=[1.6799561183392554e-06, 1.6799561183392554e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-22 00:04:58,987] [INFO] [timer.py:199:stop] epoch=0/micro_step=730/global_step=730, RunningAvgSamplesPerSec=129.10765431344663, CurrSamplesPerSec=126.06426925724891, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-22 00:04:59,079] [INFO] [logging.py:96:log_dist] [Rank 0] step=730, skipped=11, lr=[8.774291101150409e-07, 8.774291101150409e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 729|ppo_ep: 1|act_loss: -0.039642333984375|cri_loss: -0.01251983642578125|unsuper_loss: 0.0 +average reward score: 0.05914306640625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.45%) |Training time=0.42s (19.43%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 730|ppo_ep: 1|act_loss: 0.0360107421875|cri_loss: 0.0208740234375|unsuper_loss: 0.0 +average reward score: -0.266357421875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.35%) |Training time=0.42s (19.52%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 731|ppo_ep: 1|act_loss: -0.0072021484375|cri_loss: 0.00186920166015625|unsuper_loss: 0.0 +average reward score: -0.82177734375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.31%) |Training time=0.42s (19.56%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 732|ppo_ep: 1|act_loss: 0.0694580078125|cri_loss: 0.04595947265625|unsuper_loss: 0.0 +average reward score: -0.93408203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.24%) |Training time=0.42s (19.64%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 733|ppo_ep: 1|act_loss: -0.025909423828125|cri_loss: -0.01158905029296875|unsuper_loss: 0.0 +average reward score: -1.8427734375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.35%) |Training time=0.42s (19.54%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81 +epoch: 0|step: 734|ppo_ep: 1|act_loss: 0.0379638671875|cri_loss: 0.023162841796875|unsuper_loss: 0.0 +average reward score: -0.4833984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.57%) |Training time=0.42s (19.34%) |Others=0.11 (5.09%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.81 +epoch: 0|step: 735|ppo_ep: 1|act_loss: 0.119140625|cri_loss: 0.0655517578125|unsuper_loss: 0.0 +average reward score: -0.5087890625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.14%) |Training time=0.42s (19.74%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.81 +epoch: 0|step: 736|ppo_ep: 1|act_loss: 0.006641387939453125|cri_loss: 0.0052642822265625|unsuper_loss: 0.0 +average reward score: -0.966796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.13%) |Training time=0.42s (19.75%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.81 +epoch: 0|step: 737|ppo_ep: 1|act_loss: -0.04205322265625|cri_loss: -0.0151519775390625|unsuper_loss: 0.0 +average reward score: -0.308837890625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.27%) |Training time=0.42s (19.60%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 738|ppo_ep: 1|act_loss: 0.1021728515625|cri_loss: 0.0552978515625|unsuper_loss: 0.0 +average reward score: -0.8564453125 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.79s (77.30%) |Training time=0.41s (17.93%) |Others=0.11 (4.77%)|CurSamplesPerSec=13.86 |AvgSamplesPerSec=14.81 +[2023-04-22 00:05:20,620] [INFO] [logging.py:96:log_dist] [Rank 0] step=740, skipped=10, lr=[1.5475061398205608e-06, 1.5475061398205608e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-22 00:05:20,638] [INFO] [timer.py:199:stop] epoch=0/micro_step=740/global_step=740, RunningAvgSamplesPerSec=129.03885715382538, CurrSamplesPerSec=123.61775292240925, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-22 00:05:20,731] [INFO] [logging.py:96:log_dist] [Rank 0] step=740, skipped=11, lr=[8.085774514188454e-07, 8.085774514188454e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 739|ppo_ep: 1|act_loss: -0.0238800048828125|cri_loss: -0.0066375732421875|unsuper_loss: 0.0 +average reward score: 0.219970703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.25%) |Training time=0.42s (19.64%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.81 +epoch: 0|step: 740|ppo_ep: 1|act_loss: -0.050323486328125|cri_loss: -0.01593017578125|unsuper_loss: 0.0 +average reward score: -0.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.24%) |Training time=0.42s (19.64%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 741|ppo_ep: 1|act_loss: -0.1131591796875|cri_loss: -0.0484619140625|unsuper_loss: 0.0 +average reward score: -0.92578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.49%) |Training time=0.42s (19.41%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.81 +epoch: 0|step: 742|ppo_ep: 1|act_loss: 0.0104217529296875|cri_loss: 0.00846099853515625|unsuper_loss: 0.0 +average reward score: -1.607421875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.80%) |Training time=0.44s (20.14%) |Others=0.11 (5.06%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.81 +epoch: 0|step: 743|ppo_ep: 1|act_loss: -0.0821533203125|cri_loss: -0.036956787109375|unsuper_loss: 0.0 +average reward score: -1.2998046875 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.73s (76.94%) |Training time=0.41s (18.17%) |Others=0.11 (4.89%)|CurSamplesPerSec=14.22 |AvgSamplesPerSec=14.81 +epoch: 0|step: 744|ppo_ep: 1|act_loss: -0.048980712890625|cri_loss: -0.0201416015625|unsuper_loss: 0.0 +average reward score: -0.8017578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.46%) |Training time=0.42s (19.42%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.81 +epoch: 0|step: 745|ppo_ep: 1|act_loss: -0.02099609375|cri_loss: -0.0076904296875|unsuper_loss: 0.0 +average reward score: -1.23828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.83%) |Training time=0.41s (19.05%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 746|ppo_ep: 1|act_loss: -0.087158203125|cri_loss: -0.0394287109375|unsuper_loss: 0.0 +average reward score: -0.9384765625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.89%) |Training time=0.41s (19.00%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 747|ppo_ep: 1|act_loss: 0.04754638671875|cri_loss: 0.032012939453125|unsuper_loss: 0.0 +average reward score: -0.67919921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.91%) |Training time=0.41s (18.97%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 748|ppo_ep: 1|act_loss: -0.0220489501953125|cri_loss: -0.00676727294921875|unsuper_loss: 0.0 +average reward score: -1.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.83%) |Training time=0.41s (19.04%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81 +[2023-04-22 00:05:42,230] [INFO] [logging.py:96:log_dist] [Rank 0] step=750, skipped=10, lr=[1.4194909906596752e-06, 1.4194909906596752e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-22 00:05:42,248] [INFO] [timer.py:199:stop] epoch=0/micro_step=750/global_step=750, RunningAvgSamplesPerSec=129.0158345602834, CurrSamplesPerSec=132.17691641062004, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-22 00:05:42,584] [INFO] [logging.py:96:log_dist] [Rank 0] step=750, skipped=11, lr=[7.420144836233564e-07, 7.420144836233564e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 749|ppo_ep: 1|act_loss: 0.0299072265625|cri_loss: 0.018341064453125|unsuper_loss: 0.0 +average reward score: -1.2841796875 +------------------------------------------------------------------------------------- +|E2E latency=2.39s |Gather latency=0.00s (0.00%) |Generate time=1.63s (68.29%) |Training time=0.40s (16.95%) |Others=0.35 (14.75%)|CurSamplesPerSec=13.40 |AvgSamplesPerSec=14.81 +epoch: 0|step: 750|ppo_ep: 1|act_loss: -0.0872802734375|cri_loss: -0.0391845703125|unsuper_loss: 0.0 +average reward score: -0.34130859375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.75%) |Training time=0.41s (19.13%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 751|ppo_ep: 1|act_loss: 0.024627685546875|cri_loss: 0.019012451171875|unsuper_loss: 0.0 +average reward score: 0.54443359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.76%) |Training time=0.41s (19.12%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81 +epoch: 0|step: 752|ppo_ep: 1|act_loss: 0.0195770263671875|cri_loss: 0.01739501953125|unsuper_loss: 0.0 +average reward score: -0.92041015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.84%) |Training time=0.41s (19.04%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 753|ppo_ep: 1|act_loss: 0.068115234375|cri_loss: 0.0450439453125|unsuper_loss: 0.0 +average reward score: -0.34716796875 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.87%) |Training time=0.40s (17.39%) |Others=0.11 (4.74%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.81 +epoch: 0|step: 754|ppo_ep: 1|act_loss: 0.07269287109375|cri_loss: 0.048675537109375|unsuper_loss: 0.0 +average reward score: -0.7353515625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.88%) |Training time=0.41s (18.98%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 755|ppo_ep: 1|act_loss: 0.0291748046875|cri_loss: 0.021240234375|unsuper_loss: 0.0 +average reward score: -0.09405517578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.82%) |Training time=0.41s (19.07%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.81 +epoch: 0|step: 756|ppo_ep: 1|act_loss: -0.03216552734375|cri_loss: -0.003326416015625|unsuper_loss: 0.0 +average reward score: -1.314453125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.81%) |Training time=0.41s (19.07%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 757|ppo_ep: 1|act_loss: 0.00405120849609375|cri_loss: 0.00621795654296875|unsuper_loss: 0.0 +average reward score: -0.67822265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.86%) |Training time=0.41s (19.01%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 758|ppo_ep: 1|act_loss: 0.05035400390625|cri_loss: 0.027923583984375|unsuper_loss: 0.0 +average reward score: -1.19921875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.99%) |Training time=0.40s (18.88%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +[2023-04-22 00:06:04,108] [INFO] [logging.py:96:log_dist] [Rank 0] step=760, skipped=10, lr=[1.2960838902022257e-06, 1.2960838902022257e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-22 00:06:04,126] [INFO] [timer.py:199:stop] epoch=0/micro_step=760/global_step=760, RunningAvgSamplesPerSec=129.03406317774622, CurrSamplesPerSec=130.88857704291348, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-22 00:06:04,219] [INFO] [logging.py:96:log_dist] [Rank 0] step=760, skipped=11, lr=[6.778302741448104e-07, 6.778302741448104e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 759|ppo_ep: 1|act_loss: 0.1380615234375|cri_loss: 0.08697509765625|unsuper_loss: 0.0 +average reward score: 0.20556640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.92%) |Training time=0.41s (18.98%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 760|ppo_ep: 1|act_loss: -0.0143280029296875|cri_loss: 0.00555419921875|unsuper_loss: 0.0 +average reward score: 0.45263671875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.94%) |Training time=0.41s (18.94%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 761|ppo_ep: 1|act_loss: 0.019561767578125|cri_loss: 0.02337646484375|unsuper_loss: 0.0 +average reward score: -0.7080078125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.90%) |Training time=0.41s (18.97%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 762|ppo_ep: 1|act_loss: -0.0660400390625|cri_loss: -0.02301025390625|unsuper_loss: 0.0 +average reward score: -0.9970703125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.11%) |Training time=0.41s (18.79%) |Others=0.11 (5.09%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.81 +epoch: 0|step: 763|ppo_ep: 1|act_loss: 0.0482177734375|cri_loss: 0.0267181396484375|unsuper_loss: 0.0 +average reward score: -0.8984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.77%) |Training time=0.41s (19.12%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.81 +epoch: 0|step: 764|ppo_ep: 1|act_loss: -0.041107177734375|cri_loss: -0.014190673828125|unsuper_loss: 0.0 +average reward score: -0.765625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.78%) |Training time=0.41s (19.10%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 765|ppo_ep: 1|act_loss: 0.1265869140625|cri_loss: 0.0743408203125|unsuper_loss: 0.0 +average reward score: -0.548828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.87%) |Training time=0.41s (19.02%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 766|ppo_ep: 1|act_loss: 0.1041259765625|cri_loss: 0.0673828125|unsuper_loss: 0.0 +average reward score: -1.3291015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.62%) |Training time=0.41s (19.26%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.81 +epoch: 0|step: 767|ppo_ep: 1|act_loss: 0.0036773681640625|cri_loss: 0.007312774658203125|unsuper_loss: 0.0 +average reward score: -1.1064453125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.87%) |Training time=0.41s (19.02%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 768|ppo_ep: 1|act_loss: 0.05328369140625|cri_loss: 0.034423828125|unsuper_loss: 0.0 +average reward score: -0.29150390625 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.84%) |Training time=0.40s (17.41%) |Others=0.11 (4.75%)|CurSamplesPerSec=13.80 |AvgSamplesPerSec=14.81 +[2023-04-22 00:06:25,760] [INFO] [logging.py:96:log_dist] [Rank 0] step=770, skipped=10, lr=[1.1774518225694339e-06, 1.1774518225694339e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-22 00:06:25,778] [INFO] [timer.py:199:stop] epoch=0/micro_step=770/global_step=770, RunningAvgSamplesPerSec=129.05144542838423, CurrSamplesPerSec=131.80426019163104, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-22 00:06:25,871] [INFO] [logging.py:96:log_dist] [Rank 0] step=770, skipped=11, lr=[6.16111671663524e-07, 6.16111671663524e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 769|ppo_ep: 1|act_loss: -0.07861328125|cri_loss: -0.0296630859375|unsuper_loss: 0.0 +average reward score: -0.77685546875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.96%) |Training time=0.41s (18.92%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 770|ppo_ep: 1|act_loss: 0.05126953125|cri_loss: 0.043121337890625|unsuper_loss: 0.0 +average reward score: -0.8037109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.88%) |Training time=0.41s (19.00%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 771|ppo_ep: 1|act_loss: 0.098388671875|cri_loss: 0.064208984375|unsuper_loss: 0.0 +average reward score: -0.7880859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.93%) |Training time=0.41s (18.97%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.81 +epoch: 0|step: 772|ppo_ep: 1|act_loss: -0.03338623046875|cri_loss: -0.004425048828125|unsuper_loss: 0.0 +average reward score: -0.031341552734375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.02%) |Training time=0.41s (18.94%) |Others=0.11 (5.04%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.81 +epoch: 0|step: 773|ppo_ep: 1|act_loss: 0.0093536376953125|cri_loss: 0.01087188720703125|unsuper_loss: 0.0 +average reward score: -0.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.74s (77.15%) |Training time=0.40s (17.97%) |Others=0.11 (4.88%)|CurSamplesPerSec=14.23 |AvgSamplesPerSec=14.81 +epoch: 0|step: 774|ppo_ep: 1|act_loss: -0.04620361328125|cri_loss: -0.00244140625|unsuper_loss: 0.0 +average reward score: -1.115234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.90%) |Training time=0.41s (18.97%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81 +epoch: 0|step: 775|ppo_ep: 1|act_loss: 0.02081298828125|cri_loss: 0.0173797607421875|unsuper_loss: 0.0 +average reward score: -0.59375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.90%) |Training time=0.41s (18.97%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 776|ppo_ep: 1|act_loss: 0.1087646484375|cri_loss: 0.0655517578125|unsuper_loss: 0.0 +average reward score: 0.035125732421875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.81%) |Training time=0.41s (19.07%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81 +epoch: 0|step: 777|ppo_ep: 1|act_loss: 0.06744384765625|cri_loss: 0.0421142578125|unsuper_loss: 0.0 +average reward score: -0.5947265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.89%) |Training time=0.41s (18.99%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81 +epoch: 0|step: 778|ppo_ep: 1|act_loss: 0.06231689453125|cri_loss: 0.03839111328125|unsuper_loss: 0.0 +average reward score: -0.9296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.82%) |Training time=0.41s (19.06%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.81 +[2023-04-22 00:06:47,377] [INFO] [logging.py:96:log_dist] [Rank 0] step=780, skipped=10, lr=[1.0637553107092514e-06, 1.0637553107092514e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-22 00:06:47,395] [INFO] [timer.py:199:stop] epoch=0/micro_step=780/global_step=780, RunningAvgSamplesPerSec=129.06477217961265, CurrSamplesPerSec=129.224120318531, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-22 00:06:47,487] [INFO] [logging.py:96:log_dist] [Rank 0] step=780, skipped=11, lr=[5.569421886075563e-07, 5.569421886075563e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 779|ppo_ep: 1|act_loss: 0.035858154296875|cri_loss: 0.02398681640625|unsuper_loss: 0.0 +average reward score: -0.007568359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.80%) |Training time=0.41s (19.10%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.81 +epoch: 0|step: 780|ppo_ep: 1|act_loss: -0.0109710693359375|cri_loss: 0.0048370361328125|unsuper_loss: 0.0 +average reward score: -0.6044921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.77%) |Training time=0.41s (19.11%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 781|ppo_ep: 1|act_loss: -0.029937744140625|cri_loss: -0.0052337646484375|unsuper_loss: 0.0 +average reward score: -0.9375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.79%) |Training time=0.41s (19.05%) |Others=0.11 (5.16%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81 +epoch: 0|step: 782|ppo_ep: 1|act_loss: 0.002567291259765625|cri_loss: 0.008209228515625|unsuper_loss: 0.0 +average reward score: 0.55078125 +------------------------------------------------------------------------------------- +|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.63s (72.64%) |Training time=0.42s (18.90%) |Others=0.19 (8.46%)|CurSamplesPerSec=14.28 |AvgSamplesPerSec=14.81 +epoch: 0|step: 783|ppo_ep: 1|act_loss: -0.0092010498046875|cri_loss: 0.007904052734375|unsuper_loss: 0.0 +average reward score: -0.477783203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.98%) |Training time=0.41s (18.89%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81 +[2023-04-22 00:06:58,196] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192 +epoch: 0|step: 784|ppo_ep: 1|act_loss: -0.075927734375|cri_loss: -0.01995849609375|unsuper_loss: 0.0 +average reward score: -1.1171875 +------------------------------------------------------------------------------------- +|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.72%) |Training time=0.38s (18.10%) |Others=0.11 (5.18%)|CurSamplesPerSec=15.10 |AvgSamplesPerSec=14.81 +epoch: 0|step: 785|ppo_ep: 1|act_loss: 0.0209197998046875|cri_loss: 0.0208587646484375|unsuper_loss: 0.0 +average reward score: -0.91796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.75%) |Training time=0.41s (19.13%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.81 +epoch: 0|step: 786|ppo_ep: 1|act_loss: 0.06658935546875|cri_loss: 0.0540771484375|unsuper_loss: 0.0 +average reward score: -0.619140625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.95%) |Training time=0.41s (18.93%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 787|ppo_ep: 1|act_loss: 0.01556396484375|cri_loss: 0.01529693603515625|unsuper_loss: 0.0 +average reward score: -1.31640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.86%) |Training time=0.41s (19.02%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 788|ppo_ep: 1|act_loss: 0.0511474609375|cri_loss: 0.03466796875|unsuper_loss: 0.0 +average reward score: -0.470703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.77%) |Training time=0.41s (19.12%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81 +[2023-04-22 00:07:08,913] [INFO] [logging.py:96:log_dist] [Rank 0] step=790, skipped=11, lr=[9.657756441308542e-07, 9.657756441308542e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-22 00:07:08,932] [INFO] [timer.py:199:stop] epoch=0/micro_step=790/global_step=790, RunningAvgSamplesPerSec=129.08285400329538, CurrSamplesPerSec=129.59669852084232, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-22 00:07:09,025] [INFO] [logging.py:96:log_dist] [Rank 0] step=790, skipped=11, lr=[5.004018881507016e-07, 5.004018881507016e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 789|ppo_ep: 1|act_loss: -0.00115966796875|cri_loss: 0.007965087890625|unsuper_loss: 0.0 +average reward score: 0.03558349609375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.81%) |Training time=0.41s (19.08%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81 +epoch: 0|step: 790|ppo_ep: 1|act_loss: 0.02569580078125|cri_loss: 0.0225677490234375|unsuper_loss: 0.0 +average reward score: 0.43408203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.65%) |Training time=0.41s (18.88%) |Others=0.12 (5.47%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.81 +epoch: 0|step: 791|ppo_ep: 1|act_loss: 0.17724609375|cri_loss: 0.1102294921875|unsuper_loss: 0.0 +average reward score: 0.1558837890625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.95%) |Training time=0.41s (18.92%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 792|ppo_ep: 1|act_loss: 0.10076904296875|cri_loss: 0.065185546875|unsuper_loss: 0.0 +average reward score: -1.01171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.86%) |Training time=0.41s (19.01%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +[2023-04-22 00:07:17,503] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096 +epoch: 0|step: 793|ppo_ep: 1|act_loss: -0.0789794921875|cri_loss: -0.017547607421875|unsuper_loss: 0.0 +average reward score: -0.6552734375 +------------------------------------------------------------------------------------- +|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.82%) |Training time=0.38s (18.01%) |Others=0.11 (5.17%)|CurSamplesPerSec=15.10 |AvgSamplesPerSec=14.81 +epoch: 0|step: 794|ppo_ep: 1|act_loss: 0.01470947265625|cri_loss: 0.01580810546875|unsuper_loss: 0.0 +average reward score: -0.64208984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.90%) |Training time=0.41s (18.98%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 795|ppo_ep: 1|act_loss: 0.08331298828125|cri_loss: 0.0516357421875|unsuper_loss: 0.0 +average reward score: -0.38134765625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.84%) |Training time=0.41s (19.03%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 796|ppo_ep: 1|act_loss: 0.1563720703125|cri_loss: 0.0919189453125|unsuper_loss: 0.0 +average reward score: 0.177001953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.92%) |Training time=0.41s (18.95%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 797|ppo_ep: 1|act_loss: 0.157958984375|cri_loss: 0.09356689453125|unsuper_loss: 0.0 +average reward score: -0.78759765625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.03%) |Training time=0.40s (18.83%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 798|ppo_ep: 1|act_loss: 0.0290374755859375|cri_loss: 0.021209716796875|unsuper_loss: 0.0 +average reward score: -0.311767578125 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.82s (78.13%) |Training time=0.40s (17.16%) |Others=0.11 (4.71%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.81 +[2023-04-22 00:07:30,540] [INFO] [logging.py:96:log_dist] [Rank 0] step=800, skipped=12, lr=[8.720258705240782e-07, 8.720258705240782e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-22 00:07:30,558] [INFO] [timer.py:199:stop] epoch=0/micro_step=800/global_step=800, RunningAvgSamplesPerSec=129.1268465720084, CurrSamplesPerSec=130.86203289058668, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-22 00:07:30,651] [INFO] [logging.py:96:log_dist] [Rank 0] step=800, skipped=11, lr=[4.4656727587773506e-07, 4.4656727587773506e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 799|ppo_ep: 1|act_loss: 0.1204833984375|cri_loss: 0.0723876953125|unsuper_loss: 0.0 +average reward score: 0.513671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.89%) |Training time=0.41s (18.99%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 800|ppo_ep: 1|act_loss: -0.03778076171875|cri_loss: -0.01371002197265625|unsuper_loss: 0.0 +average reward score: -1.349609375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.60%) |Training time=0.42s (19.29%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.81 +epoch: 0|step: 801|ppo_ep: 1|act_loss: -0.002838134765625|cri_loss: 0.01058197021484375|unsuper_loss: 0.0 +average reward score: 0.189208984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.94%) |Training time=0.41s (18.95%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.81 +epoch: 0|step: 802|ppo_ep: 1|act_loss: -0.017333984375|cri_loss: 0.0048980712890625|unsuper_loss: 0.0 +average reward score: -0.0274658203125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.61%) |Training time=0.41s (18.74%) |Others=0.12 (5.65%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.81 +epoch: 0|step: 803|ppo_ep: 1|act_loss: -0.00421142578125|cri_loss: 0.00772857666015625|unsuper_loss: 0.0 +average reward score: -0.50634765625 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.71s (76.84%) |Training time=0.41s (18.23%) |Others=0.11 (4.93%)|CurSamplesPerSec=14.35 |AvgSamplesPerSec=14.81 +epoch: 0|step: 804|ppo_ep: 1|act_loss: -0.0230560302734375|cri_loss: -0.0055389404296875|unsuper_loss: 0.0 +average reward score: -1.28125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.86%) |Training time=0.41s (19.02%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 805|ppo_ep: 1|act_loss: 0.068115234375|cri_loss: 0.04962158203125|unsuper_loss: 0.0 +average reward score: 0.15283203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.88%) |Training time=0.41s (19.00%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81 +epoch: 0|step: 806|ppo_ep: 1|act_loss: 0.03521728515625|cri_loss: 0.028228759765625|unsuper_loss: 0.0 +average reward score: -1.0 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.92%) |Training time=0.41s (18.96%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 807|ppo_ep: 1|act_loss: 0.0712890625|cri_loss: 0.04302978515625|unsuper_loss: 0.0 +average reward score: 0.01416015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.69%) |Training time=0.41s (19.19%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81 +epoch: 0|step: 808|ppo_ep: 1|act_loss: 0.130859375|cri_loss: 0.0811767578125|unsuper_loss: 0.0 +average reward score: -0.37646484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.72%) |Training time=0.41s (19.17%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81 +[2023-04-22 00:07:52,148] [INFO] [logging.py:96:log_dist] [Rank 0] step=810, skipped=12, lr=[7.729452588770968e-07, 7.729452588770968e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-22 00:07:52,166] [INFO] [timer.py:199:stop] epoch=0/micro_step=810/global_step=810, RunningAvgSamplesPerSec=129.13453205726572, CurrSamplesPerSec=132.02700392389087, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-22 00:07:52,259] [INFO] [logging.py:96:log_dist] [Rank 0] step=810, skipped=11, lr=[3.9551119626347693e-07, 3.9551119626347693e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 809|ppo_ep: 1|act_loss: 0.0173797607421875|cri_loss: 0.0125274658203125|unsuper_loss: 0.0 +average reward score: -0.0780029296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.01%) |Training time=0.41s (18.88%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 810|ppo_ep: 1|act_loss: 0.1641845703125|cri_loss: 0.10888671875|unsuper_loss: 0.0 +average reward score: 0.4169921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.69%) |Training time=0.41s (19.20%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.81 +epoch: 0|step: 811|ppo_ep: 1|act_loss: -0.03912353515625|cri_loss: -0.0121917724609375|unsuper_loss: 0.0 +average reward score: 0.18603515625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.83%) |Training time=0.41s (18.98%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.81 +epoch: 0|step: 812|ppo_ep: 1|act_loss: 0.04656982421875|cri_loss: 0.03363037109375|unsuper_loss: 0.0 +average reward score: 0.2425537109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.78%) |Training time=0.41s (19.10%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.81 +epoch: 0|step: 813|ppo_ep: 1|act_loss: 0.1826171875|cri_loss: 0.1153564453125|unsuper_loss: 0.0 +average reward score: -0.87109375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.91%) |Training time=0.41s (18.95%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 814|ppo_ep: 1|act_loss: -0.044952392578125|cri_loss: -0.014892578125|unsuper_loss: 0.0 +average reward score: -0.431640625 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.87%) |Training time=0.40s (17.36%) |Others=0.11 (4.77%)|CurSamplesPerSec=13.86 |AvgSamplesPerSec=14.81 +epoch: 0|step: 815|ppo_ep: 1|act_loss: -0.0244140625|cri_loss: -0.0008392333984375|unsuper_loss: 0.0 +average reward score: -1.0859375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.81%) |Training time=0.41s (19.08%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 816|ppo_ep: 1|act_loss: 0.04351806640625|cri_loss: 0.02813720703125|unsuper_loss: 0.0 +average reward score: -1.40234375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.92%) |Training time=0.41s (18.96%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 817|ppo_ep: 1|act_loss: 0.167724609375|cri_loss: 0.0970458984375|unsuper_loss: 0.0 +average reward score: -0.423828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.50%) |Training time=0.42s (19.41%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.81 +epoch: 0|step: 818|ppo_ep: 1|act_loss: 0.0150604248046875|cri_loss: 0.01702880859375|unsuper_loss: 0.0 +average reward score: -0.4453125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.89%) |Training time=0.41s (18.99%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +[2023-04-22 00:08:13,784] [INFO] [logging.py:96:log_dist] [Rank 0] step=820, skipped=12, lr=[6.793475473031601e-07, 6.793475473031601e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-22 00:08:13,802] [INFO] [timer.py:199:stop] epoch=0/micro_step=820/global_step=820, RunningAvgSamplesPerSec=129.15223569083219, CurrSamplesPerSec=129.81329278896325, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-22 00:08:13,895] [INFO] [logging.py:96:log_dist] [Rank 0] step=820, skipped=11, lr=[3.473027341057728e-07, 3.473027341057728e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 819|ppo_ep: 1|act_loss: 0.0097503662109375|cri_loss: 0.01690673828125|unsuper_loss: 0.0 +average reward score: -1.5029296875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.81%) |Training time=0.41s (19.09%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 820|ppo_ep: 1|act_loss: -0.0237579345703125|cri_loss: 0.0008087158203125|unsuper_loss: 0.0 +average reward score: -0.583984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.69%) |Training time=0.41s (19.18%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 821|ppo_ep: 1|act_loss: -0.08392333984375|cri_loss: -0.0306854248046875|unsuper_loss: 0.0 +average reward score: 0.0511474609375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.91%) |Training time=0.41s (18.96%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 822|ppo_ep: 1|act_loss: 0.1055908203125|cri_loss: 0.06280517578125|unsuper_loss: 0.0 +average reward score: 0.37451171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.86%) |Training time=0.41s (19.02%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 823|ppo_ep: 1|act_loss: 0.07537841796875|cri_loss: 0.0511474609375|unsuper_loss: 0.0 +average reward score: -0.4658203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.56%) |Training time=0.42s (19.34%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.81 +epoch: 0|step: 824|ppo_ep: 1|act_loss: -0.12249755859375|cri_loss: -0.0528564453125|unsuper_loss: 0.0 +average reward score: 0.42626953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.35%) |Training time=0.40s (18.53%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81 +epoch: 0|step: 825|ppo_ep: 1|act_loss: 0.03851318359375|cri_loss: 0.034942626953125|unsuper_loss: 0.0 +average reward score: -0.4189453125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.92%) |Training time=0.41s (18.97%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 826|ppo_ep: 1|act_loss: 0.00485992431640625|cri_loss: 0.0159912109375|unsuper_loss: 0.0 +average reward score: -1.5087890625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.75%) |Training time=0.41s (19.13%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81 +epoch: 0|step: 827|ppo_ep: 1|act_loss: -0.03228759765625|cri_loss: -0.0021820068359375|unsuper_loss: 0.0 +average reward score: -1.2861328125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.83%) |Training time=0.41s (19.05%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 828|ppo_ep: 1|act_loss: 0.060791015625|cri_loss: 0.051513671875|unsuper_loss: 0.0 +average reward score: -0.923828125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.85%) |Training time=0.41s (19.03%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +[2023-04-22 00:08:35,408] [INFO] [logging.py:96:log_dist] [Rank 0] step=830, skipped=12, lr=[5.913593843626703e-07, 5.913593843626703e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-22 00:08:35,427] [INFO] [timer.py:199:stop] epoch=0/micro_step=830/global_step=830, RunningAvgSamplesPerSec=129.1667907504843, CurrSamplesPerSec=131.88378073237908, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-22 00:08:35,519] [INFO] [logging.py:96:log_dist] [Rank 0] step=830, skipped=11, lr=[3.020071210457451e-07, 3.020071210457451e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 829|ppo_ep: 1|act_loss: -0.0938720703125|cri_loss: -0.020263671875|unsuper_loss: 0.0 +average reward score: -1.02734375 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.79s (77.66%) |Training time=0.41s (17.59%) |Others=0.11 (4.75%)|CurSamplesPerSec=13.88 |AvgSamplesPerSec=14.81 +epoch: 0|step: 830|ppo_ep: 1|act_loss: 0.071044921875|cri_loss: 0.046875|unsuper_loss: 0.0 +average reward score: -0.0699462890625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.79%) |Training time=0.41s (19.09%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 831|ppo_ep: 1|act_loss: 0.03875732421875|cri_loss: 0.031890869140625|unsuper_loss: 0.0 +average reward score: -1.3203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.56%) |Training time=0.42s (19.34%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.81 +epoch: 0|step: 832|ppo_ep: 1|act_loss: -0.0081939697265625|cri_loss: 0.00579071044921875|unsuper_loss: 0.0 +average reward score: -0.0244140625 +------------------------------------------------------------------------------------- +|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.66s (73.88%) |Training time=0.42s (18.78%) |Others=0.16 (7.34%)|CurSamplesPerSec=14.28 |AvgSamplesPerSec=14.81 +epoch: 0|step: 833|ppo_ep: 1|act_loss: 0.06817626953125|cri_loss: 0.047027587890625|unsuper_loss: 0.0 +average reward score: 0.08837890625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.77%) |Training time=0.41s (19.10%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 834|ppo_ep: 1|act_loss: 0.066162109375|cri_loss: 0.048828125|unsuper_loss: 0.0 +average reward score: -1.515625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.68%) |Training time=0.41s (19.21%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 835|ppo_ep: 1|act_loss: 0.0377197265625|cri_loss: 0.0400390625|unsuper_loss: 0.0 +average reward score: -1.193359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.56%) |Training time=0.42s (19.33%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.81 +epoch: 0|step: 836|ppo_ep: 1|act_loss: 0.009918212890625|cri_loss: 0.01409912109375|unsuper_loss: 0.0 +average reward score: -0.8408203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.66%) |Training time=0.41s (19.22%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 837|ppo_ep: 1|act_loss: -0.0616455078125|cri_loss: -0.023040771484375|unsuper_loss: 0.0 +average reward score: -1.115234375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.90%) |Training time=0.41s (18.97%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 838|ppo_ep: 1|act_loss: 0.008087158203125|cri_loss: 0.025665283203125|unsuper_loss: 0.0 +average reward score: -1.2861328125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.75%) |Training time=0.41s (19.13%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +[2023-04-22 00:08:56,969] [INFO] [logging.py:96:log_dist] [Rank 0] step=840, skipped=12, lr=[5.090998282460625e-07, 5.090998282460625e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-22 00:08:56,988] [INFO] [timer.py:199:stop] epoch=0/micro_step=840/global_step=840, RunningAvgSamplesPerSec=129.1591471189204, CurrSamplesPerSec=133.80120484930413, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-22 00:08:57,080] [INFO] [logging.py:96:log_dist] [Rank 0] step=840, skipped=11, lr=[2.596856473018208e-07, 2.596856473018208e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 839|ppo_ep: 1|act_loss: -0.03125|cri_loss: -0.0068511962890625|unsuper_loss: 0.0 +average reward score: -0.381591796875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.14%) |Training time=0.40s (18.75%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 840|ppo_ep: 1|act_loss: 0.076416015625|cri_loss: 0.044891357421875|unsuper_loss: 0.0 +average reward score: 0.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.62%) |Training time=0.41s (19.26%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 841|ppo_ep: 1|act_loss: 0.097900390625|cri_loss: 0.06298828125|unsuper_loss: 0.0 +average reward score: -0.48681640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.75%) |Training time=0.41s (19.14%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 842|ppo_ep: 1|act_loss: 0.03302001953125|cri_loss: 0.0269775390625|unsuper_loss: 0.0 +average reward score: -0.4970703125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.68%) |Training time=0.41s (19.20%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 843|ppo_ep: 1|act_loss: -0.0022125244140625|cri_loss: 0.0120697021484375|unsuper_loss: 0.0 +average reward score: -0.336669921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.73%) |Training time=0.41s (19.15%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 844|ppo_ep: 1|act_loss: 0.0711669921875|cri_loss: 0.047119140625|unsuper_loss: 0.0 +average reward score: -0.3251953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.70%) |Training time=0.41s (19.19%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 845|ppo_ep: 1|act_loss: 0.16259765625|cri_loss: 0.09173583984375|unsuper_loss: 0.0 +average reward score: -0.65576171875 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.81s (77.82%) |Training time=0.41s (17.46%) |Others=0.11 (4.72%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.81 +epoch: 0|step: 846|ppo_ep: 1|act_loss: 0.011627197265625|cri_loss: 0.01849365234375|unsuper_loss: 0.0 +average reward score: -0.126220703125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.75%) |Training time=0.41s (19.11%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 847|ppo_ep: 1|act_loss: -0.0119171142578125|cri_loss: 0.0048828125|unsuper_loss: 0.0 +average reward score: -0.80712890625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.74%) |Training time=0.41s (19.13%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 848|ppo_ep: 1|act_loss: -0.033416748046875|cri_loss: -0.0008544921875|unsuper_loss: 0.0 +average reward score: -0.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.87%) |Training time=0.41s (19.01%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +[2023-04-22 00:09:18,602] [INFO] [logging.py:96:log_dist] [Rank 0] step=850, skipped=12, lr=[4.326801856742557e-07, 4.326801856742557e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-22 00:09:18,620] [INFO] [timer.py:199:stop] epoch=0/micro_step=850/global_step=850, RunningAvgSamplesPerSec=129.16070369009825, CurrSamplesPerSec=130.51692071317785, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-22 00:09:18,713] [INFO] [logging.py:96:log_dist] [Rank 0] step=850, skipped=11, lr=[2.203955787369519e-07, 2.203955787369519e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 849|ppo_ep: 1|act_loss: 0.022552490234375|cri_loss: 0.0177764892578125|unsuper_loss: 0.0 +average reward score: -0.142822265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.82%) |Training time=0.41s (19.06%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 850|ppo_ep: 1|act_loss: 0.14013671875|cri_loss: 0.08056640625|unsuper_loss: 0.0 +average reward score: -0.61767578125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.89%) |Training time=0.41s (18.98%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81 +epoch: 0|step: 851|ppo_ep: 1|act_loss: -0.00830078125|cri_loss: 0.00661468505859375|unsuper_loss: 0.0 +average reward score: 0.05120849609375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.71%) |Training time=0.41s (19.17%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 852|ppo_ep: 1|act_loss: 0.037384033203125|cri_loss: 0.03314208984375|unsuper_loss: 0.0 +average reward score: -0.572265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.89%) |Training time=0.41s (18.98%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 853|ppo_ep: 1|act_loss: -0.075439453125|cri_loss: -0.0286407470703125|unsuper_loss: 0.0 +average reward score: -0.8759765625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.93%) |Training time=0.41s (18.94%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 854|ppo_ep: 1|act_loss: -0.169921875|cri_loss: -0.076171875|unsuper_loss: 0.0 +average reward score: 0.0458984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.72%) |Training time=0.41s (19.15%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 855|ppo_ep: 1|act_loss: 0.03857421875|cri_loss: 0.034423828125|unsuper_loss: 0.0 +average reward score: 0.153564453125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.88%) |Training time=0.41s (19.00%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 856|ppo_ep: 1|act_loss: 0.09478759765625|cri_loss: 0.061431884765625|unsuper_loss: 0.0 +average reward score: -0.1632080078125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.91%) |Training time=0.41s (18.96%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81 +epoch: 0|step: 857|ppo_ep: 1|act_loss: -0.04522705078125|cri_loss: -0.01202392578125|unsuper_loss: 0.0 +average reward score: 0.33740234375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.83%) |Training time=0.41s (19.04%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 858|ppo_ep: 1|act_loss: -0.04248046875|cri_loss: -0.0122222900390625|unsuper_loss: 0.0 +average reward score: 0.302490234375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.84%) |Training time=0.41s (19.03%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +[2023-04-22 00:09:40,030] [INFO] [logging.py:96:log_dist] [Rank 0] step=860, skipped=12, lr=[3.6220386128776603e-07, 3.6220386128776603e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-22 00:09:40,048] [INFO] [timer.py:199:stop] epoch=0/micro_step=860/global_step=860, RunningAvgSamplesPerSec=129.1763337152809, CurrSamplesPerSec=130.2929145354487, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-22 00:09:40,140] [INFO] [logging.py:96:log_dist] [Rank 0] step=860, skipped=11, lr=[1.8419007937126254e-07, 1.8419007937126254e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 859|ppo_ep: 1|act_loss: 0.102783203125|cri_loss: 0.072265625|unsuper_loss: 0.0 +average reward score: -0.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.83%) |Training time=0.41s (19.07%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 860|ppo_ep: 1|act_loss: 0.06964111328125|cri_loss: 0.040771484375|unsuper_loss: 0.0 +average reward score: 0.228515625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.73%) |Training time=0.41s (19.14%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 861|ppo_ep: 1|act_loss: -0.10516357421875|cri_loss: -0.0374755859375|unsuper_loss: 0.0 +average reward score: -0.98193359375 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.50%) |Training time=0.41s (17.76%) |Others=0.11 (4.75%)|CurSamplesPerSec=13.79 |AvgSamplesPerSec=14.81 +epoch: 0|step: 862|ppo_ep: 1|act_loss: 0.1717529296875|cri_loss: 0.10546875|unsuper_loss: 0.0 +average reward score: -0.66259765625 +------------------------------------------------------------------------------------- +|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.66s (74.20%) |Training time=0.43s (19.01%) |Others=0.15 (6.79%)|CurSamplesPerSec=14.31 |AvgSamplesPerSec=14.81 +epoch: 0|step: 863|ppo_ep: 1|act_loss: -0.00164794921875|cri_loss: 0.0216827392578125|unsuper_loss: 0.0 +average reward score: -1.15234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.77%) |Training time=0.41s (19.09%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 864|ppo_ep: 1|act_loss: 0.0867919921875|cri_loss: 0.052520751953125|unsuper_loss: 0.0 +average reward score: -0.98779296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.70%) |Training time=0.41s (19.18%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 865|ppo_ep: 1|act_loss: -0.004730224609375|cri_loss: 0.00838470458984375|unsuper_loss: 0.0 +average reward score: -0.396240234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.78%) |Training time=0.41s (19.10%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 866|ppo_ep: 1|act_loss: 0.14013671875|cri_loss: 0.08197021484375|unsuper_loss: 0.0 +average reward score: -0.88330078125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.68%) |Training time=0.41s (19.21%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 867|ppo_ep: 1|act_loss: 0.128173828125|cri_loss: 0.08990478515625|unsuper_loss: 0.0 +average reward score: -0.408935546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.66%) |Training time=0.41s (19.22%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 868|ppo_ep: 1|act_loss: 0.029205322265625|cri_loss: 0.020660400390625|unsuper_loss: 0.0 +average reward score: -0.28662109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.51%) |Training time=0.42s (19.38%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.81 +[2023-04-22 00:10:01,761] [INFO] [logging.py:96:log_dist] [Rank 0] step=870, skipped=12, lr=[2.9776621772821655e-07, 2.9776621772821655e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-22 00:10:01,779] [INFO] [timer.py:199:stop] epoch=0/micro_step=870/global_step=870, RunningAvgSamplesPerSec=129.1611288792264, CurrSamplesPerSec=128.51771050373821, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-22 00:10:01,872] [INFO] [logging.py:96:log_dist] [Rank 0] step=870, skipped=11, lr=[1.511181394449654e-07, 1.511181394449654e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 869|ppo_ep: 1|act_loss: 0.173583984375|cri_loss: 0.09844970703125|unsuper_loss: 0.0 +average reward score: -0.521484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.71%) |Training time=0.41s (19.18%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 870|ppo_ep: 1|act_loss: 0.2822265625|cri_loss: 0.16162109375|unsuper_loss: 0.0 +average reward score: -0.126953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.70%) |Training time=0.41s (19.18%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 871|ppo_ep: 1|act_loss: 0.01552581787109375|cri_loss: 0.01537322998046875|unsuper_loss: 0.0 +average reward score: -0.005615234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.63%) |Training time=0.41s (19.26%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.81 +epoch: 0|step: 872|ppo_ep: 1|act_loss: -0.024993896484375|cri_loss: -0.00213623046875|unsuper_loss: 0.0 +average reward score: -1.083984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.62%) |Training time=0.41s (19.27%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.81 +epoch: 0|step: 873|ppo_ep: 1|act_loss: 0.0127716064453125|cri_loss: 0.0162506103515625|unsuper_loss: 0.0 +average reward score: -0.2098388671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.99%) |Training time=0.41s (18.93%) |Others=0.11 (5.08%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.81 +epoch: 0|step: 874|ppo_ep: 1|act_loss: 0.11865234375|cri_loss: 0.072265625|unsuper_loss: 0.0 +average reward score: 0.4326171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.71%) |Training time=0.41s (19.17%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 875|ppo_ep: 1|act_loss: 0.08056640625|cri_loss: 0.05224609375|unsuper_loss: 0.0 +average reward score: 0.36376953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.91%) |Training time=0.41s (18.98%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81 +epoch: 0|step: 876|ppo_ep: 1|act_loss: 0.048492431640625|cri_loss: 0.032745361328125|unsuper_loss: 0.0 +average reward score: -0.90234375 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.60%) |Training time=0.41s (17.66%) |Others=0.11 (4.74%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.81 +epoch: 0|step: 877|ppo_ep: 1|act_loss: -0.0577392578125|cri_loss: -0.0191802978515625|unsuper_loss: 0.0 +average reward score: 0.088134765625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.71%) |Training time=0.41s (19.17%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 878|ppo_ep: 1|act_loss: -0.00150299072265625|cri_loss: 0.004222869873046875|unsuper_loss: 0.0 +average reward score: 0.05859375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.80%) |Training time=0.41s (19.07%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +[2023-04-22 00:10:23,418] [INFO] [logging.py:96:log_dist] [Rank 0] step=880, skipped=12, lr=[2.3945444660163493e-07, 2.3945444660163493e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-22 00:10:23,436] [INFO] [timer.py:199:stop] epoch=0/micro_step=880/global_step=880, RunningAvgSamplesPerSec=129.1579314652433, CurrSamplesPerSec=129.09262182108125, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-22 00:10:23,529] [INFO] [logging.py:96:log_dist] [Rank 0] step=880, skipped=11, lr=[1.212245091288794e-07, 1.212245091288794e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 879|ppo_ep: 1|act_loss: 0.032440185546875|cri_loss: 0.0249786376953125|unsuper_loss: 0.0 +average reward score: 0.188232421875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.77%) |Training time=0.41s (19.12%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81 +epoch: 0|step: 880|ppo_ep: 1|act_loss: -0.044830322265625|cri_loss: -0.00360107421875|unsuper_loss: 0.0 +average reward score: -0.5458984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.57%) |Training time=0.42s (19.32%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.81 +epoch: 0|step: 881|ppo_ep: 1|act_loss: 0.06884765625|cri_loss: 0.04541015625|unsuper_loss: 0.0 +average reward score: -0.12353515625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.74%) |Training time=0.41s (19.13%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 882|ppo_ep: 1|act_loss: 0.0084991455078125|cri_loss: 0.0186309814453125|unsuper_loss: 0.0 +average reward score: -0.8642578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.68%) |Training time=0.41s (19.20%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 883|ppo_ep: 1|act_loss: 0.0063629150390625|cri_loss: 0.01184844970703125|unsuper_loss: 0.0 +average reward score: -0.37451171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.68%) |Training time=0.41s (19.20%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 884|ppo_ep: 1|act_loss: -0.11187744140625|cri_loss: -0.04296875|unsuper_loss: 0.0 +average reward score: 0.63037109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.73%) |Training time=0.41s (19.14%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 885|ppo_ep: 1|act_loss: -0.03643798828125|cri_loss: -0.005706787109375|unsuper_loss: 0.0 +average reward score: -0.31201171875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.80%) |Training time=0.41s (19.07%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 886|ppo_ep: 1|act_loss: 0.0257110595703125|cri_loss: 0.023284912109375|unsuper_loss: 0.0 +average reward score: -0.473388671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.73%) |Training time=0.41s (19.14%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 887|ppo_ep: 1|act_loss: 0.05303955078125|cri_loss: 0.03485107421875|unsuper_loss: 0.0 +average reward score: -0.199462890625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.91%) |Training time=0.41s (18.97%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 888|ppo_ep: 1|act_loss: 0.092041015625|cri_loss: 0.056671142578125|unsuper_loss: 0.0 +average reward score: 0.57763671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.56%) |Training time=0.42s (19.34%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.81 +[2023-04-22 00:10:44,882] [INFO] [logging.py:96:log_dist] [Rank 0] step=890, skipped=12, lr=[1.8734745049808622e-07, 1.8734745049808622e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-22 00:10:44,900] [INFO] [timer.py:199:stop] epoch=0/micro_step=890/global_step=890, RunningAvgSamplesPerSec=129.1510880560782, CurrSamplesPerSec=128.60169210574225, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-22 00:10:44,993] [INFO] [logging.py:96:log_dist] [Rank 0] step=890, skipped=11, lr=[9.45496379722624e-08, 9.45496379722624e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 889|ppo_ep: 1|act_loss: -0.0263671875|cri_loss: 0.00555419921875|unsuper_loss: 0.0 +average reward score: -0.40380859375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.72%) |Training time=0.41s (19.18%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 890|ppo_ep: 1|act_loss: 0.180419921875|cri_loss: 0.099853515625|unsuper_loss: 0.0 +average reward score: -0.42724609375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.72%) |Training time=0.41s (19.14%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 891|ppo_ep: 1|act_loss: 0.07855224609375|cri_loss: 0.04998779296875|unsuper_loss: 0.0 +average reward score: -0.72265625 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.39%) |Training time=0.42s (17.89%) |Others=0.11 (4.72%)|CurSamplesPerSec=13.75 |AvgSamplesPerSec=14.81 +epoch: 0|step: 892|ppo_ep: 1|act_loss: 0.00174713134765625|cri_loss: 0.005886077880859375|unsuper_loss: 0.0 +average reward score: 0.552734375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.12%) |Training time=0.44s (19.79%) |Others=0.11 (5.09%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.81 +epoch: 0|step: 893|ppo_ep: 1|act_loss: -0.004913330078125|cri_loss: 0.0141754150390625|unsuper_loss: 0.0 +average reward score: 0.6435546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.76%) |Training time=0.41s (19.12%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81 +epoch: 0|step: 894|ppo_ep: 1|act_loss: -0.0146484375|cri_loss: -0.00069427490234375|unsuper_loss: 0.0 +average reward score: 0.0802001953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.74%) |Training time=0.41s (19.14%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 895|ppo_ep: 1|act_loss: 0.022796630859375|cri_loss: 0.02679443359375|unsuper_loss: 0.0 +average reward score: -0.583984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.69%) |Training time=0.41s (19.20%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81 +epoch: 0|step: 896|ppo_ep: 1|act_loss: 0.04742431640625|cri_loss: 0.0323486328125|unsuper_loss: 0.0 +average reward score: -0.271484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.68%) |Training time=0.41s (19.21%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.81 +epoch: 0|step: 897|ppo_ep: 1|act_loss: 0.006755828857421875|cri_loss: 0.0107879638671875|unsuper_loss: 0.0 +average reward score: -0.2408447265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.78%) |Training time=0.41s (19.09%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 898|ppo_ep: 1|act_loss: -0.0697021484375|cri_loss: -0.013885498046875|unsuper_loss: 0.0 +average reward score: 0.20166015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.79%) |Training time=0.41s (19.08%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +[2023-04-22 00:11:06,576] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, but hysteresis is 2. Reducing hysteresis to 1 +[2023-04-22 00:11:06,577] [INFO] [logging.py:96:log_dist] [Rank 0] step=900, skipped=13, lr=[1.458146855771019e-07, 1.458146855771019e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-22 00:11:06,577] [INFO] [timer.py:199:stop] epoch=0/micro_step=900/global_step=900, RunningAvgSamplesPerSec=129.14527369687394, CurrSamplesPerSec=144.12734793459072, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-22 00:11:06,669] [INFO] [logging.py:96:log_dist] [Rank 0] step=900, skipped=11, lr=[7.112962016987635e-08, 7.112962016987635e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 899|ppo_ep: 1|act_loss: 0.03289794921875|cri_loss: 0.022064208984375|unsuper_loss: 0.0 +average reward score: -1.609375 +------------------------------------------------------------------------------------- +|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.71%) |Training time=0.38s (18.13%) |Others=0.11 (5.16%)|CurSamplesPerSec=15.10 |AvgSamplesPerSec=14.81 +epoch: 0|step: 900|ppo_ep: 1|act_loss: 0.035003662109375|cri_loss: 0.036163330078125|unsuper_loss: 0.0 +average reward score: -0.07470703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.91%) |Training time=0.41s (18.96%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81 +epoch: 0|step: 901|ppo_ep: 1|act_loss: 0.045989990234375|cri_loss: 0.037445068359375|unsuper_loss: 0.0 +average reward score: -0.1923828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.81%) |Training time=0.41s (19.13%) |Others=0.11 (5.07%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.81 +epoch: 0|step: 902|ppo_ep: 1|act_loss: 0.055419921875|cri_loss: 0.0379638671875|unsuper_loss: 0.0 +average reward score: -0.10693359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.60%) |Training time=0.41s (19.28%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81 +epoch: 0|step: 903|ppo_ep: 1|act_loss: 0.180908203125|cri_loss: 0.1085205078125|unsuper_loss: 0.0 +average reward score: -0.662109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.65%) |Training time=0.41s (19.24%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 904|ppo_ep: 1|act_loss: -0.04833984375|cri_loss: -0.013946533203125|unsuper_loss: 0.0 +average reward score: 0.10009765625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.54%) |Training time=0.42s (19.35%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.81 +epoch: 0|step: 905|ppo_ep: 1|act_loss: -0.0296783447265625|cri_loss: -0.001434326171875|unsuper_loss: 0.0 +average reward score: -0.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.76%) |Training time=0.41s (19.11%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 906|ppo_ep: 1|act_loss: -0.1195068359375|cri_loss: -0.0462646484375|unsuper_loss: 0.0 +average reward score: 0.04644775390625 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.68%) |Training time=0.41s (17.58%) |Others=0.11 (4.74%)|CurSamplesPerSec=13.82 |AvgSamplesPerSec=14.81 +epoch: 0|step: 907|ppo_ep: 1|act_loss: -0.059722900390625|cri_loss: -0.022705078125|unsuper_loss: 0.0 +average reward score: -0.9794921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.67%) |Training time=0.41s (19.21%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 908|ppo_ep: 1|act_loss: -0.06658935546875|cri_loss: -0.022247314453125|unsuper_loss: 0.0 +average reward score: 0.41015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.65%) |Training time=0.41s (19.24%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81 +[2023-04-22 00:11:28,216] [INFO] [logging.py:96:log_dist] [Rank 0] step=910, skipped=13, lr=[1.0568399210089959e-07, 1.0568399210089959e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-22 00:11:28,234] [INFO] [timer.py:199:stop] epoch=0/micro_step=910/global_step=910, RunningAvgSamplesPerSec=129.13780240885887, CurrSamplesPerSec=129.61697352473936, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-22 00:11:28,327] [INFO] [logging.py:96:log_dist] [Rank 0] step=910, skipped=11, lr=[5.099614572235623e-08, 5.099614572235623e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 909|ppo_ep: 1|act_loss: -0.007720947265625|cri_loss: 0.00485992431640625|unsuper_loss: 0.0 +average reward score: -1.40234375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.78%) |Training time=0.41s (19.11%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +[2023-04-22 00:11:30,351] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096 +epoch: 0|step: 910|ppo_ep: 1|act_loss: -0.0655517578125|cri_loss: -0.0225067138671875|unsuper_loss: 0.0 +average reward score: -0.63720703125 +------------------------------------------------------------------------------------- +|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.62s (76.66%) |Training time=0.38s (18.16%) |Others=0.11 (5.18%)|CurSamplesPerSec=15.11 |AvgSamplesPerSec=14.81 +epoch: 0|step: 911|ppo_ep: 1|act_loss: -0.044464111328125|cri_loss: -0.009521484375|unsuper_loss: 0.0 +average reward score: 0.269287109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.79%) |Training time=0.41s (19.09%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +[2023-04-22 00:11:34,728] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 912|ppo_ep: 1|act_loss: 0.08880615234375|cri_loss: 0.0516357421875|unsuper_loss: 0.0 +average reward score: -0.25634765625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.45%) |Training time=0.40s (18.79%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.81 +epoch: 0|step: 913|ppo_ep: 1|act_loss: -0.008819580078125|cri_loss: 0.01165771484375|unsuper_loss: 0.0 +average reward score: -0.296630859375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.49%) |Training time=0.42s (19.40%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.81 +epoch: 0|step: 914|ppo_ep: 1|act_loss: 0.11029052734375|cri_loss: 0.063232421875|unsuper_loss: 0.0 +average reward score: -0.11328125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.77%) |Training time=0.41s (19.11%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 915|ppo_ep: 1|act_loss: 0.11956787109375|cri_loss: 0.07684326171875|unsuper_loss: 0.0 +average reward score: -0.09716796875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.84%) |Training time=0.41s (19.04%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 916|ppo_ep: 1|act_loss: 0.0509033203125|cri_loss: 0.036712646484375|unsuper_loss: 0.0 +average reward score: -0.67822265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.61%) |Training time=0.41s (19.27%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81 +epoch: 0|step: 917|ppo_ep: 1|act_loss: 0.015533447265625|cri_loss: 0.01422882080078125|unsuper_loss: 0.0 +average reward score: -0.1829833984375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.79%) |Training time=0.41s (19.08%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 918|ppo_ep: 1|act_loss: -0.02410888671875|cri_loss: 0.00177001953125|unsuper_loss: 0.0 +average reward score: -0.1407470703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.66%) |Training time=0.41s (19.23%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81 +[2023-04-22 00:11:49,644] [INFO] [logging.py:96:log_dist] [Rank 0] step=920, skipped=14, lr=[7.502484219857415e-08, 7.502484219857415e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-22 00:11:49,662] [INFO] [timer.py:199:stop] epoch=0/micro_step=920/global_step=920, RunningAvgSamplesPerSec=129.15088417010656, CurrSamplesPerSec=128.13918927820959, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-22 00:11:49,755] [INFO] [logging.py:96:log_dist] [Rank 0] step=920, skipped=12, lr=[3.5708622066224494e-08, 3.5708622066224494e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 919|ppo_ep: 1|act_loss: -0.07293701171875|cri_loss: -0.019866943359375|unsuper_loss: 0.0 +average reward score: 0.2802734375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.66%) |Training time=0.41s (19.23%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 920|ppo_ep: 1|act_loss: -0.0058746337890625|cri_loss: 0.0059814453125|unsuper_loss: 0.0 +average reward score: -0.5361328125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.75%) |Training time=0.41s (19.13%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 921|ppo_ep: 1|act_loss: 0.1065673828125|cri_loss: 0.06158447265625|unsuper_loss: 0.0 +average reward score: -0.45849609375 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.72s (76.16%) |Training time=0.42s (18.75%) |Others=0.12 (5.09%)|CurSamplesPerSec=14.15 |AvgSamplesPerSec=14.81 +epoch: 0|step: 922|ppo_ep: 1|act_loss: 0.068603515625|cri_loss: 0.05145263671875|unsuper_loss: 0.0 +average reward score: -0.859375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.50%) |Training time=0.42s (19.44%) |Others=0.11 (5.06%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.81 +epoch: 0|step: 923|ppo_ep: 1|act_loss: 0.06939697265625|cri_loss: 0.046966552734375|unsuper_loss: 0.0 +average reward score: -0.028076171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.77%) |Training time=0.41s (19.11%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +[2023-04-22 00:12:00,630] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384 +epoch: 0|step: 924|ppo_ep: 1|act_loss: 0.0167083740234375|cri_loss: 0.019012451171875|unsuper_loss: 0.0 +average reward score: 0.87451171875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.09%) |Training time=0.41s (19.17%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.81 +epoch: 0|step: 925|ppo_ep: 1|act_loss: -0.1307373046875|cri_loss: -0.046844482421875|unsuper_loss: 0.0 +average reward score: -0.41357421875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.66%) |Training time=0.41s (19.22%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.81 +epoch: 0|step: 926|ppo_ep: 1|act_loss: 0.15087890625|cri_loss: 0.085693359375|unsuper_loss: 0.0 +average reward score: -0.346923828125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.92%) |Training time=0.41s (18.95%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 927|ppo_ep: 1|act_loss: -0.0184173583984375|cri_loss: 0.0004119873046875|unsuper_loss: 0.0 +average reward score: -0.0367431640625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.98%) |Training time=0.40s (18.90%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 928|ppo_ep: 1|act_loss: -0.04833984375|cri_loss: -0.001373291015625|unsuper_loss: 0.0 +average reward score: -0.9365234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.73%) |Training time=0.41s (19.16%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81 +[2023-04-22 00:12:11,261] [INFO] [logging.py:96:log_dist] [Rank 0] step=930, skipped=14, lr=[4.7066422011146386e-08, 4.7066422011146386e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-22 00:12:11,279] [INFO] [timer.py:199:stop] epoch=0/micro_step=930/global_step=930, RunningAvgSamplesPerSec=129.16257215861955, CurrSamplesPerSec=131.13961886597778, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-22 00:12:11,372] [INFO] [logging.py:96:log_dist] [Rank 0] step=930, skipped=13, lr=[2.312207866372762e-08, 2.312207866372762e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 929|ppo_ep: 1|act_loss: -0.0010528564453125|cri_loss: 0.0113983154296875|unsuper_loss: 0.0 +average reward score: -0.236328125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.09%) |Training time=0.41s (18.84%) |Others=0.11 (5.07%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.81 +epoch: 0|step: 930|ppo_ep: 1|act_loss: -0.048583984375|cri_loss: -0.008270263671875|unsuper_loss: 0.0 +average reward score: -1.8974609375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.61%) |Training time=0.41s (19.27%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81 +epoch: 0|step: 931|ppo_ep: 1|act_loss: 0.043243408203125|cri_loss: 0.0360107421875|unsuper_loss: 0.0 +average reward score: 0.132568359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.64%) |Training time=0.41s (19.24%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81 +epoch: 0|step: 932|ppo_ep: 1|act_loss: -0.022979736328125|cri_loss: 0.005218505859375|unsuper_loss: 0.0 +average reward score: 0.273681640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.75%) |Training time=0.41s (19.13%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 933|ppo_ep: 1|act_loss: -0.030517578125|cri_loss: -0.0006256103515625|unsuper_loss: 0.0 +average reward score: -0.388427734375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.77%) |Training time=0.41s (19.10%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81 +epoch: 0|step: 934|ppo_ep: 1|act_loss: 0.05572509765625|cri_loss: 0.0394287109375|unsuper_loss: 0.0 +average reward score: 0.1448974609375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.77%) |Training time=0.41s (19.11%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +epoch: 0|step: 935|ppo_ep: 1|act_loss: -0.040863037109375|cri_loss: -0.01328277587890625|unsuper_loss: 0.0 +average reward score: 0.3115234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.73%) |Training time=0.41s (19.15%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 936|ppo_ep: 1|act_loss: 0.136962890625|cri_loss: 0.0850830078125|unsuper_loss: 0.0 +average reward score: 0.4033203125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.87%) |Training time=0.41s (19.00%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 937|ppo_ep: 1|act_loss: 0.1923828125|cri_loss: 0.11328125|unsuper_loss: 0.0 +average reward score: -0.1240234375 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.82s (78.02%) |Training time=0.40s (17.27%) |Others=0.11 (4.71%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.81 +epoch: 0|step: 938|ppo_ep: 1|act_loss: -0.1480712890625|cri_loss: -0.05029296875|unsuper_loss: 0.0 +average reward score: 0.0418701171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.71%) |Training time=0.41s (19.18%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +[2023-04-22 00:12:32,909] [INFO] [logging.py:96:log_dist] [Rank 0] step=940, skipped=14, lr=[2.55731001596078e-08, 2.55731001596078e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-22 00:12:32,927] [INFO] [timer.py:199:stop] epoch=0/micro_step=940/global_step=940, RunningAvgSamplesPerSec=129.16296815177412, CurrSamplesPerSec=128.4635870106203, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-22 00:12:33,020] [INFO] [logging.py:96:log_dist] [Rank 0] step=940, skipped=13, lr=[1.2321525434263737e-08, 1.2321525434263737e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 939|ppo_ep: 1|act_loss: 0.126953125|cri_loss: 0.080322265625|unsuper_loss: 0.0 +average reward score: -0.37646484375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.68%) |Training time=0.41s (19.21%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 940|ppo_ep: 1|act_loss: 0.120361328125|cri_loss: 0.07830810546875|unsuper_loss: 0.0 +average reward score: 0.11474609375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.83%) |Training time=0.41s (19.03%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +epoch: 0|step: 941|ppo_ep: 1|act_loss: 0.080078125|cri_loss: 0.045806884765625|unsuper_loss: 0.0 +average reward score: 0.0250244140625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.89%) |Training time=0.41s (18.98%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81 +epoch: 0|step: 942|ppo_ep: 1|act_loss: 0.041839599609375|cri_loss: 0.029327392578125|unsuper_loss: 0.0 +average reward score: -1.208984375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.86%) |Training time=0.41s (19.01%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81 +[2023-04-22 00:12:41,583] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192 +epoch: 0|step: 943|ppo_ep: 1|act_loss: 0.0704345703125|cri_loss: 0.0430908203125|unsuper_loss: 0.0 +average reward score: -0.189697265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (76.01%) |Training time=0.41s (19.23%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.81 +epoch: 0|step: 944|ppo_ep: 1|act_loss: 0.1177978515625|cri_loss: 0.0665283203125|unsuper_loss: 0.0 +average reward score: -0.91162109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.70%) |Training time=0.41s (19.19%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81 +epoch: 0|step: 945|ppo_ep: 1|act_loss: 0.030120849609375|cri_loss: 0.035247802734375|unsuper_loss: 0.0 +average reward score: -1.4453125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.57%) |Training time=0.42s (19.31%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.81 +epoch: 0|step: 946|ppo_ep: 1|act_loss: -0.0516357421875|cri_loss: -0.0136260986328125|unsuper_loss: 0.0 +average reward score: -0.309814453125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.00%) |Training time=0.40s (18.88%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 947|ppo_ep: 1|act_loss: 0.08056640625|cri_loss: 0.049224853515625|unsuper_loss: 0.0 +average reward score: 0.128662109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.77%) |Training time=0.41s (19.10%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 948|ppo_ep: 1|act_loss: 0.0703125|cri_loss: 0.0458984375|unsuper_loss: 0.0 +average reward score: 0.0665283203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.78%) |Training time=0.41s (19.09%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81 +[2023-04-22 00:12:54,348] [INFO] [logging.py:96:log_dist] [Rank 0] step=950, skipped=14, lr=[1.0573959601489598e-08, 1.0573959601489598e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-22 00:12:54,366] [INFO] [timer.py:199:stop] epoch=0/micro_step=950/global_step=950, RunningAvgSamplesPerSec=129.16878047990338, CurrSamplesPerSec=129.97294169711395, MemAllocated=12.06GB, MaxMemAllocated=21.83GB +[2023-04-22 00:12:54,459] [INFO] [logging.py:96:log_dist] [Rank 0] step=950, skipped=14, lr=[5.478735544813263e-09, 5.478735544813263e-09], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 949|ppo_ep: 1|act_loss: 0.058837890625|cri_loss: 0.0377197265625|unsuper_loss: 0.0 +average reward score: -0.6123046875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.82%) |Training time=0.41s (19.07%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 950|ppo_ep: 1|act_loss: 0.00640869140625|cri_loss: 0.0272064208984375|unsuper_loss: 0.0 +average reward score: -0.7890625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.77%) |Training time=0.41s (19.10%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81 +epoch: 0|step: 951|ppo_ep: 1|act_loss: 0.0693359375|cri_loss: 0.04315185546875|unsuper_loss: 0.0 +average reward score: -0.7109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.70%) |Training time=0.41s (19.18%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.81 +epoch: 0|step: 952|ppo_ep: 1|act_loss: 0.00537872314453125|cri_loss: 0.00829315185546875|unsuper_loss: 0.0 +average reward score: 0.279296875 +------------------------------------------------------------------------------------- +|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.84s (77.65%) |Training time=0.42s (17.66%) |Others=0.11 (4.70%)|CurSamplesPerSec=13.53 |AvgSamplesPerSec=14.81 +[2023-04-22 00:13:02,885] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 4096, reducing to 2048 +epoch: 0|step: 953|ppo_ep: 1|act_loss: 0.1746826171875|cri_loss: 0.09466552734375|unsuper_loss: 0.0 +average reward score: -2.0 +------------------------------------------------------------------------------------- +saving model ...saving model ... +saving model ...saving model ...saving model ...saving model ...saving model ... +saving model ... + + + + + +[2023-04-22 00:13:06,284] [INFO] [launch.py:460:main] Process 2910018 exits successfully. +[2023-04-22 00:13:07,285] [INFO] [launch.py:460:main] Process 2910014 exits successfully. +[2023-04-22 00:13:08,287] [INFO] [launch.py:460:main] Process 2910016 exits successfully. +[2023-04-22 00:13:08,287] [INFO] [launch.py:460:main] Process 2910013 exits successfully. +[2023-04-22 00:13:08,287] [INFO] [launch.py:460:main] Process 2910017 exits successfully. +[2023-04-22 00:13:08,287] [INFO] [launch.py:460:main] Process 2910015 exits successfully. +[2023-04-22 00:13:08,287] [INFO] [launch.py:460:main] Process 2910012 exits successfully. +[2023-04-22 00:13:16,295] [INFO] [launch.py:460:main] Process 2910011 exits successfully.