diff --git a/llama-1B/16_GPUS/16_GPUS_summary_results.csv b/llama-1B/16_GPUS/16_GPUS_summary_results.csv new file mode 100644 index 0000000000000000000000000000000000000000..6ad5380e17e9beafdababc92ad7933e9f18aa757 --- /dev/null +++ b/llama-1B/16_GPUS/16_GPUS_summary_results.csv @@ -0,0 +1,119 @@ +model,run_name,status,nnodes,dp,tp,pp,batch_accumulation_per_replica,micro_batch_size,tok/s/gpu,mfu,forward,backward +llama-1B,dp-1_tp-8_pp-2_mbz-1,,2,1,8,2,1024,1,-1,-1,, +llama-1B,dp-1_tp-8_pp-2_mbz-2,,2,1,8,2,512,2,-1,-1,, +llama-1B,dp-1_tp-8_pp-2_mbz-4,,2,1,8,2,256,4,-1,-1,, +llama-1B,dp-1_tp-8_pp-2_mbz-8,,2,1,8,2,128,8,-1,-1,, +llama-1B,dp-1_tp-8_pp-2_mbz-16,,2,1,8,2,64,16,-1,-1,, +llama-1B,dp-1_tp-8_pp-2_mbz-32,,2,1,8,2,32,32,-1,-1,, +llama-1B,dp-1_tp-8_pp-2_mbz-64,,2,1,8,2,16,64,-1,-1,, +llama-1B,dp-1_tp-8_pp-2_mbz-128,,2,1,8,2,8,128,-1,-1,, +llama-1B,dp-1_tp-8_pp-2_mbz-256,,2,1,8,2,4,256,-1,-1,, +llama-1B,dp-1_tp-8_pp-2_mbz-512,,2,1,8,2,2,512,-1,-1,, +llama-1B,dp-1_tp-8_pp-2_mbz-1024,,2,1,8,2,1,1024,-1,-1,, +llama-1B,dp-1_tp-1_pp-16_mbz-1,,2,1,1,16,1024,1,-1,-1,, +llama-1B,dp-1_tp-1_pp-16_mbz-2,,2,1,1,16,512,2,-1,-1,, +llama-1B,dp-1_tp-1_pp-16_mbz-4,,2,1,1,16,256,4,-1,-1,, +llama-1B,dp-1_tp-1_pp-16_mbz-8,,2,1,1,16,128,8,-1,-1,, +llama-1B,dp-1_tp-1_pp-16_mbz-16,,2,1,1,16,64,16,-1,-1,, +llama-1B,dp-1_tp-1_pp-16_mbz-32,,2,1,1,16,32,32,-1,-1,, +llama-1B,dp-1_tp-1_pp-16_mbz-64,,2,1,1,16,16,64,-1,-1,, +llama-1B,dp-16_tp-1_pp-1_mbz-1,,2,16,1,1,64,1,-1,-1,, +llama-1B,dp-16_tp-1_pp-1_mbz-2,,2,16,1,1,32,2,-1,-1,, +llama-1B,dp-16_tp-1_pp-1_mbz-4,,2,16,1,1,16,4,-1,-1,, +llama-1B,dp-16_tp-1_pp-1_mbz-8,,2,16,1,1,8,8,-1,-1,, +llama-1B,dp-16_tp-1_pp-1_mbz-16,,2,16,1,1,4,16,-1,-1,, +llama-1B,dp-16_tp-1_pp-1_mbz-32,,2,16,1,1,2,32,-1,-1,, +llama-1B,dp-16_tp-1_pp-1_mbz-64,,2,16,1,1,1,64,-1,-1,, +llama-1B,dp-1_tp-4_pp-4_mbz-1,,2,1,4,4,1024,1,-1,-1,, +llama-1B,dp-1_tp-4_pp-4_mbz-2,,2,1,4,4,512,2,-1,-1,, +llama-1B,dp-1_tp-4_pp-4_mbz-4,,2,1,4,4,256,4,-1,-1,, +llama-1B,dp-1_tp-4_pp-4_mbz-8,,2,1,4,4,128,8,-1,-1,, +llama-1B,dp-1_tp-4_pp-4_mbz-16,,2,1,4,4,64,16,-1,-1,, +llama-1B,dp-1_tp-4_pp-4_mbz-32,,2,1,4,4,32,32,-1,-1,, +llama-1B,dp-1_tp-4_pp-4_mbz-64,,2,1,4,4,16,64,-1,-1,, +llama-1B,dp-1_tp-4_pp-4_mbz-128,,2,1,4,4,8,128,-1,-1,, +llama-1B,dp-1_tp-4_pp-4_mbz-256,,2,1,4,4,4,256,-1,-1,, +llama-1B,dp-8_tp-2_pp-1_mbz-1,,2,8,2,1,128,1,-1,-1,, +llama-1B,dp-8_tp-2_pp-1_mbz-2,,2,8,2,1,64,2,-1,-1,, +llama-1B,dp-8_tp-2_pp-1_mbz-4,,2,8,2,1,32,4,-1,-1,, +llama-1B,dp-8_tp-2_pp-1_mbz-8,,2,8,2,1,16,8,-1,-1,, +llama-1B,dp-8_tp-2_pp-1_mbz-16,,2,8,2,1,8,16,-1,-1,, +llama-1B,dp-8_tp-2_pp-1_mbz-32,,2,8,2,1,4,32,-1,-1,, +llama-1B,dp-8_tp-2_pp-1_mbz-64,,2,8,2,1,2,64,-1,-1,, +llama-1B,dp-8_tp-2_pp-1_mbz-128,,2,8,2,1,1,128,-1,-1,, +llama-1B,dp-4_tp-1_pp-4_mbz-1,,2,4,1,4,256,1,-1,-1,, +llama-1B,dp-4_tp-1_pp-4_mbz-2,,2,4,1,4,128,2,-1,-1,, +llama-1B,dp-4_tp-1_pp-4_mbz-4,,2,4,1,4,64,4,-1,-1,, +llama-1B,dp-4_tp-1_pp-4_mbz-8,,2,4,1,4,32,8,-1,-1,, +llama-1B,dp-4_tp-1_pp-4_mbz-16,,2,4,1,4,16,16,-1,-1,, +llama-1B,dp-4_tp-1_pp-4_mbz-32,,2,4,1,4,8,32,-1,-1,, +llama-1B,dp-4_tp-1_pp-4_mbz-64,,2,4,1,4,4,64,-1,-1,, +llama-1B,dp-8_tp-1_pp-2_mbz-1,,2,8,1,2,128,1,-1,-1,, +llama-1B,dp-8_tp-1_pp-2_mbz-2,,2,8,1,2,64,2,-1,-1,, +llama-1B,dp-8_tp-1_pp-2_mbz-4,,2,8,1,2,32,4,-1,-1,, +llama-1B,dp-8_tp-1_pp-2_mbz-8,,2,8,1,2,16,8,-1,-1,, +llama-1B,dp-8_tp-1_pp-2_mbz-16,,2,8,1,2,8,16,-1,-1,, +llama-1B,dp-8_tp-1_pp-2_mbz-32,,2,8,1,2,4,32,-1,-1,, +llama-1B,dp-8_tp-1_pp-2_mbz-64,,2,8,1,2,2,64,-1,-1,, +llama-1B,dp-8_tp-1_pp-2_mbz-128,,2,8,1,2,1,128,-1,-1,, +llama-1B,dp-4_tp-4_pp-1_mbz-1,,2,4,4,1,256,1,-1,-1,, +llama-1B,dp-4_tp-4_pp-1_mbz-2,,2,4,4,1,128,2,-1,-1,, +llama-1B,dp-4_tp-4_pp-1_mbz-4,,2,4,4,1,64,4,-1,-1,, +llama-1B,dp-4_tp-4_pp-1_mbz-8,,2,4,4,1,32,8,-1,-1,, +llama-1B,dp-4_tp-4_pp-1_mbz-16,,2,4,4,1,16,16,-1,-1,, +llama-1B,dp-4_tp-4_pp-1_mbz-32,,2,4,4,1,8,32,-1,-1,, +llama-1B,dp-4_tp-4_pp-1_mbz-64,,2,4,4,1,4,64,-1,-1,, +llama-1B,dp-4_tp-4_pp-1_mbz-128,,2,4,4,1,2,128,-1,-1,, +llama-1B,dp-4_tp-4_pp-1_mbz-256,,2,4,4,1,1,256,-1,-1,, +llama-1B,dp-2_tp-2_pp-4_mbz-1,,2,2,2,4,512,1,-1,-1,, +llama-1B,dp-2_tp-2_pp-4_mbz-2,,2,2,2,4,256,2,-1,-1,, +llama-1B,dp-2_tp-2_pp-4_mbz-4,,2,2,2,4,128,4,-1,-1,, +llama-1B,dp-2_tp-2_pp-4_mbz-8,,2,2,2,4,64,8,-1,-1,, +llama-1B,dp-2_tp-2_pp-4_mbz-16,,2,2,2,4,32,16,-1,-1,, +llama-1B,dp-2_tp-2_pp-4_mbz-32,,2,2,2,4,16,32,-1,-1,, +llama-1B,dp-2_tp-2_pp-4_mbz-64,,2,2,2,4,8,64,-1,-1,, +llama-1B,dp-2_tp-2_pp-4_mbz-128,,2,2,2,4,4,128,-1,-1,, +llama-1B,dp-2_tp-8_pp-1_mbz-1,,2,2,8,1,512,1,-1,-1,, +llama-1B,dp-2_tp-8_pp-1_mbz-2,,2,2,8,1,256,2,-1,-1,, +llama-1B,dp-2_tp-8_pp-1_mbz-4,,2,2,8,1,128,4,-1,-1,, +llama-1B,dp-2_tp-8_pp-1_mbz-8,,2,2,8,1,64,8,-1,-1,, +llama-1B,dp-2_tp-8_pp-1_mbz-16,,2,2,8,1,32,16,-1,-1,, +llama-1B,dp-2_tp-8_pp-1_mbz-32,,2,2,8,1,16,32,-1,-1,, +llama-1B,dp-2_tp-8_pp-1_mbz-64,,2,2,8,1,8,64,-1,-1,, +llama-1B,dp-2_tp-8_pp-1_mbz-128,,2,2,8,1,4,128,-1,-1,, +llama-1B,dp-2_tp-8_pp-1_mbz-256,,2,2,8,1,2,256,-1,-1,, +llama-1B,dp-2_tp-8_pp-1_mbz-512,,2,2,8,1,1,512,-1,-1,, +llama-1B,dp-1_tp-2_pp-8_mbz-1,,2,1,2,8,1024,1,-1,-1,, +llama-1B,dp-1_tp-2_pp-8_mbz-2,,2,1,2,8,512,2,-1,-1,, +llama-1B,dp-1_tp-2_pp-8_mbz-4,,2,1,2,8,256,4,-1,-1,, +llama-1B,dp-1_tp-2_pp-8_mbz-8,,2,1,2,8,128,8,-1,-1,, +llama-1B,dp-1_tp-2_pp-8_mbz-16,,2,1,2,8,64,16,-1,-1,, +llama-1B,dp-1_tp-2_pp-8_mbz-32,,2,1,2,8,32,32,-1,-1,, +llama-1B,dp-1_tp-2_pp-8_mbz-64,,2,1,2,8,16,64,-1,-1,, +llama-1B,dp-1_tp-2_pp-8_mbz-128,,2,1,2,8,8,128,-1,-1,, +llama-1B,dp-2_tp-1_pp-8_mbz-1,,2,2,1,8,512,1,-1,-1,, +llama-1B,dp-2_tp-1_pp-8_mbz-2,,2,2,1,8,256,2,-1,-1,, +llama-1B,dp-2_tp-1_pp-8_mbz-4,,2,2,1,8,128,4,-1,-1,, +llama-1B,dp-2_tp-1_pp-8_mbz-8,,2,2,1,8,64,8,-1,-1,, +llama-1B,dp-2_tp-1_pp-8_mbz-16,,2,2,1,8,32,16,-1,-1,, +llama-1B,dp-2_tp-1_pp-8_mbz-32,,2,2,1,8,16,32,-1,-1,, +llama-1B,dp-2_tp-1_pp-8_mbz-64,,2,2,1,8,8,64,-1,-1,, +llama-1B,dp-2_tp-4_pp-2_mbz-1,,2,2,4,2,512,1,-1,-1,, +llama-1B,dp-2_tp-4_pp-2_mbz-2,,2,2,4,2,256,2,-1,-1,, +llama-1B,dp-2_tp-4_pp-2_mbz-4,,2,2,4,2,128,4,-1,-1,, +llama-1B,dp-2_tp-4_pp-2_mbz-8,,2,2,4,2,64,8,-1,-1,, +llama-1B,dp-2_tp-4_pp-2_mbz-16,,2,2,4,2,32,16,-1,-1,, +llama-1B,dp-2_tp-4_pp-2_mbz-32,,2,2,4,2,16,32,-1,-1,, +llama-1B,dp-2_tp-4_pp-2_mbz-64,,2,2,4,2,8,64,-1,-1,, +llama-1B,dp-2_tp-4_pp-2_mbz-128,,2,2,4,2,4,128,-1,-1,, +llama-1B,dp-2_tp-4_pp-2_mbz-256,,2,2,4,2,2,256,-1,-1,, +llama-1B,dp-2_tp-4_pp-2_mbz-512,,2,2,4,2,1,512,-1,-1,, +llama-1B,dp-4_tp-2_pp-2_mbz-1,,2,4,2,2,256,1,-1,-1,, +llama-1B,dp-4_tp-2_pp-2_mbz-2,,2,4,2,2,128,2,-1,-1,, +llama-1B,dp-4_tp-2_pp-2_mbz-4,,2,4,2,2,64,4,-1,-1,, +llama-1B,dp-4_tp-2_pp-2_mbz-8,,2,4,2,2,32,8,-1,-1,, +llama-1B,dp-4_tp-2_pp-2_mbz-16,,2,4,2,2,16,16,-1,-1,, +llama-1B,dp-4_tp-2_pp-2_mbz-32,,2,4,2,2,8,32,-1,-1,, +llama-1B,dp-4_tp-2_pp-2_mbz-64,,2,4,2,2,4,64,-1,-1,, +llama-1B,dp-4_tp-2_pp-2_mbz-128,,2,4,2,2,2,128,-1,-1,, +llama-1B,dp-4_tp-2_pp-2_mbz-256,,2,4,2,2,1,256,-1,-1,, diff --git a/llama-1B/16_GPUS/dp-16_tp-1_pp-1_mbz-1/log_metrics.csv b/llama-1B/16_GPUS/dp-16_tp-1_pp-1_mbz-1/log_metrics.csv new file mode 100644 index 0000000000000000000000000000000000000000..8fb8b476db0cfb1ef1ad75c8f4c03c64dfa264e8 --- /dev/null +++ b/llama-1B/16_GPUS/dp-16_tp-1_pp-1_mbz-1/log_metrics.csv @@ -0,0 +1,21 @@ +iteration,consumed_tokens,elapsed_time_per_iteration_ms,tokens_per_sec,tokens_per_sec_per_gpu,global_batch_size,lm_loss,lr,model_tflops_per_gpu,hardware_tflops_per_gpu,grad_norm,memory_usage_MiB,peak_allocated_MiB,peak_reserved_MiB +1,4190000.0000000005,21800.0,192000.0,12000.0,1020.0,11.3,0.0001,109.0,109.0,33.1,7256.42,15785.37,18632.0 +2,8390000.0,10800.0,387000.0,24200.0,1020.0,11.3,9.53e-05,220.0,220.0,33.3,7256.42,15785.37,18632.0 +3,12600000.0,11000.0,382000.0,23900.0,1020.0,16.0,9.05e-05,216.0,216.0,249.0,7256.42,15785.37,18632.0 +4,16800000.0,13300.0,316000.0,19700.0,1020.0,15.1,8.58e-05,179.0,179.0,41.6,7256.39,11621.72,18632.0 +5,21000000.0,13300.0,314000.0,19600.0,1020.0,10.8,8.11e-05,178.0,178.0,26.0,7256.39,15785.37,18632.0 +6,25200000.0,13200.0,319000.0,19900.0,1020.0,10.8,7.63e-05,181.0,181.0,18.9,7256.39,15785.37,18632.0 +7,29400000.0,10800.0,388000.0,24200.0,1020.0,10.2,7.16e-05,220.0,220.0,7.97,7256.39,15785.37,18632.0 +8,33600000.0,10800.0,388000.0,24200.0,1020.0,9.16,6.68e-05,220.0,220.0,6.46,7256.39,15785.37,18632.0 +9,37700000.0,11000.0,383000.0,23900.0,1020.0,11.2,6.21e-05,217.0,217.0,59.7,7256.39,15785.37,18632.0 +10,41900000.0,10800.0,387000.0,24200.0,1020.0,9.59,5.74e-05,219.0,219.0,44.0,7256.39,15785.37,18632.0 +11,46100000.0,10900.0,386000.0,24100.0,1020.0,8.08,5.26e-05,219.0,219.0,8.41,7256.39,15785.37,18632.0 +12,50300000.0,10900.0,384000.0,24000.0,1020.0,7.86,4.79e-05,218.0,218.0,5.09,7256.39,15785.37,18632.0 +13,54500000.0,11000.0,382000.0,23900.0,1020.0,7.7,4.32e-05,217.0,217.0,4.71,7256.39,15785.37,18632.0 +14,58700000.0,11000.0,381000.0,23800.0,1020.0,7.56,3.84e-05,216.0,216.0,5.14,7256.39,15785.37,18632.0 +15,62900000.0,11000.0,381000.0,23800.0,1020.0,7.4,3.37e-05,216.0,216.0,5.16,7256.39,15785.37,18632.0 +16,67099999.99999999,10900.0,386000.0,24100.0,1020.0,7.29,2.89e-05,219.0,219.0,5.26,7256.39,15785.37,18632.0 +17,71300000.0,11000.0,380000.0,23700.0,1020.0,7.22,2.42e-05,215.0,215.0,5.18,7256.39,15785.37,18632.0 +18,75500000.0,11100.0,378000.0,23600.0,1020.0,7.15,1.95e-05,214.0,214.0,5.04,7256.39,15785.37,18632.0 +19,79700000.0,11000.0,382000.0,23900.0,1020.0,7.08,1.47e-05,217.0,217.0,3.85,7256.39,15785.37,18632.0 +20,83900000.0,10800.0,388000.0,24200.0,1020.0,7.03,1e-05,220.0,220.0,2.9,,, diff --git a/llama-1B/16_GPUS/dp-16_tp-1_pp-1_mbz-1/profiler.csv b/llama-1B/16_GPUS/dp-16_tp-1_pp-1_mbz-1/profiler.csv new file mode 100644 index 0000000000000000000000000000000000000000..63e8f2ac814d6322a214a957b284f294e2ebf150 --- /dev/null +++ b/llama-1B/16_GPUS/dp-16_tp-1_pp-1_mbz-1/profiler.csv @@ -0,0 +1,2 @@ +forward,backward +0ms 959μs,1ms 7μs diff --git a/llama-1B/16_GPUS/dp-16_tp-1_pp-1_mbz-2/log_metrics.csv b/llama-1B/16_GPUS/dp-16_tp-1_pp-1_mbz-2/log_metrics.csv new file mode 100644 index 0000000000000000000000000000000000000000..9e75a6e281a52b5675a99a79b8685ad583f3bf39 --- /dev/null +++ b/llama-1B/16_GPUS/dp-16_tp-1_pp-1_mbz-2/log_metrics.csv @@ -0,0 +1,21 @@ +iteration,consumed_tokens,elapsed_time_per_iteration_ms,tokens_per_sec,tokens_per_sec_per_gpu,global_batch_size,lm_loss,lr,model_tflops_per_gpu,hardware_tflops_per_gpu,grad_norm,memory_usage_MiB,peak_allocated_MiB,peak_reserved_MiB +1,4190000.0000000005,18900.0,222000.0,13900.0,1020.0,11.3,0.0001,126.0,126.0,33.1,7252.46,25075.44,27568.0 +2,8390000.0,9350.0,449000.0,28000.0,1020.0,11.3,9.53e-05,254.0,254.0,33.3,7252.46,25075.44,27568.0 +3,12600000.0,9080.0,462000.0,28900.0,1020.0,16.0,9.05e-05,262.0,262.0,249.0,7252.46,25075.44,27568.0 +4,16800000.0,9640.0,435000.0,27200.0,1020.0,15.1,8.58e-05,247.0,247.0,41.6,7252.45,11617.76,27568.0 +5,21000000.0,9500.0,441000.0,27600.0,1020.0,10.8,8.11e-05,250.0,250.0,25.9,7252.45,25075.44,27568.0 +6,25200000.0,9730.0,431000.0,26900.0,1020.0,10.8,7.63e-05,244.0,244.0,18.9,7252.45,25075.44,27568.0 +7,29400000.0,8900.0,471000.0,29500.0,1020.0,10.2,7.16e-05,267.0,267.0,7.97,7252.45,25075.44,27568.0 +8,33600000.0,8860.0,473000.0,29600.0,1020.0,9.15,6.68e-05,268.0,268.0,6.46,7252.45,25075.44,27568.0 +9,37700000.0,9080.0,462000.0,28900.0,1020.0,11.2,6.21e-05,262.0,262.0,59.7,7252.45,25075.44,27568.0 +10,41900000.0,9130.0,459000.0,28700.0,1020.0,9.6,5.74e-05,260.0,260.0,44.2,7252.45,25075.44,27568.0 +11,46100000.0,9390.0,447000.0,27900.0,1020.0,8.08,5.26e-05,253.0,253.0,8.69,7252.45,25075.44,27568.0 +12,50300000.0,8910.0,471000.0,29400.0,1020.0,7.86,4.79e-05,267.0,267.0,5.1,7252.45,25075.44,27568.0 +13,54500000.0,9060.0,463000.0,28900.0,1020.0,7.7,4.32e-05,263.0,263.0,4.73,7252.45,25075.44,27568.0 +14,58700000.0,9030.0,464000.0,29000.0,1020.0,7.56,3.84e-05,263.0,263.0,5.09,7252.45,25075.44,27568.0 +15,62900000.0,9030.0,464000.0,29000.0,1020.0,7.4,3.37e-05,263.0,263.0,5.16,7252.45,25075.44,27568.0 +16,67099999.99999999,9270.0,453000.0,28300.0,1020.0,7.3,2.89e-05,257.0,257.0,5.15,7252.45,25075.44,27568.0 +17,71300000.0,9240.0,454000.0,28400.0,1020.0,7.22,2.42e-05,258.0,258.0,5.14,7252.45,25075.44,27568.0 +18,75500000.0,9120.0,460000.0,28800.0,1020.0,7.15,1.95e-05,261.0,261.0,5.04,7252.45,25075.44,27568.0 +19,79700000.0,9110.0,460000.0,28800.0,1020.0,7.08,1.47e-05,261.0,261.0,3.86,7252.45,25075.44,27568.0 +20,83900000.0,8990.0,467000.0,29200.0,1020.0,7.03,1e-05,265.0,265.0,2.94,,, diff --git a/llama-1B/16_GPUS/dp-16_tp-1_pp-1_mbz-2/profiler.csv b/llama-1B/16_GPUS/dp-16_tp-1_pp-1_mbz-2/profiler.csv new file mode 100644 index 0000000000000000000000000000000000000000..88e84dd4c43bcaf01569a6a7038788a9e2f6e5a8 --- /dev/null +++ b/llama-1B/16_GPUS/dp-16_tp-1_pp-1_mbz-2/profiler.csv @@ -0,0 +1,2 @@ +forward,backward +0ms 881μs,1ms 507μs diff --git a/llama-1B/16_GPUS/dp-16_tp-1_pp-1_mbz-4/log_metrics.csv b/llama-1B/16_GPUS/dp-16_tp-1_pp-1_mbz-4/log_metrics.csv new file mode 100644 index 0000000000000000000000000000000000000000..f5fde0ee49b229ee566a0a2d361dcbc7b9de98ef --- /dev/null +++ b/llama-1B/16_GPUS/dp-16_tp-1_pp-1_mbz-4/log_metrics.csv @@ -0,0 +1,21 @@ +iteration,consumed_tokens,elapsed_time_per_iteration_ms,tokens_per_sec,tokens_per_sec_per_gpu,global_batch_size,lm_loss,lr,model_tflops_per_gpu,hardware_tflops_per_gpu,grad_norm,memory_usage_MiB,peak_allocated_MiB,peak_reserved_MiB +1,4190000.0000000005,20500.0,205000.0,12800.0,1020.0,11.3,0.0001,116.0,116.0,33.1,7252.59,42900.39,44042.0 +2,8390000.0,8800.0,476000.0,29800.0,1020.0,11.3,9.53e-05,270.0,270.0,33.3,7252.59,42900.39,44042.0 +3,12600000.0,8770.0,478000.0,29900.0,1020.0,16.0,9.05e-05,271.0,271.0,249.0,7252.59,42900.39,44042.0 +4,16800000.0,8870.0,473000.0,29600.0,1020.0,15.1,8.58e-05,268.0,268.0,41.6,7252.58,11617.89,44042.0 +5,21000000.0,8870.0,473000.0,29600.0,1020.0,10.8,8.11e-05,268.0,268.0,26.0,7252.58,42900.39,44042.0 +6,25200000.0,8820.0,475000.0,29700.0,1020.0,10.8,7.63e-05,270.0,270.0,18.9,7252.58,42900.39,44042.0 +7,29400000.0,8500.0,494000.0,30800.0,1020.0,10.2,7.16e-05,280.0,280.0,7.97,7252.58,42900.39,44042.0 +8,33600000.0,8580.0,489000.0,30500.0,1020.0,9.15,6.68e-05,277.0,277.0,6.46,7252.58,42900.39,44042.0 +9,37700000.0,8650.0,485000.0,30300.0,1020.0,11.2,6.21e-05,275.0,275.0,59.8,7252.58,42900.39,44042.0 +10,41900000.0,8790.0,477000.0,29800.0,1020.0,9.6,5.74e-05,271.0,271.0,44.2,7252.58,42900.39,44042.0 +11,46100000.0,8600.0,488000.0,30500.0,1020.0,8.08,5.26e-05,277.0,277.0,8.6,7252.58,42900.39,44042.0 +12,50300000.0,8660.0,484000.0,30300.0,1020.0,7.86,4.79e-05,275.0,275.0,5.09,7252.58,42900.39,44042.0 +13,54500000.0,8640.0,485000.0,30300.0,1020.0,7.7,4.32e-05,275.0,275.0,4.73,7252.58,42900.39,44042.0 +14,58700000.0,8820.0,476000.0,29700.0,1020.0,7.56,3.84e-05,270.0,270.0,5.1,7252.58,42900.39,44042.0 +15,62900000.0,8680.0,483000.0,30200.0,1020.0,7.4,3.37e-05,274.0,274.0,5.17,7252.58,42900.39,44042.0 +16,67099999.99999999,8660.0,485000.0,30300.0,1020.0,7.3,2.89e-05,275.0,275.0,5.17,7252.58,42900.39,44042.0 +17,71300000.0,8730.0,480000.0,30000.0,1020.0,7.22,2.42e-05,272.0,272.0,5.13,7252.58,42900.39,44042.0 +18,75500000.0,8630.0,486000.0,30400.0,1020.0,7.15,1.95e-05,276.0,276.0,5.04,7252.58,42900.39,44042.0 +19,79700000.0,8710.0,481000.0,30100.0,1020.0,7.08,1.47e-05,273.0,273.0,3.87,7252.58,42900.39,44042.0 +20,83900000.0,8790.0,477000.0,29800.0,1020.0,7.03,1e-05,271.0,271.0,2.93,,, diff --git a/llama-1B/16_GPUS/dp-16_tp-1_pp-1_mbz-4/profiler.csv b/llama-1B/16_GPUS/dp-16_tp-1_pp-1_mbz-4/profiler.csv new file mode 100644 index 0000000000000000000000000000000000000000..e386c8773f73cf8c93ad89bd3a9fcbcfda31a27c --- /dev/null +++ b/llama-1B/16_GPUS/dp-16_tp-1_pp-1_mbz-4/profiler.csv @@ -0,0 +1,2 @@ +forward,backward +0ms 971μs,1ms 187μs diff --git a/llama-1B/16_GPUS/dp-1_tp-1_pp-16_mbz-1/log_metrics.csv b/llama-1B/16_GPUS/dp-1_tp-1_pp-16_mbz-1/log_metrics.csv new file mode 100644 index 0000000000000000000000000000000000000000..23dbb5db86126ae2de5cea60045e6e04562bec0c --- /dev/null +++ b/llama-1B/16_GPUS/dp-1_tp-1_pp-16_mbz-1/log_metrics.csv @@ -0,0 +1,21 @@ +iteration,consumed_tokens,elapsed_time_per_iteration_ms,tokens_per_sec,tokens_per_sec_per_gpu,global_batch_size,lm_loss,lr,model_tflops_per_gpu,hardware_tflops_per_gpu,grad_norm,memory_usage_MiB,peak_allocated_MiB,peak_reserved_MiB +1,4190000.0000000005,105000.0,40000.0,2500.0,1020.0,11.1,0.0001,22.7,22.7,25.6,3274.04,3274.05,13040.0 +2,8390000.0,50800.0,82500.0,5160.0,1020.0,11.1,9.53e-05,46.8,46.8,25.9,3274.04,12603.56,13042.0 +3,12600000.0,50900.0,82300.0,5150.0,1020.0,9.9,9.05e-05,46.7,46.7,40.4,3274.04,3274.05,13042.0 +4,16800000.0,47800.0,87700.0,5480.0,1020.0,11.9,8.58e-05,49.7,49.7,61.2,3274.04,12603.56,13042.0 +5,21000000.0,46600.0,90100.0,5630.0,1020.0,9.05,8.11e-05,51.1,51.1,8.31,,, +6,25200000.0,51500.0,81500.0,5090.0,1020.0,8.86,7.63e-05,46.2,46.2,6.63,3274.04,12603.56,13042.0 +7,29400000.0,534000.0,7850.0,491.0,1020.0,8.37,7.16e-05,4.45,4.45,4.93,3274.04,12603.56,13042.0 +8,33600000.0,53800.0,78000.0,4880.0,1020.0,7.97,6.68e-05,44.2,44.2,3.13,3274.04,12603.56,13042.0 +9,37700000.0,47500.0,88400.0,5520.0,1020.0,7.83,6.21e-05,50.1,50.1,9.04,3274.04,12603.56,13042.0 +10,41900000.0,48000.0,87300.0,5460.0,1020.0,7.62,5.74e-05,49.5,49.5,5.09,3274.04,12603.56,13042.0 +11,46100000.0,47900.0,87600.0,5470.0,1020.0,7.47,5.26e-05,49.7,49.7,4.06,,, +12,50300000.0,45700.0,91700.0,5730.0,1020.0,7.34,4.79e-05,52.0,52.0,3.12,3274.04,12603.56,13042.0 +13,54500000.0,48000.0,87400.0,5460.0,1020.0,7.23,4.32e-05,49.6,49.6,2.73,3274.04,12603.56,13042.0 +14,58700000.0,44700.0,93800.0,5860.0,1020.0,7.14,3.84e-05,53.2,53.2,2.33,3274.04,12603.56,13042.0 +15,62900000.0,46200.0,90800.0,5680.0,1020.0,7.06,3.37e-05,51.5,51.5,2.48,3274.04,12603.56,13042.0 +16,67099999.99999999,47000.0,89200.0,5580.0,1020.0,6.98,2.89e-05,50.6,50.6,2.66,3274.04,12603.56,13042.0 +17,71300000.0,46000.0,91100.0,5690.0,1020.0,6.9,2.42e-05,51.7,51.7,1.89,,, +18,75500000.0,43500.0,96400.0,6030.0,1020.0,6.84,1.95e-05,54.7,54.7,1.61,3274.04,12603.56,13042.0 +19,79700000.0,47500.0,88200.0,5510.0,1020.0,6.8,1.47e-05,50.0,50.0,1.85,,, +20,83900000.0,44700.0,93900.0,5870.0,1020.0,6.76,1e-05,53.2,53.2,1.81,,, diff --git a/llama-1B/16_GPUS/dp-1_tp-1_pp-16_mbz-1/profiler.csv b/llama-1B/16_GPUS/dp-1_tp-1_pp-16_mbz-1/profiler.csv index 556fe11ff4f1996566bcfb772d5924be91e5ddf4..80fcbf67330ee7442b81338693ce2cea8d3745c9 100644 --- a/llama-1B/16_GPUS/dp-1_tp-1_pp-16_mbz-1/profiler.csv +++ b/llama-1B/16_GPUS/dp-1_tp-1_pp-16_mbz-1/profiler.csv @@ -1,2 +1,2 @@ forward,backward -0ms 619μs,1ms 421μs +0ms 944μs,1ms 86μs diff --git a/llama-1B/16_GPUS/dp-1_tp-1_pp-16_mbz-2/log_metrics.csv b/llama-1B/16_GPUS/dp-1_tp-1_pp-16_mbz-2/log_metrics.csv new file mode 100644 index 0000000000000000000000000000000000000000..707f54abf6aae014be2354152e31023b96701f19 --- /dev/null +++ b/llama-1B/16_GPUS/dp-1_tp-1_pp-16_mbz-2/log_metrics.csv @@ -0,0 +1,21 @@ +iteration,consumed_tokens,elapsed_time_per_iteration_ms,tokens_per_sec,tokens_per_sec_per_gpu,global_batch_size,lm_loss,lr,model_tflops_per_gpu,hardware_tflops_per_gpu,grad_norm,memory_usage_MiB,peak_allocated_MiB,peak_reserved_MiB +1,4190000.0000000005,90800.0,46200.0,2890.0,1020.0,11.1,0.0001,26.2,26.2,25.6,3274.08,3274.08,23444.0 +2,8390000.0,31700.0,132000.0,8270.0,1020.0,11.1,9.53e-05,75.1,75.1,25.9,3274.08,22941.07,23444.0 +3,12600000.0,31900.0,131000.0,8210.0,1020.0,9.9,9.05e-05,74.5,74.5,40.4,3274.08,22941.07,23444.0 +4,16800000.0,30400.0,138000.0,8630.0,1020.0,11.9,8.58e-05,78.3,78.3,61.2,3274.08,3274.08,23444.0 +5,21000000.0,30100.0,139000.0,8700.0,1020.0,9.05,8.11e-05,78.9,78.9,8.32,3274.08,22941.07,23444.0 +6,25200000.0,32900.0,128000.0,7970.0,1020.0,8.86,7.63e-05,72.3,72.3,6.61,3274.08,22941.07,23444.0 +7,29400000.0,272000.0,15400.0,965.0,1020.0,8.37,7.16e-05,8.75,8.75,4.93,3274.08,22941.07,23444.0 +8,33600000.0,32000.0,131000.0,8189.999999999999,1020.0,7.97,6.68e-05,74.3,74.3,3.12,3274.08,22941.07,23444.0 +9,37700000.0,32600.0,129000.0,8039.999999999999,1020.0,7.83,6.21e-05,73.0,73.0,9.04,3274.08,22941.07,23444.0 +10,41900000.0,31100.0,135000.0,8430.0,1020.0,7.62,5.74e-05,76.5,76.5,5.08,,, +11,46100000.0,31100.0,135000.0,8440.0,1020.0,7.47,5.26e-05,76.5,76.5,4.05,3274.08,22941.07,23444.0 +12,50300000.0,32400.0,130000.0,8090.0,1020.0,7.34,4.79e-05,73.4,73.4,3.13,3274.08,22941.07,23444.0 +13,54500000.0,32000.0,131000.0,8180.0,1020.0,7.23,4.32e-05,74.2,74.2,2.74,,, +14,58700000.0,32100.0,131000.0,8160.0,1020.0,7.14,3.84e-05,74.0,74.0,2.32,3274.08,22941.07,23444.0 +15,62900000.0,30000.0,140000.0,8740.0,1020.0,7.06,3.37e-05,79.3,79.3,2.47,3274.08,22941.07,23444.0 +16,67099999.99999999,34000.0,123000.0,7700.0,1020.0,6.98,2.89e-05,69.9,69.9,2.66,,, +17,71300000.0,32900.0,128000.0,7970.0,1020.0,6.9,2.42e-05,72.4,72.4,1.88,3274.08,22941.07,23444.0 +18,75500000.0,31500.0,133000.0,8340.0,1020.0,6.84,1.95e-05,75.6,75.6,1.61,3274.08,22941.07,23444.0 +19,79700000.0,31900.0,132000.0,8230.0,1020.0,6.8,1.47e-05,74.7,74.7,1.83,3274.08,22941.07,23444.0 +20,83900000.0,31600.0,133000.0,8290.0,1020.0,6.77,1e-05,75.2,75.2,1.82,,, diff --git a/llama-1B/16_GPUS/dp-1_tp-1_pp-16_mbz-2/profiler.csv b/llama-1B/16_GPUS/dp-1_tp-1_pp-16_mbz-2/profiler.csv new file mode 100644 index 0000000000000000000000000000000000000000..e988db749e7f6a97016f576b8801e86c04a9da2d --- /dev/null +++ b/llama-1B/16_GPUS/dp-1_tp-1_pp-16_mbz-2/profiler.csv @@ -0,0 +1,2 @@ +forward,backward +0ms 956μs,1ms 174μs diff --git a/llama-1B/16_GPUS/dp-1_tp-1_pp-16_mbz-4/log_metrics.csv b/llama-1B/16_GPUS/dp-1_tp-1_pp-16_mbz-4/log_metrics.csv new file mode 100644 index 0000000000000000000000000000000000000000..8fed3af89a10a7307778ee83184544253ae530be --- /dev/null +++ b/llama-1B/16_GPUS/dp-1_tp-1_pp-16_mbz-4/log_metrics.csv @@ -0,0 +1,21 @@ +iteration,consumed_tokens,elapsed_time_per_iteration_ms,tokens_per_sec,tokens_per_sec_per_gpu,global_batch_size,lm_loss,lr,model_tflops_per_gpu,hardware_tflops_per_gpu,grad_norm,memory_usage_MiB,peak_allocated_MiB,peak_reserved_MiB +1,4190000.0000000005,81400.0,51500.0,3220.0,1020.0,11.1,0.0001,29.2,29.2,25.6,3274.15,3274.15,43028.0 +2,8390000.0,30800.0,136000.0,8500.0,1020.0,11.1,9.53e-05,77.1,77.1,25.9,3274.15,3274.15,43028.0 +3,12600000.0,28800.0,145000.0,9090.0,1020.0,9.9,9.05e-05,82.5,82.5,40.4,3274.15,3274.15,43028.0 +4,16800000.0,30000.0,140000.0,8750.0,1020.0,11.9,8.58e-05,79.4,79.4,61.2,3274.15,42592.08,43028.0 +5,21000000.0,31800.0,132000.0,8250.0,1020.0,9.05,8.11e-05,74.9,74.9,8.31,,, +6,25200000.0,30600.0,137000.0,8550.0,1020.0,8.85,7.63e-05,77.6,77.6,6.61,3274.15,42592.08,43028.0 +7,29400000.0,146000.0,28700.0,1790.0,1020.0,8.37,7.16e-05,16.3,16.3,4.93,,, +8,33600000.0,29800.0,141000.0,8810.0,1020.0,7.97,6.68e-05,79.9,79.9,3.12,3274.15,42592.08,43028.0 +9,37700000.0,31500.0,133000.0,8320.0,1020.0,7.83,6.21e-05,75.5,75.5,9.04,,, +10,41900000.0,31400.0,134000.0,8350.0,1020.0,7.62,5.74e-05,75.8,75.8,5.09,3274.15,42592.08,43028.0 +11,46100000.0,30300.0,138000.0,8650.0,1020.0,7.47,5.26e-05,78.5,78.5,4.06,3274.15,42592.08,43028.0 +12,50300000.0,32000.0,131000.0,8180.0,1020.0,7.34,4.79e-05,74.2,74.2,3.13,,, +13,54500000.0,32400.0,129000.0,8080.0,1020.0,7.23,4.32e-05,73.4,73.4,2.73,3274.15,42592.08,43028.0 +14,58700000.0,30900.0,136000.0,8480.0,1020.0,7.14,3.84e-05,77.0,77.0,2.33,3274.15,42592.08,43028.0 +15,62900000.0,30600.0,137000.0,8560.0,1020.0,7.06,3.37e-05,77.7,77.7,2.47,3274.15,42592.08,43028.0 +16,67099999.99999999,32100.0,131000.0,8170.0,1020.0,6.98,2.89e-05,74.1,74.1,2.69,3274.15,42592.08,43028.0 +17,71300000.0,31300.0,134000.0,8370.0,1020.0,6.9,2.42e-05,75.9,75.9,1.91,3274.15,42592.08,43028.0 +18,75500000.0,29900.0,140000.0,8770.0,1020.0,6.84,1.95e-05,79.5,79.5,1.62,3274.15,42592.08,43028.0 +19,79700000.0,31700.0,132000.0,8270.0,1020.0,6.8,1.47e-05,75.0,75.0,1.85,3274.15,42592.08,43028.0 +20,83900000.0,30800.0,136000.0,8500.0,1020.0,6.77,1e-05,77.1,77.1,1.82,,, diff --git a/llama-1B/16_GPUS/dp-1_tp-1_pp-16_mbz-4/profiler.csv b/llama-1B/16_GPUS/dp-1_tp-1_pp-16_mbz-4/profiler.csv new file mode 100644 index 0000000000000000000000000000000000000000..e45de9faf96db303104c2ae5fb65f22770d4d83b --- /dev/null +++ b/llama-1B/16_GPUS/dp-1_tp-1_pp-16_mbz-4/profiler.csv @@ -0,0 +1,2 @@ +forward,backward +0ms 584μs,3ms 721μs diff --git a/llama-1B/16_GPUS/dp-1_tp-2_pp-8_mbz-2/profiler.csv b/llama-1B/16_GPUS/dp-1_tp-2_pp-8_mbz-2/profiler.csv index c07cb06106d69b9392b0aba3a662683b69c120ce..95aac4239b2fd5d0a9c2a99b91f16f5f5ef4acc6 100644 --- a/llama-1B/16_GPUS/dp-1_tp-2_pp-8_mbz-2/profiler.csv +++ b/llama-1B/16_GPUS/dp-1_tp-2_pp-8_mbz-2/profiler.csv @@ -1,2 +1,2 @@ forward,backward -0ms 918μs,1ms 958μs +0ms 942μs,1ms 137μs diff --git a/llama-1B/16_GPUS/dp-1_tp-2_pp-8_mbz-4/log_metrics.csv b/llama-1B/16_GPUS/dp-1_tp-2_pp-8_mbz-4/log_metrics.csv new file mode 100644 index 0000000000000000000000000000000000000000..cf9a47e77b79adebcfeb810a0beafd37479c7758 --- /dev/null +++ b/llama-1B/16_GPUS/dp-1_tp-2_pp-8_mbz-4/log_metrics.csv @@ -0,0 +1,21 @@ +iteration,consumed_tokens,elapsed_time_per_iteration_ms,tokens_per_sec,tokens_per_sec_per_gpu,global_batch_size,lm_loss,lr,model_tflops_per_gpu,hardware_tflops_per_gpu,grad_norm,memory_usage_MiB,peak_allocated_MiB,peak_reserved_MiB +1,4190000.0000000005,109000.0,38400.0,2400.0,1020.0,11.2,0.0001,21.8,21.8,17.8,2393.89,24101.07,24548.0 +2,8390000.0,65600.0,63900.0,3990.0,1020.0,11.2,9.53e-05,36.2,36.2,17.8,2393.89,24101.07,24548.0 +3,12600000.0,62400.0,67200.0,4200.0,1020.0,9.62,9.05e-05,38.1,38.1,21.7,2393.89,2393.9,24548.0 +4,16800000.0,63900.0,65600.0,4100.0,1020.0,10.5,8.58e-05,37.2,37.2,45.6,,, +5,21000000.0,65900.0,63700.0,3980.0,1020.0,9.43,8.11e-05,36.1,36.1,11.3,2393.89,24101.07,24548.0 +6,25200000.0,66600.0,63000.0,3940.0,1020.0,9.37,7.63e-05,35.7,35.7,7.69,2393.89,24101.07,24548.0 +7,29400000.0,301000.0,14000.0,872.0,1020.0,8.96,7.16e-05,7.91,7.91,5.69,2393.89,24101.07,24548.0 +8,33600000.0,63200.0,66400.0,4150.0,1020.0,8.47,6.68e-05,37.6,37.6,5.25,2393.89,24101.07,24548.0 +9,37700000.0,65200.0,64300.0,4019.9999999999995,1020.0,8.01,6.21e-05,36.5,36.5,4.65,2393.89,24101.07,24548.0 +10,41900000.0,62100.0,67600.0,4220.0,1020.0,7.75,5.74e-05,38.3,38.3,3.85,2393.89,24101.07,24548.0 +11,46100000.0,61400.0,68400.0,4270.0,1020.0,7.62,5.26e-05,38.8,38.8,4.98,2393.89,24101.07,24548.0 +12,50300000.0,63000.0,66600.0,4160.0,1020.0,7.46,4.79e-05,37.7,37.7,3.45,,, +13,54500000.0,64200.0,65400.00000000001,4090.0,1020.0,7.34,4.32e-05,37.1,37.1,3.45,2393.89,24101.07,24548.0 +14,58700000.0,63700.0,65800.0,4110.0,1020.0,7.22,3.84e-05,37.3,37.3,3.22,2393.89,24101.07,24548.0 +15,62900000.0,64500.0,65000.0,4059.9999999999995,1020.0,7.1,3.37e-05,36.8,36.8,2.88,2393.89,24101.07,24548.0 +16,67099999.99999999,64300.0,65200.0,4080.0,1020.0,7.01,2.89e-05,37.0,37.0,2.57,2393.89,24101.07,24548.0 +17,71300000.0,63900.0,65700.0,4100.0,1020.0,6.94,2.42e-05,37.2,37.2,2.47,2393.89,24101.07,24548.0 +18,75500000.0,63700.0,65900.0,4120.0,1020.0,6.88,1.95e-05,37.4,37.4,2.49,2393.89,24101.07,24548.0 +19,79700000.0,63600.0,65900.0,4120.0,1020.0,6.83,1.47e-05,37.4,37.4,2.42,,, +20,83900000.0,64900.00000000001,64599.99999999999,4040.0,1020.0,6.78,1e-05,36.7,36.7,2.25,,, diff --git a/llama-1B/16_GPUS/dp-1_tp-2_pp-8_mbz-4/profiler.csv b/llama-1B/16_GPUS/dp-1_tp-2_pp-8_mbz-4/profiler.csv new file mode 100644 index 0000000000000000000000000000000000000000..c57b26205b1ea71bc57eefed979f2195625376ed --- /dev/null +++ b/llama-1B/16_GPUS/dp-1_tp-2_pp-8_mbz-4/profiler.csv @@ -0,0 +1,2 @@ +forward,backward +0ms 971μs,1ms 239μs diff --git a/llama-1B/16_GPUS/dp-1_tp-2_pp-8_mbz-8/profiler.csv b/llama-1B/16_GPUS/dp-1_tp-2_pp-8_mbz-8/profiler.csv index 806f92a9bd6ecb32cc56ea61e58078ca1d806c02..11f99f9f92173ade34a845a5d52f5d57d3fb2a55 100644 --- a/llama-1B/16_GPUS/dp-1_tp-2_pp-8_mbz-8/profiler.csv +++ b/llama-1B/16_GPUS/dp-1_tp-2_pp-8_mbz-8/profiler.csv @@ -1,2 +1,2 @@ forward,backward -0ms 980μs,7ms 495μs +0ms 943μs,1ms 229μs diff --git a/llama-1B/16_GPUS/dp-1_tp-4_pp-4_mbz-16/log_metrics.csv b/llama-1B/16_GPUS/dp-1_tp-4_pp-4_mbz-16/log_metrics.csv new file mode 100644 index 0000000000000000000000000000000000000000..9e54e7adf0fc93c6b36b51bdfa0320f0576ec69e --- /dev/null +++ b/llama-1B/16_GPUS/dp-1_tp-4_pp-4_mbz-16/log_metrics.csv @@ -0,0 +1,21 @@ +iteration,consumed_tokens,elapsed_time_per_iteration_ms,tokens_per_sec,tokens_per_sec_per_gpu,global_batch_size,lm_loss,lr,model_tflops_per_gpu,hardware_tflops_per_gpu,grad_norm,memory_usage_MiB,peak_allocated_MiB,peak_reserved_MiB +1,4190000.0000000005,48500.0,86500.0,5400.0,1020.0,11.2,0.0001,49.0,49.0,10.9,1778.24,47092.1,47498.0 +2,8390000.0,19800.0,212000.0,13200.0,1020.0,11.2,9.53e-05,120.0,120.0,11.0,1778.24,1778.27,47498.0 +3,12600000.0,18600.0,226000.0,14100.0,1020.0,9.83,9.05e-05,128.0,128.0,44.4,1778.24,1778.27,47498.0 +4,16800000.0,17800.0,235000.0,14700.0,1020.0,12.1,8.58e-05,133.0,133.0,24.8,,, +5,21000000.0,17100.0,245000.0,15300.0,1020.0,10.1,8.11e-05,139.0,139.0,11.4,1778.24,47092.1,47498.0 +6,25200000.0,17100.0,245000.0,15300.0,1020.0,9.39,7.63e-05,139.0,139.0,7.05,1778.24,47092.1,47498.0 +7,29400000.0,113000.0,37000.0,2310.0,1020.0,8.7,7.16e-05,21.0,21.0,5.44,1778.24,47092.1,47498.0 +8,33600000.0,17200.0,243000.0,15200.0,1020.0,8.77,6.68e-05,138.0,138.0,18.3,,, +9,37700000.0,17200.0,244000.0,15300.0,1020.0,8.11,6.21e-05,139.0,139.0,4.97,1778.24,47092.1,47498.0 +10,41900000.0,16900.0,249000.0,15500.0,1020.0,7.96,5.74e-05,141.0,141.0,4.62,1778.24,47092.1,47498.0 +11,46100000.0,16000.0,262000.0,16400.0,1020.0,7.84,5.26e-05,149.0,149.0,4.93,1778.24,47092.1,47498.0 +12,50300000.0,18100.0,232000.0,14500.0,1020.0,7.64,4.79e-05,132.0,132.0,4.08,1778.24,47092.1,47498.0 +13,54500000.0,17500.0,240000.0,15000.0,1020.0,7.48,4.32e-05,136.0,136.0,3.28,1778.24,47092.1,47498.0 +14,58700000.0,18200.0,230000.0,14400.0,1020.0,7.4,3.84e-05,131.0,131.0,3.52,1778.24,47092.1,47498.0 +15,62900000.0,17000.0,246000.0,15400.0,1020.0,7.29,3.37e-05,140.0,140.0,3.13,,, +16,67099999.99999999,17600.0,239000.0,14900.0,1020.0,7.18,2.89e-05,135.0,135.0,3.12,1778.24,47092.1,47498.0 +17,71300000.0,17700.0,237000.0,14800.0,1020.0,7.09,2.42e-05,134.0,134.0,3.22,,, +18,75500000.0,17800.0,236000.0,14700.0,1020.0,7.02,1.95e-05,134.0,134.0,3.19,1778.24,47092.1,47498.0 +19,79700000.0,18400.0,227000.0,14200.0,1020.0,6.97,1.47e-05,129.0,129.0,3.06,1778.24,47092.1,47498.0 +20,83900000.0,17600.0,239000.0,14900.0,1020.0,6.92,1e-05,135.0,135.0,2.88,,, diff --git a/llama-1B/16_GPUS/dp-1_tp-4_pp-4_mbz-16/profiler.csv b/llama-1B/16_GPUS/dp-1_tp-4_pp-4_mbz-16/profiler.csv new file mode 100644 index 0000000000000000000000000000000000000000..3515b50686ac4ceab121e63d305be14880dc1e15 --- /dev/null +++ b/llama-1B/16_GPUS/dp-1_tp-4_pp-4_mbz-16/profiler.csv @@ -0,0 +1,2 @@ +forward,backward +0ms 888μs,1ms 570μs diff --git a/llama-1B/16_GPUS/dp-1_tp-4_pp-4_mbz-4/profiler.csv b/llama-1B/16_GPUS/dp-1_tp-4_pp-4_mbz-4/profiler.csv index d2cb2a086e5fa14c49424f6916f8cb48f6431e24..be7530271ad2c822d7403b47733eb3c2dc0be84e 100644 --- a/llama-1B/16_GPUS/dp-1_tp-4_pp-4_mbz-4/profiler.csv +++ b/llama-1B/16_GPUS/dp-1_tp-4_pp-4_mbz-4/profiler.csv @@ -1,2 +1,2 @@ forward,backward -1ms 17μs,0ms 416μs +0ms 980μs,1ms 139μs diff --git a/llama-1B/16_GPUS/dp-1_tp-4_pp-4_mbz-8/log_metrics.csv b/llama-1B/16_GPUS/dp-1_tp-4_pp-4_mbz-8/log_metrics.csv new file mode 100644 index 0000000000000000000000000000000000000000..fe0e143e7ffdfc11df8457e5db0022f69b1a704b --- /dev/null +++ b/llama-1B/16_GPUS/dp-1_tp-4_pp-4_mbz-8/log_metrics.csv @@ -0,0 +1,21 @@ +iteration,consumed_tokens,elapsed_time_per_iteration_ms,tokens_per_sec,tokens_per_sec_per_gpu,global_batch_size,lm_loss,lr,model_tflops_per_gpu,hardware_tflops_per_gpu,grad_norm,memory_usage_MiB,peak_allocated_MiB,peak_reserved_MiB +1,4190000.0000000005,50800.0,82600.0,5160.0,1020.0,11.2,0.0001,46.9,46.9,10.9,1777.96,24436.91,24816.0 +2,8390000.0,15800.0,265000.0,16600.0,1020.0,11.2,9.53e-05,150.0,150.0,11.0,1777.96,24436.91,24816.0 +3,12600000.0,17300.0,242000.0,15100.0,1020.0,9.83,9.05e-05,137.0,137.0,44.3,1777.96,24436.91,24816.0 +4,16800000.0,16700.0,252000.0,15700.0,1020.0,12.1,8.58e-05,143.0,143.0,24.8,1777.96,24436.91,24816.0 +5,21000000.0,15400.0,272000.0,17000.0,1020.0,10.1,8.11e-05,154.0,154.0,11.4,,, +6,25200000.0,15900.0,264000.0,16500.0,1020.0,9.39,7.63e-05,150.0,150.0,7.05,1777.96,24436.91,24816.0 +7,29400000.0,204000.0,20600.0,1290.0,1020.0,8.69,7.16e-05,11.7,11.7,5.43,1777.96,24436.91,24816.0 +8,33600000.0,16300.0,257000.0,16000.0,1020.0,8.77,6.68e-05,146.0,146.0,18.4,1777.96,24436.91,24816.0 +9,37700000.0,17600.0,238000.0,14900.0,1020.0,8.11,6.21e-05,135.0,135.0,4.96,,, +10,41900000.0,16600.0,253000.0,15800.0,1020.0,7.96,5.74e-05,144.0,144.0,4.62,1777.96,24436.91,24816.0 +11,46100000.0,16300.0,257000.0,16100.000000000002,1020.0,7.84,5.26e-05,146.0,146.0,4.93,1777.96,24436.91,24816.0 +12,50300000.0,16000.0,262000.0,16400.0,1020.0,7.64,4.79e-05,148.0,148.0,4.08,,, +13,54500000.0,16100.000000000002,261000.0,16300.0,1020.0,7.48,4.32e-05,148.0,148.0,3.28,1777.96,24436.91,24816.0 +14,58700000.0,16500.0,254000.0,15900.0,1020.0,7.4,3.84e-05,144.0,144.0,3.52,1777.96,24436.91,24816.0 +15,62900000.0,16100.000000000002,261000.0,16300.0,1020.0,7.29,3.37e-05,148.0,148.0,3.13,,, +16,67099999.99999999,16000.0,263000.0,16400.0,1020.0,7.18,2.89e-05,149.0,149.0,3.11,1777.96,24436.91,24816.0 +17,71300000.0,15300.0,275000.0,17200.0,1020.0,7.09,2.42e-05,156.0,156.0,3.22,1777.96,24436.91,24816.0 +18,75500000.0,15800.0,266000.0,16600.0,1020.0,7.02,1.95e-05,151.0,151.0,3.19,1777.96,24436.91,24816.0 +19,79700000.0,15600.0,268000.0,16800.0,1020.0,6.97,1.47e-05,152.0,152.0,3.06,,, +20,83900000.0,15300.0,274000.0,17100.0,1020.0,6.92,1e-05,156.0,156.0,2.89,,, diff --git a/llama-1B/16_GPUS/dp-1_tp-4_pp-4_mbz-8/profiler.csv b/llama-1B/16_GPUS/dp-1_tp-4_pp-4_mbz-8/profiler.csv new file mode 100644 index 0000000000000000000000000000000000000000..2576dc635f8519475273c74e61ca287d5b2ba095 --- /dev/null +++ b/llama-1B/16_GPUS/dp-1_tp-4_pp-4_mbz-8/profiler.csv @@ -0,0 +1,2 @@ +forward,backward +0ms 804μs,1ms 957μs diff --git a/llama-1B/16_GPUS/dp-1_tp-8_pp-2_mbz-16/log_metrics.csv b/llama-1B/16_GPUS/dp-1_tp-8_pp-2_mbz-16/log_metrics.csv new file mode 100644 index 0000000000000000000000000000000000000000..4813c4bba57644db25c055fae9b93a12cda910da --- /dev/null +++ b/llama-1B/16_GPUS/dp-1_tp-8_pp-2_mbz-16/log_metrics.csv @@ -0,0 +1,21 @@ +iteration,consumed_tokens,elapsed_time_per_iteration_ms,tokens_per_sec,tokens_per_sec_per_gpu,global_batch_size,lm_loss,lr,model_tflops_per_gpu,hardware_tflops_per_gpu,grad_norm,memory_usage_MiB,peak_allocated_MiB,peak_reserved_MiB +1,4190000.0000000005,36400.0,115000.0,7200.0,1020.0,11.2,0.0001,65.3,65.3,12.1,1572.71,31525.69,31874.0 +2,8390000.0,18000.0,233000.0,14500.0,1020.0,11.2,9.53e-05,132.0,132.0,12.2,1572.71,31525.69,31874.0 +3,12600000.0,18100.0,232000.0,14500.0,1020.0,10.0,9.05e-05,132.0,132.0,51.6,1572.71,31525.69,31874.0 +4,16800000.0,17400.0,241000.0,15000.0,1020.0,11.7,8.58e-05,136.0,136.0,18.2,1572.71,1572.75,31874.0 +5,21000000.0,17500.0,239000.0,14900.0,1020.0,10.4,8.11e-05,136.0,136.0,16.0,1572.71,31525.69,31874.0 +6,25200000.0,17500.0,240000.0,15000.0,1020.0,9.9,7.63e-05,136.0,136.0,9.07,1572.71,31525.69,31874.0 +7,29400000.0,216000.0,19400.0,1210.0,1020.0,9.37,7.16e-05,11.0,11.0,6.23,1572.71,31525.69,31874.0 +8,33600000.0,17400.0,241000.0,15100.0,1020.0,8.89,6.68e-05,137.0,137.0,5.76,1572.71,31525.69,31874.0 +9,37700000.0,18300.0,229000.0,14300.0,1020.0,8.8,6.21e-05,130.0,130.0,11.2,,, +10,41900000.0,17300.0,243000.0,15200.0,1020.0,8.33,5.74e-05,138.0,138.0,5.72,1572.71,31525.69,31874.0 +11,46100000.0,17200.0,243000.0,15200.0,1020.0,8.06,5.26e-05,138.0,138.0,4.91,1572.71,31525.69,31874.0 +12,50300000.0,17000.0,247000.0,15400.0,1020.0,7.9,4.79e-05,140.0,140.0,4.86,1572.71,31525.69,31874.0 +13,54500000.0,17300.0,242000.0,15100.0,1020.0,7.75,4.32e-05,137.0,137.0,4.69,,, +14,58700000.0,17300.0,243000.0,15200.0,1020.0,7.62,3.84e-05,138.0,138.0,4.69,1572.71,31525.69,31874.0 +15,62900000.0,17300.0,242000.0,15100.0,1020.0,7.48,3.37e-05,137.0,137.0,4.49,1572.71,31525.69,31874.0 +16,67099999.99999999,17000.0,247000.0,15400.0,1020.0,7.34,2.89e-05,140.0,140.0,3.99,1572.71,31525.69,31874.0 +17,71300000.0,17100.0,245000.0,15300.0,1020.0,7.23,2.42e-05,139.0,139.0,3.54,1572.71,31525.69,31874.0 +18,75500000.0,17300.0,242000.0,15100.0,1020.0,7.16,1.95e-05,137.0,137.0,3.28,1572.71,31525.69,31874.0 +19,79700000.0,17300.0,242000.0,15100.0,1020.0,7.09,1.47e-05,137.0,137.0,3.2,1572.71,31525.69,31874.0 +20,83900000.0,17500.0,240000.0,15000.0,1020.0,7.03,1e-05,136.0,136.0,3.1,,, diff --git a/llama-1B/16_GPUS/dp-1_tp-8_pp-2_mbz-16/profiler.csv b/llama-1B/16_GPUS/dp-1_tp-8_pp-2_mbz-16/profiler.csv new file mode 100644 index 0000000000000000000000000000000000000000..d4ff84bf1919a54ded105854ec6e47b162f4e5a0 --- /dev/null +++ b/llama-1B/16_GPUS/dp-1_tp-8_pp-2_mbz-16/profiler.csv @@ -0,0 +1,2 @@ +forward,backward +0ms 972μs,1ms 205μs diff --git a/llama-1B/16_GPUS/dp-1_tp-8_pp-2_mbz-32/log_metrics.csv b/llama-1B/16_GPUS/dp-1_tp-8_pp-2_mbz-32/log_metrics.csv new file mode 100644 index 0000000000000000000000000000000000000000..40c4fa5276b310f81e55107a7a1bbb9ebbe19891 --- /dev/null +++ b/llama-1B/16_GPUS/dp-1_tp-8_pp-2_mbz-32/log_metrics.csv @@ -0,0 +1,21 @@ +iteration,consumed_tokens,elapsed_time_per_iteration_ms,tokens_per_sec,tokens_per_sec_per_gpu,global_batch_size,lm_loss,lr,model_tflops_per_gpu,hardware_tflops_per_gpu,grad_norm,memory_usage_MiB,peak_allocated_MiB,peak_reserved_MiB +1,4190000.0000000005,35300.0,119000.0,7430.0,1020.0,11.2,0.0001,67.4,67.4,12.1,1573.27,1573.31,62200.0 +2,8390000.0,16800.0,250000.0,15600.0,1020.0,11.2,9.53e-05,142.0,142.0,12.2,1573.27,1573.31,62200.0 +3,12600000.0,17000.0,246000.0,15400.0,1020.0,10.0,9.05e-05,140.0,140.0,51.6,1573.27,1573.31,62200.0 +4,16800000.0,16800.0,249000.0,15600.0,1020.0,11.7,8.58e-05,141.0,141.0,18.3,1573.27,61477.19,62200.0 +5,21000000.0,16800.0,250000.0,15600.0,1020.0,10.4,8.11e-05,142.0,142.0,16.0,,, +6,25200000.0,16400.0,255000.0,16000.0,1020.0,9.9,7.63e-05,145.0,145.0,9.07,1573.27,61477.19,62200.0 +7,29400000.0,117000.0,36000.0,2250.0,1020.0,9.37,7.16e-05,20.4,20.4,6.23,1573.27,61477.19,62200.0 +8,33600000.0,16600.0,253000.0,15800.0,1020.0,8.89,6.68e-05,144.0,144.0,5.76,1573.27,61477.19,62200.0 +9,37700000.0,17300.0,243000.0,15200.0,1020.0,8.8,6.21e-05,138.0,138.0,11.2,1573.27,61477.19,62200.0 +10,41900000.0,16500.0,255000.0,15900.0,1020.0,8.33,5.74e-05,144.0,144.0,5.72,1573.27,61477.19,62200.0 +11,46100000.0,16700.0,252000.0,15700.0,1020.0,8.06,5.26e-05,143.0,143.0,4.91,1573.27,61477.19,62200.0 +12,50300000.0,16900.0,249000.0,15500.0,1020.0,7.9,4.79e-05,141.0,141.0,4.86,1573.27,61477.19,62200.0 +13,54500000.0,17000.0,247000.0,15400.0,1020.0,7.75,4.32e-05,140.0,140.0,4.69,1573.27,61477.19,62200.0 +14,58700000.0,16500.0,254000.0,15900.0,1020.0,7.62,3.84e-05,144.0,144.0,4.69,1573.27,61477.19,62200.0 +15,62900000.0,16700.0,251000.0,15700.0,1020.0,7.48,3.37e-05,143.0,143.0,4.49,,, +16,67099999.99999999,16900.0,248000.0,15500.0,1020.0,7.34,2.89e-05,141.0,141.0,3.99,1573.27,61477.19,62200.0 +17,71300000.0,17500.0,240000.0,15000.0,1020.0,7.23,2.42e-05,136.0,136.0,3.54,1573.27,61477.19,62200.0 +18,75500000.0,17100.0,245000.0,15300.0,1020.0,7.16,1.95e-05,139.0,139.0,3.28,1573.27,61477.19,62200.0 +19,79700000.0,16900.0,247000.0,15500.0,1020.0,7.09,1.47e-05,140.0,140.0,3.2,,, +20,83900000.0,16600.0,252000.0,15700.0,1020.0,7.03,1e-05,143.0,143.0,3.1,,, diff --git a/llama-1B/16_GPUS/dp-1_tp-8_pp-2_mbz-32/profiler.csv b/llama-1B/16_GPUS/dp-1_tp-8_pp-2_mbz-32/profiler.csv new file mode 100644 index 0000000000000000000000000000000000000000..a138f7395eaafe6ee13ebf560305d41363cbbaa3 --- /dev/null +++ b/llama-1B/16_GPUS/dp-1_tp-8_pp-2_mbz-32/profiler.csv @@ -0,0 +1,2 @@ +forward,backward +0ms 974μs,0ms 988μs diff --git a/llama-1B/16_GPUS/dp-1_tp-8_pp-2_mbz-8/profiler.csv b/llama-1B/16_GPUS/dp-1_tp-8_pp-2_mbz-8/profiler.csv index 0502704babba712387f54d347559ace0a1293699..7eb59cbbe1708576899e36ad37ffd64e1a29f36d 100644 --- a/llama-1B/16_GPUS/dp-1_tp-8_pp-2_mbz-8/profiler.csv +++ b/llama-1B/16_GPUS/dp-1_tp-8_pp-2_mbz-8/profiler.csv @@ -1,2 +1,2 @@ forward,backward -1ms 74μs,0ms 533μs +0ms 949μs,1ms 197μs diff --git a/llama-1B/16_GPUS/dp-2_tp-1_pp-8_mbz-2/log_metrics.csv b/llama-1B/16_GPUS/dp-2_tp-1_pp-8_mbz-2/log_metrics.csv new file mode 100644 index 0000000000000000000000000000000000000000..3ab3460740aff819f8aa633a65b8f517cdc08900 --- /dev/null +++ b/llama-1B/16_GPUS/dp-2_tp-1_pp-8_mbz-2/log_metrics.csv @@ -0,0 +1,21 @@ +iteration,consumed_tokens,elapsed_time_per_iteration_ms,tokens_per_sec,tokens_per_sec_per_gpu,global_batch_size,lm_loss,lr,model_tflops_per_gpu,hardware_tflops_per_gpu,grad_norm,memory_usage_MiB,peak_allocated_MiB,peak_reserved_MiB +1,4190000.0000000005,63000.0,66600.0,4160.0,1020.0,11.1,0.0001,37.8,37.8,24.9,3168.18,4459.05,23598.0 +2,8390000.0,32100.0,131000.0,8170.0,1020.0,11.1,9.53e-05,74.1,74.1,25.1,3168.18,4459.05,23598.0 +3,12600000.0,29300.0,143000.0,8950.0,1020.0,9.49,9.05e-05,81.2,81.2,21.5,3168.18,4459.05,23598.0 +4,16800000.0,28900.0,145000.0,9070.0,1020.0,9.36,8.58e-05,82.3,82.3,21.4,3168.18,22834.67,23598.0 +5,21000000.0,31800.0,132000.0,8240.0,1020.0,9.01,8.11e-05,74.8,74.8,12.7,,, +6,25200000.0,30800.0,136000.0,8520.0,1020.0,10.3,7.63e-05,77.3,77.3,47.1,3168.18,22834.67,23598.0 +7,29400000.0,241000.0,17400.0,1090.0,1020.0,8.68,7.16e-05,9.88,9.88,5.58,3168.18,22834.67,23598.0 +8,33600000.0,31200.0,135000.0,8410.0,1020.0,8.32,6.68e-05,76.3,76.3,4.77,3168.18,22834.67,23598.0 +9,37700000.0,31900.0,131000.0,8210.0,1020.0,7.95,6.21e-05,74.5,74.5,3.31,3168.18,22834.67,23598.0 +10,41900000.0,30600.0,137000.0,8550.0,1020.0,7.69,5.74e-05,77.6,77.6,4.31,3168.18,22834.67,23598.0 +11,46100000.0,32100.0,131000.0,8170.0,1020.0,7.45,5.26e-05,74.2,74.2,2.5,,, +12,50300000.0,31700.0,132000.0,8270.0,1020.0,7.37,4.79e-05,75.0,75.0,5.02,3168.18,22834.67,23598.0 +13,54500000.0,32200.000000000004,130000.0,8150.0,1020.0,7.31,4.32e-05,73.9,73.9,6.06,3168.18,22834.67,23598.0 +14,58700000.0,31900.0,132000.0,8220.0,1020.0,7.19,3.84e-05,74.6,74.6,5.3,,, +15,62900000.0,34100.0,123000.0,7690.0,1020.0,7.06,3.37e-05,69.7,69.7,2.73,3168.18,22834.67,23598.0 +16,67099999.99999999,32500.0,129000.0,8060.000000000001,1020.0,6.97,2.89e-05,73.2,73.2,1.99,3168.18,22834.67,23598.0 +17,71300000.0,30800.0,136000.0,8510.0,1020.0,6.91,2.42e-05,77.2,77.2,2.04,3168.18,22834.67,23598.0 +18,75500000.0,32900.0,128000.0,7970.0,1020.0,6.86,1.95e-05,72.4,72.4,2.0,3168.18,22834.67,23598.0 +19,79700000.0,30100.0,139000.0,8700.0,1020.0,6.81,1.47e-05,78.9,78.9,2.01,,, +20,83900000.0,31100.0,135000.0,8420.0,1020.0,6.77,1e-05,76.4,76.4,1.94,,, diff --git a/llama-1B/16_GPUS/dp-2_tp-1_pp-8_mbz-2/profiler.csv b/llama-1B/16_GPUS/dp-2_tp-1_pp-8_mbz-2/profiler.csv new file mode 100644 index 0000000000000000000000000000000000000000..7b75c818d0d78d462de2fe53360197e1ac76cf2a --- /dev/null +++ b/llama-1B/16_GPUS/dp-2_tp-1_pp-8_mbz-2/profiler.csv @@ -0,0 +1,2 @@ +forward,backward +0ms 715μs,2ms 528μs diff --git a/llama-1B/16_GPUS/dp-2_tp-1_pp-8_mbz-4/profiler.csv b/llama-1B/16_GPUS/dp-2_tp-1_pp-8_mbz-4/profiler.csv index 2adb989d7e337d8324e131af7b1016d8cb12dd88..8194bcf37a26ce66faee3b03f35acd6b44ab1078 100644 --- a/llama-1B/16_GPUS/dp-2_tp-1_pp-8_mbz-4/profiler.csv +++ b/llama-1B/16_GPUS/dp-2_tp-1_pp-8_mbz-4/profiler.csv @@ -1,2 +1,2 @@ forward,backward -0ms 732μs,3ms 706μs +0ms 976μs,1ms 206μs diff --git a/llama-1B/16_GPUS/dp-2_tp-2_pp-4_mbz-2/log_metrics.csv b/llama-1B/16_GPUS/dp-2_tp-2_pp-4_mbz-2/log_metrics.csv new file mode 100644 index 0000000000000000000000000000000000000000..439c0aad3aadffe2639af986e072ef77b3fa9ac6 --- /dev/null +++ b/llama-1B/16_GPUS/dp-2_tp-2_pp-4_mbz-2/log_metrics.csv @@ -0,0 +1,21 @@ +iteration,consumed_tokens,elapsed_time_per_iteration_ms,tokens_per_sec,tokens_per_sec_per_gpu,global_batch_size,lm_loss,lr,model_tflops_per_gpu,hardware_tflops_per_gpu,grad_norm,memory_usage_MiB,peak_allocated_MiB,peak_reserved_MiB +1,4190000.0000000005,58800.0,71400.0,4460.0,1020.0,11.2,0.0001,40.5,40.5,14.8,2343.89,3292.11,12328.0 +2,8390000.0,35300.0,119000.0,7440.0,1020.0,11.2,9.53e-05,67.5,67.5,14.9,2343.89,3292.11,12328.0 +3,12600000.0,36400.0,115000.0,7210.0,1020.0,9.53,9.05e-05,65.4,65.4,35.8,2343.89,3292.11,12328.0 +4,16800000.0,37900.0,111000.0,6910.0,1020.0,12.3,8.58e-05,62.7,62.7,37.4,,, +5,21000000.0,35100.0,119000.0,7460.0,1020.0,9.94,8.11e-05,67.7,67.7,14.1,2343.89,11855.1,12328.0 +6,25200000.0,34300.0,122000.0,7650.0,1020.0,9.44,7.63e-05,69.4,69.4,8.14,2343.89,11855.1,12328.0 +7,29400000.0,430000.0,9760.0,610.0,1020.0,8.73,7.16e-05,5.54,5.54,6.04,2343.89,11855.1,12328.0 +8,33600000.0,35100.0,119000.0,7460.0,1020.0,9.17,6.68e-05,67.7,67.7,27.9,,, +9,37700000.0,33900.0,124000.0,7730.0,1020.0,8.33,6.21e-05,70.2,70.2,9.38,2343.89,11855.1,12328.0 +10,41900000.0,32900.0,128000.0,7970.0,1020.0,8.02,5.74e-05,72.3,72.3,5.25,,, +11,46100000.0,35800.0,117000.0,7320.0,1020.0,7.85,5.26e-05,66.4,66.4,4.81,2343.89,11855.1,12328.0 +12,50300000.0,37200.0,113000.0,7050.0,1020.0,7.68,4.79e-05,64.0,64.0,4.49,,, +13,54500000.0,34900.0,120000.0,7520.0,1020.0,7.53,4.32e-05,68.2,68.2,4.16,2343.89,11855.1,12328.0 +14,58700000.0,34500.0,122000.0,7600.0,1020.0,7.4,3.84e-05,68.9,68.9,4.08,2343.89,11855.1,12328.0 +15,62900000.0,35200.0,119000.0,7440.0,1020.0,7.26,3.37e-05,67.5,67.5,3.25,,, +16,67099999.99999999,35300.0,119000.0,7420.0,1020.0,7.17,2.89e-05,67.3,67.3,2.43,2343.89,11855.1,12328.0 +17,71300000.0,37200.0,113000.0,7040.0,1020.0,7.1,2.42e-05,63.9,63.9,2.88,2343.89,11855.1,12328.0 +18,75500000.0,34500.0,121000.0,7590.0,1020.0,7.03,1.95e-05,68.9,68.9,2.75,2343.89,11855.1,12328.0 +19,79700000.0,36300.0,116000.0,7230.0,1020.0,6.96,1.47e-05,65.6,65.6,2.64,2343.89,11855.1,12328.0 +20,83900000.0,34800.0,120000.0,7530.0,1020.0,6.91,1e-05,68.3,68.3,2.47,,, diff --git a/llama-1B/16_GPUS/dp-2_tp-2_pp-4_mbz-2/profiler.csv b/llama-1B/16_GPUS/dp-2_tp-2_pp-4_mbz-2/profiler.csv new file mode 100644 index 0000000000000000000000000000000000000000..f6fea939d026c91fc568f553a347591ad8f90344 --- /dev/null +++ b/llama-1B/16_GPUS/dp-2_tp-2_pp-4_mbz-2/profiler.csv @@ -0,0 +1,2 @@ +forward,backward +0ms 959μs,1ms 86μs diff --git a/llama-1B/16_GPUS/dp-2_tp-2_pp-4_mbz-4/log_metrics.csv b/llama-1B/16_GPUS/dp-2_tp-2_pp-4_mbz-4/log_metrics.csv new file mode 100644 index 0000000000000000000000000000000000000000..755c9dd133dac79af4fd28ef92bbbd78c476a101 --- /dev/null +++ b/llama-1B/16_GPUS/dp-2_tp-2_pp-4_mbz-4/log_metrics.csv @@ -0,0 +1,21 @@ +iteration,consumed_tokens,elapsed_time_per_iteration_ms,tokens_per_sec,tokens_per_sec_per_gpu,global_batch_size,lm_loss,lr,model_tflops_per_gpu,hardware_tflops_per_gpu,grad_norm,memory_usage_MiB,peak_allocated_MiB,peak_reserved_MiB +1,4190000.0000000005,58200.0,72100.0,4500.0,1020.0,11.2,0.0001,40.9,40.9,14.8,2343.96,3291.29,21926.0 +2,8390000.0,31600.0,133000.0,8290.0,1020.0,11.2,9.53e-05,75.2,75.2,14.9,2343.96,21358.33,21926.0 +3,12600000.0,32400.0,129000.0,8090.0,1020.0,9.53,9.05e-05,73.4,73.4,35.7,2343.96,3291.29,21926.0 +4,16800000.0,30400.0,138000.0,8630.0,1020.0,12.3,8.58e-05,78.3,78.3,37.4,2343.96,21358.33,21926.0 +5,21000000.0,33200.0,126000.0,7890.0,1020.0,9.94,8.11e-05,71.6,71.6,14.1,,, +6,25200000.0,32100.0,130000.0,8150.0,1020.0,9.43,7.63e-05,74.0,74.0,8.15,2343.96,21358.33,21926.0 +7,29400000.0,224000.0,18700.0,1170.0,1020.0,8.73,7.16e-05,10.6,10.6,6.04,2343.96,21358.33,21926.0 +8,33600000.0,34400.0,122000.0,7620.0,1020.0,9.16,6.68e-05,69.1,69.1,27.8,2343.96,21358.33,21926.0 +9,37700000.0,33300.0,126000.0,7880.0,1020.0,8.32,6.21e-05,71.5,71.5,9.29,2343.96,21358.33,21926.0 +10,41900000.0,32600.0,129000.0,8029.999999999999,1020.0,8.02,5.74e-05,72.9,72.9,5.24,2343.96,21358.33,21926.0 +11,46100000.0,33500.0,125000.0,7830.0,1020.0,7.85,5.26e-05,71.1,71.1,4.81,2343.96,21358.33,21926.0 +12,50300000.0,33200.0,126000.0,7900.0,1020.0,7.68,4.79e-05,71.7,71.7,4.49,2343.96,21358.33,21926.0 +13,54500000.0,31800.0,132000.0,8250.0,1020.0,7.53,4.32e-05,74.9,74.9,4.15,2343.96,21358.33,21926.0 +14,58700000.0,32000.0,131000.0,8200.0,1020.0,7.4,3.84e-05,74.4,74.4,4.07,2343.96,21358.33,21926.0 +15,62900000.0,32299.999999999996,130000.0,8119.999999999999,1020.0,7.26,3.37e-05,73.7,73.7,3.24,2343.96,21358.33,21926.0 +16,67099999.99999999,33600.0,125000.0,7810.0,1020.0,7.17,2.89e-05,70.8,70.8,2.43,2343.96,21358.33,21926.0 +17,71300000.0,31500.0,133000.0,8320.0,1020.0,7.1,2.42e-05,75.5,75.5,2.88,2343.96,21358.33,21926.0 +18,75500000.0,33200.0,126000.0,7890.0,1020.0,7.03,1.95e-05,71.6,71.6,2.75,2343.96,21358.33,21926.0 +19,79700000.0,34100.0,123000.0,7700.0,1020.0,6.96,1.47e-05,69.8,69.8,2.64,,, +20,83900000.0,31700.0,132000.0,8260.0,1020.0,6.91,1e-05,75.0,75.0,2.47,,, diff --git a/llama-1B/16_GPUS/dp-2_tp-2_pp-4_mbz-4/profiler.csv b/llama-1B/16_GPUS/dp-2_tp-2_pp-4_mbz-4/profiler.csv new file mode 100644 index 0000000000000000000000000000000000000000..50a2c63ee64d05503d7b2c561b0255a0023d6bdf --- /dev/null +++ b/llama-1B/16_GPUS/dp-2_tp-2_pp-4_mbz-4/profiler.csv @@ -0,0 +1,2 @@ +forward,backward +0ms 915μs,1ms 415μs diff --git a/llama-1B/16_GPUS/dp-2_tp-2_pp-4_mbz-8/log_metrics.csv b/llama-1B/16_GPUS/dp-2_tp-2_pp-4_mbz-8/log_metrics.csv new file mode 100644 index 0000000000000000000000000000000000000000..48dd40831ef7b5614c556cbe71b9711b57444281 --- /dev/null +++ b/llama-1B/16_GPUS/dp-2_tp-2_pp-4_mbz-8/log_metrics.csv @@ -0,0 +1,21 @@ +iteration,consumed_tokens,elapsed_time_per_iteration_ms,tokens_per_sec,tokens_per_sec_per_gpu,global_batch_size,lm_loss,lr,model_tflops_per_gpu,hardware_tflops_per_gpu,grad_norm,memory_usage_MiB,peak_allocated_MiB,peak_reserved_MiB +1,4190000.0000000005,47000.0,89300.0,5580.0,1020.0,11.2,0.0001,50.6,50.6,14.8,2344.1,3289.64,41214.0 +2,8390000.0,24200.0,174000.0,10800.0,1020.0,11.2,9.53e-05,98.4,98.4,14.9,2344.1,3289.64,41214.0 +3,12600000.0,28600.0,147000.0,9180.0,1020.0,9.53,9.05e-05,83.3,83.3,35.8,2344.1,40364.8,41214.0 +4,16800000.0,27800.0,151000.0,9440.0,1020.0,12.3,8.58e-05,85.7,85.7,37.4,2344.1,3289.64,41214.0 +5,21000000.0,25200.0,166000.0,10400.0,1020.0,9.94,8.11e-05,94.2,94.2,14.1,2344.1,40364.8,41214.0 +6,25200000.0,26300.0,159000.0,9950.0,1020.0,9.44,7.63e-05,90.3,90.3,8.13,2344.1,40364.8,41214.0 +7,29400000.0,121000.0,34700.0,2170.0,1020.0,8.73,7.16e-05,19.7,19.7,6.04,2344.1,40364.8,41214.0 +8,33600000.0,26000.0,161000.0,10100.0,1020.0,9.17,6.68e-05,91.4,91.4,28.0,2344.1,40364.8,41214.0 +9,37700000.0,27100.0,155000.0,9660.0,1020.0,8.33,6.21e-05,87.6,87.6,9.42,2344.1,40364.8,41214.0 +10,41900000.0,26600.0,157000.0,9840.0,1020.0,8.02,5.74e-05,89.3,89.3,5.24,2344.1,40364.8,41214.0 +11,46100000.0,26500.0,158000.0,9880.0,1020.0,7.85,5.26e-05,89.6,89.6,4.81,,, +12,50300000.0,27900.0,151000.0,9410.0,1020.0,7.68,4.79e-05,85.4,85.4,4.49,2344.1,40364.8,41214.0 +13,54500000.0,25900.0,162000.0,10100.0,1020.0,7.53,4.32e-05,91.7,91.7,4.15,2344.1,40364.8,41214.0 +14,58700000.0,25700.0,163000.0,10200.0,1020.0,7.4,3.84e-05,92.6,92.6,4.07,,, +15,62900000.0,25800.0,163000.0,10200.0,1020.0,7.26,3.37e-05,92.3,92.3,3.25,2344.1,40364.8,41214.0 +16,67099999.99999999,24100.0,174000.0,10900.0,1020.0,7.17,2.89e-05,98.6,98.6,2.44,2344.1,40364.8,41214.0 +17,71300000.0,25600.0,164000.0,10200.0,1020.0,7.1,2.42e-05,92.9,92.9,2.88,2344.1,40364.8,41214.0 +18,75500000.0,25400.0,165000.0,10300.0,1020.0,7.03,1.95e-05,93.5,93.5,2.75,2344.1,40364.8,41214.0 +19,79700000.0,24300.0,173000.0,10800.0,1020.0,6.96,1.47e-05,97.9,97.9,2.65,,, +20,83900000.0,28300.0,148000.0,9270.0,1020.0,6.91,1e-05,84.1,84.1,2.47,,, diff --git a/llama-1B/16_GPUS/dp-2_tp-2_pp-4_mbz-8/profiler.csv b/llama-1B/16_GPUS/dp-2_tp-2_pp-4_mbz-8/profiler.csv new file mode 100644 index 0000000000000000000000000000000000000000..047b9654124246c04154982a2393d39ead418993 --- /dev/null +++ b/llama-1B/16_GPUS/dp-2_tp-2_pp-4_mbz-8/profiler.csv @@ -0,0 +1,2 @@ +forward,backward +0ms 961μs,1ms 120μs diff --git a/llama-1B/16_GPUS/dp-2_tp-4_pp-2_mbz-16/log_metrics.csv b/llama-1B/16_GPUS/dp-2_tp-4_pp-2_mbz-16/log_metrics.csv new file mode 100644 index 0000000000000000000000000000000000000000..7011a1f27fd54d185b6fd7c8bbde4a5e5f382af0 --- /dev/null +++ b/llama-1B/16_GPUS/dp-2_tp-4_pp-2_mbz-16/log_metrics.csv @@ -0,0 +1,21 @@ +iteration,consumed_tokens,elapsed_time_per_iteration_ms,tokens_per_sec,tokens_per_sec_per_gpu,global_batch_size,lm_loss,lr,model_tflops_per_gpu,hardware_tflops_per_gpu,grad_norm,memory_usage_MiB,peak_allocated_MiB,peak_reserved_MiB +1,4190000.0000000005,27500.0,152000.0,9520.0,1020.0,11.1,0.0001,86.4,86.4,15.0,2064.33,2888.12,47696.0 +2,8390000.0,13900.0,302000.0,18900.0,1020.0,11.1,9.53e-05,171.0,171.0,15.1,2064.33,2888.12,47952.0 +3,12600000.0,12900.0,325000.0,20300.0,1020.0,11.4,9.05e-05,184.0,184.0,106.0,2064.33,47377.06,47952.0 +4,16800000.0,13300.0,315000.0,19700.0,1020.0,11.7,8.58e-05,179.0,179.0,24.5,2064.33,47377.06,47952.0 +5,21000000.0,13000.0,324000.0,20200.0,1020.0,10.0,8.11e-05,184.0,184.0,11.0,,, +6,25200000.0,12500.0,335000.0,20900.0,1020.0,9.46,7.63e-05,190.0,190.0,7.2,2064.33,47377.06,47952.0 +7,29400000.0,108000.0,38700.0,2420.0,1020.0,8.87,7.16e-05,21.9,21.9,5.99,2064.33,47377.06,47952.0 +8,33600000.0,13500.0,311000.0,19400.0,1020.0,8.44,6.68e-05,176.0,176.0,5.47,,, +9,37700000.0,12400.0,337000.0,21100.0,1020.0,8.17,6.21e-05,191.0,191.0,6.22,2064.33,47377.06,47952.0 +10,41900000.0,12700.0,331000.0,20700.0,1020.0,7.87,5.74e-05,188.0,188.0,4.35,2064.33,47377.06,47952.0 +11,46100000.0,12900.0,326000.0,20400.0,1020.0,7.74,5.26e-05,185.0,185.0,4.47,2064.33,47377.06,47952.0 +12,50300000.0,12700.0,331000.0,20700.0,1020.0,7.6,4.79e-05,188.0,188.0,4.41,2064.33,47377.06,47952.0 +13,54500000.0,12800.0,328000.0,20500.0,1020.0,7.41,4.32e-05,186.0,186.0,3.72,2064.33,47377.06,47952.0 +14,58700000.0,13700.0,306000.0,19100.0,1020.0,7.27,3.84e-05,173.0,173.0,3.19,,, +15,62900000.0,13300.0,316000.0,19800.0,1020.0,7.17,3.37e-05,179.0,179.0,3.0,2064.33,47377.06,47952.0 +16,67099999.99999999,13400.0,312000.0,19500.0,1020.0,7.07,2.89e-05,177.0,177.0,3.0,2064.33,47377.06,47952.0 +17,71300000.0,14800.0,283000.0,17700.0,1020.0,6.96,2.42e-05,160.0,160.0,2.81,2064.33,47377.06,47952.0 +18,75500000.0,13300.0,314000.0,19700.0,1020.0,6.88,1.95e-05,178.0,178.0,3.0,2064.33,47377.06,47952.0 +19,79700000.0,13600.0,308000.0,19200.0,1020.0,6.82,1.47e-05,174.0,174.0,3.08,,, +20,83900000.0,14100.0,298000.0,18600.0,1020.0,6.77,1e-05,169.0,169.0,2.98,,, diff --git a/llama-1B/16_GPUS/dp-2_tp-4_pp-2_mbz-16/profiler.csv b/llama-1B/16_GPUS/dp-2_tp-4_pp-2_mbz-16/profiler.csv new file mode 100644 index 0000000000000000000000000000000000000000..8acaf4a56a7f9537ab7172568be2c09827741c31 --- /dev/null +++ b/llama-1B/16_GPUS/dp-2_tp-4_pp-2_mbz-16/profiler.csv @@ -0,0 +1,2 @@ +forward,backward +0ms 963μs,1ms 10μs diff --git a/llama-1B/16_GPUS/dp-2_tp-4_pp-2_mbz-4/log_metrics.csv b/llama-1B/16_GPUS/dp-2_tp-4_pp-2_mbz-4/log_metrics.csv new file mode 100644 index 0000000000000000000000000000000000000000..4f4c11ff49fb082c834fb9c4164e736c5f3306eb --- /dev/null +++ b/llama-1B/16_GPUS/dp-2_tp-4_pp-2_mbz-4/log_metrics.csv @@ -0,0 +1,21 @@ +iteration,consumed_tokens,elapsed_time_per_iteration_ms,tokens_per_sec,tokens_per_sec_per_gpu,global_batch_size,lm_loss,lr,model_tflops_per_gpu,hardware_tflops_per_gpu,grad_norm,memory_usage_MiB,peak_allocated_MiB,peak_reserved_MiB +1,4190000.0000000005,30700.0,136000.0,8530.0,1020.0,11.1,0.0001,77.4,77.4,15.0,2063.91,13395.12,13626.0 +2,8390000.0,15700.0,268000.0,16700.0,1020.0,11.1,9.53e-05,152.0,152.0,15.1,2063.91,2887.7,13626.0 +3,12600000.0,15900.0,263000.0,16400.0,1020.0,11.4,9.05e-05,149.0,149.0,106.0,2063.91,2887.7,13626.0 +4,16800000.0,19200.0,218000.0,13600.0,1020.0,11.7,8.58e-05,124.0,124.0,24.5,2063.91,13395.12,13626.0 +5,21000000.0,20000.0,210000.0,13100.0,1020.0,10.0,8.11e-05,119.0,119.0,11.0,,, +6,25200000.0,19300.0,217000.0,13600.0,1020.0,9.46,7.63e-05,123.0,123.0,7.2,2063.91,13395.12,13626.0 +7,29400000.0,413000.0,10200.0,635.0,1020.0,8.87,7.16e-05,5.76,5.76,5.99,2063.91,13395.12,13626.0 +8,33600000.0,16300.0,257000.0,16000.0,1020.0,8.43,6.68e-05,145.0,145.0,5.47,,, +9,37700000.0,15400.0,273000.0,17000.0,1020.0,8.17,6.21e-05,155.0,155.0,6.19,2063.91,13395.12,13626.0 +10,41900000.0,16400.0,256000.0,16000.0,1020.0,7.86,5.74e-05,145.0,145.0,4.35,2063.91,13395.12,13626.0 +11,46100000.0,15600.0,268000.0,16800.0,1020.0,7.74,5.26e-05,152.0,152.0,4.48,2063.91,13395.12,13626.0 +12,50300000.0,15700.0,267000.0,16700.0,1020.0,7.6,4.79e-05,152.0,152.0,4.41,2063.91,13395.12,13626.0 +13,54500000.0,15400.0,273000.0,17100.0,1020.0,7.41,4.32e-05,155.0,155.0,3.72,2063.91,13395.12,13626.0 +14,58700000.0,15300.0,274000.0,17100.0,1020.0,7.27,3.84e-05,155.0,155.0,3.19,2063.91,13395.12,13626.0 +15,62900000.0,15100.0,278000.0,17300.0,1020.0,7.17,3.37e-05,157.0,157.0,3.0,2063.91,13395.12,13626.0 +16,67099999.99999999,16200.0,260000.0,16200.0,1020.0,7.07,2.89e-05,147.0,147.0,3.0,2063.91,13395.12,13626.0 +17,71300000.0,15300.0,273000.0,17100.0,1020.0,6.96,2.42e-05,155.0,155.0,2.81,2063.91,13395.12,13626.0 +18,75500000.0,15200.0,275000.0,17200.0,1020.0,6.88,1.95e-05,156.0,156.0,3.0,2063.91,13395.12,13626.0 +19,79700000.0,15600.0,269000.0,16800.0,1020.0,6.82,1.47e-05,152.0,152.0,3.08,,, +20,83900000.0,16200.0,260000.0,16200.0,1020.0,6.77,1e-05,147.0,147.0,2.98,,, diff --git a/llama-1B/16_GPUS/dp-2_tp-4_pp-2_mbz-4/profiler.csv b/llama-1B/16_GPUS/dp-2_tp-4_pp-2_mbz-4/profiler.csv new file mode 100644 index 0000000000000000000000000000000000000000..aaacb5b60b430dd67f67e80d5980ae926154b567 --- /dev/null +++ b/llama-1B/16_GPUS/dp-2_tp-4_pp-2_mbz-4/profiler.csv @@ -0,0 +1,2 @@ +forward,backward +0ms 967μs,1ms 107μs diff --git a/llama-1B/16_GPUS/dp-2_tp-4_pp-2_mbz-8/log_metrics.csv b/llama-1B/16_GPUS/dp-2_tp-4_pp-2_mbz-8/log_metrics.csv new file mode 100644 index 0000000000000000000000000000000000000000..a40af15fda8db82a5dbf2d5ded09861e0c6d9b3e --- /dev/null +++ b/llama-1B/16_GPUS/dp-2_tp-4_pp-2_mbz-8/log_metrics.csv @@ -0,0 +1,21 @@ +iteration,consumed_tokens,elapsed_time_per_iteration_ms,tokens_per_sec,tokens_per_sec_per_gpu,global_batch_size,lm_loss,lr,model_tflops_per_gpu,hardware_tflops_per_gpu,grad_norm,memory_usage_MiB,peak_allocated_MiB,peak_reserved_MiB +1,4190000.0000000005,29300.0,143000.0,8950.0,1020.0,11.1,0.0001,81.2,81.2,15.0,2064.05,2887.84,24986.0 +2,8390000.0,15600.0,269000.0,16800.0,1020.0,11.1,9.53e-05,153.0,153.0,15.1,2064.05,2887.84,25114.0 +3,12600000.0,14200.0,294000.0,18400.0,1020.0,11.4,9.05e-05,167.0,167.0,106.0,2064.05,2887.84,25114.0 +4,16800000.0,16000.0,262000.0,16400.0,1020.0,11.7,8.58e-05,149.0,149.0,24.5,,, +5,21000000.0,15600.0,269000.0,16800.0,1020.0,10.0,8.11e-05,153.0,153.0,11.0,2064.05,24722.43,25114.0 +6,25200000.0,14500.0,290000.0,18100.0,1020.0,9.46,7.63e-05,164.0,164.0,7.21,2064.05,24722.43,25114.0 +7,29400000.0,221000.0,19000.0,1190.0,1020.0,8.87,7.16e-05,10.8,10.8,5.99,,, +8,33600000.0,14600.0,287000.0,17900.0,1020.0,8.44,6.68e-05,163.0,163.0,5.45,2064.05,24722.43,25114.0 +9,37700000.0,16000.0,262000.0,16400.0,1020.0,8.18,6.21e-05,149.0,149.0,6.29,2064.05,24722.43,25114.0 +10,41900000.0,14500.0,289000.0,18100.0,1020.0,7.87,5.74e-05,164.0,164.0,4.35,2064.05,24722.43,25114.0 +11,46100000.0,14700.0,286000.0,17900.0,1020.0,7.74,5.26e-05,162.0,162.0,4.47,2064.05,24722.43,25114.0 +12,50300000.0,14200.0,295000.0,18500.0,1020.0,7.6,4.79e-05,167.0,167.0,4.41,,, +13,54500000.0,14600.0,288000.0,18000.0,1020.0,7.42,4.32e-05,163.0,163.0,3.72,2064.05,24722.43,25114.0 +14,58700000.0,14900.0,282000.0,17700.0,1020.0,7.27,3.84e-05,160.0,160.0,3.2,2064.05,24722.43,25114.0 +15,62900000.0,14500.0,289000.0,18100.0,1020.0,7.17,3.37e-05,164.0,164.0,2.99,2064.05,24722.43,25114.0 +16,67099999.99999999,14000.0,299000.0,18700.0,1020.0,7.07,2.89e-05,169.0,169.0,3.0,2064.05,24722.43,25114.0 +17,71300000.0,14900.0,282000.0,17600.0,1020.0,6.96,2.42e-05,160.0,160.0,2.81,,, +18,75500000.0,14500.0,289000.0,18100.0,1020.0,6.88,1.95e-05,164.0,164.0,3.0,2064.05,24722.43,25114.0 +19,79700000.0,15200.0,275000.0,17200.0,1020.0,6.82,1.47e-05,156.0,156.0,3.08,,, +20,83900000.0,15100.0,278000.0,17400.0,1020.0,6.77,1e-05,158.0,158.0,2.99,,, diff --git a/llama-1B/16_GPUS/dp-2_tp-4_pp-2_mbz-8/profiler.csv b/llama-1B/16_GPUS/dp-2_tp-4_pp-2_mbz-8/profiler.csv new file mode 100644 index 0000000000000000000000000000000000000000..48ba6fd7e643652137b7d444976646bfe760fb3c --- /dev/null +++ b/llama-1B/16_GPUS/dp-2_tp-4_pp-2_mbz-8/profiler.csv @@ -0,0 +1,2 @@ +forward,backward +0ms 967μs,1ms 105μs diff --git a/llama-1B/16_GPUS/dp-2_tp-8_pp-1_mbz-16/log_metrics.csv b/llama-1B/16_GPUS/dp-2_tp-8_pp-1_mbz-16/log_metrics.csv new file mode 100644 index 0000000000000000000000000000000000000000..7caa45524672e146f78816f85d16598d5336debe --- /dev/null +++ b/llama-1B/16_GPUS/dp-2_tp-8_pp-1_mbz-16/log_metrics.csv @@ -0,0 +1,21 @@ +iteration,consumed_tokens,elapsed_time_per_iteration_ms,tokens_per_sec,tokens_per_sec_per_gpu,global_batch_size,lm_loss,lr,model_tflops_per_gpu,hardware_tflops_per_gpu,grad_norm,memory_usage_MiB,peak_allocated_MiB,peak_reserved_MiB +1,4190000.0000000005,25900.0,162000.0,10100.0,1020.0,11.5,0.0001,92.0,92.0,15.7,1696.19,30498.63,30840.0 +2,8390000.0,13400.0,312000.0,19500.0,1020.0,11.5,9.53e-05,177.0,177.0,16.0,1696.19,30498.63,30840.0 +3,12600000.0,13600.0,308000.0,19200.0,1020.0,12.8,9.05e-05,174.0,174.0,137.0,1696.19,30498.63,30840.0 +4,16800000.0,13600.0,308000.0,19300.0,1020.0,12.2,8.58e-05,175.0,175.0,22.4,1696.17,2358.02,30840.0 +5,21000000.0,13400.0,314000.0,19600.0,1020.0,12.4,8.11e-05,178.0,178.0,42.9,1696.17,30498.63,30840.0 +6,25200000.0,13400.0,312000.0,19500.0,1020.0,11.1,7.63e-05,177.0,177.0,24.7,1696.17,30498.63,30840.0 +7,29400000.0,13600.0,309000.0,19300.0,1020.0,10.2,7.16e-05,175.0,175.0,12.2,1696.17,30498.63,30840.0 +8,33600000.0,13400.0,313000.0,19600.0,1020.0,9.8,6.68e-05,178.0,178.0,7.31,1696.17,30498.63,30840.0 +9,37700000.0,13400.0,314000.0,19600.0,1020.0,9.32,6.21e-05,178.0,178.0,6.66,1696.17,30498.63,30840.0 +10,41900000.0,13600.0,309000.0,19300.0,1020.0,9.22,5.74e-05,175.0,175.0,16.2,1696.17,30498.63,30840.0 +11,46100000.0,13600.0,308000.0,19300.0,1020.0,8.63,5.26e-05,175.0,175.0,7.93,1696.17,30498.63,30840.0 +12,50300000.0,13700.0,307000.0,19200.0,1020.0,8.27,4.79e-05,174.0,174.0,5.43,1696.17,30498.63,30840.0 +13,54500000.0,13300.0,315000.0,19700.0,1020.0,8.1,4.32e-05,179.0,179.0,5.53,1696.17,30498.63,30840.0 +14,58700000.0,13600.0,309000.0,19300.0,1020.0,7.93,3.84e-05,175.0,175.0,5.77,1696.17,30498.63,30840.0 +15,62900000.0,13600.0,309000.0,19300.0,1020.0,7.72,3.37e-05,175.0,175.0,5.17,1696.17,30498.63,30840.0 +16,67099999.99999999,13400.0,313000.0,19600.0,1020.0,7.56,2.89e-05,178.0,178.0,4.92,1696.17,30498.63,30840.0 +17,71300000.0,13800.0,304000.0,19000.0,1020.0,7.45,2.42e-05,172.0,172.0,4.93,1696.17,30498.63,30840.0 +18,75500000.0,13500.0,310000.0,19400.0,1020.0,7.35,1.95e-05,176.0,176.0,4.04,1696.17,30498.63,30840.0 +19,79700000.0,13500.0,311000.0,19400.0,1020.0,7.29,1.47e-05,176.0,176.0,4.11,1696.17,30498.63,30840.0 +20,83900000.0,13500.0,312000.0,19500.0,1020.0,7.23,1e-05,177.0,177.0,3.95,,, diff --git a/llama-1B/16_GPUS/dp-2_tp-8_pp-1_mbz-16/profiler.csv b/llama-1B/16_GPUS/dp-2_tp-8_pp-1_mbz-16/profiler.csv new file mode 100644 index 0000000000000000000000000000000000000000..b5e5f09c91d1c3c73d75e7e752627d52f8580cac --- /dev/null +++ b/llama-1B/16_GPUS/dp-2_tp-8_pp-1_mbz-16/profiler.csv @@ -0,0 +1,2 @@ +forward,backward +0ms 971μs,0ms 973μs diff --git a/llama-1B/16_GPUS/dp-2_tp-8_pp-1_mbz-32/log_metrics.csv b/llama-1B/16_GPUS/dp-2_tp-8_pp-1_mbz-32/log_metrics.csv new file mode 100644 index 0000000000000000000000000000000000000000..3439a4d5681e84c8d2c98fa231d23b92b19ba0ba --- /dev/null +++ b/llama-1B/16_GPUS/dp-2_tp-8_pp-1_mbz-32/log_metrics.csv @@ -0,0 +1,21 @@ +iteration,consumed_tokens,elapsed_time_per_iteration_ms,tokens_per_sec,tokens_per_sec_per_gpu,global_batch_size,lm_loss,lr,model_tflops_per_gpu,hardware_tflops_per_gpu,grad_norm,memory_usage_MiB,peak_allocated_MiB,peak_reserved_MiB +1,4190000.0000000005,20500.0,205000.0,12800.0,1020.0,11.5,0.0001,116.0,116.0,15.7,1697.3,59303.14,60566.0 +2,8390000.0,12900.0,325000.0,20300.0,1020.0,11.5,9.53e-05,184.0,184.0,16.0,1697.3,59303.14,60566.0 +3,12600000.0,12700.0,329000.0,20600.0,1020.0,12.8,9.05e-05,187.0,187.0,137.0,1697.3,59303.14,60566.0 +4,16800000.0,13000.0,322000.0,20100.0,1020.0,12.2,8.58e-05,183.0,183.0,22.4,1697.29,2359.13,60566.0 +5,21000000.0,13000.0,322000.0,20100.0,1020.0,12.4,8.11e-05,182.0,182.0,42.8,1697.29,59303.14,60566.0 +6,25200000.0,12900.0,325000.0,20300.0,1020.0,11.1,7.63e-05,185.0,185.0,24.8,1697.29,59303.14,60566.0 +7,29400000.0,13100.0,320000.0,20000.0,1020.0,10.2,7.16e-05,182.0,182.0,12.1,1697.29,59303.14,60566.0 +8,33600000.0,12700.0,329000.0,20600.0,1020.0,9.8,6.68e-05,187.0,187.0,7.31,1697.29,59303.14,60566.0 +9,37700000.0,12800.0,328000.0,20500.0,1020.0,9.32,6.21e-05,186.0,186.0,6.66,1697.29,59303.14,60566.0 +10,41900000.0,12900.0,324000.0,20300.0,1020.0,9.22,5.74e-05,184.0,184.0,16.3,1697.29,59303.14,60566.0 +11,46100000.0,12900.0,325000.0,20300.0,1020.0,8.63,5.26e-05,184.0,184.0,7.95,1697.29,59303.14,60566.0 +12,50300000.0,12800.0,329000.0,20500.0,1020.0,8.27,4.79e-05,186.0,186.0,5.43,1697.29,59303.14,60566.0 +13,54500000.0,12800.0,327000.0,20400.0,1020.0,8.1,4.32e-05,185.0,185.0,5.53,1697.29,59303.14,60566.0 +14,58700000.0,12800.0,328000.0,20500.0,1020.0,7.93,3.84e-05,186.0,186.0,5.77,1697.29,59303.14,60566.0 +15,62900000.0,12800.0,328000.0,20500.0,1020.0,7.72,3.37e-05,186.0,186.0,5.17,1697.29,59303.14,60566.0 +16,67099999.99999999,13000.0,323000.0,20200.0,1020.0,7.56,2.89e-05,183.0,183.0,4.93,1697.29,59303.14,60566.0 +17,71300000.0,12800.0,329000.0,20500.0,1020.0,7.45,2.42e-05,186.0,186.0,4.93,1697.29,59303.14,60566.0 +18,75500000.0,12800.0,327000.0,20500.0,1020.0,7.35,1.95e-05,186.0,186.0,4.02,1697.29,59303.14,60566.0 +19,79700000.0,12800.0,328000.0,20500.0,1020.0,7.29,1.47e-05,186.0,186.0,4.11,1697.29,59303.14,60566.0 +20,83900000.0,12700.0,329000.0,20600.0,1020.0,7.23,1e-05,187.0,187.0,3.96,,, diff --git a/llama-1B/16_GPUS/dp-2_tp-8_pp-1_mbz-32/profiler.csv b/llama-1B/16_GPUS/dp-2_tp-8_pp-1_mbz-32/profiler.csv new file mode 100644 index 0000000000000000000000000000000000000000..a934d682350a5e551aa4547f521be465e3bc24f7 --- /dev/null +++ b/llama-1B/16_GPUS/dp-2_tp-8_pp-1_mbz-32/profiler.csv @@ -0,0 +1,2 @@ +forward,backward +0ms 958μs,1ms 170μs diff --git a/llama-1B/16_GPUS/dp-4_tp-1_pp-4_mbz-1/log_metrics.csv b/llama-1B/16_GPUS/dp-4_tp-1_pp-4_mbz-1/log_metrics.csv new file mode 100644 index 0000000000000000000000000000000000000000..4a3db7f9ff75c1eac112adfdcc859c224b4c88b1 --- /dev/null +++ b/llama-1B/16_GPUS/dp-4_tp-1_pp-4_mbz-1/log_metrics.csv @@ -0,0 +1,21 @@ +iteration,consumed_tokens,elapsed_time_per_iteration_ms,tokens_per_sec,tokens_per_sec_per_gpu,global_batch_size,lm_loss,lr,model_tflops_per_gpu,hardware_tflops_per_gpu,grad_norm,memory_usage_MiB,peak_allocated_MiB,peak_reserved_MiB +1,4190000.0000000005,34000.0,123000.0,7720.0,1020.0,11.1,0.0001,70.0,70.0,25.1,3478.24,11653.25,12904.0 +2,8390000.0,19000.0,221000.0,13800.0,1020.0,11.1,9.53e-05,125.0,125.0,25.2,3478.24,5184.61,12904.0 +3,12600000.0,19100.0,219000.0,13700.0,1020.0,11.4,9.05e-05,124.0,124.0,217.0,3478.24,5184.61,12904.0 +4,16800000.0,22400.0,187000.0,11700.0,1020.0,13.8,8.58e-05,106.0,106.0,22.5,,, +5,21000000.0,22200.0,189000.0,11800.0,1020.0,9.98,8.11e-05,107.0,107.0,16.5,3478.24,11653.25,12904.0 +6,25200000.0,21500.0,195000.0,12200.0,1020.0,10.9,7.63e-05,111.0,111.0,93.8,3478.24,11653.25,12904.0 +7,29400000.0,389000.0,10800.0,674.0,1020.0,9.16,7.16e-05,6.11,6.11,19.7,3478.24,11653.25,12904.0 +8,33600000.0,22500.0,187000.0,11700.0,1020.0,8.83,6.68e-05,106.0,106.0,6.08,3478.24,11653.25,12904.0 +9,37700000.0,22600.0,186000.0,11600.0,1020.0,8.47,6.21e-05,105.0,105.0,5.23,3478.24,11653.25,12904.0 +10,41900000.0,19200.0,218000.0,13600.0,1020.0,8.17,5.74e-05,124.0,124.0,7.72,3478.24,11653.25,12904.0 +11,46100000.0,20000.0,210000.0,13100.0,1020.0,7.93,5.26e-05,119.0,119.0,5.54,,, +12,50300000.0,19600.0,214000.0,13300.0,1020.0,7.75,4.79e-05,121.0,121.0,4.65,3478.24,11653.25,12904.0 +13,54500000.0,18900.0,222000.0,13900.0,1020.0,7.58,4.32e-05,126.0,126.0,2.89,3478.24,11653.25,12904.0 +14,58700000.0,19100.0,219000.0,13700.0,1020.0,7.5,3.84e-05,124.0,124.0,4.19,3478.24,11653.25,12904.0 +15,62900000.0,19200.0,218000.0,13600.0,1020.0,7.4,3.37e-05,124.0,124.0,3.86,3478.24,11653.25,12904.0 +16,67099999.99999999,19500.0,215000.0,13500.0,1020.0,7.29,2.89e-05,122.0,122.0,3.07,3478.24,11653.25,12904.0 +17,71300000.0,19600.0,214000.0,13400.0,1020.0,7.19,2.42e-05,122.0,122.0,2.39,3478.24,11653.25,12904.0 +18,75500000.0,21100.0,199000.0,12400.0,1020.0,7.13,1.95e-05,113.0,113.0,2.21,3478.24,11653.25,12904.0 +19,79700000.0,19600.0,214000.0,13400.0,1020.0,7.08,1.47e-05,121.0,121.0,2.64,3478.24,11653.25,12904.0 +20,83900000.0,17900.0,234000.0,14600.0,1020.0,7.03,1e-05,133.0,133.0,2.29,,, diff --git a/llama-1B/16_GPUS/dp-4_tp-1_pp-4_mbz-1/profiler.csv b/llama-1B/16_GPUS/dp-4_tp-1_pp-4_mbz-1/profiler.csv new file mode 100644 index 0000000000000000000000000000000000000000..98477ceb8a4b1f06b381cd82478837628d8aaa58 --- /dev/null +++ b/llama-1B/16_GPUS/dp-4_tp-1_pp-4_mbz-1/profiler.csv @@ -0,0 +1,2 @@ +forward,backward +0ms 947μs,1ms 132μs diff --git a/llama-1B/16_GPUS/dp-4_tp-1_pp-4_mbz-2/profiler.csv b/llama-1B/16_GPUS/dp-4_tp-1_pp-4_mbz-2/profiler.csv index ebabecda85d86ce5bdfc27d0cb53af2070601225..692599363030dc7a17a4b367d18398b6f47f6d7a 100644 --- a/llama-1B/16_GPUS/dp-4_tp-1_pp-4_mbz-2/profiler.csv +++ b/llama-1B/16_GPUS/dp-4_tp-1_pp-4_mbz-2/profiler.csv @@ -1,2 +1,2 @@ forward,backward -0ms 890μs,1ms 193μs +0ms 947μs,1ms 197μs diff --git a/llama-1B/16_GPUS/dp-4_tp-1_pp-4_mbz-4/log_metrics.csv b/llama-1B/16_GPUS/dp-4_tp-1_pp-4_mbz-4/log_metrics.csv new file mode 100644 index 0000000000000000000000000000000000000000..7046652c322c10a7248b68800b0046e551a67c81 --- /dev/null +++ b/llama-1B/16_GPUS/dp-4_tp-1_pp-4_mbz-4/log_metrics.csv @@ -0,0 +1,21 @@ +iteration,consumed_tokens,elapsed_time_per_iteration_ms,tokens_per_sec,tokens_per_sec_per_gpu,global_batch_size,lm_loss,lr,model_tflops_per_gpu,hardware_tflops_per_gpu,grad_norm,memory_usage_MiB,peak_allocated_MiB,peak_reserved_MiB +1,4190000.0000000005,38800.0,108000.0,6750.0,1020.0,11.1,0.0001,61.3,61.3,25.1,3478.34,37922.28,38320.0 +2,8390000.0,19100.0,219000.0,13700.0,1020.0,11.1,9.53e-05,124.0,124.0,25.2,3478.34,37922.28,38320.0 +3,12600000.0,19000.0,221000.0,13800.0,1020.0,11.4,9.05e-05,125.0,125.0,217.0,3478.34,37922.28,38320.0 +4,16800000.0,19800.0,212000.0,13300.0,1020.0,13.8,8.58e-05,120.0,120.0,22.5,3478.34,5180.18,38320.0 +5,21000000.0,18200.0,231000.0,14400.0,1020.0,9.98,8.11e-05,131.0,131.0,16.4,3478.34,37922.28,38320.0 +6,25200000.0,18600.0,226000.0,14100.0,1020.0,10.9,7.63e-05,128.0,128.0,93.8,3478.34,37922.28,38320.0 +7,29400000.0,105000.0,40000.0,2500.0,1020.0,9.16,7.16e-05,22.7,22.7,19.8,,, +8,33600000.0,18200.0,230000.0,14400.0,1020.0,8.83,6.68e-05,131.0,131.0,6.08,3478.34,37922.28,38320.0 +9,37700000.0,17900.0,235000.0,14700.0,1020.0,8.47,6.21e-05,133.0,133.0,5.23,3478.34,37922.28,38320.0 +10,41900000.0,17200.0,244000.0,15200.0,1020.0,8.17,5.74e-05,138.0,138.0,7.71,3478.34,37922.28,38320.0 +11,46100000.0,19400.0,216000.0,13500.0,1020.0,7.93,5.26e-05,123.0,123.0,5.53,3478.34,37922.28,38320.0 +12,50300000.0,18800.0,223000.0,14000.0,1020.0,7.75,4.79e-05,127.0,127.0,4.64,,, +13,54500000.0,18700.0,224000.0,14000.0,1020.0,7.58,4.32e-05,127.0,127.0,2.9,3478.34,37922.28,38320.0 +14,58700000.0,16200.0,258000.0,16200.0,1020.0,7.5,3.84e-05,147.0,147.0,4.18,3478.34,37922.28,38320.0 +15,62900000.0,18100.0,232000.0,14500.0,1020.0,7.4,3.37e-05,131.0,131.0,3.86,3478.34,37922.28,38320.0 +16,67099999.99999999,17700.0,237000.0,14800.0,1020.0,7.29,2.89e-05,134.0,134.0,3.06,3478.34,37922.28,38320.0 +17,71300000.0,18300.0,229000.0,14300.0,1020.0,7.19,2.42e-05,130.0,130.0,2.39,,, +18,75500000.0,20300.0,206000.0,12900.0,1020.0,7.13,1.95e-05,117.0,117.0,2.2,3478.34,37922.28,38320.0 +19,79700000.0,17800.0,236000.0,14800.0,1020.0,7.08,1.47e-05,134.0,134.0,2.64,,, +20,83900000.0,17200.0,244000.0,15200.0,1020.0,7.03,1e-05,138.0,138.0,2.3,,, diff --git a/llama-1B/16_GPUS/dp-4_tp-1_pp-4_mbz-4/profiler.csv b/llama-1B/16_GPUS/dp-4_tp-1_pp-4_mbz-4/profiler.csv new file mode 100644 index 0000000000000000000000000000000000000000..23c0c84a8040e96a94a179e88430679bf30cdf8d --- /dev/null +++ b/llama-1B/16_GPUS/dp-4_tp-1_pp-4_mbz-4/profiler.csv @@ -0,0 +1,2 @@ +forward,backward +0ms 966μs,1ms 229μs diff --git a/llama-1B/16_GPUS/dp-4_tp-2_pp-2_mbz-2/log_metrics.csv b/llama-1B/16_GPUS/dp-4_tp-2_pp-2_mbz-2/log_metrics.csv new file mode 100644 index 0000000000000000000000000000000000000000..3378c61f1818a80fee9481ff56e48b2dd5456323 --- /dev/null +++ b/llama-1B/16_GPUS/dp-4_tp-2_pp-2_mbz-2/log_metrics.csv @@ -0,0 +1,21 @@ +iteration,consumed_tokens,elapsed_time_per_iteration_ms,tokens_per_sec,tokens_per_sec_per_gpu,global_batch_size,lm_loss,lr,model_tflops_per_gpu,hardware_tflops_per_gpu,grad_norm,memory_usage_MiB,peak_allocated_MiB,peak_reserved_MiB +1,4190000.0000000005,43400.0,96500.0,6030.0,1020.0,11.2,0.0001,54.7,54.7,21.2,3050.05,12561.11,13336.0 +2,8390000.0,28300.0,148000.0,9250.0,1020.0,11.2,9.53e-05,84.0,84.0,21.3,3050.05,4531.15,13336.0 +3,12600000.0,28000.0,150000.0,9360.0,1020.0,9.94,9.05e-05,85.0,85.0,114.0,3050.05,4531.15,13336.0 +4,16800000.0,30100.0,139000.0,8720.0,1020.0,13.3,8.58e-05,79.1,79.1,22.8,3050.05,12561.11,13336.0 +5,21000000.0,30200.0,139000.0,8670.0,1020.0,10.2,8.11e-05,78.7,78.7,10.4,,, +6,25200000.0,29100.0,144000.0,9010.0,1020.0,9.36,7.63e-05,81.8,81.8,15.4,3050.05,12561.11,13336.0 +7,29400000.0,415000.0,10100.0,632.0,1020.0,8.8,7.16e-05,5.74,5.74,8.93,3050.05,12561.11,13336.0 +8,33600000.0,27500.0,153000.0,9540.0,1020.0,8.6,6.68e-05,86.6,86.6,5.7,3050.05,12561.11,13336.0 +9,37700000.0,29700.0,141000.0,8830.0,1020.0,8.22,6.21e-05,80.1,80.1,4.85,3050.05,12561.11,13336.0 +10,41900000.0,28300.0,148000.0,9270.0,1020.0,8.02,5.74e-05,84.1,84.1,6.71,3050.05,12561.11,13336.0 +11,46100000.0,27700.0,152000.0,9470.0,1020.0,7.73,5.26e-05,85.9,85.9,4.02,3050.05,12561.11,13336.0 +12,50300000.0,27400.0,153000.0,9570.0,1020.0,7.55,4.79e-05,86.8,86.8,3.55,3050.05,12561.11,13336.0 +13,54500000.0,25900.0,162000.0,10100.0,1020.0,7.49,4.32e-05,91.8,91.8,4.42,3050.05,12561.11,13336.0 +14,58700000.0,28800.0,145000.0,9090.0,1020.0,7.36,3.84e-05,82.5,82.5,3.98,3050.05,12561.11,13336.0 +15,62900000.0,28400.0,147000.0,9210.0,1020.0,7.21,3.37e-05,83.6,83.6,2.83,3050.05,12561.11,13336.0 +16,67099999.99999999,27200.0,154000.0,9650.0,1020.0,7.12,2.89e-05,87.6,87.6,2.72,,, +17,71300000.0,29700.0,141000.0,8820.0,1020.0,7.02,2.42e-05,80.1,80.1,2.51,3050.05,12561.11,13336.0 +18,75500000.0,28300.0,148000.0,9270.0,1020.0,6.94,1.95e-05,84.1,84.1,2.53,3050.05,12561.11,13336.0 +19,79700000.0,27000.0,156000.0,9720.0,1020.0,6.89,1.47e-05,88.2,88.2,2.96,,, +20,83900000.0,26500.0,159000.0,9910.0,1020.0,6.84,1e-05,89.9,89.9,2.89,,, diff --git a/llama-1B/16_GPUS/dp-4_tp-2_pp-2_mbz-2/profiler.csv b/llama-1B/16_GPUS/dp-4_tp-2_pp-2_mbz-2/profiler.csv new file mode 100644 index 0000000000000000000000000000000000000000..f601a36bdc2c3f1dbb45c30cbe182f0bc064584a --- /dev/null +++ b/llama-1B/16_GPUS/dp-4_tp-2_pp-2_mbz-2/profiler.csv @@ -0,0 +1,2 @@ +forward,backward +0ms 984μs,1ms 137μs diff --git a/llama-1B/16_GPUS/dp-4_tp-2_pp-2_mbz-4/log_metrics.csv b/llama-1B/16_GPUS/dp-4_tp-2_pp-2_mbz-4/log_metrics.csv new file mode 100644 index 0000000000000000000000000000000000000000..63cb15aa0976ba3cc3ddc9b30b1acdc01d72660a --- /dev/null +++ b/llama-1B/16_GPUS/dp-4_tp-2_pp-2_mbz-4/log_metrics.csv @@ -0,0 +1,21 @@ +iteration,consumed_tokens,elapsed_time_per_iteration_ms,tokens_per_sec,tokens_per_sec_per_gpu,global_batch_size,lm_loss,lr,model_tflops_per_gpu,hardware_tflops_per_gpu,grad_norm,memory_usage_MiB,peak_allocated_MiB,peak_reserved_MiB +1,4190000.0000000005,47100.0,89000.0,5560.0,1020.0,11.2,0.0001,50.5,50.5,21.2,3050.12,22064.2,23148.0 +2,8390000.0,25400.0,165000.0,10300.0,1020.0,11.2,9.53e-05,93.8,93.8,21.3,3050.12,22064.2,23148.0 +3,12600000.0,18700.0,224000.0,14000.0,1020.0,9.94,9.05e-05,127.0,127.0,114.0,3050.12,22064.2,23148.0 +4,16800000.0,27300.0,154000.0,9600.0,1020.0,13.3,8.58e-05,87.1,87.1,22.8,3050.12,22064.2,23148.0 +5,21000000.0,27200.0,154000.0,9650.0,1020.0,10.2,8.11e-05,87.6,87.6,10.4,,, +6,25200000.0,24500.0,171000.0,10700.0,1020.0,9.36,7.63e-05,97.1,97.1,15.4,3050.12,22064.2,23148.0 +7,29400000.0,222000.0,18900.0,1180.0,1020.0,8.8,7.16e-05,10.7,10.7,9.02,3050.12,22064.2,23148.0 +8,33600000.0,26600.0,158000.0,9850.0,1020.0,8.6,6.68e-05,89.3,89.3,5.71,3050.12,22064.2,23148.0 +9,37700000.0,25700.0,163000.0,10200.0,1020.0,8.22,6.21e-05,92.5,92.5,4.87,3050.12,22064.2,23148.0 +10,41900000.0,27700.0,152000.0,9480.0,1020.0,8.02,5.74e-05,86.0,86.0,6.54,3050.12,22064.2,23148.0 +11,46100000.0,27500.0,152000.0,9520.0,1020.0,7.74,5.26e-05,86.4,86.4,4.03,3050.12,22064.2,23148.0 +12,50300000.0,26300.0,160000.0,9970.0,1020.0,7.55,4.79e-05,90.5,90.5,3.53,,, +13,54500000.0,26100.0,161000.0,10000.0,1020.0,7.49,4.32e-05,91.1,91.1,4.41,3050.12,22064.2,23148.0 +14,58700000.0,26300.0,159000.0,9960.0,1020.0,7.36,3.84e-05,90.3,90.3,3.99,3050.12,22064.2,23148.0 +15,62900000.0,28900.0,145000.0,9060.0,1020.0,7.21,3.37e-05,82.2,82.2,2.84,3050.12,22064.2,23148.0 +16,67099999.99999999,26700.0,157000.0,9820.0,1020.0,7.12,2.89e-05,89.1,89.1,2.7,,, +17,71300000.0,26400.0,159000.0,9940.0,1020.0,7.02,2.42e-05,90.2,90.2,2.51,3050.12,22064.2,23148.0 +18,75500000.0,27200.0,154000.0,9620.0,1020.0,6.94,1.95e-05,87.3,87.3,2.53,3050.12,22064.2,23148.0 +19,79700000.0,26200.0,160000.0,10000.0,1020.0,6.89,1.47e-05,90.7,90.7,2.95,3050.12,22064.2,23148.0 +20,83900000.0,27200.0,154000.0,9650.0,1020.0,6.84,1e-05,87.5,87.5,2.87,,, diff --git a/llama-1B/16_GPUS/dp-4_tp-2_pp-2_mbz-4/profiler.csv b/llama-1B/16_GPUS/dp-4_tp-2_pp-2_mbz-4/profiler.csv new file mode 100644 index 0000000000000000000000000000000000000000..61d99064176958a1a5285c887c8e0215bc3246f7 --- /dev/null +++ b/llama-1B/16_GPUS/dp-4_tp-2_pp-2_mbz-4/profiler.csv @@ -0,0 +1,2 @@ +forward,backward +0ms 877μs,1ms 775μs diff --git a/llama-1B/16_GPUS/dp-4_tp-4_pp-1_mbz-16/profiler.csv b/llama-1B/16_GPUS/dp-4_tp-4_pp-1_mbz-16/profiler.csv index 36a2a82d19c057dafe3815c594d9e0001be70d57..0619a8bf99efbf22db973f6ca545707a8c3c0ff4 100644 --- a/llama-1B/16_GPUS/dp-4_tp-4_pp-1_mbz-16/profiler.csv +++ b/llama-1B/16_GPUS/dp-4_tp-4_pp-1_mbz-16/profiler.csv @@ -1,2 +1,2 @@ forward,backward -1ms 66μs,0ms 506μs +0ms 975μs,1ms 63μs diff --git a/llama-1B/16_GPUS/dp-4_tp-4_pp-1_mbz-4/log_metrics.csv b/llama-1B/16_GPUS/dp-4_tp-4_pp-1_mbz-4/log_metrics.csv new file mode 100644 index 0000000000000000000000000000000000000000..813ef4877d53bc4b6b943c8b4f8b5d7be7734fcd --- /dev/null +++ b/llama-1B/16_GPUS/dp-4_tp-4_pp-1_mbz-4/log_metrics.csv @@ -0,0 +1,21 @@ +iteration,consumed_tokens,elapsed_time_per_iteration_ms,tokens_per_sec,tokens_per_sec_per_gpu,global_batch_size,lm_loss,lr,model_tflops_per_gpu,hardware_tflops_per_gpu,grad_norm,memory_usage_MiB,peak_allocated_MiB,peak_reserved_MiB +1,4190000.0000000005,24300.0,172000.0,10800.0,1020.0,11.4,0.0001,97.8,97.8,20.6,2489.08,13754.73,14458.0 +2,8390000.0,12300.0,340000.0,21300.0,1020.0,11.4,9.53e-05,193.0,193.0,20.7,2489.08,13754.73,14458.0 +3,12600000.0,12800.0,328000.0,20500.0,1020.0,11.6,9.05e-05,186.0,186.0,195.0,2489.08,13754.73,14458.0 +4,16800000.0,15300.0,275000.0,17200.0,1020.0,13.6,8.58e-05,156.0,156.0,28.1,2489.05,3679.95,14458.0 +5,21000000.0,15400.0,272000.0,17000.0,1020.0,12.0,8.11e-05,154.0,154.0,48.8,2489.05,13754.73,14458.0 +6,25200000.0,15100.0,277000.0,17300.0,1020.0,10.9,7.63e-05,157.0,157.0,19.7,2489.05,13754.73,14458.0 +7,29400000.0,12800.0,328000.0,20500.0,1020.0,10.4,7.16e-05,186.0,186.0,8.64,2489.05,13754.73,14458.0 +8,33600000.0,12500.0,335000.0,20900.0,1020.0,9.66,6.68e-05,190.0,190.0,6.86,2489.05,13754.73,14458.0 +9,37700000.0,12900.0,324000.0,20300.0,1020.0,11.2,6.21e-05,184.0,184.0,52.6,2489.05,13754.73,14458.0 +10,41900000.0,12800.0,329000.0,20500.0,1020.0,9.07,5.74e-05,186.0,186.0,15.0,2489.05,13754.73,14458.0 +11,46100000.0,12500.0,336000.0,21000.0,1020.0,8.53,5.26e-05,191.0,191.0,6.64,2489.05,13754.73,14458.0 +12,50300000.0,12700.0,329000.0,20600.0,1020.0,8.33,4.79e-05,187.0,187.0,5.79,2489.05,13754.73,14458.0 +13,54500000.0,12400.0,337000.0,21100.0,1020.0,8.11,4.32e-05,191.0,191.0,5.57,2489.05,13754.73,14458.0 +14,58700000.0,12800.0,329000.0,20500.0,1020.0,7.85,3.84e-05,186.0,186.0,5.33,2489.05,13754.73,14458.0 +15,62900000.0,12500.0,336000.0,21000.0,1020.0,7.64,3.37e-05,191.0,191.0,4.85,2489.05,13754.73,14458.0 +16,67099999.99999999,12700.0,330000.0,20600.0,1020.0,7.53,2.89e-05,187.0,187.0,5.03,2489.05,13754.73,14458.0 +17,71300000.0,12700.0,330000.0,20600.0,1020.0,7.45,2.42e-05,187.0,187.0,5.73,2489.05,13754.73,14458.0 +18,75500000.0,12500.0,337000.0,21000.0,1020.0,7.32,1.95e-05,191.0,191.0,5.28,2489.05,13754.73,14458.0 +19,79700000.0,12500.0,334000.0,20900.0,1020.0,7.18,1.47e-05,190.0,190.0,3.03,2489.05,13754.73,14458.0 +20,83900000.0,12500.0,336000.0,21000.0,1020.0,7.14,1e-05,191.0,191.0,3.76,,, diff --git a/llama-1B/16_GPUS/dp-4_tp-4_pp-1_mbz-4/profiler.csv b/llama-1B/16_GPUS/dp-4_tp-4_pp-1_mbz-4/profiler.csv new file mode 100644 index 0000000000000000000000000000000000000000..fbeeb02ac7a8af7e97e3e9436a67778f173ac071 --- /dev/null +++ b/llama-1B/16_GPUS/dp-4_tp-4_pp-1_mbz-4/profiler.csv @@ -0,0 +1,2 @@ +forward,backward +0ms 974μs,1ms 69μs diff --git a/llama-1B/16_GPUS/dp-4_tp-4_pp-1_mbz-8/log_metrics.csv b/llama-1B/16_GPUS/dp-4_tp-4_pp-1_mbz-8/log_metrics.csv new file mode 100644 index 0000000000000000000000000000000000000000..bab800ccf27fd1d7ac621b887d8760102b95eae4 --- /dev/null +++ b/llama-1B/16_GPUS/dp-4_tp-4_pp-1_mbz-8/log_metrics.csv @@ -0,0 +1,21 @@ +iteration,consumed_tokens,elapsed_time_per_iteration_ms,tokens_per_sec,tokens_per_sec_per_gpu,global_batch_size,lm_loss,lr,model_tflops_per_gpu,hardware_tflops_per_gpu,grad_norm,memory_usage_MiB,peak_allocated_MiB,peak_reserved_MiB +1,4190000.0000000005,18700.0,225000.0,14000.0,1020.0,11.4,0.0001,127.0,127.0,20.6,2489.33,25017.84,25378.0 +2,8390000.0,11200.0,375000.0,23400.0,1020.0,11.4,9.53e-05,212.0,212.0,20.7,2489.33,25017.84,25378.0 +3,12600000.0,10700.0,392000.0,24500.0,1020.0,11.6,9.05e-05,223.0,223.0,195.0,2489.33,25017.84,25378.0 +4,16800000.0,10700.0,392000.0,24500.0,1020.0,13.6,8.58e-05,222.0,222.0,28.1,2489.32,3680.2,25378.0 +5,21000000.0,10900.0,385000.0,24100.0,1020.0,12.0,8.11e-05,218.0,218.0,48.9,2489.32,25017.84,25378.0 +6,25200000.0,10700.0,393000.0,24600.0,1020.0,10.9,7.63e-05,223.0,223.0,19.8,2489.32,25017.84,25378.0 +7,29400000.0,10500.0,398000.0,24900.0,1020.0,10.4,7.16e-05,226.0,226.0,8.65,2489.32,25017.84,25378.0 +8,33600000.0,10800.0,387000.0,24200.0,1020.0,9.67,6.68e-05,219.0,219.0,6.87,2489.32,25017.84,25378.0 +9,37700000.0,10900.0,386000.0,24100.0,1020.0,11.2,6.21e-05,219.0,219.0,52.8,2489.32,25017.84,25378.0 +10,41900000.0,10800.0,387000.0,24200.0,1020.0,9.09,5.74e-05,220.0,220.0,15.4,2489.32,25017.84,25378.0 +11,46100000.0,10800.0,390000.0,24400.0,1020.0,8.55,5.26e-05,221.0,221.0,6.91,2489.32,25017.84,25378.0 +12,50300000.0,10600.0,394000.0,24600.0,1020.0,8.35,4.79e-05,223.0,223.0,5.8,2489.32,25017.84,25378.0 +13,54500000.0,10900.0,386000.0,24100.0,1020.0,8.13,4.32e-05,219.0,219.0,5.59,2489.32,25017.84,25378.0 +14,58700000.0,10800.0,387000.0,24200.0,1020.0,7.87,3.84e-05,219.0,219.0,5.36,2489.32,25017.84,25378.0 +15,62900000.0,10900.0,385000.0,24000.0,1020.0,7.65,3.37e-05,218.0,218.0,4.88,2489.32,25017.84,25378.0 +16,67099999.99999999,10900.0,387000.0,24200.0,1020.0,7.54,2.89e-05,219.0,219.0,4.99,2489.32,25017.84,25378.0 +17,71300000.0,11200.0,375000.0,23400.0,1020.0,7.46,2.42e-05,213.0,213.0,5.5,2489.32,25017.84,25378.0 +18,75500000.0,10900.0,384000.0,24000.0,1020.0,7.34,1.95e-05,218.0,218.0,5.54,2489.32,25017.84,25378.0 +19,79700000.0,11000.0,383000.0,23900.0,1020.0,7.2,1.47e-05,217.0,217.0,3.27,2489.32,25017.84,25378.0 +20,83900000.0,10700.0,394000.0,24600.0,1020.0,7.14,1e-05,223.0,223.0,3.47,,, diff --git a/llama-1B/16_GPUS/dp-4_tp-4_pp-1_mbz-8/profiler.csv b/llama-1B/16_GPUS/dp-4_tp-4_pp-1_mbz-8/profiler.csv new file mode 100644 index 0000000000000000000000000000000000000000..effdc1bf067901e7ef5302173b01692ceef9ca86 --- /dev/null +++ b/llama-1B/16_GPUS/dp-4_tp-4_pp-1_mbz-8/profiler.csv @@ -0,0 +1,2 @@ +forward,backward +0ms 943μs,1ms 232μs diff --git a/llama-1B/16_GPUS/dp-8_tp-1_pp-2_mbz-1/log_metrics.csv b/llama-1B/16_GPUS/dp-8_tp-1_pp-2_mbz-1/log_metrics.csv new file mode 100644 index 0000000000000000000000000000000000000000..922cf9174a97290990ccc3719dc03aa59beff483 --- /dev/null +++ b/llama-1B/16_GPUS/dp-8_tp-1_pp-2_mbz-1/log_metrics.csv @@ -0,0 +1,21 @@ +iteration,consumed_tokens,elapsed_time_per_iteration_ms,tokens_per_sec,tokens_per_sec_per_gpu,global_batch_size,lm_loss,lr,model_tflops_per_gpu,hardware_tflops_per_gpu,grad_norm,memory_usage_MiB,peak_allocated_MiB,peak_reserved_MiB +1,4190000.0000000005,22500.0,186000.0,11600.0,1020.0,11.1,0.0001,106.0,106.0,24.6,5024.42,13199.38,15320.0 +2,8390000.0,15100.0,279000.0,17400.0,1020.0,11.1,9.53e-05,158.0,158.0,24.8,5024.42,13199.38,15320.0 +3,12600000.0,18200.0,231000.0,14400.0,1020.0,10.5,9.05e-05,131.0,131.0,197.0,5024.42,7821.83,15320.0 +4,16800000.0,21600.0,194000.0,12100.0,1020.0,13.9,8.58e-05,110.0,110.0,17.9,5024.42,13199.38,15320.0 +5,21000000.0,20600.0,203000.0,12700.0,1020.0,9.7,8.11e-05,115.0,115.0,20.2,,, +6,25200000.0,21400.0,196000.0,12300.0,1020.0,13.7,7.63e-05,111.0,111.0,98.4,5024.42,13199.38,15320.0 +7,29400000.0,363000.0,11600.0,722.0,1020.0,9.73,7.16e-05,6.55,6.55,12.8,5024.42,13199.38,15320.0 +8,33600000.0,19900.0,211000.0,13200.0,1020.0,8.94,6.68e-05,120.0,120.0,11.8,5024.42,13199.38,15320.0 +9,37700000.0,18400.0,228000.0,14200.0,1020.0,8.51,6.21e-05,129.0,129.0,5.67,5024.42,13199.38,15320.0 +10,41900000.0,18800.0,223000.0,13900.0,1020.0,8.14,5.74e-05,126.0,126.0,4.24,,, +11,46100000.0,18400.0,228000.0,14300.0,1020.0,8.01,5.26e-05,129.0,129.0,7.19,5024.42,13199.38,15320.0 +12,50300000.0,18400.0,228000.0,14200.0,1020.0,7.8,4.79e-05,129.0,129.0,6.24,5024.42,13199.38,15320.0 +13,54500000.0,17800.0,235000.0,14700.0,1020.0,7.6,4.32e-05,133.0,133.0,2.66,,, +14,58700000.0,16700.0,251000.0,15700.0,1020.0,7.53,3.84e-05,143.0,143.0,3.8,5024.42,13199.38,15320.0 +15,62900000.0,16500.0,254000.0,15900.0,1020.0,7.42,3.37e-05,144.0,144.0,3.41,5024.42,13199.38,15320.0 +16,67099999.99999999,17200.0,245000.0,15300.0,1020.0,7.31,2.89e-05,139.0,139.0,2.88,,, +17,71300000.0,17800.0,235000.0,14700.0,1020.0,7.21,2.42e-05,134.0,134.0,2.21,5024.42,13199.38,15320.0 +18,75500000.0,16800.0,249000.0,15600.0,1020.0,7.15,1.95e-05,141.0,141.0,2.55,5024.42,13199.38,15320.0 +19,79700000.0,17400.0,241000.0,15000.0,1020.0,7.09,1.47e-05,136.0,136.0,2.45,5024.42,13199.38,15320.0 +20,83900000.0,16000.0,262000.0,16400.0,1020.0,7.03,1e-05,148.0,148.0,2.0,,, diff --git a/llama-1B/16_GPUS/dp-8_tp-1_pp-2_mbz-1/profiler.csv b/llama-1B/16_GPUS/dp-8_tp-1_pp-2_mbz-1/profiler.csv new file mode 100644 index 0000000000000000000000000000000000000000..68f26ac4e55f840414cedab0a9884d8cb7fa0a03 --- /dev/null +++ b/llama-1B/16_GPUS/dp-8_tp-1_pp-2_mbz-1/profiler.csv @@ -0,0 +1,2 @@ +forward,backward +0ms 894μs,1ms 277μs diff --git a/llama-1B/16_GPUS/dp-8_tp-1_pp-2_mbz-2/log_metrics.csv b/llama-1B/16_GPUS/dp-8_tp-1_pp-2_mbz-2/log_metrics.csv new file mode 100644 index 0000000000000000000000000000000000000000..9c60c7ec59fb156acb0bc1a0c085d6eccaa4d8f7 --- /dev/null +++ b/llama-1B/16_GPUS/dp-8_tp-1_pp-2_mbz-2/log_metrics.csv @@ -0,0 +1,21 @@ +iteration,consumed_tokens,elapsed_time_per_iteration_ms,tokens_per_sec,tokens_per_sec_per_gpu,global_batch_size,lm_loss,lr,model_tflops_per_gpu,hardware_tflops_per_gpu,grad_norm,memory_usage_MiB,peak_allocated_MiB,peak_reserved_MiB +1,4190000.0000000005,22600.0,185000.0,11600.0,1020.0,11.1,0.0001,105.0,105.0,24.6,5024.46,7821.86,24558.0 +2,8390000.0,14100.0,298000.0,18600.0,1020.0,11.1,9.53e-05,169.0,169.0,24.8,5024.46,22254.32,24558.0 +3,12600000.0,13800.0,304000.0,19000.0,1020.0,10.5,9.05e-05,173.0,173.0,197.0,5024.46,22254.32,24558.0 +4,16800000.0,13400.0,313000.0,19500.0,1020.0,13.9,8.58e-05,177.0,177.0,17.9,5024.46,7821.86,24558.0 +5,21000000.0,13100.0,320000.0,20000.0,1020.0,9.71,8.11e-05,181.0,181.0,20.2,5024.46,22254.32,24558.0 +6,25200000.0,14100.0,297000.0,18600.0,1020.0,13.7,7.63e-05,168.0,168.0,98.3,5024.46,22254.32,24558.0 +7,29400000.0,195000.0,21600.0,1350.0,1020.0,9.73,7.16e-05,12.2,12.2,12.7,5024.46,22254.32,24558.0 +8,33600000.0,14600.0,287000.0,18000.0,1020.0,8.94,6.68e-05,163.0,163.0,11.9,,, +9,37700000.0,15200.0,276000.0,17200.0,1020.0,8.51,6.21e-05,156.0,156.0,5.7,5024.46,22254.32,24558.0 +10,41900000.0,14200.0,296000.0,18500.0,1020.0,8.14,5.74e-05,168.0,168.0,4.26,5024.46,22254.32,24558.0 +11,46100000.0,13600.0,309000.0,19300.0,1020.0,8.01,5.26e-05,175.0,175.0,7.02,,, +12,50300000.0,13100.0,321000.0,20100.0,1020.0,7.81,4.79e-05,182.0,182.0,6.26,5024.46,22254.32,24558.0 +13,54500000.0,13800.0,303000.0,18900.0,1020.0,7.6,4.32e-05,172.0,172.0,2.64,,, +14,58700000.0,14900.0,282000.0,17600.0,1020.0,7.53,3.84e-05,160.0,160.0,3.83,5024.46,22254.32,24558.0 +15,62900000.0,14700.0,286000.0,17900.0,1020.0,7.42,3.37e-05,162.0,162.0,3.42,5024.46,22254.32,24558.0 +16,67099999.99999999,12800.0,327000.0,20400.0,1020.0,7.31,2.89e-05,185.0,185.0,2.89,5024.46,22254.32,24558.0 +17,71300000.0,14400.0,291000.0,18200.0,1020.0,7.21,2.42e-05,165.0,165.0,2.21,5024.46,22254.32,24558.0 +18,75500000.0,13600.0,308000.0,19200.0,1020.0,7.15,1.95e-05,175.0,175.0,2.57,,, +19,79700000.0,13800.0,303000.0,18900.0,1020.0,7.09,1.47e-05,172.0,172.0,2.48,5024.46,22254.32,24558.0 +20,83900000.0,13800.0,304000.0,19000.0,1020.0,7.03,1e-05,173.0,173.0,2.03,,, diff --git a/llama-1B/16_GPUS/dp-8_tp-1_pp-2_mbz-2/profiler.csv b/llama-1B/16_GPUS/dp-8_tp-1_pp-2_mbz-2/profiler.csv new file mode 100644 index 0000000000000000000000000000000000000000..4a07a7e7971993df2bac0dcecc97f85c4e4a7e59 --- /dev/null +++ b/llama-1B/16_GPUS/dp-8_tp-1_pp-2_mbz-2/profiler.csv @@ -0,0 +1,2 @@ +forward,backward +0ms 967μs,1ms 199μs diff --git a/llama-1B/16_GPUS/dp-8_tp-1_pp-2_mbz-4/profiler.csv b/llama-1B/16_GPUS/dp-8_tp-1_pp-2_mbz-4/profiler.csv index 7c9ecf36d4a276b2a75aa7dee01be454185ee7b8..fd52f08b6ab09d114f8fa16b3f16a7da8006e708 100644 --- a/llama-1B/16_GPUS/dp-8_tp-1_pp-2_mbz-4/profiler.csv +++ b/llama-1B/16_GPUS/dp-8_tp-1_pp-2_mbz-4/profiler.csv @@ -1,2 +1,2 @@ forward,backward -0ms 879μs,1ms 268μs +0ms 947μs,1ms 198μs diff --git a/llama-1B/16_GPUS/dp-8_tp-2_pp-1_mbz-2/log_metrics.csv b/llama-1B/16_GPUS/dp-8_tp-2_pp-1_mbz-2/log_metrics.csv new file mode 100644 index 0000000000000000000000000000000000000000..c57128412d8ac0ad79fcdfedbd9af77756451051 --- /dev/null +++ b/llama-1B/16_GPUS/dp-8_tp-2_pp-1_mbz-2/log_metrics.csv @@ -0,0 +1,21 @@ +iteration,consumed_tokens,elapsed_time_per_iteration_ms,tokens_per_sec,tokens_per_sec_per_gpu,global_batch_size,lm_loss,lr,model_tflops_per_gpu,hardware_tflops_per_gpu,grad_norm,memory_usage_MiB,peak_allocated_MiB,peak_reserved_MiB +1,4190000.0000000005,26300.0,160000.0,9980.0,1020.0,11.5,0.0001,90.6,90.6,26.4,4076.11,13774.34,15566.0 +2,8390000.0,12400.0,337000.0,21100.0,1020.0,11.5,9.53e-05,191.0,191.0,26.6,4076.11,13774.34,15566.0 +3,12600000.0,12900.0,324000.0,20300.0,1020.0,11.5,9.05e-05,184.0,184.0,262.0,4076.11,13774.34,15566.0 +4,16800000.0,15600.0,269000.0,16800.0,1020.0,14.6,8.58e-05,153.0,153.0,29.1,4076.08,6325.11,15566.0 +5,21000000.0,16000.0,262000.0,16400.0,1020.0,10.8,8.11e-05,149.0,149.0,30.9,4076.08,13774.34,15566.0 +6,25200000.0,15800.0,265000.0,16600.0,1020.0,10.6,7.63e-05,150.0,150.0,27.6,4076.08,13774.34,15566.0 +7,29400000.0,12800.0,328000.0,20500.0,1020.0,10.2,7.16e-05,186.0,186.0,9.44,4076.08,13774.34,15566.0 +8,33600000.0,12700.0,329000.0,20600.0,1020.0,13.0,6.68e-05,187.0,187.0,78.3,4076.08,13774.34,15566.0 +9,37700000.0,13000.0,322000.0,20100.0,1020.0,9.45,6.21e-05,182.0,182.0,12.6,4076.08,13774.34,15566.0 +10,41900000.0,12600.0,332000.0,20800.0,1020.0,9.22,5.74e-05,188.0,188.0,6.83,4076.08,13774.34,15566.0 +11,46100000.0,12700.0,330000.0,20600.0,1020.0,8.94,5.26e-05,187.0,187.0,5.84,4076.08,13774.34,15566.0 +12,50300000.0,12800.0,327000.0,20400.0,1020.0,8.58,4.79e-05,185.0,185.0,6.3,4076.08,13774.34,15566.0 +13,54500000.0,12600.0,334000.0,20900.0,1020.0,8.14,4.32e-05,189.0,189.0,5.51,4076.08,13774.34,15566.0 +14,58700000.0,12500.0,335000.0,20900.0,1020.0,7.72,3.84e-05,190.0,190.0,4.85,4076.08,13774.34,15566.0 +15,62900000.0,12800.0,327000.0,20500.0,1020.0,7.48,3.37e-05,186.0,186.0,5.25,4076.08,13774.34,15566.0 +16,67099999.99999999,12800.0,326000.0,20400.0,1020.0,7.39,2.89e-05,185.0,185.0,6.86,4076.08,13774.34,15566.0 +17,71300000.0,12800.0,327000.0,20400.0,1020.0,7.35,2.42e-05,185.0,185.0,6.0,4076.08,13774.34,15566.0 +18,75500000.0,12700.0,330000.0,20600.0,1020.0,7.32,1.95e-05,187.0,187.0,6.9,4076.08,13774.34,15566.0 +19,79700000.0,12700.0,329000.0,20600.0,1020.0,7.21,1.47e-05,187.0,187.0,5.57,4076.08,13774.34,15566.0 +20,83900000.0,12700.0,329000.0,20600.0,1020.0,7.11,1e-05,187.0,187.0,4.31,,, diff --git a/llama-1B/16_GPUS/dp-8_tp-2_pp-1_mbz-2/profiler.csv b/llama-1B/16_GPUS/dp-8_tp-2_pp-1_mbz-2/profiler.csv new file mode 100644 index 0000000000000000000000000000000000000000..059d28844b5ce42f4ac8f15fd754629b8ba81774 --- /dev/null +++ b/llama-1B/16_GPUS/dp-8_tp-2_pp-1_mbz-2/profiler.csv @@ -0,0 +1,2 @@ +forward,backward +0ms 959μs,1ms 78μs diff --git a/llama-1B/16_GPUS/dp-8_tp-2_pp-1_mbz-4/log_metrics.csv b/llama-1B/16_GPUS/dp-8_tp-2_pp-1_mbz-4/log_metrics.csv new file mode 100644 index 0000000000000000000000000000000000000000..1c7df1f65544cda6aaab6c34005f951972d14ce5 --- /dev/null +++ b/llama-1B/16_GPUS/dp-8_tp-2_pp-1_mbz-4/log_metrics.csv @@ -0,0 +1,21 @@ +iteration,consumed_tokens,elapsed_time_per_iteration_ms,tokens_per_sec,tokens_per_sec_per_gpu,global_batch_size,lm_loss,lr,model_tflops_per_gpu,hardware_tflops_per_gpu,grad_norm,memory_usage_MiB,peak_allocated_MiB,peak_reserved_MiB +1,4190000.0000000005,17600.0,239000.0,14900.0,1020.0,11.5,0.0001,135.0,135.0,26.4,4073.22,23464.76,23874.0 +2,8390000.0,10300.0,407000.0,25500.0,1020.0,11.5,9.53e-05,231.0,231.0,26.6,4073.22,23464.76,23874.0 +3,12600000.0,10200.0,411000.0,25700.0,1020.0,11.5,9.05e-05,233.0,233.0,262.0,4073.22,23464.76,23874.0 +4,16800000.0,11200.0,375000.0,23400.0,1020.0,14.6,8.58e-05,212.0,212.0,29.1,4073.21,6322.22,23874.0 +5,21000000.0,10800.0,390000.0,24400.0,1020.0,10.8,8.11e-05,221.0,221.0,30.9,4073.21,23464.76,23874.0 +6,25200000.0,10900.0,384000.0,24000.0,1020.0,10.6,7.63e-05,217.0,217.0,27.6,4073.21,23464.76,23874.0 +7,29400000.0,10100.0,416000.0,26000.0,1020.0,10.2,7.16e-05,236.0,236.0,9.44,4073.21,23464.76,23874.0 +8,33600000.0,10100.0,414000.0,25900.0,1020.0,13.0,6.68e-05,235.0,235.0,78.3,4073.21,23464.76,23874.0 +9,37700000.0,10300.0,406000.0,25400.0,1020.0,9.44,6.21e-05,230.0,230.0,12.5,4073.21,23464.76,23874.0 +10,41900000.0,10400.0,402000.0,25100.0,1020.0,9.22,5.74e-05,228.0,228.0,6.84,4073.21,23464.76,23874.0 +11,46100000.0,10200.0,411000.0,25700.0,1020.0,8.94,5.26e-05,233.0,233.0,5.84,4073.21,23464.76,23874.0 +12,50300000.0,10100.0,413000.0,25800.0,1020.0,8.58,4.79e-05,234.0,234.0,6.31,4073.21,23464.76,23874.0 +13,54500000.0,10300.0,405000.0,25300.0,1020.0,8.13,4.32e-05,230.0,230.0,5.49,4073.21,23464.76,23874.0 +14,58700000.0,10100.0,413000.0,25800.0,1020.0,7.72,3.84e-05,234.0,234.0,4.84,4073.21,23464.76,23874.0 +15,62900000.0,10400.0,403000.0,25200.0,1020.0,7.48,3.37e-05,228.0,228.0,5.3,4073.21,23464.76,23874.0 +16,67099999.99999999,10200.0,410000.0,25600.0,1020.0,7.39,2.89e-05,232.0,232.0,6.89,4073.21,23464.76,23874.0 +17,71300000.0,10300.0,407000.0,25400.0,1020.0,7.35,2.42e-05,231.0,231.0,6.0,4073.21,23464.76,23874.0 +18,75500000.0,10400.0,404000.0,25200.0,1020.0,7.32,1.95e-05,229.0,229.0,6.88,4073.21,23464.76,23874.0 +19,79700000.0,10400.0,402000.0,25200.0,1020.0,7.21,1.47e-05,228.0,228.0,5.57,4073.21,23464.76,23874.0 +20,83900000.0,10200.0,412000.0,25800.0,1020.0,7.11,1e-05,234.0,234.0,4.3,,, diff --git a/llama-1B/16_GPUS/dp-8_tp-2_pp-1_mbz-4/profiler.csv b/llama-1B/16_GPUS/dp-8_tp-2_pp-1_mbz-4/profiler.csv new file mode 100644 index 0000000000000000000000000000000000000000..5fc64c53898ecf6468b5863271e6aad068488df7 --- /dev/null +++ b/llama-1B/16_GPUS/dp-8_tp-2_pp-1_mbz-4/profiler.csv @@ -0,0 +1,2 @@ +forward,backward +0ms 930μs,1ms 314μs diff --git a/llama-1B/16_GPUS/dp-8_tp-2_pp-1_mbz-8/log_metrics.csv b/llama-1B/16_GPUS/dp-8_tp-2_pp-1_mbz-8/log_metrics.csv new file mode 100644 index 0000000000000000000000000000000000000000..d415e71d345d08da1505f885a06dfdd58af2758f --- /dev/null +++ b/llama-1B/16_GPUS/dp-8_tp-2_pp-1_mbz-8/log_metrics.csv @@ -0,0 +1,21 @@ +iteration,consumed_tokens,elapsed_time_per_iteration_ms,tokens_per_sec,tokens_per_sec_per_gpu,global_batch_size,lm_loss,lr,model_tflops_per_gpu,hardware_tflops_per_gpu,grad_norm,memory_usage_MiB,peak_allocated_MiB,peak_reserved_MiB +1,4190000.0000000005,22200.0,189000.0,11800.0,1020.0,11.5,0.0001,107.0,107.0,26.4,4075.41,42861.19,44162.0 +2,8390000.0,10200.0,413000.0,25800.0,1020.0,11.5,9.53e-05,234.0,234.0,26.6,4075.41,42861.19,44162.0 +3,12600000.0,9820.0,427000.0,26700.0,1020.0,11.5,9.05e-05,242.0,242.0,262.0,4075.41,42861.19,44162.0 +4,16800000.0,9750.0,430000.0,26900.0,1020.0,14.6,8.58e-05,244.0,244.0,29.1,4075.4,6324.41,44162.0 +5,21000000.0,9970.0,421000.0,26300.0,1020.0,10.8,8.11e-05,238.0,238.0,31.0,4075.4,42861.19,44162.0 +6,25200000.0,9980.0,420000.0,26300.0,1020.0,10.6,7.63e-05,238.0,238.0,27.4,4075.4,42861.19,44162.0 +7,29400000.0,9830.0,427000.0,26700.0,1020.0,10.2,7.16e-05,242.0,242.0,9.44,4075.4,42861.19,44162.0 +8,33600000.0,9720.0,432000.0,27000.0,1020.0,13.0,6.68e-05,245.0,245.0,78.4,4075.4,42861.19,44162.0 +9,37700000.0,9790.0,429000.0,26800.0,1020.0,9.45,6.21e-05,243.0,243.0,12.9,4075.4,42861.19,44162.0 +10,41900000.0,9760.0,430000.0,26800.0,1020.0,9.22,5.74e-05,244.0,244.0,6.81,4075.4,42861.19,44162.0 +11,46100000.0,9980.0,420000.0,26300.0,1020.0,8.95,5.26e-05,238.0,238.0,5.83,4075.4,42861.19,44162.0 +12,50300000.0,10200.0,413000.0,25800.0,1020.0,8.59,4.79e-05,234.0,234.0,6.29,4075.4,42861.19,44162.0 +13,54500000.0,9790.0,428000.0,26800.0,1020.0,8.14,4.32e-05,243.0,243.0,5.48,4075.4,42861.19,44162.0 +14,58700000.0,9970.0,421000.0,26300.0,1020.0,7.72,3.84e-05,239.0,239.0,4.86,4075.4,42861.19,44162.0 +15,62900000.0,9790.0,428000.0,26800.0,1020.0,7.49,3.37e-05,243.0,243.0,5.16,4075.4,42861.19,44162.0 +16,67099999.99999999,10000.0,418000.0,26100.0,1020.0,7.39,2.89e-05,237.0,237.0,6.94,4075.4,42861.19,44162.0 +17,71300000.0,9770.0,429000.0,26800.0,1020.0,7.35,2.42e-05,243.0,243.0,5.96,4075.4,42861.19,44162.0 +18,75500000.0,9850.0,426000.0,26600.0,1020.0,7.32,1.95e-05,242.0,242.0,6.89,4075.4,42861.19,44162.0 +19,79700000.0,9790.0,428000.0,26800.0,1020.0,7.21,1.47e-05,243.0,243.0,5.6,4075.4,42861.19,44162.0 +20,83900000.0,10000.0,418000.0,26100.0,1020.0,7.11,1e-05,237.0,237.0,4.32,,, diff --git a/llama-1B/16_GPUS/dp-8_tp-2_pp-1_mbz-8/profiler.csv b/llama-1B/16_GPUS/dp-8_tp-2_pp-1_mbz-8/profiler.csv new file mode 100644 index 0000000000000000000000000000000000000000..fec45c6fbbf2b839b88e543df90749dbb46662b9 --- /dev/null +++ b/llama-1B/16_GPUS/dp-8_tp-2_pp-1_mbz-8/profiler.csv @@ -0,0 +1,2 @@ +forward,backward +0ms 950μs,1ms 121μs