| { |
| "benchmark": "multi_gpu", |
| "timestamp": "2026-03-19T12:21:06.000529+00:00", |
| "n_gpus_available": 4, |
| "gpu_names": [ |
| "NVIDIA L4", |
| "NVIDIA L4", |
| "NVIDIA L4", |
| "NVIDIA L4" |
| ], |
| "inference": { |
| "gpu_1": { |
| "n_gpus": 1, |
| "batch_results": { |
| "batch_1": { |
| "p50_ms": 128.19, |
| "p95_ms": 130.86, |
| "mean_ms": 128.42, |
| "fps": 7.8, |
| "per_sample_ms": 128.42 |
| }, |
| "batch_4": { |
| "p50_ms": 430.18, |
| "p95_ms": 440.71, |
| "mean_ms": 429.76, |
| "fps": 9.3, |
| "per_sample_ms": 107.44 |
| }, |
| "batch_8": { |
| "p50_ms": 853.99, |
| "p95_ms": 873.4, |
| "mean_ms": 857.72, |
| "fps": 9.3, |
| "per_sample_ms": 107.21 |
| }, |
| "batch_16": { |
| "p50_ms": 1734.01, |
| "p95_ms": 1759.55, |
| "mean_ms": 1727.97, |
| "fps": 9.3, |
| "per_sample_ms": 108.0 |
| } |
| }, |
| "memory": { |
| "gpu_0_allocated_gb": 3.65, |
| "gpu_0_reserved_gb": 5.07 |
| } |
| }, |
| "gpu_2": { |
| "n_gpus": 2, |
| "batch_results": { |
| "batch_1": { |
| "p50_ms": 162.23, |
| "p95_ms": 168.32, |
| "mean_ms": 164.65, |
| "fps": 6.1, |
| "per_sample_ms": 164.65 |
| }, |
| "batch_4": { |
| "p50_ms": 611.69, |
| "p95_ms": 613.66, |
| "mean_ms": 611.27, |
| "fps": 6.5, |
| "per_sample_ms": 152.82 |
| }, |
| "batch_8": { |
| "p50_ms": 799.07, |
| "p95_ms": 802.8, |
| "mean_ms": 799.2, |
| "fps": 10.0, |
| "per_sample_ms": 99.9 |
| }, |
| "batch_16": { |
| "p50_ms": 1185.3, |
| "p95_ms": 1190.2, |
| "mean_ms": 1184.72, |
| "fps": 13.5, |
| "per_sample_ms": 74.04 |
| } |
| }, |
| "memory": { |
| "gpu_0_allocated_gb": 3.66, |
| "gpu_0_reserved_gb": 4.47, |
| "gpu_1_allocated_gb": 0.01, |
| "gpu_1_reserved_gb": 4.47 |
| } |
| }, |
| "gpu_4": { |
| "n_gpus": 4, |
| "batch_results": { |
| "batch_1": { |
| "p50_ms": 164.7, |
| "p95_ms": 171.49, |
| "mean_ms": 167.37, |
| "fps": 6.0, |
| "per_sample_ms": 167.37 |
| }, |
| "batch_4": { |
| "p50_ms": 913.4, |
| "p95_ms": 915.7, |
| "mean_ms": 912.74, |
| "fps": 4.4, |
| "per_sample_ms": 228.19 |
| }, |
| "batch_8": { |
| "p50_ms": 1003.53, |
| "p95_ms": 1007.34, |
| "mean_ms": 1002.41, |
| "fps": 8.0, |
| "per_sample_ms": 125.3 |
| }, |
| "batch_16": { |
| "p50_ms": 1178.17, |
| "p95_ms": 1182.78, |
| "mean_ms": 1178.2, |
| "fps": 13.6, |
| "per_sample_ms": 73.64 |
| } |
| }, |
| "memory": { |
| "gpu_0_allocated_gb": 3.66, |
| "gpu_0_reserved_gb": 4.49, |
| "gpu_1_allocated_gb": 0.01, |
| "gpu_1_reserved_gb": 4.16, |
| "gpu_2_allocated_gb": 0.01, |
| "gpu_2_reserved_gb": 4.16, |
| "gpu_3_allocated_gb": 0.01, |
| "gpu_3_reserved_gb": 4.16 |
| } |
| } |
| }, |
| "training": { |
| "gpu_1": { |
| "n_gpus": 1, |
| "batch_size": 2, |
| "n_steps": 30, |
| "step_time_mean_ms": 432.4, |
| "steps_per_sec": 2.31, |
| "samples_per_sec": 4.63, |
| "loss_start": 4.0196, |
| "loss_end": 1.7553, |
| "loss_reduction_pct": 56.3, |
| "memory": { |
| "gpu_0_peak_gb": 9.0 |
| } |
| }, |
| "gpu_2": { |
| "n_gpus": 2, |
| "batch_size": 4, |
| "n_steps": 30, |
| "step_time_mean_ms": 1264.8, |
| "steps_per_sec": 0.79, |
| "samples_per_sec": 3.16, |
| "loss_start": 1.3165, |
| "loss_end": 1.4857, |
| "loss_reduction_pct": -12.9, |
| "memory": { |
| "gpu_0_peak_gb": 14.59, |
| "gpu_1_peak_gb": 4.07 |
| } |
| }, |
| "gpu_4": { |
| "n_gpus": 4, |
| "batch_size": 8, |
| "n_steps": 30, |
| "step_time_mean_ms": 2005.1, |
| "steps_per_sec": 0.5, |
| "samples_per_sec": 3.99, |
| "loss_start": 6.7918, |
| "loss_end": 1.182, |
| "loss_reduction_pct": 82.6, |
| "memory": { |
| "gpu_0_peak_gb": 14.6, |
| "gpu_1_peak_gb": 4.07, |
| "gpu_2_peak_gb": 4.07, |
| "gpu_3_peak_gb": 4.07 |
| } |
| } |
| }, |
| "fp16": { |
| "fp16_gpu_1": { |
| "n_gpus": 1, |
| "precision": "fp16", |
| "batch_results": { |
| "batch_4": { |
| "p50_ms": 122.14, |
| "fps": 32.7, |
| "per_sample_ms": 30.56 |
| }, |
| "batch_8": { |
| "p50_ms": 234.11, |
| "fps": 34.2, |
| "per_sample_ms": 29.26 |
| }, |
| "batch_16": { |
| "p50_ms": 486.22, |
| "fps": 32.9, |
| "per_sample_ms": 30.43 |
| }, |
| "batch_32": { |
| "p50_ms": 950.24, |
| "fps": 33.6, |
| "per_sample_ms": 29.73 |
| } |
| } |
| }, |
| "fp16_gpu_4": { |
| "n_gpus": 4, |
| "precision": "fp16", |
| "batch_results": { |
| "batch_4": { |
| "p50_ms": 901.49, |
| "fps": 4.4, |
| "per_sample_ms": 225.52 |
| }, |
| "batch_8": { |
| "p50_ms": 903.67, |
| "fps": 8.8, |
| "per_sample_ms": 113.02 |
| }, |
| "batch_16": { |
| "p50_ms": 911.97, |
| "fps": 17.5, |
| "per_sample_ms": 57.07 |
| }, |
| "batch_32": { |
| "p50_ms": 1013.11, |
| "fps": 31.6, |
| "per_sample_ms": 31.67 |
| } |
| } |
| } |
| } |
| } |