{ "base_current_gpu_type": "NVIDIA A100-PCIE-40GB", "base_current_gpu_total_memory": 40339.3125, "base_memory_inference_first": 690.0, "base_memory_inference": 570.0, "base_token_generation_latency_sync": 25.232858657836914, "base_token_generation_latency_async": 25.168074667453766, "base_token_generation_throughput_sync": 0.03963086440423651, "base_token_generation_throughput_async": 0.03973287640048031, "base_token_generation_CO2_emissions": 6.916667409165086e-06, "base_token_generation_energy_consumption": 0.001975681904854854, "base_inference_latency_sync": 25.73680648803711, "base_inference_latency_async": 25.754165649414062, "base_inference_throughput_sync": 0.03885485949722692, "base_inference_throughput_async": 0.03882867003391939, "base_inference_CO2_emissions": 8.20508156037289e-06, "base_inference_energy_consumption": 1.885578995579329e-05, "smashed_current_gpu_type": "NVIDIA A100-PCIE-40GB", "smashed_current_gpu_total_memory": 40339.3125, "smashed_memory_inference_first": 104.0, "smashed_memory_inference": 106.0, "smashed_token_generation_latency_sync": 53.81842727661133, "smashed_token_generation_latency_async": 53.83266881108284, "smashed_token_generation_throughput_sync": 0.01858099633533113, "smashed_token_generation_throughput_async": 0.018576080697565642, "smashed_token_generation_CO2_emissions": 1.4030177588319765e-05, "smashed_token_generation_energy_consumption": 0.004235501136593081, "smashed_inference_latency_sync": 53.602509307861325, "smashed_inference_latency_async": 53.591203689575195, "smashed_inference_throughput_sync": 0.018655843036313607, "smashed_inference_throughput_async": 0.018659778679211203, "smashed_inference_CO2_emissions": 1.382929576343033e-05, "smashed_inference_energy_consumption": 3.627959529056468e-05 }