{ "base_current_gpu_type": "NVIDIA A100-PCIE-40GB", "base_current_gpu_total_memory": 40339.3125, "base_token_generation_latency_sync": 53.123374938964844, "base_token_generation_latency_async": 53.1091520562768, "base_token_generation_throughput_sync": 0.01882410522955163, "base_token_generation_throughput_async": 0.018829146414168992, "base_token_generation_CO2_emissions": null, "base_token_generation_energy_consumption": null, "base_inference_latency_sync": 52.437298965454104, "base_inference_latency_async": 51.26469135284424, "base_inference_throughput_sync": 0.019070394923636397, "base_inference_throughput_async": 0.019506603348437377, "base_inference_CO2_emissions": null, "base_inference_energy_consumption": null, "smashed_current_gpu_type": "NVIDIA A100-PCIE-40GB", "smashed_current_gpu_total_memory": 40339.3125, "smashed_token_generation_latency_sync": 167.08106079101563, "smashed_token_generation_latency_async": 167.67955236136913, "smashed_token_generation_throughput_sync": 0.005985118811585691, "smashed_token_generation_throughput_async": 0.005963756378862955, "smashed_token_generation_CO2_emissions": null, "smashed_token_generation_energy_consumption": null, "smashed_inference_latency_sync": 174.62354125976563, "smashed_inference_latency_async": 141.94059371948242, "smashed_inference_throughput_sync": 0.005726604745189681, "smashed_inference_throughput_async": 0.007045200909729198, "smashed_inference_CO2_emissions": null, "smashed_inference_energy_consumption": null }