{ "base_current_gpu_type": "NVIDIA A100-PCIE-40GB", "base_current_gpu_total_memory": 40339.3125, "base_token_generation_latency_sync": 58.560171890258786, "base_token_generation_latency_async": 58.45238883048296, "base_token_generation_throughput_sync": 0.017076452607994227, "base_token_generation_throughput_async": 0.017107940667747342, "base_token_generation_CO2_emissions": null, "base_token_generation_energy_consumption": null, "base_inference_latency_sync": 55.90609893798828, "base_inference_latency_async": 55.28888702392578, "base_inference_throughput_sync": 0.017887136090629613, "base_inference_throughput_async": 0.01808681733034812, "base_inference_CO2_emissions": null, "base_inference_energy_consumption": null, "smashed_current_gpu_type": "NVIDIA A100-PCIE-40GB", "smashed_current_gpu_total_memory": 40339.3125, "smashed_token_generation_latency_sync": 175.9563995361328, "smashed_token_generation_latency_async": 174.2004169151187, "smashed_token_generation_throughput_sync": 0.005683226086895743, "smashed_token_generation_throughput_async": 0.005740514389740309, "smashed_token_generation_CO2_emissions": null, "smashed_token_generation_energy_consumption": null, "smashed_inference_latency_sync": 178.78282165527344, "smashed_inference_latency_async": 156.2723159790039, "smashed_inference_throughput_sync": 0.00559337855136992, "smashed_inference_throughput_async": 0.006399086068029834, "smashed_inference_CO2_emissions": null, "smashed_inference_energy_consumption": null }