{ "base_current_gpu_type": "NVIDIA A100-PCIE-40GB", "base_current_gpu_total_memory": 40339.3125, "base_token_generation_latency_sync": 58.130772018432616, "base_token_generation_latency_async": 57.485764659941196, "base_token_generation_throughput_sync": 0.017202592796856563, "base_token_generation_throughput_async": 0.017395610998923484, "base_token_generation_CO2_emissions": null, "base_token_generation_energy_consumption": null, "base_inference_latency_sync": 55.49096908569336, "base_inference_latency_async": 55.28078079223633, "base_inference_throughput_sync": 0.01802095037222587, "base_inference_throughput_async": 0.01808946953477981, "base_inference_CO2_emissions": null, "base_inference_energy_consumption": null, "smashed_current_gpu_type": "NVIDIA A100-PCIE-40GB", "smashed_current_gpu_total_memory": 40339.3125, "smashed_token_generation_latency_sync": 163.0220962524414, "smashed_token_generation_latency_async": 163.83853014558554, "smashed_token_generation_throughput_sync": 0.006134137782472688, "smashed_token_generation_throughput_async": 0.0061035703818351425, "smashed_token_generation_CO2_emissions": null, "smashed_token_generation_energy_consumption": null, "smashed_inference_latency_sync": 170.61754913330077, "smashed_inference_latency_async": 128.1928300857544, "smashed_inference_throughput_sync": 0.005861061802140388, "smashed_inference_throughput_async": 0.007800748289362607, "smashed_inference_CO2_emissions": null, "smashed_inference_energy_consumption": null }