{ "base_current_gpu_type": "NVIDIA A100-PCIE-40GB", "base_current_gpu_total_memory": 40339.3125, "base_perplexity": 8.891793251037598, "base_token_generation_latency_sync": 44.318396377563474, "base_token_generation_latency_async": 41.793143562972546, "base_token_generation_throughput_sync": 0.02256399332414152, "base_token_generation_throughput_async": 0.02392736977282488, "base_token_generation_CO2_emissions": null, "base_token_generation_energy_consumption": null, "base_inference_latency_sync": 119.94429473876953, "base_inference_latency_async": 38.98358345031738, "base_inference_throughput_sync": 0.008337203550847763, "base_inference_throughput_async": 0.02565182344702738, "base_inference_CO2_emissions": null, "base_inference_energy_consumption": null, "smashed_current_gpu_type": "NVIDIA A100-PCIE-40GB", "smashed_current_gpu_total_memory": 40339.3125, "smashed_perplexity": 97608.234375, "smashed_token_generation_latency_sync": 167.89824371337892, "smashed_token_generation_latency_async": 173.81366025656462, "smashed_token_generation_throughput_sync": 0.0059559884480216005, "smashed_token_generation_throughput_async": 0.005753287736555976, "smashed_token_generation_CO2_emissions": null, "smashed_token_generation_energy_consumption": null, "smashed_inference_latency_sync": 264.10536804199216, "smashed_inference_latency_async": 211.17231845855713, "smashed_inference_throughput_sync": 0.003786367567663381, "smashed_inference_throughput_async": 0.004735469152867455, "smashed_inference_CO2_emissions": null, "smashed_inference_energy_consumption": null }