{ "base_current_gpu_type": "NVIDIA A100-PCIE-40GB", "base_current_gpu_total_memory": 40339.3125, "base_perplexity": 8.633415222167969, "base_token_generation_latency_sync": 36.40819664001465, "base_token_generation_latency_async": 36.063793674111366, "base_token_generation_throughput_sync": 0.027466342535102217, "base_token_generation_throughput_async": 0.027728641335863027, "base_token_generation_CO2_emissions": null, "base_token_generation_energy_consumption": null, "base_inference_latency_sync": 122.35038833618164, "base_inference_latency_async": 44.53322887420654, "base_inference_throughput_sync": 0.00817324745428927, "base_inference_throughput_async": 0.022455142492018937, "base_inference_CO2_emissions": null, "base_inference_energy_consumption": null, "smashed_current_gpu_type": "NVIDIA A100-PCIE-40GB", "smashed_current_gpu_total_memory": 40339.3125, "smashed_perplexity": 9.665506362915039, "smashed_token_generation_latency_sync": 55.69197120666504, "smashed_token_generation_latency_async": 55.41938152164221, "smashed_token_generation_throughput_sync": 0.01795590959223083, "smashed_token_generation_throughput_async": 0.01804422879763613, "smashed_token_generation_CO2_emissions": null, "smashed_token_generation_energy_consumption": null, "smashed_inference_latency_sync": 197.44808807373047, "smashed_inference_latency_async": 111.44344806671143, "smashed_inference_throughput_sync": 0.005064622350896521, "smashed_inference_throughput_async": 0.00897316098297127, "smashed_inference_CO2_emissions": null, "smashed_inference_energy_consumption": null }