kfoughali committed on
Commit
09b3e47
·
verified ·
1 Parent(s): 6a49dc8

Update benchmark.py

Browse files
Files changed (1) hide show
  1. benchmark.py +20 -0
benchmark.py CHANGED
@@ -141,12 +141,16 @@ class BenchmarkMetrics:
141
  self.prefill_time_std = float(np.std(self.prefill_times))
142
  self.prefill_time_ci = self._bootstrap_ci(self.prefill_times, config)
143
  self.prefill_tokens_per_sec = config.prefill_length / self.prefill_time_mean if self.prefill_time_mean > 0 else 0.0
 
 
144
 
145
  if self.prefill_peak_memories:
146
  memories_mb = [m / (1024 * 1024) for m in self.prefill_peak_memories]
147
  self.prefill_peak_memory_mean_mb = float(np.mean(memories_mb))
148
  self.prefill_peak_memory_std_mb = float(np.std(memories_mb))
149
  self.prefill_peak_memory_ci_mb = self._bootstrap_ci(memories_mb, config)
 
 
150
 
151
  if self.decode_times:
152
  self.decode_time_per_token_mean_ms = float(np.mean(self.decode_times) * 1000)
@@ -155,6 +159,8 @@ class BenchmarkMetrics:
155
  self.decode_tokens_per_sec = 1.0 / np.mean(self.decode_times) if self.decode_times else 0.0
156
  self.decode_time_p50_ms = float(np.percentile(self.decode_times, 50) * 1000)
157
  self.decode_time_p95_ms = float(np.percentile(self.decode_times, 95) * 1000)
 
 
158
 
159
  # Calculate end-to-end throughput
160
  if self.prefill_time_mean > 0 and self.decode_time_per_token_mean_ms > 0:
@@ -165,23 +171,37 @@ class BenchmarkMetrics:
165
 
166
  if self.decode_peak_memories:
167
  self.decode_peak_memory_mean_mb = float(np.mean(self.decode_peak_memories) / (1024 * 1024))
 
 
168
 
169
  if self.prefill_perplexities:
170
  self.prefill_perplexity_mean = float(np.mean(self.prefill_perplexities))
171
  self.prefill_perplexity_std = float(np.std(self.prefill_perplexities))
172
  self.prefill_perplexity_ci = self._bootstrap_ci(self.prefill_perplexities, config)
 
 
 
 
173
 
174
  if self.generation_perplexities:
175
  self.generation_perplexity_mean = float(np.mean(self.generation_perplexities))
176
  self.generation_perplexity_std = float(np.std(self.generation_perplexities))
177
  self.generation_perplexity_ci = self._bootstrap_ci(self.generation_perplexities, config)
 
 
 
 
178
 
179
  if self.compression_ratios:
180
  self.compression_ratio_mean = float(np.mean(self.compression_ratios))
181
  self.compression_ratio_std = float(np.std(self.compression_ratios))
 
 
182
 
183
  if self.kv_cache_memory_samples_mb:
184
  self.kv_cache_memory_mb = float(np.mean(self.kv_cache_memory_samples_mb))
 
 
185
 
186
  except Exception as e:
187
  logger.error(f"Error calculating statistics: {e}")
 
141
  self.prefill_time_std = float(np.std(self.prefill_times))
142
  self.prefill_time_ci = self._bootstrap_ci(self.prefill_times, config)
143
  self.prefill_tokens_per_sec = config.prefill_length / self.prefill_time_mean if self.prefill_time_mean > 0 else 0.0
144
+ else:
145
+ logger.debug("No prefill time data available")
146
 
147
  if self.prefill_peak_memories:
148
  memories_mb = [m / (1024 * 1024) for m in self.prefill_peak_memories]
149
  self.prefill_peak_memory_mean_mb = float(np.mean(memories_mb))
150
  self.prefill_peak_memory_std_mb = float(np.std(memories_mb))
151
  self.prefill_peak_memory_ci_mb = self._bootstrap_ci(memories_mb, config)
152
+ else:
153
+ logger.debug("No prefill memory data available")
154
 
155
  if self.decode_times:
156
  self.decode_time_per_token_mean_ms = float(np.mean(self.decode_times) * 1000)
 
159
  self.decode_tokens_per_sec = 1.0 / np.mean(self.decode_times) if self.decode_times else 0.0
160
  self.decode_time_p50_ms = float(np.percentile(self.decode_times, 50) * 1000)
161
  self.decode_time_p95_ms = float(np.percentile(self.decode_times, 95) * 1000)
162
+ else:
163
+ logger.debug("No decode time data available")
164
 
165
  # Calculate end-to-end throughput
166
  if self.prefill_time_mean > 0 and self.decode_time_per_token_mean_ms > 0:
 
171
 
172
  if self.decode_peak_memories:
173
  self.decode_peak_memory_mean_mb = float(np.mean(self.decode_peak_memories) / (1024 * 1024))
174
+ else:
175
+ logger.debug("No decode memory data available")
176
 
177
  if self.prefill_perplexities:
178
  self.prefill_perplexity_mean = float(np.mean(self.prefill_perplexities))
179
  self.prefill_perplexity_std = float(np.std(self.prefill_perplexities))
180
  self.prefill_perplexity_ci = self._bootstrap_ci(self.prefill_perplexities, config)
181
+ logger.info(f"Calculated prefill perplexity: mean={self.prefill_perplexity_mean:.2f}, "
182
+ f"std={self.prefill_perplexity_std:.2f}, samples={len(self.prefill_perplexities)}")
183
+ else:
184
+ logger.warning("No prefill perplexity data available")
185
 
186
  if self.generation_perplexities:
187
  self.generation_perplexity_mean = float(np.mean(self.generation_perplexities))
188
  self.generation_perplexity_std = float(np.std(self.generation_perplexities))
189
  self.generation_perplexity_ci = self._bootstrap_ci(self.generation_perplexities, config)
190
+ logger.info(f"Calculated generation perplexity: mean={self.generation_perplexity_mean:.2f}, "
191
+ f"std={self.generation_perplexity_std:.2f}, samples={len(self.generation_perplexities)}")
192
+ else:
193
+ logger.warning("No generation perplexity data available")
194
 
195
  if self.compression_ratios:
196
  self.compression_ratio_mean = float(np.mean(self.compression_ratios))
197
  self.compression_ratio_std = float(np.std(self.compression_ratios))
198
+ else:
199
+ logger.debug("No compression ratio data available")
200
 
201
  if self.kv_cache_memory_samples_mb:
202
  self.kv_cache_memory_mb = float(np.mean(self.kv_cache_memory_samples_mb))
203
+ else:
204
+ logger.debug("No KV cache memory data available")
205
 
206
  except Exception as e:
207
  logger.error(f"Error calculating statistics: {e}")