justmotes committed on
Commit
70193f5
·
verified ·
1 Parent(s): 4267104

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +51 -26
app.py CHANGED
@@ -180,13 +180,27 @@ def generate_table_html(rows):
180
  </div>
181
  </div>
182
  </td>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
  <td class="px-6 py-4 whitespace-nowrap align-top border-b border-slate-100">
184
  <div class="space-y-1">
185
  <div class="flex justify-between items-center">
186
  <span class="text-xs text-slate-500">Time:</span>
187
  <span class="text-sm font-medium text-slate-700">{row['baselineTime']}</span>
188
  </div>
189
- <div class="text-[10px] text-slate-400 text-right mt-1">Full Scan (16 Shards)</div>
190
  </div>
191
  </td>
192
  <td class="px-6 py-4 whitespace-nowrap align-top border-b border-slate-100">
@@ -217,11 +231,23 @@ def generate_table_html(rows):
217
  <table class="min-w-full divide-y divide-slate-200 border-separate border-spacing-0">
218
  <thead class="bg-slate-50 sticky top-0 z-10 text-xs font-bold text-slate-500 uppercase tracking-wider">
219
  <tr>
220
- <th class="px-6 py-3 text-left w-48 border-b border-slate-200">Embedding Model</th>
221
- <th class="px-6 py-3 text-left w-48 border-b border-slate-200">Router Model</th>
222
- <th class="px-6 py-3 text-left bg-blue-50/50 border-l border-r border-b border-blue-100 text-blue-800 min-w-[300px]">dashVector Search (Optimized)</th>
223
- <th class="px-6 py-3 text-left border-b border-r border-slate-200 bg-slate-50/80">Direct Qdrant Search (Baseline)</th>
224
- <th class="px-6 py-3 text-left text-green-700 w-32 border-b border-slate-200">Efficiency Gain</th>
 
 
 
 
 
 
 
 
 
 
 
 
225
  </tr>
226
  </thead>
227
  <tbody class="bg-white divide-y divide-slate-100">
@@ -263,6 +289,18 @@ def run_benchmark(query):
263
  end_base = time.time()
264
  baseline_time_ms = (end_base - start_base) * 1000
265
 
 
 
 
 
 
 
 
 
 
 
 
 
266
  # 3. Loop over Router Models
267
  for router_type in ROUTER_MODELS:
268
  router_key = f"{model_key}_{router_type}"
@@ -295,7 +333,7 @@ def run_benchmark(query):
295
  total_vectors = sum(shard_sizes.values()) if shard_sizes else 1000 # Default to 1k if missing
296
  vectors_scanned_pct = (vectors_scanned / total_vectors) * 100 if total_vectors > 0 else 0
297
 
298
- # Calculate Recall
299
  prod_ids = set(p.id for p in prod_results)
300
  if base_ids:
301
  intersection = len(base_ids.intersection(prod_ids))
@@ -303,24 +341,12 @@ def run_benchmark(query):
303
  else:
304
  recall = 0.0
305
 
306
- # Direct Sharded Time (Simulated or Measured?)
307
- # We can't easily measure "Direct Sharded" without running it.
308
- # Let's assume Direct Sharded is roughly Baseline Time * 1.1 (overhead) or similar?
309
- # Or we can run a full scan on Prod (all shards).
310
- # Let's estimate it as Baseline Time + 10% for now to save time,
311
- # or use the Baseline Time as the "Direct Search (Baseline)" column.
312
- # The table has "Direct Search (Sharded)" and "Direct Search (No Sharding)".
313
- # "No Sharding" is our Baseline Time.
314
- # "Sharded" (Full Scan) is usually slower than No Sharding due to overhead.
315
- direct_sharded_time_ms = baseline_time_ms * 1.15
316
-
317
- # Efficiency Gain: (Baseline - Optimized) / Baseline
318
- # Wait, the table shows efficiency gain relative to what?
319
- # Usually relative to the Baseline (No Sharding) or Full Scan?
320
- # The screenshot shows "Efficiency Gain" and "Faster".
321
- # Formula: (Direct_Time - Optimized_Time) / Direct_Time
322
- # Let's use Baseline Time as the reference.
323
- eff_gain = ((baseline_time_ms - latency_ms) / baseline_time_ms) * 100
324
 
325
  # Formatting
326
  row = {
@@ -357,4 +383,3 @@ with gr.Blocks(theme=gr.themes.Base(), css=None, head=HEAD_HTML) as demo:
357
 
358
  if __name__ == "__main__":
359
  demo.launch()
360
- # Force rebuild Sun Dec 7 03:10:34 AM IST 2025
 
180
  </div>
181
  </div>
182
  </td>
183
+ <td class="px-6 py-4 whitespace-nowrap align-top border-b border-r border-slate-100 bg-slate-50/30">
184
+ <div class="space-y-1">
185
+ <div class="flex justify-between items-center">
186
+ <span class="text-xs text-slate-500">Time:</span>
187
+ <span class="text-sm font-medium text-slate-700">{row['directTime']}</span>
188
+ </div>
189
+ <div class="flex justify-between items-center">
190
+ <span class="text-xs text-slate-500">Recall:</span>
191
+ <span class="text-xs font-mono bg-slate-100 px-1.5 rounded text-slate-600">
192
+ {row['recall']}
193
+ </span>
194
+ </div>
195
+ </div>
196
+ </td>
197
  <td class="px-6 py-4 whitespace-nowrap align-top border-b border-slate-100">
198
  <div class="space-y-1">
199
  <div class="flex justify-between items-center">
200
  <span class="text-xs text-slate-500">Time:</span>
201
  <span class="text-sm font-medium text-slate-700">{row['baselineTime']}</span>
202
  </div>
203
+ <div class="text-[10px] text-slate-400 text-right mt-1">Single Index</div>
204
  </div>
205
  </td>
206
  <td class="px-6 py-4 whitespace-nowrap align-top border-b border-slate-100">
 
231
  <table class="min-w-full divide-y divide-slate-200 border-separate border-spacing-0">
232
  <thead class="bg-slate-50 sticky top-0 z-10 text-xs font-bold text-slate-500 uppercase tracking-wider">
233
  <tr>
234
+ <th rowspan="2" class="px-6 py-3 text-left w-48 border-b border-slate-200">Embedding Model</th>
235
+ <th rowspan="2" class="px-6 py-3 text-left w-48 border-b border-slate-200">Router Model</th>
236
+ <th rowspan="2" class="px-6 py-3 text-left bg-blue-50/50 border-l border-r border-b border-blue-100 text-blue-800 min-w-[300px]">
237
+ dashVector Performance (Optimized)
238
+ </th>
239
+ <th colspan="2" class="px-6 py-2 text-center border-b border-r border-slate-200 bg-slate-50/80">
240
+ Direct Search
241
+ </th>
242
+ <th rowspan="2" class="px-6 py-3 text-left text-green-700 w-32 border-b border-slate-200">Efficiency Gain</th>
243
+ </tr>
244
+ <tr>
245
+ <th class="px-4 py-2 text-left text-[10px] bg-slate-50 text-slate-500 border-b border-r border-slate-200">
246
+ With Sharding (16)
247
+ </th>
248
+ <th class="px-4 py-2 text-left text-[10px] bg-slate-50 text-slate-500 border-b border-slate-200">
249
+ No Sharding (1)
250
+ </th>
251
  </tr>
252
  </thead>
253
  <tbody class="bg-white divide-y divide-slate-100">
 
289
  end_base = time.time()
290
  baseline_time_ms = (end_base - start_base) * 1000
291
 
292
+ # 3. Reference: Direct Sharded Search (Full Scan on Prod)
293
+ # This gives us the "With Sharding" latency
294
+ db_prod = dbs.get(f"{model_key}_prod")
295
+ if db_prod:
296
+ start_sharded = time.time()
297
+ # Calling search_baseline on db_prod (UnifiedQdrant) performs a full scan if no shard selector
298
+ _ = db_prod.search_baseline(query_vec)
299
+ end_sharded = time.time()
300
+ direct_sharded_time_ms = (end_sharded - start_sharded) * 1000
301
+ else:
302
+ direct_sharded_time_ms = baseline_time_ms * 1.2 # Fallback
303
+
304
  # 3. Loop over Router Models
305
  for router_type in ROUTER_MODELS:
306
  router_key = f"{model_key}_{router_type}"
 
333
  total_vectors = sum(shard_sizes.values()) if shard_sizes else 1000 # Default to 1k if missing
334
  vectors_scanned_pct = (vectors_scanned / total_vectors) * 100 if total_vectors > 0 else 0
335
 
336
+ # Calculate Recall for Optimized (vs Baseline)
337
  prod_ids = set(p.id for p in prod_results)
338
  if base_ids:
339
  intersection = len(base_ids.intersection(prod_ids))
 
341
  else:
342
  recall = 0.0
343
 
344
+ # Efficiency Gain: (Direct_Sharded - Optimized) / Direct_Sharded
345
+ # Using real sharded time
346
+ if direct_sharded_time_ms > 0:
347
+ eff_gain = ((direct_sharded_time_ms - latency_ms) / direct_sharded_time_ms) * 100
348
+ else:
349
+ eff_gain = 0.0
 
 
 
 
 
 
 
 
 
 
 
 
350
 
351
  # Formatting
352
  row = {
 
383
 
384
  if __name__ == "__main__":
385
  demo.launch()