Spaces:

dashVector
/

dashVectorSpace

Sleeping

App Files Files Community

justmotes committed 7 days ago

Commit

70193f5

verified ·

1 Parent(s): 4267104

Upload app.py with huggingface_hub

Browse files

Files changed (1) hide show

app.py +51 -26

app.py CHANGED Viewed

@@ -180,13 +180,27 @@ def generate_table_html(rows):
                     </div>
                 </div>
             </td>
             <td class="px-6 py-4 whitespace-nowrap align-top border-b border-slate-100">
                  <div class="space-y-1">
                      <div class="flex justify-between items-center">
                         <span class="text-xs text-slate-500">Time:</span>
                         <span class="text-sm font-medium text-slate-700">{row['baselineTime']}</span>
                     </div>
-                    <div class="text-[10px] text-slate-400 text-right mt-1">Full Scan (16 Shards)</div>
                 </div>
             </td>
             <td class="px-6 py-4 whitespace-nowrap align-top border-b border-slate-100">
@@ -217,11 +231,23 @@ def generate_table_html(rows):
             <table class="min-w-full divide-y divide-slate-200 border-separate border-spacing-0">
                 <thead class="bg-slate-50 sticky top-0 z-10 text-xs font-bold text-slate-500 uppercase tracking-wider">
                     <tr>
-                        <th class="px-6 py-3 text-left w-48 border-b border-slate-200">Embedding Model</th>
-                        <th class="px-6 py-3 text-left w-48 border-b border-slate-200">Router Model</th>
-                        <th class="px-6 py-3 text-left bg-blue-50/50 border-l border-r border-b border-blue-100 text-blue-800 min-w-[300px]">dashVector Search (Optimized)</th>
-                        <th class="px-6 py-3 text-left border-b border-r border-slate-200 bg-slate-50/80">Direct Qdrant Search (Baseline)</th>
-                        <th class="px-6 py-3 text-left text-green-700 w-32 border-b border-slate-200">Efficiency Gain</th>
                     </tr>
                 </thead>
                 <tbody class="bg-white divide-y divide-slate-100">
@@ -263,6 +289,18 @@ def run_benchmark(query):
         end_base = time.time()
         baseline_time_ms = (end_base - start_base) * 1000
         # 3. Loop over Router Models
         for router_type in ROUTER_MODELS:
             router_key = f"{model_key}_{router_type}"
@@ -295,7 +333,7 @@ def run_benchmark(query):
                 total_vectors = sum(shard_sizes.values()) if shard_sizes else 1000 # Default to 1k if missing
                 vectors_scanned_pct = (vectors_scanned / total_vectors) * 100 if total_vectors > 0 else 0
-            # Calculate Recall
             prod_ids = set(p.id for p in prod_results)
             if base_ids:
                 intersection = len(base_ids.intersection(prod_ids))
@@ -303,24 +341,12 @@ def run_benchmark(query):
             else:
                 recall = 0.0
-            # Direct Sharded Time (Simulated or Measured?)
-            # We can't easily measure "Direct Sharded" without running it.
-            # Let's assume Direct Sharded is roughly Baseline Time * 1.1 (overhead) or similar?
-            # Or we can run a full scan on Prod (all shards).
-            # Let's estimate it as Baseline Time + 10% for now to save time,
-            # or use the Baseline Time as the "Direct Search (Baseline)" column.
-            # The table has "Direct Search (Sharded)" and "Direct Search (No Sharding)".
-            # "No Sharding" is our Baseline Time.
-            # "Sharded" (Full Scan) is usually slower than No Sharding due to overhead.
-            direct_sharded_time_ms = baseline_time_ms * 1.15
-            # Efficiency Gain: (Baseline - Optimized) / Baseline
-            # Wait, the table shows efficiency gain relative to what?
-            # Usually relative to the Baseline (No Sharding) or Full Scan?
-            # The screenshot shows "Efficiency Gain" and "Faster".
-            # Formula: (Direct_Time - Optimized_Time) / Direct_Time
-            # Let's use Baseline Time as the reference.
-            eff_gain = ((baseline_time_ms - latency_ms) / baseline_time_ms) * 100
             # Formatting
             row = {
@@ -357,4 +383,3 @@ with gr.Blocks(theme=gr.themes.Base(), css=None, head=HEAD_HTML) as demo:
 if __name__ == "__main__":
     demo.launch()
-# Force rebuild Sun Dec  7 03:10:34 AM IST 2025

                     </div>
                 </div>
             </td>
+            <td class="px-6 py-4 whitespace-nowrap align-top border-b border-r border-slate-100 bg-slate-50/30">
+                <div class="space-y-1">
+                    <div class="flex justify-between items-center">
+                        <span class="text-xs text-slate-500">Time:</span>
+                        <span class="text-sm font-medium text-slate-700">{row['directTime']}</span>
+                    </div>
+                    <div class="flex justify-between items-center">
+                        <span class="text-xs text-slate-500">Recall:</span>
+                        <span class="text-xs font-mono bg-slate-100 px-1.5 rounded text-slate-600">
+                            {row['recall']}
+                        </span>
+                    </div>
+                </div>
+            </td>
             <td class="px-6 py-4 whitespace-nowrap align-top border-b border-slate-100">
                  <div class="space-y-1">
                      <div class="flex justify-between items-center">
                         <span class="text-xs text-slate-500">Time:</span>
                         <span class="text-sm font-medium text-slate-700">{row['baselineTime']}</span>
                     </div>
+                    <div class="text-[10px] text-slate-400 text-right mt-1">Single Index</div>
                 </div>
             </td>
             <td class="px-6 py-4 whitespace-nowrap align-top border-b border-slate-100">
             <table class="min-w-full divide-y divide-slate-200 border-separate border-spacing-0">
                 <thead class="bg-slate-50 sticky top-0 z-10 text-xs font-bold text-slate-500 uppercase tracking-wider">
                     <tr>
+                        <th rowspan="2" class="px-6 py-3 text-left w-48 border-b border-slate-200">Embedding Model</th>
+                        <th rowspan="2" class="px-6 py-3 text-left w-48 border-b border-slate-200">Router Model</th>
+                        <th rowspan="2" class="px-6 py-3 text-left bg-blue-50/50 border-l border-r border-b border-blue-100 text-blue-800 min-w-[300px]">
+                            dashVector Performance (Optimized)
+                        </th>
+                        <th colspan="2" class="px-6 py-2 text-center border-b border-r border-slate-200 bg-slate-50/80">
+                            Direct Search
+                        </th>
+                        <th rowspan="2" class="px-6 py-3 text-left text-green-700 w-32 border-b border-slate-200">Efficiency Gain</th>
+                    </tr>
+                    <tr>
+                        <th class="px-4 py-2 text-left text-[10px] bg-slate-50 text-slate-500 border-b border-r border-slate-200">
+                            With Sharding (16)
+                        </th>
+                        <th class="px-4 py-2 text-left text-[10px] bg-slate-50 text-slate-500 border-b border-slate-200">
+                            No Sharding (1)
+                        </th>
                     </tr>
                 </thead>
                 <tbody class="bg-white divide-y divide-slate-100">
         end_base = time.time()
         baseline_time_ms = (end_base - start_base) * 1000
+        # 3. Reference: Direct Sharded Search (Full Scan on Prod)
+        # This gives us the "With Sharding" latency
+        db_prod = dbs.get(f"{model_key}_prod")
+        if db_prod:
+            start_sharded = time.time()
+            # Calling search_baseline on db_prod (UnifiedQdrant) performs a full scan when no shard selector is given
+            _ = db_prod.search_baseline(query_vec)
+            end_sharded = time.time()
+            direct_sharded_time_ms = (end_sharded - start_sharded) * 1000
+        else:
+            direct_sharded_time_ms = baseline_time_ms * 1.2 # Fallback
         # 4. Loop over Router Models
         for router_type in ROUTER_MODELS:
             router_key = f"{model_key}_{router_type}"
                 total_vectors = sum(shard_sizes.values()) if shard_sizes else 1000 # Default to 1k if missing
                 vectors_scanned_pct = (vectors_scanned / total_vectors) * 100 if total_vectors > 0 else 0
+            # Calculate Recall for Optimized (vs Baseline)
             prod_ids = set(p.id for p in prod_results)
             if base_ids:
                 intersection = len(base_ids.intersection(prod_ids))
             else:
                 recall = 0.0
+            # Efficiency Gain: (Direct_Sharded - Optimized) / Direct_Sharded
+            # Using real sharded time
+            if direct_sharded_time_ms > 0:
+                eff_gain = ((direct_sharded_time_ms - latency_ms) / direct_sharded_time_ms) * 100
+            else:
+                eff_gain = 0.0
             # Formatting
             row = {
 if __name__ == "__main__":
     demo.launch()