Spaces:
Sleeping
Sleeping
Fix: Update UI, Shards (16), and Router Logic
Browse files
app.py
CHANGED
|
@@ -10,7 +10,7 @@ from src.data_pipeline import get_embedding
|
|
| 10 |
# --- Configuration ---
|
| 11 |
COLLECTION_NAME = "dashVector_v1"
|
| 12 |
VECTOR_SIZE = 384 # MiniLM-L6-v2
|
| 13 |
-
NUM_CLUSTERS =
|
| 14 |
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
|
| 15 |
|
| 16 |
# --- Initialize Backend ---
|
|
@@ -90,14 +90,13 @@ NAVBAR_HTML = """
|
|
| 90 |
<header class="bg-white border-b border-slate-200 sticky top-0 z-40 shadow-sm w-full">
|
| 91 |
<div class="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8 h-16 flex items-center justify-between">
|
| 92 |
<div class="flex items-center gap-2">
|
| 93 |
-
<!-- User Logo -->
|
| 94 |
-
<img src="file/logo.png" alt="dashVector Logo" class="h-8 w-auto" />
|
| 95 |
<h1 class="text-xl font-bold tracking-tight text-slate-900">dashVector</h1>
|
| 96 |
</div>
|
| 97 |
<div class="flex items-center gap-4">
|
| 98 |
<div class="hidden md:flex items-center gap-1.5 px-3 py-1 bg-slate-100 rounded-full border border-slate-200">
|
| 99 |
<span class="material-symbols-outlined text-slate-500 text-sm">database</span>
|
| 100 |
-
<span class="text-xs font-medium text-slate-600">Dataset: <span class="font-bold text-slate-800">MS Marco</span></span>
|
| 101 |
</div>
|
| 102 |
</div>
|
| 103 |
</div>
|
|
@@ -257,16 +256,18 @@ def run_benchmark(query):
|
|
| 257 |
|
| 258 |
# Router Prediction
|
| 259 |
if router:
|
| 260 |
-
print("DEBUG: Predicting
|
| 261 |
-
|
| 262 |
-
|
|
|
|
| 263 |
else:
|
| 264 |
print("DEBUG: No router loaded, using mock.")
|
| 265 |
-
|
| 266 |
|
| 267 |
# Search
|
| 268 |
print("DEBUG: Searching Qdrant...")
|
| 269 |
-
|
|
|
|
| 270 |
print(f"DEBUG: Search complete. Found {len(results)} results.")
|
| 271 |
|
| 272 |
end_total = time.time()
|
|
@@ -274,15 +275,17 @@ def run_benchmark(query):
|
|
| 274 |
|
| 275 |
# Construct Data Rows
|
| 276 |
|
| 277 |
-
# Live Row (MiniLM +
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
|
|
|
|
|
|
| 282 |
|
| 283 |
live_row = {
|
| 284 |
"embedding": "MiniLM-L6-v2 (Active)",
|
| 285 |
-
"router": "
|
| 286 |
"optimizedTime": f"{latency_ms:.1f} ms",
|
| 287 |
"shardsSearched": f"{shards_searched} / {total_shards}",
|
| 288 |
"totalShards": total_shards,
|
|
@@ -292,7 +295,7 @@ def run_benchmark(query):
|
|
| 292 |
"efficiency": f"+{((1 - latency_ms/direct_time)*100):.1f}%"
|
| 293 |
}
|
| 294 |
|
| 295 |
-
# Reference Rows (Static)
|
| 296 |
ref_rows = [
|
| 297 |
{
|
| 298 |
"embedding": "Gemma 300M",
|
|
@@ -307,7 +310,7 @@ def run_benchmark(query):
|
|
| 307 |
},
|
| 308 |
{
|
| 309 |
"embedding": "Qwen 600M",
|
| 310 |
-
"router": "
|
| 311 |
"optimizedTime": "109 ms",
|
| 312 |
"shardsSearched": "7 / 16",
|
| 313 |
"totalShards": 16,
|
|
|
|
| 10 |
# --- Configuration ---
|
| 11 |
COLLECTION_NAME = "dashVector_v1"
|
| 12 |
VECTOR_SIZE = 384 # MiniLM-L6-v2
|
| 13 |
+
NUM_CLUSTERS = 16
|
| 14 |
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
|
| 15 |
|
| 16 |
# --- Initialize Backend ---
|
|
|
|
| 90 |
<header class="bg-white border-b border-slate-200 sticky top-0 z-40 shadow-sm w-full">
|
| 91 |
<div class="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8 h-16 flex items-center justify-between">
|
| 92 |
<div class="flex items-center gap-2">
|
| 93 |
+
<!-- User Logo Removed -->
|
|
|
|
| 94 |
<h1 class="text-xl font-bold tracking-tight text-slate-900">dashVector</h1>
|
| 95 |
</div>
|
| 96 |
<div class="flex items-center gap-4">
|
| 97 |
<div class="hidden md:flex items-center gap-1.5 px-3 py-1 bg-slate-100 rounded-full border border-slate-200">
|
| 98 |
<span class="material-symbols-outlined text-slate-500 text-sm">database</span>
|
| 99 |
+
<span class="text-xs font-medium text-slate-600">Dataset: <span class="font-bold text-slate-800">MS Marco (25k)</span></span>
|
| 100 |
</div>
|
| 101 |
</div>
|
| 102 |
</div>
|
|
|
|
| 256 |
|
| 257 |
# Router Prediction
|
| 258 |
if router:
|
| 259 |
+
print("DEBUG: Predicting clusters...")
|
| 260 |
+
# router.predict now returns a list of clusters and a cumulative confidence
|
| 261 |
+
target_clusters, confidence = router.predict(query_vec)
|
| 262 |
+
print(f"DEBUG: Predicted clusters {target_clusters} with cumulative confidence {confidence}")
|
| 263 |
else:
|
| 264 |
print("DEBUG: No router loaded, using mock.")
|
| 265 |
+
target_clusters, confidence = [0], 0.95 # Mock
|
| 266 |
|
| 267 |
# Search
|
| 268 |
print("DEBUG: Searching Qdrant...")
|
| 269 |
+
# Now accepts list of clusters
|
| 270 |
+
results, mode = vector_db.search_hybrid(query_vec, target_clusters, confidence)
|
| 271 |
print(f"DEBUG: Search complete. Found {len(results)} results.")
|
| 272 |
|
| 273 |
end_total = time.time()
|
|
|
|
| 275 |
|
| 276 |
# Construct Data Rows
|
| 277 |
|
| 278 |
+
# Live Row (MiniLM + Logistic Regression)
|
| 279 |
+
shards_searched = len(target_clusters)
|
| 280 |
+
total_shards = 16 # Updated to 16
|
| 281 |
+
|
| 282 |
+
# Estimate the baseline time (a mock calculation for the demo, used when we don't run a full scan)
|
| 283 |
+
# Alternatively, we could run an actual full scan for a true comparison, but we estimate instead for speed
|
| 284 |
+
direct_time = latency_ms * (total_shards / max(shards_searched, 1)) * 1.1
|
| 285 |
|
| 286 |
live_row = {
|
| 287 |
"embedding": "MiniLM-L6-v2 (Active)",
|
| 288 |
+
"router": "Logistic Regression", # Updated label
|
| 289 |
"optimizedTime": f"{latency_ms:.1f} ms",
|
| 290 |
"shardsSearched": f"{shards_searched} / {total_shards}",
|
| 291 |
"totalShards": total_shards,
|
|
|
|
| 295 |
"efficiency": f"+{((1 - latency_ms/direct_time)*100):.1f}%"
|
| 296 |
}
|
| 297 |
|
| 298 |
+
# Reference Rows (Static - Updated)
|
| 299 |
ref_rows = [
|
| 300 |
{
|
| 301 |
"embedding": "Gemma 300M",
|
|
|
|
| 310 |
},
|
| 311 |
{
|
| 312 |
"embedding": "Qwen 600M",
|
| 313 |
+
"router": "Tiny MLP",
|
| 314 |
"optimizedTime": "109 ms",
|
| 315 |
"shardsSearched": "7 / 16",
|
| 316 |
"totalShards": 16,
|