Spaces:

buzzbandit
/

TornFlyingInventory

Sleeping

App Files Community

buzzbandit committed on Oct 24

Commit

8fcbcc3

verified ·

1 Parent(s): b4092ca

add diagnostics

Browse files

Files changed (1) hide show

app.py +20 -15

app.py CHANGED Viewed

@@ -137,12 +137,14 @@ def parse_freeform_query(text: str):
     return text, ""
 # ---------------- Improved semantic search ----------------
-def semantic_match(query, top_k=15, debug_top_n=5):
-    """Semantic search for items and categories with robust fallback and verbose diagnostics."""
     if not query:
         return {"category": None, "items": []}
     query = query.strip().lower()
     try:
         q_emb = embedder.encode(query, convert_to_tensor=True)
@@ -153,37 +155,42 @@ def semantic_match(query, top_k=15, debug_top_n=5):
     # --- Item similarities ---
     sims_items = {n: float(util.cos_sim(q_emb, emb)) for n, emb in ITEM_EMBEDS.items()}
     ranked_items = sorted(sims_items.items(), key=lambda x: x[1], reverse=True)
     item_hits = [n for n, score in ranked_items[:top_k] if score > 0.35]
     top_item_score = float(ranked_items[0][1]) if ranked_items else 0.0
     # --- Category similarities ---
     sims_cats = {c: float(util.cos_sim(q_emb, emb)) for c, emb in CATEGORY_EMBEDS.items()}
     ranked_cats = sorted(sims_cats.items(), key=lambda x: x[1], reverse=True)
-    if ranked_cats:
-        debug_preview = ", ".join([f"{c} ({s:.2f})" for c, s in ranked_cats[:debug_top_n]])
-        print(f"🔍 Category similarity for '{query}': {debug_preview}")
     top_cat, cat_score = (ranked_cats[0] if ranked_cats else (None, 0.0))
     strong_category = cat_score > 0.35
     weak_items = len(item_hits) == 0 or (top_item_score < 0.4)
     clearly_better = cat_score - top_item_score > 0.1
-    # --- NEW 1️⃣ direct substring heuristic ---
     if not top_cat:
         for c in CATEGORY_EMBEDS.keys():
             if c in query or query in c:
-                print(f"🧩 Heuristic substring fallback: '{query}' → '{c}'")
                 top_cat = c
                 strong_category = True
                 cat_score = 0.5
                 break
-    # --- NEW 2️⃣ plural/singular heuristic ---
     if not top_cat and query.endswith("s"):
         singular = query[:-1]
         if singular in CATEGORY_EMBEDS:
-            print(f"🧩 Plural fallback: '{query}' → '{singular}'")
             top_cat = singular
             strong_category = True
             cat_score = 0.5
@@ -191,18 +198,16 @@ def semantic_match(query, top_k=15, debug_top_n=5):
     # --- Decision ---
     if top_cat and (strong_category and (weak_items or clearly_better)):
         related_items = [n for n, t in ITEM_TO_TYPE.items() if t and t == top_cat]
-        print(f"🧩 Semantic fallback: treating '{query}' as category '{top_cat}' "
               f"({len(related_items)} items, cat_score={cat_score:.2f}, item_score={top_item_score:.2f})")
         return {"category": top_cat, "items": related_items}
-    # --- If still nothing, show best guesses for debug ---
-    if not item_hits and not top_cat:
-        print(f"⚠️ No strong semantic match for '{query}'. Top items: "
-              f"{[f'{n}({s:.2f})' for n, s in ranked_items[:3]]}")
     return {"category": None, "items": item_hits}
 # ---------------- Fetch YATA ----------------
 def fetch_yata(force_refresh=False):
     if not force_refresh and _cache["data"] and (time.time() - _cache["timestamp"] < 300):

     return text, ""
 # ---------------- Improved semantic search ----------------
+def semantic_match(query, top_k=15, debug_top_n=8):
+    """Full diagnostic semantic search — logs item and category similarity scores, fallback logic."""
     if not query:
+        print("⚠️ semantic_match called with empty query")
         return {"category": None, "items": []}
     query = query.strip().lower()
+    print(f"\n🧠 [semantic_match] Input query: '{query}'")
     try:
         q_emb = embedder.encode(query, convert_to_tensor=True)
     # --- Item similarities ---
     sims_items = {n: float(util.cos_sim(q_emb, emb)) for n, emb in ITEM_EMBEDS.items()}
     ranked_items = sorted(sims_items.items(), key=lambda x: x[1], reverse=True)
+    top_items_preview = [f"{n} ({s:.2f})" for n, s in ranked_items[:debug_top_n]]
+    print(f"   🔸 Top item similarities: {', '.join(top_items_preview)}")
     item_hits = [n for n, score in ranked_items[:top_k] if score > 0.35]
     top_item_score = float(ranked_items[0][1]) if ranked_items else 0.0
+    print(f"   ✅ Found {len(item_hits)} item hits (top score={top_item_score:.2f})")
     # --- Category similarities ---
     sims_cats = {c: float(util.cos_sim(q_emb, emb)) for c, emb in CATEGORY_EMBEDS.items()}
     ranked_cats = sorted(sims_cats.items(), key=lambda x: x[1], reverse=True)
+    top_cats_preview = [f"{c} ({s:.2f})" for c, s in ranked_cats[:debug_top_n]]
+    print(f"   🔹 Top category similarities: {', '.join(top_cats_preview)}")
     top_cat, cat_score = (ranked_cats[0] if ranked_cats else (None, 0.0))
     strong_category = cat_score > 0.35
     weak_items = len(item_hits) == 0 or (top_item_score < 0.4)
     clearly_better = cat_score - top_item_score > 0.1
+    print(f"   💡 top_cat={top_cat}, cat_score={cat_score:.2f}, strong_category={strong_category}, "
+          f"weak_items={weak_items}, clearly_better={clearly_better}")
+    # --- Heuristic substring fallback ---
     if not top_cat:
         for c in CATEGORY_EMBEDS.keys():
             if c in query or query in c:
+                print(f"   🧩 Heuristic substring fallback → '{c}'")
                 top_cat = c
                 strong_category = True
                 cat_score = 0.5
                 break
+    # --- Plural heuristic ---
     if not top_cat and query.endswith("s"):
         singular = query[:-1]
         if singular in CATEGORY_EMBEDS:
+            print(f"   🧩 Plural fallback → '{singular}'")
             top_cat = singular
             strong_category = True
             cat_score = 0.5
     # --- Decision ---
     if top_cat and (strong_category and (weak_items or clearly_better)):
         related_items = [n for n, t in ITEM_TO_TYPE.items() if t and t == top_cat]
+        print(f"✅ [FALLBACK] '{query}' → category '{top_cat}' "
               f"({len(related_items)} items, cat_score={cat_score:.2f}, item_score={top_item_score:.2f})")
         return {"category": top_cat, "items": related_items}
+    print(f"🚫 No semantic fallback triggered for '{query}' — returning {len(item_hits)} item matches.")
     return {"category": None, "items": item_hits}
 # ---------------- Fetch YATA ----------------
 def fetch_yata(force_refresh=False):
     if not force_refresh and _cache["data"] and (time.time() - _cache["timestamp"] < 300):