Spaces:
Sleeping
Sleeping
add diagnostics
Browse files
app.py
CHANGED
|
@@ -137,12 +137,14 @@ def parse_freeform_query(text: str):
|
|
| 137 |
return text, ""
|
| 138 |
|
| 139 |
# ---------------- Improved semantic search ----------------
|
| 140 |
-
def semantic_match(query, top_k=15, debug_top_n=5):
|
| 141 |
-
"""
|
| 142 |
if not query:
|
|
|
|
| 143 |
return {"category": None, "items": []}
|
| 144 |
|
| 145 |
query = query.strip().lower()
|
|
|
|
| 146 |
|
| 147 |
try:
|
| 148 |
q_emb = embedder.encode(query, convert_to_tensor=True)
|
|
@@ -153,37 +155,42 @@ def semantic_match(query, top_k=15, debug_top_n=5):
|
|
| 153 |
# --- Item similarities ---
|
| 154 |
sims_items = {n: float(util.cos_sim(q_emb, emb)) for n, emb in ITEM_EMBEDS.items()}
|
| 155 |
ranked_items = sorted(sims_items.items(), key=lambda x: x[1], reverse=True)
|
|
|
|
|
|
|
|
|
|
| 156 |
item_hits = [n for n, score in ranked_items[:top_k] if score > 0.35]
|
| 157 |
top_item_score = float(ranked_items[0][1]) if ranked_items else 0.0
|
|
|
|
| 158 |
|
| 159 |
# --- Category similarities ---
|
| 160 |
sims_cats = {c: float(util.cos_sim(q_emb, emb)) for c, emb in CATEGORY_EMBEDS.items()}
|
| 161 |
ranked_cats = sorted(sims_cats.items(), key=lambda x: x[1], reverse=True)
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
debug_preview = ", ".join([f"{c} ({s:.2f})" for c, s in ranked_cats[:debug_top_n]])
|
| 165 |
-
print(f"🔍 Category similarity for '{query}': {debug_preview}")
|
| 166 |
|
| 167 |
top_cat, cat_score = (ranked_cats[0] if ranked_cats else (None, 0.0))
|
| 168 |
strong_category = cat_score > 0.35
|
| 169 |
weak_items = len(item_hits) == 0 or (top_item_score < 0.4)
|
| 170 |
clearly_better = cat_score - top_item_score > 0.1
|
| 171 |
|
| 172 |
-
|
|
|
|
|
|
|
|
|
|
| 173 |
if not top_cat:
|
| 174 |
for c in CATEGORY_EMBEDS.keys():
|
| 175 |
if c in query or query in c:
|
| 176 |
-
print(f"🧩 Heuristic substring fallback
|
| 177 |
top_cat = c
|
| 178 |
strong_category = True
|
| 179 |
cat_score = 0.5
|
| 180 |
break
|
| 181 |
|
| 182 |
-
# ---
|
| 183 |
if not top_cat and query.endswith("s"):
|
| 184 |
singular = query[:-1]
|
| 185 |
if singular in CATEGORY_EMBEDS:
|
| 186 |
-
print(f"🧩 Plural fallback
|
| 187 |
top_cat = singular
|
| 188 |
strong_category = True
|
| 189 |
cat_score = 0.5
|
|
@@ -191,18 +198,16 @@ def semantic_match(query, top_k=15, debug_top_n=5):
|
|
| 191 |
# --- Decision ---
|
| 192 |
if top_cat and (strong_category and (weak_items or clearly_better)):
|
| 193 |
related_items = [n for n, t in ITEM_TO_TYPE.items() if t and t == top_cat]
|
| 194 |
-
print(f"
|
| 195 |
f"({len(related_items)} items, cat_score={cat_score:.2f}, item_score={top_item_score:.2f})")
|
| 196 |
return {"category": top_cat, "items": related_items}
|
| 197 |
|
| 198 |
-
|
| 199 |
-
if not item_hits and not top_cat:
|
| 200 |
-
print(f"⚠️ No strong semantic match for '{query}'. Top items: "
|
| 201 |
-
f"{[f'{n}({s:.2f})' for n, s in ranked_items[:3]]}")
|
| 202 |
return {"category": None, "items": item_hits}
|
| 203 |
|
| 204 |
|
| 205 |
|
|
|
|
| 206 |
# ---------------- Fetch YATA ----------------
|
| 207 |
def fetch_yata(force_refresh=False):
|
| 208 |
if not force_refresh and _cache["data"] and (time.time() - _cache["timestamp"] < 300):
|
|
|
|
| 137 |
return text, ""
|
| 138 |
|
| 139 |
# ---------------- Improved semantic search ----------------
|
| 140 |
+
def semantic_match(query, top_k=15, debug_top_n=8):
|
| 141 |
+
"""Full diagnostic semantic search — logs item and category similarity scores, fallback logic."""
|
| 142 |
if not query:
|
| 143 |
+
print("⚠️ semantic_match called with empty query")
|
| 144 |
return {"category": None, "items": []}
|
| 145 |
|
| 146 |
query = query.strip().lower()
|
| 147 |
+
print(f"\n🧠 [semantic_match] Input query: '{query}'")
|
| 148 |
|
| 149 |
try:
|
| 150 |
q_emb = embedder.encode(query, convert_to_tensor=True)
|
|
|
|
| 155 |
# --- Item similarities ---
|
| 156 |
sims_items = {n: float(util.cos_sim(q_emb, emb)) for n, emb in ITEM_EMBEDS.items()}
|
| 157 |
ranked_items = sorted(sims_items.items(), key=lambda x: x[1], reverse=True)
|
| 158 |
+
top_items_preview = [f"{n} ({s:.2f})" for n, s in ranked_items[:debug_top_n]]
|
| 159 |
+
print(f" 🔸 Top item similarities: {', '.join(top_items_preview)}")
|
| 160 |
+
|
| 161 |
item_hits = [n for n, score in ranked_items[:top_k] if score > 0.35]
|
| 162 |
top_item_score = float(ranked_items[0][1]) if ranked_items else 0.0
|
| 163 |
+
print(f" ✅ Found {len(item_hits)} item hits (top score={top_item_score:.2f})")
|
| 164 |
|
| 165 |
# --- Category similarities ---
|
| 166 |
sims_cats = {c: float(util.cos_sim(q_emb, emb)) for c, emb in CATEGORY_EMBEDS.items()}
|
| 167 |
ranked_cats = sorted(sims_cats.items(), key=lambda x: x[1], reverse=True)
|
| 168 |
+
top_cats_preview = [f"{c} ({s:.2f})" for c, s in ranked_cats[:debug_top_n]]
|
| 169 |
+
print(f" 🔹 Top category similarities: {', '.join(top_cats_preview)}")
|
|
|
|
|
|
|
| 170 |
|
| 171 |
top_cat, cat_score = (ranked_cats[0] if ranked_cats else (None, 0.0))
|
| 172 |
strong_category = cat_score > 0.35
|
| 173 |
weak_items = len(item_hits) == 0 or (top_item_score < 0.4)
|
| 174 |
clearly_better = cat_score - top_item_score > 0.1
|
| 175 |
|
| 176 |
+
print(f" 💡 top_cat={top_cat}, cat_score={cat_score:.2f}, strong_category={strong_category}, "
|
| 177 |
+
f"weak_items={weak_items}, clearly_better={clearly_better}")
|
| 178 |
+
|
| 179 |
+
# --- Heuristic substring fallback ---
|
| 180 |
if not top_cat:
|
| 181 |
for c in CATEGORY_EMBEDS.keys():
|
| 182 |
if c in query or query in c:
|
| 183 |
+
print(f" 🧩 Heuristic substring fallback → '{c}'")
|
| 184 |
top_cat = c
|
| 185 |
strong_category = True
|
| 186 |
cat_score = 0.5
|
| 187 |
break
|
| 188 |
|
| 189 |
+
# --- Plural heuristic ---
|
| 190 |
if not top_cat and query.endswith("s"):
|
| 191 |
singular = query[:-1]
|
| 192 |
if singular in CATEGORY_EMBEDS:
|
| 193 |
+
print(f" 🧩 Plural fallback → '{singular}'")
|
| 194 |
top_cat = singular
|
| 195 |
strong_category = True
|
| 196 |
cat_score = 0.5
|
|
|
|
| 198 |
# --- Decision ---
|
| 199 |
if top_cat and (strong_category and (weak_items or clearly_better)):
|
| 200 |
related_items = [n for n, t in ITEM_TO_TYPE.items() if t and t == top_cat]
|
| 201 |
+
print(f"✅ [FALLBACK] '{query}' → category '{top_cat}' "
|
| 202 |
f"({len(related_items)} items, cat_score={cat_score:.2f}, item_score={top_item_score:.2f})")
|
| 203 |
return {"category": top_cat, "items": related_items}
|
| 204 |
|
| 205 |
+
print(f"🚫 No semantic fallback triggered for '{query}' — returning {len(item_hits)} item matches.")
|
|
|
|
|
|
|
|
|
|
| 206 |
return {"category": None, "items": item_hits}
|
| 207 |
|
| 208 |
|
| 209 |
|
| 210 |
+
|
| 211 |
# ---------------- Fetch YATA ----------------
|
| 212 |
def fetch_yata(force_refresh=False):
|
| 213 |
if not force_refresh and _cache["data"] and (time.time() - _cache["timestamp"] < 300):
|