buzzbandit committed on
Commit
8fcbcc3
·
verified ·
1 Parent(s): b4092ca

add diagnostics

Browse files
Files changed (1) hide show
  1. app.py +20 -15
app.py CHANGED
@@ -137,12 +137,14 @@ def parse_freeform_query(text: str):
137
  return text, ""
138
 
139
  # ---------------- Improved semantic search ----------------
140
- def semantic_match(query, top_k=15, debug_top_n=5):
141
- """Semantic search for items and categories with robust fallback and verbose diagnostics."""
142
  if not query:
 
143
  return {"category": None, "items": []}
144
 
145
  query = query.strip().lower()
 
146
 
147
  try:
148
  q_emb = embedder.encode(query, convert_to_tensor=True)
@@ -153,37 +155,42 @@ def semantic_match(query, top_k=15, debug_top_n=5):
153
  # --- Item similarities ---
154
  sims_items = {n: float(util.cos_sim(q_emb, emb)) for n, emb in ITEM_EMBEDS.items()}
155
  ranked_items = sorted(sims_items.items(), key=lambda x: x[1], reverse=True)
 
 
 
156
  item_hits = [n for n, score in ranked_items[:top_k] if score > 0.35]
157
  top_item_score = float(ranked_items[0][1]) if ranked_items else 0.0
 
158
 
159
  # --- Category similarities ---
160
  sims_cats = {c: float(util.cos_sim(q_emb, emb)) for c, emb in CATEGORY_EMBEDS.items()}
161
  ranked_cats = sorted(sims_cats.items(), key=lambda x: x[1], reverse=True)
162
-
163
- if ranked_cats:
164
- debug_preview = ", ".join([f"{c} ({s:.2f})" for c, s in ranked_cats[:debug_top_n]])
165
- print(f"πŸ” Category similarity for '{query}': {debug_preview}")
166
 
167
  top_cat, cat_score = (ranked_cats[0] if ranked_cats else (None, 0.0))
168
  strong_category = cat_score > 0.35
169
  weak_items = len(item_hits) == 0 or (top_item_score < 0.4)
170
  clearly_better = cat_score - top_item_score > 0.1
171
 
172
- # --- NEW 1️⃣ direct substring heuristic ---
 
 
 
173
  if not top_cat:
174
  for c in CATEGORY_EMBEDS.keys():
175
  if c in query or query in c:
176
- print(f"🧩 Heuristic substring fallback: '{query}' β†’ '{c}'")
177
  top_cat = c
178
  strong_category = True
179
  cat_score = 0.5
180
  break
181
 
182
- # --- NEW 2️⃣ plural/singular heuristic ---
183
  if not top_cat and query.endswith("s"):
184
  singular = query[:-1]
185
  if singular in CATEGORY_EMBEDS:
186
- print(f"🧩 Plural fallback: '{query}' β†’ '{singular}'")
187
  top_cat = singular
188
  strong_category = True
189
  cat_score = 0.5
@@ -191,18 +198,16 @@ def semantic_match(query, top_k=15, debug_top_n=5):
191
  # --- Decision ---
192
  if top_cat and (strong_category and (weak_items or clearly_better)):
193
  related_items = [n for n, t in ITEM_TO_TYPE.items() if t and t == top_cat]
194
- print(f"🧩 Semantic fallback: treating '{query}' as category '{top_cat}' "
195
  f"({len(related_items)} items, cat_score={cat_score:.2f}, item_score={top_item_score:.2f})")
196
  return {"category": top_cat, "items": related_items}
197
 
198
- # --- If still nothing, show best guesses for debug ---
199
- if not item_hits and not top_cat:
200
- print(f"⚠️ No strong semantic match for '{query}'. Top items: "
201
- f"{[f'{n}({s:.2f})' for n, s in ranked_items[:3]]}")
202
  return {"category": None, "items": item_hits}
203
 
204
 
205
 
 
206
  # ---------------- Fetch YATA ----------------
207
  def fetch_yata(force_refresh=False):
208
  if not force_refresh and _cache["data"] and (time.time() - _cache["timestamp"] < 300):
 
137
  return text, ""
138
 
139
  # ---------------- Improved semantic search ----------------
140
+ def semantic_match(query, top_k=15, debug_top_n=8):
141
+ """Full diagnostic semantic search — logs item and category similarity scores, fallback logic."""
142
  if not query:
143
+ print("⚠️ semantic_match called with empty query")
144
  return {"category": None, "items": []}
145
 
146
  query = query.strip().lower()
147
+ print(f"\n🧠 [semantic_match] Input query: '{query}'")
148
 
149
  try:
150
  q_emb = embedder.encode(query, convert_to_tensor=True)
 
155
  # --- Item similarities ---
156
  sims_items = {n: float(util.cos_sim(q_emb, emb)) for n, emb in ITEM_EMBEDS.items()}
157
  ranked_items = sorted(sims_items.items(), key=lambda x: x[1], reverse=True)
158
+ top_items_preview = [f"{n} ({s:.2f})" for n, s in ranked_items[:debug_top_n]]
159
+ print(f" πŸ”Έ Top item similarities: {', '.join(top_items_preview)}")
160
+
161
  item_hits = [n for n, score in ranked_items[:top_k] if score > 0.35]
162
  top_item_score = float(ranked_items[0][1]) if ranked_items else 0.0
163
+ print(f" βœ… Found {len(item_hits)} item hits (top score={top_item_score:.2f})")
164
 
165
  # --- Category similarities ---
166
  sims_cats = {c: float(util.cos_sim(q_emb, emb)) for c, emb in CATEGORY_EMBEDS.items()}
167
  ranked_cats = sorted(sims_cats.items(), key=lambda x: x[1], reverse=True)
168
+ top_cats_preview = [f"{c} ({s:.2f})" for c, s in ranked_cats[:debug_top_n]]
169
+ print(f" πŸ”Ή Top category similarities: {', '.join(top_cats_preview)}")
 
 
170
 
171
  top_cat, cat_score = (ranked_cats[0] if ranked_cats else (None, 0.0))
172
  strong_category = cat_score > 0.35
173
  weak_items = len(item_hits) == 0 or (top_item_score < 0.4)
174
  clearly_better = cat_score - top_item_score > 0.1
175
 
176
+ print(f" πŸ’‘ top_cat={top_cat}, cat_score={cat_score:.2f}, strong_category={strong_category}, "
177
+ f"weak_items={weak_items}, clearly_better={clearly_better}")
178
+
179
+ # --- Heuristic substring fallback ---
180
  if not top_cat:
181
  for c in CATEGORY_EMBEDS.keys():
182
  if c in query or query in c:
183
+ print(f" 🧩 Heuristic substring fallback β†’ '{c}'")
184
  top_cat = c
185
  strong_category = True
186
  cat_score = 0.5
187
  break
188
 
189
+ # --- Plural heuristic ---
190
  if not top_cat and query.endswith("s"):
191
  singular = query[:-1]
192
  if singular in CATEGORY_EMBEDS:
193
+ print(f" 🧩 Plural fallback β†’ '{singular}'")
194
  top_cat = singular
195
  strong_category = True
196
  cat_score = 0.5
 
198
  # --- Decision ---
199
  if top_cat and (strong_category and (weak_items or clearly_better)):
200
  related_items = [n for n, t in ITEM_TO_TYPE.items() if t and t == top_cat]
201
+ print(f"βœ… [FALLBACK] '{query}' β†’ category '{top_cat}' "
202
  f"({len(related_items)} items, cat_score={cat_score:.2f}, item_score={top_item_score:.2f})")
203
  return {"category": top_cat, "items": related_items}
204
 
205
+ print(f"🚫 No semantic fallback triggered for '{query}' β€” returning {len(item_hits)} item matches.")
 
 
 
206
  return {"category": None, "items": item_hits}
207
 
208
 
209
 
210
+
211
  # ---------------- Fetch YATA ----------------
212
  def fetch_yata(force_refresh=False):
213
  if not force_refresh and _cache["data"] and (time.time() - _cache["timestamp"] < 300):