cstr committed on
Commit
e02acec
·
verified ·
1 Parent(s): 9d0a528

Case-insensitive Wiktionary search

Browse files
Files changed (1) hide show
  1. app.py +28 -14
app.py CHANGED
@@ -1410,26 +1410,35 @@ def _wiktionary_build_report_for_entry(entry_id: int, conn: sqlite3.Connection)
1410
  return report
1411
 
1412
  def _wiktionary_find_all_entries(word: str, conn: sqlite3.Connection) -> List[Dict[str, Any]]:
1413
- """ Finds all entries related to an English word. """
1414
  log(f"Wiktionary (EN): Querying for '{word}'...")
1415
  found_entry_ids: Set[int] = set()
1416
 
1417
  lang_query = 'English'
1418
  form_titles = ("Inflected form", "verb form", "noun form", "adjective form", "Comparative", "Superlative")
1419
 
1420
- lemma_q = conn.execute(
1421
- f"SELECT id, pos_title FROM entries WHERE word = ? AND lang = '{lang_query}'", (word,)
1422
- ).fetchall()
 
 
 
 
 
 
 
 
1423
 
1424
  parent_lemmas_to_find: Set[str] = set()
1425
 
1426
  for row in lemma_q:
1427
  entry_id = row["id"]
1428
- pos_title = row["pos_title"]
1429
  found_entry_ids.add(entry_id)
1430
 
1431
- if pos_title in form_titles:
1432
- log(f"Wiktionary: Word '{word}' is an inflected entry (ID {entry_id}). Looking for its parent lemma...")
 
1433
  form_of_q = conn.execute(
1434
  "SELECT form_of FROM senses WHERE entry_id = ?", (entry_id,)
1435
  ).fetchall()
@@ -1444,30 +1453,35 @@ def _wiktionary_find_all_entries(word: str, conn: sqlite3.Connection) -> List[Di
1444
  if parent_lemma_word:
1445
  parent_lemmas_to_find.add(parent_lemma_word)
1446
  except json.JSONDecodeError:
1447
- log(f"Wiktionary: Failed to parse form_of JSON: {form_of_json}")
1448
 
1449
- form_q = conn.execute(
1450
- f"""
 
1451
  SELECT DISTINCT e.id
1452
  FROM forms f
1453
  JOIN entries e ON f.entry_id = e.id
1454
- WHERE f.form_text = ? AND e.lang = '{lang_query}'
1455
  AND f.id NOT IN (
1456
  SELECT ft.form_id
1457
  FROM form_tags ft
1458
  JOIN tags t ON ft.tag_id = t.id
1459
  WHERE t.tag IN ('variant', 'auxiliary')
1460
  )
1461
- """, (word,)
1462
- ).fetchall()
 
 
1463
  for row in form_q:
1464
  found_entry_ids.add(row["id"])
1465
 
 
1466
  if parent_lemmas_to_find:
1467
  log(f"Wiktionary: Found parent lemmas to add: {parent_lemmas_to_find}")
1468
  for lemma_word in parent_lemmas_to_find:
 
1469
  parent_id_q = conn.execute(
1470
- f"SELECT id FROM entries WHERE word = ? AND lang = '{lang_query}'", (lemma_word,)
1471
  ).fetchall()
1472
  for row in parent_id_q:
1473
  found_entry_ids.add(row["id"])
 
1410
  return report
1411
 
1412
  def _wiktionary_find_all_entries(word: str, conn: sqlite3.Connection) -> List[Dict[str, Any]]:
1413
+ """ Finds all entries related to an English word (Case-Insensitive Search). """
1414
  log(f"Wiktionary (EN): Querying for '{word}'...")
1415
  found_entry_ids: Set[int] = set()
1416
 
1417
  lang_query = 'English'
1418
  form_titles = ("Inflected form", "verb form", "noun form", "adjective form", "Comparative", "Superlative")
1419
 
1420
+ # --- FIX: Search for Original, Lowercase, and Title Case ---
1421
+ # This ensures we find "Ready" when input is "ready"
1422
+ search_variants = list(set([word, word.lower(), word.title()]))
1423
+ placeholders = ', '.join('?' for _ in search_variants)
1424
+
1425
+ # 1. Search Lemmatized Entries
1426
+ sql_lemma = f"SELECT id, pos_title, word FROM entries WHERE word IN ({placeholders}) AND lang = ?"
1427
+ # flatten params: [var1, var2, ..., 'English']
1428
+ params_lemma = list(search_variants) + [lang_query]
1429
+
1430
+ lemma_q = conn.execute(sql_lemma, params_lemma).fetchall()
1431
 
1432
  parent_lemmas_to_find: Set[str] = set()
1433
 
1434
  for row in lemma_q:
1435
  entry_id = row["id"]
1436
+ pos_title = row["pos_title"] or "" # Safe string
1437
  found_entry_ids.add(entry_id)
1438
 
1439
+ # If it's a form entry (e.g. "running"), try to find the parent ("run")
1440
+ if any(ft in pos_title for ft in form_titles):
1441
+ log(f"Wiktionary: '{row['word']}' is an inflected entry (ID {entry_id}). Looking for parent...")
1442
  form_of_q = conn.execute(
1443
  "SELECT form_of FROM senses WHERE entry_id = ?", (entry_id,)
1444
  ).fetchall()
 
1453
  if parent_lemma_word:
1454
  parent_lemmas_to_find.add(parent_lemma_word)
1455
  except json.JSONDecodeError:
1456
+ pass
1457
 
1458
+ # 2. Search Inflected Forms (Reverse lookup)
1459
+ # We also apply the case variants here
1460
+ sql_form = f"""
1461
  SELECT DISTINCT e.id
1462
  FROM forms f
1463
  JOIN entries e ON f.entry_id = e.id
1464
+ WHERE f.form_text IN ({placeholders}) AND e.lang = ?
1465
  AND f.id NOT IN (
1466
  SELECT ft.form_id
1467
  FROM form_tags ft
1468
  JOIN tags t ON ft.tag_id = t.id
1469
  WHERE t.tag IN ('variant', 'auxiliary')
1470
  )
1471
+ """
1472
+ params_form = list(search_variants) + [lang_query]
1473
+
1474
+ form_q = conn.execute(sql_form, params_form).fetchall()
1475
  for row in form_q:
1476
  found_entry_ids.add(row["id"])
1477
 
1478
+ # 3. Add Parent Lemmas (if any found in step 1)
1479
  if parent_lemmas_to_find:
1480
  log(f"Wiktionary: Found parent lemmas to add: {parent_lemmas_to_find}")
1481
  for lemma_word in parent_lemmas_to_find:
1482
+ # Look up each parent lemma by exact match (single query; no recursion or case variants)
1483
  parent_id_q = conn.execute(
1484
+ "SELECT id FROM entries WHERE word = ? AND lang = ?", (lemma_word, lang_query)
1485
  ).fetchall()
1486
  for row in parent_id_q:
1487
  found_entry_ids.add(row["id"])