alessandro trinca tornidor committed on
Commit
a434f16
·
1 Parent(s): 39ba49c

feat: handle reverse search for custom entries, merge them with the ones from the builtin wordnet corpus

Browse files
Files changed (1) hide show
  1. my_ghost_writer/text_parsers2.py +30 -8
my_ghost_writer/text_parsers2.py CHANGED
@@ -36,7 +36,6 @@ def is_nlp_available() -> bool:
36
  return nlp is not None
37
 
38
 
39
- # --- NEW: Main function for handling multi-word selections ---
40
  def find_synonyms_for_phrase(text: str, start_idx: int, end_idx: int) -> list[WordSynonymResult]:
41
  """
42
  Finds synonyms for all eligible words within a selected text span.
@@ -177,12 +176,17 @@ def extract_contextual_info_by_indices(text: str, start_idx: int, end_idx: int,
177
 
178
  def get_wordnet_synonyms(word: str, pos_tag: Optional[str] = None) -> list[dict[str, Any]]:
179
  """Get synonyms from wn with optional POS filtering.
180
- Includes custom synonyms with a flag."""
181
 
182
  # 1. Check for custom synonyms in in-memory store
183
  app_logger.info("custom_synonyms:")
184
  app_logger.info(custom_synonyms)
185
  word_lower = word.lower()
 
 
 
 
 
186
  if word_lower in custom_synonyms:
187
  app_logger.info(f"found custom_synonyms:{custom_synonyms[word_lower]} by word:{word_lower}!")
188
  # 2. If custom synonyms exist, create the appropriate structure and return
@@ -196,12 +200,26 @@ def get_wordnet_synonyms(word: str, pos_tag: Optional[str] = None) -> list[dict[
196
  }
197
  if pos_tag:
198
  custom_synset["pos"] = pos_tag
199
- return [custom_synset] # Returns a list containing one synset
200
 
201
- # 3. If no custom synonyms, proceed with the WordNet lookup
202
- try:
203
- synonyms_by_sense = []
 
 
 
 
 
204
 
 
 
 
 
 
 
 
 
 
 
205
  # Map spaCy POS to wn POS
206
  pos_map = {
207
  'NOUN': wn.NOUN,
@@ -240,12 +258,16 @@ def get_wordnet_synonyms(word: str, pos_tag: Optional[str] = None) -> list[dict[
240
  sense_data['synonyms'] = sorted(list(unique_synonyms))
241
  synonyms_by_sense.append(sense_data)
242
 
243
- return synonyms_by_sense
244
-
245
  except Exception as ex:
246
  app_logger.error(f"Error getting wn synonyms: {ex}")
247
  raise HTTPException(status_code=500, detail=f"Error retrieving synonyms: {str(ex)}")
248
 
 
 
 
 
 
 
249
 
250
  def inflect_synonym(synonym: str, original_token_info: dict[str, Any]) -> str:
251
  """Adapt the input synonym arg to match the original word's grammatical form"""
 
36
  return nlp is not None
37
 
38
 
 
39
  def find_synonyms_for_phrase(text: str, start_idx: int, end_idx: int) -> list[WordSynonymResult]:
40
  """
41
  Finds synonyms for all eligible words within a selected text span.
 
176
 
177
  def get_wordnet_synonyms(word: str, pos_tag: Optional[str] = None) -> list[dict[str, Any]]:
178
  """Get synonyms from wn with optional POS filtering.
179
+ Includes custom synonyms with a flag. Also performs a reverse lookup."""
180
 
181
  # 1. Check for custom synonyms in in-memory store
182
  app_logger.info("custom_synonyms:")
183
  app_logger.info(custom_synonyms)
184
  word_lower = word.lower()
185
+ synonyms_by_sense: list[dict[str, Any]] = [] # Initialize the list here
186
+
187
+ # 1. Custom Synonym Lookup and Preparation
188
+ custom_synset = None # Initialize to None
189
+ # 1. Direct Lookup: Check if the word is directly in custom_synonyms
190
  if word_lower in custom_synonyms:
191
  app_logger.info(f"found custom_synonyms:{custom_synonyms[word_lower]} by word:{word_lower}!")
192
  # 2. If custom synonyms exist, create the appropriate structure and return
 
200
  }
201
  if pos_tag:
202
  custom_synset["pos"] = pos_tag
 
203
 
204
+ # 2. Reverse Lookup: Check if the word is a *synonym* of any custom word
205
+ for custom_word, synonym_list in custom_synonyms.items():
206
+ if word_lower in synonym_list:
207
+ app_logger.info(f"found reverse match: '{word_lower}' is a synonym of '{custom_word}'")
208
+ # Found a reverse match!
209
+ # Include the original custom_word in the synonym list
210
+ synonyms = [{"synonym": custom_word, "is_custom": True}] # Start with the original word
211
+ synonyms.extend([{"synonym": syn, "is_custom": True} for syn in custom_synonyms[custom_word]]) # Add the rest of the synonyms
212
 
213
+ custom_synset = {
214
+ 'definition': f'User-defined synonym (reverse match for "{word}").',
215
+ 'examples': [],
216
+ 'synonyms': synonyms
217
+ }
218
+ if pos_tag:
219
+ custom_synset["pos"] = pos_tag
220
+
221
+ # 3. WordNet Lookup
222
+ try:
223
  # Map spaCy POS to wn POS
224
  pos_map = {
225
  'NOUN': wn.NOUN,
 
258
  sense_data['synonyms'] = sorted(list(unique_synonyms))
259
  synonyms_by_sense.append(sense_data)
260
 
 
 
261
  except Exception as ex:
262
  app_logger.error(f"Error getting wn synonyms: {ex}")
263
  raise HTTPException(status_code=500, detail=f"Error retrieving synonyms: {str(ex)}")
264
 
265
+ # 4. Combine Custom and WordNet Synsets
266
+ if custom_synset:
267
+ synonyms_by_sense.insert(0, custom_synset) # Add custom synset at the beginning
268
+
269
+ return synonyms_by_sense
270
+
271
 
272
  def inflect_synonym(synonym: str, original_token_info: dict[str, Any]) -> str:
273
  """Adapt the input synonym arg to match the original word's grammatical form"""