alessandro trinca tornidor
committed on
Commit
·
a434f16
1
Parent(s):
39ba49c
feat: handle reverse search for custom entries, merge them with the ones from the builtin wordnet corpus
Browse files
my_ghost_writer/text_parsers2.py
CHANGED
|
@@ -36,7 +36,6 @@ def is_nlp_available() -> bool:
|
|
| 36 |
return nlp is not None
|
| 37 |
|
| 38 |
|
| 39 |
-
# --- NEW: Main function for handling multi-word selections ---
|
| 40 |
def find_synonyms_for_phrase(text: str, start_idx: int, end_idx: int) -> list[WordSynonymResult]:
|
| 41 |
"""
|
| 42 |
Finds synonyms for all eligible words within a selected text span.
|
|
@@ -177,12 +176,17 @@ def extract_contextual_info_by_indices(text: str, start_idx: int, end_idx: int,
|
|
| 177 |
|
| 178 |
def get_wordnet_synonyms(word: str, pos_tag: Optional[str] = None) -> list[dict[str, Any]]:
|
| 179 |
"""Get synonyms from wn with optional POS filtering.
|
| 180 |
-
Includes custom synonyms with a flag."""
|
| 181 |
|
| 182 |
# 1. Check for custom synonyms in in-memory store
|
| 183 |
app_logger.info("custom_synonyms:")
|
| 184 |
app_logger.info(custom_synonyms)
|
| 185 |
word_lower = word.lower()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
if word_lower in custom_synonyms:
|
| 187 |
app_logger.info(f"found custom_synonyms:{custom_synonyms[word_lower]} by word:{word_lower}!")
|
| 188 |
# 2. If custom synonyms exist, create the appropriate structure and return
|
|
@@ -196,12 +200,26 @@ def get_wordnet_synonyms(word: str, pos_tag: Optional[str] = None) -> list[dict[
|
|
| 196 |
}
|
| 197 |
if pos_tag:
|
| 198 |
custom_synset["pos"] = pos_tag
|
| 199 |
-
return [custom_synset] # Returns a list containing one synset
|
| 200 |
|
| 201 |
-
#
|
| 202 |
-
|
| 203 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 205 |
# Map spaCy POS to wn POS
|
| 206 |
pos_map = {
|
| 207 |
'NOUN': wn.NOUN,
|
|
@@ -240,12 +258,16 @@ def get_wordnet_synonyms(word: str, pos_tag: Optional[str] = None) -> list[dict[
|
|
| 240 |
sense_data['synonyms'] = sorted(list(unique_synonyms))
|
| 241 |
synonyms_by_sense.append(sense_data)
|
| 242 |
|
| 243 |
-
return synonyms_by_sense
|
| 244 |
-
|
| 245 |
except Exception as ex:
|
| 246 |
app_logger.error(f"Error getting wn synonyms: {ex}")
|
| 247 |
raise HTTPException(status_code=500, detail=f"Error retrieving synonyms: {str(ex)}")
|
| 248 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 249 |
|
| 250 |
def inflect_synonym(synonym: str, original_token_info: dict[str, Any]) -> str:
|
| 251 |
"""Adapt the input synonym arg to match the original word's grammatical form"""
|
|
|
|
| 36 |
return nlp is not None
|
| 37 |
|
| 38 |
|
|
|
|
| 39 |
def find_synonyms_for_phrase(text: str, start_idx: int, end_idx: int) -> list[WordSynonymResult]:
|
| 40 |
"""
|
| 41 |
Finds synonyms for all eligible words within a selected text span.
|
|
|
|
| 176 |
|
| 177 |
def get_wordnet_synonyms(word: str, pos_tag: Optional[str] = None) -> list[dict[str, Any]]:
|
| 178 |
"""Get synonyms from wn with optional POS filtering.
|
| 179 |
+
Includes custom synonyms with a flag. Also performs a reverse lookup."""
|
| 180 |
|
| 181 |
# 1. Check for custom synonyms in in-memory store
|
| 182 |
app_logger.info("custom_synonyms:")
|
| 183 |
app_logger.info(custom_synonyms)
|
| 184 |
word_lower = word.lower()
|
| 185 |
+
synonyms_by_sense: list[dict[str, Any]] = [] # Initialize the list here
|
| 186 |
+
|
| 187 |
+
# 1. Custom Synonym Lookup and Preparation
|
| 188 |
+
custom_synset = None # Initialize to None
|
| 189 |
+
# 1. Direct Lookup: Check if the word is directly in custom_synonyms
|
| 190 |
if word_lower in custom_synonyms:
|
| 191 |
app_logger.info(f"found custom_synonyms:{custom_synonyms[word_lower]} by word:{word_lower}!")
|
| 192 |
# 2. If custom synonyms exist, create the appropriate structure and return
|
|
|
|
| 200 |
}
|
| 201 |
if pos_tag:
|
| 202 |
custom_synset["pos"] = pos_tag
|
|
|
|
| 203 |
|
| 204 |
+
# 2. Reverse Lookup: Check if the word is a *synonym* of any custom word
|
| 205 |
+
for custom_word, synonym_list in custom_synonyms.items():
|
| 206 |
+
if word_lower in synonym_list:
|
| 207 |
+
app_logger.info(f"found reverse match: '{word_lower}' is a synonym of '{custom_word}'")
|
| 208 |
+
# Found a reverse match!
|
| 209 |
+
# Include the original custom_word in the synonym list
|
| 210 |
+
synonyms = [{"synonym": custom_word, "is_custom": True}] # Start with the original word
|
| 211 |
+
synonyms.extend([{"synonym": syn, "is_custom": True} for syn in custom_synonyms[custom_word]]) # Add the rest of the synonyms
|
| 212 |
|
| 213 |
+
custom_synset = {
|
| 214 |
+
'definition': f'User-defined synonym (reverse match for "{word}").',
|
| 215 |
+
'examples': [],
|
| 216 |
+
'synonyms': synonyms
|
| 217 |
+
}
|
| 218 |
+
if pos_tag:
|
| 219 |
+
custom_synset["pos"] = pos_tag
|
| 220 |
+
|
| 221 |
+
# 3. WordNet Lookup
|
| 222 |
+
try:
|
| 223 |
# Map spaCy POS to wn POS
|
| 224 |
pos_map = {
|
| 225 |
'NOUN': wn.NOUN,
|
|
|
|
| 258 |
sense_data['synonyms'] = sorted(list(unique_synonyms))
|
| 259 |
synonyms_by_sense.append(sense_data)
|
| 260 |
|
|
|
|
|
|
|
| 261 |
except Exception as ex:
|
| 262 |
app_logger.error(f"Error getting wn synonyms: {ex}")
|
| 263 |
raise HTTPException(status_code=500, detail=f"Error retrieving synonyms: {str(ex)}")
|
| 264 |
|
| 265 |
+
# 4. Combine Custom and WordNet Synsets
|
| 266 |
+
if custom_synset:
|
| 267 |
+
synonyms_by_sense.insert(0, custom_synset) # Add custom synset at the beginning
|
| 268 |
+
|
| 269 |
+
return synonyms_by_sense
|
| 270 |
+
|
| 271 |
|
| 272 |
def inflect_synonym(synonym: str, original_token_info: dict[str, Any]) -> str:
|
| 273 |
"""Adapt the input synonym arg to match the original word's grammatical form"""
|