"""Build lexical dictionaries (vehicles, persons) from Open Dutch WordNet.

For each seed lexical entry (e.g. ``automobiel-n-1``) the script collects the
entries of that synset plus every entry in any transitive hyponym synset,
merges in a few hand-picked extra words, optionally filters the result
against an ignore list, and dumps everything to a JSON file.
"""
import json
import os
import sys

sys.path.append("./libs")
from OpenDutchWordnet import Wn_grid_parser  # noqa: E402  (needs the sys.path tweak above)


def find_all_le_hyponyms(instance, le_id):
    """Return all lexical-entry IDs in the synset of ``le_id`` and below.

    :param instance: a loaded ``Wn_grid_parser``
    :param le_id: lexical-entry identifier, e.g. ``"automobiel-n-1"``
    :return: set of lexical-entry ID strings (siblings + transitive hyponyms)
    """
    print(f"Starting from `{le_id}`...")
    le_el = instance.les_find_le(le_id)
    le_ss = le_el.get_synset_id()
    # Entries that share the seed entry's synset ("siblings").
    siblings = {le.get_id() for le in instance.les_all_les_of_one_synset(le_ss)}
    print(f"Siblings: {siblings}")
    synset_el = instance.synsets_find_synset(le_ss)
    print(f"Top-level synset: `{le_el.get_synset_id()}`...")
    hyponyms = find_all_synset_hyponyms(instance, synset_el)
    return siblings.union(hyponyms)


def find_all_synset_hyponyms(instance, synset_el, _visited=None):
    """Recursively collect lexical-entry IDs of every hyponym synset.

    :param instance: a loaded ``Wn_grid_parser``
    :param synset_el: synset element whose hyponym subtree is expanded
    :param _visited: internal set of already-expanded synset IDs; guards
        against infinite recursion should the relation graph contain a
        cycle (callers should not pass this explicitly)
    :return: set of lexical-entry ID strings
    """
    if _visited is None:
        _visited = set()
    print(f"Finding hyponyms of synset with gloss: `{synset_el.get_glosses()[:1]}`...")
    hypo_les = set()
    for rel in synset_el.get_relations("has_hyponym"):
        hypo_ss = rel.get_target()
        print(hypo_ss)
        if hypo_ss in _visited:
            # Already expanded elsewhere in the traversal — skip to avoid
            # re-walking (or looping on) this subtree.
            continue
        _visited.add(hypo_ss)
        ss_les = {le.get_id() for le in instance.les_all_les_of_one_synset(hypo_ss)}
        for found_le in ss_les:
            print(f"\tfound LE: {found_le}")
        ss_les.update(
            find_all_synset_hyponyms(
                instance, instance.synsets_find_synset(hypo_ss), _visited
            )
        )
        hypo_les.update(ss_les)
    return hypo_les


def find_siblings_and_hyperonym(instance, le_id):
    """Debug helper: print the siblings of ``le_id`` and its hyperonym synset.

    :param instance: a loaded ``Wn_grid_parser``
    :param le_id: lexical-entry identifier
    """
    le_el = instance.les_find_le(le_id)
    le_ss = le_el.get_synset_id()
    siblings = {le.get_id() for le in instance.les_all_les_of_one_synset(le_ss)}
    print(siblings)
    synset_el = instance.synsets_find_synset(le_ss)
    hyper_rels = synset_el.get_relations("has_hyperonym")
    if not hyper_rels:
        # A root synset has no hyperonym; the original unguarded [0] would
        # raise IndexError here.
        print(f"No hyperonym relation found for synset `{le_ss}`")
        return
    hyper = hyper_rels[0]
    hyper_ss = instance.synsets_find_synset(hyper.get_target())
    print(hyper_ss.get_glosses())
    print({le.get_id() for le in instance.les_all_les_of_one_synset(hyper.get_target())})


def main():
    """Build the vehicle/person dictionaries and write them as JSON."""
    instance = Wn_grid_parser(Wn_grid_parser.odwn)
    dicts = {
        "vehicles": {
            "WN:cars": sorted(find_all_le_hyponyms(instance, "automobiel-n-1")),
            "WN:motorbikes": sorted(find_all_le_hyponyms(instance, "motorfiets-n-1")),
            "WN:bikes": sorted(find_all_le_hyponyms(instance, "fiets-n-1")),
            "WN:buses": sorted(find_all_le_hyponyms(instance, "autobus-n-1")),
            "extra": sorted(["scootmobiel", "e-bike"]),
        },
        "persons": {
            "WN:driver": sorted(find_all_le_hyponyms(instance, "bestuurder-n-2")),
            "WN:cyclist": sorted(find_all_le_hyponyms(instance, "fietser-n-1")),
            "WN:walker": sorted(find_all_le_hyponyms(instance, "loper-n-4")),
            "WN:pedestrian": sorted(find_all_le_hyponyms(instance, "voetganger-n-1")),
            "WN:victim": sorted(find_all_le_hyponyms(instance, "slachtoffer-n-4")),
            "extra": sorted(
                ["man", "vrouw", "jongen", "meisje", "persoon", "bejaarde", "maaltijdbezorger"]
            ),
        },
    }

    # Optionally filter each sub-category against a per-category ignore list.
    ignore_file = "output/crashes/predict_bechdel/lexical_dicts_ignore.json"
    if os.path.isfile(ignore_file):
        with open(ignore_file, encoding="utf-8") as f_ign:
            ignore = json.load(f_ign)
        cleaned_dicts = {}
        for category, subcats in dicts.items():
            cleaned_dicts[category] = {}
            for subcat, words in subcats.items():
                ignore_subcat = ignore.get(category, {}).get(subcat, [])
                cleaned_dicts[category][subcat] = [w for w in words if w not in ignore_subcat]
    else:
        cleaned_dicts = dicts

    # Make sure the output directory exists before writing (the original
    # open() would fail with FileNotFoundError on a fresh checkout).
    os.makedirs("output/crashes/predict_bechdel", exist_ok=True)
    with open("output/crashes/predict_bechdel/lexical_dicts.json", "w", encoding="utf-8") as f_out:
        json.dump(cleaned_dicts, f_out, indent=4)


if __name__ == "__main__":
    main()