|
import sys |
|
import os |
|
import json |
|
|
|
sys.path.append("./libs") |
|
from OpenDutchWordnet import Wn_grid_parser |
|
|
|
|
|
def find_all_le_hyponyms(instance, le_id):
    """Return the IDs of every lexical entry below `le_id`'s synset.

    The result contains the synset "siblings" of `le_id` (all lexical
    entries attached to the same synset) plus, recursively, all lexical
    entries of every hyponym synset.
    """
    print(f"Starting from `{le_id}`...")

    entry = instance.les_find_le(le_id)
    synset_id = entry.get_synset_id()

    # Entries sharing the synset of `le_id` are included in the result.
    sibling_ids = {sib.get_id() for sib in instance.les_all_les_of_one_synset(synset_id)}
    print(f"Siblings: {sibling_ids}")

    synset = instance.synsets_find_synset(synset_id)
    print(f"Top-level synset: `{entry.get_synset_id()}`...")

    # Walk the has_hyponym relations recursively and merge with the siblings.
    return sibling_ids | find_all_synset_hyponyms(instance, synset)
|
|
|
|
|
def find_all_synset_hyponyms(instance, synset_el):
    """Recursively collect lexical-entry IDs of all hyponym synsets of `synset_el`.

    Follows every `has_hyponym` relation, gathers the IDs of the lexical
    entries of each target synset, and recurses into that synset's own
    hyponyms. Returns a (possibly empty) set of lexical-entry IDs.
    """
    print(f"Finding hyponyms of synset with gloss: `{synset_el.get_glosses()[:1]}`...")

    collected = set()

    for relation in synset_el.get_relations("has_hyponym"):
        target_id = relation.get_target()
        print(target_id)

        # Lexical entries directly attached to this hyponym synset.
        entry_ids = {entry.get_id() for entry in instance.les_all_les_of_one_synset(target_id)}
        for entry_id in entry_ids:
            print(f"\tfound LE: {entry_id}")

        # Descend one level and merge everything found below.
        entry_ids |= find_all_synset_hyponyms(instance, instance.synsets_find_synset(target_id))
        collected |= entry_ids

    return collected
|
|
|
|
|
def find_siblings_and_hyperonym(instance, le_id):
    """Print the synset siblings of `le_id` and the members of its first hyperonym synset.

    Debugging helper: all output goes to stdout; returns None.
    If the synset has no `has_hyperonym` relation (e.g. a top-level
    synset), a message is printed instead of raising.
    """
    le_el = instance.les_find_le(le_id)

    le_ss = le_el.get_synset_id()

    siblings = {le.get_id() for le in instance.les_all_les_of_one_synset(le_ss)}
    print(siblings)

    synset_el = instance.synsets_find_synset(le_ss)

    hyper_rels = synset_el.get_relations("has_hyperonym")
    if not hyper_rels:
        # Bug fix: the original indexed [0] unconditionally, raising
        # IndexError for synsets without a hyperonym.
        print(f"No hyperonym found for `{le_id}`")
        return

    hyper = hyper_rels[0]

    hyper_ss = instance.synsets_find_synset(hyper.get_target())

    print(hyper_ss.get_glosses())
    print({le.get_id() for le in instance.les_all_les_of_one_synset(hyper.get_target())})
|
|
|
|
|
def main():
    """Build Dutch lexical dictionaries (vehicles, persons) from Open Dutch
    WordNet, filter them against an optional ignore list, and write the
    result to a JSON file.
    """
    instance = Wn_grid_parser(Wn_grid_parser.odwn)

    dicts = {
        "vehicles": {
            "WN:cars": sorted(find_all_le_hyponyms(instance, "automobiel-n-1")),
            "WN:motorbikes": sorted(find_all_le_hyponyms(instance, "motorfiets-n-1")),
            "WN:bikes": sorted(find_all_le_hyponyms(instance, "fiets-n-1")),
            "WN:buses": sorted(find_all_le_hyponyms(instance, "autobus-n-1")),
            # Manually curated additions not present in ODWN.
            "extra": sorted(["scootmobiel", "e-bike"]),
        },
        "persons": {
            "WN:driver": sorted(find_all_le_hyponyms(instance, "bestuurder-n-2")),
            "WN:cyclist": sorted(find_all_le_hyponyms(instance, "fietser-n-1")),
            "WN:walker": sorted(find_all_le_hyponyms(instance, "loper-n-4")),
            "WN:pedestrian": sorted(find_all_le_hyponyms(instance, "voetganger-n-1")),
            "WN:victim": sorted(find_all_le_hyponyms(instance, "slachtoffer-n-4")),
            "extra": sorted(
                ["man", "vrouw", "jongen", "meisje", "persoon", "bejaarde", "maaltijdbezorger"]
            ),
        },
    }

    # Single source of truth for the output location (was duplicated).
    out_dir = "output/crashes/predict_bechdel"

    # Optional per-subcategory ignore list: {category: {subcat: [words]}}.
    ignore_file = os.path.join(out_dir, "lexical_dicts_ignore.json")
    if os.path.isfile(ignore_file):
        with open(ignore_file, encoding="utf-8") as f_ign:
            ignore = json.load(f_ign)

        cleaned_dicts = {}
        for category in dicts.keys():
            cleaned_dicts[category] = {}
            for subcat, words in dicts[category].items():
                ignore_subcat = ignore.get(category, {}).get(subcat, [])
                cleaned_dicts[category][subcat] = [w for w in words if w not in ignore_subcat]
    else:
        cleaned_dicts = dicts

    # Bug fix: the directory was never created, so a fresh checkout crashed
    # with FileNotFoundError on the write below.
    os.makedirs(out_dir, exist_ok=True)

    out_file = os.path.join(out_dir, "lexical_dicts.json")
    with open(out_file, "w", encoding="utf-8") as f_out:
        # ensure_ascii=False keeps Dutch accented characters readable in the JSON.
        json.dump(cleaned_dicts, f_out, indent=4, ensure_ascii=False)
|
|
|
|
|
# Script entry point: only run when executed directly, not on import.
if __name__ == "__main__":

    main()
|
|