"""Build lexical dictionaries (vehicles, persons) of Dutch WordNet hyponyms and dump them to JSON."""
import json
import os
import sys
from pathlib import Path

sys.path.append("./libs")
from OpenDutchWordnet import Wn_grid_parser
def find_all_le_hyponyms(instance, le_id):
    """Collect the IDs of *le_id*'s synset siblings plus every LE under its hyponym subtree.

    Starts from the lexical entry `le_id`, gathers all lexical entries sharing
    its synset, then recurses through "has_hyponym" relations via
    find_all_synset_hyponyms. Returns a set of lexical-entry IDs.
    """
    print(f"Starting from `{le_id}`...")
    lexical_entry = instance.les_find_le(le_id)
    synset_id = lexical_entry.get_synset_id()
    # All lexical entries that share the starting entry's synset.
    sibling_ids = {entry.get_id() for entry in instance.les_all_les_of_one_synset(synset_id)}
    print(f"Siblings: {sibling_ids}")
    synset = instance.synsets_find_synset(synset_id)
    print(f"Top-level synset: `{lexical_entry.get_synset_id()}`...")
    # Recurse down the hyponym tree and merge with the siblings.
    descendant_ids = find_all_synset_hyponyms(instance, synset)
    return sibling_ids | descendant_ids
def find_all_synset_hyponyms(instance, synset_el):
    """Recursively collect lexical-entry IDs of all hyponym synsets below *synset_el*.

    Follows every "has_hyponym" relation, gathers the lexical entries of each
    target synset, and descends depth-first. Returns a set of LE ID strings.
    """
    print(f"Finding hyponyms of synset with gloss: `{synset_el.get_glosses()[:1]}`...")
    collected = set()
    for relation in synset_el.get_relations("has_hyponym"):
        target_id = relation.get_target()
        print(target_id)
        # Lexical entries attached directly to this hyponym synset.
        entry_ids = {entry.get_id() for entry in instance.les_all_les_of_one_synset(target_id)}
        for entry_id in entry_ids:
            print(f"\tfound LE: {entry_id}")
        # Depth-first descent into the hyponym's own subtree.
        child_synset = instance.synsets_find_synset(target_id)
        entry_ids |= find_all_synset_hyponyms(instance, child_synset)
        collected |= entry_ids
    return collected
def find_siblings_and_hyperonym(instance, le_id):
    """Print *le_id*'s synset siblings and the glosses/LEs of its first hyperonym synset.

    Diagnostic helper: prints only, returns None. Guards against synsets that
    have no "has_hyperonym" relation (the original `[0]` indexing raised
    IndexError for such top-level synsets).
    """
    le_el = instance.les_find_le(le_id)
    le_ss = le_el.get_synset_id()
    siblings = {le.get_id() for le in instance.les_all_les_of_one_synset(le_ss)}
    print(siblings)
    synset_el = instance.synsets_find_synset(le_ss)
    hyper_rels = synset_el.get_relations("has_hyperonym")
    if not hyper_rels:
        # Top-of-hierarchy synsets have no hyperonym; bail out instead of crashing.
        print(f"No hyperonym relation found for `{le_id}`")
        return
    hyper = hyper_rels[0]
    hyper_ss = instance.synsets_find_synset(hyper.get_target())
    print(hyper_ss.get_glosses())
    print({le.get_id() for le in instance.les_all_les_of_one_synset(hyper.get_target())})
def main():
    """Build vehicle/person lexical dictionaries from Open Dutch WordNet and dump them as JSON.

    For a set of seed lexical entries, collects every hyponym LE ID, merges in
    hand-picked extra words, optionally removes words listed in an "ignore"
    JSON file, and writes the result to
    output/crashes/predict_bechdel/lexical_dicts.json.
    """
    instance = Wn_grid_parser(Wn_grid_parser.odwn)
    dicts = {
        "vehicles": {
            "WN:cars": sorted(find_all_le_hyponyms(instance, "automobiel-n-1")),
            "WN:motorbikes": sorted(find_all_le_hyponyms(instance, "motorfiets-n-1")),
            "WN:bikes": sorted(find_all_le_hyponyms(instance, "fiets-n-1")),
            "WN:buses": sorted(find_all_le_hyponyms(instance, "autobus-n-1")),
            "extra": sorted(["scootmobiel", "e-bike"])
        },
        "persons": {
            "WN:driver": sorted(find_all_le_hyponyms(instance, "bestuurder-n-2")),
            "WN:cyclist": sorted(find_all_le_hyponyms(instance, "fietser-n-1")),
            "WN:walker": sorted(find_all_le_hyponyms(instance, "loper-n-4")),
            "WN:pedestrian": sorted(find_all_le_hyponyms(instance, "voetganger-n-1")),
            "WN:victim": sorted(find_all_le_hyponyms(instance, "slachtoffer-n-4")),
            "extra": sorted(
                ["man", "vrouw", "jongen", "meisje", "persoon", "bejaarde", "maaltijdbezorger"]
            )
        }
    }
    out_dir = Path("output/crashes/predict_bechdel")
    # Fix: open() below failed with FileNotFoundError when the directory was missing.
    out_dir.mkdir(parents=True, exist_ok=True)
    ignore_file = out_dir / "lexical_dicts_ignore.json"
    if ignore_file.is_file():
        ignore = json.loads(ignore_file.read_text(encoding="utf-8"))
        # Drop any word listed under the matching category/subcategory of the ignore file.
        cleaned_dicts = {
            category: {
                subcat: [w for w in words if w not in ignore.get(category, {}).get(subcat, [])]
                for subcat, words in subdict.items()
            }
            for category, subdict in dicts.items()
        }
    else:
        cleaned_dicts = dicts
    with open(out_dir / "lexical_dicts.json", "w", encoding="utf-8") as f_out:
        # ensure_ascii=False keeps accented Dutch words human-readable in the output.
        json.dump(cleaned_dicts, f_out, indent=4, ensure_ascii=False)


if __name__ == "__main__":
    main()