"""Build lexical dictionaries (vehicles, persons) from Open Dutch WordNet.

For each seed lexical entry (e.g. ``automobiel-n-1``) the script collects the
entries of that synset plus every entry in any transitive hyponym synset,
merges in a few hand-picked extra words, optionally filters the result
against an ignore list, and dumps everything to a JSON file.
"""
import json
import os
import sys

sys.path.append("./libs")
from OpenDutchWordnet import Wn_grid_parser  # noqa: E402  (needs the sys.path tweak above)


def find_all_le_hyponyms(instance, le_id):
    """Return all lexical-entry IDs in the synset of ``le_id`` and below.

    :param instance: a loaded ``Wn_grid_parser``
    :param le_id: lexical-entry identifier, e.g. ``"automobiel-n-1"``
    :return: set of lexical-entry ID strings (siblings + transitive hyponyms)
    """
    print(f"Starting from `{le_id}`...")
    le_el = instance.les_find_le(le_id)
    le_ss = le_el.get_synset_id()
    # Entries that share the seed entry's synset ("siblings").
    siblings = {le.get_id() for le in instance.les_all_les_of_one_synset(le_ss)}
    print(f"Siblings: {siblings}")
    synset_el = instance.synsets_find_synset(le_ss)
    print(f"Top-level synset: `{le_el.get_synset_id()}`...")
    hyponyms = find_all_synset_hyponyms(instance, synset_el)
    return siblings.union(hyponyms)


def find_all_synset_hyponyms(instance, synset_el, _visited=None):
    """Recursively collect lexical-entry IDs of every hyponym synset.

    :param instance: a loaded ``Wn_grid_parser``
    :param synset_el: synset element whose hyponym subtree is expanded
    :param _visited: internal set of already-expanded synset IDs; guards
        against infinite recursion should the relation graph contain a
        cycle (callers should not pass this explicitly)
    :return: set of lexical-entry ID strings
    """
    if _visited is None:
        _visited = set()
    print(f"Finding hyponyms of synset with gloss: `{synset_el.get_glosses()[:1]}`...")
    hypo_les = set()
    for rel in synset_el.get_relations("has_hyponym"):
        hypo_ss = rel.get_target()
        print(hypo_ss)
        if hypo_ss in _visited:
            # Already expanded elsewhere in the traversal — skip to avoid
            # re-walking (or looping on) this subtree.
            continue
        _visited.add(hypo_ss)
        ss_les = {le.get_id() for le in instance.les_all_les_of_one_synset(hypo_ss)}
        for found_le in ss_les:
            print(f"\tfound LE: {found_le}")
        ss_les.update(
            find_all_synset_hyponyms(
                instance, instance.synsets_find_synset(hypo_ss), _visited
            )
        )
        hypo_les.update(ss_les)
    return hypo_les


def find_siblings_and_hyperonym(instance, le_id):
    """Debug helper: print the siblings of ``le_id`` and its hyperonym synset.

    :param instance: a loaded ``Wn_grid_parser``
    :param le_id: lexical-entry identifier
    """
    le_el = instance.les_find_le(le_id)
    le_ss = le_el.get_synset_id()
    siblings = {le.get_id() for le in instance.les_all_les_of_one_synset(le_ss)}
    print(siblings)
    synset_el = instance.synsets_find_synset(le_ss)
    hyper_rels = synset_el.get_relations("has_hyperonym")
    if not hyper_rels:
        # A root synset has no hyperonym; the original unguarded [0] would
        # raise IndexError here.
        print(f"No hyperonym relation found for synset `{le_ss}`")
        return
    hyper = hyper_rels[0]
    hyper_ss = instance.synsets_find_synset(hyper.get_target())
    print(hyper_ss.get_glosses())
    print({le.get_id() for le in instance.les_all_les_of_one_synset(hyper.get_target())})


def main():
    """Build the vehicle/person dictionaries and write them as JSON."""
    instance = Wn_grid_parser(Wn_grid_parser.odwn)
    dicts = {
        "vehicles": {
            "WN:cars": sorted(find_all_le_hyponyms(instance, "automobiel-n-1")),
            "WN:motorbikes": sorted(find_all_le_hyponyms(instance, "motorfiets-n-1")),
            "WN:bikes": sorted(find_all_le_hyponyms(instance, "fiets-n-1")),
            "WN:buses": sorted(find_all_le_hyponyms(instance, "autobus-n-1")),
            "extra": sorted(["scootmobiel", "e-bike"]),
        },
        "persons": {
            "WN:driver": sorted(find_all_le_hyponyms(instance, "bestuurder-n-2")),
            "WN:cyclist": sorted(find_all_le_hyponyms(instance, "fietser-n-1")),
            "WN:walker": sorted(find_all_le_hyponyms(instance, "loper-n-4")),
            "WN:pedestrian": sorted(find_all_le_hyponyms(instance, "voetganger-n-1")),
            "WN:victim": sorted(find_all_le_hyponyms(instance, "slachtoffer-n-4")),
            "extra": sorted(
                ["man", "vrouw", "jongen", "meisje", "persoon", "bejaarde", "maaltijdbezorger"]
            ),
        },
    }

    # Optionally filter each sub-category against a per-category ignore list.
    ignore_file = "output/crashes/predict_bechdel/lexical_dicts_ignore.json"
    if os.path.isfile(ignore_file):
        with open(ignore_file, encoding="utf-8") as f_ign:
            ignore = json.load(f_ign)
        cleaned_dicts = {}
        for category, subcats in dicts.items():
            cleaned_dicts[category] = {}
            for subcat, words in subcats.items():
                ignore_subcat = ignore.get(category, {}).get(subcat, [])
                cleaned_dicts[category][subcat] = [w for w in words if w not in ignore_subcat]
    else:
        cleaned_dicts = dicts

    # Make sure the output directory exists before writing (the original
    # open() would fail with FileNotFoundError on a fresh checkout).
    os.makedirs("output/crashes/predict_bechdel", exist_ok=True)
    with open("output/crashes/predict_bechdel/lexical_dicts.json", "w", encoding="utf-8") as f_out:
        json.dump(cleaned_dicts, f_out, indent=4)


if __name__ == "__main__":
    main()