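"""Build lexical dictionaries of vehicle and person terms from Open Dutch WordNet.

Starting from a handful of seed lexical entries (e.g. `automobiel-n-1`), the
script collects all synset siblings and (transitive) hyponyms, optionally
filters them against a hand-curated ignore list, and writes the result to
`output/crashes/predict_bechdel/lexical_dicts.json`.
"""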
import json
import os
import sys

# make the vendored OpenDutchWordnet package (under ./libs) importable
sys.path.append("./libs")
from OpenDutchWordnet import Wn_grid_parser


def find_all_le_hyponyms(instance, le_id):
    """Return IDs of all lexical entries that are synset siblings or (transitive) hyponyms of `le_id`."""
    print(f"Starting from `{le_id}`...")
    le_el = instance.les_find_le(le_id)
    le_ss = le_el.get_synset_id()

    # siblings: all lexical entries that share the seed entry's synset
    siblings = {le.get_id() for le in instance.les_all_les_of_one_synset(le_ss)}
    print(f"Siblings: {siblings}")

    synset_el = instance.synsets_find_synset(le_ss)
    print(f"Top-level synset: `{le_ss}`...")
    hyponyms = find_all_synset_hyponyms(instance, synset_el)
    return siblings.union(hyponyms)


def find_all_synset_hyponyms(instance, synset_el):
    """Recursively collect the IDs of all lexical entries in hyponym synsets of `synset_el`."""
    print(f"Finding hyponyms of synset with gloss: `{synset_el.get_glosses()[:1]}`...")
    hypo_les = set()
    hypo_rels = synset_el.get_relations("has_hyponym")
    for rel in hypo_rels:
        hypo_ss = rel.get_target()
        print(hypo_ss)
        ss_les = {le.get_id() for le in instance.les_all_les_of_one_synset(hypo_ss)}
        for i in ss_les:
            print(f"\tfound LE: {i}")
        # recurse into the hyponym synset to pick up deeper descendants
        ss_les.update(find_all_synset_hyponyms(instance, instance.synsets_find_synset(hypo_ss)))
        hypo_les.update(ss_les)
    return hypo_les


def find_siblings_and_hyperonym(instance, le_id):
    """Inspection helper (not called in `main`): print the siblings of `le_id`
    and the lexical entries of its first hyperonym synset."""
    le_el = instance.les_find_le(le_id)
    le_ss = le_el.get_synset_id()
    siblings = {le.get_id() for le in instance.les_all_les_of_one_synset(le_ss)}
    print(siblings)

    synset_el = instance.synsets_find_synset(le_ss)
    hyper = synset_el.get_relations("has_hyperonym")[0]
    hyper_ss = instance.synsets_find_synset(hyper.get_target())
    print(hyper_ss.get_glosses())
    print({le.get_id() for le in instance.les_all_les_of_one_synset(hyper.get_target())})


def main():
    instance = Wn_grid_parser(Wn_grid_parser.odwn)
    # find_all_le_hyponyms(instance, "slachtoffer-n-4")

    # seed lexical entries per category; "extra" lists hand-picked lemmas
    # added outside of WordNet
    dicts = {
        "vehicles": {
            "WN:cars": sorted(find_all_le_hyponyms(instance, "automobiel-n-1")),
            "WN:motorbikes": sorted(find_all_le_hyponyms(instance, "motorfiets-n-1")),
            "WN:bikes": sorted(find_all_le_hyponyms(instance, "fiets-n-1")),
            "WN:buses": sorted(find_all_le_hyponyms(instance, "autobus-n-1")),
            "extra": sorted(["scootmobiel", "e-bike"]),
        },
        "persons": {
            "WN:driver": sorted(find_all_le_hyponyms(instance, "bestuurder-n-2")),
            "WN:cyclist": sorted(find_all_le_hyponyms(instance, "fietser-n-1")),
            "WN:walker": sorted(find_all_le_hyponyms(instance, "loper-n-4")),
            "WN:pedestrian": sorted(find_all_le_hyponyms(instance, "voetganger-n-1")),
            "WN:victim": sorted(find_all_le_hyponyms(instance, "slachtoffer-n-4")),
            "extra": sorted(
                ["man", "vrouw", "jongen", "meisje", "persoon", "bejaarde", "maaltijdbezorger"]
            ),
        },
    }
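    # If present, the ignore file filters out unwanted lemmas. It mirrors the
    # structure of `dicts` (category -> subcategory -> list of lemmas to drop);
    # illustrative contents only:
    # {"vehicles": {"WN:cars": ["lemma-to-drop"]}, "persons": {}}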
    ignore_file = "output/crashes/predict_bechdel/lexical_dicts_ignore.json"
    if os.path.isfile(ignore_file):
        with open(ignore_file, encoding="utf-8") as f_ign:
            ignore = json.load(f_ign)
        cleaned_dicts = {}
        for category in dicts.keys():
            cleaned_dicts[category] = {}
            for subcat, words in dicts[category].items():
                ignore_subcat = ignore.get(category, {}).get(subcat, [])
                cleaned_dicts[category][subcat] = [w for w in words if w not in ignore_subcat]
    else:
        cleaned_dicts = dicts

    with open("output/crashes/predict_bechdel/lexical_dicts.json", "w", encoding="utf-8") as f_out:
        json.dump(cleaned_dicts, f_out, indent=4)


if __name__ == "__main__":
    main()
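# Illustrative shape of the resulting lexical_dicts.json (entry IDs below are
# examples, not actual ODWN query results):
# {
#     "vehicles": {"WN:cars": ["automobiel-n-1", ...], "extra": ["e-bike", "scootmobiel"]},
#     "persons": {"WN:driver": ["bestuurder-n-2", ...], "extra": ["bejaarde", ...]}
# }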