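"""Extract sentences whose targets evoke selected frames from the RAI femicides split.

For every document in the main split, this script loads the "lome_0shot"
frame-semantic analysis, keeps annotations whose frame is in a small list of
frames of interest (e.g. "Killing", "Death"), pairs each target with its
syntactic construction, and groups the resulting sentences by
"<frame>++<construction>" key in output/scoring/extracted_frames.json.
"""
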
from collections import defaultdict
import json
import pandas as pd
from sociofillmore.common.analyze_text import analyze_single_document, load_deep_frames_cache


def main():
    # Frames whose evoking targets we want to extract
    frames_of_interest = ["Killing", "Death", "Dead_or_alive", "Event", "Catastrophe", "Undergoing"]

    # Metadata for all texts in the main RAI split
    texts_df = pd.read_csv(
        "output/femicides/split_data/rai/split_main.texts.meta.csv")
    deep_frames_cache = load_deep_frames_cache()

    # Maps "<frame>++<construction>" keys to the sentences in which they occur
    fcp_to_sentences = defaultdict(list)

    for doc_idx, (_, row) in enumerate(texts_df.iterrows()):
        # Lightweight progress indicator
        if doc_idx % 100 == 0:
            print(doc_idx)

        doc_analysis = analyze_single_document(row["text_id"], row["event_id"], "lome_0shot",
                                               "femicides/rai_main", texts_df, deep_frames_cache)
        for sent_idx, sent_analysis in enumerate(doc_analysis):
            sentence = " ".join(sent_analysis["sentence"])
            for fn_st in sent_analysis["fn_structures"]:
                frame = fn_st["frame"]
                # Index of the first target token, used to look up its syntactic construction
                tgt_idx = str(fn_st["target"]["tokens_idx"][0])
                if frame in frames_of_interest:
                    construction = sent_analysis["syntax"][tgt_idx][0]["syn_construction"]
                    fcp_to_sentences[f"{frame}++{construction}"].append({
                        "event_id": row["event_id"],
                        "frame": frame,
                        "construction": construction,
                        "target": fn_st["target"]["tokens_str"],
                        "text_id": row["text_id"],
                        "sentence_idx": sent_idx,
                        "sentence_str": sentence,
                        "selected_frame": frame,
                        "selected_cx": construction
                    })

    # Write all extracted frame/construction pairs to a JSON file for scoring
    with open("output/scoring/extracted_frames.json", "w") as f:
        json.dump(fcp_to_sentences, f, indent=4, sort_keys=True)
if __name__ == "__main__":
main()