"""Collect sentences that evoke selected frames, grouped by frame/construction.

Every document listed in the texts metadata CSV is analyzed, and each frame
structure whose frame is in ``FRAMES_OF_INTEREST`` is recorded under the key
``"<frame>++<construction>"``. The grouped records are written to a JSON file.
"""

from collections import defaultdict
import json
from pathlib import Path

import pandas as pd

from sociofillmore.common.analyze_text import analyze_single_document, load_deep_frames_cache

# Frames whose occurrences are extracted; a frozenset gives O(1) membership tests.
FRAMES_OF_INTEREST = frozenset(
    ["Killing", "Death", "Dead_or_alive", "Event", "Catastrophe", "Undergoing"]
)

TEXTS_META_CSV = "output/femicides/split_data/rai/split_main.texts.meta.csv"
OUTPUT_JSON = "output/scoring/extracted_frames.json"

# Identifiers passed straight through to ``analyze_single_document``.
MODEL_NAME = "lome_0shot"
DATASET_NAME = "femicides/rai_main"

# A progress counter is printed once every this many documents.
PROGRESS_EVERY = 100


def main() -> None:
    """Extract frame/construction occurrences from all texts and dump them as JSON.

    Reads the texts metadata from ``TEXTS_META_CSV``, analyzes each row's
    document, and appends one record per matching frame structure to a mapping
    keyed by ``"<frame>++<construction>"``. The mapping is written to
    ``OUTPUT_JSON`` with sorted keys and 4-space indentation.
    """
    texts_df = pd.read_csv(TEXTS_META_CSV)
    deep_frames_cache = load_deep_frames_cache()

    fcp_to_sentences = defaultdict(list)

    for doc_num, (_, row) in enumerate(texts_df.iterrows()):
        if doc_num % PROGRESS_EVERY == 0:
            print(doc_num)

        doc_analysis = analyze_single_document(
            row["text_id"],
            row["event_id"],
            MODEL_NAME,
            DATASET_NAME,
            texts_df,
            deep_frames_cache,
        )

        # Use a dedicated name for the sentence index so that it does not
        # shadow the document counter of the enclosing loop.
        for sentence_idx, sent_analysis in enumerate(doc_analysis):
            sentence = " ".join(sent_analysis["sentence"])

            for fn_st in sent_analysis["fn_structures"]:
                frame = fn_st["frame"]
                if frame not in FRAMES_OF_INTEREST:
                    # Skip before touching the target so that structures of
                    # uninteresting frames cost (and can break) nothing.
                    continue

                # The syntax mapping is indexed by the stringified index of
                # the first target token.
                tgt_idx = str(fn_st["target"]["tokens_idx"][0])
                construction = sent_analysis["syntax"][tgt_idx][0]["syn_construction"]

                fcp_to_sentences[f"{frame}++{construction}"].append({
                    "event_id": row["event_id"],
                    "frame": frame,
                    "construction": construction,
                    "target": fn_st["target"]["tokens_str"],
                    "text_id": row["text_id"],
                    "sentence_idx": sentence_idx,
                    "sentence_str": sentence,
                    # Duplicates of "frame"/"construction", kept so the output
                    # schema stays unchanged.
                    "selected_frame": frame,
                    "selected_cx": construction,
                })

    # Make sure the destination directory exists so that a missing folder does
    # not discard the results of the whole analysis run.
    output_path = Path(OUTPUT_JSON)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(fcp_to_sentences, f, indent=4, sort_keys=True)


if __name__ == "__main__":
    main()