|
from collections import defaultdict |
|
import json |
|
|
|
import pandas as pd |
|
|
|
from sociofillmore.common.analyze_text import analyze_single_document, load_deep_frames_cache |
|
|
|
|
|
def main():
    """Extract sentences that evoke selected frames and dump them as JSON.

    Reads the text metadata CSV, runs ``analyze_single_document`` on every
    row, and for each frame structure whose frame is one of the frames of
    interest stores a record under the key ``"<frame>++<construction>"``.
    The grouped records are written to
    ``output/scoring/extracted_frames.json``.
    """
    # Only membership tests are performed on this collection, so use a set.
    frames_of_interest = {
        "Killing",
        "Death",
        "Dead_or_alive",
        "Event",
        "Catastrophe",
        "Undergoing",
    }

    texts_df = pd.read_csv(
        "output/femicides/split_data/rai/split_main.texts.meta.csv"
    )
    deep_frames_cache = load_deep_frames_cache()

    # Maps "<frame>++<construction>" to the list of matching sentence records.
    fcp_to_sentences = defaultdict(list)

    for doc_num, (_, row) in enumerate(texts_df.iterrows()):
        # Progress indicator: print the running document count every 100 rows.
        if doc_num % 100 == 0:
            print(doc_num)

        doc_analysis = analyze_single_document(
            row["text_id"],
            row["event_id"],
            "lome_0shot",
            "femicides/rai_main",
            texts_df,
            deep_frames_cache,
        )

        # The sentence index gets its own name so that it does not shadow
        # the document counter of the enclosing loop.
        for sent_idx, sent_analysis in enumerate(doc_analysis):
            sentence = " ".join(sent_analysis["sentence"])

            for fn_st in sent_analysis["fn_structures"]:
                frame = fn_st["frame"]
                if frame not in frames_of_interest:
                    continue

                # The syntax mapping is indexed by the stringified index of
                # the target's first token; resolve it only for frames that
                # are actually kept.
                tgt_idx = str(fn_st["target"]["tokens_idx"][0])
                construction = sent_analysis["syntax"][tgt_idx][0]["syn_construction"]

                fcp_to_sentences[f"{frame}++{construction}"].append({
                    "event_id": row["event_id"],
                    "frame": frame,
                    "construction": construction,
                    "target": fn_st["target"]["tokens_str"],
                    "text_id": row["text_id"],
                    "sentence_idx": sent_idx,
                    "sentence_str": sentence,
                    "selected_frame": frame,
                    "selected_cx": construction,
                })

    with open("output/scoring/extracted_frames.json", "w", encoding="utf-8") as f:
        json.dump(fcp_to_sentences, f, indent=4, sort_keys=True)
|
|
|
|
|
# Run the extraction only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|