# gossminn's picture
# Add frame subset selector
# fda69e1
import streamlit as st
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from ast import literal_eval
from lxml import etree as ET
def prepare_data(csv_path="data/2002829_mapped_roles.csv"):
    """Load the mapped-roles table and derive the columns used by the plots.

    The CSV columns "frame", "changed_roles", "unchanged_roles" and "roles"
    are parsed as Python literals. Each "frame" value is indexed as
    ``(sentence_idx, frame_idx, frame_name, frame_target)``.

    Args:
        csv_path: Location of the mapped-roles CSV file.

    Returns:
        A DataFrame with the columns "sentence_idx", "frame_idx",
        "frame_name", "frame_scenario", "frame_super_scenario",
        "frame_target", "changed_roles", "roles", "time_point" and
        "first_role".

    Raises:
        KeyError: If a frame name has no entry in the Kicktionary mappings.
    """
    literal_columns = ("frame", "changed_roles", "unchanged_roles", "roles")
    data = pd.read_csv(
        csv_path,
        index_col=0,
        converters={column: literal_eval for column in literal_columns},
    )
    frame_to_scenario, frame_to_super_scenario = load_kicktionary_info()

    # extract information from "frame" tuples, filter & reorder columns
    frames = data["frame"]
    frame_names = frames.apply(lambda frame: frame[2])
    data_ = data.assign(
        sentence_idx=frames.apply(lambda frame: frame[0]),
        frame_idx=frames.apply(lambda frame: frame[1]),
        frame_name=frame_names,
        frame_scenario=frame_names.apply(lambda name: frame_to_scenario[name]),
        frame_super_scenario=frame_names.apply(
            lambda name: frame_to_super_scenario[name]
        ),
        frame_target=frames.apply(lambda frame: frame[3]),
    )[
        [
            "sentence_idx",
            "frame_idx",
            "frame_name",
            "frame_scenario",
            "frame_super_scenario",
            "frame_target",
            "changed_roles",
            "roles",
        ]
    ]

    # assign a normalised value to represent each frame instance's "time"
    # point in the article:
    #   (sentence_idx + frame_idx / max frame_idx in sentence) / (max_sent + 1)
    max_sent = data_["sentence_idx"].max()
    max_frame_in_sent = data_.groupby("sentence_idx")["frame_idx"].transform("max")
    # A sentence whose largest frame_idx is 0 would divide by zero; such a
    # frame gets no within-sentence offset instead.
    within_sentence = (
        data_["frame_idx"]
        .div(max_frame_in_sent.where(max_frame_in_sent != 0))
        .fillna(0.0)
    )
    data_with_time = data_.assign(
        time_point=(data_["sentence_idx"] + within_sentence) / (max_sent + 1)
    )

    # the first changed role (if any) is what the plots call the "focus"
    data_with_first_roles = data_with_time.assign(
        first_role=data_with_time["changed_roles"].apply(
            lambda roles: roles[0] if len(roles) > 0 else None
        )
    )
    return data_with_first_roles
def load_kicktionary_info(xml_path="kicktionary_lu_info.xml"):
    """Read the frame -> scenario and frame -> super-scenario mappings.

    Every ``LEXICAL-UNIT`` element in the XML file that carries a non-empty
    ``frame`` attribute contributes one entry to each mapping; if several
    units share a frame, the last one in document order wins.

    Args:
        xml_path: Location of the Kicktionary lexical-unit XML file.

    Returns:
        A ``(frame_to_scenario, frame_to_super_scenario)`` tuple of dicts
        keyed by frame name.

    Raises:
        KeyError: If a unit with a frame lacks the ``scenario`` or
            ``super-scenario`` attribute.
    """
    kicktionary = ET.parse(xml_path)
    frame_to_scenario = {}
    frame_to_super_scenario = {}
    # One pass fills both mappings instead of running the XPath query twice.
    for lu in kicktionary.xpath(".//LEXICAL-UNIT"):
        # .get() so that units without a "frame" attribute are skipped like
        # units with an empty one, rather than raising KeyError.
        frame = lu.attrib.get("frame")
        if not frame:
            continue
        frame_to_scenario[frame] = lu.attrib["scenario"]
        frame_to_super_scenario[frame] = lu.attrib["super-scenario"]
    return frame_to_scenario, frame_to_super_scenario
def _plot_value_counts(values, column):
    """Return a plotly bar chart of how often each value occurs in *values*.

    The counts Series is explicitly named *column* and its index name is
    cleared, so ``y=column`` resolves regardless of how the installed pandas
    version names the result of ``value_counts()``.
    """
    counts = values.value_counts().rename(column).rename_axis(None)
    return counts.plot.bar(y=column, backend="plotly")


def explore_timeline(teams=("Man. United", "Rangers")):
    """Render the "Football Perspective Chains" Streamlit page.

    The page offers a selector for the frame granularity (frame, scenario or
    super scenario) and a multiselect to restrict the frames shown, followed
    by three sections: a timeline scatter plot, an overall bar chart of
    first roles, and one bar chart of frame counts per team.

    Args:
        teams: Values of "first_role" for which a "Focus by frame" chart is
            drawn, in display order.
    """
    data = prepare_data()

    with st.container():
        st.title("Football Perspective Chains")

        frame_label_map = {
            "frame_name": "frames",
            "frame_scenario": "scenarios (groups of related frames)",
            "frame_super_scenario": "super scenarios (groups of related scenarios)",
        }
        frame_column = st.selectbox(
            label="Display frames as: ",
            options=("frame_name", "frame_scenario", "frame_super_scenario"),
            format_func=lambda label: frame_label_map[label],
        )

        frame_options = sorted(data[frame_column].value_counts().keys())
        selected_frames = st.multiselect(
            label="frame subset selection",
            options=frame_options,
            default=frame_options,
        )

        # Apply the subset selection once; all three sections share it.
        in_selection = data[data[frame_column].isin(selected_frames)]
        # Rows without a first role cannot be placed on the role axis.
        with_focus = in_selection.dropna(axis=0, subset=["first_role"])

        st.header("Timeline")
        time_scatter = with_focus.plot.scatter(
            x="first_role", y="time_point", backend="plotly", color=frame_column
        )
        time_scatter.update_traces(marker_size=20)
        time_scatter.update_layout(height=1000)
        st.plotly_chart(time_scatter)

        st.header("Overall focus")
        focus_bar = _plot_value_counts(with_focus["first_role"], "first_role")
        st.plotly_chart(focus_bar)

        st.header("Focus by frame")
        for team in teams:
            st.subheader(team)
            team_frames = in_selection[in_selection["first_role"] == team][frame_column]
            frame_bar = _plot_value_counts(team_frames, frame_column)
            st.plotly_chart(frame_bar)
if __name__ == "__main__":
    # Build the page only when this file is executed as a script,
    # not when it is imported as a module.
    explore_timeline()