"""Streamlit app that explores "Football Perspective Chains" in one article.

Loads frame annotations with their mapped roles from a CSV file, enriches
them with scenario information from a Kicktionary XML file, and renders a
timeline scatter plot plus focus bar charts.
"""

from ast import literal_eval

from lxml import etree as ET
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st

MAPPED_ROLES_CSV = "data/2002829_mapped_roles.csv"
KICKTIONARY_XML = "kicktionary_lu_info.xml"

# Values of "first_role" that get their own "Focus by frame" chart.
TEAMS = ("Man. United", "Rangers")


def prepare_data():
    """Load the mapped-roles CSV and derive the columns used by the charts.

    The "frame" column holds tuples; positions 0-3 are unpacked into
    ``sentence_idx``, ``frame_idx``, ``frame_name`` and ``frame_target``.
    Each frame name is looked up in the Kicktionary mappings to obtain its
    scenario and super scenario.

    Returns:
        A DataFrame with the columns ``sentence_idx``, ``frame_idx``,
        ``frame_name``, ``frame_scenario``, ``frame_super_scenario``,
        ``frame_target``, ``changed_roles``, ``roles``, ``time_point`` and
        ``first_role`` (first entry of ``changed_roles``, or ``None`` when
        that collection is empty).

    Raises:
        KeyError: If a frame name has no entry in the Kicktionary mappings.
    """
    raw = pd.read_csv(
        MAPPED_ROLES_CSV,
        index_col=0,
        # These columns are stored as Python literals and must be parsed back.
        converters={
            "frame": literal_eval,
            "changed_roles": literal_eval,
            "unchanged_roles": literal_eval,
            "roles": literal_eval,
        },
    )
    frame_to_scenario, frame_to_super_scenario = load_kicktionary_info()

    # Extract information from the "frame" tuples, then filter & reorder columns.
    frames = raw["frame"]
    frame_names = frames.apply(lambda frame: frame[2])
    extracted = raw.assign(
        sentence_idx=frames.apply(lambda frame: frame[0]),
        frame_idx=frames.apply(lambda frame: frame[1]),
        frame_name=frame_names,
        # Plain indexing (not .map) so that an unknown frame raises KeyError.
        frame_scenario=frame_names.apply(lambda name: frame_to_scenario[name]),
        frame_super_scenario=frame_names.apply(
            lambda name: frame_to_super_scenario[name]
        ),
        frame_target=frames.apply(lambda frame: frame[3]),
    )[
        [
            "sentence_idx",
            "frame_idx",
            "frame_name",
            "frame_scenario",
            "frame_super_scenario",
            "frame_target",
            "changed_roles",
            "roles",
        ]
    ]

    # Assign each frame instance a normalised "time" point in the article:
    # the sentence index plus the frame's relative position inside its
    # sentence, divided by (highest sentence index + 1).
    sentence_idx = extracted["sentence_idx"]
    max_sentence_idx = sentence_idx.max()
    max_frame_in_sentence = extracted.groupby("sentence_idx")["frame_idx"].transform(
        "max"
    )
    # A sentence whose highest frame index is 0 would divide by zero; such
    # frames get no intra-sentence offset instead.
    within_sentence = (
        extracted["frame_idx"]
        / max_frame_in_sentence.where(max_frame_in_sentence != 0)
    ).fillna(0.0)
    with_time = extracted.assign(
        time_point=(sentence_idx + within_sentence) / (max_sentence_idx + 1)
    )

    return with_time.assign(
        first_role=with_time["changed_roles"].apply(
            lambda roles: roles[0] if roles else None
        )
    )


def load_kicktionary_info():
    """Read frame -> scenario and frame -> super-scenario maps from the XML.

    Every ``LEXICAL-UNIT`` element with a non-empty ``frame`` attribute
    contributes one entry to each mapping; elements without one are skipped.
    When several elements share a frame, the last one wins.

    Returns:
        A ``(frame_to_scenario, frame_to_super_scenario)`` tuple of dicts.

    Raises:
        KeyError: If a lexical unit with a frame lacks the ``scenario`` or
            ``super-scenario`` attribute.
    """
    kicktionary = ET.parse(KICKTIONARY_XML)
    frame_to_scenario = {}
    frame_to_super_scenario = {}
    for lexical_unit in kicktionary.xpath(".//LEXICAL-UNIT"):
        frame = lexical_unit.get("frame")
        if not frame:
            continue
        frame_to_scenario[frame] = lexical_unit.attrib["scenario"]
        frame_to_super_scenario[frame] = lexical_unit.attrib["super-scenario"]
    return frame_to_scenario, frame_to_super_scenario


def explore_timeline():
    """Render the page: frame selection widgets, timeline and focus charts."""
    # NOTE(review): the data is reloaded on every call of this function;
    # consider Streamlit's caching if loading becomes slow.
    data = prepare_data()

    with st.container():
        st.title("Football Perspective Chains")

        frame_label_map = {
            "frame_name": "frames",
            "frame_scenario": "scenarios (groups of related frames)",
            "frame_super_scenario": "super scenarios (groups of related scenarios)",
        }
        frame_column = st.selectbox(
            label="Display frames as: ",
            options=("frame_name", "frame_scenario", "frame_super_scenario"),
            format_func=lambda label: frame_label_map[label],
        )
        frame_options = sorted(data[frame_column].value_counts().keys())
        selected_frames = st.multiselect(
            label="frame subset selection",
            options=frame_options,
            default=frame_options,
        )

        # All charts share the same frame selection, so filter only once.
        selected = data[data[frame_column].isin(selected_frames)]
        with_first_role = selected.dropna(axis=0, subset=["first_role"])

        st.header("Timeline")
        time_scatter = with_first_role.plot.scatter(
            x="first_role",
            y="time_point",
            backend="plotly",
            color=frame_column,
        )
        time_scatter.update_traces(marker_size=20)
        time_scatter.update_layout(height=1000)
        st.plotly_chart(time_scatter)

        st.header("Overall focus")
        focus_bar = (
            with_first_role["first_role"]
            .value_counts()
            .plot.bar(y="first_role", backend="plotly")
        )
        st.plotly_chart(focus_bar)

        st.header("Focus by frame")
        for team in TEAMS:
            st.subheader(team)
            frame_bar = (
                selected.loc[selected["first_role"] == team, frame_column]
                .value_counts()
                .plot.bar(y=frame_column, backend="plotly")
            )
            st.plotly_chart(frame_bar)


if __name__ == "__main__":
    explore_timeline()