File size: 3,759 Bytes
a19c1c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ff1c674
23993f9
ff1c674
 
a19c1c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import os
import sys

import streamlit as st
from st_aggrid import AgGrid, DataReturnMode

# Make the directory above this script importable; the `helpers` import that
# follows presumably resolves from there.
current = os.path.dirname(os.path.realpath(__file__))
parent = os.path.dirname(current)
# Streamlit re-executes this script on every user interaction, so an
# unconditional append would add one duplicate entry to sys.path per rerun.
if parent not in sys.path:
    sys.path.append(parent)
from helpers import apply_style, get_idx_column, read_csv_from_web, read_json_from_web

apply_style()

# Starts out empty; replaced further down when a codebook file is uploaded.
codebook = {}

# TODO: Anonymize for paper
# Markdown fragments rendered at the top of the page, in display order:
# title/credits, a horizontal separator, and the "Data Loading" heading.
_HEADER_SECTIONS = (
    """
    # Codebook Creation/Edition Tool based on the PR-ENT Approach.
    ### *Rethinking the Event Coding Pipeline with Prompt Entailment*
    ### https://arxiv.org/abs/2210.05257
    ### Clément Lefebvre (Swiss Data Science Center)
    ### Niklas Stoehr (ETH Zürich, https://niklas-stoehr.com/)
    ##### Version: 1.0
""",
    "***********",
    """
## Data Loading
""",
)

for _header_section in _HEADER_SECTIONS:
    st.markdown(_header_section)


st.markdown(
    """
    ### Upload a CSV of event descriptions.
"""
)
uploaded_file = st.file_uploader("Upload a csv file containing event descriptions")
if uploaded_file is not None:
    # Streamlit re-runs the whole script on every interaction while the
    # uploader still holds the file. Identify the upload by name and size so
    # the CSV is parsed only when a different file arrives.
    # NOTE(review): a different file with the same name and size is not detected.
    upload_signature = (uploaded_file.name, uploaded_file.size)
    is_new_upload = (
        "data_upload_signature" not in st.session_state
        or st.session_state["data_upload_signature"] != upload_signature
    )
    if is_new_upload or "data" not in st.session_state:
        st.session_state.data = read_csv_from_web(uploaded_file)
        st.session_state["data_upload_signature"] = upload_signature
        # The filtered view is only initialised when missing, so without this
        # the grid would keep showing rows of the previously uploaded file
        # until "Reset Filters" is clicked.
        if is_new_upload and "filtered_df" in st.session_state:
            del st.session_state["filtered_df"]


if "data" in st.session_state:
    # Filter will be reset if the page is left and then used again
    # Placeholder text shown while the grid is built; blanked once the
    # column selector below has been rendered.
    loading_notice = st.text("Loading data display...")
    st.write(
        """
        The below display of the data can be used to filter the data. Click on the *3 bars logo* when hovering over a column name and the filtering
        tool will appear. Filters are kept in memory on the whole dashboard as long as the `Reset Filters` button is not clicked.

        Current limitation: If a filter is set and the user change page. Then it can not be modified anymore and needs to be reset.
    """
    )

    # Use the full dataset when no filtered view exists yet or when the user
    # explicitly asks for a reset.
    reset_requested = st.button("Reset Filters")
    if reset_requested or "filtered_df" not in st.session_state:
        st.session_state["filtered_df"] = st.session_state["data"]

    # Show the current view in the grid and store the data it returns
    # (requested in FILTERED return mode) as the new filtered view.
    grid_response = AgGrid(
        st.session_state["filtered_df"],
        height=400,
        data_return_mode=DataReturnMode.FILTERED,
        update_mode="MANUAL",
    )
    st.session_state["filtered_df"] = grid_response["data"]

    # Default the persisted text column to the first column of the view.
    if "text_column_design_perm" not in st.session_state:
        first_column = st.session_state["filtered_df"].columns[0]
        st.session_state["text_column_design_perm"] = first_column

    def callback_function(mod, key):
        """Copy the session-state value stored under `key` into `mod`."""
        st.session_state[mod] = st.session_state[key]

    st.write("Select the column which contains the event descriptions.")
    column_options = st.session_state["filtered_df"].columns
    preselected_idx = get_idx_column(
        st.session_state["text_column_design_perm"],
        list(column_options),
    )
    # The widget value lives under "text_column_design"; on change the
    # callback mirrors it into "text_column_design_perm".
    st.selectbox(
        "Select the event description column:",
        column_options,
        index=preselected_idx,
        key="text_column_design",
        on_change=callback_function,
        args=("text_column_design_perm", "text_column_design"),
    )
    loading_notice.text("")

    # Remove NaN Texts
    if st.button("Remove Empty Event Descriptions"):
        text_column = st.session_state["text_column_design_perm"]
        current_view = st.session_state["filtered_df"]
        st.session_state["filtered_df"] = current_view.dropna(subset=[text_column])


st.write("********")
st.markdown("## Optional Upload")


# --- Optional codebook ---
st.markdown(
    """
    ### Upload a codebook if available. It needs to be in the format used in this dashboard.
"""
)
uploaded_codebook = st.file_uploader("Upload a codebook if available (OPTIONAL)")
if uploaded_codebook is not None:
    # Keep the parsed codebook both as a module-level name and in session state.
    codebook = st.session_state["codebook"] = read_json_from_web(uploaded_codebook)

# --- Optional validated dataset ---
st.markdown(
    """
    ### Upload a validated dataset (accept, reject, ignored) in the format of this dashboard.
"""
)

uploaded_validated_data = st.file_uploader(
    "Upload a json file containing validated data (OPTIONAL)"
)
if uploaded_validated_data is not None:
    validated_data = read_json_from_web(uploaded_validated_data)
    st.session_state["validated_data"] = validated_data