File size: 3,613 Bytes
cdc2127
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b001a98
cdc2127
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import os
import sys

import streamlit as st
from st_aggrid import AgGrid, DataReturnMode

# Resolve the directory that contains this file, then that directory's parent.
current = os.path.dirname(os.path.realpath(__file__))
parent = os.path.dirname(current)
# Make the parent directory importable; presumably the `helpers` module
# imported right after this lives there — TODO confirm.
sys.path.append(parent)
from helpers import apply_style, get_idx_column, read_csv_from_web, read_json_from_web

# Run the styling hook imported from helpers before any element is drawn.
apply_style()

# Module-level codebook; starts out empty.
codebook = {}

# Markdown fragments for the page header and the first section headings,
# listed in the exact order they are rendered.
_HEADER_FRAGMENTS = (
    """
    # Codebook Creation/Edition Tool based on the PR-ENT Approach.
    ### *Rethinking the Event Coding Pipeline with Prompt Entailment*
    ### Author: Anonymized for submission
    ##### Version: 1.0
""",
    "***********",
    """
## Data Loading
""",
    """
    ### Upload a CSV of event descriptions.
""",
)

for _fragment in _HEADER_FRAGMENTS:
    st.markdown(_fragment)
uploaded_file = st.file_uploader("Upload a csv file containing event descriptions")
if uploaded_file is not None:
    st.session_state.data = read_csv_from_web(uploaded_file)
    # The script is re-executed on every interaction, so the same upload is
    # seen again on each run. Only when a *different* file arrives (its name or
    # size changed) is the filtered view re-seeded from the new data; without
    # this the grid kept showing rows of the previous dataset until
    # "Reset Filters" was clicked.
    upload_signature = (uploaded_file.name, uploaded_file.size)
    if (
        "data_upload_signature" not in st.session_state
        or st.session_state["data_upload_signature"] != upload_signature
    ):
        st.session_state["data_upload_signature"] = upload_signature
        st.session_state.filtered_df = st.session_state.data


if "data" in st.session_state:
    # Filter will be reset if the page is left and then used again
    loading_df = st.text("Loading data display...")
    st.write(
        """
        The below display of the data can be used to filter the data. Click on the *3 bars logo* when hovering over a column name and the filtering
        tool will appear. Filters are kept in memory on the whole dashboard as long as the `Reset Filters` button is not clicked.

        Current limitation: If a filter is set and the user change page. Then it can not be modified anymore and needs to be reset.
    """
    )
    # Seed the filtered view from the full dataset the first time through, and
    # again whenever the user explicitly asks for a reset.
    if "filtered_df" not in st.session_state:
        st.session_state.filtered_df = st.session_state.data
    if st.button("Reset Filters"):
        st.session_state.filtered_df = st.session_state.data

    # Show the grid and keep whatever rows it hands back under "data" as the
    # new filtered view.
    st.session_state.filtered_df = AgGrid(
        st.session_state.filtered_df,
        height=400,
        data_return_mode=DataReturnMode.FILTERED,
        update_mode="MANUAL",
    )["data"]

    available_columns = list(st.session_state.filtered_df.columns)

    # Fall back to the first column when no text column has been remembered
    # yet, or when the remembered one no longer exists in the current data
    # (e.g. a CSV with different headers was loaded). A stale name would
    # otherwise reach `dropna(subset=...)` below and raise KeyError.
    if (
        "text_column_design_perm" not in st.session_state
        or st.session_state["text_column_design_perm"] not in available_columns
    ):
        st.session_state["text_column_design_perm"] = available_columns[0]

    def callback_function(mod, key):
        """Copy the session-state value stored under ``key`` into ``mod``.

        Used as the selectbox ``on_change`` hook so the widget's current value
        is mirrored into a second session-state entry.
        """
        st.session_state[mod] = st.session_state[key]

    st.write("Select the column which contains the event descriptions.")
    st.selectbox(
        "Select the event description column:",
        st.session_state.filtered_df.columns,
        key="text_column_design",
        on_change=callback_function,
        args=("text_column_design_perm", "text_column_design"),
        index=get_idx_column(
            st.session_state["text_column_design_perm"],
            available_columns,
        ),
    )
    # Clear the "Loading data display..." placeholder now that the widgets exist.
    loading_df.text("")

    # Remove NaN Texts
    if st.button("Remove Empty Event Descriptions"):
        st.session_state.filtered_df = st.session_state.filtered_df.dropna(
            subset=[st.session_state["text_column_design_perm"]]
        )


# Widget labels for the two optional uploaders.
_CODEBOOK_LABEL = "Upload a codebook if available (OPTIONAL)"
_VALIDATED_LABEL = "Upload a json file containing validated data (OPTIONAL)"

st.write("********")
st.markdown("## Optional Upload")

# Optional codebook: when a file is provided, parse it and publish the result
# both as the module-level `codebook` and in session state.
st.markdown(
    """
    ### Upload a codebook if available. It needs to be in the format used in this dashboard.
"""
)
uploaded_codebook = st.file_uploader(_CODEBOOK_LABEL)
if uploaded_codebook is not None:
    st.session_state["codebook"] = codebook = read_json_from_web(uploaded_codebook)

# Optional validated dataset: when a file is provided, parse it and store the
# result in session state.
st.markdown(
    """
    ### Upload a validated dataset (accept, reject, ignored) in the format of this dashboard.
"""
)
uploaded_validated_data = st.file_uploader(_VALIDATED_LABEL)
if uploaded_validated_data is not None:
    st.session_state["validated_data"] = read_json_from_web(uploaded_validated_data)