Spaces:
Runtime error
Runtime error
File size: 3,759 Bytes
a19c1c5 ff1c674 23993f9 ff1c674 a19c1c5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
import os
import sys
import streamlit as st
from st_aggrid import AgGrid, DataReturnMode
current = os.path.dirname(os.path.realpath(__file__))
parent = os.path.dirname(current)
sys.path.append(parent)
from helpers import apply_style, get_idx_column, read_csv_from_web, read_json_from_web
# Run the shared `apply_style` helper (imported from `helpers`) before the
# page content below is rendered.
apply_style()

# Module-level codebook placeholder; it is only rebound further down the
# script when a codebook file is uploaded.
codebook = {}

# Title block shown at the top of the page: tool name, paper reference,
# authors and version.
# TODO: Anonymize for paper
page_title_md = """
# Codebook Creation/Edition Tool based on the PR-ENT Approach.
### *Rethinking the Event Coding Pipeline with Prompt Entailment*
### https://arxiv.org/abs/2210.05257
### Clément Lefebvre (Swiss Data Science Center)
### Niklas Stoehr (ETH Zürich, https://niklas-stoehr.com/)
##### Version: 1.0
"""
section_rule_md = "***********"

st.markdown(page_title_md)
st.markdown(section_rule_md)
# "Data Loading" section: two headings followed by the CSV uploader.
data_loading_heading_md = """
## Data Loading
"""
csv_upload_heading_md = """
### Upload a CSV of event descriptions.
"""
st.markdown(data_loading_heading_md)
st.markdown(csv_upload_heading_md)

uploaded_file = st.file_uploader(
    label="Upload a csv file containing event descriptions"
)
# While a file is present in the uploader, every script run re-reads it via
# `read_csv_from_web` and stores the result under the "data" session key.
if uploaded_file is not None:
    st.session_state["data"] = read_csv_from_web(uploaded_file)
if "data" in st.session_state:
    # Temporary status line; it is blanked once the controls below are ready.
    # Filter will be reset if the page is left and then used again
    loading_df = st.text("Loading data display...")
    st.write(
        """
The below display of the data can be used to filter the data. Click on the *3 bars logo* when hovering over a column name and the filtering
tool will appear. Filters are kept in memory on the whole dashboard as long as the `Reset Filters` button is not clicked.
Current limitation: If a filter is set and the user change page. Then it can not be modified anymore and needs to be reset.
"""
    )

    # Seed the working copy from the uploaded data on first use, and again
    # whenever the user asks for a reset.
    if "filtered_df" not in st.session_state:
        st.session_state.filtered_df = st.session_state.data
    if st.button("Reset Filters"):
        st.session_state.filtered_df = st.session_state.data

    # Render the grid and keep what it returns under "data" as the new
    # working copy (FILTERED return mode, manual update mode).
    st.session_state.filtered_df = AgGrid(
        st.session_state.filtered_df,
        height=400,
        data_return_mode=DataReturnMode.FILTERED,
        update_mode="MANUAL",
    )["data"]

    def callback_function(mod, key):
        """Copy the session-state value stored under ``key`` to ``mod``.

        Used as the ``on_change`` hook of the column selector so the chosen
        column is mirrored under a second session-state key.
        NOTE(review): presumably so the choice outlives the widget when the
        user changes page -- confirm.
        """
        st.session_state[mod] = st.session_state[key]

    available_columns = list(st.session_state.filtered_df.columns)
    if not available_columns:
        # Without columns there is nothing to select and `columns[0]` would
        # raise IndexError, so point the user at the reset button instead.
        # The remembered column is left untouched so it is still valid once
        # the data is restored.
        loading_df.text("")
        st.warning(
            "There are no columns to choose from. "
            "Click `Reset Filters` to restore the uploaded data."
        )
    else:
        # The remembered column is either missing (first visit) or possibly
        # stale (the data now has different columns). Re-validate it on every
        # run so neither the index lookup nor `dropna` sees an unknown name.
        if (
            "text_column_design_perm" not in st.session_state
            or st.session_state["text_column_design_perm"]
            not in available_columns
        ):
            st.session_state["text_column_design_perm"] = available_columns[0]

        st.write("Select the column which contains the event descriptions.")
        st.selectbox(
            "Select the event description column:",
            st.session_state.filtered_df.columns,
            key="text_column_design",
            on_change=callback_function,
            args=("text_column_design_perm", "text_column_design"),
            # Preselect the remembered column.
            index=get_idx_column(
                st.session_state["text_column_design_perm"],
                available_columns,
            ),
        )
        loading_df.text("")

        # Remove NaN Texts
        if st.button("Remove Empty Event Descriptions"):
            st.session_state.filtered_df = st.session_state.filtered_df.dropna(
                subset=[st.session_state["text_column_design_perm"]]
            )
# "Optional Upload" section: separator, heading and the codebook uploader.
# NOTE(review): indentation was lost in the reviewed copy; the separator is
# assumed to be top-level and both assignments below are assumed to belong
# to the `if` body -- confirm against the original file.
st.write("********")
st.markdown("## Optional Upload")
codebook_heading_md = """
### Upload a codebook if available. It needs to be in the format used in this dashboard.
"""
st.markdown(codebook_heading_md)

uploaded_codebook = st.file_uploader(
    label="Upload a codebook if available (OPTIONAL)"
)
if uploaded_codebook is not None:
    # Read the upload once, then bind it to both the module-level name and
    # the "codebook" session key.
    codebook = st.session_state["codebook"] = read_json_from_web(uploaded_codebook)
# Optional upload of previously validated data.
validated_heading_md = """
### Upload a validated dataset (accept, reject, ignored) in the format of this dashboard.
"""
st.markdown(validated_heading_md)

uploaded_validated_data = st.file_uploader(
    label="Upload a json file containing validated data (OPTIONAL)"
)
# When a file is present, store what `read_json_from_web` returns under the
# "validated_data" session key.
if uploaded_validated_data is not None:
    st.session_state["validated_data"] = read_json_from_web(
        uploaded_validated_data
    )
|