# ts-explorations / app.py
import streamlit as st
import pathlib
import json
import pandas as pd
st.header("Time Series Preprocessing Pipeline")
st.markdown("Users can load their time-series data and select a set of transformations to prepare a training set for univariate or multivariate time-series classification.\
Go ahead and use the sidebar on the left to upload your data files in *.json* format and start exploring and transforming it!")
col1, col2 = st.columns(2)
file_names, file_bytes = [], []
with st.sidebar:
    files = st.file_uploader("Load files", accept_multiple_files=True)
    if files:
        file_names = [file.name for file in files]
        file_bytes = [file.getvalue() for file in files]
        st.text("\n".join(file_names))
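# Expected input format, assumed from the fields collected below: each *.json* file maps an
# arbitrary top-level key to a list of trial records, e.g. (key name and values are illustrative only)
# {"trials": [{"trial_id": "trial_1", "pupil_dilation": [3.1, 0.0, 3.2], "baseline": [3.0, 3.1], "rating": 4}]}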
data_dict = {'trial_id': [], 'pupil_dilation': [], 'baseline': [], 'rating': []}
with st.spinner("building base dictionary..."):
    for file_data in file_bytes:
        data = json.loads(file_data)
        for key in data:
            for record in data[key]:
                for field, value in record.items():
                    data_dict[field].append(value)
df_base = pd.DataFrame()  # empty placeholder; filled once files are uploaded
with col1:
    if file_bytes:
        with st.spinner("building base dataframe..."):
            df_base = pd.DataFrame.from_dict(data_dict)
            # keep only the digits of the trial identifier and record the length of each series
            df_base["trial_id"] = df_base.trial_id.map(lambda s: "".join([c for c in s if c.isdigit()]))
            df_base["len_pupil_dilation"] = df_base.pupil_dilation.map(len)
            df_base["len_baseline"] = df_base.baseline.map(len)
        st.info(f"number of files: {len(file_names)}")
        st.markdown("Your original data")
        st.dataframe(df_base)
    else:
        st.caption("Upload your data from the sidebar to start :sunglasses:")
with col2:
    if not df_base.empty:
        st.markdown("**Cleaning actions**")
        detect_blinking = st.button("Detect blinking ('0.0' values)")
        number_of_blinks = 0
        if detect_blinking:
            # initialization of session_state: keep the original dataframe across reruns
            if 'df' not in st.session_state:
                st.session_state['df'] = df_base
            # count zero-valued time-steps (blinks) in both signals
            for ser in df_base['pupil_dilation']:
                for f in ser:
                    if f == 0.0:
                        number_of_blinks += 1
            for ser in df_base['baseline']:
                for f in ser:
                    if f == 0.0:
                        number_of_blinks += 1
            # persist the blink count, but only when blinking was actually found,
            # so the "no blinking values" caption below can still be shown
            if number_of_blinks and 'blinks' not in st.session_state:
                st.session_state['blinks'] = number_of_blinks
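        # streamlit reruns this script on every widget interaction, so the values stored in
        # st.session_state above are what persist between the "detect" and "remove" clicks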
if "blinks" in st.session_state.keys():
st.info(f"blinking values (0.0) were found in {number_of_blinks} time-steps in all your data")
remove_blinking = st.button("Remove blinking")
# df in column 2
if remove_blinking:
df_right = st.session_state.df.copy(deep=True)
df_right.pupil_dilation = df_right.pupil_dilation.map(lambda ser: [f for f in ser if f != 0.0])
df_right.baseline = df_right.baseline.map(lambda ser: [f for f in ser if f != 0.0])
st.success("blinking values have been removed!")
st.info("after transformation")
st.dataframe(df_right)
elif detect_blinking and not number_of_blinks:
st.caption("no blinking values were found in your data!")
if not df_base.empty:
    st.warning("consider running outlier detection to clean your data!", icon="⚠️")
# debug: dump the current session_state
# for key, value in st.session_state.items():
#     st.success(f"{key}: {value}")
# note: uploading new files after the first run can leave stale values in st.session_state,
# since the 'df' and 'blinks' keys above are only written once
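# To run the app locally: `streamlit run app.py`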