"""Streamlit page: load pupil-dilation trials from JSON files and clean blinks.

The page lets the user upload one or more JSON files from the sidebar,
flattens their records into a single DataFrame (left column) and offers
cleaning actions on it (right column): counting and removing blinking
samples, i.e. samples whose value is exactly ``0.0``.
"""
import json
import pathlib

import pandas as pd
import streamlit as st

# Keys every record in the uploaded JSON files is expected to carry.
RECORD_FIELDS = ("trial_id", "pupil_dilation", "baseline", "rating")
# Columns holding the per-trial sample lists that are scanned for blinks.
SIGNAL_FIELDS = ("pupil_dilation", "baseline")
# A sample equal to this value is treated as a blink.
BLINK_VALUE = 0.0


def _collect_records(payloads: list) -> dict:
    """Flatten the records of every uploaded JSON payload into column lists.

    Each payload must decode to a mapping whose values are lists of records
    (dicts). A record key that is not in ``RECORD_FIELDS`` raises ``KeyError``.
    """
    columns = {field: [] for field in RECORD_FIELDS}
    for payload in payloads:
        document = json.loads(payload)
        for records in document.values():
            for record in records:
                for field, value in record.items():
                    columns[field].append(value)
    return columns


def _build_base_frame(columns: dict) -> pd.DataFrame:
    """Build the base DataFrame and add the derived helper columns."""
    frame = pd.DataFrame.from_dict(columns)
    # Keep only the digits of the trial identifier.
    frame["trial_id"] = frame["trial_id"].map(
        lambda raw: "".join(char for char in raw if char.isdigit())
    )
    frame["len_pupil_dilation"] = frame["pupil_dilation"].map(len)
    frame["len_baseline"] = frame["baseline"].map(len)
    return frame


def _count_blinks(frame: pd.DataFrame) -> int:
    """Return how many samples across all signal columns are blinks."""
    return sum(
        1
        for field in SIGNAL_FIELDS
        for samples in frame[field]
        for sample in samples
        if sample == BLINK_VALUE
    )


def _drop_blinks(frame: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``frame`` with blink samples removed from the signals."""
    cleaned = frame.copy(deep=True)
    for field in SIGNAL_FIELDS:
        cleaned[field] = cleaned[field].map(
            lambda samples: [s for s in samples if s != BLINK_VALUE]
        )
    return cleaned


st.header("Time Series Preprocessing Pipeline")
st.markdown(
    "Users can load their time-series data and select a set of transformations "
    "to prepare a training set for univariate or multivariate time-series "
    "classification. "
    "Go ahead and use the sidebar on the left to upload your data files in "
    "*.json* format and start exploring and transforming it!"
)

col1, col2 = st.columns(2)

file_names, file_bytes = [], []
with st.sidebar:
    files = st.file_uploader("Load files", accept_multiple_files=True)
    if files:
        file_names = [file.name for file in files]
        file_bytes = [file.getvalue() for file in files]
        st.text("\n".join(file_names))

# Results kept in session_state belong to one specific set of uploads. When the
# uploads change, drop them so a stale count or frame is never shown or reused.
upload_signature = tuple(
    (name, hash(content)) for name, content in zip(file_names, file_bytes)
)
if (
    "upload_signature" not in st.session_state
    or st.session_state["upload_signature"] != upload_signature
):
    for stale_key in ("df", "blinks"):
        if stale_key in st.session_state:
            del st.session_state[stale_key]
    st.session_state["upload_signature"] = upload_signature

with st.spinner("building base dictionary..."):
    data_dict = _collect_records(file_bytes)

df_base = pd.DataFrame()
with col1:
    if file_bytes:
        with st.spinner("building base dataframe..."):
            df_base = _build_base_frame(data_dict)
            st.info(f"number of files: {len(file_names)}")
            st.markdown("Your original data")
            st.dataframe(df_base)
    else:
        st.caption("Upload your data from the sidebar to start :sunglasses:")

with col2:
    if not df_base.empty:
        st.markdown("**Cleaning actions**")
        detect_blinking = st.button("Detect blinking ('0.0' values)")
        number_of_blinks = 0
        if detect_blinking:
            number_of_blinks = _count_blinks(df_base)
            # Always overwrite: the stored values must describe the current data.
            st.session_state["df"] = df_base
            st.session_state["blinks"] = number_of_blinks

        if "blinks" in st.session_state:
            # A button is only True on the rerun right after its click, so on the
            # rerun triggered by "Remove blinking" the count has to come from
            # session_state rather than from the local computed above.
            number_of_blinks = st.session_state["blinks"]
            if number_of_blinks:
                st.info(
                    f"blinking values (0.0) were found in {number_of_blinks} "
                    "time-steps in all your data"
                )
                remove_blinking = st.button("Remove blinking")
                if remove_blinking:
                    # Cleaned frame shown in the right-hand column.
                    df_right = _drop_blinks(st.session_state["df"])
                    st.success("blinking values have been removed!")
                    st.info("after transformation")
                    st.dataframe(df_right)
            else:
                st.caption("no blinking values were found in your data!")

if not df_base.empty:
    st.warning("consider running outlier detection to clean your data!", icon="⚠️")