File size: 4,890 Bytes
d391513
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37ccd3e
d391513
 
 
 
 
 
 
 
 
 
37ccd3e
d391513
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37ccd3e
 
3263d20
 
 
 
 
 
 
 
 
 
 
 
 
 
d391513
37ccd3e
d391513
 
37ccd3e
d391513
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import streamlit as st
import pathlib
import json
import pandas as pd

# Page title, intro copy, and the two-column layout used below
# (left: raw data, right: cleaning actions).
st.header("Time Series Preprocessing Pipeline")
st.markdown(
    "Users can load their time-series data and select a set of transformations to prepare a training set for univariate or multivariate time-series classification."
    " Go ahead and use the sidebar on the left to upload your data files in *.json* format and start exploring and transforming it!"
)
col1, col2 = st.columns(2)

# Sidebar: let the user upload any number of files and keep both the
# names (for display) and the raw bytes (for parsing) in parallel lists.
file_names, file_bytes = [], []
with st.sidebar:
    uploaded = st.file_uploader("Load files", accept_multiple_files = True)
    if uploaded:
        for uploaded_file in uploaded:
            file_names.append(uploaded_file.name)
            file_bytes.append(uploaded_file.getvalue())
        st.text("\n".join(file_names))

# Accumulate the records of every uploaded JSON file into column lists.
# Each file is expected to map top-level keys to lists of record dicts
# whose keys match the columns below — TODO confirm against the data format.
data_dict = {'trial_id': [], 'pupil_dilation': [], 'baseline': [], 'rating': []}
with st.spinner("Building base dictionary..."):
    for file_data in file_bytes:
        data = json.loads(file_data)
        # BUG FIX: the original reused `k` for both the outer key and the
        # record-field key, shadowing the outer loop variable.
        for top_key in data:
            for record in data[top_key]:
                for field, value in record.items():
                    data_dict[field].append(value)

# Left column: build and display the base dataframe once files are loaded.
# df_base stays empty when nothing has been uploaded; col2 checks for that.
df_base = pd.DataFrame()
with col1:
    if file_bytes:
        with st.spinner("Building base dataframe..."):
            df_base = pd.DataFrame.from_dict(data_dict)
            # Keep only the digits of the trial id (e.g. "trial_03" -> "03").
            df_base["trial_id"] = df_base.trial_id.map(
                lambda s: "".join(c for c in s if c.isdigit())
            )
            # Series lengths help spot truncated or mismatched recordings.
            # (`map(len)` replaces the redundant `lambda l: len(l)`.)
            df_base["len_pupil_dilation"] = df_base.pupil_dilation.map(len)
            df_base["len_baseline"] = df_base.baseline.map(len)
            st.info(f"number of files: {len(file_names)}")
            st.markdown("Your original data")
            st.dataframe(df_base)
    else:
        st.caption("Upload your data from the sidebar to start :sunglasses:")

# Right column: blink detection/removal and the optional baseline-relative
# calculated field, with results persisted in st.session_state so they
# survive the rerun each button press triggers.
with col2:
    if not df_base.empty:
        st.markdown("**Cleaning actions**")
        detect_blinking = st.button("Detect blinking ('0.0' values)")
        number_of_blinks = 0
        if detect_blinking:
            # Cache the base dataframe so later reruns can still reach it.
            if 'df' not in st.session_state:
                st.session_state['df'] = df_base
            # A sample of exactly 0.0 is treated as a blink artefact.
            for ser in df_base['pupil_dilation']:
                number_of_blinks += sum(1 for f in ser if f == 0.0)
            for ser in df_base['baseline']:
                number_of_blinks += sum(1 for f in ser if f == 0.0)
            if 'blinks' not in st.session_state:
                st.session_state['blinks'] = number_of_blinks

        if "blinks" in st.session_state:
            # BUG FIX: read the persisted count — `number_of_blinks` is reset
            # to 0 on every rerun, so the original showed a stale 0 as soon
            # as any other button fired.
            st.info(f"blinking values (0.0) were found in {st.session_state['blinks']} time-steps in all your data")
            remove_blinking = st.button("Remove blinking")
            if remove_blinking:
                df_right = st.session_state.df.copy(deep=True)
                df_right.pupil_dilation = df_right.pupil_dilation.map(lambda ser: [f for f in ser if f != 0.0])
                df_right.baseline = df_right.baseline.map(lambda ser: [f for f in ser if f != 0.0])
                st.success("Blinking values have been removed!")
                if "baseline" in list(df_right.keys()):
                    st.markdown("A **baseline** feature has been found on your data, do you want to merge it with any of the other features in a new calculated field?")
                    option = st.multiselect('Select a feature to merge', [k for k in list(df_right.keys()) if k != 'baseline'], [])
                    if option:
                        st.write('You selected:', option)
                        relative_key = f"relative_{option}"
                        add_relative = st.button(f"Add {relative_key}")
                        if add_relative:
                            # BUG FIX: the original referenced an undefined
                            # `df` (NameError) — use the cleaned df_right.
                            # Guard empty series against ZeroDivisionError.
                            baseline_mean = [sum(s) / len(s) if s else 0.0 for s in df_right['baseline']]
                            # BUG FIX: subtracting a float from a Python list
                            # raises TypeError — subtract elementwise instead.
                            df_right['relative_pupil_dilation'] = [
                                [f - baseline_mean[i] for f in df_right['pupil_dilation'].iloc[i]]
                                for i in range(len(df_right))
                            ]
                            st.markdown("After adding calculated fields")
                            st.dataframe(df_right)
                            # BUG FIX: 'myfile.csv' was never written, so the
                            # original open() raised FileNotFoundError. Serve
                            # the processed dataframe directly instead.
                            st.download_button('Download CSV', df_right.to_csv(index=False), file_name='preprocessed.csv')
                            st.info("Your data has been downloaded, you can visualize and detect outliers in the 'Plotting' and 'Detect Outliers' pages on the sidebar.")
        # NOTE(review): once 'blinks' is stored this branch is unreachable,
        # because the first `if` above always wins — kept for safety.
        elif detect_blinking and not number_of_blinks:
            st.caption("No blinking values were found in your data!")

# Once any data is loaded, nudge the user toward the outlier-detection step.
if not df_base.empty:
    st.warning("Consider running outlier detection to clean your data!", icon="⚠️")

# NOTE: session_state entries are written once per session; re-uploading new
# samples does not refresh them. (A commented-out debug dump of
# st.session_state used to live here.)