# Streamlit front end for 2anonymity.
#
# Flow: the user uploads a dataset in the sidebar; once a dataframe is
# available it is previewed, cleaned and anonymized through the helpers in the
# project-local `modules` package, previewed again, and offered for download
# as CSV, JSON or XLSX.
import io
from pathlib import PurePath

import streamlit as st
from streamlit_extras.let_it_rain import rain

import modules

# Options
DISCLAIMER = "*Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aliquam urna sem, bibendum efficitur pellentesque a, sollicitudin pharetra urna. Nam vel lectus vitae elit luctus feugiat a a purus. Aenean mollis quis ipsum sed ornare. Nunc sit amet ultricies tellus. Vivamus vulputate sem id molestie viverra. Etiam egestas lobortis enim, sit amet lobortis ligula sollicitudin vel. Nunc eget ipsum sollicitudin, convallis.*"
# Passed straight to modules.data_anonymizer as its second argument
# (presumably the k of k-anonymity -- confirm against modules).
K = 2

# Page Config
st.set_page_config(layout="wide")

### FILE LOADER for sidebar
with st.sidebar:
    st.header("🕵️ 2anonymity")
    st.markdown("*Clean and anonymize data*")
    with st.container():
        file = st.file_uploader(
            "Upload dataset:",
            type=modules.SUPPORTED_TYPES,
            label_visibility="collapsed",
        )
        df, (filename, extension), result = modules.load_file(file)

### MAIN
if df is None:
    # Await file to be uploaded
    rain("🤠")
else:
    ### PRE-TRANSFORM features for sidebar
    with st.sidebar:
        # Options for data loading
        with st.container():
            st.markdown("### Data loading options:")
            remove_duplicates = st.checkbox("Remove duplicate rows", value=True)
            drop_missing = st.checkbox("Remove rows with missing values", value=False)
        # Options for data optimization
        with st.container():
            st.markdown("### Anonymizing options:")
            max_categorical_size = st.slider(
                "Maximum number of categories", min_value=2, max_value=200, value=50
            )
            bin_size = st.slider("Target bin size", min_value=2, max_value=200, value=20)
            sensitivity_minimum = st.number_input(
                "Minimum count", min_value=2, max_value=10, value=2
            )

    ### DATA PREVIEW AND TRANSFORM
    # Preview data before transform (red-tinted background)
    with st.container():
        s = df.style
        s = s.set_properties(**{'background-color': '#fce4e4'})
        st.dataframe(s)

    # Transform data
    df = modules.data_cleaner(df, drop_missing, remove_duplicates)
    df, unprocessed = modules.data_anonymizer(
        df, K, max_categorical_size, bin_size, sensitivity_minimum
    )

    # Preview data after transform (green-tinted background)
    with st.container():
        s = df.style
        s = s.set_properties(**{'background-color': '#e4fce4'})
        st.dataframe(s)

    ### POST-TRANSFORM features for sidebar
    with st.sidebar:
        # Options for download
        with st.container():
            st.markdown("### Download options:")
            output_extension = st.selectbox("File type", [".csv", ".json", ".xlsx"])
            if unprocessed:
                st.markdown(f"Error encountered when processing columns {str(unprocessed)}")

        # Prepare file for download
        with st.container():
            if output_extension == ".csv":
                output_file = df.to_csv().encode("utf-8")
            elif output_extension == ".json":
                output_file = df.to_json().encode("utf-8")
            elif output_extension == ".xlsx":
                # DataFrame.to_excel() needs a target and returns None, so the
                # workbook is written to an in-memory buffer and its raw bytes
                # are handed to the download button.
                excel_buffer = io.BytesIO()
                df.to_excel(excel_buffer)
                output_file = excel_buffer.getvalue()
            # Strip only the final extension so "my.data.csv" keeps "my.data",
            # and a name without any dot no longer raises IndexError.
            output_filename = f"{PurePath(filename).stem}-clean{output_extension}"
            st.download_button("Download", output_file, file_name=output_filename)

        # Add a disclaimer for data security
        with st.container():
            st.markdown(
                f"""
                Disclaimer:
                {DISCLAIMER}
                """
            )

# Attribution
st.sidebar.markdown("Created by team #2hack2furious for the hackthethreat2023")