ziggycross committed on
Commit
6c3e9dd
1 Parent(s): aa72b6d

Fixed download bug and created sample GUI.

Browse files
Files changed (2) hide show
  1. app.py +42 -33
  2. modules.py +3 -19
app.py CHANGED
@@ -5,17 +5,10 @@ from streamlit_extras.let_it_rain import rain
5
  # Options
6
  DISCLAIMER = "*Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aliquam urna sem, bibendum efficitur pellentesque a, sollicitudin pharetra urna. Nam vel lectus vitae elit luctus feugiat a a purus. Aenean mollis quis ipsum sed ornare. Nunc sit amet ultricies tellus. Vivamus vulputate sem id molestie viverra. Etiam egestas lobortis enim, sit amet lobortis ligula sollicitudin vel. Nunc eget ipsum sollicitudin, convallis.*"
7
 
8
- # Cleaning parameters
9
- drop_missing = None,
10
- remove_duplicates = None,
11
-
12
- # Anonymizing parameters
13
- anonymize_data = None
14
-
15
  # Page Config
16
  st.set_page_config(layout="wide")
17
 
18
- # Default Sidebar
19
  with st.sidebar:
20
  st.header("🕵️ 2anonymity")
21
  st.markdown("*Clean and anonymize data*")
@@ -23,53 +16,69 @@ with st.sidebar:
23
  file = st.file_uploader(f"Upload dataset:", type=modules.SUPPORTED_TYPES, label_visibility="collapsed")
24
  df, (filename, extension), result = modules.load_file(file)
25
 
26
- # Main
27
- if df is None:
28
  rain("🤠")
29
  else:
30
- # Add options to sidebar
31
  with st.sidebar:
32
-
33
  # Options for data cleaning
34
  with st.container() as cleaning_options:
35
- st.markdown("Data cleaning options:")
36
  remove_duplicates = st.checkbox("Remove duplicate rows", value=True)
37
  drop_missing = st.checkbox("Remove rows with missing values", value=False)
38
 
39
  # Options for data optimization
40
  with st.container() as anonymizing_options:
41
- st.markdown("Anonymizing options:")
42
- anonymize_data = st.checkbox("Anonymize data", value=True)
43
-
44
- # Prepare file for download
45
- if df is not None:
46
- download_file = modules.create_file(df, extension)
47
- with st.container() as downloader:
48
- st.download_button("Download", download_file, file_name=filename)
49
-
50
- # Add a disclaimer for data security
51
- with st.container() as disclaimer:
52
- st.markdown(
53
- f"""
54
- Disclaimer:
55
- {DISCLAIMER}
56
- """
57
- )
58
 
59
- # Preview data before transformation
 
 
60
  with st.container() as before_data:
61
  s = df.style
62
  s = s.set_properties(**{'background-color': '#fce4e4'})
63
  st.dataframe(s)
64
 
65
- # Process data
66
  df = modules.data_cleaner(df, drop_missing, remove_duplicates)
 
 
67
 
68
- # Preview data after transformation
69
  with st.container() as after_data:
70
  s = df.style
71
  s = s.set_properties(**{'background-color': '#e4fce4'})
72
  st.dataframe(s)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
 
74
  # Attribution
75
  st.sidebar.markdown("Created by team #2hack2furious for the hackthethreat2023")
 
5
  # Options
6
  DISCLAIMER = "*Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aliquam urna sem, bibendum efficitur pellentesque a, sollicitudin pharetra urna. Nam vel lectus vitae elit luctus feugiat a a purus. Aenean mollis quis ipsum sed ornare. Nunc sit amet ultricies tellus. Vivamus vulputate sem id molestie viverra. Etiam egestas lobortis enim, sit amet lobortis ligula sollicitudin vel. Nunc eget ipsum sollicitudin, convallis.*"
7
 
 
 
 
 
 
 
 
8
  # Page Config
9
  st.set_page_config(layout="wide")
10
 
11
+ ### FILE LOADER for sidebar
12
  with st.sidebar:
13
  st.header("🕵️ 2anonymity")
14
  st.markdown("*Clean and anonymize data*")
 
16
  file = st.file_uploader(f"Upload dataset:", type=modules.SUPPORTED_TYPES, label_visibility="collapsed")
17
  df, (filename, extension), result = modules.load_file(file)
18
 
19
+ ### MAIN
20
+ if df is None: # Await file to be uploaded
21
  rain("🤠")
22
  else:
23
+ ### PRE-TRANSFORM features for sidebar
24
  with st.sidebar:
 
25
  # Options for data cleaning
26
  with st.container() as cleaning_options:
27
+ st.markdown("### Data cleaning options:")
28
  remove_duplicates = st.checkbox("Remove duplicate rows", value=True)
29
  drop_missing = st.checkbox("Remove rows with missing values", value=False)
30
 
31
  # Options for data optimization
32
  with st.container() as anonymizing_options:
33
+ st.markdown("### Anonymizing options:")
34
+ sample_checkbox = st.checkbox("Test checkbox", value=True)
35
+ sample_slider = st.slider("Test slider", min_value=1, max_value=10, value=2)
36
+ sample_number = st.number_input("Test number", min_value=0, max_value=100, value=50)
37
+ sample_dropdown = st.selectbox("Test dropdown", ["A", "B", "C"], index=1)
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
+
40
+ ### DATA PREVIEW AND TRANSFORM
41
+ # Preview data before transform
42
  with st.container() as before_data:
43
  s = df.style
44
  s = s.set_properties(**{'background-color': '#fce4e4'})
45
  st.dataframe(s)
46
 
47
+ # Transform data
48
  df = modules.data_cleaner(df, drop_missing, remove_duplicates)
49
+ df = modules.data_anonymizer(df)
50
+ # download_file = modules.create_file(df, ".csv")
51
 
52
+ # Preview data after transform
53
  with st.container() as after_data:
54
  s = df.style
55
  s = s.set_properties(**{'background-color': '#e4fce4'})
56
  st.dataframe(s)
57
+
58
+
59
+ ### POST-TRANSFORM features for sidebar
60
+ with st.sidebar:
61
+ # Options for download
62
+ with st.container() as download_header:
63
+ st.markdown("### Download")
64
+ output_extension = st.selectbox("File type", [".csv", ".json", ".xlsx"])
65
+
66
+ # Prepare file for download
67
+ with st.container() as downloader:
68
+ if output_extension == ".csv": output_file = df.to_csv().encode("utf-8")
69
+ elif output_extension == ".json": output_file = df.to_json().encode("utf-8")
70
+ elif output_extension == ".xlsx": output_file = df.to_excel().encode("utf-8")
71
+ output_filename = f"""{filename.split(".")[:-1][0]}-clean{output_extension}"""
72
+ st.download_button("Download", output_file, file_name=output_filename)
73
+
74
+ # Add a disclaimer for data security
75
+ with st.container() as disclaimer:
76
+ st.markdown(
77
+ f"""
78
+ Disclaimer:
79
+ {DISCLAIMER}
80
+ """
81
+ )
82
 
83
  # Attribution
84
  st.sidebar.markdown("Created by team #2hack2furious for the hackthethreat2023")
modules.py CHANGED
@@ -36,25 +36,6 @@ def load_file(file):
36
  except Exception as error:
37
  return df, metadata, f"Error: Unable to read file '{filename}' ({type(error)}: {error})"
38
 
39
- def create_file(df, extension):
40
- """
41
- Prepares a dataframe from streamlit for download.
42
-
43
- @type df: pd.DataFrame
44
- @param df: A DataFrame to package into a file.
45
- @type extension: str
46
- @param extension: The desired filetype.
47
- @return: A file container ready for download.
48
- """
49
- export_functions = {
50
- "csv": pd.DataFrame.to_csv,
51
- "json": pd.DataFrame.to_json,
52
- "xlsx": pd.DataFrame.to_excel
53
- }
54
- exporter = export_functions.get(extension, None)
55
- if exporter is None: return None
56
- return exporter(df)
57
-
58
  def data_cleaner(df, drop_missing=False, remove_duplicates=True):
59
  """
60
  Takes a DataFrame and removes empty and duplicate entries.
@@ -70,4 +51,7 @@ def data_cleaner(df, drop_missing=False, remove_duplicates=True):
70
  """
71
  df = df.dropna(how="any" if drop_missing else "all")
72
  if remove_duplicates: df = df.drop_duplicates()
 
 
 
73
  return df
 
36
  except Exception as error:
37
  return df, metadata, f"Error: Unable to read file '{filename}' ({type(error)}: {error})"
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  def data_cleaner(df, drop_missing=False, remove_duplicates=True):
40
  """
41
  Takes a DataFrame and removes empty and duplicate entries.
 
51
  """
52
  df = df.dropna(how="any" if drop_missing else "all")
53
  if remove_duplicates: df = df.drop_duplicates()
54
+ return df
55
+
56
+ def data_anonymizer(df):
57
  return df