apjanco committed on
Commit
546e172
1 Parent(s): a8b5e63

first commit

Browse files
Files changed (3) hide show
  1. app.py +93 -0
  2. item_data.json +0 -0
  3. requirements.txt +1 -0
app.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import srsly
3
+ import streamlit as st
4
+ from pandas.api.types import (
5
+ is_categorical_dtype,
6
+ is_datetime64_any_dtype,
7
+ is_numeric_dtype,
8
+ is_object_dtype,
9
+ )
10
+
11
+ st.title("DEEP Data Explorer")
12
+
13
+
14
def filter_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """
    Adds a UI on top of a dataframe to let viewers filter columns

    An "Add filters" checkbox is rendered first; while it is unchecked the
    input dataframe is returned untouched. Otherwise a copy is made, the
    viewer picks columns to filter on, and each chosen column gets a widget
    selected by its content:

    * categorical dtype or fewer than 10 unique values -> multiselect
    * numeric dtype -> range slider
    * datetime dtype -> date range picker (whole days, end day inclusive)
    * anything else -> substring / regex text box

    Filters are applied cumulatively, in the order the columns were chosen.

    Args:
        df (pd.DataFrame): Original dataframe

    Returns:
        pd.DataFrame: Filtered dataframe
    """
    # Local import: only needed to report an invalid regex typed by the user.
    import re

    modify = st.checkbox("Add filters")

    if not modify:
        return df

    # Work on a copy so the dtype conversions below never touch the caller's data.
    df = df.copy()

    # Try to convert datetimes into a standard format (datetime, no timezone)
    for col in df.columns:
        if is_object_dtype(df[col]):
            try:
                df[col] = pd.to_datetime(df[col])
            except Exception:
                # Deliberately best-effort: most object columns are not dates
                # and simply stay as they are.
                pass

        if is_datetime64_any_dtype(df[col]):
            df[col] = df[col].dt.tz_localize(None)

    modification_container = st.container()

    with modification_container:
        to_filter_columns = st.multiselect("Filter dataframe on", df.columns)
        for column in to_filter_columns:
            left, right = st.columns((1, 20))
            left.write("↳")

            # nunique() hashes the values, so cells holding lists/dicts raise
            # TypeError; such columns fall through to the text filter instead
            # of crashing the app.
            try:
                unique_count = df[column].nunique()
            except TypeError:
                unique_count = None

            # isinstance check replaces the deprecated is_categorical_dtype().
            is_categorical = isinstance(df[column].dtype, pd.CategoricalDtype)

            # Treat columns with < 10 unique values as categorical
            if is_categorical or (unique_count is not None and unique_count < 10):
                user_cat_input = right.multiselect(
                    f"Values for {column}",
                    df[column].unique(),
                    default=list(df[column].unique()),
                )
                df = df[df[column].isin(user_cat_input)]
            elif is_numeric_dtype(df[column]):
                # At least 10 distinct values here, so _min < _max and step > 0.
                _min = float(df[column].min())
                _max = float(df[column].max())
                step = (_max - _min) / 100
                user_num_input = right.slider(
                    f"Values for {column}",
                    _min,
                    _max,
                    (_min, _max),
                    step=step,
                )
                df = df[df[column].between(*user_num_input)]
            elif is_datetime64_any_dtype(df[column]):
                user_date_input = right.date_input(
                    f"Values for {column}",
                    value=(
                        df[column].min(),
                        df[column].max(),
                    ),
                )
                # The widget yields a single date while the range is still
                # being picked; only filter once both ends are known.
                if len(user_date_input) == 2:
                    start_date, end_date = map(pd.to_datetime, user_date_input)
                    # The picked dates become midnight timestamps. Compare on
                    # the calendar day so rows later on the end date are kept
                    # instead of being silently dropped.
                    day = df[column].dt.normalize()
                    df = df.loc[day.between(start_date, end_date)]
            else:
                user_text_input = right.text_input(
                    f"Substring or regex in {column}",
                )
                if user_text_input:
                    try:
                        # astype(str) lets non-string cells be searched via
                        # their text form; na=False keeps the mask boolean.
                        matches = df[column].astype(str).str.contains(
                            user_text_input, na=False
                        )
                    except re.error as exc:
                        # A half-typed pattern such as "(" must not crash the app.
                        right.warning(f"Invalid regular expression: {exc}")
                    else:
                        # Missing values never match (astype(str) would
                        # otherwise expose them as the text "nan").
                        df = df[matches & df[column].notna()]

    return df
89
+
90
# Load the item records (a JSON object keyed by item id), drop the keys,
# and show the resulting table with the optional filter UI on top.
data = srsly.read_json('item_data.json')
data = list(data.values())
df = pd.DataFrame(data)
st.dataframe(filter_dataframe(df))
item_data.json ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ srsly