Bayhaqy committed on
Commit
31f0043
1 Parent(s): bfdebbf

Create function.py

Browse files
Files changed (1) hide show
  1. function.py +84 -0
function.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from pandas.api.types import (
4
+ is_categorical_dtype,
5
+ is_datetime64_any_dtype,
6
+ is_numeric_dtype,
7
+ is_object_dtype,
8
+ )
9
+
10
# NOTE: this function is intentionally NOT wrapped in ``st.cache_data``.
# It is driven entirely by widgets, must re-run on every interaction, and the
# ``experimental_allow_widgets`` option it previously relied on is deprecated.
def filter_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """
    Adds a UI on top of a dataframe to let viewers filter columns

    A checkbox toggles the filter UI. When enabled, the viewer picks the
    columns to filter on and gets one widget per column, chosen from the
    column's data:

    * categorical dtype or fewer than 10 unique values -> multiselect
    * numeric -> range slider
    * datetime -> date range picker (both end dates inclusive)
    * anything else -> substring / regex text box

    Filters are applied cumulatively in the order the columns were selected.

    Args:
        df (pd.DataFrame): Original dataframe

    Returns:
        pd.DataFrame: Filtered dataframe. The input is returned unchanged
        (same object) when the "Add filters" checkbox is not ticked;
        otherwise a filtered copy is returned and the input is not mutated.
    """
    import re  # local import: only needed to validate the viewer's regex

    modify = st.checkbox("Add filters")

    if not modify:
        return df

    df = df.copy()

    # Try to convert datetimes into a standard format (datetime, no timezone)
    for col in df.columns:
        if is_object_dtype(df[col]):
            try:
                df[col] = pd.to_datetime(df[col])
            except (ValueError, TypeError, OverflowError):
                # Best effort: the column simply is not date-like, keep it.
                pass

        if is_datetime64_any_dtype(df[col]):
            df[col] = df[col].dt.tz_localize(None)

    modification_container = st.container()

    with modification_container:
        to_filter_columns = st.multiselect("Filter dataframe on", df.columns)
        for column in to_filter_columns:
            left, right = st.columns((1, 20))
            left.write("↳")
            # Treat columns with < 10 unique values as categorical
            is_categorical = isinstance(df[column].dtype, pd.CategoricalDtype)
            if is_categorical or df[column].nunique() < 10:
                user_cat_input = right.multiselect(
                    f"Values for {column}",
                    df[column].unique(),
                    default=list(df[column].unique()),
                )
                df = df[df[column].isin(user_cat_input)]
            elif is_numeric_dtype(df[column]):
                _min = float(df[column].min())
                _max = float(df[column].max())
                # >= 10 distinct values are guaranteed here, so _max > _min
                # and the step is strictly positive.
                step = (_max - _min) / 100
                user_num_input = right.slider(
                    f"Values for {column}",
                    _min,
                    _max,
                    (_min, _max),
                    step=step,
                )
                df = df[df[column].between(*user_num_input)]
            elif is_datetime64_any_dtype(df[column]):
                user_date_input = right.date_input(
                    f"Values for {column}",
                    value=(
                        df[column].min(),
                        df[column].max(),
                    ),
                )
                # While the viewer is mid-selection only one date is
                # returned; wait for both ends before filtering.
                if len(user_date_input) == 2:
                    start_date, end_date = map(pd.to_datetime, user_date_input)
                    # The picker yields dates (midnight). Use an exclusive
                    # upper bound at the following midnight so rows later on
                    # the end day are kept instead of silently dropped.
                    next_day = end_date + pd.Timedelta(days=1)
                    in_range = (df[column] >= start_date) & (df[column] < next_day)
                    df = df.loc[in_range]
            else:
                user_text_input = right.text_input(
                    f"Substring or regex in {column}",
                )
                if user_text_input:
                    try:
                        re.compile(user_text_input)
                    except re.error as exc:
                        # A half-typed pattern must not crash the app.
                        right.warning(f"Invalid regular expression: {exc}")
                    else:
                        # astype(str) lets non-string columns be searched and
                        # keeps the mask free of NaN (which cannot index).
                        as_text = df[column].astype(str)
                        df = df[as_text.str.contains(user_text_input, na=False)]
    return df