RKoops committed on
Commit
c1a910b
1 Parent(s): 937b07d

Upload 3 files

Browse files
WK2_Airbnb_Amsterdam_listings_proj_solution.csv ADDED
The diff for this file is too large to render. See raw diff
 
app.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import streamlit as st
3
+ from pandas.api.types import (
4
+ is_categorical_dtype,
5
+ is_datetime64_any_dtype,
6
+ is_numeric_dtype,
7
+ is_object_dtype
8
+ )
9
+
10
# Page header followed by a short pointer to the blog post this app is based on.
st.title("Filter your Airbnb Listings dataframe!")

# Markdown only renders an inline link when "[text]" is immediately followed
# by "(url)"; keeping them on one logical line makes "here" clickable instead
# of showing the literal brackets.
st.write(
    "This app is based on this blog "
    "[here](https://blog.streamlit.io/auto-generate-a-dataframe-filtering-ui-in-streamlit-with-filter_dataframe/).\n"
    "Can you think of ways to extend it with visuals?\n"
)
18
+
19
+
20
def filter_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """Add a filtering UI on top of a dataframe and return the filtered rows.

    A checkbox labelled "Add filters" gates the whole UI; when it is not
    ticked the input dataframe is returned untouched. Otherwise the viewer
    picks columns to filter on and gets one widget per column, chosen by
    the column's dtype:

    * categorical dtype, or fewer than 10 unique values -> multiselect
    * numeric -> range slider
    * datetime -> date range picker (both end days inclusive)
    * anything else -> free-text substring / regex search

    Args:
        df (pd.DataFrame): Original dataframe. It is never mutated; filtering
            is applied to a copy.

    Returns:
        pd.DataFrame: The rows that satisfy every active filter.
    """
    import re  # local import: only used to validate the user's search pattern

    modify = st.checkbox("Add filters")

    if not modify:
        return df

    df = df.copy()

    # Try to convert datetimes into a standard format (datetime, no timezone)
    for col in df.columns:
        if is_object_dtype(df[col]):
            try:
                df[col] = pd.to_datetime(df[col])
            except Exception:
                # Deliberate best effort: a column that cannot be parsed as
                # datetimes simply keeps its original dtype.
                pass

        if is_datetime64_any_dtype(df[col]):
            df[col] = df[col].dt.tz_localize(None)

    modification_container = st.container()

    with modification_container:
        to_filter_columns = st.multiselect("Filter dataframe on", df.columns)
        for column in to_filter_columns:
            left, right = st.columns((1, 20))
            left.write("↳")

            # isinstance check replaces the deprecated is_categorical_dtype().
            is_categorical = isinstance(df[column].dtype, pd.CategoricalDtype)

            # Treat columns with < 10 unique values as categorical
            if is_categorical or df[column].nunique() < 10:
                user_cat_input = right.multiselect(
                    f"Values for {column}",
                    df[column].unique(),
                    default=list(df[column].unique()),
                )
                df = df[df[column].isin(user_cat_input)]
            elif is_numeric_dtype(df[column]):
                _min = float(df[column].min())
                _max = float(df[column].max())
                step = (_max - _min) / 100
                user_num_input = right.slider(
                    f"Values for {column}",
                    _min,
                    _max,
                    (_min, _max),
                    step=step,
                )
                df = df[df[column].between(*user_num_input)]
            elif is_datetime64_any_dtype(df[column]):
                user_date_input = right.date_input(
                    f"Values for {column}",
                    value=(
                        df[column].min(),
                        df[column].max(),
                    ),
                )
                # While the viewer is still picking the second date the
                # widget yields a single value; only filter on a full range.
                if len(user_date_input) == 2:
                    start_date, end_date = map(pd.to_datetime, user_date_input)
                    # The picker returns calendar dates (midnight), so compare
                    # on the day only; otherwise rows later on the end day
                    # would be dropped even with the default selection.
                    day = df[column].dt.normalize()
                    df = df.loc[day.between(start_date, end_date)]
            else:
                user_text_input = right.text_input(
                    f"Substring or regex in {column}",
                )
                if user_text_input:
                    # Fall back to a literal substring search when the input
                    # is not a valid regular expression (e.g. "a(" or "*").
                    try:
                        re.compile(user_text_input)
                        use_regex = True
                    except re.error:
                        use_regex = False

                    # Cast to str so non-string cells are searchable, and
                    # exclude missing cells explicitly so the mask is purely
                    # boolean and "nan" never matches a real search term.
                    as_text = df[column].astype(str)
                    matches = as_text.str.contains(
                        user_text_input, regex=use_regex, na=False
                    )
                    df = df[matches & df[column].notna()]

    return df
93
+
94
+
95
# Load the listings CSV by its relative path, run it through the filter UI,
# and render whatever rows remain as an interactive table.
df = pd.read_csv("WK2_Airbnb_Amsterdam_listings_proj_solution.csv")
filtered_listings = filter_dataframe(df)
st.dataframe(filtered_listings)
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ pandas
2
+ streamlit