ppsingh committed on
Commit
30b4e17
1 Parent(s): ca33833

explore dataframe filtering

Browse files
Files changed (1) hide show
  1. appStore/target.py +101 -2
appStore/target.py CHANGED
@@ -16,6 +16,11 @@ from utils.config import get_classifier_params
16
  from io import BytesIO
17
  import xlsxwriter
18
  import plotly.express as px
 
 
 
 
 
19
 
20
  # Declare all the necessary variables
21
  classifier_identifier = 'target'
@@ -132,8 +137,12 @@ def target_display():
132
  # st.write("\t Text: \t{}".format(hits.iloc[i]['text'].replace("\n", " ")))
133
  # hits = hits.reset_index(drop =True)
134
  st.write('----------------')
135
- st.write('Explore the data')
136
- AgGrid(hits)
 
 
 
 
137
  df_xlsx = to_excel(df,hits)
138
 
139
  with st.sidebar:
@@ -142,6 +151,96 @@ def target_display():
142
  data=df_xlsx ,
143
  file_name= os.path.splitext(os.path.basename(st.session_state['filename']))[0]+'.xlsx')
144
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
  # else:
146
  # st.info("🤔 No Targets found")
147
  # count_df = df['Target Label'].value_counts()
 
16
  from io import BytesIO
17
  import xlsxwriter
18
  import plotly.express as px
19
+ from pandas.api.types import (
20
+ is_categorical_dtype,
21
+ is_datetime64_any_dtype,
22
+ is_numeric_dtype,
23
+ is_object_dtype,)
24
 
25
  # Declare all the necessary variables
26
  classifier_identifier = 'target'
 
137
  # st.write("\t Text: \t{}".format(hits.iloc[i]['text'].replace("\n", " ")))
138
  # hits = hits.reset_index(drop =True)
139
  st.write('----------------')
140
+
141
+
142
+ st.title("Auto Filter Dataframes in Streamlit")
143
+ st.dataframe(filter_dataframe(hits))
144
+ # st.write('Explore the data')
145
+ # AgGrid(hits)
146
  df_xlsx = to_excel(df,hits)
147
 
148
  with st.sidebar:
 
151
  data=df_xlsx ,
152
  file_name= os.path.splitext(os.path.basename(st.session_state['filename']))[0]+'.xlsx')
153
 
154
+ # st.write(
155
+ # """This app accompanies the blog [here](https://blog.streamlit.io/auto-generate-a-dataframe-filtering-ui-in-streamlit-with-filter_dataframe/)
156
+ # and walks you through one example of how the Streamlit
157
+ # Data Science Team builds add-on functions to Streamlit.
158
+ # """
159
+ # )
160
+
161
+
162
def filter_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """
    Adds a UI on top of a dataframe to let viewers filter columns.

    A checkbox toggles the filter UI. When it is ticked, the viewer picks
    the columns to filter on, and one widget is rendered per column:

    * categorical dtype, or fewer than 10 unique values -> multiselect of
      the values to keep;
    * numeric dtype -> range slider between the column min and max;
    * anything else -> free-text box whose content is matched as a
      substring / regular expression against the values rendered as text.

    Filters are applied cumulatively in the order the columns were picked.

    Args:
        df (pd.DataFrame): Original dataframe

    Returns:
        pd.DataFrame: Filtered dataframe. The input object itself is
        returned when the "Add filters" checkbox is not ticked; otherwise
        the result is derived from a copy and the input is left unmodified.
    """
    # Local import: only needed to report an invalid user-supplied pattern.
    import re

    modify = st.checkbox("Add filters")

    if not modify:
        return df

    # Work on a copy so the caller's dataframe is never narrowed in place.
    df = df.copy()

    # NOTE: datetime parsing and date-range filtering are not enabled in
    # this function, so datetime-like columns fall through to the
    # categorical or text branches below.

    modification_container = st.container()

    with modification_container:
        to_filter_columns = st.multiselect("Filter dataframe on", df.columns)
        for column in to_filter_columns:
            left, right = st.columns((1, 20))
            left.write("↳")
            series = df[column]

            # Treat columns with < 10 unique values as categorical.
            # isinstance() on the dtype is the documented replacement for
            # the deprecated pandas.api.types.is_categorical_dtype().
            if (
                isinstance(series.dtype, pd.CategoricalDtype)
                or series.nunique() < 10
            ):
                choices = list(series.unique())
                user_cat_input = right.multiselect(
                    f"Values for {column}",
                    choices,
                    default=choices,
                )
                df = df[series.isin(user_cat_input)]
            elif is_numeric_dtype(series):
                _min = float(series.min())
                _max = float(series.max())
                # This branch is only reached with >= 10 distinct values,
                # so _max > _min and the step is strictly positive.
                step = (_max - _min) / 100
                user_num_input = right.slider(
                    f"Values for {column}",
                    _min,
                    _max,
                    (_min, _max),
                    step=step,
                )
                df = df[series.between(*user_num_input)]
            else:
                user_text_input = right.text_input(
                    f"Substring or regex in {column}",
                )
                if user_text_input:
                    try:
                        # Cast to str so non-string object columns (dates,
                        # mixed types) can be searched instead of raising
                        # on the .str accessor.
                        matches = series.astype(str).str.contains(
                            user_text_input
                        )
                    except re.error as exc:
                        # A half-typed pattern must not crash the app;
                        # report it and leave this column unfiltered.
                        right.error(f"Invalid regular expression: {exc}")
                    else:
                        # Missing values never match (astype(str) would
                        # otherwise turn them into the literal "nan"), and
                        # the mask stays strictly boolean.
                        df = df[matches & series.notna()]

    return df
237
+
238
+
239
+ # df = pd.read_csv(
240
+ # "https://raw.githubusercontent.com/mcnakhaee/palmerpenguins/master/palmerpenguins/data/penguins.csv"
241
+ # )
242
+
243
+
244
  # else:
245
  # st.info("🤔 No Targets found")
246
  # count_df = df['Target Label'].value_counts()