de-identification-leaderboard / utils /filterable_dataframe.py
zhaorui-nb
fix filter search bug
2008979
raw
history blame contribute delete
No virus
2.09 kB
import streamlit as st
import re
def _regex_matches(values, pattern):
    """Return the string entries of *values* that *pattern* matches.

    Non-string entries (e.g. NaN/None in an object column) are skipped
    because ``re`` can only search strings.

    Raises:
        re.error: if *pattern* is not a valid regular expression.
    """
    compiled = re.compile(pattern)
    return [value for value in values
            if isinstance(value, str) and compiled.search(value)]


def _apply_filters(df, filters):
    """Return the rows of *df* that satisfy every entry in *filters*.

    *filters* maps a column name either to a list of allowed values or to
    a ``(low, high)`` tuple describing an inclusive range.
    """
    filtered_df = df
    for column, value in filters.items():
        if isinstance(value, list):
            # An empty list keeps no rows, which is what a regex with no
            # matches should produce.
            filtered_df = filtered_df[filtered_df[column].isin(value)]
        elif isinstance(value, tuple):
            low, high = value
            filtered_df = filtered_df[
                (filtered_df[column] >= low) & (filtered_df[column] <= high)
            ]
    return filtered_df


def filterable_dataframe(df):
    """Render Streamlit filter widgets for *df* and return the filtered frame.

    The user first picks which columns to filter. For each picked column:

    * ``object`` dtype columns get a multiselect of the column's unique
      values plus a free-text regular-expression box; a row is kept when
      its value was selected or matches the pattern.
    * every other column gets a range slider between the column's min and
      max; a row is kept when its value lies inside the chosen range
      (inclusive). An untouched slider adds no filter.

    Args:
        df: the pandas DataFrame to filter.

    Returns:
        The filtered DataFrame (``df`` itself when no filter is active).
    """
    # Multiselect for choosing which columns to filter on.
    selected_columns = st.multiselect('Select columns to filter', options=df.columns)
    filters = {}

    # Build the filter widgets dynamically, one expander per chosen column.
    for column in selected_columns:
        with st.expander(f'Filter by {column}', expanded=True):
            if df[column].dtype == 'object':
                unique_values = df[column].unique()
                allowed_values = list(
                    st.multiselect(f'Select values for {column}', options=list(unique_values))
                )
                pattern_text = st.text_input(f'Search re pattern {column}')

                pattern_applied = False
                if pattern_text:
                    try:
                        matched = _regex_matches(unique_values, pattern_text)
                    except re.error as exc:
                        # A half-typed pattern must not crash the app;
                        # report it and ignore the pattern for this run.
                        st.error(f'Invalid regular expression: {exc}')
                    else:
                        pattern_applied = True
                        if matched:
                            allowed_values.extend(matched)
                        else:
                            st.write('No match value')

                # A valid pattern with no matches (and nothing picked in the
                # multiselect) registers an empty allow-list so the result is
                # empty, without a '' sentinel that would also keep rows whose
                # cell really is an empty string.
                if allowed_values or pattern_applied:
                    filters[column] = allowed_values
            else:
                min_value, max_value = df[column].min(), df[column].max()
                selected_range = st.slider(
                    f'Select range for {column}',
                    min_value,
                    max_value,
                    (min_value, max_value),
                )
                # Only filter when the user actually narrowed the range.
                if selected_range != (min_value, max_value):
                    filters[column] = selected_range

    # Filter the dataframe according to the collected conditions.
    return _apply_filters(df, filters)