InfoVoyager / helpers /utils.py
NEXAS's picture
Upload 12 files
f322558 verified
import streamlit as st
import pandas as pd
def clicked(button):
    """Record that *button* has been pressed.

    Sets the entry for ``button`` in ``st.session_state.clicked`` to ``True``.

    Args:
        button: Key identifying the button inside ``st.session_state.clicked``.
    """
    pressed_flags = st.session_state.clicked
    pressed_flags[button] = True
def checkbox_clicked(button):
    """Toggle the stored on/off state of one checkbox.

    Reads the current entry for ``button`` in
    ``st.session_state.checkbox_menu`` and writes back its logical negation.

    Args:
        button: Key identifying the checkbox inside
            ``st.session_state.checkbox_menu``. The entry is looked up before
            it is written, so it is expected to exist already.
    """
    menu_state = st.session_state.checkbox_menu
    # Use `not` rather than `== False`: identical for real booleans, but it
    # also turns a falsy placeholder such as None into True instead of leaving
    # the checkbox permanently off (None == False evaluates to False).
    menu_state[button] = not menu_state[button]
def additional_clicked_fun(button):
    """Increment the refresh counter kept for *button*.

    Adds one to the entry for ``button`` in ``st.session_state.refreshed``.

    Args:
        button: Key identifying the counter inside
            ``st.session_state.refreshed``.
    """
    refresh_counts = st.session_state.refreshed
    refresh_counts[button] += 1
@st.cache_data
def describe_dataframe(df):
    """Return a single-sentence-style text summary of a DataFrame.

    Every column contributes one fragment containing its name, dtype and
    whether it holds any null values. Columns that
    ``pd.api.types.is_numeric_dtype`` reports as numeric also list their
    max, min and mean (mean formatted to two decimals). The fragments are
    joined with ``"; "`` and prefixed with the row and column counts.

    Args:
        df: The DataFrame to summarise.

    Returns:
        str: The assembled description.
    """
    fragments = []
    for name in df.columns:
        series = df[name]
        dtype = series.dtype

        # Only presence/absence of nulls is reported, not the count.
        if series.isnull().sum() > 0:
            missing_note = "has some missing values"
        else:
            missing_note = "has no missing values"

        # Non-numeric columns get the short form and skip the statistics.
        if not pd.api.types.is_numeric_dtype(series):
            fragments.append(f"{name} - type: {dtype}, {missing_note}")
            continue

        highest = series.max()
        lowest = series.min()
        average = series.mean()
        fragments.append(
            f"{name} (numeric) - type: {dtype}, {missing_note}, "
            f"max: {highest}, min: {lowest}, mean: {average:.2f}"
        )

    column_details = "; ".join(fragments)
    n_rows = len(df)
    n_cols = len(df.columns)
    return (
        f"The dataset has {n_rows} rows and {n_cols} columns. "
        f"Column details: {column_details}."
    )
@st.cache_data
def to_show(df, show_selected, rows_to_show):
    """Pick a slice of *df* to display and report the frame's dimensions.

    Writes a line with the row and column counts via ``st.write`` and then
    returns the rows chosen by ``show_selected``.

    Args:
        df: The DataFrame to take rows from.
        show_selected: One of ``'First few rows'``, ``'Last few rows'`` or
            ``'Random'``.
        rows_to_show: Number of rows requested.

    Returns:
        The result of ``df.head``, ``df.tail`` or ``df.sample`` for the
        selected mode.

    Raises:
        KeyError: If ``show_selected`` is not one of the three labels above.
    """
    # Each option is wrapped in a callable so that only the selected one is
    # evaluated. Building all three eagerly made every call run df.sample(),
    # which raises ValueError when rows_to_show exceeds len(df) - even if the
    # caller only asked for the first or last rows.
    selectors = {
        'First few rows': lambda: df.head(rows_to_show),
        'Last few rows': lambda: df.tail(rows_to_show),
        # head()/tail() tolerate a request larger than the frame; clamp the
        # sample size so 'Random' behaves the same way instead of raising.
        'Random': lambda: df.sample(min(rows_to_show, len(df))),
    }
    st.write(f'There are {len(df)} rows and {len(df.columns)} columns.')
    return selectors[show_selected]()