File size: 1,974 Bytes
f322558
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import streamlit as st
import pandas as pd

def clicked(button):
    """Mark ``button`` as pressed.

    Stores ``True`` under ``button`` in ``st.session_state.clicked``.
    """
    pressed_flags = st.session_state.clicked
    pressed_flags[button] = True

def checkbox_clicked(button):
    """Toggle the flag stored for ``button`` in the checkbox menu state.

    Reads ``st.session_state.checkbox_menu[button]`` and writes back its
    logical negation.

    Args:
        button: Key of the entry in ``st.session_state.checkbox_menu``.
    """
    menu = st.session_state.checkbox_menu
    # Negate by truthiness instead of comparing with ``== False``: the
    # comparison left non-bool falsy values (e.g. None) stuck at False
    # instead of switching them on.
    menu[button] = not menu[button]

def additional_clicked_fun(button):
    """Add one to the value stored for ``button`` in ``st.session_state.refreshed``."""
    refresh_counts = st.session_state.refreshed
    refresh_counts[button] += 1

@st.cache_data
def describe_dataframe(df):
    """Build a single-string, plain-text summary of a DataFrame.

    Every column contributes one clause with its dtype and whether it
    contains missing values. Columns that pandas considers numeric also
    report their max, min and mean (mean formatted to two decimals).

    Args:
        df: The pandas DataFrame to summarise.

    Returns:
        A string of the form ``"The dataset has R rows and C columns.
        Column details: <clause>; <clause>."``.
    """
    column_descriptions = []

    # Walk (label, Series) pairs instead of indexing ``df[label]``: when
    # column labels are duplicated, ``df[label]`` is a DataFrame, which has
    # no ``.dtype`` attribute and would crash the summary.
    for column, series in df.items():
        col_type = series.dtype
        if series.isnull().any():
            null_info = "has some missing values"
        else:
            null_info = "has no missing values"

        if pd.api.types.is_numeric_dtype(series):
            column_descriptions.append(
                f"{column} (numeric) - type: {col_type}, {null_info}, "
                f"max: {series.max()}, min: {series.min()}, "
                f"mean: {series.mean():.2f}"
            )
        else:
            # Non-numeric columns (strings, datetimes, ...) only get the
            # basic dtype / missing-value clause.
            column_descriptions.append(
                f"{column} - type: {col_type}, {null_info}"
            )

    detailed_description = "; ".join(column_descriptions)

    return (
        f"The dataset has {len(df)} rows and {len(df.columns)} columns. "
        f"Column details: {detailed_description}."
    )

@st.cache_data
def to_show(df, show_selected, rows_to_show):
    """Return the slice of ``df`` chosen by ``show_selected``.

    Also writes a one-line row/column count to the Streamlit page.

    Args:
        df: The pandas DataFrame to preview.
        show_selected: One of ``'First few rows'``, ``'Last few rows'`` or
            ``'Random'``.
        rows_to_show: Number of rows requested.

    Returns:
        ``df.head(rows_to_show)``, ``df.tail(rows_to_show)``, or a random
        sample of at most ``rows_to_show`` rows, depending on
        ``show_selected``.

    Raises:
        KeyError: If ``show_selected`` is not one of the three options.
    """
    st.write(f'There are {len(df)} rows and {len(df.columns)} columns.')

    # Dispatch lazily so only the requested view is computed. Building all
    # three eagerly made every option fail whenever ``df.sample`` raised
    # (it refuses to draw more rows than exist without replacement).
    selectors = {
        'First few rows': lambda: df.head(rows_to_show),
        'Last few rows': lambda: df.tail(rows_to_show),
        # Cap the sample size at the row count, mirroring how head/tail
        # simply return fewer rows when the frame is short.
        'Random': lambda: df.sample(min(rows_to_show, len(df))),
    }

    return selectors[show_selected]()