import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import streamlit as st


class DataAnalyzer:
    """Render exploratory-data-analysis views of a DataFrame in Streamlit.

    Every ``show_*`` method writes directly to the current Streamlit page
    and returns ``None``.
    """

    def __init__(self, data: pd.DataFrame) -> None:
        """Store the DataFrame to analyse and emit the page header.

        Args:
            data: The DataFrame that all ``show_*`` methods inspect.
        """
        self.data = data
        st.header("Exploratory Data Analysis")

    def show_eda(self) -> None:
        """Show the row/column counts and a table of columns per dtype."""
        st.write("Number of rows:", self.data.shape[0])
        st.write("Number of columns:", self.data.shape[1])

        # Group column labels under the string form of their dtype,
        # preserving first-seen order of both dtypes and columns.
        columns_by_dtype: dict[str, list[str]] = {}
        for column_name, dtype in self.data.dtypes.items():
            # Labels may be non-string (e.g. integer headers), so stringify
            # them here; str.join would otherwise raise TypeError.
            columns_by_dtype.setdefault(str(dtype), []).append(
                str(column_name)
            )

        rows = [
            [dtype, ', '.join(names)]
            for dtype, names in columns_by_dtype.items()
        ]
        df = pd.DataFrame(rows, columns=["Data Type", "Column Names"])

        st.subheader("Columns by Data Type")
        st.dataframe(df, hide_index=True, use_container_width=True)

    def show_summary_statistics(self) -> None:
        """Show ``describe()`` tables once the user presses the button.

        The default summary is always shown when the frame has columns.
        The object-dtype summary is shown only when such columns exist,
        because ``describe(include=object)`` raises ``ValueError`` when
        nothing matches the requested dtype.
        """
        if not st.button('Show Summary Statistics'):
            return

        if self.data.shape[1] == 0:
            # describe() refuses to run on a frame without columns.
            st.info("No columns available to summarize.")
            return

        st.write(self.data.describe())

        has_object_columns = (
            self.data.select_dtypes(include=object).shape[1] > 0
        )
        if has_object_columns:
            st.write(self.data.describe(include=object))

    def show_null_value_statistics(self) -> None:
        """Show per-column null counts and percentages plus a total row.

        Per-column percentages are relative to the number of rows; the
        ``Total`` row's percentage is relative to the number of cells.
        For a frame with no rows or no cells the percentages are reported
        as 0.0 rather than dividing by zero.
        """
        st.subheader("Null Value Statistics")

        null_counts = self.data.isnull().sum()
        total_null = null_counts.sum()
        total_rows, total_columns = self.data.shape
        total_cells = total_rows * total_columns

        if total_rows:
            null_percentages = (null_counts / total_rows) * 100
        else:
            # No rows means every count is 0; report 0.0 instead of NaN.
            null_percentages = null_counts.astype(float)

        if total_cells:
            total_percentage = (total_null / total_cells) * 100
        else:
            total_percentage = 0.0

        null_stats_df = pd.DataFrame({
            'Column Name': null_counts.index,
            'Null Values': null_counts.values,
            'Percentage Null': null_percentages.values,
        })
        # Append the summary row after all per-column rows.
        null_stats_df.loc[len(null_stats_df)] = [
            'Total',
            total_null,
            total_percentage,
        ]

        st.dataframe(
            null_stats_df, hide_index=True, use_container_width=True
        )

    def show_count_plots(
        self, *, max_categories: int = 12, bins: int = 20
    ) -> None:
        """Plot every column as a count plot or a histogram.

        Args:
            max_categories: Columns with at most this many distinct
                non-null values get a count plot; all others get a
                histogram.
            bins: Number of bins passed to the histogram.
        """
        st.subheader("Count Plots")
        sns.set(style="whitegrid")

        for column_name in self.data.columns:
            unique_values = self.data[column_name].nunique()

            fig, ax = plt.subplots(figsize=(10, 6))
            try:
                if unique_values <= max_categories:
                    sns.countplot(data=self.data, x=column_name, ax=ax)
                    ax.set_title(f'Count Plot of {column_name}')
                else:
                    sns.histplot(
                        data=self.data, x=column_name, bins=bins, ax=ax
                    )
                    ax.set_title(f'Histogram of {column_name}')
                    ax.set_xlabel(column_name)
                st.pyplot(fig)
            finally:
                # pyplot keeps every figure it creates registered until it
                # is closed; without this, figures pile up across reruns.
                plt.close(fig)