Insights / data_analyzer.py
shrutisd1003's picture
null values handled
577321a
raw
history blame
No virus
2.24 kB
import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
class DataAnalyzer:
    """Render exploratory-data-analysis views of a dataset in a Streamlit app.

    Attributes:
        data: The dataset being analysed. The methods rely on pandas
            DataFrame attributes (``shape``, ``columns``, ``dtypes``,
            ``isnull``, ``describe``).
    """

    def __init__(self, data):
        """Store ``data`` and write the section header to the page.

        Args:
            data: The DataFrame to analyse.
        """
        self.data = data
        # The header is emitted here, so constructing an analyzer renders it.
        st.header("Exploratory Data Analysis")

    def _null_stats(self):
        """Build the per-column table of dtype, null count and null share.

        Returns:
            A DataFrame with one row per column of ``self.data`` and the
            columns 'Column Name', 'Data type', 'Null Values' and
            'Percentage Null'.
        """
        row_count = self.data.shape[0]
        null_counts = self.data.isnull().sum()
        if row_count:
            null_percentages = (null_counts / row_count) * 100
        else:
            # No rows: report 0% instead of the NaN that 0 / 0 would produce.
            null_percentages = null_counts.astype(float)
        # Built positionally (not by label lookup) so duplicated column
        # names still yield exactly one scalar per row.
        return pd.DataFrame({
            'Column Name': list(self.data.columns),
            # Plain strings rather than numpy dtype objects, so the table
            # holds only simple, serialisable values.
            "Data type": [str(dtype) for dtype in self.data.dtypes],
            'Null Values': null_counts.to_numpy(),
            'Percentage Null': null_percentages.to_numpy(),
        })

    def _total_null_percentage(self):
        """Return the percentage of all cells in ``self.data`` that are null.

        Returns:
            The null share as a percentage, or ``0.0`` when the frame has
            no cells (zero rows or zero columns).
        """
        cell_count = self.data.shape[0] * self.data.shape[1]
        if cell_count == 0:
            return 0.0
        return (self.data.isnull().sum().sum() / cell_count) * 100

    def show_eda(self):
        """Write the dataset size, per-column null table and overall null share."""
        st.subheader("Summary")
        st.write("Number of rows:", self.data.shape[0])
        st.write("Number of columns:", self.data.shape[1])
        st.dataframe(
            self._null_stats(),
            hide_index=True,
            use_container_width=True,
        )
        st.write(
            "Total percentage of null values:",
            self._total_null_percentage(),
        )

    def show_summary_statistics(self):
        """Show ``describe()`` tables once the user presses the button."""
        if not st.button('Show Summary Statistics'):
            return
        if self.data.shape[1] == 0:
            # describe() raises ValueError on a frame without columns.
            st.write("No columns to describe.")
            return
        st.write(self.data.describe())
        # describe(include=object) raises ValueError when no column matches,
        # so only request it when object columns exist.
        if self.data.select_dtypes(include=object).shape[1] > 0:
            st.write(self.data.describe(include=object))

    def count_plot(self, column_name):
        """Plot the value distribution of one column.

        Columns with at most 12 distinct values get a count plot; all
        others get a 20-bin histogram.

        Args:
            column_name: Label of the column in ``self.data`` to plot.
        """
        st.write(column_name)
        unique_values = self.data[column_name].nunique()
        fig, ax = plt.subplots(figsize=(9, 5))
        try:
            if unique_values <= 12:
                sns.countplot(data=self.data, x=column_name, ax=ax)
            else:
                sns.histplot(data=self.data, x=column_name, bins=20, ax=ax)
            st.pyplot(fig)
        finally:
            # pyplot keeps every figure registered until it is closed;
            # release it so figures do not accumulate across reruns.
            plt.close(fig)

    def show_count_plots(self):
        """Plot every column, alternating between two side-by-side page columns."""
        st.subheader("Count Plots")
        sns.set(style="whitegrid")
        left, right = st.columns(2)
        # Even-positioned columns go to the left, odd-positioned to the right.
        with left:
            for column_name in self.data.columns[::2]:
                self.count_plot(column_name)
        with right:
            for column_name in self.data.columns[1::2]:
                self.count_plot(column_name)