# Spaces status (page header captured together with the source): Runtime error
# importing the libraries | |
import numpy as np | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
import plotly.express as px | |
import streamlit as st | |
def split_columns_by_dtype(df: pd.DataFrame) -> tuple:
    """Partition the column names of *df* into non-numeric and numeric.

    Args:
        df: The data frame whose columns are classified.

    Returns:
        A ``(categorical, numeric)`` pair of lists of column names, each in
        the original column order. "Numeric" is decided with
        ``pd.api.types.is_numeric_dtype`` so that text columns are classified
        correctly whether pandas stores them as ``object`` or as a dedicated
        string dtype.
    """
    categorical = []
    numeric = []
    for name, dtype in df.dtypes.items():
        if pd.api.types.is_numeric_dtype(dtype):
            numeric.append(name)
        else:
            categorical.append(name)
    return categorical, numeric


def fill_missing_values(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with missing values imputed column by column.

    Numeric columns are filled with the column mean; every other column is
    filled with its most frequent value (the first mode). A column that holds
    no non-null value at all has nothing to impute from and is left as is.

    Args:
        df: The data frame to impute. It is not modified.

    Returns:
        A new data frame with the imputed columns.
    """
    filled = df.copy()
    for name in filled.columns:
        column = filled[name]
        if not column.isna().any():
            continue
        if pd.api.types.is_numeric_dtype(column):
            replacement = column.mean()
        else:
            modes = column.mode()
            if modes.empty:
                # Entirely null column: mode() is empty, so there is no
                # value to fill with.
                continue
            replacement = modes.iloc[0]
        # Assign the result back instead of calling fillna(inplace=True) on
        # the selected column, which is chained assignment and does not
        # update the frame under pandas copy-on-write.
        filled[name] = column.fillna(replacement)
    return filled


def main() -> None:
    """Render the example EDA app: upload a CSV, inspect nulls, plot columns."""
    # Title and Markdown
    st.title("AN EXAMPLE EDA APP")
    st.markdown(
        ''' <h3>This is an example of how to do EDA in streamlit app</h3>''',
        unsafe_allow_html=True,
    )

    # File upload
    file_up = st.file_uploader("Upload a file", type='csv')
    # Nothing below makes sense without data, so stop until a file is given.
    if file_up is None:
        return

    try:
        df = pd.read_csv(file_up)
    except (pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        st.error(f"Could not read the uploaded CSV: {exc}")
        return
    st.success("File uploaded successfully")

    obj, int_float = split_columns_by_dtype(df)

    # Replace null values with the column mean (numeric columns) or the
    # column mode (all other columns) when the button is pressed.
    # NOTE: the CSV is re-read on every rerun, so the imputation only applies
    # to the rerun triggered by the button press.
    with st.form(key='my_form'):
        with st.sidebar:
            st.sidebar.header("To remove NULL values press below button")
            submit_button = st.form_submit_button(label="Remove NULL")
    if submit_button:
        df = fill_missing_values(df)

    # Number of null values in each column, and across the whole frame.
    null_counts = df.isna().sum()
    total_nulls = int(null_counts.sum())

    # With no nulls only the total is shown; otherwise a per-column bar plot
    # is drawn as well.
    if total_nulls == 0:
        st.write("Total no. of NULL values: ", str(total_nulls))
    else:
        st.write("Bar plot to know the number of NULL values in each column")
        st.write("Total number of null values: ", str(total_nulls))
        fig = px.bar(
            x=list(df.columns),
            y=null_counts.tolist(),
            labels={'x': "Column Names", 'y': "No. of Null values"},
        )
        st.plotly_chart(fig)

    # Frequency Plot
    st.sidebar.header("Select variable")
    selected = st.sidebar.selectbox('Object variables', obj)
    st.write("Bar Plot to know the frequency of each category")
    if selected is None:
        # selectbox returns None when there are no options to choose from.
        st.write("No non-numeric columns available")
    else:
        frequency = df[selected].value_counts()
        # Pass the categories and counts as plain arrays: the name of the
        # value_counts() result differs between pandas versions, so it cannot
        # be referenced reliably as a column.
        fig2 = px.bar(
            x=frequency.index,
            y=frequency.values,
            labels={'x': selected, 'y': 'count'},
        )
        st.plotly_chart(fig2)

    # Correlation chart
    st.sidebar.header("Select variable")
    selected2 = st.sidebar.multiselect("Variables", int_float)
    st.write("Scatter plot for correlation")
    if len(selected2) == 2:
        fig3 = px.scatter(df, x=selected2[0], y=selected2[1])
        st.plotly_chart(fig3)
    else:
        st.write("Select any 2 variables only")


# Streamlit executes the script as ``__main__``, so the app still renders on
# `streamlit run`, while importing this module has no side effects.
if __name__ == "__main__":
    main()