Spaces:
Sleeping
Sleeping
shrutisd1003
committed on
Commit
•
3150207
1
Parent(s):
411dde3
count plots modified
Browse files
- app.py +2 -2
- data_analyzer.py +17 -17
app.py
CHANGED
@@ -8,7 +8,6 @@ from data_QA import DataQA
|
|
8 |
import os
|
9 |
from streamlit_option_menu import option_menu
|
10 |
|
11 |
-
|
12 |
import pandas as pd
|
13 |
|
14 |
def main():
|
@@ -22,6 +21,7 @@ def main():
|
|
22 |
data = pd.read_csv("data.csv")
|
23 |
except:
|
24 |
st.write("Please upload a csv file")
|
|
|
25 |
if os.path.getsize("data.csv") != 0:
|
26 |
with st.sidebar:
|
27 |
selected = option_menu(
|
@@ -56,7 +56,7 @@ def main():
|
|
56 |
data_analyzer = DataAnalyzer(data)
|
57 |
data_analyzer.show_null_value_statistics()
|
58 |
new_data_analyzer = DataAnalyzer(modified_data)
|
59 |
-
|
60 |
|
61 |
# modified_data = data_transformer.remove_columns()
|
62 |
|
|
|
8 |
import os
|
9 |
from streamlit_option_menu import option_menu
|
10 |
|
|
|
11 |
import pandas as pd
|
12 |
|
13 |
def main():
|
|
|
21 |
data = pd.read_csv("data.csv")
|
22 |
except:
|
23 |
st.write("Please upload a csv file")
|
24 |
+
|
25 |
if os.path.getsize("data.csv") != 0:
|
26 |
with st.sidebar:
|
27 |
selected = option_menu(
|
|
|
56 |
data_analyzer = DataAnalyzer(data)
|
57 |
data_analyzer.show_null_value_statistics()
|
58 |
new_data_analyzer = DataAnalyzer(modified_data)
|
59 |
+
new_data_analyzer.show_null_value_statistics()
|
60 |
|
61 |
# modified_data = data_transformer.remove_columns()
|
62 |
|
data_analyzer.py
CHANGED
@@ -44,23 +44,23 @@ class DataAnalyzer:
|
|
44 |
null_stats_df.loc[len(null_stats_df)] = ['Total', total_null, (total_null / (total_rows * self.data.shape[1])) * 100]
|
45 |
st.dataframe(null_stats_df, hide_index=True, use_container_width=True)
|
46 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
def show_count_plots(self):
|
48 |
st.subheader("Count Plots")
|
49 |
sns.set(style="whitegrid")
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
ax.set_title(f'Count Plot of {column_name}')
|
58 |
-
ax.set_xticklabels(ax.get_xticklabels())
|
59 |
-
st.pyplot(fig)
|
60 |
-
|
61 |
-
else:
|
62 |
-
fig, ax = plt.subplots(figsize=(10, 6))
|
63 |
-
sns.histplot(data=self.data, x=column_name, bins=20, ax=ax)
|
64 |
-
ax.set_title(f'Histogram of {column_name}')
|
65 |
-
ax.set_xlabel(column_name)
|
66 |
-
st.pyplot(fig)
|
|
|
44 |
null_stats_df.loc[len(null_stats_df)] = ['Total', total_null, (total_null / (total_rows * self.data.shape[1])) * 100]
|
45 |
st.dataframe(null_stats_df, hide_index=True, use_container_width=True)
|
46 |
|
47 |
+
def count_plot(self, column_name):
|
48 |
+
st.write(column_name)
|
49 |
+
unique_values = self.data[column_name].nunique()
|
50 |
+
fig, ax = plt.subplots(figsize=(9, 5))
|
51 |
+
if unique_values <= 12:
|
52 |
+
sns.countplot(data=self.data, x=column_name, ax=ax)
|
53 |
+
else:
|
54 |
+
sns.histplot(data=self.data, x=column_name, bins=20, ax=ax)
|
55 |
+
st.pyplot(fig)
|
56 |
+
|
57 |
def show_count_plots(self):
|
58 |
st.subheader("Count Plots")
|
59 |
sns.set(style="whitegrid")
|
60 |
+
left, right = st.columns(2)
|
61 |
+
with left:
|
62 |
+
for i in range(0, len(self.data.columns), 2):
|
63 |
+
self.count_plot(self.data.columns[i])
|
64 |
+
with right:
|
65 |
+
for i in range(1, len(self.data.columns), 2):
|
66 |
+
self.count_plot(self.data.columns[i])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|