shrutisd1003 committed on
Commit
3150207
1 Parent(s): 411dde3

count plots modified

Browse files
Files changed (2) hide show
  1. app.py +2 -2
  2. data_analyzer.py +17 -17
app.py CHANGED
@@ -8,7 +8,6 @@ from data_QA import DataQA
8
  import os
9
  from streamlit_option_menu import option_menu
10
 
11
-
12
  import pandas as pd
13
 
14
  def main():
@@ -22,6 +21,7 @@ def main():
22
  data = pd.read_csv("data.csv")
23
  except:
24
  st.write("Please upload a csv file")
 
25
  if os.path.getsize("data.csv") != 0:
26
  with st.sidebar:
27
  selected = option_menu(
@@ -56,7 +56,7 @@ def main():
56
  data_analyzer = DataAnalyzer(data)
57
  data_analyzer.show_null_value_statistics()
58
  new_data_analyzer = DataAnalyzer(modified_data)
59
- data_analyzer.show_null_value_statistics()
60
 
61
  # modified_data = data_transformer.remove_columns()
62
 
 
8
  import os
9
  from streamlit_option_menu import option_menu
10
 
 
11
  import pandas as pd
12
 
13
  def main():
 
21
  data = pd.read_csv("data.csv")
22
  except:
23
  st.write("Please upload a csv file")
24
+
25
  if os.path.getsize("data.csv") != 0:
26
  with st.sidebar:
27
  selected = option_menu(
 
56
  data_analyzer = DataAnalyzer(data)
57
  data_analyzer.show_null_value_statistics()
58
  new_data_analyzer = DataAnalyzer(modified_data)
59
+ new_data_analyzer.show_null_value_statistics()
60
 
61
  # modified_data = data_transformer.remove_columns()
62
 
data_analyzer.py CHANGED
@@ -44,23 +44,23 @@ class DataAnalyzer:
44
  null_stats_df.loc[len(null_stats_df)] = ['Total', total_null, (total_null / (total_rows * self.data.shape[1])) * 100]
45
  st.dataframe(null_stats_df, hide_index=True, use_container_width=True)
46
 
 
 
 
 
 
 
 
 
 
 
47
  def show_count_plots(self):
48
  st.subheader("Count Plots")
49
  sns.set(style="whitegrid")
50
-
51
- for column_name in self.data.columns:
52
- unique_values = self.data[column_name].nunique()
53
-
54
- if unique_values <= 12:
55
- fig, ax = plt.subplots(figsize=(10, 6))
56
- sns.countplot(data=self.data, x=column_name, ax=ax)
57
- ax.set_title(f'Count Plot of {column_name}')
58
- ax.set_xticklabels(ax.get_xticklabels())
59
- st.pyplot(fig)
60
-
61
- else:
62
- fig, ax = plt.subplots(figsize=(10, 6))
63
- sns.histplot(data=self.data, x=column_name, bins=20, ax=ax)
64
- ax.set_title(f'Histogram of {column_name}')
65
- ax.set_xlabel(column_name)
66
- st.pyplot(fig)
 
44
  null_stats_df.loc[len(null_stats_df)] = ['Total', total_null, (total_null / (total_rows * self.data.shape[1])) * 100]
45
  st.dataframe(null_stats_df, hide_index=True, use_container_width=True)
46
 
47
+ def count_plot(self, column_name):
48
+ st.write(column_name)
49
+ unique_values = self.data[column_name].nunique()
50
+ fig, ax = plt.subplots(figsize=(9, 5))
51
+ if unique_values <= 12:
52
+ sns.countplot(data=self.data, x=column_name, ax=ax)
53
+ else:
54
+ sns.histplot(data=self.data, x=column_name, bins=20, ax=ax)
55
+ st.pyplot(fig)
56
+
57
  def show_count_plots(self):
58
  st.subheader("Count Plots")
59
  sns.set(style="whitegrid")
60
+ left, right = st.columns(2)
61
+ with left:
62
+ for i in range(0, len(self.data.columns), 2):
63
+ self.count_plot(self.data.columns[i])
64
+ with right:
65
+ for i in range(1, len(self.data.columns), 2):
66
+ self.count_plot(self.data.columns[i])