Atharva Thakur commited on
Commit
a0155bf
0 Parent(s):

Initial Commit

Browse files
Files changed (9) hide show
  1. .gitignore +161 -0
  2. README.md +33 -0
  3. app.py +28 -0
  4. data_analyzer.py +13 -0
  5. data_filter.py +12 -0
  6. data_loader.py +21 -0
  7. data_transformer.py +14 -0
  8. data_visualizer.py +49 -0
  9. requirements.txt +5 -0
.gitignore ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .aider*
2
+ # Byte-compiled / optimized / DLL files
3
+ __pycache__/
4
+ *.py[cod]
5
+ *$py.class
6
+
7
+ # C extensions
8
+ *.so
9
+
10
+ # Distribution / packaging
11
+ .Python
12
+ build/
13
+ develop-eggs/
14
+ dist/
15
+ downloads/
16
+ eggs/
17
+ .eggs/
18
+ lib/
19
+ lib64/
20
+ parts/
21
+ sdist/
22
+ var/
23
+ wheels/
24
+ share/python-wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+ MANIFEST
29
+
30
+ # PyInstaller
31
+ # Usually these files are written by a python script from a template
32
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
33
+ *.manifest
34
+ *.spec
35
+
36
+ # Installer logs
37
+ pip-log.txt
38
+ pip-delete-this-directory.txt
39
+
40
+ # Unit test / coverage reports
41
+ htmlcov/
42
+ .tox/
43
+ .nox/
44
+ .coverage
45
+ .coverage.*
46
+ .cache
47
+ nosetests.xml
48
+ coverage.xml
49
+ *.cover
50
+ *.py,cover
51
+ .hypothesis/
52
+ .pytest_cache/
53
+ cover/
54
+
55
+ # Translations
56
+ *.mo
57
+ *.pot
58
+
59
+ # Django stuff:
60
+ *.log
61
+ local_settings.py
62
+ db.sqlite3
63
+ db.sqlite3-journal
64
+
65
+ # Flask stuff:
66
+ instance/
67
+ .webassets-cache
68
+
69
+ # Scrapy stuff:
70
+ .scrapy
71
+
72
+ # Sphinx documentation
73
+ docs/_build/
74
+
75
+ # PyBuilder
76
+ .pybuilder/
77
+ target/
78
+
79
+ # Jupyter Notebook
80
+ .ipynb_checkpoints
81
+
82
+ # IPython
83
+ profile_default/
84
+ ipython_config.py
85
+
86
+ # pyenv
87
+ # For a library or package, you might want to ignore these files since the code is
88
+ # intended to run in multiple environments; otherwise, check them in:
89
+ # .python-version
90
+
91
+ # pipenv
92
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
93
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
94
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
95
+ # install all needed dependencies.
96
+ #Pipfile.lock
97
+
98
+ # poetry
99
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
100
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
101
+ # commonly ignored for libraries.
102
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
103
+ #poetry.lock
104
+
105
+ # pdm
106
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
107
+ #pdm.lock
108
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
109
+ # in version control.
110
+ # https://pdm.fming.dev/#use-with-ide
111
+ .pdm.toml
112
+
113
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
114
+ __pypackages__/
115
+
116
+ # Celery stuff
117
+ celerybeat-schedule
118
+ celerybeat.pid
119
+
120
+ # SageMath parsed files
121
+ *.sage.py
122
+
123
+ # Environments
124
+ .env
125
+ .venv
126
+ env/
127
+ venv/
128
+ ENV/
129
+ env.bak/
130
+ venv.bak/
131
+
132
+ # Spyder project settings
133
+ .spyderproject
134
+ .spyproject
135
+
136
+ # Rope project settings
137
+ .ropeproject
138
+
139
+ # mkdocs documentation
140
+ /site
141
+
142
+ # mypy
143
+ .mypy_cache/
144
+ .dmypy.json
145
+ dmypy.json
146
+
147
+ # Pyre type checker
148
+ .pyre/
149
+
150
+ # pytype static type analyzer
151
+ .pytype/
152
+
153
+ # Cython debug symbols
154
+ cython_debug/
155
+
156
+ # PyCharm
157
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
158
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
159
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
160
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
161
+ #.idea/
README.md ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Insights
2
+
3
+ ## Modules
4
+
5
+ - `DataLoader`: Handles the loading of data either by uploading a CSV file or inputting a URL to a CSV file.
6
+ - `DataAnalyzer`: Provides summary statistics and data types of the loaded dataset.
7
+ - `DataFilter`: Allows users to filter rows based on user-defined conditions.
8
+ - `DataTransformer`: Enables users to perform operations on columns.
9
+ - `DataVisualizer`: Visualizes data with various types of plots (Histogram, Box Plot, Pie Chart, Scatter Plot, Heatmap).
10
+
11
+ ## Features
12
+
13
+ - Upload CSV files or load data from a URL.
14
+ - Display the uploaded dataset.
15
+ - Show summary statistics and data types.
16
+ - Filter rows based on user-defined conditions.
17
+ - Perform operations on columns.
18
+ - Visualize data with various types of plots (Histogram, Box Plot, Pie Chart, Scatter Plot, Heatmap).
19
+ - Transform data.
20
+
21
+ ## Detailed Installation Instructions
22
+
23
+ 1. Install the required packages:
24
+ The project's dependencies are listed in the 'requirements.txt' file. You can install all of them using pip:
25
+ ```
26
+ pip install -r requirements.txt
27
+ ```
28
+ 2. Run the application:
29
+ Now, you're ready to run the application. Use the following command to start the Streamlit server:
30
+ ```
31
+ streamlit run app.py
32
+ ```
33
+
app.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from data_loader import DataLoader
3
+ from data_analyzer import DataAnalyzer
4
+ from data_filter import DataFilter
5
+ from data_transformer import DataTransformer
6
+ from data_visualizer import DataVisualizer
7
+
8
def main():
    """Wire the pipeline together: load -> analyze -> filter -> transform -> visualize."""
    st.title('Dataset Explorer')

    # Load a DataFrame from an upload or a URL (empty frame until provided).
    df = DataLoader().load_data()

    # On-demand summaries of the loaded data.
    analyzer = DataAnalyzer(df)
    analyzer.show_summary_statistics()
    analyzer.show_data_types()

    # Each stage returns the (possibly modified) frame for the next stage.
    df = DataFilter(df).filter_rows()
    df = DataTransformer(df).perform_column_operation()

    DataVisualizer(df).visualize_data()

if __name__ == "__main__":
    main()
data_analyzer.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
class DataAnalyzer:
    """Render on-demand summaries of a DataFrame behind Streamlit buttons."""

    def __init__(self, data):
        # DataFrame under inspection; never mutated by this class.
        self.data = data

    def show_summary_statistics(self):
        """Display ``describe()`` output when the user clicks the button."""
        clicked = st.button('Show Summary Statistics')
        if clicked:
            st.write(self.data.describe())

    def show_data_types(self):
        """Display each column's dtype when the user clicks the button."""
        clicked = st.button('Show Data Types')
        if clicked:
            st.write(self.data.dtypes)
data_filter.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
class DataFilter:
    """Interactive row filtering driven by a sidebar text input."""

    def __init__(self, data):
        # DataFrame to filter; replaced in place when a condition is applied.
        self.data = data

    def filter_rows(self):
        """Apply a user-typed pandas ``query`` expression and return the frame.

        With no input the original frame is returned untouched and nothing
        is rendered.
        """
        condition = st.sidebar.text_input('Filter rows (e.g., age > 30)')
        if not condition:
            return self.data
        self.data = self.data.query(condition)
        st.write(self.data)
        return self.data
data_loader.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+
4
class DataLoader:
    """Load a CSV dataset from an uploaded file or a user-supplied URL."""

    def __init__(self):
        # Start with an empty frame so callers always receive a DataFrame,
        # even before the user has supplied any data.
        self.data = pd.DataFrame()

    def load_data(self):
        """Render the data-source widgets and return the loaded DataFrame.

        Returns:
            pd.DataFrame: the parsed CSV, or an empty DataFrame until the
            user uploads a file / enters a URL that loads successfully.
        """
        data_source = st.selectbox('Select data source', ['Upload a CSV file', 'Input a URL'])
        if data_source == 'Upload a CSV file':
            uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
            if uploaded_file is not None:
                self.data = pd.read_csv(uploaded_file)
        elif data_source == 'Input a URL':
            url = st.text_input('Enter the URL of a CSV file')
            if url:
                try:
                    self.data = pd.read_csv(url)
                except Exception:
                    # Was a bare `except:`, which would also swallow
                    # KeyboardInterrupt/SystemExit; narrowed to Exception so
                    # only genuine load/parse failures are reported here.
                    st.error('Could not load data from the provided URL. Please make sure the URL is correct and points to a CSV file.')
        return self.data
data_transformer.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+
4
class DataTransformer:
    """Apply a user-specified expression to one column of the DataFrame."""

    def __init__(self, data):
        # DataFrame to transform; the target column is overwritten in place.
        self.data = data

    def perform_column_operation(self):
        """Read '<column> <expression>' from the sidebar and apply it per cell.

        The expression is evaluated for each cell with the cell value bound
        to ``x`` in the enclosing lambda scope. Returns the (possibly
        modified) DataFrame; on bad input an error is shown instead of
        crashing the app.
        """
        column_operation = st.sidebar.text_input('Column operation (e.g., age * 2)')
        if column_operation:
            # maxsplit=1 keeps expressions containing spaces intact; the
            # original `.split()` raised ValueError for any input with more
            # than two tokens (including the advertised example 'age * 2').
            parts = column_operation.split(maxsplit=1)
            if len(parts) != 2:
                st.sidebar.error('Enter a column name followed by an expression.')
                return self.data
            column, operation = parts
            if column not in self.data.columns:
                st.sidebar.error(f'Unknown column: {column}')
                return self.data
            try:
                # SECURITY NOTE: eval() executes arbitrary user input; this is
                # only acceptable for a locally run, trusted-user tool.
                self.data[column] = self.data[column].apply(lambda x: eval(operation))
                st.write(self.data)
            except Exception as exc:
                # Surface bad expressions instead of crashing the app.
                st.sidebar.error(f'Could not apply operation: {exc}')
        return self.data
data_visualizer.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import matplotlib.pyplot as plt
5
+ import seaborn as sns
6
+
7
class DataVisualizer:
    """Render a user-selected plot (histogram, box, pie, scatter, heatmap)."""

    def __init__(self, data):
        # DataFrame to visualize; never mutated by this class.
        self.data = data

    def visualize_data(self):
        """Show a plot-type selector and draw the chosen plot.

        Every branch now guards against a DataFrame lacking columns of the
        required dtype (the original Pie Chart and Heatmap branches did not,
        so an empty selectbox could crash with a KeyError).
        """
        plot_type = st.selectbox('Choose a type of plot', ['Histogram', 'Box Plot', 'Pie Chart', 'Scatter Plot', 'Heatmap'])
        if plot_type == 'Histogram':
            self._plot_histogram()
        elif plot_type == 'Box Plot':
            self._plot_box()
        elif plot_type == 'Pie Chart':
            self._plot_pie()
        elif plot_type == 'Scatter Plot':
            self._plot_scatter()
        elif plot_type == 'Heatmap':
            self._plot_heatmap()

    def _numeric_columns(self):
        # Columns eligible for numeric plots.
        return self.data.select_dtypes(include=[np.number]).columns

    def _plot_histogram(self):
        # Histogram of one user-chosen numeric column.
        numeric_columns = self._numeric_columns()
        if numeric_columns.empty:
            st.warning('No numeric columns in the data to visualize.')
            return
        column_to_visualize = st.selectbox('Choose a column to visualize', numeric_columns)
        fig, ax = plt.subplots()
        ax.hist(self.data[column_to_visualize])
        st.pyplot(fig)

    def _plot_box(self):
        # Box plot of one numeric column; NaNs are dropped because
        # matplotlib's boxplot cannot handle them.
        numeric_columns = self._numeric_columns()
        if numeric_columns.empty:
            st.warning('No numeric columns in the data to visualize.')
            return
        column_to_visualize = st.selectbox('Choose a column to visualize', numeric_columns)
        fig, ax = plt.subplots()
        ax.boxplot(self.data[column_to_visualize].dropna())
        st.pyplot(fig)

    def _plot_pie(self):
        # Pie chart of value counts for one categorical (object-dtype) column.
        object_columns = self.data.select_dtypes(include=['object']).columns
        if object_columns.empty:
            # Guard added: the original crashed when no object columns existed.
            st.warning('No categorical columns in the data to visualize.')
            return
        column_to_visualize = st.selectbox('Choose a column to visualize', object_columns)
        fig, ax = plt.subplots()
        self.data[column_to_visualize].value_counts().plot(kind='pie', ax=ax, autopct='%1.1f%%', textprops={'fontsize': 'small'})
        st.pyplot(fig)

    def _plot_scatter(self):
        # Scatter plot of exactly two user-chosen numeric columns.
        columns_to_visualize = st.multiselect('Choose two columns to visualize', self._numeric_columns())
        if len(columns_to_visualize) != 2:
            st.warning('Please select exactly two columns for scatter plot.')
            return
        fig, ax = plt.subplots()
        ax.scatter(self.data[columns_to_visualize[0]], self.data[columns_to_visualize[1]])
        st.pyplot(fig)

    def _plot_heatmap(self):
        # Correlation heatmap over the numeric columns.
        numeric_data = self.data.select_dtypes(include=[np.number])
        if numeric_data.empty:
            # Guard added: corr() on an empty frame renders a useless heatmap.
            st.warning('No numeric columns in the data to visualize.')
            return
        fig, ax = plt.subplots()
        sns.heatmap(numeric_data.corr(), annot=True, ax=ax)
        st.pyplot(fig)
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ numpy
4
+ matplotlib
5
+ seaborn