Spaces:

AtharvaThakur
/

Insights

Sleeping

App Files Files Community

Atharva Thakur committed on Feb 27, 2024

Commit

a0155bf

0 Parent(s):

Initial Commit

Browse files

Files changed (9) hide show

.gitignore +161 -0
README.md +33 -0
app.py +28 -0
data_analyzer.py +13 -0
data_filter.py +12 -0
data_loader.py +21 -0
data_transformer.py +14 -0
data_visualizer.py +49 -0
requirements.txt +5 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,161 @@

+.aider*
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/

README.md ADDED Viewed

	@@ -0,0 +1,33 @@

+# Insights
+## Modules
+- `DataLoader`: Handles the loading of data either by uploading a CSV file or inputting a URL to a CSV file.
+- `DataAnalyzer`: Provides summary statistics and data types of the loaded dataset.
+- `DataFilter`: Allows users to filter rows based on user-defined conditions.
+- `DataTransformer`: Enables users to perform operations on columns.
+- `DataVisualizer`: Visualizes data with various types of plots (Histogram, Box Plot, Pie Chart, Scatter Plot, Heatmap).
+## Features
+- Upload CSV files or load data from a URL.
+- Display the uploaded dataset.
+- Show summary statistics and data types.
+- Filter rows based on user-defined conditions.
+- Perform operations on columns.
+- Visualize data with various types of plots (Histogram, Box Plot, Pie Chart, Scatter Plot, Heatmap).
+- Transform data.
+## Detailed Installation Instructions
+1. Install the required packages:
+   The project's dependencies are listed in the `requirements.txt` file. You can install all of them using pip:
+   ```
+   pip install -r requirements.txt
+   ```
+2. Run the application:
+   Now, you're ready to run the application. Use the following command to start the Streamlit server:
+   ```
+   streamlit run app.py
+   ```

app.py ADDED Viewed

	@@ -0,0 +1,28 @@

+import streamlit as st
+from data_loader import DataLoader
+from data_analyzer import DataAnalyzer
+from data_filter import DataFilter
+from data_transformer import DataTransformer
+from data_visualizer import DataVisualizer
+def main():
+    st.title('Dataset Explorer')
+    data_loader = DataLoader()
+    data = data_loader.load_data()
+    data_analyzer = DataAnalyzer(data)
+    data_analyzer.show_summary_statistics()
+    data_analyzer.show_data_types()
+    data_filter = DataFilter(data)
+    data = data_filter.filter_rows()
+    data_transformer = DataTransformer(data)
+    data = data_transformer.perform_column_operation()
+    data_visualizer = DataVisualizer(data)
+    data_visualizer.visualize_data()
+if __name__ == "__main__":
+    main()

data_analyzer.py ADDED Viewed

	@@ -0,0 +1,13 @@

+import streamlit as st
+class DataAnalyzer:
+    def __init__(self, data):
+        self.data = data
+    def show_summary_statistics(self):
+        if st.button('Show Summary Statistics'):
+            st.write(self.data.describe())
+    def show_data_types(self):
+        if st.button('Show Data Types'):
+            st.write(self.data.dtypes)

data_filter.py ADDED Viewed

	@@ -0,0 +1,12 @@

+import streamlit as st
+class DataFilter:
+    def __init__(self, data):
+        self.data = data
+    def filter_rows(self):
+        filter_condition = st.sidebar.text_input('Filter rows (e.g., age > 30)')
+        if filter_condition:
+            self.data = self.data.query(filter_condition)
+            st.write(self.data)
+        return self.data

data_loader.py ADDED Viewed

	@@ -0,0 +1,21 @@

+import streamlit as st
+import pandas as pd
+class DataLoader:
+    def __init__(self):
+        self.data = pd.DataFrame()  # Initialize data as an empty DataFrame
+    def load_data(self):
+        data_source = st.selectbox('Select data source', ['Upload a CSV file', 'Input a URL'])
+        if data_source == 'Upload a CSV file':
+            uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
+            if uploaded_file is not None:
+                self.data = pd.read_csv(uploaded_file)
+        elif data_source == 'Input a URL':
+            url = st.text_input('Enter the URL of a CSV file')
+            if url:
+                try:
+                    self.data = pd.read_csv(url)
+                except:
+                    st.error('Could not load data from the provided URL. Please make sure the URL is correct and points to a CSV file.')
+        return self.data

data_transformer.py ADDED Viewed

	@@ -0,0 +1,14 @@

+import streamlit as st
+import pandas as pd
+class DataTransformer:
+    def __init__(self, data):
+        self.data = data
+    def perform_column_operation(self):
+        column_operation = st.sidebar.text_input('Column operation (e.g., age * 2)')
+        if column_operation:
+            column, operation = column_operation.split()
+            self.data[column] = self.data[column].apply(lambda x: eval(operation))
+            st.write(self.data)
+        return self.data

data_visualizer.py ADDED Viewed

	@@ -0,0 +1,49 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+class DataVisualizer:
+    def __init__(self, data):
+        self.data = data
+    def visualize_data(self):
+        plot_type = st.selectbox('Choose a type of plot', ['Histogram', 'Box Plot', 'Pie Chart', 'Scatter Plot', 'Heatmap'])
+        if plot_type == 'Histogram':
+            numeric_columns = self.data.select_dtypes(include=[np.number]).columns
+            if numeric_columns.empty:
+                st.warning('No numeric columns in the data to visualize.')
+            else:
+                column_to_visualize = st.selectbox('Choose a column to visualize', numeric_columns)
+                fig, ax = plt.subplots()
+                ax.hist(self.data[column_to_visualize])
+                st.pyplot(fig)
+        elif plot_type == 'Box Plot':
+            numeric_columns = self.data.select_dtypes(include=[np.number]).columns
+            if numeric_columns.empty:
+                st.warning('No numeric columns in the data to visualize.')
+            else:
+                column_to_visualize = st.selectbox('Choose a column to visualize', numeric_columns)
+                fig, ax = plt.subplots()
+                ax.boxplot(self.data[column_to_visualize].dropna())
+                st.pyplot(fig)
+        elif plot_type == 'Pie Chart':
+            column_to_visualize = st.selectbox('Choose a column to visualize', self.data.select_dtypes(include=['object']).columns)
+            fig, ax = plt.subplots()
+            self.data[column_to_visualize].value_counts().plot(kind='pie', ax=ax, autopct='%1.1f%%', textprops={'fontsize': 'small'})
+            st.pyplot(fig)
+        elif plot_type == 'Scatter Plot':
+            columns_to_visualize = st.multiselect('Choose two columns to visualize', self.data.select_dtypes(include=[np.number]).columns)
+            if len(columns_to_visualize) != 2:
+                st.warning('Please select exactly two columns for scatter plot.')
+            else:
+                fig, ax = plt.subplots()
+                ax.scatter(self.data[columns_to_visualize[0]], self.data[columns_to_visualize[1]])
+                st.pyplot(fig)
+        elif plot_type == 'Heatmap':
+            numeric_data = self.data.select_dtypes(include=[np.number])
+            corr = numeric_data.corr()
+            fig, ax = plt.subplots()
+            sns.heatmap(corr, annot=True, ax=ax)
+            st.pyplot(fig)

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+streamlit
+pandas
+numpy
+matplotlib
+seaborn