Spaces:
Sleeping
Sleeping
Merge remote-tracking branch 'origin/main' into LLMdataparty
Browse files- .github/workflows/StreamlitTesting.yml +38 -0
- .gitignore +2 -0
- Experiment.py +57 -0
- test.py → Experiments.py +0 -0
- app.py +58 -13
- data.csv +5 -0
- data_analyzer.py +56 -3
- data_loader.py +10 -16
- data_transformer.py +38 -0
- data_visualizer.py +33 -8
- requirements.txt +4 -1
- test_app.py +9 -0
.github/workflows/StreamlitTesting.yml
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: Python application
|
2 |
+
|
3 |
+
on:
|
4 |
+
push:
|
5 |
+
branches: [ "main" ]
|
6 |
+
pull_request:
|
7 |
+
|
8 |
+
permissions:
|
9 |
+
contents: read
|
10 |
+
|
11 |
+
jobs:
|
12 |
+
build:
|
13 |
+
|
14 |
+
runs-on: ubuntu-latest
|
15 |
+
|
16 |
+
steps:
|
17 |
+
- uses: actions/checkout@v3
|
18 |
+
- name: Set up Python 3.10
|
19 |
+
uses: actions/setup-python@v3
|
20 |
+
with:
|
21 |
+
python-version: "3.10"
|
22 |
+
- name: Install dependencies
|
23 |
+
run: |
|
24 |
+
python -m pip install --upgrade pip
|
25 |
+
pip install flake8 pytest
|
26 |
+
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
|
27 |
+
- name: Set up environment variables
|
28 |
+
run: |
|
29 |
+
echo "GOOGLE_API_KEY=${{ secrets.GOOGLE_API_KEY }}" >> $GITHUB_ENV
|
30 |
+
- name: Lint with flake8
|
31 |
+
run: |
|
32 |
+
# stop the build if there are Python syntax errors or undefined names
|
33 |
+
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
|
34 |
+
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
|
35 |
+
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
|
36 |
+
- name: Test with pytest
|
37 |
+
run: |
|
38 |
+
pytest -vv
|
.gitignore
CHANGED
@@ -9,6 +9,8 @@ __pycache__/
|
|
9 |
|
10 |
# data set
|
11 |
data.csv
|
|
|
|
|
12 |
#Env variables
|
13 |
.env
|
14 |
# Distribution / packaging
|
|
|
9 |
|
10 |
# data set
|
11 |
data.csv
|
12 |
+
original_data.csv
|
13 |
+
|
14 |
#Env variables
|
15 |
.env
|
16 |
# Distribution / packaging
|
Experiment.py
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
|
5 |
+
# Function to upload dataset
|
6 |
+
def upload_dataset():
    """Render a CSV uploader and parse whatever the user provides.

    Returns:
        A ``pandas.DataFrame`` read from the uploaded file, or ``None``
        while no file has been uploaded.
    """
    csv_file = st.file_uploader("Upload CSV file", type=["csv"])
    if csv_file is None:
        # Nothing uploaded yet.
        return None
    return pd.read_csv(csv_file)
|
11 |
+
|
12 |
+
# Function to impute null values
|
13 |
+
def impute_null(df):
    """Let the user fill nulls in selected numeric columns of ``df``.

    Renders a multiselect of the numeric columns, a strategy selector
    ('mean', 'mode' or '0') and an 'Impute Null' button. When the button is
    pressed, the selected columns of ``df`` are filled in place.

    Args:
        df: DataFrame to impute; modified in place.

    Returns:
        The same DataFrame object that was passed in.
    """
    numeric_columns = df.select_dtypes(include=[np.number]).columns
    col = st.multiselect('Choose columns to impute nulls', numeric_columns)
    option = st.selectbox('Impute nulls with', ('mean', 'mode', '0'))
    if st.button('Impute Null'):
        fill_values = None
        if not col:
            # Without this guard the 'mode' branch indexes row 0 of an empty
            # frame and raises IndexError.
            st.warning("Select at least one column to impute")
        elif option == "mean":
            fill_values = df[col].mean()
        elif option == "mode":
            # mode() returns a DataFrame; its first row holds one mode per
            # column. It has no rows when every selected value is null.
            modes = df[col].mode()
            if modes.empty:
                st.warning("The selected columns have no non-null values to take a mode from")
            else:
                fill_values = modes.iloc[0]
        else:
            # The selectbox only offers 'mean', 'mode' and '0'.
            fill_values = 0
        if fill_values is not None:
            df[col] = df[col].fillna(fill_values)
            st.success("Null values filled")
    return df
|
26 |
+
|
27 |
+
# Function to display transformed data
|
28 |
+
def display_data(df):
    """Write ``df`` to the Streamlit page."""
    st.write(df)
|
30 |
+
|
31 |
+
def main():
    """Run the experimental data-transformation page.

    Flow: upload a CSV, optionally impute nulls in it, optionally display
    the resulting DataFrame. Nothing is rendered past the uploader until a
    file has been provided.
    """
    st.title("Data Transformation App")

    # Step 1: Upload Dataset
    st.sidebar.title("Upload Dataset")
    df = upload_dataset()
    if df is None:
        return

    # Step 2: Perform Data Transformation
    st.sidebar.title("Data Transformation")
    # A checkbox keeps its value across reruns. A button is True only for the
    # rerun triggered by its own click, so the 'Impute Null' button rendered
    # inside impute_null() could never be seen as pressed when it was nested
    # under another button. impute_null() reports success itself once the
    # imputation has actually run.
    if st.sidebar.checkbox("Impute Null Values"):
        df = impute_null(df)

    # Step 3: Display Transformed Data
    st.sidebar.title("Transformed Data")
    if st.sidebar.checkbox("Show Transformed Data"):
        display_data(df)

    # Step 4: Store Transformed Data
    # You can store the transformed data in a variable or a data structure here

    # Step 5: Use Transformed Data
    # You can utilize the transformed data for further analysis, visualization, etc.
|
55 |
+
|
56 |
+
if __name__ == "__main__":
|
57 |
+
main()
|
test.py → Experiments.py
RENAMED
File without changes
|
app.py
CHANGED
@@ -5,27 +5,72 @@ from data_filter import DataFilter
|
|
5 |
from data_transformer import DataTransformer
|
6 |
from data_visualizer import DataVisualizer
|
7 |
from data_QA import DataQA
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
def main():
|
10 |
st.title('Insights 📶')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
-
|
13 |
-
|
14 |
|
15 |
-
|
16 |
-
|
17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
-
|
20 |
-
|
|
|
|
|
21 |
|
22 |
-
|
23 |
-
|
|
|
|
|
24 |
|
25 |
-
|
26 |
-
|
|
|
27 |
|
28 |
-
data_QA = DataQA(uploaded_file)
|
29 |
-
data_QA.ask_csv()
|
30 |
if __name__ == "__main__":
|
31 |
main()
|
|
|
5 |
from data_transformer import DataTransformer
|
6 |
from data_visualizer import DataVisualizer
|
7 |
from data_QA import DataQA
|
8 |
+
import os
|
9 |
+
from streamlit_option_menu import option_menu
|
10 |
+
|
11 |
+
|
12 |
+
import pandas as pd
|
13 |
|
14 |
def main():
    """Render the Insights app: load a CSV, then show the selected page.

    The working dataset is the file ``data.csv`` in the current directory.
    'Load Data' writes the uploaded file there through ``DataLoader``; every
    run then reads it back. If it cannot be read, the user is asked to upload
    a file and no menu is shown.
    """
    st.title('Insights 📶')
    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
    if st.button('Load Data'):
        data_loader = DataLoader()
        data_loader.load_data(uploaded_file)

    try:
        data = pd.read_csv("data.csv")
    except (FileNotFoundError, pd.errors.EmptyDataError, pd.errors.ParserError):
        # Missing, empty or malformed file: there is nothing to analyse yet.
        # Returning here replaces the old os.path.getsize() check, which
        # itself raised when data.csv did not exist.
        st.write("Please upload a csv file")
        return

    with st.sidebar:
        selected = option_menu(
            menu_title="Main Menu",
            options=["Data Loader", "Exploratory Data Analysis", "Data Cleaning", "Q/A", "Data Party"])

    # --- DATA LOADER ---
    if selected == "Data Loader":
        st.toast("Data Loaded")
        st.write(data.head())

    # --- EDA ---
    elif selected == "Exploratory Data Analysis":
        data_analyzer = DataAnalyzer(data)
        data_analyzer.show_eda()
        data_analyzer.show_null_value_statistics()
        data_analyzer.show_count_plots()
        data_analyzer.show_summary_statistics()

        data_visualizer = DataVisualizer(data)
        data_visualizer.visualize_data()

    # --- DATA CLEANING ---
    elif selected == "Data Cleaning":
        data_transformer = DataTransformer(data)

        # Each step renders its own controls and returns the transformer's
        # DataFrame; only the result of the last step is needed here.
        data_transformer.perform_column_operation()
        data_transformer.remove_null()
        modified_data = data_transformer.impute_null()

        # Null statistics of the cleaned data, shown once.
        DataAnalyzer(modified_data).show_null_value_statistics()

        # TODO: wire up DataTransformer.remove_columns() and
        # DataFilter(modified_data).filter_rows().

    # --- QUESTION AND ANSWER ---
    elif selected == "Q/A":
        data_QA = DataQA(data)
        data_QA.ask_csv()

    # --- DATA PARTY ---
    elif selected == "Data Party":
        st.write("To be continued... :)")
|
74 |
|
|
|
|
|
75 |
if __name__ == "__main__":
|
76 |
main()
|
data.csv
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Name,Age,Email
|
2 |
+
John Doe,30,johndoe@example.com
|
3 |
+
Jane Smith,25,janesmith@example.com
|
4 |
+
Michael Johnson,35,michaeljohnson@example.com
|
5 |
+
Emily Brown,28,emilybrown@example.com
|
data_analyzer.py
CHANGED
@@ -1,13 +1,66 @@
|
|
1 |
import streamlit as st
|
|
|
|
|
|
|
2 |
|
3 |
class DataAnalyzer:
|
4 |
def __init__(self, data):
|
5 |
self.data = data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
def show_summary_statistics(self):
|
8 |
if st.button('Show Summary Statistics'):
|
9 |
st.write(self.data.describe())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
-
|
12 |
-
|
13 |
-
|
|
|
|
|
|
|
|
1 |
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import seaborn as sns
|
4 |
+
import matplotlib.pyplot as plt
|
5 |
|
6 |
class DataAnalyzer:
    """Streamlit views for exploring a DataFrame.

    Constructing an instance renders the "Exploratory Data Analysis" header.
    """

    def __init__(self, data):
        """Store ``data`` and render the section header."""
        self.data = data
        st.header("Exploratory Data Analysis")

    def show_eda(self):
        """Show the row and column counts and the columns grouped by dtype."""
        st.write("Number of rows:", self.data.shape[0])
        st.write("Number of columns:", self.data.shape[1])

        columns_by_dtype = {}
        for column_name, dtype in self.data.dtypes.items():
            # str() keeps the join below working for non-string column labels.
            columns_by_dtype.setdefault(str(dtype), []).append(str(column_name))

        df = pd.DataFrame(
            [[dtype, ', '.join(columns)] for dtype, columns in columns_by_dtype.items()],
            columns=["Data Type", "Column Names"],
        )
        st.subheader("Columns by Data Type")
        st.dataframe(df, hide_index=True, use_container_width=True)

    def show_summary_statistics(self):
        """Show ``describe()`` output when the user presses the button.

        The default summary is always shown; the object-column summary is
        shown only when the data has object columns.
        """
        if st.button('Show Summary Statistics'):
            if self.data.shape[1] == 0:
                # describe() raises ValueError on a frame without columns.
                st.warning("No columns in the data to summarise.")
                return
            st.write(self.data.describe())
            # describe(include=object) raises ValueError when no column
            # matches, e.g. for an all-numeric dataset.
            if not self.data.select_dtypes(include=object).columns.empty:
                st.write(self.data.describe(include=object))

    def show_null_value_statistics(self):
        """Show per-column null counts and percentages plus a 'Total' row."""
        st.subheader("Null Value Statistics")
        null_counts = self.data.isnull().sum()
        total_null = null_counts.sum()
        total_rows, total_columns = self.data.shape
        total_cells = total_rows * total_columns

        # Report 0% rather than dividing by zero when there are no rows/cells.
        if total_rows:
            null_percentages = (null_counts / total_rows) * 100
        else:
            null_percentages = null_counts * 0.0
        total_percentage = (total_null / total_cells) * 100 if total_cells else 0.0

        null_stats_df = pd.DataFrame({
            'Column Name': null_counts.index,
            'Null Values': null_counts.values,
            'Percentage Null': null_percentages.values
        })
        null_stats_df.loc[len(null_stats_df)] = ['Total', total_null, total_percentage]
        st.dataframe(null_stats_df, hide_index=True, use_container_width=True)

    def show_count_plots(self):
        """Plot every column: a count plot for up to 12 distinct values,
        otherwise a 20-bin histogram."""
        st.subheader("Count Plots")
        sns.set(style="whitegrid")

        for column_name in self.data.columns:
            unique_values = self.data[column_name].nunique()
            fig, ax = plt.subplots(figsize=(10, 6))

            if unique_values <= 12:
                sns.countplot(data=self.data, x=column_name, ax=ax)
                ax.set_title(f'Count Plot of {column_name}')
            else:
                sns.histplot(data=self.data, x=column_name, bins=20, ax=ax)
                ax.set_title(f'Histogram of {column_name}')
                ax.set_xlabel(column_name)

            st.pyplot(fig)
            # pyplot keeps every figure registered until it is closed; close
            # it so figures do not pile up across reruns.
            plt.close(fig)
|
data_loader.py
CHANGED
@@ -3,21 +3,15 @@ import pandas as pd
|
|
3 |
|
4 |
class DataLoader:
|
5 |
def __init__(self):
|
6 |
-
|
7 |
|
8 |
-
|
9 |
-
|
10 |
-
if
|
11 |
-
|
12 |
if uploaded_file is not None:
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
if url:
|
19 |
-
try:
|
20 |
-
self.data = pd.read_csv(url)
|
21 |
-
except:
|
22 |
-
st.error('Could not load data from the provided URL. Please make sure the URL is correct and points to a CSV file.')
|
23 |
-
return self.data,uploaded_file
|
|
|
3 |
|
4 |
class DataLoader:
    """Persist an uploaded CSV as the app's working dataset."""

    def __init__(self):
        pass

    def load_data(self, uploaded_file):
        """Write the uploaded CSV to ``original_data.csv`` and ``data.csv``.

        Not wrapped in ``st.cache_data``: the method exists only for its file
        writes, and a cache hit would skip them, so loading the same upload
        again would not reset ``data.csv``.

        Args:
            uploaded_file: File-like object accepted by ``pandas.read_csv``,
                or ``None`` when nothing was uploaded. With ``None`` an empty
                DataFrame is written to both files.

        Returns:
            Always ``True``.
        """
        data = pd.DataFrame()
        if uploaded_file is not None:
            data = pd.read_csv(uploaded_file)
        data.to_csv('./original_data.csv', index=False)
        data.to_csv('./data.csv', index=False)
        return True
|
|
|
|
|
|
|
|
|
|
|
|
data_transformer.py
CHANGED
@@ -1,9 +1,12 @@
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
|
|
3 |
|
4 |
class DataTransformer:
|
5 |
def __init__(self, data):
|
6 |
self.data = data
|
|
|
|
|
7 |
|
8 |
def perform_column_operation(self):
|
9 |
column_operation = st.sidebar.text_input('Column operation (e.g., age * 2)')
|
@@ -13,8 +16,43 @@ class DataTransformer:
|
|
13 |
st.write(self.data)
|
14 |
return self.data
|
15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
#transformed data is not retained
|
17 |
#null values handling
|
18 |
#2 options - to remove or to impute that is the question
|
|
|
|
|
|
|
19 |
#give option to analyse the transformed dataset or save it.
|
20 |
|
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
|
5 |
class DataTransformer:
|
6 |
def __init__(self, data):
|
7 |
self.data = data
|
8 |
+
st.header("Data Cleaning")
|
9 |
+
st.divider()
|
10 |
|
11 |
def perform_column_operation(self):
|
12 |
column_operation = st.sidebar.text_input('Column operation (e.g., age * 2)')
|
|
|
16 |
st.write(self.data)
|
17 |
return self.data
|
18 |
|
19 |
+
def remove_null(self):
    """Offer to drop rows that have nulls in user-selected columns.

    Returns:
        ``self.data``; the rows are dropped in place when 'Remove Null'
        was pressed.
    """
    st.header("Remove Null Values")
    chosen_columns = st.multiselect('Choose columns to remove nulls', self.data.columns)
    pressed = st.button('Remove Null')
    if not pressed:
        return self.data
    self.data.dropna(subset=chosen_columns, inplace=True)
    st.success("Null values removed")
    return self.data
|
26 |
+
|
27 |
+
def impute_null(self):
    """Let the user fill nulls in selected numeric columns, then save.

    Renders a multiselect of the numeric columns, a strategy selector
    ('mean', 'mode' or '0') and an 'Impute Null' button. When the button is
    pressed, the selected columns of ``self.data`` are filled. The frame is
    written to ``data.csv`` on every call.

    Returns:
        ``self.data`` after any imputation.
    """
    st.header("Impute Null Values")
    numeric_columns = self.data.select_dtypes(include=[np.number]).columns
    col = st.multiselect('Choose columns to impute nulls', numeric_columns)
    option = st.selectbox('Impute nulls with', ('mean', 'mode', '0'))
    if st.button('Impute Null'):
        fill_values = None
        if not col:
            # Without this guard the 'mode' branch indexes row 0 of an empty
            # frame and raises IndexError.
            st.warning("Select at least one column to impute")
        elif option == "mean":
            fill_values = self.data[col].mean()
        elif option == "mode":
            # mode() returns a DataFrame; its first row holds one mode per
            # column. It has no rows when every selected value is null.
            modes = self.data[col].mode()
            if modes.empty:
                st.warning("The selected columns have no non-null values to take a mode from")
            else:
                fill_values = modes.iloc[0]
        else:
            # The selectbox only offers 'mean', 'mode' and '0'.
            fill_values = 0
        if fill_values is not None:
            self.data[col] = self.data[col].fillna(fill_values)
            st.success("Null values filled")
    # Saved whether or not the button was pressed, so the file always matches
    # the frame this method returns.
    self.data.to_csv("data.csv", index=False)
    return self.data
|
41 |
+
|
42 |
+
def remove_columns(self):
    """Offer to drop user-selected columns from the data.

    Returns:
        ``self.data``; the columns are dropped in place when
        'Remove Columns' was pressed.
    """
    st.header("Remove Columns")
    chosen_columns = st.multiselect('Choose columns to remove', self.data.columns)
    pressed = st.button('Remove Columns')
    if not pressed:
        return self.data
    self.data.drop(columns=chosen_columns, inplace=True)
    st.success("Columns removed")
    return self.data
|
49 |
+
|
50 |
+
# PROBLEMS RESOLVED
|
51 |
#transformed data is not retained
|
52 |
#null values handling
|
53 |
#2 options - to remove or to impute that is the question
|
54 |
+
|
55 |
+
# PROBLEMS TO BE ADDRESSED
|
56 |
+
#categorical to numerical
|
57 |
#give option to analyse the transformed dataset or save it.
|
58 |
|
data_visualizer.py
CHANGED
@@ -7,9 +7,11 @@ import seaborn as sns
|
|
7 |
class DataVisualizer:
|
8 |
def __init__(self, data):
|
9 |
self.data = data
|
|
|
10 |
|
11 |
def visualize_data(self):
|
12 |
plot_type = st.selectbox('Choose a type of plot', ['Histogram', 'Box Plot', 'Pie Chart', 'Scatter Plot', 'Heatmap'])
|
|
|
13 |
if plot_type == 'Histogram':
|
14 |
numeric_columns = self.data.select_dtypes(include=[np.number]).columns
|
15 |
if numeric_columns.empty:
|
@@ -18,7 +20,11 @@ class DataVisualizer:
|
|
18 |
column_to_visualize = st.selectbox('Choose a column to visualize', numeric_columns)
|
19 |
fig, ax = plt.subplots()
|
20 |
ax.hist(self.data[column_to_visualize])
|
|
|
|
|
|
|
21 |
st.pyplot(fig)
|
|
|
22 |
elif plot_type == 'Box Plot':
|
23 |
numeric_columns = self.data.select_dtypes(include=[np.number]).columns
|
24 |
if numeric_columns.empty:
|
@@ -27,23 +33,42 @@ class DataVisualizer:
|
|
27 |
column_to_visualize = st.selectbox('Choose a column to visualize', numeric_columns)
|
28 |
fig, ax = plt.subplots()
|
29 |
ax.boxplot(self.data[column_to_visualize].dropna())
|
|
|
|
|
30 |
st.pyplot(fig)
|
|
|
31 |
elif plot_type == 'Pie Chart':
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
elif plot_type == 'Scatter Plot':
|
37 |
-
|
38 |
-
|
39 |
-
st.
|
|
|
|
|
|
|
|
|
40 |
else:
|
41 |
fig, ax = plt.subplots()
|
42 |
-
ax.scatter(self.data[
|
|
|
|
|
|
|
43 |
st.pyplot(fig)
|
|
|
44 |
elif plot_type == 'Heatmap':
|
45 |
numeric_data = self.data.select_dtypes(include=[np.number])
|
46 |
corr = numeric_data.corr()
|
47 |
fig, ax = plt.subplots()
|
48 |
sns.heatmap(corr, annot=True, ax=ax)
|
|
|
49 |
st.pyplot(fig)
|
|
|
7 |
class DataVisualizer:
|
8 |
def __init__(self, data):
|
9 |
self.data = data
|
10 |
+
st.subheader("Data Visualizer")
|
11 |
|
12 |
def visualize_data(self):
|
13 |
plot_type = st.selectbox('Choose a type of plot', ['Histogram', 'Box Plot', 'Pie Chart', 'Scatter Plot', 'Heatmap'])
|
14 |
+
|
15 |
if plot_type == 'Histogram':
|
16 |
numeric_columns = self.data.select_dtypes(include=[np.number]).columns
|
17 |
if numeric_columns.empty:
|
|
|
20 |
column_to_visualize = st.selectbox('Choose a column to visualize', numeric_columns)
|
21 |
fig, ax = plt.subplots()
|
22 |
ax.hist(self.data[column_to_visualize])
|
23 |
+
ax.set_title(f'Histogram of {column_to_visualize}')
|
24 |
+
ax.set_xlabel(column_to_visualize)
|
25 |
+
ax.set_ylabel('Frequency')
|
26 |
st.pyplot(fig)
|
27 |
+
|
28 |
elif plot_type == 'Box Plot':
|
29 |
numeric_columns = self.data.select_dtypes(include=[np.number]).columns
|
30 |
if numeric_columns.empty:
|
|
|
33 |
column_to_visualize = st.selectbox('Choose a column to visualize', numeric_columns)
|
34 |
fig, ax = plt.subplots()
|
35 |
ax.boxplot(self.data[column_to_visualize].dropna())
|
36 |
+
ax.set_title(f'Box Plot of {column_to_visualize}')
|
37 |
+
ax.set_ylabel(column_to_visualize)
|
38 |
st.pyplot(fig)
|
39 |
+
|
40 |
elif plot_type == 'Pie Chart':
|
41 |
+
nonnumeric_columns = self.data.select_dtypes(include=['object']).columns
|
42 |
+
if nonnumeric_columns.empty:
|
43 |
+
st.warning('No non numeric columns in the data to visualize.')
|
44 |
+
else:
|
45 |
+
column_to_visualize = st.selectbox('Choose a column to visualize', nonnumeric_columns)
|
46 |
+
fig, ax = plt.subplots()
|
47 |
+
self.data[column_to_visualize].value_counts().plot(kind='pie', ax=ax, autopct='%1.1f%%', textprops={'fontsize': 'small'})
|
48 |
+
ax.set_title(f'Pie Chart of {column_to_visualize}')
|
49 |
+
ax.set_ylabel('')
|
50 |
+
st.pyplot(fig)
|
51 |
+
|
52 |
elif plot_type == 'Scatter Plot':
|
53 |
+
left, right = st.columns(2)
|
54 |
+
with left:
|
55 |
+
x_col = st.selectbox('Choose values on X axis', self.data.select_dtypes(include=[np.number]).columns)
|
56 |
+
with right:
|
57 |
+
y_col = st.selectbox('Choose values on Y axis', self.data.select_dtypes(include=[np.number]).columns)
|
58 |
+
if x_col == y_col:
|
59 |
+
st.warning('Please select two different columns for scatter plot.')
|
60 |
else:
|
61 |
fig, ax = plt.subplots()
|
62 |
+
ax.scatter(self.data[x_col], self.data[y_col])
|
63 |
+
ax.set_title(f'Scatter Plot of {x_col} vs {y_col}')
|
64 |
+
ax.set_xlabel(x_col)
|
65 |
+
ax.set_ylabel(y_col)
|
66 |
st.pyplot(fig)
|
67 |
+
|
68 |
elif plot_type == 'Heatmap':
|
69 |
numeric_data = self.data.select_dtypes(include=[np.number])
|
70 |
corr = numeric_data.corr()
|
71 |
fig, ax = plt.subplots()
|
72 |
sns.heatmap(corr, annot=True, ax=ax)
|
73 |
+
ax.set_title('Correlation Heatmap')
|
74 |
st.pyplot(fig)
|
requirements.txt
CHANGED
@@ -7,4 +7,7 @@ langchain-google-genai
|
|
7 |
langchain-experimental
|
8 |
python-dotenv
|
9 |
tabulate
|
10 |
-
litellm
|
|
|
|
|
|
|
|
7 |
langchain-experimental
|
8 |
python-dotenv
|
9 |
tabulate
|
10 |
+
litellm
|
11 |
+
streamlit_option_menu
|
12 |
+
pytest
|
13 |
+
|
test_app.py
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from streamlit.testing.v1 import AppTest
|
2 |
+
|
3 |
+
def test_smoke():
    """Smoke test: run app.py once and expect no exception elements."""
    app_test = AppTest.from_file("app.py", default_timeout=10)
    result = app_test.run()
    # Elements are exposed as properties on the run result, each returning a
    # sequence of that element; the exception sequence must be empty.
    assert not result.exception
|
9 |
+
|