Spaces:
Sleeping
Sleeping
File size: 5,279 Bytes
af0168b a0155bf af0168b ed7b72b af0168b e827164 ad6eb22 3a7810d a0155bf af0168b ad6eb22 d14ee80 a0155bf b3e3dbd 61e1f0d 411dde3 3150207 7c20203 ce41758 a0155bf 7c20203 a0155bf 7c20203 d14ee80 7c20203 b12237c 3a7810d 7e93210 a0155bf 7c20203 7e93210 3a7810d ad2d033 7c20203 80a90b2 7c20203 d14ee80 577321a 3511f77 7c20203 ce41758 3a7810d ce41758 7c20203 ad6eb22 6b5f571 e5d5423 6b5f571 ed7b72b 6b5f571 1df83f9 6b5f571 1df83f9 6b5f571 1df83f9 ad6eb22 7c20203 0459303 3511f77 0459303 e827164 a0155bf 3511f77 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
#---IMPORTS---
import streamlit as st
import numpy as np
import pandas as pd
import os
from streamlit_option_menu import option_menu
import pickle
#---MODULES IMPORT---
from Modules.data_loader import DataLoader
from Modules.data_analyzer import DataAnalyzer
from Modules.data_filter import DataFilter
from Modules.data_transformer import DataTransformer
from Modules.data_visualizer import DataVisualizer
from Modules.data_QA import DataQA
from Modules.MLtoolkit import MLToolkit
from Modules.llm_summary import LLM_summary
#---SKLEARN-IMPORT---
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, accuracy_score, mean_absolute_error
def main():
    """Render the Insights Streamlit app.

    Shows a CSV uploader with a "Load Data" button, reads the working
    dataset from ``data.csv`` and renders the page chosen in the sidebar
    menu. Errors are reported in the page (and still printed to stdout)
    instead of being silently swallowed.
    """
    st.title('Insights 📶')
    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
    if st.button('Load Data'):
        # st.file_uploader returns None until the user has picked a file;
        # do not hand None to the loader.
        if uploaded_file is None:
            st.warning("Please choose a CSV file before loading.")
        else:
            # NOTE(review): DataLoader.load_data presumably persists the
            # upload as "data.csv", which is read back below - confirm.
            DataLoader().load_data(uploaded_file)

    try:
        data = _read_saved_data()
        if data is None:
            st.info("Please upload a csv file")
            return

        with st.sidebar:
            selected = option_menu(
                menu_title="Main Menu",
                options=["Data Loader", "Exploratory Data Analysis", "Data Cleaning", "Q/A", "MLtoolkit"])

        # Menu label -> page renderer. "Data Party" is not offered in the
        # menu above, so that entry is currently unreachable; it is kept as
        # the placeholder the original code had.
        pages = {
            "Data Loader": _show_data_loader,
            "Exploratory Data Analysis": _show_eda,
            "Data Cleaning": _show_data_cleaning,
            "Q/A": _show_question_answer,
            "MLtoolkit": _show_ml_toolkit,
            "Data Party": _show_data_party,
        }
        render_page = pages.get(selected)
        if render_page is not None:
            render_page(data)
    except Exception as e:
        # Top-level boundary: keep the stdout log, but also tell the user,
        # otherwise the page just stays blank.
        print(e)
        st.error(f"An error occurred: {e}")


def _read_saved_data():
    """Return ``data.csv`` as a DataFrame, or None if the file does not exist."""
    try:
        return pd.read_csv("data.csv")
    except FileNotFoundError:
        return None


def _show_data_loader(data):
    """Show a toast and the first rows of the loaded dataset."""
    # --- DATA LOADER ---
    st.toast("Data Loaded")
    st.write(data.head())


def _show_eda(data):
    """Render the exploratory-analysis page: EDA, LLM summary and plots."""
    # --- EDA ---
    data_analyzer = DataAnalyzer(data)
    data_analyzer.show_eda()
    LLM_summary(data)
    data_analyzer.show_count_plots(data)
    data_visualizer = DataVisualizer(data)
    data_visualizer.suggestions(data)
    data_visualizer.visualize_data()


def _show_data_cleaning(data):
    """Run the cleaning steps of DataTransformer in their fixed order."""
    # --- DATA CLEANING ---
    st.header("Data Cleaning")
    data_transformer = DataTransformer(data)
    # The transformer methods return the updated data; this script does not
    # use those return values any further during the current run.
    data_transformer.handle_null()
    data_transformer.categorical_to_numerical()
    data_transformer.remove_columns()


def _show_question_answer(data):
    """Render the Q/A page; *data* is unused because DataQA takes no arguments."""
    # --- QUESTION AND ANSWER ---
    try:
        DataQA().answer_query()
    except Exception as e:
        print(f"An error occurred: {e}")
        # Also surface the failure in the UI, not only on stdout.
        st.error(f"An error occurred: {e}")


def _show_ml_toolkit(data):
    """Train the selected model on *data*, report metrics, offer a pickle download."""
    try:
        ml_toolkit = MLToolkit(data)
        algorithm, algorithm_type = ml_toolkit.select_algorithm()
        X, Y = ml_toolkit.select_features_and_target()

        is_regressor = algorithm_type == "Regressor"
        if is_regressor and algorithm in ('Decision Tree', 'Random Forest', 'Linear Regression'):
            params = ml_toolkit.add_parameter_regressor()
        else:
            params = ml_toolkit.add_parameter_classifier_general()

        if is_regressor:
            algo_model = ml_toolkit.model_regressor(params)
        else:
            algo_model = ml_toolkit.model_classifier(params)

        # 80/20 split; no random_state is set, so the split differs per run.
        x_train, x_test, y_train, y_test = train_test_split(X, Y, train_size=0.8)
        algo_model.fit(x_train, y_train)
        predict = algo_model.predict(x_test)

        # Accuracy for classifiers, error metrics for regressors.
        if algorithm != 'Linear Regression' and not is_regressor:
            st.write("Training Accuracy is:", algo_model.score(x_train, y_train) * 100)
            st.write("Testing Accuracy is:", accuracy_score(y_test, predict) * 100)
        else:
            st.write("Mean Squared error is:", mean_squared_error(y_test, predict))
            st.write("Mean Absolute error is:", mean_absolute_error(y_test, predict))

        model_bytes = pickle.dumps(algo_model)
        st.download_button(label="Download pickled model",
                           data=model_bytes,
                           file_name="model.pkl")
    except ValueError as e:
        st.error("Value Error: " + str(e))
    except TypeError as e:
        st.error("Type Error: " + str(e))
    except Exception as e:
        st.error(str(e))


def _show_data_party(data):
    """Placeholder page; *data* is unused."""
    # --- DATA PARTY ---
    st.write("To be Added:)")
# Script entry point: start the app only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
# TO DO:
# 1. automate categorical to numerical conversion
# 2. toggle btn for data (original and modified)
# 3. ask to save modified data before saving
# 4. streamline prompts in llm_summary
# 5. ml models