import streamlit as st
import pandas as pd
import plotly.express as px
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.cluster import KMeans
from sklearn.metrics import (
    accuracy_score,
    r2_score,
    silhouette_score,
    confusion_matrix,
    classification_report,
    mean_squared_error,
)
from sklearn.preprocessing import StandardScaler
from ydata_profiling import ProfileReport
from streamlit_pandas_profiling import st_profile_report
from groq import Groq
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.document_loaders import TextLoader
from langchain_community.tools.tavily_search import TavilySearchResults
import os
from dotenv import load_dotenv
import tempfile

# Load environment variables (expects GROQ_API_KEY in a local .env file)
load_dotenv()

# Initialize Groq client
client = Groq(api_key=os.getenv("GROQ_API_KEY"))

# Initialize embeddings for FAISS
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Set page config
st.set_page_config(page_title="Neural-Vision Enhanced", layout="wide")

# Updated Custom CSS with a modern feel
st.markdown(""" """, unsafe_allow_html=True)

# Initialize session state
if 'metrics' not in st.session_state:
    st.session_state.metrics = {}
if 'chat_history' not in st.session_state:
    st.session_state.chat_history = []
if 'vector_store' not in st.session_state:
    st.session_state.vector_store = None
if 'tavily_api_key' not in st.session_state:
    st.session_state.tavily_api_key = None

# Helper Functions
def convert_df_to_text(df):
    """Summarize a DataFrame as plain text so it can be indexed for retrieval."""
    text = f"Dataset Summary: {df.shape[0]} rows, {df.shape[1]} columns\n"
    text += f"Missing Values: {df.isna().sum().sum()}\n"
    text += "Columns:\n"
    for col in df.columns:
        text += f"- {col} ({df[col].dtype}): "
        if pd.api.types.is_numeric_dtype(df[col]):
            text += f"Mean={df[col].mean():.2f}, Min={df[col].min()}, Max={df[col].max()}"
        else:
            text += f"Unique={df[col].nunique()}, Top={df[col].mode()[0] if not df[col].mode().empty else 'N/A'}"
        text += f", Missing={df[col].isna().sum()}\n"
    return text


def create_vector_store(df_text):
    """Write the dataset summary to a temp file, split it, and index it in FAISS."""
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as temp_file:
        temp_file.write(df_text)
        temp_path = temp_file.name
    loader = TextLoader(temp_path)
    documents = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
    texts = text_splitter.split_documents(documents)
    vector_store = FAISS.from_documents(texts, embeddings)
    os.unlink(temp_path)
    return vector_store


def get_groq_response(prompt, mode, use_web_search=False):
    """Answer a prompt with Groq, grounded in dataset context and optional web results."""
    context = ""
    if st.session_state.vector_store:
        docs = st.session_state.vector_store.similarity_search(prompt, k=3)
        context = "\n\nDataset Context:\n" + "\n".join([f"- {doc.page_content}" for doc in docs])
    if use_web_search and st.session_state.tavily_api_key:
        os.environ["TAVILY_API_KEY"] = st.session_state.tavily_api_key
        tavily = TavilySearchResults(max_results=3)
        web_results = tavily.invoke(prompt)
        context += "\n\nWeb Search Results:\n" + "\n".join(
            [f"- {res['content'][:200]}..." for res in web_results]
        )
    prompts = {
        "Legal": "You are a neural network expert specializing in legal data analysis.",
        "Financial": "You are a neural network expert specializing in financial data analysis.",
        "Academic": "You are a neural network expert specializing in academic data analysis.",
        "Technical": "You are a neural network expert specializing in technical data analysis.",
    }
    system_prompt = prompts.get(mode, "You are a neural network development assistant.") + "\n" + context
    response = client.chat.completions.create(
        model="llama3-70b-8192",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ],
        temperature=0.7,
        max_tokens=1024,
    )
    return response.choices[0].message.content
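# Illustrative usage (prompt text and mode are examples, not from the original
# source): once a dataset has been uploaded and indexed, a call such as
#   get_groq_response("Which columns have missing values?", mode="Financial")
# retrieves the three most similar summary chunks from FAISS, prepends them to the
# domain system prompt, and sends the combined prompt to Groq's llama3-70b-8192.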
# Visualization Functions
def plot_confusion_matrix(y_true, y_pred):
    cm = confusion_matrix(y_true, y_pred)
    fig = px.imshow(cm, text_auto=True, color_continuous_scale='Blues', title="Confusion Matrix")
    return fig


def plot_feature_importance(model, X):
    # MLPs expose per-layer weights via `coefs_` rather than `feature_importances_`
    # or `coef_`; summed absolute first-layer weights serve as a rough importance proxy.
    if hasattr(model, 'feature_importances_'):
        importance = model.feature_importances_
    elif hasattr(model, 'coefs_'):
        importance = np.abs(model.coefs_[0]).sum(axis=1)
    elif hasattr(model, 'coef_'):
        importance = np.abs(model.coef_)
    else:
        importance = np.ones(X.shape[1])
    fig = px.bar(x=X.columns, y=importance, title="Feature Importance")
    return fig


def plot_residuals(y_true, y_pred):
    residuals = y_true - y_pred
    fig = px.scatter(x=y_pred, y=residuals, title="Residual Plot", labels={"x": "Predicted", "y": "Residuals"})
    return fig


def plot_clusters(X, labels):
    # Plots the first two feature columns, colored by cluster assignment.
    fig = px.scatter(X, x=X.columns[0], y=X.columns[1], color=labels, title="Cluster Visualization")
    return fig


# Pages
def data_upload_page():
    st.header("📤 Data Upload & Analysis")
    uploaded_file = st.file_uploader("Upload Dataset", type=["csv"])
    if uploaded_file:
        df = pd.read_csv(uploaded_file)
        st.session_state.df = df
        st.session_state.vector_store = create_vector_store(convert_df_to_text(df))
        st.session_state.metrics = {}
        st.subheader("Dataset Health Check")
        col1, col2, col3 = st.columns(3)
        col1.metric("Total Samples", df.shape[0])
        col2.metric("Features", df.shape[1])
        col3.metric("Missing Values", df.isna().sum().sum())
        if st.button("Generate Full EDA Report"):
            with st.spinner("Generating comprehensive analysis..."):
                profile = ProfileReport(df, explorative=True)
                st_profile_report(profile)


def model_training_page():
    st.header("🧠 Neural Network Training Studio")
    if 'df' not in st.session_state:
        st.warning("Upload data first!")
        return
    df = st.session_state.df
    problem_type = st.selectbox("Select Problem Type", ["Classification", "Regression", "Clustering"])
    mode = st.selectbox("Domain Specialization", ["Legal", "Financial", "Academic", "Technical"])
    if problem_type != "Clustering":
        target = st.selectbox("Select Target Variable", df.columns)
        X = df.drop(columns=[target])
        y = df[target]
    else:
        X = df
        y = None
    # StandardScaler only accepts numeric data, so non-numeric columns are dropped here;
    # categorical features would need encoding to participate in training.
    X = X.select_dtypes(include=np.number)
    if st.button("Train Neural Network"):
        with st.spinner("Training in progress..."):
            X_scaled = StandardScaler().fit_transform(X)
            if y is not None:
                X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
            else:
                # No target: keep the full scaled matrix for clustering and its plots
                X_train, X_test, y_train, y_test = X_scaled, X_scaled, None, None
            if problem_type == "Classification":
                model = MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=500, random_state=42)
                model.fit(X_train, y_train)
                y_pred = model.predict(X_test)
                st.session_state.metrics = {
                    "Accuracy": accuracy_score(y_test, y_pred),
                    "Classification Report": classification_report(y_test, y_pred, output_dict=True),
                }
            elif problem_type == "Regression":
                model = MLPRegressor(hidden_layer_sizes=(100, 50), max_iter=500, random_state=42)
                model.fit(X_train, y_train)
                y_pred = model.predict(X_test)
                st.session_state.metrics = {
                    "R2 Score": r2_score(y_test, y_pred),
                    "Mean Squared Error": mean_squared_error(y_test, y_pred),
                }
            else:  # Clustering
                model = KMeans(n_clusters=3, random_state=42)
                labels = model.fit_predict(X_scaled)
                st.session_state.metrics = {
                    "Silhouette Score": silhouette_score(X_scaled, labels)
                }
            st.session_state.best_model = model
            st.session_state.X_test = X_test
            st.session_state.y_test = y_test
            st.session_state.y_pred = y_pred if y is not None else labels
            st.session_state.feature_names = list(X.columns)
            st.session_state.problem_type = problem_type
            st.success(f"Model trained successfully in {mode} mode!")
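# Note: the training page above stashes the fitted model, held-out split, predictions,
# feature names, and problem type in st.session_state; the evaluation page below reads
# those entries back so plots can be rendered without retraining.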
def visualization_page():
    st.header("🔍 Neural Network Evaluation Center")
    if 'best_model' not in st.session_state:
        st.warning("Train a model first!")
        return
    st.subheader("Performance Analysis")
    X_test_df = pd.DataFrame(st.session_state.X_test, columns=st.session_state.feature_names)
    if st.session_state.problem_type == "Classification":
        st.plotly_chart(plot_confusion_matrix(st.session_state.y_test, st.session_state.y_pred))
        st.plotly_chart(plot_feature_importance(st.session_state.best_model, X_test_df))
    elif st.session_state.problem_type == "Regression":
        st.plotly_chart(plot_residuals(st.session_state.y_test, st.session_state.y_pred))
        st.plotly_chart(plot_feature_importance(st.session_state.best_model, X_test_df))
    else:  # Clustering
        st.plotly_chart(plot_clusters(X_test_df, st.session_state.y_pred))
    st.subheader("Metrics")
    st.write(st.session_state.metrics)


# Chatbot Interface
def ai_assistant():
    # The original source is truncated inside this function; the body below is a
    # minimal reconstruction assumed from the session state and helpers defined above.
    st.header("💬 AI Assistant")
    mode = st.selectbox("Domain Specialization", ["Legal", "Financial", "Academic", "Technical"])
    tavily_key = st.text_input("Tavily API Key (optional, enables web search)", type="password")
    if tavily_key:
        st.session_state.tavily_api_key = tavily_key
    use_web_search = st.checkbox("Include web search results", value=False)
    for message in st.session_state.chat_history:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])
    if prompt := st.chat_input("Ask about your dataset or neural networks..."):
        st.session_state.chat_history.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)
        with st.chat_message("assistant"):
            response = get_groq_response(prompt, mode, use_web_search)
            st.markdown(response)
        st.session_state.chat_history.append({"role": "assistant", "content": response})
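# Entry point: this excerpt ends before any page routing, so the sidebar navigation
# below is an assumed sketch that wires the four pages together; the page labels are
# illustrative, not from the original source.
PAGES = {
    "Data Upload": data_upload_page,
    "Model Training": model_training_page,
    "Evaluation": visualization_page,
    "AI Assistant": ai_assistant,
}

choice = st.sidebar.radio("Navigate", list(PAGES.keys()))
PAGES[choice]()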