import streamlit as st
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Markdown bullet lists need one item per line; explicit "\n" separators keep
# the rendering independent of how the source file is indented.
_KEY_CONCEPTS_MD = (
    "- **Model Selection**: Choose the model based on the problem "
    "(e.g., Classification, Regression).\n"
    "- **Training Data**: The subset used to train the model.\n"
    "- **Test Data**: The subset used to evaluate the model’s performance.\n"
)

_RESOURCES_MD = (
    "- 📖 [Introduction to Machine Learning with Python]"
    "(https://www.oreilly.com/library/view/introduction-to-machine/9781449369880/)\n"
)

# Pre-filled answer shown in the code-quiz text area.
_SPLIT_QUIZ_DEFAULT = (
    "def split_data(df, target):\n"
    " X = df.drop(columns=[target])\n"
    " y = df[target]\n"
    " return train_test_split(X, y, test_size=0.2, random_state=42)"
)


def _train_and_score(df: pd.DataFrame, features: list, target) -> float:
    """Fit a logistic regression on an 80/20 split and return test accuracy.

    Args:
        df: The full dataset.
        features: Column labels used as model inputs.
        target: Column label of the variable to predict.

    Returns:
        The accuracy score on the held-out 20% of rows, as a fraction.

    Raises:
        ValueError: Propagated from scikit-learn when the data cannot be
            split or fitted (for example non-numeric or missing values).
    """
    X = df[features]
    y = df[target]
    # Fixed random_state keeps the split (and the reported score) reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    model = LogisticRegression()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    return accuracy_score(y_test, y_pred)


def _render_model_trainer() -> None:
    """Render the upload widgets and, once configured, the model accuracy."""
    file = st.file_uploader("Upload a dataset for modeling", type=["csv"])
    if not file:
        return

    try:
        df = pd.read_csv(file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"Could not read the uploaded CSV: {exc}")
        return

    target = st.selectbox("Select the target variable", df.columns)
    # Offering the target as a feature would leak the label into the inputs
    # and make the reported accuracy meaningless.
    feature_options = [col for col in df.columns if col != target]
    features = st.multiselect("Select the feature columns", feature_options)
    if target is None or not features:
        return

    try:
        accuracy = _train_and_score(df, features, target)
    except ValueError as exc:
        # Show the problem in the page instead of a raw traceback so the
        # rest of the lesson still renders.
        st.error(f"Could not train the model: {exc}")
        return

    st.write(f"Accuracy: {accuracy * 100:.2f}%")


def run():
    """Render the "4. Modeling" lesson page.

    The page shows an overview, lets the user upload a CSV and train a
    logistic-regression model on chosen columns, then presents a conceptual
    quiz, a code-based quiz and a list of learning resources.
    """
    st.title("4. Modeling")

    st.write("## Overview")
    st.write("Building and training machine learning models to make predictions.")

    st.write("## Key Concepts & Explanations")
    st.markdown(_KEY_CONCEPTS_MD)

    _render_model_trainer()

    st.write("## Quiz: Conceptual Questions")
    q1 = st.radio(
        "What is overfitting?",
        ["Model too simple", "Model too complex", "Data too large"],
    )
    if q1 == "Model too complex":
        st.success("✅ Correct!")
    else:
        st.error("❌ Incorrect.")

    st.write("## Code-Based Quiz")
    code_input = st.text_area(
        "Write a function to split data into train and test sets",
        value=_SPLIT_QUIZ_DEFAULT,
    )
    # The check only looks for the function name in the submitted text.
    if "train_test_split" in code_input:
        st.success("✅ Correct!")
    else:
        st.error("❌ Try again.")

    st.write("## Learning Resources")
    st.markdown(_RESOURCES_MD)