LLM-Tutor / pages /5_Modeling.py
georgeek's picture
setup
de2b822
import streamlit as st
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
def _train_and_report(df):
    """Let the user pick columns, fit a logistic regression, and show accuracy.

    Renders a target selector and a feature multiselect for ``df``. Once a
    target and at least one feature are chosen, the data is split 80/20
    (``random_state=42``), a ``LogisticRegression`` is fitted on the training
    part, and the accuracy on the held-out part is written to the page.

    Args:
        df: The uploaded dataset as a pandas DataFrame.

    Returns:
        None. All output goes to the Streamlit page; training problems are
        reported with ``st.error`` instead of raising.
    """
    target = st.selectbox("Select the target variable", df.columns)

    # The target must not be selectable as a feature: training on the label
    # itself leaks the answer and yields a meaningless accuracy.
    feature_options = [column for column in df.columns if column != target]
    features = st.multiselect("Select the feature columns", feature_options)
    if target is None or not features:
        return

    X = df[features]
    y = df[target]
    try:
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )
        model = LogisticRegression()
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
    except ValueError as exc:
        # scikit-learn rejects unusable input (text columns, missing values,
        # a continuous or single-class target, too few rows to split) with
        # ValueError; show the reason rather than a raw traceback.
        st.error(f"Could not train the model: {exc}")
        return

    accuracy = accuracy_score(y_test, y_pred)
    st.write(f"Accuracy: {accuracy * 100:.2f}%")


def run():
    """Render the "Modeling" tutorial page.

    The page shows, in order: an overview and key concepts, an optional
    CSV-upload demo that trains a logistic regression and reports its test
    accuracy, a multiple-choice quiz, a code-based quiz, and a list of
    learning resources.

    Returns:
        None. The function only issues Streamlit calls.
    """
    st.title("4. Modeling")
    st.write("## Overview")
    st.write("Building and training machine learning models to make predictions.")
    st.write("## Key Concepts & Explanations")
    st.markdown(
        "\n"
        "- **Model Selection**: Choose the model based on the problem (e.g., Classification, Regression).\n"
        "- **Training Data**: The subset used to train the model.\n"
        "- **Test Data**: The subset used to evaluate the model’s performance.\n"
    )

    file = st.file_uploader("Upload a dataset for modeling", type=["csv"])
    if file is not None:
        try:
            df = pd.read_csv(file)
        except (
            pd.errors.EmptyDataError,
            pd.errors.ParserError,
            UnicodeDecodeError,
        ) as exc:
            # An empty, malformed, or non-text upload should not crash the page.
            st.error(f"Could not read the uploaded CSV: {exc}")
        else:
            _train_and_report(df)

    # The quizzes and resources do not depend on an uploaded file, so they are
    # rendered unconditionally.
    st.write("## Quiz: Conceptual Questions")
    q1 = st.radio(
        "What is overfitting?",
        ["Model too simple", "Model too complex", "Data too large"],
    )
    if q1 == "Model too complex":
        st.success("✅ Correct!")
    else:
        st.error("❌ Incorrect.")

    st.write("## Code-Based Quiz")
    code_input = st.text_area(
        "Write a function to split data into train and test sets",
        value="def split_data(df, target):\n X = df.drop(columns=[target])\n y = df[target]\n return train_test_split(X, y, test_size=0.2, random_state=42)",
    )
    # Deliberately lenient check: the answer only has to mention the
    # scikit-learn helper by name; the submitted code is never executed.
    if "train_test_split" in code_input:
        st.success("✅ Correct!")
    else:
        st.error("❌ Try again.")

    st.write("## Learning Resources")
    st.markdown(
        "\n"
        "- 📖 [Introduction to Machine Learning with Python](https://www.oreilly.com/library/view/introduction-to-machine/9781449369880/)\n"
    )