Spaces:
Running
Running
File size: 3,456 Bytes
62004ae dcea5c3 62004ae |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 |
import streamlit as st
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score, ConfusionMatrixDisplay
import data_generator
# Load dataset from CSV: every column except the last is treated as a
# feature, the last column is the class label.
business_data = pd.read_csv("business_data.csv")
X, y = business_data.iloc[:, :-1].values, business_data.iloc[:, -1].values

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# identical across Streamlit reruns.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.3
)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both partitions.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)
# Business Problem Description
# NOTE(review): this is a bare string expression, not a comment, and it is
# not the first statement of the file. Presumably Streamlit's "magic"
# feature renders it as text in the app, so treat the wording as
# user-facing output -- confirm before editing or removing it.
"""
This simulated dataset represents a business classification problem where a company is trying to categorize customer behaviors
into two distinct segments. The classification is based on factors such as purchase history, engagement levels, and
customer loyalty indicators. The data is structured in a way that requires a non-linear classification approach, making it
an ideal case for Support Vector Machines with polynomial or RBF kernels.
"""
# Streamlit App
st.title("SVM Business Classification App")

# Sidebar controls shared by every kernel tab.
st.sidebar.header("Model Hyperparameters")
C = st.sidebar.slider("Regularization (C)", 0.01, 10.0, 1.0)
# NOTE(review): `epsilon` is never passed to the SVC built in
# train_and_evaluate, so this slider currently has no effect on the results.
# It is kept so the sidebar layout stays unchanged; either wire it into a
# model that accepts it or remove the control.
epsilon = st.sidebar.slider("Epsilon", 0.01, 1.0, 0.1)

# Display Scatter Plot of Data
# Plots the first two columns of the unscaled feature matrix, coloured by
# class label.
st.subheader("Dataset Scatter Plot")
fig, ax = plt.subplots()
scatter = ax.scatter(X[:, 0], X[:, 1], c=y, cmap='viridis', edgecolors='k')
ax.set_xlabel("Feature 1")
ax.set_ylabel("Feature 2")
ax.set_title("Business Data Classification")
st.pyplot(fig)
# Streamlit re-executes this script on every widget interaction. Figures
# created through pyplot stay registered until closed, so release this one
# once it has been rendered to avoid accumulating open figures.
plt.close(fig)

# Tabs for different kernel types
tab1, tab2, tab3 = st.tabs(["Linear Kernel", "Polynomial Kernel", "RBF Kernel"])
def train_and_evaluate(kernel, degree=3, gamma='scale'):
    """Fit an SVC on the training split and score it on the test split.

    The regularization strength is read from the module-level ``C`` value
    chosen in the sidebar; the data comes from the module-level
    ``X_train``/``y_train`` and ``X_test``/``y_test`` arrays.

    Args:
        kernel: Kernel name forwarded to ``SVC``.
        degree: Polynomial degree forwarded to ``SVC``.
        gamma: Kernel coefficient forwarded to ``SVC``.

    Returns:
        A tuple ``(model, accuracy, report, y_pred)``: the fitted classifier,
        its accuracy on the test split, the classification report as a
        dict, and the predicted test labels.
    """
    classifier = SVC(C=C, kernel=kernel, degree=degree, gamma=gamma).fit(
        X_train, y_train
    )
    predictions = classifier.predict(X_test)
    return (
        classifier,
        accuracy_score(y_test, predictions),
        classification_report(y_test, predictions, output_dict=True),
        predictions,
    )
def _render_results(model, accuracy, report):
    """Show accuracy, classification report and confusion matrix for a model.

    Args:
        model: Fitted classifier, evaluated against the module-level
            ``X_test``/``y_test`` for the confusion matrix.
        accuracy: Accuracy value, displayed with two decimals.
        report: Classification report dict, displayed as a transposed
            DataFrame.
    """
    st.write(f"**Accuracy:** {accuracy:.2f}")
    st.write("**Classification Report:**", pd.DataFrame(report).transpose())
    fig, ax = plt.subplots()
    ConfusionMatrixDisplay.from_estimator(model, X_test, y_test, ax=ax)
    st.pyplot(fig)
    # The script is re-run on every interaction; close the figure after it
    # has been rendered so open pyplot figures do not pile up.
    plt.close(fig)


# Linear Kernel
with tab1:
    st.subheader("Linear Kernel")
    model, acc, report, y_pred = train_and_evaluate("linear")
    _render_results(model, acc, report)

# Polynomial Kernel
with tab2:
    st.subheader("Polynomial Kernel")
    degree = st.slider("Polynomial Degree", 2, 5, 3)
    model, acc, report, y_pred = train_and_evaluate("poly", degree)
    _render_results(model, acc, report)

# RBF Kernel
with tab3:
    st.subheader("RBF Kernel")
    gamma = st.slider("Gamma", 0.01, 1.0, 0.1)
    model, acc, report, y_pred = train_and_evaluate("rbf", gamma=gamma)
    _render_results(model, acc, report)
# Closing summary text for the app.
st.write("This app demonstrates how different SVM kernels impact classification performance in a non-linear business problem.")
|