|
|
|
import streamlit as st |
|
import pandas as pd |
|
import numpy as np |
|
import matplotlib.pyplot as plt |
|
from sklearn.ensemble import RandomForestClassifier |
|
from sklearn.model_selection import train_test_split |
|
from sklearn.metrics import accuracy_score |
|
|
|
|
|
# Page header: the app title followed by a one-line description.
st.title("Machine Learning Model Visualization")
st.write(
    "This application demonstrates random forest classification on the iris dataset"
)
|
|
|
|
|
@st.cache_data
def load_data():
    """Load the iris dataset as a DataFrame plus its class names.

    The function is wrapped in ``st.cache_data``.

    Returns:
        A ``(frame, target_names)`` tuple. ``frame`` has one column per
        entry of the dataset's ``feature_names`` and a final ``'target'``
        column holding the dataset's ``target`` values; ``target_names``
        is passed through unchanged from the loaded dataset.
    """
    # Imported here rather than at module level, as in the original script.
    from sklearn.datasets import load_iris

    bunch = load_iris()
    frame = pd.DataFrame(data=bunch.data, columns=bunch.feature_names)
    # Appended last: the feature picker treats every column except the
    # final one as a candidate feature.
    frame["target"] = bunch.target
    return frame, bunch.target_names
|
|
|
# Fetch the dataset once per script run and offer an optional raw-table view.
data, target_names = load_data()

st.subheader("Dataset Exploration")
show_dataset = st.checkbox("Display dataset")
if show_dataset:
    st.dataframe(data)
|
|
|
|
|
# Feature picker: every column except the last one ('target' is appended last
# by load_data) is offered; the first column is the initial selection.
st.subheader("Feature Selection")
selectable_columns = data.columns[:-1]
initial_selection = data.columns[0]
features = st.multiselect(
    "Select features for model training",
    options=selectable_columns,
    default=initial_selection,
)
|
|
|
# Model configuration, training and reporting. Everything in the first branch
# needs at least one selected feature; otherwise only a warning is shown.
if features:
    st.subheader("Model Parameters")
    n_estimators = st.slider("Number of trees", 1, 100, 10)
    max_depth = st.slider("Maximum tree depth", 1, 20, 5)

    # Train and evaluate only when the button reports a click.
    if st.button("Train Model"):
        X = data[features]
        y = data["target"]
        # 30% of the rows are held out for evaluation; the fixed seed keeps
        # the split identical between runs.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.3, random_state=42
        )

        model = RandomForestClassifier(
            n_estimators=n_estimators,
            max_depth=max_depth,
            random_state=42,
        )
        model.fit(X_train, y_train)

        # Score the fitted model on the held-out rows.
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        st.success(f"Model accuracy: {accuracy:.4f}")

        # The importance chart is drawn only when several features were used.
        if len(features) > 1:
            st.subheader("Feature Importance")
            fig, ax = plt.subplots()
            try:
                ax.bar(features, model.feature_importances_)
                # Rotate labels through the Axes object instead of
                # plt.xticks, which acts on pyplot's global "current axes".
                ax.tick_params(axis="x", labelrotation=45)
                st.pyplot(fig)
            finally:
                # Figures created via pyplot stay registered until closed;
                # without this every click would leave another figure in
                # memory.
                plt.close(fig)
else:
    st.warning("Please select at least one feature for model training")