"""Streamlit AutoML app.

Upload a CSV/Excel dataset, auto-preprocess it (imputation + label encoding),
tune and compare classifiers (Logistic Regression, SVM, Random Forest,
XGBoost) with Optuna, visualise the results, and make predictions with the
best model.
"""

import streamlit as st
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score, roc_auc_score,
    confusion_matrix, roc_curve, auc, classification_report
)
from sklearn.impute import SimpleImputer
import openpyxl  # imported so pandas can use it as the .xlsx engine
import optuna
import joblib
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt

st.set_page_config(page_title="ML Model Deployment", layout="wide")


def load_data(file):
    """Read an uploaded CSV or Excel file into a DataFrame; return None on failure."""
    try:
        if file.name.endswith('.csv'):
            data = pd.read_csv(file)
        elif file.name.endswith(('.xls', '.xlsx')):
            data = pd.read_excel(file)
        else:
            # Guard against unsupported extensions, which would otherwise
            # leave `data` unbound and raise a NameError.
            st.error("Unsupported file type. Please upload a CSV or Excel file.")
            return None
        return data
    except Exception as e:
        st.error(f"Error loading file: {e}")
        return None


def auto_process_data(data):
    """Impute missing values and label-encode categorical columns.

    Returns the processed DataFrame and the fitted LabelEncoders, which are
    needed later to encode user input and decode predictions.
    """
    processed_data = data.copy()
    label_encoders = {}

    if processed_data.isnull().sum().sum() > 0:
        st.info("Automatically handling missing values...")

        # Numeric columns: impute with the median (robust to outliers).
        num_cols = processed_data.select_dtypes(include=['int64', 'float64']).columns
        if len(num_cols) > 0:
            num_imputer = SimpleImputer(strategy='median')
            processed_data[num_cols] = num_imputer.fit_transform(processed_data[num_cols])

        # Categorical columns: impute with the most frequent value.
        cat_cols = processed_data.select_dtypes(include=['object']).columns
        for col in cat_cols:
            if processed_data[col].isnull().any():
                most_frequent = processed_data[col].mode()[0]
                # Assign back rather than fillna(inplace=True) to avoid
                # pandas chained-assignment warnings.
                processed_data[col] = processed_data[col].fillna(most_frequent)

    # Encode every remaining object column to integer labels.
    for column in processed_data.select_dtypes(include=['object']):
        label_encoders[column] = LabelEncoder()
        processed_data[column] = label_encoders[column].fit_transform(processed_data[column].astype(str))

    return processed_data, label_encoders

def get_model_configs():
    """Return each candidate model as a dict with its Pipeline and parameter grid."""
    models = {
        'Logistic Regression': {
            'pipeline': Pipeline([
                ('scaler', StandardScaler()),
                ('classifier', LogisticRegression())
            ]),
            'params': {
                'classifier__penalty': ['l1', 'l2'],
                'classifier__C': [0.01, 0.1, 1],
                'classifier__max_iter': [100, 200],
                # liblinear and saga both support the l1 penalty; the
                # default lbfgs solver does not.
                'classifier__solver': ['liblinear', 'saga']
            }
        },
        'Support Vector Machine': {
            'pipeline': Pipeline([
                ('scaler', StandardScaler()),
                ('classifier', SVC(probability=True))
            ]),
            'params': {
                'classifier__C': [0.001, 0.1, 1],
                'classifier__kernel': ['linear', 'rbf', 'sigmoid'],
                'classifier__gamma': ['scale', 'auto', 0.01, 0.1, 1],
                'classifier__max_iter': [100, 200]
            }
        },
        'Random Forest': {
            'pipeline': Pipeline([
                ('scaler', StandardScaler()),
                ('classifier', RandomForestClassifier())
            ]),
            'params': {
                'classifier__n_estimators': [100, 200],
                'classifier__max_depth': [None, 10, 20],
                'classifier__min_samples_split': [2, 5, 10],
                'classifier__min_samples_leaf': [1, 2, 4]
            }
        },
        # Spelled 'XGBoost' (and step named 'scaler') to stay consistent with
        # the other configs and with the name check in objective() below.
        'XGBoost': {
            'pipeline': Pipeline([
                ('scaler', StandardScaler()),
                # use_label_encoder silences warnings on older xgboost
                # versions; recent releases simply ignore it.
                ('classifier', XGBClassifier(use_label_encoder=False, eval_metric='logloss'))
            ]),
            'params': {
                'classifier__n_estimators': [100, 200],
                'classifier__learning_rate': [0.01, 0.05, 0.1],
                'classifier__max_depth': [3, 5, 7],
                'classifier__min_child_weight': [1, 3, 5],
                'classifier__subsample': [0.8, 1.0]
            }
        }
    }
    return models

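
# The ``classifier__<param>`` keys above follow scikit-learn's ``step__param``
# convention for addressing steps inside a Pipeline, e.g.
# pipeline.set_params(classifier__C=0.1) targets the 'classifier' step.
# Both GridSearchCV (train_model) and the Optuna objective rely on this.
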
def train_model(X_train, y_train, selected_model, progress_bar=None):
    """Tune one model with GridSearchCV over its configured parameter grid."""
    models = get_model_configs()
    model_config = models[selected_model]

    with st.spinner(f"Training {selected_model}..."):
        grid_search = GridSearchCV(
            estimator=model_config['pipeline'],
            param_grid=model_config['params'],
            cv=5,
            n_jobs=-1,
            verbose=0,
            scoring="accuracy"
        )
        grid_search.fit(X_train, y_train)

    if progress_bar:
        progress_bar.progress(1.0)

    return grid_search.best_estimator_, grid_search.best_score_

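
# Illustrative use of the GridSearchCV path above (the Streamlit pages use the
# Optuna-based auto_train below instead):
#   best_estimator, best_cv_score = train_model(X_train, y_train, 'Random Forest')
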
def objective(trial, X_train, y_train, model_name):
    """Optuna objective: sample one hyperparameter configuration for the given
    model and return its mean cross-validated accuracy."""
    models = get_model_configs()
    model_config = models[model_name]
    dataset_size = len(X_train)
    # Use fewer CV folds on small datasets so each fold keeps enough samples.
    cv_folds = 5 if dataset_size > 1000 else (3 if dataset_size > 500 else min(2, dataset_size))
    params = {}

    if model_name == 'Logistic Regression':
        params = {
            'classifier__penalty': trial.suggest_categorical('classifier__penalty', ['l1', 'l2']),
            'classifier__C': trial.suggest_float('classifier__C', 0.01, 1.0, log=True),
            'classifier__solver': trial.suggest_categorical('classifier__solver', ['liblinear', 'saga']),
            'classifier__max_iter': trial.suggest_int('classifier__max_iter', 100, 200)
        }

    elif model_name == 'Support Vector Machine':
        params = {
            'classifier__C': trial.suggest_float('classifier__C', 0.001, 1.0, log=True),
            'classifier__kernel': trial.suggest_categorical('classifier__kernel', ['linear', 'rbf', 'sigmoid']),
            'classifier__gamma': trial.suggest_categorical('classifier__gamma', ['scale', 'auto', 0.01, 0.1, 1]),
            'classifier__max_iter': trial.suggest_int('classifier__max_iter', 100, 200)
        }

    elif model_name == 'Random Forest':
        params = {
            'classifier__n_estimators': trial.suggest_int('classifier__n_estimators', 100, 200),
            'classifier__max_depth': trial.suggest_categorical('classifier__max_depth', [None, 10, 20]),
            'classifier__min_samples_split': trial.suggest_int('classifier__min_samples_split', 2, 10),
            'classifier__min_samples_leaf': trial.suggest_int('classifier__min_samples_leaf', 1, 4)
        }

    # Must match the 'XGBoost' key in get_model_configs; otherwise this branch
    # is never taken and XGBoost would be tuned with an empty parameter set.
    elif model_name == 'XGBoost':
        params = {
            'classifier__n_estimators': trial.suggest_int('classifier__n_estimators', 100, 300),
            'classifier__learning_rate': trial.suggest_float('classifier__learning_rate', 0.01, 0.2, log=True),
            'classifier__max_depth': trial.suggest_int('classifier__max_depth', 3, 10),
            'classifier__min_child_weight': trial.suggest_int('classifier__min_child_weight', 1, 6)
        }

    pipeline = model_config['pipeline'].set_params(**params)
    # cross_val_score clones and fits the pipeline for every fold, so no
    # separate pipeline.fit() is needed here.
    score = cross_val_score(pipeline, X_train, y_train, cv=cv_folds, scoring="accuracy").mean()
    return score

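
# Optuna consumes this objective exactly as auto_train does below, e.g.:
#   study = optuna.create_study(direction='maximize')
#   study.optimize(lambda t: objective(t, X_train, y_train, 'XGBoost'), n_trials=20)
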
def auto_train(X_train, y_train, X_test, y_test):
    """Tune every candidate model with Optuna, compare them on the test set,
    and return the best (model, name) pair."""
    models = get_model_configs()
    results = {}
    best_score = 0
    best_model = None
    best_model_name = None

    st.write("🔄 Training models with Optuna hyperparameter tuning...")

    progress_cols = st.columns(len(models))
    progress_bars = {model_name: progress_cols[i].progress(0.0) for i, model_name in enumerate(models)}

    for model_name in models.keys():
        st.write(f"🛠 Training {model_name}...")

        study = optuna.create_study(direction='maximize')
        study.optimize(lambda trial: objective(trial, X_train, y_train, model_name), n_trials=20)

        # Refit the pipeline on the full training set with the best params.
        best_params = study.best_params
        pipeline = models[model_name]['pipeline'].set_params(**best_params)
        pipeline.fit(X_train, y_train)

        y_pred = pipeline.predict(X_test)
        test_accuracy = accuracy_score(y_test, y_pred)

        results[model_name] = {
            'model': pipeline,
            'cv_score': study.best_value,
            'test_accuracy': test_accuracy
        }

        progress_bars[model_name].progress(1.0)

        if test_accuracy > best_score:
            best_score = test_accuracy
            best_model = pipeline
            best_model_name = model_name

    results_df = pd.DataFrame({
        'Model': list(results.keys()),
        'Cross-Validation Score': [results[model]['cv_score'] for model in results],
        'Test Accuracy': [results[model]['test_accuracy'] for model in results]
    }).sort_values('Test Accuracy', ascending=False)

    st.subheader("📊 Model Performance Comparison")
    st.dataframe(results_df)

    st.success(f"🏆 Best model: **{best_model_name}** with accuracy: **{best_score:.2%}**")

    return best_model, best_model_name

def get_classification_report(y_true, y_pred):
    """Return sklearn's classification report as a DataFrame for st.dataframe."""
    report_dict = classification_report(y_true, y_pred, output_dict=True)
    df = pd.DataFrame(report_dict).transpose()
    return df

def evaluate_models(X_train, X_test, y_train, y_test):
    """Offline evaluation helper: fits every configured pipeline with default
    params and plots ROC curves, confusion matrices, and a metric comparison
    via plt.show() (not wired into the Streamlit pages)."""
    models = get_model_configs()

    results = {}
    fitted = {}

    plt.figure(figsize=(10, 6))

    for name, config in models.items():
        # Each config is a dict; the estimator lives under the 'pipeline' key.
        model = config['pipeline']
        model.fit(X_train, y_train)
        fitted[name] = model
        y_pred = model.predict(X_test)
        y_prob = model.predict_proba(X_test)[:, 1] if hasattr(model, "predict_proba") else None

        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, average='binary')
        recall = recall_score(y_test, y_pred, average='binary')
        f1 = f1_score(y_test, y_pred, average='binary')
        roc_auc = roc_auc_score(y_test, y_prob) if y_prob is not None else None

        results[name] = {
            "Accuracy": accuracy,
            "Precision": precision,
            "Recall": recall,
            "F1-score": f1,
            "ROC-AUC": roc_auc
        }

        if y_prob is not None:
            fpr, tpr, _ = roc_curve(y_test, y_prob)
            plt.plot(fpr, tpr, label=f"{name} (AUC = {roc_auc:.2f})")

    plt.plot([0, 1], [0, 1], linestyle="--", color="gray")
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.title("ROC Curves")
    plt.legend()
    plt.show()

    fig, axes = plt.subplots(2, 2, figsize=(12, 10))
    for ax, (name, model) in zip(axes.ravel(), fitted.items()):
        y_pred = model.predict(X_test)
        cm = confusion_matrix(y_test, y_pred)
        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=ax)
        ax.set_title(f"{name} - Confusion Matrix")
        ax.set_xlabel("Predicted Label")
        ax.set_ylabel("True Label")

    plt.tight_layout()
    plt.show()

    results_df = pd.DataFrame(results).T
    results_df.plot(kind="bar", figsize=(10, 6))
    plt.title("Model Comparison")
    plt.ylabel("Score")
    plt.xticks(rotation=45)
    plt.legend(title="Metrics")
    plt.show()

    return results_df

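
# Offline usage sketch for the helper above (it plots with plt.show(), so it
# suits a notebook or script rather than the Streamlit pages):
#   metrics_df = evaluate_models(X_train, X_test, y_train, y_test)
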
def main():
    st.title("🤖 Machine Learning Model Deployment")

    st.sidebar.header("Navigation")
    page = st.sidebar.radio("Go to", ["Home", "Data Upload & Analysis", "Model Training", "Visualisation", "Prediction"])

    # Initialise session state so values survive Streamlit reruns.
    for key in ('data', 'processed_data', 'label_encoders', 'model',
                'features', 'target', 'model_name'):
        if key not in st.session_state:
            st.session_state[key] = None

    if page == "Home":
        st.title("🚀 AutoML: Effortless Machine Learning")
        st.markdown(
            """
            Welcome to **AutoML**, a powerful yet easy-to-use tool that automates the process of building and evaluating
            machine learning models. Whether you're a beginner exploring data or an expert looking for quick model deployment,
            AutoML simplifies the entire workflow.
            """
        )

        st.header("🔹 Features")
        st.markdown(
            """
            - **Automated Model Selection** – Let AutoML pick the best algorithm for your data.
            - **Hyperparameter Tuning** – Optimize model performance without manual tweaking.
            - **Data Preprocessing** – Handle missing values, scaling, encoding, and feature engineering.
            - **Performance Evaluation** – Compare models with key metrics and visualizations.
            - **Model Export** – Save trained models for deployment.
            """
        )

        st.header("🚀 Get Started")
        st.markdown(
            """
            1. **Upload your dataset** – Provide a CSV or Excel file with your data.
            2. **Select your target variable** – Choose the column to predict.
            3. **Let AutoML do the magic!** – Sit back and watch the automation work.
            """
        )

        st.header("📊 Visual Insights")
        st.markdown(
            """
            Explore interactive charts and performance metrics to make informed decisions.
            Use visualizations to compare model accuracy, precision, recall, and other key statistics.
            """
        )

        st.success("Start automating your ML workflows now! 🎯")
        st.write("Developed by Gourav Singh, Ankit Yadav, and Pushpansh")

    elif page == "Data Upload & Analysis":
        st.header("📊 Data Upload & Analysis")

        uploaded_file = st.file_uploader("Upload your dataset (CSV or Excel)", type=['csv', 'xlsx', 'xls'])

        if uploaded_file is not None:
            st.session_state.data = load_data(uploaded_file)

            if st.session_state.data is not None:
                st.session_state.processed_data, st.session_state.label_encoders = auto_process_data(st.session_state.data)

                st.success("Data loaded and automatically processed!")

                st.subheader("Dataset Overview")
                col1, col2, col3 = st.columns(3)
                with col1:
                    st.info(f"Number of rows: {st.session_state.data.shape[0]}")
                with col2:
                    st.info(f"Number of columns: {st.session_state.data.shape[1]}")
                with col3:
                    missing_values = st.session_state.data.isnull().sum().sum()
                    st.info(f"Missing values: {missing_values} (automatically handled)")

                st.subheader("Original Data Preview")
                st.dataframe(st.session_state.data.head())

                st.subheader("Processed Data Preview")
                st.dataframe(st.session_state.processed_data.head())

                st.subheader("Statistical Description")
                st.dataframe(st.session_state.processed_data.describe())

                st.subheader("Correlation Heatmap")
                fig, ax = plt.subplots(figsize=(10, 6))
                sns.heatmap(st.session_state.processed_data.corr(), annot=True, cmap='coolwarm', ax=ax)
                st.pyplot(fig)

    elif page == "Model Training":
        st.header("🎯 Auto Model Training")

        if st.session_state.processed_data is None:
            st.warning("Please upload and process your data first!")
            return

        st.subheader("Select Features and Target")
        columns = st.session_state.processed_data.columns.tolist()

        st.session_state.features = st.multiselect("Select features", columns, default=columns[:-1])
        # Default the target to the last column so it does not overlap with
        # the default feature selection above.
        st.session_state.target = st.selectbox("Select target variable", columns, index=len(columns) - 1)

        if st.button("Auto Train Models"):
            if len(st.session_state.features) > 0 and st.session_state.target:
                X = st.session_state.processed_data[st.session_state.features]
                y = st.session_state.processed_data[st.session_state.target]

                X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

                st.session_state.model, st.session_state.model_name = auto_train(X_train, y_train, X_test, y_test)

                y_pred = st.session_state.model.predict(X_test)

                st.subheader("Best Model Performance")

                accuracy = accuracy_score(y_test, y_pred)
                st.metric("Accuracy", f"{accuracy:.2%}")

                st.text("Classification Report:")
                df_report = get_classification_report(y_test, y_pred)
                st.dataframe(df_report)

                if st.session_state.model_name == "Random Forest":
                    st.subheader("Feature Importance")

                    importance_df = pd.DataFrame({
                        'Feature': st.session_state.features,
                        'Importance': st.session_state.model.named_steps['classifier'].feature_importances_
                    }).sort_values('Importance', ascending=False)

                    fig = px.bar(importance_df, x='Feature', y='Importance',
                                 title='Feature Importance Plot')
                    st.plotly_chart(fig)

                # Bundle everything needed at prediction time into one artifact.
                model_data = {
                    'model': st.session_state.model,
                    'model_name': st.session_state.model_name,
                    'label_encoders': st.session_state.label_encoders,
                    'features': st.session_state.features,
                    'target': st.session_state.target
                }
                joblib.dump(model_data, 'model_data.joblib')
                with open('model_data.joblib', 'rb') as f:
                    st.download_button(
                        label="Download trained model",
                        data=f.read(),
                        file_name='model_data.joblib',
                        mime='application/octet-stream'
                    )

    elif page == "Visualisation":
        st.header("Model Visualisation")
        if st.session_state.model is None:
            st.warning("Please train a model first!")
            return

        if st.session_state.processed_data is not None and st.session_state.features and st.session_state.target:
            X = st.session_state.processed_data[st.session_state.features]
            y = st.session_state.processed_data[st.session_state.target]

            # Same split parameters as training, so the test set is identical.
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

            viz_option = st.selectbox(
                "Select visualization type",
                ["Model Comparison", "ROC Curves", "Confusion Matrix"]
            )

            if viz_option == "Model Comparison":
                st.subheader("Model Performance Metrics")

                models = get_model_configs()
                results = {}

                progress_bar = st.progress(0)
                progress_text = st.empty()

                for i, (name, model_config) in enumerate(models.items()):
                    progress_text.text(f"Training {name}...")
                    pipeline = model_config['pipeline']
                    pipeline.fit(X_train, y_train)

                    y_pred = pipeline.predict(X_test)
                    y_prob = pipeline.predict_proba(X_test)[:, 1] if hasattr(pipeline, "predict_proba") else None

                    accuracy = accuracy_score(y_test, y_pred)
                    precision = precision_score(y_test, y_pred, average='binary')
                    recall = recall_score(y_test, y_pred, average='binary')
                    f1 = f1_score(y_test, y_pred, average='binary')
                    roc_auc = roc_auc_score(y_test, y_prob) if y_prob is not None else None

                    results[name] = {
                        "Accuracy": accuracy,
                        "Precision": precision,
                        "Recall": recall,
                        "F1-score": f1,
                        "ROC-AUC": roc_auc
                    }

                    progress_bar.progress((i + 1) / len(models))

                progress_text.empty()

                results_df = pd.DataFrame(results).T
                st.dataframe(results_df)

                fig = px.bar(
                    results_df.reset_index().melt(id_vars='index', var_name='Metric', value_name='Score'),
                    x='index', y='Score', color='Metric',
                    barmode='group',
                    title='Model Comparison',
                    labels={'index': 'Model'}
                )
                st.plotly_chart(fig)

            elif viz_option == "ROC Curves":
                st.subheader("ROC Curves")

                models = get_model_configs()

                fig = plt.figure(figsize=(10, 6))

                for name, model_config in models.items():
                    pipeline = model_config['pipeline']
                    pipeline.fit(X_train, y_train)

                    if hasattr(pipeline, "predict_proba"):
                        y_prob = pipeline.predict_proba(X_test)[:, 1]
                        fpr, tpr, _ = roc_curve(y_test, y_prob)
                        roc_auc = auc(fpr, tpr)
                        plt.plot(fpr, tpr, lw=2, label=f'{name} (AUC = {roc_auc:.2f})')

                plt.plot([0, 1], [0, 1], color='gray', lw=2, linestyle='--')
                plt.xlim([0.0, 1.0])
                plt.ylim([0.0, 1.05])
                plt.xlabel('False Positive Rate')
                plt.ylabel('True Positive Rate')
                plt.title('Receiver Operating Characteristic (ROC) Curves')
                plt.legend(loc="lower right")

                st.pyplot(fig)

            elif viz_option == "Confusion Matrix":
                st.subheader("Confusion Matrices")

                models = get_model_configs()

                if len(models) > 4:
                    st.warning("Showing confusion matrices for the first 4 models")
                    model_items = list(models.items())[:4]
                else:
                    model_items = list(models.items())

                num_models = len(model_items)
                cols = 2
                rows = (num_models + 1) // 2

                fig, axes = plt.subplots(rows, cols, figsize=(12, 10))
                # np.atleast_1d handles both the single-Axes and array cases.
                axes = np.atleast_1d(axes).ravel()

                for i, (name, model_config) in enumerate(model_items):
                    pipeline = model_config['pipeline']
                    pipeline.fit(X_train, y_train)

                    y_pred = pipeline.predict(X_test)
                    cm = confusion_matrix(y_test, y_pred)

                    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=axes[i])
                    axes[i].set_title(f"{name} - Confusion Matrix")
                    axes[i].set_xlabel("Predicted")
                    axes[i].set_ylabel("Actual")

                # Remove any unused subplot axes.
                for j in range(num_models, len(axes)):
                    fig.delaxes(axes[j])

                plt.tight_layout()
                st.pyplot(fig)

            st.subheader("Current Model Performance")
            best_model_pred = st.session_state.model.predict(X_test)

            st.metric("Accuracy", f"{accuracy_score(y_test, best_model_pred):.2%}")

            col1, col2 = st.columns(2)
            with col1:
                st.metric("Precision", f"{precision_score(y_test, best_model_pred):.2%}")
                st.metric("F1 Score", f"{f1_score(y_test, best_model_pred):.2%}")
            with col2:
                st.metric("Recall", f"{recall_score(y_test, best_model_pred):.2%}")
                if hasattr(st.session_state.model, "predict_proba"):
                    best_proba = st.session_state.model.predict_proba(X_test)[:, 1]
                    st.metric("AUC", f"{roc_auc_score(y_test, best_proba):.2%}")

        else:
            st.warning("Please load and preprocess your dataset before running evaluation.")

    elif page == "Prediction":
        st.header("🎲 Make Predictions")

        if st.session_state.model is None:
            st.warning("Please train a model first!")
            return

        st.subheader("Enter Feature Values")
        st.info(f"Using best model: {st.session_state.model_name}")

        input_data = {}
        for feature in st.session_state.features:
            if feature in st.session_state.label_encoders:
                # Categorical feature: offer the original labels, then encode
                # the choice with the same LabelEncoder used in training.
                options = st.session_state.label_encoders[feature].classes_
                value = st.selectbox(f"Select {feature}", options)
                input_data[feature] = st.session_state.label_encoders[feature].transform([value])[0]
            else:
                input_data[feature] = st.number_input(f"Enter value for {feature}", value=0.0)

        if st.button("Predict"):
            input_df = pd.DataFrame([input_data])

            prediction = st.session_state.model.predict(input_df)

            # Decode the prediction back to the original label if the target
            # was label-encoded during preprocessing.
            if st.session_state.target in st.session_state.label_encoders:
                original_prediction = st.session_state.label_encoders[st.session_state.target].inverse_transform(prediction)
                st.success(f"Predicted {st.session_state.target}: {original_prediction[0]}")
            else:
                st.success(f"Predicted {st.session_state.target}: {prediction[0]}")

            proba = st.session_state.model.predict_proba(input_df)
            st.subheader("Prediction Probability")

            if st.session_state.target in st.session_state.label_encoders:
                classes = st.session_state.label_encoders[st.session_state.target].classes_
            else:
                classes = st.session_state.model.classes_

            proba_df = pd.DataFrame(
                proba,
                columns=classes
            )
            st.dataframe(proba_df)


if __name__ == "__main__":
    main()
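
# To launch the app locally (assuming this file is saved as app.py):
#   streamlit run app.py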