# NOTE: page-scrape residue, not part of the program ("Spaces: Sleeping").
import pandas as pd | |
import warnings | |
import streamlit as st | |
from classification import ClassificationModels | |
from regression import RegressionModels | |
warnings.filterwarnings("ignore") | |
import uuid | |
import time | |
# data cleaning: https://bank-performance.streamlit.app/ | |
# https://docs.streamlit.io/library/api-reference/layout | |
# Define function for each page | |
# def classification(): | |
# st.title("Home Page") | |
# st.write("Welcome to the Home Page") | |
def regressor():
    """Render the regression page: upload data, pick a target, train models.

    Only the "Train" tab has content; the "EDA/Transformation" and "Test"
    tabs are created but left empty. Inside the Train tab the user uploads a
    CSV/TXT file, types the name of the output column, and selects one or
    more regression models. Each selected model is passed through
    ``RegressionModels.fit``, ``.train`` and ``.evaluate``, and the returned
    MSE and R-squared values are written to the page.
    """
    # The first and last tabs are placeholders; only the middle one is filled.
    _eda_tab, train, _test_tab = st.tabs(['EDA/Transformation', 'Train', 'Test'])
    with train:
        st.title("Regression / Train data")
        # Streamlit documents ``type`` as a string or list of strings.
        spectra = st.file_uploader("**Upload file**", type=["csv", "txt"])
        if spectra is None:
            return

        spectra_df = pd.read_csv(spectra)
        st.write(spectra_df.head(5))
        st.write("**Total Rows**", spectra_df.shape[0])
        st.divider()
        option = st.text_input("**Select Output Column**:")
        st.divider()
        if not option:
            return
        # A mistyped column name would otherwise surface as a raw KeyError.
        if option not in spectra_df.columns:
            st.error(f"Column '{option}' was not found in the uploaded file.")
            return

        st.write("**You have selected output column**: ", option)
        y = spectra_df[option]
        X = spectra_df.drop(option, axis=1)

        # Wide column for the features, narrow column for the target preview.
        col1, col2 = st.columns([4, 1], gap="small")
        with col1:
            st.write("Train data excluding output")
            st.write(X.head(5))
        with col2:
            st.write("Output")
            st.write(y.head(5))
        st.divider()

        models_list = [
            'Linear Regression',
            'Polynomial Regression',
            'Ridge Regression',
            'Lasso Regression',
            'ElasticNet Regression',
            'Logistic Regression',
            'Decision Tree Regression',
            'Random Forest Regression',
            'Gradient Boosting Regression',
            'Support Vector Regression (SVR)',
            'XGBoost',
            'LightGBM',
        ]
        selected_models = st.multiselect('Select Regression Models', models_list)
        if not selected_models:
            return

        models = RegressionModels()
        models.add_data(X, y)
        models.split_data()

        for model_name in selected_models:
            st.subheader(f"Model: {model_name}")
            models.fit(model_name)
            # The return value of train() was never used; the call is kept in
            # case it has side effects that evaluate() relies on.
            models.train(model_name)
            mse, r2 = models.evaluate(model_name)
            st.write(f"MSE: {mse}")
            st.write(f"R-squared: {r2}")
def NLP():
    """Render the NLP page, which currently shows only placeholder contact text."""
    heading = "Contact Page"
    body = "You can reach us at example@example.com"
    st.title(heading)
    st.write(body)
def Image():
    """Render the Image page, which currently shows only placeholder welcome text."""
    heading = "Home Page"
    body = "Welcome to the Home Page"
    st.title(heading)
    st.write(body)
def Voice():
    """Render the Voice page, which currently shows only placeholder welcome text."""
    heading = "Home Page"
    body = "Welcome to the Home Page"
    st.title(heading)
    st.write(body)
def Video():
    """Render the Video page, which currently shows only placeholder welcome text."""
    heading = "Home Page"
    body = "Welcome to the Home Page"
    st.title(heading)
    st.write(body)
def LLMs():
    """Render the LLMs page, which currently shows only placeholder "about" text."""
    heading = "About Page"
    body = "This is the About Page"
    st.title(heading)
    st.write(body)
def resume():
    """Render the resume page, which currently shows only placeholder contact text."""
    heading = "Contact Page"
    body = "You can reach us at example@example.com"
    st.title(heading)
    st.write(body)
# Selectable classifier name -> name of the ClassificationModels method that
# builds it. The keys double as the multiselect options, so the label shown
# to the user and the label used for dispatch can never drift apart (the old
# code compared against "K- Means Clustering" and so never trained K-Means).
_CLASSIFIER_BUILDERS = {
    "Naive Bayes Classifier": "naive_bayes_classifier",
    "Logistic Regression": "logistic_regression",
    "Decision Tree": "decision_tree",
    "Random Forests": "random_forests",
    "SVM": "support_vector_machines",
    "KNN": "k_nearest_neighbour",
    "K-Means Clustering": "k_means_clustering",
}

# Hyperparameters that the UI exposes for each classifier.
_CLASSIFIER_HYPERPARAMETERS = {
    "Naive Bayes Classifier": [],
    "Logistic Regression": ["C", "max_iter"],
    "Decision Tree": ["max_depth", "criterion"],
    "Random Forests": ["n_estimators", "max_depth", "criterion"],
    "SVM": ["C", "kernel"],
    "KNN": ["n_neighbors", "algorithm"],
    "K-Means Clustering": ["n_clusters", "init"],
}

# Hyperparameter -> (display title, widget spec).
# A dict spec holds keyword arguments for st.slider; a list spec holds the
# options for st.selectbox.
_HYPERPARAMETER_WIDGETS = {
    "max_depth": ("Max Depth", {"min_value": 1, "max_value": 20, "value": 5}),
    "criterion": ("Criterion", ["gini", "entropy"]),
    "C": ("C", {"min_value": 0.01, "max_value": 10.0, "value": 1.0}),
    "max_iter": (
        "Max Iterations",
        {"min_value": 100, "max_value": 10000, "step": 100, "value": 1000},
    ),
    "n_estimators": (
        "Number of Estimators",
        {"min_value": 1, "max_value": 100, "value": 10},
    ),
    "kernel": ("Kernel", ["linear", "poly", "rbf", "sigmoid"]),
    "n_neighbors": (
        "Number of Neighbors",
        {"min_value": 1, "max_value": 50, "value": 5},
    ),
    "algorithm": ("Algorithm", ["auto", "ball_tree", "kd_tree", "brute"]),
    "n_clusters": (
        "Number of Clusters",
        {"min_value": 2, "max_value": 20, "value": 5},
    ),
    "init": ("Initialization Method", ["k-means++", "random"]),
}


def _best_model_name(accuracy_by_model):
    """Return the model name with the highest accuracy (first wins on ties).

    Unlike a manual scan seeded with ``0``, this also returns a valid key
    when every accuracy is zero.
    """
    return max(accuracy_by_model, key=accuracy_by_model.get)


def _collect_hyperparameters(selected_models, max_items_per_row=4):
    """Render hyperparameter widgets for each model and return the choices.

    Models are laid out in rows of at most ``max_items_per_row`` columns.
    Returns ``{model_name: {hyperparameter: value}}`` with an entry for every
    selected model (empty for models that expose no hyperparameters).
    """
    hyperparameters_values = {}
    for start in range(0, len(selected_models), max_items_per_row):
        row_models = selected_models[start:start + max_items_per_row]
        for col, selected_model in zip(st.columns(len(row_models)), row_models):
            with col:
                st.write(f"Selected Model: {selected_model}")
                chosen = hyperparameters_values.setdefault(selected_model, {})
                for hyperparameter in _CLASSIFIER_HYPERPARAMETERS[selected_model]:
                    title, spec = _HYPERPARAMETER_WIDGETS[hyperparameter]
                    # The model name is part of the label so that two models
                    # sharing a hyperparameter get distinct widgets.
                    label = f"{title} {selected_model} {hyperparameter}"
                    if isinstance(spec, dict):
                        value = st.slider(label, **spec)
                    else:
                        value = st.selectbox(label, spec)
                    chosen[hyperparameter] = value
                    st.write(f"Selected {title}:", value)
    return hyperparameters_values


def _classification_train_tab():
    """Render the Train tab and return the training outcome, if any.

    Returns ``None`` until the user has uploaded a file, named a valid output
    column, selected at least one model and enabled the hyperparameter
    toggle. Otherwise returns a dict with keys ``target`` (output column
    name), ``models`` (name -> fitted model), ``accuracy`` (name -> value
    returned by ``evaluate_model``), ``hyperparameters`` (name -> dict) and
    ``best_model`` (name with the highest accuracy).
    """
    st.title("Classification / Train data")
    spectra = st.file_uploader("**Upload file**", type=["csv", "txt"])
    if spectra is None:
        return None

    spectra_df = pd.read_csv(spectra)
    st.write(spectra_df.head(5))
    st.write("**Total Rows**", spectra_df.shape[0])
    st.divider()
    option = st.text_input("**Select Output Column**:")
    st.divider()
    if not option:
        return None
    # A mistyped column name would otherwise surface as a raw KeyError.
    if option not in spectra_df.columns:
        st.error(f"Column '{option}' was not found in the uploaded file.")
        return None

    st.write("**You have selected output column**: ", option)
    y = spectra_df[option]
    X = spectra_df.drop(option, axis=1)

    col1, col2 = st.columns([4, 1], gap="small")
    with col1:
        st.write("Train data excluding output")
        st.write(X.head(5))
    with col2:
        st.write("Output")
        st.write(y.head(5))
    st.divider()

    selected_models = st.multiselect("**Select Models**:", list(_CLASSIFIER_BUILDERS))
    if not selected_models:
        return None

    # NOTE(review): training stays gated on this toggle. The original file's
    # indentation was lost, and gating is the reading that cannot hit an
    # undefined hyperparameter dict. Confirm whether training with default
    # hyperparameters (toggle off) is wanted.
    if not st.toggle("Add Hyperparameters"):
        return None

    hyperparameters_values = _collect_hyperparameters(selected_models)

    clf = ClassificationModels(X, y, hyperparameters_values)
    clf.split_data()

    trained_models = {}
    accuracy_dict = {}
    for model_name in selected_models:
        build = getattr(clf, _CLASSIFIER_BUILDERS[model_name])
        model = build(hyperparameters_values.get(model_name, {}))
        trained_models[model_name] = model
        accuracy_dict[model_name] = clf.evaluate_model(model)

    st.divider()
    st.write("Models Accuracy:", accuracy_dict)
    best_model = _best_model_name(accuracy_dict)
    st.write("Efficient Model is :", best_model, accuracy_dict[best_model])
    st.divider()
    st.write("Scroll up and Click on <**Test**> tab to test Model performance")

    return {
        "target": option,
        "models": trained_models,
        "accuracy": accuracy_dict,
        "hyperparameters": hyperparameters_values,
        "best_model": best_model,
    }


def _classification_test_tab(training):
    """Render the Test tab: predict on an uploaded file with the best model.

    ``training`` is the dict returned by ``_classification_train_tab`` or
    ``None`` when nothing has been trained during this script run. If the
    uploaded file contains the training target column, it is split off and
    shown next to the predictions as ``Actual``.
    """
    st.title("Classification / Test")
    spectra_1 = st.file_uploader("Upload file test the model", type=["csv", "txt"])
    if spectra_1 is None:
        return
    # Streamlit reruns the whole script on every interaction, so trained
    # models only exist if the Train tab completed during this same run.
    if training is None:
        st.warning("Train at least one model in the Train tab before testing.")
        return

    spectra_df1 = pd.read_csv(spectra_1)
    target = training["target"]
    actual = None
    if target in spectra_df1.columns:
        actual = spectra_df1[target]
        spectra_df1 = spectra_df1.drop(columns=[target])
    st.write(spectra_df1.head(5))
    st.divider()

    best_model = training["best_model"]
    model = training["models"][best_model]
    st.write(
        f"{best_model} Model Hyperparameter:",
        training["hyperparameters"].get(best_model, {}),
    )

    # Predict on the untouched feature frame and build the output on a copy,
    # so the prediction columns never leak into the model input.
    results = spectra_df1.copy()
    results["Predict"] = model.predict(spectra_df1)
    if actual is not None:
        results["Actual"] = actual
    st.write("Output : ", results)
    st.write(f"{best_model} Accuracy:", training["accuracy"][best_model])
    st.divider()

    data_frame = results.to_csv().encode("utf-8")
    st.download_button(
        label="Download data as CSV",
        data=data_frame,
        file_name="large_df.csv",
        mime="text/csv",
    )
    st.divider()


def _classification_page():
    """Render the classification page with its Train and Test tabs."""
    train, test = st.tabs(['Train', 'Test'])
    with train:
        training = _classification_train_tab()
    with test:
        _classification_test_tab(training)


# Main function to run the app
def main():
    """Draw the sidebar navigation and render the page the user selected."""
    st.sidebar.title("Deep Learning/ Data Science/ AI Models")
    page_options = ["Classification", "Regressor", "NLP", "LLMs", "AI"]
    choice = st.sidebar.radio("Select", page_options)

    # "Image", "Voice" and "Video" are routable but not currently offered in
    # page_options; "AI" is offered but has no page yet, so it renders nothing.
    pages = {
        "Classification": _classification_page,
        "Regressor": regressor,
        "NLP": NLP,
        "Image": Image,
        "Voice": Voice,
        "Video": Video,
        "LLMs": LLMs,
    }
    render = pages.get(choice)
    if render is not None:
        render()


if __name__ == "__main__":
    main()