"""Streamlit app for training and testing classification and regression models."""

import time
import uuid
import warnings

import pandas as pd
import streamlit as st

from classification import ClassificationModels
from regression import RegressionModels

warnings.filterwarnings("ignore")

# data cleaning: https://bank-performance.streamlit.app/
# https://docs.streamlit.io/library/api-reference/layout

# File extensions accepted by every upload widget in the app.
UPLOAD_FILE_TYPES = ["csv", "txt"]

# Number of rows shown when previewing a dataframe.
PREVIEW_ROWS = 5

# Maximum number of models whose hyperparameter widgets share one row.
MAX_MODELS_PER_ROW = 4

REGRESSION_MODELS = [
    'Linear Regression', 'Polynomial Regression', 'Ridge Regression',
    'Lasso Regression', 'ElasticNet Regression', 'Logistic Regression',
    'Decision Tree Regression', 'Random Forest Regression',
    'Gradient Boosting Regression', 'Support Vector Regression (SVR)',
    'XGBoost', 'LightGBM',
]

# Selectable classifiers (in display order) and their tunable hyperparameters.
CLASSIFIER_HYPERPARAMETERS = {
    "Naive Bayes Classifier": [],
    "Logistic Regression": ["C", "max_iter"],
    "Decision Tree": ["max_depth", "criterion"],
    "Random Forests": ["n_estimators", "max_depth", "criterion"],
    "SVM": ["C", "kernel"],
    "KNN": ["n_neighbors", "algorithm"],
    "K-Means Clustering": ["n_clusters", "init"],
}

# Name of the ClassificationModels method that builds each classifier.
# Keys must match CLASSIFIER_HYPERPARAMETERS exactly.
CLASSIFIER_TRAINERS = {
    "Naive Bayes Classifier": "naive_bayes_classifier",
    "Logistic Regression": "logistic_regression",
    "Decision Tree": "decision_tree",
    "Random Forests": "random_forests",
    "SVM": "support_vector_machines",
    "KNN": "k_nearest_neighbour",
    "K-Means Clustering": "k_means_clustering",
}

# hyperparameter -> (display title, widget kind, widget spec).
# A "slider" spec is the keyword arguments for st.slider; a "selectbox" spec
# is the list of options.
HYPERPARAMETER_WIDGETS = {
    "max_depth": ("Max Depth", "slider",
                  {"min_value": 1, "max_value": 20, "value": 5}),
    "criterion": ("Criterion", "selectbox", ["gini", "entropy"]),
    "C": ("C", "slider",
          {"min_value": 0.01, "max_value": 10.0, "value": 1.0}),
    "max_iter": ("Max Iterations", "slider",
                 {"min_value": 100, "max_value": 10000, "step": 100,
                  "value": 1000}),
    "n_estimators": ("Number of Estimators", "slider",
                     {"min_value": 1, "max_value": 100, "value": 10}),
    "kernel": ("Kernel", "selectbox", ["linear", "poly", "rbf", "sigmoid"]),
    "n_neighbors": ("Number of Neighbors", "slider",
                    {"min_value": 1, "max_value": 50, "value": 5}),
    "algorithm": ("Algorithm", "selectbox",
                  ["auto", "ball_tree", "kd_tree", "brute"]),
    "n_clusters": ("Number of Clusters", "slider",
                   {"min_value": 2, "max_value": 20, "value": 5}),
    "init": ("Initialization Method", "selectbox", ["k-means++", "random"]),
}


def _load_training_data():
    """Upload a training file and split it into features and output column.

    Renders the upload widget, a preview of the data and the output-column
    text input shared by the classification and regression pages.

    Returns:
        A ``(X, y, option)`` tuple, where ``option`` is the chosen output
        column name, or ``None`` while the user has not yet uploaded a file
        or entered a valid column name.
    """
    spectra = st.file_uploader("**Upload file**", type=UPLOAD_FILE_TYPES)
    if spectra is None:
        return None

    spectra_df = pd.read_csv(spectra)
    st.write(spectra_df.head(PREVIEW_ROWS))
    st.write("**Total Rows**", spectra_df.shape[0])
    st.divider()
    option = st.text_input("**Select Output Column**:")
    st.divider()
    if not option:
        return None
    # The column name is free text, so report a typo instead of letting
    # the dataframe lookup raise KeyError.
    if option not in spectra_df.columns:
        st.error(f"Column '{option}' was not found in the uploaded file.")
        return None

    st.write("**You have selected output column**: ", option)
    y = spectra_df[option]
    X = spectra_df.drop(option, axis=1)

    col1, col2 = st.columns([4, 1], gap="small")
    with col1:
        st.write("Train data excluding output")
        st.write(X.head(PREVIEW_ROWS))
    with col2:
        st.write("Output")
        st.write(y.head(PREVIEW_ROWS))
    st.divider()
    return X, y, option


def regressor():
    """Render the regression page: upload data, pick models, show metrics."""
    # Only the Train tab has content so far; the other two are placeholders.
    _eda, train, _test = st.tabs(['EDA/Transformation', 'Train', 'Test'])
    with train:
        st.title("Regression / Train data")
        training_data = _load_training_data()
        if training_data is None:
            return
        X, y, _ = training_data

        selected_models = st.multiselect('Select Regression Models',
                                         REGRESSION_MODELS)
        if not selected_models:
            return

        models = RegressionModels()
        models.add_data(X, y)
        models.split_data()
        for model_name in selected_models:
            st.subheader(f"Model: {model_name}")
            models.fit(model_name)
            # The value returned by train() was never displayed; the call is
            # kept in case evaluate() relies on state it sets up.
            models.train(model_name)
            mse, r2 = models.evaluate(model_name)
            st.write(f"MSE: {mse}")
            st.write(f"R-squared: {r2}")


def _hyperparameter_input(model_name, hyperparameter):
    """Render the widget for one hyperparameter and return the chosen value.

    The model name is part of the widget label so that the same
    hyperparameter can be shown for several models without label clashes.
    """
    title, kind, spec = HYPERPARAMETER_WIDGETS[hyperparameter]
    label = f"{title} {model_name} {hyperparameter}"
    if kind == "slider":
        value = st.slider(label, **spec)
    else:
        value = st.selectbox(label, spec)
    st.write(f"Selected {title}:", value)
    return value


def _collect_hyperparameters(selected_models):
    """Render hyperparameter widgets for each model, a few models per row.

    Returns:
        A dict mapping every selected model name to a dict of
        ``{hyperparameter: chosen value}`` (empty for models without
        tunable hyperparameters).
    """
    hyperparameters_values = {name: {} for name in selected_models}
    for start in range(0, len(selected_models), MAX_MODELS_PER_ROW):
        row_models = selected_models[start:start + MAX_MODELS_PER_ROW]
        for col, model_name in zip(st.columns(len(row_models)), row_models):
            with col:
                st.write(f"Selected Model: {model_name}")
                for hyperparameter in CLASSIFIER_HYPERPARAMETERS[model_name]:
                    hyperparameters_values[model_name][hyperparameter] = (
                        _hyperparameter_input(model_name, hyperparameter)
                    )
    return hyperparameters_values


def _classification_train_tab():
    """Render the classification Train tab and train the selected models.

    Returns:
        ``None`` until at least one model has been trained in this run;
        otherwise a dict with the keys ``"target"`` (output column name),
        ``"best_model"`` (name of the model with the highest accuracy),
        ``"models"`` (name -> trained model), ``"accuracy"``
        (name -> accuracy) and ``"hyperparameters"``
        (name -> hyperparameters used).
    """
    st.title("Classification / Train data")
    training_data = _load_training_data()
    if training_data is None:
        return None
    X, y, option = training_data

    selected_models = st.multiselect("**Select Models**:",
                                     list(CLASSIFIER_HYPERPARAMETERS))
    if not selected_models:
        return None

    # Stays empty when the toggle is off, so every model is built with an
    # empty hyperparameter dict.
    hyperparameters_values = {}
    if st.toggle("Add Hyperparameters"):
        hyperparameters_values = _collect_hyperparameters(selected_models)

    clf = ClassificationModels(X, y, hyperparameters_values)
    clf.split_data()

    trained_models = {}
    accuracy_dict = {}
    for model_name in selected_models:
        build_model = getattr(clf, CLASSIFIER_TRAINERS[model_name])
        model = build_model(hyperparameters_values.get(model_name, {}))
        trained_models[model_name] = model
        accuracy_dict[model_name] = clf.evaluate_model(model)

    st.divider()
    st.write("Models Accuracy:", accuracy_dict)
    # max() keeps the first model on ties and still yields a valid key when
    # every accuracy is 0.
    max_key = max(accuracy_dict, key=accuracy_dict.get)
    st.write("Efficient Model is :", max_key, accuracy_dict[max_key])
    st.divider()
    st.write("Scroll up and Click on <**Test**> tab to test Model performance")

    return {
        "target": option,
        "best_model": max_key,
        "models": trained_models,
        "accuracy": accuracy_dict,
        "hyperparameters": hyperparameters_values,
    }


def _classification_test_tab(training):
    """Render the classification Test tab.

    Args:
        training: The dict returned by ``_classification_train_tab``, or
            ``None`` when no model has been trained in this run.
    """
    st.title("Classification / Test")
    spectra_1 = st.file_uploader("Upload file test the model",
                                 type=UPLOAD_FILE_TYPES)
    if spectra_1 is None:
        return
    if training is None:
        st.warning("Train at least one model in the Train tab before testing.")
        return

    spectra_df1 = pd.read_csv(spectra_1)
    # Use the output column chosen during training rather than a fixed name.
    # The test file may be unlabelled, in which case no 'Actual' column is
    # added to the output.
    target = training["target"]
    actual = None
    if target in spectra_df1.columns:
        actual = spectra_df1[target]
        spectra_df1 = spectra_df1.drop(columns=[target])
    st.write(spectra_df1.head(PREVIEW_ROWS))
    st.divider()

    best_model = training["best_model"]
    st.write(f"{best_model} Model Hyperparameter:",
             training["hyperparameters"].get(best_model, {}))

    predictions = training["models"][best_model].predict(spectra_df1)
    output = spectra_df1.copy()
    output['Predict'] = predictions
    if actual is not None:
        output['Actual'] = actual
    st.write("Output : ", output)
    # This is the accuracy measured during training, not on the uploaded
    # test file.
    st.write(f"{best_model} Accuracy:", training["accuracy"][best_model])

    st.divider()
    st.download_button(
        label="Download data as CSV",
        data=output.to_csv().encode('utf-8'),
        file_name='large_df.csv',
        mime='text/csv',
    )
    st.divider()


def classifier():
    """Render the classification page with its Train and Test tabs."""
    train, test = st.tabs(['Train', 'Test'])
    with train:
        training = _classification_train_tab()
    with test:
        _classification_test_tab(training)


def NLP():
    """Render the NLP page (placeholder content)."""
    st.title("Contact Page")
    st.write("You can reach us at example@example.com")


def Image():
    """Render the Image page (placeholder content)."""
    st.title("Home Page")
    st.write("Welcome to the Home Page")


def Voice():
    """Render the Voice page (placeholder content)."""
    st.title("Home Page")
    st.write("Welcome to the Home Page")


def Video():
    """Render the Video page (placeholder content)."""
    st.title("Home Page")
    st.write("Welcome to the Home Page")


def LLMs():
    """Render the LLMs page (placeholder content)."""
    st.title("About Page")
    st.write("This is the About Page")


def resume():
    """Render the resume page (placeholder content; not linked from the sidebar)."""
    st.title("Contact Page")
    st.write("You can reach us at example@example.com")


def main():
    """Run the app: show the sidebar and render the selected page."""
    st.sidebar.title("Deep Learning/ Data Science/ AI Models")
    page_options = ["Classification", "Regressor", "NLP", "LLMs", "AI"]
    choice = st.sidebar.radio("Select", page_options)

    pages = {
        "Classification": classifier,
        "Regressor": regressor,
        "NLP": NLP,
        "Image": Image,
        "Voice": Voice,
        "Video": Video,
        "LLMs": LLMs,
    }
    # "AI" is offered in the sidebar but has no page yet, so nothing is
    # rendered for it.
    render_page = pages.get(choice)
    if render_page is not None:
        render_page()


if __name__ == "__main__":
    main()