| import pandas as pd |
| import numpy as np |
| from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder, OrdinalEncoder |
| from sklearn.linear_model import LinearRegression, LogisticRegression |
| from sklearn.ensemble import GradientBoostingClassifier |
| import xgboost |
| from sklearn.compose import ColumnTransformer |
| |
| from sklearn.pipeline import Pipeline |
| from sklearn.model_selection import train_test_split |
| from sklearn.metrics import classification_report, r2_score |
| import streamlit as st |
| import shap |
| import matplotlib as mt |
|
|
def _split_feature_types(features):
    """Return ``(numeric_columns, categorical_columns)`` for ``features``.

    A column is treated as numeric when pandas reports a numeric dtype for
    it; every other column is treated as categorical.
    """
    num_features = []
    cat_features = []
    for column in features.columns:
        if pd.api.types.is_numeric_dtype(features[column]):
            num_features.append(column)
        else:
            cat_features.append(column)
    return num_features, cat_features


def _build_preprocessor(num_features, cat_features, dense=False):
    """Build the ColumnTransformer that scales numeric and one-hot encodes categorical columns."""
    transformers = []
    if num_features:
        transformers.append(("num_trf", StandardScaler(), num_features))
    if cat_features:
        # handle_unknown="ignore" keeps prediction from failing on a category
        # that was not present in the training split.
        if dense:
            encoder = OneHotEncoder(sparse_output=False, handle_unknown="ignore")
        else:
            encoder = OneHotEncoder(handle_unknown="ignore")
        transformers.append(("cat_trf", encoder, cat_features))
    return ColumnTransformer(transformers)


def _make_classifier(model):
    """Return the unfitted classifier named by ``model`` or raise ValueError."""
    if model == "GradientBoosting":
        return GradientBoostingClassifier(random_state=42)
    if model == "LogisticRegression":
        return LogisticRegression(random_state=42)
    raise ValueError(
        f"Unsupported classification model {model!r}; "
        "expected 'GradientBoosting' or 'LogisticRegression'."
    )


def train(data=None, problem="Regression", model="LinearRegression", label=None):
    """Read a CSV, split it 80/20 and fit a preprocessing + model pipeline.

    Args:
        data: Path or file-like object accepted by ``pandas.read_csv``.
        problem: ``"Regression"`` or ``"Classification"``.
        model: Classifier name (``"GradientBoosting"`` or
            ``"LogisticRegression"``). Ignored for regression, which always
            fits ``LinearRegression``.
        label: Name of the target column in the CSV.

    Returns:
        ``(final_pipe, X_train, X_test, y_train, y_test)``. For
        classification, ``y_train`` and ``y_test`` are the label-encoded
        targets produced by a ``LabelEncoder`` fitted on the training labels.

    Raises:
        ValueError: If ``problem`` or ``model`` is unsupported, ``data`` is
            None, or the CSV has no feature columns besides ``label``.
        KeyError: If ``label`` is not a column of the CSV.
    """
    # Validate the request before doing any work, so an unsupported choice
    # fails loudly instead of silently returning None.
    if problem not in ("Regression", "Classification"):
        raise ValueError(
            f"Unsupported problem type {problem!r}; "
            "expected 'Regression' or 'Classification'."
        )
    is_classification = problem == "Classification"
    estimator = _make_classifier(model) if is_classification else LinearRegression()

    if data is None:
        raise ValueError("No training data was provided.")

    df = pd.read_csv(data)
    if label not in df.columns:
        raise KeyError(f"Target column {label!r} not found in the training data.")

    target = df[label].copy()
    features = df.drop(label, axis=1)
    if features.shape[1] == 0:
        raise ValueError("The training data has no feature columns besides the target.")

    # Stratify only for classification: stratifying on a continuous
    # regression target makes train_test_split fail.
    X_train, X_test, y_train, y_test = train_test_split(
        features,
        target,
        test_size=0.20,
        random_state=42,
        shuffle=True,
        stratify=target if is_classification else None,
    )

    num_features, cat_features = _split_feature_types(features)

    # Regression keeps the dense one-hot output used by the original code;
    # classification keeps the encoder's default output format.
    trf = _build_preprocessor(num_features, cat_features, dense=not is_classification)

    if not is_classification:
        final_pipe = Pipeline([("transformers", trf), ("reg_model", estimator)])
        final_pipe.fit(X_train, y_train)
        return final_pipe, X_train, X_test, y_train, y_test

    # Fit the encoder on the training labels only and reuse that mapping for
    # the test labels so both splits share the same integer codes.
    lbl_encd = LabelEncoder()
    y_train_trf = lbl_encd.fit_transform(y_train)
    y_test_trf = lbl_encd.transform(y_test)

    final_pipe = Pipeline([("transformers", trf), ("clf_model", estimator)])
    final_pipe.fit(X_train, y_train_trf)
    return final_pipe, X_train, X_test, y_train_trf, y_test_trf
|
|
|
|
def predict(model=None, x=None):
    """Return whatever ``model.predict`` produces for the inputs ``x``."""
    return model.predict(x)
|
|
def evaluate(y_true, y_pred, problem="Regression"):
    """Score predictions against the true targets.

    When ``problem`` is ``"Regression"`` the result of ``r2_score`` is
    returned. For any other value a classification report is built as a
    dict, loaded into a transposed DataFrame, and returned as the UTF-8
    encoded bytes of its CSV form.
    """
    if problem != "Regression":
        report = classification_report(y_true, y_pred, output_dict=True)
        report_csv = pd.DataFrame(report).transpose().to_csv()
        return report_csv.encode('utf-8')

    return r2_score(y_true, y_pred)
|
|
# Page header: title, banner image and tagline.
st.title("No Code Machine Learning Studio: ")

st.image(image="https://www.silvertouchtech.co.uk/wp-content/uploads/2020/05/ai-banner.jpg")
st.write("Drag & Drop Portal for Machine Learning")

# Problem type chosen by the user; it decides which models are offered below.
prob_type = st.selectbox(label="Please select your ML problem type: ", options=("Regression", "Classification"))

# Uploaded CSV; this is None until the user provides a file.
train_data = st.file_uploader(label="Please upload your training dataset", type=["csv"])

# Only classification offers a model choice; regression always uses LinearRegression.
if prob_type == "Classification":
    model = st.selectbox(label="Please Select your classification model: ", options=("GradientBoosting", "LogisticRegression"))
else:
    model = "LinearRegression"
|
|
|
|
def explain(model="LinearRegression", train_data=None, test_data=None):
    """Draw a SHAP violin summary plot for ``model`` on ``test_data``.

    Builds a ``shap.LinearExplainer`` from ``model`` and ``train_data``,
    computes SHAP values for ``test_data`` and draws the summary plot on the
    current matplotlib figure without showing it (``show=False``). Nothing
    is returned; callers can pick the figure up from matplotlib.
    """
    # Import pyplot explicitly: ``import matplotlib`` alone does not guarantee
    # that the ``matplotlib.pyplot`` submodule is loaded as an attribute.
    import matplotlib.pyplot as plt

    # NOTE(review): ``feature_dependence`` is kept exactly as the original
    # passed it; confirm the installed shap version still accepts this
    # keyword and value.
    explainer = shap.LinearExplainer(model, train_data, feature_dependence=False)
    shap_values = explainer.shap_values(test_data)

    shap.summary_plot(shap_values, test_data, plot_type="violin", show=False)

    # Adjust the box aspect of the last axes on the figure
    # (presumably the colour bar added by the summary plot; verify).
    plt.gcf().axes[-1].set_box_aspect(10)
|
|
|
|
# Name of the target column inside the uploaded CSV.
y = st.text_input("Please write your target column name: ")

if st.button("Train"):
    # Guard against the inputs that would otherwise surface as raw tracebacks.
    if train_data is None:
        st.error("Please upload a training dataset before training.")
    elif not y.strip():
        st.error("Please write your target column name before training.")
    else:
        model_, X_train, X_test, y_train, y_test = train(data=train_data, problem=prob_type, model=model, label=y)

        y_hat_train = predict(model_, X_train)
        y_hat_test = predict(model_, X_test)

        if prob_type == "Classification":
            st.write("Classification report of training set: ")
            report = evaluate(y_train, y_hat_train, prob_type)
            st.download_button(key="train", label="Click here to download the report", data=report, mime="text/csv")

            st.write("Classification report of testing dataset: ")
            # The test report must be computed from the held-out split,
            # not from the training split again.
            report_test = evaluate(y_test, y_hat_test, prob_type)
            st.download_button(key="test", label="Click here to download the report", data=report_test, mime="text/csv")
        else:
            st.write("r2 score on training set: ")
            st.write(evaluate(y_train, y_hat_train, prob_type))
            st.write("r2 score on test set: ")
            st.write(evaluate(y_test, y_hat_test, prob_type))
| |