# AutoML-with-Streamlit app: parses an RTF-wrapped JSON experiment config,
# trains the selected scikit-learn / XGBoost model via GridSearchCV, and
# renders evaluation results (confusion matrix / regression metrics) in Streamlit.
from sklearn.model_selection import GridSearchCV | |
from sklearn.metrics import confusion_matrix, classification_report | |
import multiprocessing | |
from sklearn.linear_model import Ridge, Lasso, LogisticRegression | |
from sklearn.metrics import mean_squared_error | |
import numpy as np | |
from sklearn.linear_model import ElasticNet | |
from sklearn.ensemble import RandomForestRegressor | |
from sklearn.linear_model import LinearRegression | |
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor | |
from sklearn.svm import SVC | |
from sklearn.neighbors import KNeighborsClassifier | |
from sklearn.neural_network import MLPClassifier | |
import xgboost as xgb | |
import json | |
import pandas as pd | |
import seaborn as sns | |
import matplotlib.pyplot as plt | |
from striprtf.striprtf import rtf_to_text | |
from IPython.display import display | |
import warnings | |
from sklearn.exceptions import ConvergenceWarning | |
import streamlit as st | |
import os | |
from striprtf.striprtf import rtf_to_text | |
from sklearn.model_selection import train_test_split, GridSearchCV | |
from sklearn.ensemble import RandomForestClassifier | |
from sklearn.metrics import confusion_matrix, classification_report | |
from sklearn.feature_extraction.text import HashingVectorizer | |
from sklearn.pipeline import Pipeline | |
from sklearn.impute import SimpleImputer | |
from sklearn.preprocessing import LabelEncoder | |
warnings.filterwarnings("ignore", category=ConvergenceWarning) | |
# Function to parse an RTF file and call the hackathon_problem function
def rtf_parser(file_path):
    """Read an RTF file, strip the RTF markup, and run the AutoML pipeline.

    Parameters
    ----------
    file_path : str
        Path to the uploaded ``.rtf`` file, expected to wrap a JSON
        experiment configuration.
    """
    # Explicit encoding: without it, open() uses the platform locale default,
    # which can fail on non-UTF-8 bytes on some systems. RTF is ASCII-escaped,
    # so utf-8 decodes it safely everywhere.
    with open(file_path, 'r', encoding='utf-8') as file:
        rtf_content = file.read()
    # Strip RTF control words down to the embedded plain text (the JSON config).
    text_content = rtf_to_text(rtf_content)
    # Hand the extracted text to the pipeline entry point.
    hackathon_problem(text_content)
def _render_classification_report(Y_test, Y_pred, image_name):
    """Render a confusion-matrix heatmap (saved to *image_name*) and a styled
    classification report in the Streamlit app."""
    confusion_mat = confusion_matrix(Y_test, Y_pred)
    # Fresh figure so a re-run doesn't draw on top of a previous heatmap.
    plt.figure()
    sns.heatmap(confusion_mat, annot=True, cmap="viridis", fmt="d", cbar=False,
                linewidths=0.5, linecolor='gray', square=True,
                xticklabels=True, yticklabels=True, annot_kws={"size": 10})
    plt.xlabel("Predicted Labels")
    plt.ylabel("True Labels")
    plt.savefig(image_name)
    st.image(image_name)
    # classification_report(..., output_dict=True) -> DataFrame for styling.
    report_df = pd.DataFrame(classification_report(Y_test, Y_pred, output_dict=True))
    st.write("Classification Report:")
    st.write(report_df.style.background_gradient(cmap='viridis'))


def _render_regression_metrics(best_model, X_test, Y_test, Y_pred):
    """Render R-squared, adjusted R-squared and RMSE as a styled table in Streamlit."""
    r_squared = best_model.score(X_test, Y_test)
    n = len(Y_test)
    k = X_test.shape[1]  # number of predictors
    adjusted_r_squared = 1 - (1 - r_squared) * (n - 1) / (n - k - 1)
    rmse = np.sqrt(mean_squared_error(Y_test, Y_pred))
    metrics_df = pd.DataFrame({
        'Metric': ['R-squared', 'Adjusted R-squared', 'Root Mean Squared Error (RMSE)'],
        'Value': [r_squared, adjusted_r_squared, rmse]
    })
    styled_metrics_df = (
        metrics_df.style
        .set_properties(**{'text-align': 'left'})   # align text to the left
        .highlight_max(color='lightgreen')          # highlight maximum value
        .set_caption('Model Evaluation Metrics')    # add caption
    )
    st.write("metrics_df:")
    st.write(styled_metrics_df)


def hackathon_problem(text_content):
    """Run the AutoML pipeline described by a JSON experiment configuration.

    Loads the dataset named in the config, label-encodes object columns,
    imputes per the ``feature_handling`` section, splits train/test, then
    grid-searches the single algorithm flagged ``is_selected`` and renders
    its evaluation (classification report or regression metrics) in Streamlit.

    Parameters
    ----------
    text_content : str
        JSON text with a top-level ``design_state_data`` object
        (schema as produced by the upstream RTF config).
    """
    json_data = json.loads(text_content)
    state = json_data["design_state_data"]

    # Check the dataset file exists before doing any work.
    dataset_file = state["session_info"]["dataset"]
    if not os.path.exists(dataset_file):
        st.error(f"Dataset file '{dataset_file}' not found.")
        return
    data = pd.read_csv(dataset_file)

    # Label-encode every object-dtype column (including the target if textual).
    label_encoders = {}
    for column in data.columns:
        if data[column].dtype == 'object':
            label_encoders[column] = LabelEncoder()
            data[column] = label_encoders[column].fit_transform(data[column])

    # Impute missing values per the feature_handling config.
    # BUG FIX: the original appended '_imputer' to the feature name and then
    # indexed data[['<feature>_imputer']] — a KeyError for any imputed feature.
    # The imputer must be fitted/applied on the feature column itself.
    for feature, details in state["feature_handling"].items():
        if details.get("missing_values") == "Impute":
            strategy = 'mean' if details["impute_with"] == "Average of values" else 'median'
            data[feature] = SimpleImputer(strategy=strategy).fit_transform(data[[feature]])

    # Separate X and y using only the selected features.
    selected_features = [feature for feature, details in state["feature_handling"].items()
                         if details["is_selected"]]
    X = data[selected_features]
    Y = data[state["target"]["target"]]

    # Split data with the configured ratio and seed.
    train_ratio = state["train"]["train_ratio"]
    random_seed = state["train"]["random_seed"]
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=1 - train_ratio, random_state=random_seed)

    # Number of CPU cores, used for GridSearchCV parallelism.
    num_cores = multiprocessing.cpu_count()

    # Find the single algorithm flagged as selected; bind its hyper-parameter
    # details at the same time (the original left `details` dangling on the
    # last iterated algorithm when nothing was selected).
    selected_algorithm, details = None, None
    for algorithm, algo_details in state["algorithms"].items():
        if algo_details["is_selected"]:
            selected_algorithm, details = algorithm, algo_details
            break
    if selected_algorithm is None:
        # BUG FIX: originally fell through silently with no feedback.
        st.error("No algorithm is selected in the configuration.")
        return

    if selected_algorithm == "RandomForestClassifier":
        model = RandomForestClassifier()
        parameters = {
            "n_estimators": np.linspace(details["min_trees"], details["max_trees"],
                                        num=3, dtype=int),
            "max_depth": np.linspace(details["min_depth"], details["max_depth"],
                                     num=3, dtype=int),
            "min_samples_leaf": np.linspace(details["min_samples_per_leaf_min_value"],
                                            details["min_samples_per_leaf_max_value"],
                                            num=3, dtype=int),
        }
        grid_search = GridSearchCV(model, parameters, cv=5, n_jobs=num_cores)
        grid_search.fit(X_train, Y_train)
        best_model = grid_search.best_estimator_
        Y_pred = best_model.predict(X_test)
        _render_classification_report(Y_test, Y_pred, 'RandomForestClassifier.png')

    elif selected_algorithm == "RandomForestRegressor":
        model = RandomForestRegressor()
        parameters = {
            "n_estimators": list(range(details["min_trees"], details["max_trees"] + 1)),
            "max_depth": list(range(details["min_depth"], details["max_depth"] + 1)),
            "min_samples_leaf": list(range(details["min_samples_per_leaf_min_value"],
                                           details["min_samples_per_leaf_max_value"] + 1)),
        }
        grid_search = GridSearchCV(model, parameters, cv=5, n_jobs=num_cores)
        grid_search.fit(X_train, Y_train)
        best_model = grid_search.best_estimator_
        Y_pred = best_model.predict(X_test)
        _render_regression_metrics(best_model, X_test, Y_test, Y_pred)

    elif selected_algorithm == "LinearRegression":
        # No hyper-parameters to search for plain OLS.
        best_model = LinearRegression()
        best_model.fit(X_train, Y_train)
        Y_pred = best_model.predict(X_test)
        _render_regression_metrics(best_model, X_test, Y_test, Y_pred)

    elif selected_algorithm == "LogisticRegression":
        model = LogisticRegression()
        parameters = {
            "C": np.linspace(details["min_regparam"], details["max_regparam"], num=5),
            "max_iter": np.linspace(details["min_iter"], details["max_iter"],
                                    num=5, dtype=int),
            # NOTE(review): l1_ratio only takes effect with penalty='elasticnet'
            # and the 'saga' solver — confirm the config expects that.
            "l1_ratio": np.linspace(details["min_elasticnet"], details["max_elasticnet"],
                                    num=5),
        }
        grid_search = GridSearchCV(model, parameters, cv=5, n_jobs=num_cores)
        grid_search.fit(X_train, Y_train)
        best_model = grid_search.best_estimator_
        Y_pred = best_model.predict(X_test)
        _render_classification_report(Y_test, Y_pred, 'LogisticRegression.png')

    elif selected_algorithm in ("RidgeRegression", "LassoRegression"):
        model = Ridge() if selected_algorithm == "RidgeRegression" else Lasso()
        parameters = {
            # Regularization strengths in steps of 0.1 across the configured range.
            "alpha": [i / 10 for i in range(int(details["min_regparam"] * 10),
                                            int(details["max_regparam"] * 10) + 1)],
            "max_iter": list(range(details["min_iter"], details["max_iter"] + 1)),
        }
        grid_search = GridSearchCV(model, parameters, cv=5)
        grid_search.fit(X_train, Y_train)
        best_model = grid_search.best_estimator_
        Y_pred = best_model.predict(X_test)
        _render_regression_metrics(best_model, X_test, Y_test, Y_pred)

    elif selected_algorithm == "ElasticNetRegression":
        model = ElasticNet()
        parameters = {
            "alpha": [i / 10 for i in range(int(details["min_regparam"] * 10),
                                            int(details["max_regparam"] * 10) + 1)],
            "l1_ratio": [i / 10 for i in range(int(details["min_elasticnet"] * 10),
                                               int(details["max_elasticnet"] * 10) + 1)],
            "max_iter": list(range(details["min_iter"], details["max_iter"] + 1)),
        }
        grid_search = GridSearchCV(model, parameters, cv=5)
        grid_search.fit(X_train, Y_train)
        best_model = grid_search.best_estimator_
        Y_pred = best_model.predict(X_test)
        _render_regression_metrics(best_model, X_test, Y_test, Y_pred)

    elif selected_algorithm == "xg_boost":
        model = xgb.XGBClassifier(
            objective='multi:softmax',
            booster='dart' if details['dart'] else 'gbtree',
            tree_method=details['tree_method'] if details['tree_method'] != "" else "auto",
            random_state=details['random_state'],
        )
        # Config values are stored as scaled integers; rescale to real ranges.
        parameters = {
            'n_estimators': [details["max_num_of_trees"]] if details["max_num_of_trees"] > 0 else [5],
            'max_depth': details['max_depth_of_tree'],
            'learning_rate': [value * 0.001 for value in details['learningRate']],
            'reg_alpha': [value * 0.01 for value in details['l1_regularization']],
            'reg_lambda': [value * 0.01 for value in details['l2_regularization']],
            'gamma': [value * 0.01 for value in details['gamma']],
            'min_child_weight': [value * 0.01 for value in details['min_child_weight']],
            'subsample': [value * 0.01 for value in details['sub_sample']],
            'colsample_bytree': [value * 0.01 for value in details['col_sample_by_tree']],
        }
        grid_search = GridSearchCV(model, parameters, cv=5, n_jobs=num_cores)
        # NOTE(review): passing early_stopping_rounds through fit() was removed
        # in xgboost >= 2.0 (it moved to the estimator constructor) — confirm
        # the pinned xgboost version still supports this call.
        grid_search.fit(X_train, Y_train, eval_set=[(X_test, Y_test)],
                        early_stopping_rounds=details['early_stopping_rounds']
                        if details['early_stopping'] else None)
        best_model = grid_search.best_estimator_
        Y_pred = best_model.predict(X_test)
        _render_classification_report(Y_test, Y_pred, 'Xg_boost.png')

    elif selected_algorithm == "DecisionTreeClassifier":
        criterion = 'gini' if details['use_gini'] else 'entropy'
        # 'best' only when explicitly requested and random splitting is off.
        splitter = 'best' if details['use_best'] and not details['use_random'] else 'random'
        model = DecisionTreeClassifier(criterion=criterion, splitter=splitter)
        parameters = {
            'max_depth': list(range(details['min_depth'], details['max_depth'] + 1)),
            'min_samples_leaf': details['min_samples_per_leaf'],
        }
        grid_search = GridSearchCV(model, parameters, cv=5)
        grid_search.fit(X_train, Y_train)
        best_model = grid_search.best_estimator_
        Y_pred = best_model.predict(X_test)
        _render_classification_report(Y_test, Y_pred, 'dt_class_cm.png')

    elif selected_algorithm == "DecisionTreeRegressor":
        splitter = ('best' if details.get('use_best', False)
                    and not details.get('use_random', False) else 'random')
        # Use the provided random state or default to 10.
        random_state = details.get('random_state', 10)
        model = DecisionTreeRegressor(splitter=splitter, random_state=random_state)
        parameters = {
            'max_depth': list(range(details['min_depth'], details['max_depth'] + 1)),
            'min_samples_leaf': details['min_samples_per_leaf'],
        }
        grid_search = GridSearchCV(model, parameters, cv=5)
        grid_search.fit(X_train, Y_train)
        best_model = grid_search.best_estimator_
        Y_pred = best_model.predict(X_test)
        _render_regression_metrics(best_model, X_test, Y_test, Y_pred)

    elif selected_algorithm == "SVM":
        # Collect every kernel the config enables.
        kernels = []
        if details['linear_kernel']:
            kernels.append('linear')
        if details['rep_kernel']:
            kernels.append('rbf')
        if details['polynomial_kernel']:
            kernels.append('poly')
        if details['sigmoid_kernel']:
            kernels.append('sigmoid')
        model = SVC()
        parameters = {
            'C': details['c_value'],
            'kernel': kernels,
            'gamma': ['auto', 'scale'] if details['scale'] else details['custom_gamma_values'],
            'tol': [10 ** -details['tolerance']],  # tolerance given as a negative exponent
            'max_iter': [details['max_iterations']],
        }
        grid_search = GridSearchCV(model, parameters, cv=5)
        grid_search.fit(X_train, Y_train)
        best_model = grid_search.best_estimator_
        Y_pred = best_model.predict(X_test)
        _render_classification_report(Y_test, Y_pred, 'SVM.png')

    elif selected_algorithm == "KNN":
        model = KNeighborsClassifier()
        parameters = {
            'n_neighbors': details['k_value'],
            'weights': ['uniform', 'distance'] if details['distance_weighting'] else ['uniform'],
            'algorithm': (['auto'] if details['neighbour_finding_algorithm'] == "Automatic"
                          else [details['neighbour_finding_algorithm']]),
            'p': [details['p_value']] if details['p_value'] > 0 else [1],
        }
        grid_search = GridSearchCV(model, parameters, cv=5)
        grid_search.fit(X_train, Y_train)
        best_model = grid_search.best_estimator_
        Y_pred = best_model.predict(X_test)
        _render_classification_report(Y_test, Y_pred, 'KNN.png')

    elif selected_algorithm == "neural_network":
        model = MLPClassifier(early_stopping=details['early_stopping'])
        # Each config value falls back to a small default when unset/zero.
        parameters = {
            'hidden_layer_sizes': details['hidden_layer_sizes'],
            'activation': ['identity', 'logistic', 'tanh', 'relu'],
            'alpha': [details['alpha_value']] if details['alpha_value'] > 0 else [.1],
            'max_iter': [details['max_iterations']] if details['max_iterations'] > 0 else [100],
            'tol': ([10 ** -details['convergence_tolerance']]
                    if details['convergence_tolerance'] > 0 else [0.1]),
            'solver': [details['solver'].lower()],
            'learning_rate_init': ([details['initial_learning_rate']]
                                   if details['initial_learning_rate'] > 0 else [0.01]),
            'shuffle': [details['shuffle_data']],
            'batch_size': ['auto'] if details['automatic_batching'] else [details['batch_size']],
            'beta_1': [details['beta_1']] if details['beta_1'] != 0 else [.1],
            'beta_2': [details['beta_2']] if details['beta_2'] != 0 else [.1],
            'epsilon': [details['epsilon']] if details['epsilon'] != 0 else [.1],
            'power_t': [details['power_t']] if details['power_t'] != 0 else [.1],
            'momentum': [details['momentum']] if details['momentum'] != 0 else [.1],
            'nesterovs_momentum': [details['use_nesterov_momentum']],
        }
        grid_search = GridSearchCV(model, parameters, cv=5)
        grid_search.fit(X_train, Y_train)
        best_model = grid_search.best_estimator_
        Y_pred = best_model.predict(X_test)
        _render_classification_report(Y_test, Y_pred, 'neural_network.png')
def save_uploaded_file(uploaded_file):
    """Persist a Streamlit upload to the working directory.

    The file is written under the upload's own name and that relative
    path is returned so callers can re-open it.
    """
    destination = os.path.join(uploaded_file.name)
    payload = uploaded_file.getvalue()
    with open(destination, "wb") as out:
        out.write(payload)
    return destination
# Define the main function
def main():
    """Streamlit entry point: configure the page, inject CSS, and wire the
    RTF upload widget to the AutoML pipeline."""
    # Page title and wide layout must be set before any other Streamlit call.
    st.set_page_config(page_title="AutoML with Streamlit", layout="wide")
    # Sidebar background: primary color.
    sidebar_css = """
    <style>
    .sidebar .sidebar-content {
        background-color: #3498db; /* Primary color */
    }
    </style>
    """
    st.markdown(sidebar_css, unsafe_allow_html=True)
    # Main content area background: light gray.
    body_css = """
    <style>
    .block-container {
        background-color: #f9f9f9; /* Background color */
    }
    </style>
    """
    st.markdown(body_css, unsafe_allow_html=True)
    # Title section.
    st.title("AutoML with Json")
    st.write("This application allows you to upload an RTF file and perform AutoML tasks.")
    # File-uploader section in the sidebar.
    st.sidebar.title("Upload RTF File")
    uploaded = st.sidebar.file_uploader("", type=["rtf"], help="Please upload your RTF file here")
    if uploaded is not None:
        # Save the upload to disk, then run the pipeline on it.
        rtf_parser(save_uploaded_file(uploaded))
# Entry point of the script
if __name__ == "__main__":
    main()