{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Predicting `Input Weight` with `HistGradientBoostingRegressor`\n",
    "\n",
    "This notebook fits a gradient-boosted regressor that predicts `Input Weight` from the columns `Process type`, `Part Od`, `Part ID`, `Part Width` and `Finish Wt`. Hyper-parameters are tuned with a randomized cross-validated search, the best estimator is scored on a held-out 20% test split, and it is then pickled to `best_model.pkl`.\n",
    "\n",
    "**Last recorded result.** The previous saved run of this notebook (made before the estimator itself was seeded) printed a test MSE of `1.9502589114484195` and an $R^2$ of `0.9252566690286127`. Outputs were cleared when the code was revised; use *Restart Kernel -> Run All* to regenerate them."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pickle\n",
    "from pathlib import Path\n",
    "\n",
    "import pandas as pd\n",
    "from scipy.stats import randint, uniform\n",
    "from sklearn.ensemble import HistGradientBoostingRegressor\n",
    "from sklearn.metrics import mean_squared_error, r2_score\n",
    "from sklearn.model_selection import RandomizedSearchCV, train_test_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Configuration: everything a reader might want to tune lives in this cell.\n",
    "SEED = 42            # shared by the split, the search and the estimator\n",
    "TEST_SIZE = 0.2      # 80% train / 20% test\n",
    "N_SEARCH_ITER = 100  # number of sampled hyper-parameter candidates\n",
    "CV_FOLDS = 5\n",
    "\n",
    "TARGET_COLUMN = 'Input Weight'\n",
    "FEATURE_COLUMNS = ['Process type', 'Part Od', 'Part ID', 'Part Width', 'Finish Wt']\n",
    "\n",
    "# Set the INPUT_WEIGHT_DATA_CSV environment variable to run on another machine.\n",
    "# The fallback is the location the notebook was originally developed against.\n",
    "DATA_PATH = Path(\n",
    "    os.environ.get(\n",
    "        'INPUT_WEIGHT_DATA_CSV',\n",
    "        '/Users/abhijay/Desktop/EY_INTERNSHIP/Data_4.csv',\n",
    "    )\n",
    ")\n",
    "MODEL_PATH = Path('best_model.pkl')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load data\n",
    "\n",
    "Only the feature columns and the target are read from the CSV.\n",
    "\n",
    "**TODO (review):** `Process type` is passed to the model unchanged. If it is a nominal code rather than an ordered quantity, consider declaring it through `categorical_features` - confirm against the data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_raw = pd.read_csv(DATA_PATH, usecols=FEATURE_COLUMNS + [TARGET_COLUMN])\n",
    "\n",
    "# Fail here with a readable message rather than deep inside the CV search.\n",
    "assert not data_raw.empty, f'no rows loaded from {DATA_PATH}'\n",
    "assert data_raw[TARGET_COLUMN].notna().all(), 'target column contains missing values'\n",
    "\n",
    "data_raw.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train / test split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 'Input Weight' is the target; every other loaded column is a feature.\n",
    "X = data_raw.drop(columns=[TARGET_COLUMN])\n",
    "y = data_raw[TARGET_COLUMN]\n",
    "\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    X, y, test_size=TEST_SIZE, random_state=SEED\n",
    ")\n",
    "assert len(X_train) + len(X_test) == len(data_raw)\n",
    "\n",
    "X_train.shape, X_test.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Hyper-parameter search\n",
    "\n",
    "`scipy.stats.uniform(loc, scale)` samples from `[loc, loc + scale]` and `scipy.stats.randint(low, high)` excludes `high`, so the ranges actually searched are the ones noted next to each entry below.\n",
    "\n",
    "The estimator is seeded as well as the search: its `random_state` controls the internal train/validation split that is used whenever a candidate has `early_stopping=True`, so without it repeated runs can pick different models."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "param_dist = {\n",
    "    'learning_rate': uniform(0.01, 0.2),   # continuous, 0.01 .. 0.21\n",
    "    'max_iter': randint(100, 1000),        # integers 100 .. 999\n",
    "    'max_leaf_nodes': randint(10, 50),     # integers 10 .. 49\n",
    "    'max_depth': randint(3, 10),           # integers 3 .. 9\n",
    "    'min_samples_leaf': randint(1, 20),    # integers 1 .. 19\n",
    "    'max_bins': randint(50, 256),          # integers 50 .. 255 (255 is the estimator's maximum)\n",
    "    'l2_regularization': uniform(0, 1),    # continuous, 0 .. 1\n",
    "    'early_stopping': [True, False],\n",
    "}\n",
    "\n",
    "model = HistGradientBoostingRegressor(random_state=SEED)\n",
    "\n",
    "# No explicit `scoring`, so candidates are ranked by the estimator's default score (R^2).\n",
    "random_search = RandomizedSearchCV(\n",
    "    model,\n",
    "    param_distributions=param_dist,\n",
    "    n_iter=N_SEARCH_ITER,\n",
    "    cv=CV_FOLDS,\n",
    "    random_state=SEED,\n",
    "    n_jobs=-1,\n",
    ")\n",
    "random_search.fit(X_train, y_train)\n",
    "\n",
    "# Refit on the full training split with the winning parameters.\n",
    "best_model = random_search.best_estimator_\n",
    "\n",
    "random_search.best_params_"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Evaluate on the held-out test set"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = best_model.predict(X_test)\n",
    "\n",
    "mse_test = mean_squared_error(y_test, y_pred)\n",
    "r_square = r2_score(y_test, y_pred)\n",
    "\n",
    "print(\"Test MSE: \", mse_test)\n",
    "print(\"Test R^2: \", r_square)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Persist the best model\n",
    "\n",
    "The pickle should only be loaded from a trusted location and with the same scikit-learn version that produced it."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with MODEL_PATH.open('wb') as model_file:\n",
    "    pickle.dump(best_model, model_file)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}