{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Naive Bayes job-title prediction\n",
    "\n",
    "Fits the pickled Gaussian, Multinomial and Bernoulli Naive Bayes models on the labelled data stored in `Models/tts.pkl`, reports their held-out accuracy, and predicts a job title for a salary / experience pair typed in by the user."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "import pickle\n",
    "\n",
    "from flask import Flask, render_template, request, url_for\n",
    "import numpy as np\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "MODEL_DIR = Path('Models')  # directory holding every pickled artefact\n",
    "TEST_SIZE = 0.2             # fraction of the labelled data held out for scoring\n",
    "SEED = 42                   # fixed so the train/test split is reproducible\n",
    "\n",
    "\n",
    "def load_pickle(filename):\n",
    "    \"\"\"Return the object pickled in MODEL_DIR/filename.\n",
    "\n",
    "    The file is opened in a ``with`` block so the handle is always closed.\n",
    "    NOTE(review): pickle.load can execute arbitrary code - only load trusted files.\n",
    "    \"\"\"\n",
    "    with open(MODEL_DIR / filename, 'rb') as f:\n",
    "        return pickle.load(f)\n",
    "\n",
    "\n",
    "linreg = load_pickle('lr.pkl')\n",
    "knn_model = load_pickle('knn_model.pkl')\n",
    "gaussian_nb = load_pickle('nbG_model.pkl')\n",
    "multinomial_nb = load_pickle('nbM_model.pkl')\n",
    "bernoulli_nb = load_pickle('nbB_model.pkl')\n",
    "\n",
    "# Numeric class label -> human-readable job title.\n",
    "job_map = {\n",
    "    1: 'Junior',\n",
    "    2: 'Senior',\n",
    "    3: 'Project Manager',\n",
    "    4: 'CTO',\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The labelled data and the fitted models do not depend on what the user types,\n",
    "# so load, split and fit once here instead of on every prompt.\n",
    "# The user's row is deliberately NOT appended to X / y: it has no known job label,\n",
    "# and inventing one adds a class that job_map cannot translate.\n",
    "data = load_pickle('tts.pkl')\n",
    "X, y = data['X'], data['y']\n",
    "\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    X, y, test_size=TEST_SIZE, random_state=SEED\n",
    ")\n",
    "\n",
    "nb_models = {\n",
    "    'Gaussian': gaussian_nb,\n",
    "    'Multinomial': multinomial_nb,\n",
    "    'Bernoulli': bernoulli_nb,\n",
    "}\n",
    "\n",
    "# Held-out accuracy (0..1) per model, keyed like nb_models.\n",
    "nb_accuracy = {}\n",
    "for name, model in nb_models.items():\n",
    "    model.fit(X_train, y_train)\n",
    "    nb_accuracy[name] = model.score(X_test, y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_number(prompt):\n",
    "    \"\"\"Prompt until the user types a finite number.\n",
    "\n",
    "    Returns the value as a float, or None when the user submits a blank line\n",
    "    (used by the caller as the signal to stop).\n",
    "    \"\"\"\n",
    "    while True:\n",
    "        raw = input(prompt).strip()\n",
    "        if not raw:\n",
    "            return None\n",
    "        try:\n",
    "            value = float(raw)\n",
    "        except ValueError:\n",
    "            print(f\"Not a number: {raw!r} - please try again.\")\n",
    "            continue\n",
    "        # float() accepts 'nan' / 'inf', which the estimators reject.\n",
    "        if not np.isfinite(value):\n",
    "            print(f\"Not a finite number: {raw!r} - please try again.\")\n",
    "            continue\n",
    "        return value\n",
    "\n",
    "\n",
    "def predict_job_titles(salary, experience):\n",
    "    \"\"\"Return {model name: predicted job title} for one salary / experience pair.\n",
    "\n",
    "    Labels missing from job_map are reported as 'Unknown (<label>)' instead of None.\n",
    "    \"\"\"\n",
    "    features = [[salary, experience]]\n",
    "    titles = {}\n",
    "    for name, model in nb_models.items():\n",
    "        label = model.predict(features)[0]\n",
    "        titles[name] = job_map.get(label, f'Unknown ({label})')\n",
    "    return titles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print('Leave either prompt blank to stop.')\n",
    "\n",
    "while True:\n",
    "    salary = read_number(\"Enter salary: \")\n",
    "    if salary is None:\n",
    "        break\n",
    "    print(\"Salary entered: \", salary)\n",
    "\n",
    "    experience = read_number(\"Enter experience: \")\n",
    "    if experience is None:\n",
    "        break\n",
    "    print(\"Experience entered: \", experience)\n",
    "\n",
    "    # Print the accuracy and predicted job title for each Naive Bayes model\n",
    "    for name, title in predict_job_titles(salary, experience).items():\n",
    "        print('{} Accuracy: {:.2f}%, Prediction: {}'.format(name, nb_accuracy[name] * 100, title))\n",
    "    print(\"\\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    " # X=data['X']\n",
    " # y=data['y']\n",
    "\n",
    " # # Combine the user's input values with the existing data\n",
    " # X_new = np.vstack((X, np.array([salary, experience])))\n",
    " # y_new = np.hstack((y, 5)) # use a new label for the user's input\n",
    "\n",
    " # n_splits=10\n",
    "\n",
    " # # Use k-fold cross-validation to generate a new test set for each iteration\n",
    " # kf = KFold(n_splits=n_splits, shuffle=False, random_state=None)\n",
    "\n",
    " # gaussian_accuracy = 0\n",
    " # multinomial_accuracy = 0\n",
    " # bernoulli_accuracy = 0\n",
    "\n",
    " # for train_index, test_index in kf.split(X_new):\n",
    " # X_train, X_test = 
X_new[train_index], X_new[test_index]\n", " # y_train, y_test = y_new[train_index], y_new[test_index]\n", "\n", " # # Fit the Naive Bayes models on the training data\n", " # gaussian_nb.fit(X_train, y_train)\n", " # multinomial_nb.fit(X_train, y_train)\n", " # bernoulli_nb.fit(X_train, y_train)\n", "\n", " # # Use each Naive Bayes model to make a prediction based on the user's input values\n", " # gaussian_prediction = gaussian_nb.predict([[salary, experience]])[0]\n", " # multinomial_prediction = multinomial_nb.predict([[salary, experience]])[0]\n", " # bernoulli_prediction = bernoulli_nb.predict([[salary, experience]])[0]\n", "\n", " # # Update the accuracy scores for each Naive Bayes model\n", " # gaussian_accuracy += gaussian_nb.score(X_test, y_test)\n", " # multinomial_accuracy += multinomial_nb.score(X_test, y_test)\n", " # bernoulli_accuracy += bernoulli_nb.score(X_test, y_test)\n", "\n", " # # Calculate the mean accuracy for each Naive Bayes model over all folds\n", " # gaussian_accuracy = round(gaussian_accuracy / n_splits * 100, 3)\n", " # multinomial_accuracy = round(multinomial_accuracy / n_splits * 100, 3)\n", " # bernoulli_accuracy = round(bernoulli_accuracy / n_splits * 100, 3)\n", "\n", " # # Map the predicted job titles to their corresponding string values\n", " # gaussian_prediction = job_map.get(gaussian_prediction)\n", " # multinomial_prediction = job_map.get(multinomial_prediction)\n", " # bernoulli_prediction = job_map.get(bernoulli_prediction)\n", "\n", " # # Render the results template with the predicted job classification and accuracy scores\n", " # return render_template('naive.html',\n", " # gaussian_prediction=gaussian_prediction,\n", " # multinomial_prediction=multinomial_prediction,\n", " # bernoulli_prediction=bernoulli_prediction,\n", " # gaussian_accuracy=str(gaussian_accuracy) + \"%\",\n", " # multinomial_accuracy=str(multinomial_accuracy) + \"%\",\n", " # bernoulli_accuracy=str(bernoulli_accuracy) + \"%\",\n", " # 
salary=salary,\n", " # experience=experience,\n", " # reset=True)\n", " # else:\n", " # # Render the job classification form\n", " # return render_template('naive.html')\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ " # if request.method == 'POST':\n", " # # Get the user's input values\n", " # salary = float(request.form['salary'])\n", " # experience = float(request.form['experience'])\n", "\n", " # with open('Models/tts.pkl', 'rb') as f:\n", " # data = pickle.load(f)\n", "\n", " # X=data['X']\n", " # y=data['y']\n", "\n", "\n", " # X = np.vstack((X, np.array([salary, experience])))\n", " # y= np.hstack((y, 5)) # use a new label for the user's input\n", "\n", "\n", " # # Split the data into training and testing sets\n", " # X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=0)\n", "\n", " # # Fit the Naive Bayes models on the training data\n", " # gaussian_nb.fit(X_train, y_train)\n", " # multinomial_nb.fit(X_train, y_train)\n", " # bernoulli_nb.fit(X_train, y_train)\n", "\n", " # # Use each Naive Bayes model to make a prediction based on the user's input values\n", " # gaussian_prediction = gaussian_nb.predict([[salary, experience]])[0]\n", " # multinomial_prediction = multinomial_nb.predict([[salary, experience]])[0]\n", " # bernoulli_prediction = bernoulli_nb.predict([[salary, experience]])[0]\n", "\n", " # # Evaluate the accuracy of the models on the testing set\n", " # gaussian_accuracy = round(gaussian_nb.score(X_test, y_test) * 100, 3) \n", " # multinomial_accuracy = round(multinomial_nb.score(X_test, y_test) * 100, 3)\n", " # bernoulli_accuracy = round(bernoulli_nb.score(X_test, y_test) * 100, 3)\n", "\n", " # # Map the predicted job titles to their corresponding string values\n", " # gaussian_prediction = job_map.get(gaussian_prediction)\n", " # multinomial_prediction = job_map.get(multinomial_prediction)\n", " # bernoulli_prediction = 
job_map.get(bernoulli_prediction)\n", "\n", " # # Render the results template with the predicted job classification and accuracy scores\n", " # return render_template('naive.html',\n", " # gaussian_prediction=gaussian_prediction,\n", " # multinomial_prediction=multinomial_prediction,\n", " # bernoulli_prediction=bernoulli_prediction,\n", " # gaussian_accuracy=str(gaussian_accuracy) + \"%\",\n", " # multinomial_accuracy=str(multinomial_accuracy) + \"%\",\n", " # bernoulli_accuracy=str(bernoulli_accuracy) + \"%\",\n", " # salary=salary,\n", " # experience=experience,\n", " # reset=True)\n", " # else:\n", " # # Render the job classification form\n", " # return render_template('naive.html')\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Salary entered: 5000.0\n", "Experience entered: 5.0\n", "Gaussian Accuracy: 82.81%, Prediction: None\n", "Multinomial Accuracy: 26.67%, Prediction: None\n", "Bernoulli Accuracy: 21.05%, Prediction: Junior\n", "\n", "\n", "Salary entered: 4.0\n", "Experience entered: 5.0\n", "Gaussian Accuracy: 82.46%, Prediction: CTO\n", "Multinomial Accuracy: 25.96%, Prediction: Junior\n", "Bernoulli Accuracy: 20.70%, Prediction: Junior\n", "\n", "\n" ] }, { "ename": "ValueError", "evalue": "could not convert string to float: ''", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", "Cell \u001b[1;32mIn[3], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39msklearn\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mmodel_selection\u001b[39;00m \u001b[39mimport\u001b[39;00m KFold\n\u001b[0;32m 2\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mTrue\u001b[39;00m:\n\u001b[1;32m----> 3\u001b[0m salary \u001b[39m=\u001b[39m 
\u001b[39mfloat\u001b[39m(\u001b[39minput\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mEnter salary: \u001b[39m\u001b[39m\"\u001b[39m))\n\u001b[0;32m 4\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mSalary entered: \u001b[39m\u001b[39m\"\u001b[39m, salary)\n\u001b[0;32m 6\u001b[0m experience \u001b[39m=\u001b[39m \u001b[39mfloat\u001b[39m(\u001b[39minput\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mEnter experience: \u001b[39m\u001b[39m\"\u001b[39m))\n", "\u001b[1;31mValueError\u001b[0m: could not convert string to float: ''" ] } ], "source": [ "# from sklearn.model_selection import KFold\n", "# while True:\n", "# salary = float(input(\"Enter salary: \"))\n", "# print(\"Salary entered: \", salary)\n", "\n", "# experience = float(input(\"Enter experience: \"))\n", "# print(\"Experience entered: \", experience)\n", "\n", "# with open('Models/tts.pkl', 'rb') as f:\n", "# data = pickle.load(f)\n", "\n", "# X=data['X']\n", "# y=data['y']\n", "\n", "# # Combine the user's input values with the existing data\n", "# X_new = np.vstack((X, np.array([salary, experience])))\n", "# y_new = np.hstack((y, 5)) # use a new label for the user's input\n", "\n", "# n_splits=5\n", "\n", "# # Use k-fold cross-validation to generate a new test set for each iteration\n", "# kf = KFold(n_splits=n_splits, shuffle=True, random_state=None)\n", "\n", "# gaussian_accuracy = 0\n", "# multinomial_accuracy = 0\n", "# bernoulli_accuracy = 0\n", "\n", "# for train_index, test_index in kf.split(X_new):\n", "# X_train, X_test = X_new[train_index], X_new[test_index]\n", "# y_train, y_test = y_new[train_index], y_new[test_index]\n", "\n", "# # Fit the Naive Bayes models on the training data\n", "# gaussian_nb.fit(X_train, y_train)\n", "# multinomial_nb.fit(X_train, y_train)\n", "# bernoulli_nb.fit(X_train, y_train)\n", "\n", "# # Use each Naive Bayes model to make a prediction based on the user's input values\n", "# gaussian_prediction = gaussian_nb.predict([[salary, experience]])[0]\n", 
"# multinomial_prediction = multinomial_nb.predict([[salary, experience]])[0]\n", "# bernoulli_prediction = bernoulli_nb.predict([[salary, experience]])[0]\n", "\n", "# # Update the accuracy scores for each Naive Bayes model\n", "# gaussian_accuracy += gaussian_nb.score(X_test, y_test)\n", "# multinomial_accuracy += multinomial_nb.score(X_test, y_test)\n", "# bernoulli_accuracy += bernoulli_nb.score(X_test, y_test)\n", "\n", "# # Calculate the mean accuracy for each Naive Bayes model over all folds\n", "# gaussian_accuracy /= n_splits\n", "# multinomial_accuracy /= n_splits\n", "# bernoulli_accuracy /= n_splits\n", "\n", "# # Map the predicted job titles to their corresponding string values\n", "# gaussian_prediction = job_map.get(gaussian_prediction)\n", "# multinomial_prediction = job_map.get(multinomial_prediction)\n", "# bernoulli_prediction = job_map.get(bernoulli_prediction)\n", "\n", "# # Print the accuracy and predicted job title for each Naive Bayes model\n", "# print(\"Gaussian Accuracy: {:.2f}%, Prediction: {}\".format(gaussian_accuracy * 100, gaussian_prediction))\n", "# print(\"Multinomial Accuracy: {:.2f}%, Prediction: {}\".format(multinomial_accuracy * 100, multinomial_prediction))\n", "# print(\"Bernoulli Accuracy: {:.2f}%, Prediction: {}\".format(bernoulli_accuracy * 100, bernoulli_prediction))\n", "# print(\"\\n\")\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# @app.route('/predictnaive', methods=['GET', 'POST'])\n", "# def predictnaive():\n", "# if request.method == 'POST':\n", "# # Get the user's input values\n", "# salary = float(request.form['salary'])\n", "# experience = float(request.form['experience'])\n", "\n", "# # Load the data used to train and test the models\n", "# with open('Models/tts.pkl', 'rb') as f:\n", "# data = pickle.load(f)\n", " \n", "# # X_user = np.array([[salary, experience]])\n", "# # y_user = np.array([5])\n", "# # X_test_combined = 
np.concatenate((X_test, X_user))\n", "# # y_test_combined = np.concatenate((y_test, y_user))\n", "\n", "# X = np.vstack((data['X'], np.array([salary, experience])))\n", "# y = np.hstack((data['y'], 5)) # use a new label for the user's input\n", " \n", "# from sklearn.model_selection import train_test_split\n", "# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)\n", "\n", "# # Re-fit models on combined data\n", "# gaussian_nb.fit(X_train, y_train) \n", "# multinomial_nb.fit(X_train, y_train)\n", "# bernoulli_nb.fit(X_train, y_train)\n", "\n", "# # Use each Naive Bayes model to make a prediction based on the user's input values\n", "# gaussian_prediction = gaussian_nb.predict([[salary, experience]])[0]\n", "# multinomial_prediction = multinomial_nb.predict([[salary, experience]])[0]\n", "# bernoulli_prediction = bernoulli_nb.predict([[salary, experience]])[0]\n", "\n", "\n", "# # Calculate the accuracy of each Naive Bayes model\n", "# gaussian_accuracy = round(gaussian_nb.score(X_test, y_test), 3) * 100\n", "# multinomial_accuracy = round(multinomial_nb.score(X_test, y_test), 3) * 100\n", "# bernoulli_accuracy = round(bernoulli_nb.score(X_test, y_test), 3) * 100\n", "\n", "\n", "# gaussian_prediction = job_map.get(gaussian_prediction)\n", "# multinomial_prediction = job_map.get(multinomial_prediction)\n", "# bernoulli_prediction = job_map.get(bernoulli_prediction)\n", "\n", "# # Render the results template with the predicted job classification and accuracy scores\n", "# return render_template('naive.html', gaussian_prediction=gaussian_prediction, multinomial_prediction=multinomial_prediction, bernoulli_prediction=bernoulli_prediction, gaussian_accuracy=gaussian_accuracy, multinomial_accuracy=multinomial_accuracy, bernoulli_accuracy=bernoulli_accuracy, salary=salary, experience=experience, reset=True)\n", "# else:\n", "# # Render the job classification form\n", "# return render_template('naive.html')" ] } ], "metadata": { 
"kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.2" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }