{ "cells": [ { "cell_type": "code", "execution_count": 119, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.tree import DecisionTreeClassifier\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.metrics import classification_report,accuracy_score,f1_score,confusion_matrix,precision_recall_fscore_support,recall_score\n", "from sklearn.preprocessing import StandardScaler\n", "import pandas\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", "execution_count": 120, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "titanic dataset\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
survivedsexagen_siblings_spousesparchfareclassdeckembark_townalone
00male35.0008.0500ThirdunknownSouthamptony
10male54.00051.8625FirstESouthamptony
21female58.00026.5500FirstCSouthamptony
31female55.00016.0000SecondunknownSouthamptony
41male34.00013.0000SecondDSouthamptony
\n", "
" ], "text/plain": [ " survived sex age n_siblings_spouses parch fare class \\\n", "0 0 male 35.0 0 0 8.0500 Third \n", "1 0 male 54.0 0 0 51.8625 First \n", "2 1 female 58.0 0 0 26.5500 First \n", "3 1 female 55.0 0 0 16.0000 Second \n", "4 1 male 34.0 0 0 13.0000 Second \n", "\n", " deck embark_town alone \n", "0 unknown Southampton y \n", "1 E Southampton y \n", "2 C Southampton y \n", "3 unknown Southampton y \n", "4 D Southampton y " ] }, "execution_count": 120, "metadata": {}, "output_type": "execute_result" } ], "source": [ "eval = pd.read_csv('training.csv')\n", "train = pd.read_csv('eval.csv')\n", "print('titanic dataset')\n", "train.head()" ] }, { "cell_type": "code", "execution_count": 121, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "training features\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sexagen_siblings_spousesparchfareclassalone
0male35.0008.0500Thirdy
1male54.00051.8625Firsty
2female58.00026.5500Firsty
3female55.00016.0000Secondy
4male34.00013.0000Secondy
\n", "
" ], "text/plain": [ " sex age n_siblings_spouses parch fare class alone\n", "0 male 35.0 0 0 8.0500 Third y\n", "1 male 54.0 0 0 51.8625 First y\n", "2 female 58.0 0 0 26.5500 First y\n", "3 female 55.0 0 0 16.0000 Second y\n", "4 male 34.0 0 0 13.0000 Second y" ] }, "execution_count": 121, "metadata": {}, "output_type": "execute_result" } ], "source": [ "feature_names = ['sex','age','n_siblings_spouses','parch','fare','class','alone']\n", "training_features = train[feature_names]\n", "outcome_feature = ['survived']\n", "outcome_label = train[outcome_feature]\n", "categorical_features = ['sex','n_siblings_spouses','parch','class','alone']\n", "numeric_features = ['age','fare']\n", "print('training features')\n", "training_features.head()" ] }, { "cell_type": "code", "execution_count": 122, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "fitted_training features:\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ ":3: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " training_features[numeric_features] = ss.transform(training_features[numeric_features])\n", "/home/prince_tesla/.local/lib/python3.8/site-packages/pandas/core/indexing.py:1738: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " self._setitem_single_column(loc, value[:, i].tolist(), pi)\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sexagen_siblings_spousesparchfareclassalone
0male0.44435300-0.543558Thirdy
1male1.788943000.711569Firsty
2female2.07201500-0.013576Firsty
3female1.85971100-0.315809Secondy
4male0.37358500-0.401752Secondy
........................
259female-0.26332601-0.029332Secondn
260male0.30281700-0.547976Thirdy
261female0.727424050.060192Thirdn
262male-0.12179000-0.401752Secondy
263male-0.192558000.085259Firsty
\n", "

264 rows × 7 columns

\n", "
" ], "text/plain": [ " sex age n_siblings_spouses parch fare class alone\n", "0 male 0.444353 0 0 -0.543558 Third y\n", "1 male 1.788943 0 0 0.711569 First y\n", "2 female 2.072015 0 0 -0.013576 First y\n", "3 female 1.859711 0 0 -0.315809 Second y\n", "4 male 0.373585 0 0 -0.401752 Second y\n", ".. ... ... ... ... ... ... ...\n", "259 female -0.263326 0 1 -0.029332 Second n\n", "260 male 0.302817 0 0 -0.547976 Third y\n", "261 female 0.727424 0 5 0.060192 Third n\n", "262 male -0.121790 0 0 -0.401752 Second y\n", "263 male -0.192558 0 0 0.085259 First y\n", "\n", "[264 rows x 7 columns]" ] }, "execution_count": 122, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ss = StandardScaler()\n", "ss.fit(training_features[numeric_features])\n", "training_features[numeric_features] = ss.transform(training_features[numeric_features])\n", "print('fitted_training features:')\n", "training_features" ] }, { "cell_type": "code", "execution_count": 123, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
agefaresex_femalesex_malen_siblings_spouses_0n_siblings_spouses_1n_siblings_spouses_2n_siblings_spouses_3n_siblings_spouses_4n_siblings_spouses_5...parch_2parch_3parch_4parch_5parch_6class_Firstclass_Secondclass_Thirdalone_nalone_y
00.444353-0.54355801100000...0000000101
11.7889430.71156901100000...0000010001
22.072015-0.01357610100000...0000010001
31.859711-0.31580910100000...0000001001
40.373585-0.40175201100000...0000001001
\n", "

5 rows × 23 columns

\n", "
" ], "text/plain": [ " age fare sex_female sex_male n_siblings_spouses_0 \\\n", "0 0.444353 -0.543558 0 1 1 \n", "1 1.788943 0.711569 0 1 1 \n", "2 2.072015 -0.013576 1 0 1 \n", "3 1.859711 -0.315809 1 0 1 \n", "4 0.373585 -0.401752 0 1 1 \n", "\n", " n_siblings_spouses_1 n_siblings_spouses_2 n_siblings_spouses_3 \\\n", "0 0 0 0 \n", "1 0 0 0 \n", "2 0 0 0 \n", "3 0 0 0 \n", "4 0 0 0 \n", "\n", " n_siblings_spouses_4 n_siblings_spouses_5 ... parch_2 parch_3 parch_4 \\\n", "0 0 0 ... 0 0 0 \n", "1 0 0 ... 0 0 0 \n", "2 0 0 ... 0 0 0 \n", "3 0 0 ... 0 0 0 \n", "4 0 0 ... 0 0 0 \n", "\n", " parch_5 parch_6 class_First class_Second class_Third alone_n alone_y \n", "0 0 0 0 0 1 0 1 \n", "1 0 0 1 0 0 0 1 \n", "2 0 0 1 0 0 0 1 \n", "3 0 0 0 1 0 0 1 \n", "4 0 0 0 1 0 0 1 \n", "\n", "[5 rows x 23 columns]" ] }, "execution_count": 123, "metadata": {}, "output_type": "execute_result" } ], "source": [ "training_features = pd.get_dummies(training_features,columns=categorical_features)\n", "training_features.head()" ] }, { "cell_type": "code", "execution_count": 124, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "engineering features:\n" ] }, { "data": { "text/plain": [ "['sex_female',\n", " 'n_siblings_spouses_8',\n", " 'n_siblings_spouses_1',\n", " 'parch_6',\n", " 'n_siblings_spouses_4',\n", " 'parch_0',\n", " 'parch_5',\n", " 'n_siblings_spouses_0',\n", " 'parch_3',\n", " 'sex_male',\n", " 'class_First',\n", " 'parch_2',\n", " 'alone_y',\n", " 'n_siblings_spouses_5',\n", " 'n_siblings_spouses_2',\n", " 'n_siblings_spouses_3',\n", " 'class_Second',\n", " 'parch_1',\n", " 'alone_n',\n", " 'class_Third',\n", " 'parch_4']" ] }, "execution_count": 124, "metadata": {}, "output_type": "execute_result" } ], "source": [ "engineering_features = list(set(training_features.columns) - set(numeric_features))\n", "print('engineering features:')\n", "engineering_features" ] }, { "cell_type": "code", "execution_count": 125, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "LogisticRegression()" ] }, "execution_count": 125, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lr = LogisticRegression()\n", "model_lr = lr.fit(training_features,np.array(outcome_label['survived']))\n", "model_lr" ] }, { "cell_type": "code", "execution_count": 126, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "accuracy score: 0.803030303030303\n", "classification report:\n", " precision recall f1-score support\n", "\n", " 0 0.81 0.89 0.85 165\n", " 1 0.78 0.66 0.71 99\n", "\n", " accuracy 0.80 264\n", " macro avg 0.80 0.77 0.78 264\n", "weighted avg 0.80 0.80 0.80 264\n", "\n", "confusion matrix:\n", " [[147 18]\n", " [ 34 65]]\n", "precison,recall,fscore,support ARRAYS:\n", " (array([0.8121547 , 0.78313253]), array([0.89090909, 0.65656566]), array([0.84971098, 0.71428571]), array([165, 99]))\n", "sensitivity score:\n", " 0.6565656565656566\n", "specificity score:\n", " 0.8909090909090909\n" ] }, { "data": { "text/plain": [ "264" ] }, "execution_count": 126, "metadata": {}, "output_type": "execute_result" } ], "source": [ "predicted_label = model_lr.predict(training_features)\n", "actual_label = np.array(outcome_label['survived'])\n", "print('accuracy score:',accuracy_score(actual_label,predicted_label))\n", "print('classification report:\\n',classification_report(actual_label,predicted_label))\n", "print('confusion matrix:\\n',confusion_matrix(actual_label,predicted_label))\n", "print('precison,recall,fscore,support ARRAYS:\\n',precision_recall_fscore_support(actual_label,predicted_label))\n", "def specificity(y_true , y_pred):\n", " w,x,y,z = precision_recall_fscore_support(actual_label,predicted_label)\n", " return(x[0])\n", "print('sensitivity score:\\n',recall_score(actual_label,predicted_label))\n", "print('specificity score:\\n',specificity(actual_label,predicted_label))\n", "len(actual_label)" ] }, { "cell_type": "code", "execution_count": 127, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[0.98979592 0.9939759 ]\n" ] }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWAAAAD4CAYAAADSIzzWAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAVTElEQVR4nO3de5he87nw8e89mcSpIg51jEOKrcWLaqq0ipbtUFXpbrfSg1TjzetQ1dIDRdWppYjWW4c9rUMowaY2ulWlKVvtOm7UsV4RRSIEaYRGJTNzv3/MQ6cRM888mZlfnpXvx/W78jxrrfmtO6657uvOvX5rrchMJEmDr6V0AJK0tDIBS1IhJmBJKsQELEmFmIAlqZDWgT7BgpemucxCb7Pc2h8tHYKWQO3zZ8TiztGXnDN0tfcs9vkWhxWwJBUy4BWwJA2qzo7SEdTNBCypWjraS0dQNxOwpErJ7CwdQt1MwJKqpdMELEllWAFLUiFehJOkQpqoAnYdsKRKyY72ukdvIuLCiJgVEQ8vYt+REZERsVrte0TE2RExNSIejIite5vfBCypWjo76x+9uxjYfeGNEbEusCvwTLfNewAb18Z44LzeJjcBS6qW7Kx/9DZV5m3A7EXsOgv4NtD9tue9gUuyy53AiIhYq6f5TcCSqqWzo+4REeMj4t5uY3xv00fE3sCMzPzjQrvWAZ7t9n16bds78iKcpGrpw0W4zGwD2uo9PiKWB75LV/thsZmAJVXLwN6KvCEwCvhjRACMBO6LiG2AGcC63Y4dWdv2jkzAkqplAO+Ey8yHgNXf/B4RfwZGZ+ZLEXE98NWIuAL4EPBKZs7saT57wJIqJbOj7tGbiJgE3AFsEhHTI2JcD4ffCEwDpgI/Aw7pbX4rYEnV0o83YmTmfr3s36Db5wQO7cv8JmBJ1eLDeCSpkCa6FdkELKlaOhaUjqBuJmBJ1WILQpIKsQUhSYVYAUtSISZgSSojvQgnSYXYA5akQmxBSFIhVsCSVIgVsCQVYgUsSYW0D+gD2fuVCVhStVgBS1Ih9oAlqRArYEkqxApYkgqxApakQlwFIUmFZJaOoG4mYEnVYg9YkgoxAUtSIV6Ek6RCOjpKR1C3ltIBSFK/6uysf/QiIi6MiFkR8XC3badHxJ8i4sGIuDYiRnTbd3RETI2IxyNit97mNwFLqpZ+TMDAxcDuC22bDGyemVsA/w84GiAiNgX2BTar/cy5ETGkp8lNwJKqJTvrH71NlXkbMHuhbTdn5puLje8ERtY+7w1ckZlvZOZTwFRgm57mNwFLqpTszLpHRIyPiHu7jfF9PN1XgF/XPq8DPNtt3/TatnfkRThJ1dKHZWiZ2Qa0NXKaiDgGaAcua+TnwQQsqWoGYRVERHwZ+CSwc+Zbt97NANbtdtjI2rZ3ZAtCUrX070W4t4mI3YFvA5/KzHnddl0P7BsRy0TEKGBj4O6e5rICllQt/XgnXERMAnYCVouI6cDxdK16WAaYHBEAd2bmQZn5SERcBTxKV2vi0MzssRw3Affg2B9M4Lb/vptVVh7Bf/zi/Lftv/u+B/naUSewzlprArDLjh/m4K98YbHOOX/+fI4+6UweffwJRqw0nDNOPJp11lqDP9x9Hz8+/yIWLGhn6NBWjjx0HB/6wFaLdS4Nvp+1ncmen9iFWS++xFbv3xmALbfcjHN/eirLLLsM7e3tHHbYd7nn3gfKBtrM+vFhPJm53yI2X9DD8acAp9Q7vy2IHoz5xD9z/oSTezxm6y0355qJ53DNxHP6lHxnzHyBL3/122/b/stf3czwFd/Fr6+6kC99bgwTzr0QgJVHDOenp32fay89j1OOPZKjTzyjb38ZLREuueQq9vzkP/6enPqDYzjp5AmM/uCunHDCGZz6w2MKRVcRA9yC6E+9VsAR8V661re9uZxiBnB9Zj42kIEtCUZv9b+YMfOFhn72ht/8jsv+/ToWLGhni8024dgjD2XIkB7XZAPwu9/fwSHjvgjArjt9lB9MOI/M5H3/tNFbx2w0an3+9sYbzJ8/n2HDhjUUn8r4/e13sf76I/9hW2ay4vAVARi+0oo81+DvnGo6m+dxlD1WwBHxHeAKIOhqJt9d+zwpIo4a+PCWfH98+DH+ZewhHHTkcUyd9jQAT/75GW6a8l9cev6ZXDPxHFpaWvjVzbfUNd+sF19mzdVXA6C1dQjvWmF55rwy9x+OmXzr7Wy6yUYm34o44pvHc9oPj+WpJ+/hR6cexzHH/rB0SM2to6P+UVhvFfA4YLPMXNB9Y0RMAB4BTl3UD9UWM48HOPfMkzlw/0W1UZrfpptsyORrJrL88stx2x/u5mtHn8iNV17AXfc+wKN/msq+4w4H4I033mCVlUcA8LWjT2TGcy+woH0BM194kc+MPRSAL+6zN5/ec9dezzl12tNMOPdC2s6qu82kJdz/Gb8/R37r+1x77Y189rN78bN/O5Pd9ti3dFhNK5eA1kK9ekvAncDawNMLbV+rtm+Rui9uXvDStOb590AfvWuFFd76vMOHt+HkM8/hL3NeITP51B678I2DD3jbz5z9w+8BXT3gY045k4t/+qN/2L/6u1fl+Vkvsebq76a9vYPX/jqPESsNB+D5WS9y+HdP4gfHfZP1Rq49gH8zDab9v/SvfOOIrt+Lq6++gbbzTy8cUZOrSgsC+DowJSJ+HRFttXETMAU4fMCjW8K99PJs3lyD/dCjj9OZyYiVhrPt6K2YfOvtvPyXOQC8MvdVnnu+vr7ex7bflutu/C0AN9/6ez70gS2JCOa++hqHfOt4vn7QAWy9xWYD8vdRGc/NfIEdd9gOgI9/bHuemPpU4YiaXD8+C2Kg9VgBZ+ZNEfFPdD1QovtFuHt6W99WBd86/lTuuf9B5syZy85jvsgh475Ee+2Ff5/79J7cfMvtXHntfzKkdQjLDhvG6SccRUSw4aj1Oex/78/4rx9DZ3YytLWVY444hLXXXKPXc/7LJ3fj6JNOZ499vsJKw1fk9BO6Wu2TrrmBZ6c/x/kXXc75F10OQNuPT2HVWmtDzeEXl57Djjtsx2qrrcKfp93LCSeewUEHfYsJE06ktbWVN/72Nw4++O2rY9QHTVQBRw7wC+yq3IJQ45Zb+6OlQ9ASqH3+jFjcOf76vX3rzjkrnHjFYp9vcXgjhqRqWQJaC/UyAUuqliZqQZiAJVVKlZahSVJzsQKWpEJMwJJUyBJwi3G9TMCSKiWtgCWpEBOwJBXiKghJKsQKWJIKMQFLUhnZYQtCksqwApakMlyGJkmlmIAlqZDmaQH3+koiSWoq2d5Z9+hNRFwYEbMi4uFu21aJiMkR8UTtz5Vr2yMizo6IqRHxYERs3dv8JmBJ1dLZh9G7i4HdF9p2FDAlMzem6/2YR9W27wFsXBvjgfN6m9wELKlSsjPrHr3OlXkbMHuhzXsDE2ufJwJjum2/JLvcCYyIiLV6mt8ELKla+rcCXpQ1MnNm7fPzwJtv210HeLbbcdP5+8uMF8kELKlS+lIBR8T4iLi32xjfp3N1vdW44WUXroKQVC19qGwzsw1o6+MZXoiItTJzZq3FMKu2fQawbrfjRta2vSMrYEmVku31jwZdD4ytfR4LXNdt+/611RDbAq90a1UskhWwpErpz7fSR8QkYCdgtYiYDhwPnApcFRHjgKeBfWqH3wh8ApgKzAMO6G1+E7CkaunHBJyZ+73Drp0XcWwCh/ZlfhOwpErpzwp4oJmAJVWKCViSCsmOKB1C3UzAkirFCliSCslOK2BJKsIKWJIKybQClqQirIAlqZBOV0FIUhlehJOkQkzAklRINs9LkU3AkqrFCliSCnEZmiQV0uEqCEkqwwpYkgqxByxJhbgKQpIKsQKWpEI6OpvnZe8mYEmVYgtCkgrpdBWEJJXhMjRJKsQWRDejN//iQJ9CTeisNT5WOgRVVDO1IJrncqEk1aGjs6Xu0ZuI+EZEPBIRD0fEpIhYNiJGRcRdETE1Iq6MiGGNxmoCllQp2YfRk4hYB/gaMDozNweGAPsCpwFnZeZGwF+AcY3GagKWVCmdGXWPOrQCy0VEK7A8MBP4OHB1bf9EYEyjsZqAJVVKZtQ9ImJ8RNzbbYz/+zw5AzgDeIauxPsK8D/AnMxsrx02HVin0VhdBSGpUvryUuTMbAPaFrUvIlYG9gZGAXOAfwd2X9z4ujMBS6qUpN9WQewCPJWZLwJExC+BjwAjIqK1VgWPBGY0egJbEJIqpT2j7tGLZ4BtI2L5iAhgZ+BR4Bbgs7VjxgLXNRqrCVhSpSRR9+hxnsy76LrYdh/wEF35sg34DnBEREwFVgUuaDRWWxCSKqUvPeDeZObxwPELbZ4GbNMf85uAJVVKP/aAB5wJWFKl9GcFPNBMwJIqpcMKWJLKaKI3EpmAJVVLpxWwJJXRRI8DNgFLqhYvwklSIZ1hC0KSiugoHUAfmIAlVYqrICSpEFdBSFIhroKQpEJsQUhSIS5Dk6RCOqyAJakMK2BJKsQELEmF9P6qtyWHCVhSpVgBS1Ih3oosSYW4DliSCrEFIUmFmIAlqRCfBSFJhTRTD7ildACS1J86+jB6ExEjIuLqiPhTRDwWEdtFxCoRMTkinqj9uXKjsZqAJVVKJ1n3qMNPgJsy873AlsBjwFHAlMzcGJhS+94QE7CkSunsw+hJRKwE7ABcAJCZ8zNzDrA3MLF22ERgTKOxmoAlVUr2YUTE+Ii4t9sY322qUcCLwEURcX9E/DwiVgDWyMyZtWOeB9ZoNFYvwkmqlL4sQ8vMNqDtHXa3AlsDh2XmXRHxExZqN2RmRkTDCy+sgCVVSntk3aMX04HpmXlX7fvVdCXkFyJiLYDan7MajdUELKlS+tKC6HGezOeBZyNik9qmnYFHgeuBsbVtY4HrGo3VFoSkSunnO+EOAy6LiGHANOAAugrXqyJiHPA0sE+jk5uAJVVKncvL6pKZDwCjF7Fr5/6Y3wQsqVK8FVmSCvFhPJJUSEcT1cAmYEmVYgUsSYWkFbAklWEFLIYtM4yL/uNchg4bSmvrECb/6hbOO/2Ct/Z/5+RvMGa/Pdluw10KRqnBNmz48uz8owNZdZORZCZTvvkz1ttxCzb7/E68/vKrANxx2lU8fcsfC0favPpzGdpAMwEPkPlvzOfAzxzG6/Nep7V1CBdffz63T7mTh+57hE23fC/DV1qxdIgqYIfvf4mnb32QXx90Ni1Dh9C63DKst+MWPPDzm7j/324sHV4lNE/69VbkAfX6vNcBaB3aSmtrK2TS0tLCEd87lLNOOqdwdBpsw1ZcjrU/tAmPXnErAJ0LOpg/d17ZoCqonax7lGYFPIBaWlqYdPOFrDdqJFde9Eseuv9RPn/gPtz6m9t5adbLpcPTIBu+7rv52+xX2WXCeFZ733rMeujP3Hb8pQBsMfafee9ntmfWg09x+0mX8cYrJuZGNdNFuIYr4Ig4oId9bz1j8+V5LzR6iqbX2dnJ53b5Mru+fwybv/99bL3tVuy618eYdMHVpUNTAS2tQ3j35hvw0CVTuGKPY1kw7w0+cOhePHTpb7lk+yOYtNsx/HXWHLY/7gulQ21q/fVA9sGwOC2IE95pR2a2ZebozBy96vINP6u4Ml6d+xr3/Pd9fPAjW7PuqJHccOdV3HjPNSy73LLccMdVpcPTIHlt5mxemzmbFx54EoAnb7yb1TffgNdfmkt2JmTyyOW3sMZW7ykcaXPLPvxXWo8tiIh48J12sRhPgV8arLzqCNoXtPPq3NdYZtlhbLvDB7nonF+w8xZ7vXXMHU/+lr22a/hBSmoy8158hddmzmbEe9ZizrSZjPzIZsx+YgbLrz6CebPmALDh7qN5+fHpZQNtcktCZVuv3nrAawC7AX9ZaHsAfxiQiCpitdVX5eSzj6NlSAstLS3cfP0Ubpvs/7Kl3X8dN5Fd/+/BDBnaytxnZvHbI9vY4YT9WW2z9SGTudNf4pajLiwdZlPryPKVbb0iewg2Ii4ALsrM2xex7/LM/HxvJ9hyzQ83z/8NDZoDh/rPbL3dYc/+IhZ3js+v/+m6c87lT1+72OdbHD1WwJk5rod9vSZfSRpsS0Jvt14uQ5NUKVXqAUtSU/FWZEkqxBaEJBXSTKsgTMCSKsUWhCQV4kU4SSrEHrAkFWILQpIK6enu3iWND2SXVCkdZN2jHhExJCLuj4hf1b6Pioi7ImJqRFwZEcMajdUELKlSOsm6R50OBx7r9v004KzM3IiuB5W94yMbemMCllQpmVn36E1EjAT2BH5e+x7Ax4E336owERjTaKwmYEmV0pcKuPvbe2pj/ELT/Rj4Nn9f3bYqMCcz22vfpwPrNBqrF+EkVUpflqFlZhvQtqh9EfFJYFZm/k9E7NQvwS3EBCypUvrxVuSPAJ+KiE8AywLDgZ8AIyKitVYFjwRmNHoCWxCSKqW/LsJl5tGZOTIzNwD2BX6XmV8AbgE+WztsLHBdo7GagCVVygCsgljYd4AjImIqXT3hCxqdyBaEpEoZiBsxMvNW4Nba52nANv0xrwlYUqV4K7IkFeLDeCSpkI5sngdSmoAlVUozPYzHBCypUuwBS1Ih9oAlqZBOWxCSVIYVsCQV4ioISSrEFoQkFWILQpIKsQKWpEKsgCWpkI7sKB1C3UzAkirFW5ElqRBvRZakQqyAJakQV0FIUiGugpCkQrwVWZIKsQcsSYXYA5akQqyAJamQZloH3FI6AEnqT5lZ9+hJRKwbEbdExKMR8UhEHF7bvkpETI6IJ2p/rtxorCZgSZXSkZ11j160A0dm5qbAtsChEbEpcBQwJTM3BqbUvjfEBCypUjoz6x49ycyZmXlf7fOrwGPAOsDewMTaYROBMY3GagKWVCl9aUFExPiIuLfbGL+oOSNiA+D9wF3AGpk5s7breWCNRmP1IpykSunLnXCZ2Qa09XRMRLwLuAb4embOjYjuP58R0fBVPxOwpErpz2VoETGUruR7WWb+srb5hYhYKzNnRsRawKxG57cFIalS+qsHHF2l7gXAY5k5oduu64Gxtc9jgesajTWaadFys4uI8bV/8khv8fdiyRQR2wO/Bx4C3lwy8V26+sBXAesBTwP7ZObshs5hAh48EXFvZo4uHYeWLP5eLL1sQUhSISZgSSrEBDy47PNpUfy9WErZA5akQqyAJakQE7AkFWICHiQRsXtEPB4RUyOi4acnqToi4sKImBURD5eORWWYgAdBRAwBzgH2ADYF9qs91k5Lt4uB3UsHoXJMwINjG2BqZk7LzPnAFXQ90k5Lscy8DWjoDipVgwl4cKwDPNvt+/TaNklLMROwJBViAh4cM4B1u30fWdsmaSlmAh4c9wAbR8SoiBgG7EvXI+0kLcVMwIMgM9uBrwK/oeu9Uldl5iNlo1JpETEJuAPYJCKmR8S40jFpcHkrsiQVYgUsSYWYgCWpEBOwJBViApakQkzAklSICViSCjEBS1Ih/x8pSEVe8w2bqQAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "dt = DecisionTreeClassifier()\n", "model_dt = dt.fit(training_features,np.array(outcome_label['survived']))\n", "actual_label_dt = np.array(outcome_label['survived'])\n", "predicted_label_dt = model_dt.predict(training_features)\n", "print(f1_score(actual_label_dt,predicted_label_dt,labels=(1,0),average=None))\n", "cm = confusion_matrix(actual_label,predicted_label)\n", "\n", "sns.heatmap(cm,annot=True)\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 128, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "accuracy score: 0.9924242424242424\n", "classification report:\n", " precision recall f1-score support\n", "\n", " 0 0.99 1.00 0.99 165\n", " 1 1.00 0.98 0.99 99\n", "\n", " accuracy 0.99 264\n", " macro avg 0.99 0.99 0.99 264\n", "weighted avg 0.99 0.99 0.99 264\n", "\n", "confusion matrix:\n", " [[165 0]\n", " [ 2 97]]\n", "precison,recall,fscore,support ARRAYS:\n", " (array([0.98802395, 1. ]), array([1. , 0.97979798]), array([0.9939759 , 0.98979592]), array([165, 99]))\n", "sensitivity score:\n", " 0.9797979797979798\n", "specificity score:\n", " 1.0\n" ] }, { "data": { "text/plain": [ "264" ] }, "execution_count": 128, "metadata": {}, "output_type": "execute_result" } ], "source": [ "predicted_label = model_dt.predict(training_features)\n", "actual_label = np.array(outcome_label['survived'])\n", "print('accuracy score:',accuracy_score(actual_label,predicted_label))\n", "print('classification report:\\n',classification_report(actual_label,predicted_label))\n", "print('confusion matrix:\\n',confusion_matrix(actual_label,predicted_label))\n", "print('precison,recall,fscore,support ARRAYS:\\n',precision_recall_fscore_support(actual_label,predicted_label))\n", "def specificity(y_true , y_pred):\n", " w,x,y,z = precision_recall_fscore_support(actual_label,predicted_label)\n", " return(x[0])\n", "print('sensitivity score:\\n',recall_score(actual_label,predicted_label))\n", "print('specificity score:\\n',specificity(actual_label,predicted_label))\n", "len(actual_label)" ] }, { "cell_type": "code", "execution_count": 129, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['model_dt.pickle']" ] }, "execution_count": 129, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import sklearn\n", "import joblib\n", "# save model[Logistic REgression] to be deployed on your server\n", "joblib.dump(model_lr, r'model_lr.pickle')\n", "joblib.dump(ss, r'scaler.pickle')\n", "joblib.dump(model_dt, r'model_dt.pickle')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 130, "metadata": {}, "outputs": [], "source": [ "#LOADING.............\n", "model_lr = joblib.load(r'model_lr.pickle')\n", "model_dt = joblib.load(r'model_dt.pickle')\n", "scaler = joblib.load(r'scaler.pickle')" ] }, { "cell_type": "code", "execution_count": 131, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sexagen_siblings_spousesparchfareclassalone
0male22.0107.2500Thirdn
1female38.01071.2833Firstn
2female26.0007.9250Thirdy
3female35.01053.1000Firstn
4male28.0008.4583Thirdy
\n", "
" ], "text/plain": [ " sex age n_siblings_spouses parch fare class alone\n", "0 male 22.0 1 0 7.2500 Third n\n", "1 female 38.0 1 0 71.2833 First n\n", "2 female 26.0 0 0 7.9250 Third y\n", "3 female 35.0 1 0 53.1000 First n\n", "4 male 28.0 0 0 8.4583 Third y" ] }, "execution_count": 131, "metadata": {}, "output_type": "execute_result" } ], "source": [ "feature_names = ['sex','age','n_siblings_spouses','parch','fare','class','alone']\n", "prediction_features = eval[feature_names]\n", "outcome_feature = ['survived']\n", "outcome_label = eval[outcome_feature]\n", "categorical_features = ['sex','n_siblings_spouses','parch','class','alone']\n", "numeric_features = ['age','fare']\n", "prediction_features.head()" ] }, { "cell_type": "code", "execution_count": 132, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ ":2: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " prediction_features[numeric_features] = scaler.transform(prediction_features[numeric_features])\n", "/home/prince_tesla/.local/lib/python3.8/site-packages/pandas/core/indexing.py:1738: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " self._setitem_single_column(loc, value[:, i].tolist(), pi)\n" ] } ], "source": [ "scaler.fit(prediction_features[numeric_features])\n", "prediction_features[numeric_features] = scaler.transform(prediction_features[numeric_features])\n", "prediction_features = pd.get_dummies(prediction_features,columns=categorical_features)\n", "c_engineering_features = list(set(prediction_features.columns)-set(numeric_features))" ] }, { "cell_type": "code", "execution_count": 133, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "missing feature(s): ['parch_6']\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
agefaresex_femalesex_malen_siblings_spouses_0n_siblings_spouses_1n_siblings_spouses_2n_siblings_spouses_3n_siblings_spouses_4n_siblings_spouses_5...parch_2parch_3parch_4parch_5class_Firstclass_Secondclass_Thirdalone_nalone_yparch_6
0-0.610415-0.49740301010000...0000001100
10.6693970.67635310010000...0000100100
2-0.290462-0.48503010100000...0000001010
30.4294320.34304610010000...0000100100
4-0.130485-0.47525401100000...0000001010
\n", "

5 rows × 23 columns

\n", "
" ], "text/plain": [ " age fare sex_female sex_male n_siblings_spouses_0 \\\n", "0 -0.610415 -0.497403 0 1 0 \n", "1 0.669397 0.676353 1 0 0 \n", "2 -0.290462 -0.485030 1 0 1 \n", "3 0.429432 0.343046 1 0 0 \n", "4 -0.130485 -0.475254 0 1 1 \n", "\n", " n_siblings_spouses_1 n_siblings_spouses_2 n_siblings_spouses_3 \\\n", "0 1 0 0 \n", "1 1 0 0 \n", "2 0 0 0 \n", "3 1 0 0 \n", "4 0 0 0 \n", "\n", " n_siblings_spouses_4 n_siblings_spouses_5 ... parch_2 parch_3 parch_4 \\\n", "0 0 0 ... 0 0 0 \n", "1 0 0 ... 0 0 0 \n", "2 0 0 ... 0 0 0 \n", "3 0 0 ... 0 0 0 \n", "4 0 0 ... 0 0 0 \n", "\n", " parch_5 class_First class_Second class_Third alone_n alone_y parch_6 \n", "0 0 0 0 1 1 0 0 \n", "1 0 1 0 0 1 0 0 \n", "2 0 0 0 1 0 1 0 \n", "3 0 1 0 0 1 0 0 \n", "4 0 0 0 1 0 1 0 \n", "\n", "[5 rows x 23 columns]" ] }, "execution_count": 133, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#setting aside and making up for the whole categorical features from our first model\n", "c_engineering_features = set(prediction_features.columns) - set(numeric_features)\n", "missing_features = list(set(engineering_features) - c_engineering_features)\n", "for feature in missing_features:\n", " #add zeroes\n", " prediction_features[feature] = [0]*len(prediction_features)\n", "print('missing feature(s):',missing_features) \n", "prediction_features.head()" ] }, { "cell_type": "code", "execution_count": 134, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
survivedsexagen_siblings_spousesparchfareclassdeckembark_townalonesurvived[Logistic Regression]survived[Decision Tree]
00male22.0107.2500ThirdunknownSouthamptonn00
11female38.01071.2833FirstCCherbourgn00
21female26.0007.9250ThirdunknownSouthamptony11
31female35.01053.1000FirstCSouthamptonn00
40male28.0008.4583ThirdunknownQueenstowny00
\n", "
" ], "text/plain": [ " survived sex age n_siblings_spouses parch fare class deck \\\n", "0 0 male 22.0 1 0 7.2500 Third unknown \n", "1 1 female 38.0 1 0 71.2833 First C \n", "2 1 female 26.0 0 0 7.9250 Third unknown \n", "3 1 female 35.0 1 0 53.1000 First C \n", "4 0 male 28.0 0 0 8.4583 Third unknown \n", "\n", " embark_town alone survived[Logistic Regression] survived[Decision Tree] \n", "0 Southampton n 0 0 \n", "1 Cherbourg n 0 0 \n", "2 Southampton y 1 1 \n", "3 Southampton n 0 0 \n", "4 Queenstown y 0 0 " ] }, "execution_count": 134, "metadata": {}, "output_type": "execute_result" } ], "source": [ "prediction_lr = model_lr.predict(prediction_features)\n", "prediction_dt = model_dt.predict(prediction_features)\n", "\n", "# Making a copy of the eval dataset\n", "eval_2 = eval.copy()\n", "eval_2['survived[Logistic Regression]'] = prediction_lr\n", "eval_2['survived[Decision Tree]'] = prediction_dt\n", "eval_2.head()" ] }, { "cell_type": "code", "execution_count": 135, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[0.98979592 0.9939759 ]\n" ] } ], "source": [ "from sklearn.metrics import f1_score\n", "print(f1_score(actual_label,predicted_label,labels=(1,0),average= None))" ] }, { "cell_type": "code", "execution_count": 136, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "survived 243\n", "dtype: int64" ] }, "execution_count": 136, "metadata": {}, "output_type": "execute_result" } ], "source": [ "outcome_label[outcome_label==1].count()" ] }, { "cell_type": "code", "execution_count": 137, "metadata": {}, "outputs": [], "source": [ "def predict(sex, age, n_siblings_spouses, parch, fare, Class, alone):\n", " features = ['sex_female', 'n_siblings_spouses_8', 'n_siblings_spouses_1',\n", " 'parch_6', 'n_siblings_spouses_4', 'parch_0', 'parch_5', 'n_siblings_spouses_0', 'parch_3',\n", " 'sex_male', 'Class_First', 'parch_2', 'alone_y', 'n_siblings_spouses_5', 'n_siblings_spouses_2',\n", " 'n_siblings_spouses_3', 'Class_Second', 'parch_1', 'alone_n', 'Class_Third', 'parch_4']\n", " labels = ['sex', 'age', 'n_siblings_spouses', 'parch', 'fare', 'Class', 'alone']\n", " feature_names = [sex, age, n_siblings_spouses, parch, fare, Class, alone]\n", " features_df = pd.DataFrame([feature_names], columns=labels)\n", " categorical_features = ['sex', 'n_siblings_spouses', 'parch', 'Class', 'alone']\n", " numeric_features = ['age', 'fare']\n", " features_df[numeric_features] = scaler.transform(features_df[numeric_features])\n", " features_df = pd.get_dummies(features_df,columns=categorical_features)\n", " #setting aside and making up for the whole categorical features from our first model\n", " c_engineering_features = set(features_df.columns) - set(numeric_features)\n", " missing_features = list(set(features) - c_engineering_features)\n", " for feature in missing_features:\n", " #add zeroes\n", " features_df[feature] = [0]*len(features_df)\n", " result = model_lr.predict(features_df)\n", " print(features_df)\n", " return result\n", " " ] }, { "cell_type": "code", "execution_count": 138, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " age fare sex_male n_siblings_spouses_1 parch_5 Class_First \\\n", "0 -0.770391 0.320363 1 1 1 1 \n", "\n", " alone_n sex_female n_siblings_spouses_8 alone_y ... \\\n", "0 1 0 0 0 ... \n", "\n", " n_siblings_spouses_2 Class_Second n_siblings_spouses_3 \\\n", "0 0 0 0 \n", "\n", " n_siblings_spouses_0 parch_1 parch_3 n_siblings_spouses_4 Class_Third \\\n", "0 0 0 0 0 0 \n", "\n", " parch_2 parch_4 \n", "0 0 0 \n", "\n", "[1 rows x 23 columns]\n" ] }, { "data": { "text/plain": [ "array([1])" ] }, "execution_count": 138, "metadata": {}, "output_type": "execute_result" } ], "source": [ "predict('male', 20, 1, 5, 51.86255, 'First', 'n')" ] }, { "cell_type": "code", "execution_count": 139, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sexagen_siblings_spousesparchfareClassalone
0male-1.410297341.202742Firsty
\n", "
" ], "text/plain": [ " sex age n_siblings_spouses parch fare Class alone\n", "0 male -1.410297 3 4 1.202742 First y" ] }, "execution_count": 139, "metadata": {}, "output_type": "execute_result" } ], "source": [ "b = ['male',12, 3,4,100,'First','y']\n", "a = ['sex', 'age', 'n_siblings_spouses', 'parch', 'fare', 'Class', 'alone']\n", "c = pd.DataFrame([b], columns=a)\n", "x = ['age', 'fare']\n", "c[x] = scaler.transform(c[x])\n", "c" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" } }, "nbformat": 4, "nbformat_minor": 2 }