{
"cells": [
{
"cell_type": "code",
"execution_count": 119,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.tree import DecisionTreeClassifier\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import classification_report,accuracy_score,f1_score,confusion_matrix,precision_recall_fscore_support,recall_score\n",
"from sklearn.preprocessing import StandardScaler\n",
"import pandas\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 120,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"titanic dataset\n"
]
},
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" survived \n",
" sex \n",
" age \n",
" n_siblings_spouses \n",
" parch \n",
" fare \n",
" class \n",
" deck \n",
" embark_town \n",
" alone \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 0 \n",
" male \n",
" 35.0 \n",
" 0 \n",
" 0 \n",
" 8.0500 \n",
" Third \n",
" unknown \n",
" Southampton \n",
" y \n",
" \n",
" \n",
" 1 \n",
" 0 \n",
" male \n",
" 54.0 \n",
" 0 \n",
" 0 \n",
" 51.8625 \n",
" First \n",
" E \n",
" Southampton \n",
" y \n",
" \n",
" \n",
" 2 \n",
" 1 \n",
" female \n",
" 58.0 \n",
" 0 \n",
" 0 \n",
" 26.5500 \n",
" First \n",
" C \n",
" Southampton \n",
" y \n",
" \n",
" \n",
" 3 \n",
" 1 \n",
" female \n",
" 55.0 \n",
" 0 \n",
" 0 \n",
" 16.0000 \n",
" Second \n",
" unknown \n",
" Southampton \n",
" y \n",
" \n",
" \n",
" 4 \n",
" 1 \n",
" male \n",
" 34.0 \n",
" 0 \n",
" 0 \n",
" 13.0000 \n",
" Second \n",
" D \n",
" Southampton \n",
" y \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" survived sex age n_siblings_spouses parch fare class \\\n",
"0 0 male 35.0 0 0 8.0500 Third \n",
"1 0 male 54.0 0 0 51.8625 First \n",
"2 1 female 58.0 0 0 26.5500 First \n",
"3 1 female 55.0 0 0 16.0000 Second \n",
"4 1 male 34.0 0 0 13.0000 Second \n",
"\n",
" deck embark_town alone \n",
"0 unknown Southampton y \n",
"1 E Southampton y \n",
"2 C Southampton y \n",
"3 unknown Southampton y \n",
"4 D Southampton y "
]
},
"execution_count": 120,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"eval = pd.read_csv('training.csv')\n",
"train = pd.read_csv('eval.csv')\n",
"print('titanic dataset')\n",
"train.head()"
]
},
{
"cell_type": "code",
"execution_count": 121,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"training features\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" sex \n",
" age \n",
" n_siblings_spouses \n",
" parch \n",
" fare \n",
" class \n",
" alone \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" male \n",
" 35.0 \n",
" 0 \n",
" 0 \n",
" 8.0500 \n",
" Third \n",
" y \n",
" \n",
" \n",
" 1 \n",
" male \n",
" 54.0 \n",
" 0 \n",
" 0 \n",
" 51.8625 \n",
" First \n",
" y \n",
" \n",
" \n",
" 2 \n",
" female \n",
" 58.0 \n",
" 0 \n",
" 0 \n",
" 26.5500 \n",
" First \n",
" y \n",
" \n",
" \n",
" 3 \n",
" female \n",
" 55.0 \n",
" 0 \n",
" 0 \n",
" 16.0000 \n",
" Second \n",
" y \n",
" \n",
" \n",
" 4 \n",
" male \n",
" 34.0 \n",
" 0 \n",
" 0 \n",
" 13.0000 \n",
" Second \n",
" y \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" sex age n_siblings_spouses parch fare class alone\n",
"0 male 35.0 0 0 8.0500 Third y\n",
"1 male 54.0 0 0 51.8625 First y\n",
"2 female 58.0 0 0 26.5500 First y\n",
"3 female 55.0 0 0 16.0000 Second y\n",
"4 male 34.0 0 0 13.0000 Second y"
]
},
"execution_count": 121,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"feature_names = ['sex','age','n_siblings_spouses','parch','fare','class','alone']\n",
"training_features = train[feature_names]\n",
"outcome_feature = ['survived']\n",
"outcome_label = train[outcome_feature]\n",
"categorical_features = ['sex','n_siblings_spouses','parch','class','alone']\n",
"numeric_features = ['age','fare']\n",
"print('training features')\n",
"training_features.head()"
]
},
{
"cell_type": "code",
"execution_count": 122,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"fitted_training features:\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
":3: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" training_features[numeric_features] = ss.transform(training_features[numeric_features])\n",
"/home/prince_tesla/.local/lib/python3.8/site-packages/pandas/core/indexing.py:1738: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" self._setitem_single_column(loc, value[:, i].tolist(), pi)\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" sex \n",
" age \n",
" n_siblings_spouses \n",
" parch \n",
" fare \n",
" class \n",
" alone \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" male \n",
" 0.444353 \n",
" 0 \n",
" 0 \n",
" -0.543558 \n",
" Third \n",
" y \n",
" \n",
" \n",
" 1 \n",
" male \n",
" 1.788943 \n",
" 0 \n",
" 0 \n",
" 0.711569 \n",
" First \n",
" y \n",
" \n",
" \n",
" 2 \n",
" female \n",
" 2.072015 \n",
" 0 \n",
" 0 \n",
" -0.013576 \n",
" First \n",
" y \n",
" \n",
" \n",
" 3 \n",
" female \n",
" 1.859711 \n",
" 0 \n",
" 0 \n",
" -0.315809 \n",
" Second \n",
" y \n",
" \n",
" \n",
" 4 \n",
" male \n",
" 0.373585 \n",
" 0 \n",
" 0 \n",
" -0.401752 \n",
" Second \n",
" y \n",
" \n",
" \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" \n",
" \n",
" 259 \n",
" female \n",
" -0.263326 \n",
" 0 \n",
" 1 \n",
" -0.029332 \n",
" Second \n",
" n \n",
" \n",
" \n",
" 260 \n",
" male \n",
" 0.302817 \n",
" 0 \n",
" 0 \n",
" -0.547976 \n",
" Third \n",
" y \n",
" \n",
" \n",
" 261 \n",
" female \n",
" 0.727424 \n",
" 0 \n",
" 5 \n",
" 0.060192 \n",
" Third \n",
" n \n",
" \n",
" \n",
" 262 \n",
" male \n",
" -0.121790 \n",
" 0 \n",
" 0 \n",
" -0.401752 \n",
" Second \n",
" y \n",
" \n",
" \n",
" 263 \n",
" male \n",
" -0.192558 \n",
" 0 \n",
" 0 \n",
" 0.085259 \n",
" First \n",
" y \n",
" \n",
" \n",
"
\n",
"
264 rows × 7 columns
\n",
"
"
],
"text/plain": [
" sex age n_siblings_spouses parch fare class alone\n",
"0 male 0.444353 0 0 -0.543558 Third y\n",
"1 male 1.788943 0 0 0.711569 First y\n",
"2 female 2.072015 0 0 -0.013576 First y\n",
"3 female 1.859711 0 0 -0.315809 Second y\n",
"4 male 0.373585 0 0 -0.401752 Second y\n",
".. ... ... ... ... ... ... ...\n",
"259 female -0.263326 0 1 -0.029332 Second n\n",
"260 male 0.302817 0 0 -0.547976 Third y\n",
"261 female 0.727424 0 5 0.060192 Third n\n",
"262 male -0.121790 0 0 -0.401752 Second y\n",
"263 male -0.192558 0 0 0.085259 First y\n",
"\n",
"[264 rows x 7 columns]"
]
},
"execution_count": 122,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ss = StandardScaler()\n",
"ss.fit(training_features[numeric_features])\n",
"training_features[numeric_features] = ss.transform(training_features[numeric_features])\n",
"print('fitted_training features:')\n",
"training_features"
]
},
{
"cell_type": "code",
"execution_count": 123,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" age \n",
" fare \n",
" sex_female \n",
" sex_male \n",
" n_siblings_spouses_0 \n",
" n_siblings_spouses_1 \n",
" n_siblings_spouses_2 \n",
" n_siblings_spouses_3 \n",
" n_siblings_spouses_4 \n",
" n_siblings_spouses_5 \n",
" ... \n",
" parch_2 \n",
" parch_3 \n",
" parch_4 \n",
" parch_5 \n",
" parch_6 \n",
" class_First \n",
" class_Second \n",
" class_Third \n",
" alone_n \n",
" alone_y \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 0.444353 \n",
" -0.543558 \n",
" 0 \n",
" 1 \n",
" 1 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" ... \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 1 \n",
" 0 \n",
" 1 \n",
" \n",
" \n",
" 1 \n",
" 1.788943 \n",
" 0.711569 \n",
" 0 \n",
" 1 \n",
" 1 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" ... \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 1 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 1 \n",
" \n",
" \n",
" 2 \n",
" 2.072015 \n",
" -0.013576 \n",
" 1 \n",
" 0 \n",
" 1 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" ... \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 1 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 1 \n",
" \n",
" \n",
" 3 \n",
" 1.859711 \n",
" -0.315809 \n",
" 1 \n",
" 0 \n",
" 1 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" ... \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 1 \n",
" 0 \n",
" 0 \n",
" 1 \n",
" \n",
" \n",
" 4 \n",
" 0.373585 \n",
" -0.401752 \n",
" 0 \n",
" 1 \n",
" 1 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" ... \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 1 \n",
" 0 \n",
" 0 \n",
" 1 \n",
" \n",
" \n",
"
\n",
"
5 rows × 23 columns
\n",
"
"
],
"text/plain": [
" age fare sex_female sex_male n_siblings_spouses_0 \\\n",
"0 0.444353 -0.543558 0 1 1 \n",
"1 1.788943 0.711569 0 1 1 \n",
"2 2.072015 -0.013576 1 0 1 \n",
"3 1.859711 -0.315809 1 0 1 \n",
"4 0.373585 -0.401752 0 1 1 \n",
"\n",
" n_siblings_spouses_1 n_siblings_spouses_2 n_siblings_spouses_3 \\\n",
"0 0 0 0 \n",
"1 0 0 0 \n",
"2 0 0 0 \n",
"3 0 0 0 \n",
"4 0 0 0 \n",
"\n",
" n_siblings_spouses_4 n_siblings_spouses_5 ... parch_2 parch_3 parch_4 \\\n",
"0 0 0 ... 0 0 0 \n",
"1 0 0 ... 0 0 0 \n",
"2 0 0 ... 0 0 0 \n",
"3 0 0 ... 0 0 0 \n",
"4 0 0 ... 0 0 0 \n",
"\n",
" parch_5 parch_6 class_First class_Second class_Third alone_n alone_y \n",
"0 0 0 0 0 1 0 1 \n",
"1 0 0 1 0 0 0 1 \n",
"2 0 0 1 0 0 0 1 \n",
"3 0 0 0 1 0 0 1 \n",
"4 0 0 0 1 0 0 1 \n",
"\n",
"[5 rows x 23 columns]"
]
},
"execution_count": 123,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"training_features = pd.get_dummies(training_features,columns=categorical_features)\n",
"training_features.head()"
]
},
{
"cell_type": "code",
"execution_count": 124,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"engineering features:\n"
]
},
{
"data": {
"text/plain": [
"['sex_female',\n",
" 'n_siblings_spouses_8',\n",
" 'n_siblings_spouses_1',\n",
" 'parch_6',\n",
" 'n_siblings_spouses_4',\n",
" 'parch_0',\n",
" 'parch_5',\n",
" 'n_siblings_spouses_0',\n",
" 'parch_3',\n",
" 'sex_male',\n",
" 'class_First',\n",
" 'parch_2',\n",
" 'alone_y',\n",
" 'n_siblings_spouses_5',\n",
" 'n_siblings_spouses_2',\n",
" 'n_siblings_spouses_3',\n",
" 'class_Second',\n",
" 'parch_1',\n",
" 'alone_n',\n",
" 'class_Third',\n",
" 'parch_4']"
]
},
"execution_count": 124,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"engineering_features = list(set(training_features.columns) - set(numeric_features))\n",
"print('engineering features:')\n",
"engineering_features"
]
},
{
"cell_type": "code",
"execution_count": 125,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"LogisticRegression()"
]
},
"execution_count": 125,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lr = LogisticRegression()\n",
"model_lr = lr.fit(training_features,np.array(outcome_label['survived']))\n",
"model_lr"
]
},
{
"cell_type": "code",
"execution_count": 126,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"accuracy score: 0.803030303030303\n",
"classification report:\n",
" precision recall f1-score support\n",
"\n",
" 0 0.81 0.89 0.85 165\n",
" 1 0.78 0.66 0.71 99\n",
"\n",
" accuracy 0.80 264\n",
" macro avg 0.80 0.77 0.78 264\n",
"weighted avg 0.80 0.80 0.80 264\n",
"\n",
"confusion matrix:\n",
" [[147 18]\n",
" [ 34 65]]\n",
"precison,recall,fscore,support ARRAYS:\n",
" (array([0.8121547 , 0.78313253]), array([0.89090909, 0.65656566]), array([0.84971098, 0.71428571]), array([165, 99]))\n",
"sensitivity score:\n",
" 0.6565656565656566\n",
"specificity score:\n",
" 0.8909090909090909\n"
]
},
{
"data": {
"text/plain": [
"264"
]
},
"execution_count": 126,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"predicted_label = model_lr.predict(training_features)\n",
"actual_label = np.array(outcome_label['survived'])\n",
"print('accuracy score:',accuracy_score(actual_label,predicted_label))\n",
"print('classification report:\\n',classification_report(actual_label,predicted_label))\n",
"print('confusion matrix:\\n',confusion_matrix(actual_label,predicted_label))\n",
"print('precison,recall,fscore,support ARRAYS:\\n',precision_recall_fscore_support(actual_label,predicted_label))\n",
"def specificity(y_true , y_pred):\n",
" w,x,y,z = precision_recall_fscore_support(actual_label,predicted_label)\n",
" return(x[0])\n",
"print('sensitivity score:\\n',recall_score(actual_label,predicted_label))\n",
"print('specificity score:\\n',specificity(actual_label,predicted_label))\n",
"len(actual_label)"
]
},
{
"cell_type": "code",
"execution_count": 127,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[0.98979592 0.9939759 ]\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAWAAAAD4CAYAAADSIzzWAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAVTElEQVR4nO3de5he87nw8e89mcSpIg51jEOKrcWLaqq0ipbtUFXpbrfSg1TjzetQ1dIDRdWppYjWW4c9rUMowaY2ulWlKVvtOm7UsV4RRSIEaYRGJTNzv3/MQ6cRM888mZlfnpXvx/W78jxrrfmtO6657uvOvX5rrchMJEmDr6V0AJK0tDIBS1IhJmBJKsQELEmFmIAlqZDWgT7BgpemucxCb7Pc2h8tHYKWQO3zZ8TiztGXnDN0tfcs9vkWhxWwJBUy4BWwJA2qzo7SEdTNBCypWjraS0dQNxOwpErJ7CwdQt1MwJKqpdMELEllWAFLUiFehJOkQpqoAnYdsKRKyY72ukdvIuLCiJgVEQ8vYt+REZERsVrte0TE2RExNSIejIite5vfBCypWjo76x+9uxjYfeGNEbEusCvwTLfNewAb18Z44LzeJjcBS6qW7Kx/9DZV5m3A7EXsOgv4NtD9tue9gUuyy53AiIhYq6f5TcCSqqWzo+4REeMj4t5uY3xv00fE3sCMzPzjQrvWAZ7t9n16bds78iKcpGrpw0W4zGwD2uo9PiKWB75LV/thsZmAJVXLwN6KvCEwCvhjRACMBO6LiG2AGcC63Y4dWdv2jkzAkqplAO+Ey8yHgNXf/B4RfwZGZ+ZLEXE98NWIuAL4EPBKZs7saT57wJIqJbOj7tGbiJgE3AFsEhHTI2JcD4ffCEwDpgI/Aw7pbX4rYEnV0o83YmTmfr3s36Db5wQO7cv8JmBJ1eLDeCSpkCa6FdkELKlaOhaUjqBuJmBJ1WILQpIKsQUhSYVYAUtSISZgSSojvQgnSYXYA5akQmxBSFIhVsCSVIgVsCQVYgUsSYW0D+gD2fuVCVhStVgBS1Ih9oAlqRArYEkqxApYkgqxApakQlwFIUmFZJaOoG4mYEnVYg9YkgoxAUtSIV6Ek6RCOjpKR1C3ltIBSFK/6uysf/QiIi6MiFkR8XC3badHxJ8i4sGIuDYiRnTbd3RETI2IxyNit97mNwFLqpZ+TMDAxcDuC22bDGyemVsA/w84GiAiNgX2BTar/cy5ETGkp8lNwJKqJTvrH71NlXkbMHuhbTdn5puLje8ERtY+7w1ckZlvZOZTwFRgm57mNwFLqpTszLpHRIyPiHu7jfF9PN1XgF/XPq8DPNtt3/TatnfkRThJ1dKHZWiZ2Qa0NXKaiDgGaAcua+TnwQQsqWoGYRVERHwZ+CSwc+Zbt97NANbtdtjI2rZ3ZAtCUrX070W4t4mI3YFvA5/KzHnddl0P7BsRy0TEKGBj4O6e5rICllQt/XgnXERMAnYCVouI6cDxdK16WAaYHBEAd2bmQZn5SERcBTxKV2vi0MzssRw3Affg2B9M4Lb/vptVVh7Bf/zi/Lftv/u+B/naUSewzlprArDLjh/m4K98YbHOOX/+fI4+6UweffwJRqw0nDNOPJp11lqDP9x9Hz8+/yIWLGhn6NBWjjx0HB/6wFaLdS4Nvp+1ncmen9iFWS++xFbv3xmALbfcjHN/eirLLLsM7e3tHHbYd7nn3gfKBtrM+vFhPJm53yI2X9DD8acAp9Q7vy2IHoz5xD9z/oSTezxm6y0355qJ53DNxHP6lHxnzHyBL3/122/b/stf3czwFd/Fr6+6kC99bgwTzr0QgJVHDOenp32fay89j1OOPZKjTzyjb38ZLREuueQq9vzkP/6enPqDYzjp5AmM/uCunHDCGZz6w2MKRVcRA9yC6E+9VsAR8V661re9uZxiBnB9Zj42kIEtCUZv9b+YMfOFhn72ht/8jsv+/ToWLGhni8024dgjD2XIkB7XZAPwu9/fwSHjvgjArjt9lB9MOI/M5H3/tNFbx2w0an3+9sYbzJ8/n2HDhjUUn8r4/e13sf76I/9hW2ay4vAVARi+0oo81+DvnGo6m+dxlD1WwBHxHeAKIOhqJt9d+zwpIo4a+PCWfH98+DH+ZewhHHTkcUyd9jQAT/75GW6a8l9cev6ZXDPxHFpaWvjVzbfUNd+sF19mzdVXA6C1dQjvWmF55rwy9x+OmXzr7Wy6yUYm34o44pvHc9oPj+WpJ+/hR6cexzHH/rB0SM2to6P+UVhvFfA4YLPMXNB9Y0RMAB4BTl3UD9UWM48HOPfMkzlw/0W1UZrfpptsyORrJrL88stx2x/u5mtHn8iNV17AXfc+wKN/msq+4w4H4I033mCVlUcA8LWjT2TGcy+woH0BM194kc+MPRSAL+6zN5/ec9dezzl12tNMOPdC2s6qu82kJdz/Gb8/R37r+1x77Y189rN78bN/O5Pd9ti3dFhNK5eA1kK9ekvAncDawNMLbV+rtm+Rui9uXvDStOb590AfvWuFFd76vMOHt+HkM8/hL3NeITP51B678I2DD3jbz5z9w+8BXT3gY045k4t/+qN/2L/6u1fl+Vkvsebq76a9vYPX/jqPESsNB+D5WS9y+HdP4gfHfZP1Rq49gH8zDab9v/SvfOOIrt+Lq6++gbbzTy8cUZOrSgsC+DowJSJ+HRFttXETMAU4fMCjW8K99PJs3lyD/dCjj9OZyYiVhrPt6K2YfOvtvPyXOQC8MvdVnnu+vr7ex7bflutu/C0AN9/6ez70gS2JCOa++hqHfOt4vn7QAWy9xWYD8vdRGc/NfIEdd9gOgI9/bHuemPpU4YiaXD8+C2Kg9VgBZ+ZNEfFPdD1QovtFuHt6W99WBd86/lTuuf9B5syZy85jvsgh475Ee+2Ff5/79J7cfMvtXHntfzKkdQjLDhvG6SccRUSw4aj1Oex/78/4rx9DZ3YytLWVY444hLXXXKPXc/7LJ3fj6JNOZ499vsJKw1fk9BO6Wu2TrrmBZ6c/x/kXXc75F10OQNuPT2HVWmtDzeEXl57Djjtsx2qrrcKfp93LCSeewUEHfYsJE06ktbWVN/72Nw4++O2rY9QHTVQBRw7wC+yq3IJQ45Zb+6OlQ9ASqH3+jFjcOf76vX3rzjkrnHjFYp9vcXgjhqRqWQJaC/UyAUuqliZqQZiAJVVKlZahSVJzsQKWpEJMwJJUyBJwi3G9TMCSKiWtgCWpEBOwJBXiKghJKsQKWJIKMQFLUhnZYQtCksqwApakMlyGJkmlmIAlqZDmaQH3+koiSWoq2d5Z9+hNRFwYEbMi4uFu21aJiMkR8UTtz5Vr2yMizo6IqRHxYERs3dv8JmBJ1dLZh9G7i4HdF9p2FDAlMzem6/2YR9W27wFsXBvjgfN6m9wELKlSsjPrHr3OlXkbMHuhzXsDE2ufJwJjum2/JLvcCYyIiLV6mt8ELKla+rcCXpQ1MnNm7fPzwJtv210HeLbbcdP5+8uMF8kELKlS+lIBR8T4iLi32xjfp3N1vdW44WUXroKQVC19qGwzsw1o6+MZXoiItTJzZq3FMKu2fQawbrfjRta2vSMrYEmVku31jwZdD4ytfR4LXNdt+/611RDbAq90a1UskhWwpErpz7fSR8QkYCdgtYiYDhwPnApcFRHjgKeBfWqH3wh8ApgKzAMO6G1+E7CkaunHBJyZ+73Drp0XcWwCh/ZlfhOwpErpzwp4oJmAJVWKCViSCsmOKB1C3UzAkirFCliSCslOK2BJKsIKWJIKybQClqQirIAlqZBOV0FIUhlehJOkQkzAklRINs9LkU3AkqrFCliSCnEZmiQV0uEqCEkqwwpYkgqxByxJhbgKQpIKsQKWpEI6OpvnZe8mYEmVYgtCkgrpdBWEJJXhMjRJKsQWRDejN//iQJ9CTeisNT5WOgRVVDO1IJrncqEk1aGjs6Xu0ZuI+EZEPBIRD0fEpIhYNiJGRcRdETE1Iq6MiGGNxmoCllQp2YfRk4hYB/gaMDozNweGAPsCpwFnZeZGwF+AcY3GagKWVCmdGXWPOrQCy0VEK7A8MBP4OHB1bf9EYEyjsZqAJVVKZtQ9ImJ8RNzbbYz/+zw5AzgDeIauxPsK8D/AnMxsrx02HVin0VhdBSGpUvryUuTMbAPaFrUvIlYG9gZGAXOAfwd2X9z4ujMBS6qUpN9WQewCPJWZLwJExC+BjwAjIqK1VgWPBGY0egJbEJIqpT2j7tGLZ4BtI2L5iAhgZ+BR4Bbgs7VjxgLXNRqrCVhSpSRR9+hxnsy76LrYdh/wEF35sg34DnBEREwFVgUuaDRWWxCSKqUvPeDeZObxwPELbZ4GbNMf85uAJVVKP/aAB5wJWFKl9GcFPNBMwJIqpcMKWJLKaKI3EpmAJVVLpxWwJJXRRI8DNgFLqhYvwklSIZ1hC0KSiugoHUAfmIAlVYqrICSpEFdBSFIhroKQpEJsQUhSIS5Dk6RCOqyAJakMK2BJKsQELEmF9P6qtyWHCVhSpVgBS1Ih3oosSYW4DliSCrEFIUmFmIAlqRCfBSFJhTRTD7ildACS1J86+jB6ExEjIuLqiPhTRDwWEdtFxCoRMTkinqj9uXKjsZqAJVVKJ1n3qMNPgJsy873AlsBjwFHAlMzcGJhS+94QE7CkSunsw+hJRKwE7ABcAJCZ8zNzDrA3MLF22ERgTKOxmoAlVUr2YUTE+Ii4t9sY322qUcCLwEURcX9E/DwiVgDWyMyZtWOeB9ZoNFYvwkmqlL4sQ8vMNqDtHXa3AlsDh2XmXRHxExZqN2RmRkTDCy+sgCVVSntk3aMX04HpmXlX7fvVdCXkFyJiLYDan7MajdUELKlS+tKC6HGezOeBZyNik9qmnYFHgeuBsbVtY4HrGo3VFoSkSunnO+EOAy6LiGHANOAAugrXqyJiHPA0sE+jk5uAJVVKncvL6pKZDwCjF7Fr5/6Y3wQsqVK8FVmSCvFhPJJUSEcT1cAmYEmVYgUsSYWkFbAklWEFLIYtM4yL/uNchg4bSmvrECb/6hbOO/2Ct/Z/5+RvMGa/Pdluw10KRqnBNmz48uz8owNZdZORZCZTvvkz1ttxCzb7/E68/vKrANxx2lU8fcsfC0favPpzGdpAMwEPkPlvzOfAzxzG6/Nep7V1CBdffz63T7mTh+57hE23fC/DV1qxdIgqYIfvf4mnb32QXx90Ni1Dh9C63DKst+MWPPDzm7j/324sHV4lNE/69VbkAfX6vNcBaB3aSmtrK2TS0tLCEd87lLNOOqdwdBpsw1ZcjrU/tAmPXnErAJ0LOpg/d17ZoCqonax7lGYFPIBaWlqYdPOFrDdqJFde9Eseuv9RPn/gPtz6m9t5adbLpcPTIBu+7rv52+xX2WXCeFZ733rMeujP3Hb8pQBsMfafee9ntmfWg09x+0mX8cYrJuZGNdNFuIYr4Ig4oId9bz1j8+V5LzR6iqbX2dnJ53b5Mru+fwybv/99bL3tVuy618eYdMHVpUNTAS2tQ3j35hvw0CVTuGKPY1kw7w0+cOhePHTpb7lk+yOYtNsx/HXWHLY/7gulQ21q/fVA9sGwOC2IE95pR2a2ZebozBy96vINP6u4Ml6d+xr3/Pd9fPAjW7PuqJHccOdV3HjPNSy73LLccMdVpcPTIHlt5mxemzmbFx54EoAnb7yb1TffgNdfmkt2JmTyyOW3sMZW7ykcaXPLPvxXWo8tiIh48J12sRhPgV8arLzqCNoXtPPq3NdYZtlhbLvDB7nonF+w8xZ7vXXMHU/+lr22a/hBSmoy8158hddmzmbEe9ZizrSZjPzIZsx+YgbLrz6CebPmALDh7qN5+fHpZQNtcktCZVuv3nrAawC7AX9ZaHsAfxiQiCpitdVX5eSzj6NlSAstLS3cfP0Ubpvs/7Kl3X8dN5Fd/+/BDBnaytxnZvHbI9vY4YT9WW2z9SGTudNf4pajLiwdZlPryPKVbb0iewg2Ii4ALsrM2xex7/LM/HxvJ9hyzQ83z/8NDZoDh/rPbL3dYc/+IhZ3js+v/+m6c87lT1+72OdbHD1WwJk5rod9vSZfSRpsS0Jvt14uQ5NUKVXqAUtSU/FWZEkqxBaEJBXSTKsgTMCSKsUWhCQV4kU4SSrEHrAkFWILQpIK6enu3iWND2SXVCkdZN2jHhExJCLuj4hf1b6Pioi7ImJqRFwZEcMajdUELKlSOsm6R50OBx7r9v004KzM3IiuB5W94yMbemMCllQpmVn36E1EjAT2BH5e+x7Ax4E336owERjTaKwmYEmV0pcKuPvbe2pj/ELT/Rj4Nn9f3bYqMCcz22vfpwPrNBqrF+EkVUpflqFlZhvQtqh9EfFJYFZm/k9E7NQvwS3EBCypUvrxVuSPAJ+KiE8AywLDgZ8AIyKitVYFjwRmNHoCWxCSKqW/LsJl5tGZOTIzNwD2BX6XmV8AbgE+WztsLHBdo7GagCVVygCsgljYd4AjImIqXT3hCxqdyBaEpEoZiBsxMvNW4Nba52nANv0xrwlYUqV4K7IkFeLDeCSpkI5sngdSmoAlVUozPYzHBCypUuwBS1Ih9oAlqZBOWxCSVIYVsCQV4ioISSrEFoQkFWILQpIKsQKWpEKsgCWpkI7sKB1C3UzAkirFW5ElqRBvRZakQqyAJakQV0FIUiGugpCkQrwVWZIKsQcsSYXYA5akQqyAJamQZloH3FI6AEnqT5lZ9+hJRKwbEbdExKMR8UhEHF7bvkpETI6IJ2p/rtxorCZgSZXSkZ11j160A0dm5qbAtsChEbEpcBQwJTM3BqbUvjfEBCypUjoz6x49ycyZmXlf7fOrwGPAOsDewMTaYROBMY3GagKWVCl9aUFExPiIuLfbGL+oOSNiA+D9wF3AGpk5s7breWCNRmP1IpykSunLnXCZ2Qa09XRMRLwLuAb4embOjYjuP58R0fBVPxOwpErpz2VoETGUruR7WWb+srb5hYhYKzNnRsRawKxG57cFIalS+qsHHF2l7gXAY5k5oduu64Gxtc9jgesajTWaadFys4uI8bV/8khv8fdiyRQR2wO/Bx4C3lwy8V26+sBXAesBTwP7ZObshs5hAh48EXFvZo4uHYeWLP5eLL1sQUhSISZgSSrEBDy47PNpUfy9WErZA5akQqyAJakQE7AkFWICHiQRsXtEPB4RUyOi4acnqToi4sKImBURD5eORWWYgAdBRAwBzgH2ADYF9qs91k5Lt4uB3UsHoXJMwINjG2BqZk7LzPnAFXQ90k5Lscy8DWjoDipVgwl4cKwDPNvt+/TaNklLMROwJBViAh4cM4B1u30fWdsmaSlmAh4c9wAbR8SoiBgG7EvXI+0kLcVMwIMgM9uBrwK/oeu9Uldl5iNlo1JpETEJuAPYJCKmR8S40jFpcHkrsiQVYgUsSYWYgCWpEBOwJBViApakQkzAklSICViSCjEBS1Ih/x8pSEVe8w2bqQAAAABJRU5ErkJggg==\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"dt = DecisionTreeClassifier()\n",
"model_dt = dt.fit(training_features,np.array(outcome_label['survived']))\n",
"actual_label_dt = np.array(outcome_label['survived'])\n",
"predicted_label_dt = model_dt.predict(training_features)\n",
"print(f1_score(actual_label_dt,predicted_label_dt,labels=(1,0),average=None))\n",
"cm = confusion_matrix(actual_label,predicted_label)\n",
"\n",
"sns.heatmap(cm,annot=True)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 128,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"accuracy score: 0.9924242424242424\n",
"classification report:\n",
" precision recall f1-score support\n",
"\n",
" 0 0.99 1.00 0.99 165\n",
" 1 1.00 0.98 0.99 99\n",
"\n",
" accuracy 0.99 264\n",
" macro avg 0.99 0.99 0.99 264\n",
"weighted avg 0.99 0.99 0.99 264\n",
"\n",
"confusion matrix:\n",
" [[165 0]\n",
" [ 2 97]]\n",
"precison,recall,fscore,support ARRAYS:\n",
" (array([0.98802395, 1. ]), array([1. , 0.97979798]), array([0.9939759 , 0.98979592]), array([165, 99]))\n",
"sensitivity score:\n",
" 0.9797979797979798\n",
"specificity score:\n",
" 1.0\n"
]
},
{
"data": {
"text/plain": [
"264"
]
},
"execution_count": 128,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"predicted_label = model_dt.predict(training_features)\n",
"actual_label = np.array(outcome_label['survived'])\n",
"print('accuracy score:',accuracy_score(actual_label,predicted_label))\n",
"print('classification report:\\n',classification_report(actual_label,predicted_label))\n",
"print('confusion matrix:\\n',confusion_matrix(actual_label,predicted_label))\n",
"print('precison,recall,fscore,support ARRAYS:\\n',precision_recall_fscore_support(actual_label,predicted_label))\n",
"def specificity(y_true , y_pred):\n",
" w,x,y,z = precision_recall_fscore_support(actual_label,predicted_label)\n",
" return(x[0])\n",
"print('sensitivity score:\\n',recall_score(actual_label,predicted_label))\n",
"print('specificity score:\\n',specificity(actual_label,predicted_label))\n",
"len(actual_label)"
]
},
{
"cell_type": "code",
"execution_count": 129,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['model_dt.pickle']"
]
},
"execution_count": 129,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import sklearn\n",
"import joblib\n",
"# save model[Logistic REgression] to be deployed on your server\n",
"joblib.dump(model_lr, r'model_lr.pickle')\n",
"joblib.dump(ss, r'scaler.pickle')\n",
"joblib.dump(model_dt, r'model_dt.pickle')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 130,
"metadata": {},
"outputs": [],
"source": [
"#LOADING.............\n",
"model_lr = joblib.load(r'model_lr.pickle')\n",
"model_dt = joblib.load(r'model_dt.pickle')\n",
"scaler = joblib.load(r'scaler.pickle')"
]
},
{
"cell_type": "code",
"execution_count": 131,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" sex \n",
" age \n",
" n_siblings_spouses \n",
" parch \n",
" fare \n",
" class \n",
" alone \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" male \n",
" 22.0 \n",
" 1 \n",
" 0 \n",
" 7.2500 \n",
" Third \n",
" n \n",
" \n",
" \n",
" 1 \n",
" female \n",
" 38.0 \n",
" 1 \n",
" 0 \n",
" 71.2833 \n",
" First \n",
" n \n",
" \n",
" \n",
" 2 \n",
" female \n",
" 26.0 \n",
" 0 \n",
" 0 \n",
" 7.9250 \n",
" Third \n",
" y \n",
" \n",
" \n",
" 3 \n",
" female \n",
" 35.0 \n",
" 1 \n",
" 0 \n",
" 53.1000 \n",
" First \n",
" n \n",
" \n",
" \n",
" 4 \n",
" male \n",
" 28.0 \n",
" 0 \n",
" 0 \n",
" 8.4583 \n",
" Third \n",
" y \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" sex age n_siblings_spouses parch fare class alone\n",
"0 male 22.0 1 0 7.2500 Third n\n",
"1 female 38.0 1 0 71.2833 First n\n",
"2 female 26.0 0 0 7.9250 Third y\n",
"3 female 35.0 1 0 53.1000 First n\n",
"4 male 28.0 0 0 8.4583 Third y"
]
},
"execution_count": 131,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"feature_names = ['sex','age','n_siblings_spouses','parch','fare','class','alone']\n",
"prediction_features = eval[feature_names]\n",
"outcome_feature = ['survived']\n",
"outcome_label = eval[outcome_feature]\n",
"categorical_features = ['sex','n_siblings_spouses','parch','class','alone']\n",
"numeric_features = ['age','fare']\n",
"prediction_features.head()"
]
},
{
"cell_type": "code",
"execution_count": 132,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
":2: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" prediction_features[numeric_features] = scaler.transform(prediction_features[numeric_features])\n",
"/home/prince_tesla/.local/lib/python3.8/site-packages/pandas/core/indexing.py:1738: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" self._setitem_single_column(loc, value[:, i].tolist(), pi)\n"
]
}
],
"source": [
"scaler.fit(prediction_features[numeric_features])\n",
"prediction_features[numeric_features] = scaler.transform(prediction_features[numeric_features])\n",
"prediction_features = pd.get_dummies(prediction_features,columns=categorical_features)\n",
"c_engineering_features = list(set(prediction_features.columns)-set(numeric_features))"
]
},
{
"cell_type": "code",
"execution_count": 133,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"missing feature(s): ['parch_6']\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" age \n",
" fare \n",
" sex_female \n",
" sex_male \n",
" n_siblings_spouses_0 \n",
" n_siblings_spouses_1 \n",
" n_siblings_spouses_2 \n",
" n_siblings_spouses_3 \n",
" n_siblings_spouses_4 \n",
" n_siblings_spouses_5 \n",
" ... \n",
" parch_2 \n",
" parch_3 \n",
" parch_4 \n",
" parch_5 \n",
" class_First \n",
" class_Second \n",
" class_Third \n",
" alone_n \n",
" alone_y \n",
" parch_6 \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" -0.610415 \n",
" -0.497403 \n",
" 0 \n",
" 1 \n",
" 0 \n",
" 1 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" ... \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 1 \n",
" 1 \n",
" 0 \n",
" 0 \n",
" \n",
" \n",
" 1 \n",
" 0.669397 \n",
" 0.676353 \n",
" 1 \n",
" 0 \n",
" 0 \n",
" 1 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" ... \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 1 \n",
" 0 \n",
" 0 \n",
" 1 \n",
" 0 \n",
" 0 \n",
" \n",
" \n",
" 2 \n",
" -0.290462 \n",
" -0.485030 \n",
" 1 \n",
" 0 \n",
" 1 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" ... \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 1 \n",
" 0 \n",
" 1 \n",
" 0 \n",
" \n",
" \n",
" 3 \n",
" 0.429432 \n",
" 0.343046 \n",
" 1 \n",
" 0 \n",
" 0 \n",
" 1 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" ... \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 1 \n",
" 0 \n",
" 0 \n",
" 1 \n",
" 0 \n",
" 0 \n",
" \n",
" \n",
" 4 \n",
" -0.130485 \n",
" -0.475254 \n",
" 0 \n",
" 1 \n",
" 1 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" ... \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 1 \n",
" 0 \n",
" 1 \n",
" 0 \n",
" \n",
" \n",
"
\n",
"
5 rows × 23 columns
\n",
"
"
],
"text/plain": [
" age fare sex_female sex_male n_siblings_spouses_0 \\\n",
"0 -0.610415 -0.497403 0 1 0 \n",
"1 0.669397 0.676353 1 0 0 \n",
"2 -0.290462 -0.485030 1 0 1 \n",
"3 0.429432 0.343046 1 0 0 \n",
"4 -0.130485 -0.475254 0 1 1 \n",
"\n",
" n_siblings_spouses_1 n_siblings_spouses_2 n_siblings_spouses_3 \\\n",
"0 1 0 0 \n",
"1 1 0 0 \n",
"2 0 0 0 \n",
"3 1 0 0 \n",
"4 0 0 0 \n",
"\n",
" n_siblings_spouses_4 n_siblings_spouses_5 ... parch_2 parch_3 parch_4 \\\n",
"0 0 0 ... 0 0 0 \n",
"1 0 0 ... 0 0 0 \n",
"2 0 0 ... 0 0 0 \n",
"3 0 0 ... 0 0 0 \n",
"4 0 0 ... 0 0 0 \n",
"\n",
" parch_5 class_First class_Second class_Third alone_n alone_y parch_6 \n",
"0 0 0 0 1 1 0 0 \n",
"1 0 1 0 0 1 0 0 \n",
"2 0 0 0 1 0 1 0 \n",
"3 0 1 0 0 1 0 0 \n",
"4 0 0 0 1 0 1 0 \n",
"\n",
"[5 rows x 23 columns]"
]
},
"execution_count": 133,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#setting aside and making up for the whole categorical features from our first model\n",
"c_engineering_features = set(prediction_features.columns) - set(numeric_features)\n",
"missing_features = list(set(engineering_features) - c_engineering_features)\n",
"for feature in missing_features:\n",
" #add zeroes\n",
" prediction_features[feature] = [0]*len(prediction_features)\n",
"print('missing feature(s):',missing_features) \n",
"prediction_features.head()"
]
},
{
"cell_type": "code",
"execution_count": 134,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" survived \n",
" sex \n",
" age \n",
" n_siblings_spouses \n",
" parch \n",
" fare \n",
" class \n",
" deck \n",
" embark_town \n",
" alone \n",
" survived[Logistic Regression] \n",
" survived[Decision Tree] \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 0 \n",
" male \n",
" 22.0 \n",
" 1 \n",
" 0 \n",
" 7.2500 \n",
" Third \n",
" unknown \n",
" Southampton \n",
" n \n",
" 0 \n",
" 0 \n",
" \n",
" \n",
" 1 \n",
" 1 \n",
" female \n",
" 38.0 \n",
" 1 \n",
" 0 \n",
" 71.2833 \n",
" First \n",
" C \n",
" Cherbourg \n",
" n \n",
" 0 \n",
" 0 \n",
" \n",
" \n",
" 2 \n",
" 1 \n",
" female \n",
" 26.0 \n",
" 0 \n",
" 0 \n",
" 7.9250 \n",
" Third \n",
" unknown \n",
" Southampton \n",
" y \n",
" 1 \n",
" 1 \n",
" \n",
" \n",
" 3 \n",
" 1 \n",
" female \n",
" 35.0 \n",
" 1 \n",
" 0 \n",
" 53.1000 \n",
" First \n",
" C \n",
" Southampton \n",
" n \n",
" 0 \n",
" 0 \n",
" \n",
" \n",
" 4 \n",
" 0 \n",
" male \n",
" 28.0 \n",
" 0 \n",
" 0 \n",
" 8.4583 \n",
" Third \n",
" unknown \n",
" Queenstown \n",
" y \n",
" 0 \n",
" 0 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" survived sex age n_siblings_spouses parch fare class deck \\\n",
"0 0 male 22.0 1 0 7.2500 Third unknown \n",
"1 1 female 38.0 1 0 71.2833 First C \n",
"2 1 female 26.0 0 0 7.9250 Third unknown \n",
"3 1 female 35.0 1 0 53.1000 First C \n",
"4 0 male 28.0 0 0 8.4583 Third unknown \n",
"\n",
" embark_town alone survived[Logistic Regression] survived[Decision Tree] \n",
"0 Southampton n 0 0 \n",
"1 Cherbourg n 0 0 \n",
"2 Southampton y 1 1 \n",
"3 Southampton n 0 0 \n",
"4 Queenstown y 0 0 "
]
},
"execution_count": 134,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"prediction_lr = model_lr.predict(prediction_features)\n",
"prediction_dt = model_dt.predict(prediction_features)\n",
"\n",
"# Making a copy of the eval dataset\n",
"eval_2 = eval.copy()\n",
"eval_2['survived[Logistic Regression]'] = prediction_lr\n",
"eval_2['survived[Decision Tree]'] = prediction_dt\n",
"eval_2.head()"
]
},
{
"cell_type": "code",
"execution_count": 135,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[0.98979592 0.9939759 ]\n"
]
}
],
"source": [
"from sklearn.metrics import f1_score\n",
"print(f1_score(actual_label,predicted_label,labels=(1,0),average= None))"
]
},
{
"cell_type": "code",
"execution_count": 136,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"survived 243\n",
"dtype: int64"
]
},
"execution_count": 136,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"outcome_label[outcome_label==1].count()"
]
},
{
"cell_type": "code",
"execution_count": 137,
"metadata": {},
"outputs": [],
"source": [
"def predict(sex, age, n_siblings_spouses, parch, fare, Class, alone):\n",
" features = ['sex_female', 'n_siblings_spouses_8', 'n_siblings_spouses_1',\n",
" 'parch_6', 'n_siblings_spouses_4', 'parch_0', 'parch_5', 'n_siblings_spouses_0', 'parch_3',\n",
" 'sex_male', 'Class_First', 'parch_2', 'alone_y', 'n_siblings_spouses_5', 'n_siblings_spouses_2',\n",
" 'n_siblings_spouses_3', 'Class_Second', 'parch_1', 'alone_n', 'Class_Third', 'parch_4']\n",
" labels = ['sex', 'age', 'n_siblings_spouses', 'parch', 'fare', 'Class', 'alone']\n",
" feature_names = [sex, age, n_siblings_spouses, parch, fare, Class, alone]\n",
" features_df = pd.DataFrame([feature_names], columns=labels)\n",
" categorical_features = ['sex', 'n_siblings_spouses', 'parch', 'Class', 'alone']\n",
" numeric_features = ['age', 'fare']\n",
" features_df[numeric_features] = scaler.transform(features_df[numeric_features])\n",
" features_df = pd.get_dummies(features_df,columns=categorical_features)\n",
" #setting aside and making up for the whole categorical features from our first model\n",
" c_engineering_features = set(features_df.columns) - set(numeric_features)\n",
" missing_features = list(set(features) - c_engineering_features)\n",
" for feature in missing_features:\n",
" #add zeroes\n",
" features_df[feature] = [0]*len(features_df)\n",
" result = model_lr.predict(features_df)\n",
" print(features_df)\n",
" return result\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 138,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" age fare sex_male n_siblings_spouses_1 parch_5 Class_First \\\n",
"0 -0.770391 0.320363 1 1 1 1 \n",
"\n",
" alone_n sex_female n_siblings_spouses_8 alone_y ... \\\n",
"0 1 0 0 0 ... \n",
"\n",
" n_siblings_spouses_2 Class_Second n_siblings_spouses_3 \\\n",
"0 0 0 0 \n",
"\n",
" n_siblings_spouses_0 parch_1 parch_3 n_siblings_spouses_4 Class_Third \\\n",
"0 0 0 0 0 0 \n",
"\n",
" parch_2 parch_4 \n",
"0 0 0 \n",
"\n",
"[1 rows x 23 columns]\n"
]
},
{
"data": {
"text/plain": [
"array([1])"
]
},
"execution_count": 138,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"predict('male', 20, 1, 5, 51.86255, 'First', 'n')"
]
},
{
"cell_type": "code",
"execution_count": 139,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" sex \n",
" age \n",
" n_siblings_spouses \n",
" parch \n",
" fare \n",
" Class \n",
" alone \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" male \n",
" -1.410297 \n",
" 3 \n",
" 4 \n",
" 1.202742 \n",
" First \n",
" y \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" sex age n_siblings_spouses parch fare Class alone\n",
"0 male -1.410297 3 4 1.202742 First y"
]
},
"execution_count": 139,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"b = ['male',12, 3,4,100,'First','y']\n",
"a = ['sex', 'age', 'n_siblings_spouses', 'parch', 'fare', 'Class', 'alone']\n",
"c = pd.DataFrame([b], columns=a)\n",
"x = ['age', 'fare']\n",
"c[x] = scaler.transform(c[x])\n",
"c"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}