{ "cells": [ { "cell_type": "code", "execution_count": 36, "id": "b87148ba", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
customerIDgenderSeniorCitizenPartnerDependentstenurePhoneServiceMultipleLinesInternetServiceOnlineSecurity...DeviceProtectionTechSupportStreamingTVStreamingMoviesContractPaperlessBillingPaymentMethodMonthlyChargesTotalChargesChurn
07590-VHVEGFemale0YesNo1NoNo phone serviceDSLNo...NoNoNoNoMonth-to-monthYesElectronic check29.8529.85No
15575-GNVDEMale0NoNo34YesNoDSLYes...YesNoNoNoOne yearNoMailed check56.951889.5No
23668-QPYBKMale0NoNo2YesNoDSLYes...NoNoNoNoMonth-to-monthYesMailed check53.85108.15Yes
37795-CFOCWMale0NoNo45NoNo phone serviceDSLYes...YesYesNoNoOne yearNoBank transfer (automatic)42.301840.75No
49237-HQITUFemale0NoNo2YesNoFiber opticNo...NoNoNoNoMonth-to-monthYesElectronic check70.70151.65Yes
\n", "

5 rows × 21 columns

\n", "
" ], "text/plain": [ " customerID gender SeniorCitizen Partner Dependents tenure PhoneService \\\n", "0 7590-VHVEG Female 0 Yes No 1 No \n", "1 5575-GNVDE Male 0 No No 34 Yes \n", "2 3668-QPYBK Male 0 No No 2 Yes \n", "3 7795-CFOCW Male 0 No No 45 No \n", "4 9237-HQITU Female 0 No No 2 Yes \n", "\n", " MultipleLines InternetService OnlineSecurity ... DeviceProtection \\\n", "0 No phone service DSL No ... No \n", "1 No DSL Yes ... Yes \n", "2 No DSL Yes ... No \n", "3 No phone service DSL Yes ... Yes \n", "4 No Fiber optic No ... No \n", "\n", " TechSupport StreamingTV StreamingMovies Contract PaperlessBilling \\\n", "0 No No No Month-to-month Yes \n", "1 No No No One year No \n", "2 No No No Month-to-month Yes \n", "3 Yes No No One year No \n", "4 No No No Month-to-month Yes \n", "\n", " PaymentMethod MonthlyCharges TotalCharges Churn \n", "0 Electronic check 29.85 29.85 No \n", "1 Mailed check 56.95 1889.5 No \n", "2 Mailed check 53.85 108.15 Yes \n", "3 Bank transfer (automatic) 42.30 1840.75 No \n", "4 Electronic check 70.70 151.65 Yes \n", "\n", "[5 rows x 21 columns]" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "sns.set_style('darkgrid')\n", "df = pd.read_csv(\"C:/Users/Pranjal Ray/OneDrive/Desktop/WA_Fn-UseC_-Telco-Customer-Churn.csv\")\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 37, "id": "0ddbd39c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(7043, 21)" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.shape" ] }, { "cell_type": "code", "execution_count": 38, "id": "23f33921", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "customerID 0\n", "gender 0\n", "SeniorCitizen 0\n", "Partner 0\n", "Dependents 0\n", "tenure 0\n", "PhoneService 0\n", "MultipleLines 0\n", "InternetService 0\n", 
"OnlineSecurity 0\n", "OnlineBackup 0\n", "DeviceProtection 0\n", "TechSupport 0\n", "StreamingTV 0\n", "StreamingMovies 0\n", "Contract 0\n", "PaperlessBilling 0\n", "PaymentMethod 0\n", "MonthlyCharges 0\n", "TotalCharges 0\n", "Churn 0\n", "dtype: int64" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.isna().sum()" ] }, { "cell_type": "code", "execution_count": 39, "id": "f4d75c0e", "metadata": {}, "outputs": [], "source": [ "df.drop(df.columns[[0]], axis=1, inplace=True)" ] }, { "cell_type": "code", "execution_count": 41, "id": "b79a1009", "metadata": { "scrolled": true }, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "def stacked_plot(df, group, target):\n", " fig, ax = plt.subplots(figsize = (6,4))\n", " temp_df = (df.groupby([group, target]).size()/df.groupby(group)[target].count()).reset_index().pivot(columns=target, index=group, values=0)\n", " temp_df.plot(kind='bar', stacked=True, ax = ax, color = [\"green\", \"darkred\"])\n", " ax.xaxis.set_tick_params(rotation=0)\n", " ax.set_xlabel(group)\n", " ax.set_ylabel('Churn Percentage')\n", "stacked_plot(df, \"gender\", \"Churn\")\n", "stacked_plot(df, \"SeniorCitizen\", \"Churn\")\n", "stacked_plot(df, \"Partner\", \"Churn\")\n", "stacked_plot(df, \"Dependents\", \"Churn\")" ] }, { "cell_type": "code", "execution_count": 42, "id": "f2a4681b", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
genderSeniorCitizenPartnerDependentstenurePhoneServiceMultipleLinesInternetServiceOnlineSecurityOnlineBackupDeviceProtectionTechSupportStreamingTVStreamingMoviesContractPaperlessBillingPaymentMethodMonthlyChargesTotalChargesChurn
0Female0YesNo1NoNo phone serviceDSLNoYesNoNoNoNoMonth-to-monthYesElectronic check29.8529.85No
1Male0NoNo34YesNoDSLYesNoYesNoNoNoOne yearNoMailed check56.951889.5No
2Male0NoNo2YesNoDSLYesYesNoNoNoNoMonth-to-monthYesMailed check53.85108.15Yes
3Male0NoNo45NoNo phone serviceDSLYesNoYesYesNoNoOne yearNoBank transfer (automatic)42.301840.75No
4Female0NoNo2YesNoFiber opticNoNoNoNoNoNoMonth-to-monthYesElectronic check70.70151.65Yes
...............................................................
7038Male0YesYes24YesYesDSLYesNoYesYesYesYesOne yearYesMailed check84.801990.5No
7039Female0YesYes72YesYesFiber opticNoYesYesNoYesYesOne yearYesCredit card (automatic)103.207362.9No
7040Female0YesYes11NoNo phone serviceDSLYesNoNoNoNoNoMonth-to-monthYesElectronic check29.60346.45No
7041Male1YesNo4YesYesFiber opticNoNoNoNoNoNoMonth-to-monthYesMailed check74.40306.6Yes
7042Male0NoNo66YesNoFiber opticYesNoYesYesYesYesTwo yearYesBank transfer (automatic)105.656844.5No
\n", "

7043 rows × 20 columns

\n", "
" ], "text/plain": [ " gender SeniorCitizen Partner Dependents tenure PhoneService \\\n", "0 Female 0 Yes No 1 No \n", "1 Male 0 No No 34 Yes \n", "2 Male 0 No No 2 Yes \n", "3 Male 0 No No 45 No \n", "4 Female 0 No No 2 Yes \n", "... ... ... ... ... ... ... \n", "7038 Male 0 Yes Yes 24 Yes \n", "7039 Female 0 Yes Yes 72 Yes \n", "7040 Female 0 Yes Yes 11 No \n", "7041 Male 1 Yes No 4 Yes \n", "7042 Male 0 No No 66 Yes \n", "\n", " MultipleLines InternetService OnlineSecurity OnlineBackup \\\n", "0 No phone service DSL No Yes \n", "1 No DSL Yes No \n", "2 No DSL Yes Yes \n", "3 No phone service DSL Yes No \n", "4 No Fiber optic No No \n", "... ... ... ... ... \n", "7038 Yes DSL Yes No \n", "7039 Yes Fiber optic No Yes \n", "7040 No phone service DSL Yes No \n", "7041 Yes Fiber optic No No \n", "7042 No Fiber optic Yes No \n", "\n", " DeviceProtection TechSupport StreamingTV StreamingMovies Contract \\\n", "0 No No No No Month-to-month \n", "1 Yes No No No One year \n", "2 No No No No Month-to-month \n", "3 Yes Yes No No One year \n", "4 No No No No Month-to-month \n", "... ... ... ... ... ... \n", "7038 Yes Yes Yes Yes One year \n", "7039 Yes No Yes Yes One year \n", "7040 No No No No Month-to-month \n", "7041 No No No No Month-to-month \n", "7042 Yes Yes Yes Yes Two year \n", "\n", " PaperlessBilling PaymentMethod MonthlyCharges TotalCharges \\\n", "0 Yes Electronic check 29.85 29.85 \n", "1 No Mailed check 56.95 1889.5 \n", "2 Yes Mailed check 53.85 108.15 \n", "3 No Bank transfer (automatic) 42.30 1840.75 \n", "4 Yes Electronic check 70.70 151.65 \n", "... ... ... ... ... \n", "7038 Yes Mailed check 84.80 1990.5 \n", "7039 Yes Credit card (automatic) 103.20 7362.9 \n", "7040 Yes Electronic check 29.60 346.45 \n", "7041 Yes Mailed check 74.40 306.6 \n", "7042 Yes Bank transfer (automatic) 105.65 6844.5 \n", "\n", " Churn \n", "0 No \n", "1 No \n", "2 Yes \n", "3 No \n", "4 Yes \n", "... ... 
\n", "7038 No \n", "7039 No \n", "7040 No \n", "7041 Yes \n", "7042 No \n", "\n", "[7043 rows x 20 columns]" ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 43, "id": "d737e383", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "No 1437\n", "Yes 229\n", "Name: Churn, dtype: int64" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[(df.SeniorCitizen == 0) & (df.Partner == 'Yes') & (df.Dependents == 'Yes')].Churn.value_counts()" ] }, { "cell_type": "code", "execution_count": 46, "id": "84e0c495", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "No 921\n", "Yes 242\n", "Name: Churn, dtype: int64" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[(df.SeniorCitizen == 0) & (df.Partner == 'Yes') & (df.Dependents == 'No')].Churn.value_counts()" ] }, { "cell_type": "code", "execution_count": 47, "id": "79d7494d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "No 278\n", "Yes 75\n", "Name: Churn, dtype: int64" ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[(df.SeniorCitizen == 0) & (df.Partner == 'No') & (df.Dependents == 'Yes')].Churn.value_counts()" ] }, { "cell_type": "code", "execution_count": 48, "id": "86961019", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "No 1872\n", "Yes 847\n", "Name: Churn, dtype: int64" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[(df.SeniorCitizen == 0) & (df.Partner == 'No') & (df.Dependents == 'No')].Churn.value_counts()" ] }, { "cell_type": "code", "execution_count": 49, "id": "a4de94da", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 7043.000000\n", "mean 32.371149\n", "std 24.559481\n", "min 0.000000\n", "25% 9.000000\n", "50% 29.000000\n", "75% 55.000000\n", "max 72.000000\n", "Name: tenure, 
dtype: float64" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['tenure'].describe()" ] }, { "cell_type": "code", "execution_count": 50, "id": "324f6ec0", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1 613\n", "72 362\n", "2 238\n", "3 200\n", "4 176\n", "71 170\n", "5 133\n", "7 131\n", "8 123\n", "70 119\n", "Name: tenure, dtype: int64" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['tenure'].value_counts().head(10)" ] }, { "cell_type": "code", "execution_count": 51, "id": "9ef12561", "metadata": { "collapsed": true }, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plt.figure(figsize=(16,8))\n", "sns.countplot(x=\"tenure\", hue=\"Churn\", data=df)\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 52, "id": "cd1d5ad0", "metadata": {}, "outputs": [], "source": [ "def tenure(t):\n", " if t<=12:\n", " return 1\n", " elif t>12 and t<=24:\n", " return 2\n", " elif t>24 and t<=36:\n", " return 3\n", " elif t>36 and t<=48:\n", " return 4\n", " elif t>48 and t<=60:\n", " return 5\n", " else:\n", " return 6\n", "\n", "df[\"tenure_group\"]=df[\"tenure\"].apply(lambda x: tenure(x))" ] }, { "cell_type": "code", "execution_count": 53, "id": "291a7d77", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1 2186\n", "6 1407\n", "2 1024\n", "3 832\n", "5 832\n", "4 762\n", "Name: tenure_group, dtype: int64" ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[\"tenure_group\"].value_counts()" ] }, { "cell_type": "code", "execution_count": 54, "id": "c2378179", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
genderSeniorCitizenPartnerDependentstenurePhoneServiceMultipleLinesInternetServiceOnlineSecurityOnlineBackup...TechSupportStreamingTVStreamingMoviesContractPaperlessBillingPaymentMethodMonthlyChargesTotalChargesChurntenure_group
11Male0NoNo16YesNoNoNo internet serviceNo internet service...No internet serviceNo internet serviceNo internet serviceTwo yearNoCredit card (automatic)18.95326.8No2
16Female0NoNo52YesNoNoNo internet serviceNo internet service...No internet serviceNo internet serviceNo internet serviceOne yearNoMailed check20.651022.95No5
21Male0YesNo12YesNoNoNo internet serviceNo internet service...No internet serviceNo internet serviceNo internet serviceOne yearNoBank transfer (automatic)19.80202.25No1
22Male0NoNo1YesNoNoNo internet serviceNo internet service...No internet serviceNo internet serviceNo internet serviceMonth-to-monthNoMailed check20.1520.15Yes1
33Male0NoNo1YesNoNoNo internet serviceNo internet service...No internet serviceNo internet serviceNo internet serviceMonth-to-monthNoBank transfer (automatic)20.2020.2No1
\n", "

5 rows × 21 columns

\n", "
" ], "text/plain": [ " gender SeniorCitizen Partner Dependents tenure PhoneService \\\n", "11 Male 0 No No 16 Yes \n", "16 Female 0 No No 52 Yes \n", "21 Male 0 Yes No 12 Yes \n", "22 Male 0 No No 1 Yes \n", "33 Male 0 No No 1 Yes \n", "\n", " MultipleLines InternetService OnlineSecurity OnlineBackup \\\n", "11 No No No internet service No internet service \n", "16 No No No internet service No internet service \n", "21 No No No internet service No internet service \n", "22 No No No internet service No internet service \n", "33 No No No internet service No internet service \n", "\n", " ... TechSupport StreamingTV StreamingMovies \\\n", "11 ... No internet service No internet service No internet service \n", "16 ... No internet service No internet service No internet service \n", "21 ... No internet service No internet service No internet service \n", "22 ... No internet service No internet service No internet service \n", "33 ... No internet service No internet service No internet service \n", "\n", " Contract PaperlessBilling PaymentMethod MonthlyCharges \\\n", "11 Two year No Credit card (automatic) 18.95 \n", "16 One year No Mailed check 20.65 \n", "21 One year No Bank transfer (automatic) 19.80 \n", "22 Month-to-month No Mailed check 20.15 \n", "33 Month-to-month No Bank transfer (automatic) 20.20 \n", "\n", " TotalCharges Churn tenure_group \n", "11 326.8 No 2 \n", "16 1022.95 No 5 \n", "21 202.25 No 1 \n", "22 20.15 Yes 1 \n", "33 20.2 No 1 \n", "\n", "[5 rows x 21 columns]" ] }, "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[df.InternetService == 'No'].head()" ] }, { "cell_type": "code", "execution_count": 55, "id": "372909cd", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "11" ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['TotalCharges'] = df[\"TotalCharges\"].replace(\" \",np.nan)\n", "df['TotalCharges'].isna().sum() " ] }, { "cell_type": "code", 
"execution_count": 56, "id": "ca8e8dc5", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
genderSeniorCitizenPartnerDependentstenurePhoneServiceMultipleLinesInternetServiceOnlineSecurityOnlineBackup...TechSupportStreamingTVStreamingMoviesContractPaperlessBillingPaymentMethodMonthlyChargesTotalChargesChurntenure_group
488Female0YesYes0NoNo phone serviceDSLYesNo...YesYesNoTwo yearYesBank transfer (automatic)52.55NaNNo1
753Male0NoYes0YesNoNoNo internet serviceNo internet service...No internet serviceNo internet serviceNo internet serviceTwo yearNoMailed check20.25NaNNo1
936Female0YesYes0YesNoDSLYesYes...NoYesYesTwo yearNoMailed check80.85NaNNo1
1082Male0YesYes0YesYesNoNo internet serviceNo internet service...No internet serviceNo internet serviceNo internet serviceTwo yearNoMailed check25.75NaNNo1
1340Female0YesYes0NoNo phone serviceDSLYesYes...YesYesNoTwo yearNoCredit card (automatic)56.05NaNNo1
3331Male0YesYes0YesNoNoNo internet serviceNo internet service...No internet serviceNo internet serviceNo internet serviceTwo yearNoMailed check19.85NaNNo1
3826Male0YesYes0YesYesNoNo internet serviceNo internet service...No internet serviceNo internet serviceNo internet serviceTwo yearNoMailed check25.35NaNNo1
4380Female0YesYes0YesNoNoNo internet serviceNo internet service...No internet serviceNo internet serviceNo internet serviceTwo yearNoMailed check20.00NaNNo1
5218Male0YesYes0YesNoNoNo internet serviceNo internet service...No internet serviceNo internet serviceNo internet serviceOne yearYesMailed check19.70NaNNo1
6670Female0YesYes0YesYesDSLNoYes...YesYesNoTwo yearNoMailed check73.35NaNNo1
6754Male0NoYes0YesYesDSLYesYes...YesNoNoTwo yearYesBank transfer (automatic)61.90NaNNo1
\n", "

11 rows × 21 columns

\n", "
" ], "text/plain": [ " gender SeniorCitizen Partner Dependents tenure PhoneService \\\n", "488 Female 0 Yes Yes 0 No \n", "753 Male 0 No Yes 0 Yes \n", "936 Female 0 Yes Yes 0 Yes \n", "1082 Male 0 Yes Yes 0 Yes \n", "1340 Female 0 Yes Yes 0 No \n", "3331 Male 0 Yes Yes 0 Yes \n", "3826 Male 0 Yes Yes 0 Yes \n", "4380 Female 0 Yes Yes 0 Yes \n", "5218 Male 0 Yes Yes 0 Yes \n", "6670 Female 0 Yes Yes 0 Yes \n", "6754 Male 0 No Yes 0 Yes \n", "\n", " MultipleLines InternetService OnlineSecurity \\\n", "488 No phone service DSL Yes \n", "753 No No No internet service \n", "936 No DSL Yes \n", "1082 Yes No No internet service \n", "1340 No phone service DSL Yes \n", "3331 No No No internet service \n", "3826 Yes No No internet service \n", "4380 No No No internet service \n", "5218 No No No internet service \n", "6670 Yes DSL No \n", "6754 Yes DSL Yes \n", "\n", " OnlineBackup ... TechSupport StreamingTV \\\n", "488 No ... Yes Yes \n", "753 No internet service ... No internet service No internet service \n", "936 Yes ... No Yes \n", "1082 No internet service ... No internet service No internet service \n", "1340 Yes ... Yes Yes \n", "3331 No internet service ... No internet service No internet service \n", "3826 No internet service ... No internet service No internet service \n", "4380 No internet service ... No internet service No internet service \n", "5218 No internet service ... No internet service No internet service \n", "6670 Yes ... Yes Yes \n", "6754 Yes ... 
# %% Rows whose TotalCharges is missing
# Every row listed here has tenure 0 and Churn "No".
missing_total = df["TotalCharges"].isnull()
df[missing_total]

# %% Fill the missing TotalCharges with 0 and confirm no nulls remain
# 0 is used because the affected customers all have tenure 0.
df.loc[df["TotalCharges"].isnull(), 'TotalCharges'] = 0
df.isnull().any().any()

# %% Make TotalCharges numeric and split the frame by churn outcome
df['TotalCharges'] = df["TotalCharges"].astype(float)

is_churned = df.Churn == "Yes"
is_retained = df.Churn == "No"
Churn = df[is_churned]
Not_Churn = df[is_retained]
"execution_count": 59, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.MonthlyCharges.isna().sum()" ] }, { "cell_type": "code", "execution_count": 60, "id": "c902d80c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[1. , 0.99956055],\n", " [0.99956055, 1. ]])" ] }, "execution_count": 60, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.corrcoef(df.TotalCharges, df.MonthlyCharges*df.tenure)" ] }, { "cell_type": "code", "execution_count": 61, "id": "647b2581", "metadata": {}, "outputs": [], "source": [ "def datapreparation(filepath):\n", " \n", " df = pd.read_csv(filepath)\n", " df.drop([\"customerID\"], inplace = True, axis = 1)\n", " \n", " df.TotalCharges = df.TotalCharges.replace(\" \",np.nan)\n", " df.TotalCharges.fillna(0, inplace = True)\n", " df.TotalCharges = df.TotalCharges.astype(float)\n", " \n", " cols1 = ['Partner', 'Dependents', 'PaperlessBilling', 'Churn', 'PhoneService']\n", " for col in cols1:\n", " df[col] = df[col].apply(lambda x: 0 if x == \"No\" else 1)\n", " \n", " df.gender = df.gender.apply(lambda x: 0 if x == \"Male\" else 1)\n", " df.MultipleLines = df.MultipleLines.map({'No phone service': 0, 'No': 0, 'Yes': 1})\n", " \n", " cols2 = ['OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies']\n", " for col in cols2:\n", " df[col] = df[col].map({'No internet service': 0, 'No': 0, 'Yes': 1})\n", " \n", " df = pd.get_dummies(df, columns=['InternetService', 'Contract', 'PaymentMethod'], drop_first=True)\n", " \n", " return df" ] }, { "cell_type": "code", "execution_count": 74, "id": "1b6a26fc", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
genderSeniorCitizenPartnerDependentstenurePhoneServiceMultipleLinesOnlineSecurityOnlineBackupDeviceProtection...MonthlyChargesTotalChargesChurnInternetService_Fiber opticInternetService_NoContract_One yearContract_Two yearPaymentMethod_Credit card (automatic)PaymentMethod_Electronic checkPaymentMethod_Mailed check
01010100010...29.8529.8500000010
100003410101...56.951889.5000010001
20000210110...53.85108.1510000001
300004500101...42.301840.7500010000
41000210000...70.70151.6511000010
\n", "

5 rows × 24 columns

\n", "
" ], "text/plain": [ " gender SeniorCitizen Partner Dependents tenure PhoneService \\\n", "0 1 0 1 0 1 0 \n", "1 0 0 0 0 34 1 \n", "2 0 0 0 0 2 1 \n", "3 0 0 0 0 45 0 \n", "4 1 0 0 0 2 1 \n", "\n", " MultipleLines OnlineSecurity OnlineBackup DeviceProtection ... \\\n", "0 0 0 1 0 ... \n", "1 0 1 0 1 ... \n", "2 0 1 1 0 ... \n", "3 0 1 0 1 ... \n", "4 0 0 0 0 ... \n", "\n", " MonthlyCharges TotalCharges Churn InternetService_Fiber optic \\\n", "0 29.85 29.85 0 0 \n", "1 56.95 1889.50 0 0 \n", "2 53.85 108.15 1 0 \n", "3 42.30 1840.75 0 0 \n", "4 70.70 151.65 1 1 \n", "\n", " InternetService_No Contract_One year Contract_Two year \\\n", "0 0 0 0 \n", "1 0 1 0 \n", "2 0 0 0 \n", "3 0 1 0 \n", "4 0 0 0 \n", "\n", " PaymentMethod_Credit card (automatic) PaymentMethod_Electronic check \\\n", "0 0 1 \n", "1 0 0 \n", "2 0 0 \n", "3 0 0 \n", "4 0 1 \n", "\n", " PaymentMethod_Mailed check \n", "0 0 \n", "1 1 \n", "2 1 \n", "3 0 \n", "4 0 \n", "\n", "[5 rows x 24 columns]" ] }, "execution_count": 74, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = datapreparation(filepath = \"C:/Users/Pranjal Ray/OneDrive/Desktop/WA_Fn-UseC_-Telco-Customer-Churn.csv\")\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 75, "id": "37d4a5df", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "False" ] }, "execution_count": 75, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.isnull().any().any()" ] }, { "cell_type": "code", "execution_count": 76, "id": "a8f45473", "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score\n", "from sklearn.metrics import confusion_matrix, accuracy_score, classification_report\n", "from sklearn.metrics import roc_auc_score, roc_curve, precision_score, recall_score, f1_score\n", "from imblearn.over_sampling import SMOTE\n", "from sklearn.ensemble import RandomForestClassifier" ] }, { "cell_type": "code", "execution_count": 77, "id": 
"f1e96a3c", "metadata": {}, "outputs": [], "source": [ "train, test = train_test_split(df, test_size=0.2, random_state=111, stratify = df.Churn)\n", "x = df.columns[df.columns!=\"Churn\"]\n", "y = \"Churn\"\n", "train_x = train[x]\n", "train_y = train[y]\n", "test_x = test[x]\n", "test_y = test[y]" ] }, { "cell_type": "code", "execution_count": 93, "id": "029c9416", "metadata": {}, "outputs": [], "source": [
 "def churn_prediction(algo, training_x, training_y, testing_x, testing_y, cols, cf = 'coefficients'):\n",
 "    \"\"\"Fit `algo` on the training split and report how it does on the test split.\n",
 "\n",
 "    Prints the estimator, the classification report, accuracy and ROC AUC, then\n",
 "    draws a confusion matrix, the ROC curve and a bar chart of per-feature weights.\n",
 "\n",
 "    algo: estimator exposing fit, predict and predict_proba.\n",
 "    cols: feature names; they are merged with the weights by position.\n",
 "    cf: \"coefficients\" reads algo.coef_, \"features\" reads algo.feature_importances_.\n",
 "\n",
 "    Raises ValueError for any other value of cf.\n",
 "    \"\"\"\n",
 "    # Fail before the (slow) fit instead of hitting an undefined name afterwards.\n",
 "    if cf not in (\"coefficients\", \"features\"):\n",
 "        raise ValueError(\"cf must be 'coefficients' or 'features', got %r\" % (cf,))\n",
 "    algo.fit(training_x,training_y)\n",
 "    predictions = algo.predict(testing_x)\n",
 "    # Column 1 is the probability of the second class in algo.classes_.\n",
 "    probabilities = algo.predict_proba(testing_x)[:,1]\n",
 "    if cf == \"coefficients\":\n",
 "        coefficients = pd.DataFrame(algo.coef_.ravel())\n",
 "    elif cf == \"features\":\n",
 "        coefficients = pd.DataFrame(algo.feature_importances_)\n",
 "    column_df = pd.DataFrame(cols)\n",
 "    # Both frames carry a default 0..n-1 index, so the merge pairs weight i with name i.\n",
 "    coef_sumry = (pd.merge(coefficients,column_df,left_index= True,right_index= True, how = \"left\"))\n",
 "    coef_sumry.columns = [\"coefficients\",\"features\"]\n",
 "    coef_sumry = coef_sumry.sort_values(by = \"coefficients\",ascending = False)\n",
 "    print (algo)\n",
 "    print (\"\\n Classification report : \\n\",classification_report(testing_y,predictions))\n",
 "    print (\"Accuracy Score : \",accuracy_score(testing_y,predictions))\n",
 "    conf_matrix = confusion_matrix(testing_y,predictions)\n",
 "    plt.figure(figsize=(12,12))\n",
 "    # Top-left panel: confusion matrix.\n",
 "    plt.subplot(221)\n",
 "    sns.heatmap(conf_matrix, fmt = \"d\",annot=True, cmap='Blues')\n",
 "    plt.title('Confusion Matrix')\n",
 "    plt.ylabel('True Values')\n",
 "    plt.xlabel('Predicted Values')\n",
 "    model_roc_auc = roc_auc_score(testing_y,probabilities)\n",
 "    print (\"Area under curve : \",model_roc_auc,\"\\n\")\n",
 "    fpr,tpr,thresholds = roc_curve(testing_y,probabilities)\n",
 "    # Top-right panel: ROC curve with the chance diagonal for reference.\n",
 "    plt.subplot(222)\n",
 "    plt.plot(fpr, tpr, color='darkorange', lw=1, label = \"Auc : %.3f\" %model_roc_auc)\n",
 "    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')\n",
 "    plt.xlim([0.0, 1.0])\n",
 "    plt.ylim([0.0, 1.05])\n",
 "    plt.xlabel('False Positive Rate')\n",
 "    plt.ylabel('True Positive Rate')\n",
 "    plt.title('Receiver operating characteristic')\n",
 "    plt.legend(loc=\"lower right\")\n",
 "    # Bottom panel (full width): weights sorted from largest to smallest.\n",
 "    plt.subplot(212)\n",
 "    sns.barplot(x = coef_sumry[\"features\"] ,y = coef_sumry[\"coefficients\"])\n",
 "    plt.title('Feature Importances')\n",
 "    plt.xticks(rotation=\"vertical\")\n",
 "    plt.show()" ] }, { "cell_type": "code", "execution_count": 94, "id": "a2060930", "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Fitting 3 folds for each of 24 candidates, totalling 72 fits\n" ] }, { "data": { "text/html": [ "
GridSearchCV(cv=3, estimator=RandomForestClassifier(), n_jobs=-1,\n",
       "             param_grid={'max_features': ['auto', 'sqrt', 'log2', None],\n",
       "                         'n_estimators': [300, 500, 700, 900, 1100, 1300]},\n",
       "             scoring='f1', verbose=1)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "GridSearchCV(cv=3, estimator=RandomForestClassifier(), n_jobs=-1,\n", " param_grid={'max_features': ['auto', 'sqrt', 'log2', None],\n", " 'n_estimators': [300, 500, 700, 900, 1100, 1300]},\n", " scoring='f1', verbose=1)" ] }, "execution_count": 94, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Search 1: number of trees x per-split feature budget, scored by F1 with 3-fold CV.\n",
 "# 'auto' is left out: scikit-learn deprecated it in 1.1 and removed it in 1.3, and for\n",
 "# RandomForestClassifier it was the same setting as 'sqrt', so it only duplicated fits.\n",
 "param_grid1 = {'max_features':['sqrt', 'log2', None],\n",
 "               'n_estimators':[300, 500, 700, 900, 1100, 1300]\n",
 "              }\n",
 "\n",
 "rf_model = RandomForestClassifier()\n",
 "grid1 = GridSearchCV(estimator=rf_model, param_grid=param_grid1, n_jobs=-1, cv=3, verbose=1, scoring = 'f1')\n",
 "grid1.fit(train_x, train_y)" ] }, { "cell_type": "code", "execution_count": 95, "id": "6304aa86", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
RandomForestClassifier(max_features='log2', n_estimators=300)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "RandomForestClassifier(max_features='log2', n_estimators=300)" ] }, "execution_count": 95, "metadata": {}, "output_type": "execute_result" } ], "source": [ "grid1.best_estimator_" ] }, { "cell_type": "code", "execution_count": 96, "id": "27b8c617", "metadata": { "collapsed": true }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 96, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "dt = pd.DataFrame(grid1.cv_results_)\n", "dt.param_max_features = dt.param_max_features.astype(str)\n", "dt.param_n_estimators = dt.param_n_estimators.astype(str)\n", "\n", "table = pd.pivot_table(dt, values='mean_test_score', index='param_n_estimators', \n", " columns='param_max_features')\n", " \n", "sns.heatmap(table)" ] }, { "cell_type": "code", "execution_count": 97, "id": "aa77cab0", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.571507341023573" ] }, "execution_count": 97, "metadata": {}, "output_type": "execute_result" } ], "source": [ "grid1.best_score_" ] }, { "cell_type": "code", "execution_count": 98, "id": "143b43ab", "metadata": { "collapsed": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Fitting 3 folds for each of 36 candidates, totalling 108 fits\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\Pranjal Ray\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n", " warn(\n" ] }, { "data": { "text/html": [ "
GridSearchCV(cv=3, estimator=RandomForestClassifier(), n_jobs=-1,\n",
       "             param_grid={'criterion': ['entropy', 'gini'],\n",
       "                         'max_depth': [7, 9, 11, 13, 15, None],\n",
       "                         'max_features': ['auto'],\n",
       "                         'n_estimators': [1000, 1100, 1200]},\n",
       "             scoring='f1', verbose=1)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "GridSearchCV(cv=3, estimator=RandomForestClassifier(), n_jobs=-1,\n", " param_grid={'criterion': ['entropy', 'gini'],\n", " 'max_depth': [7, 9, 11, 13, 15, None],\n", " 'max_features': ['auto'],\n", " 'n_estimators': [1000, 1100, 1200]},\n", " scoring='f1', verbose=1)" ] }, "execution_count": 98, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Search 2: split criterion x tree depth around ~1000 trees, scored by F1 with 3-fold CV.\n",
 "# 'sqrt' replaces the deprecated 'auto' (removed in scikit-learn 1.3); for\n",
 "# RandomForestClassifier the two select the same number of features per split.\n",
 "param_grid2 = {'max_features':['sqrt'],\n",
 "               'n_estimators':[1000, 1100, 1200],\n",
 "               'criterion': ['entropy', 'gini'],\n",
 "               'max_depth': [7, 9, 11, 13, 15, None],\n",
 "              }\n",
 "\n",
 "rf_model = RandomForestClassifier()\n",
 "grid2 = GridSearchCV(estimator=rf_model, param_grid=param_grid2, n_jobs=-1, cv=3, verbose=1, scoring = 'f1')\n",
 "grid2.fit(train_x, train_y)" ] }, { "cell_type": "code", "execution_count": 99, "id": "06489d11", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
RandomForestClassifier(max_depth=9, max_features='auto', n_estimators=1200)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "RandomForestClassifier(max_depth=9, max_features='auto', n_estimators=1200)" ] }, "execution_count": 99, "metadata": {}, "output_type": "execute_result" } ], "source": [ "grid2.best_estimator_" ] }, { "cell_type": "code", "execution_count": 100, "id": "76d3671e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.5783323242846907" ] }, "execution_count": 100, "metadata": {}, "output_type": "execute_result" } ], "source": [ "grid2.best_score_" ] }, { "cell_type": "code", "execution_count": 101, "id": "01c691ac", "metadata": { "collapsed": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Fitting 3 folds for each of 16 candidates, totalling 48 fits\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\Pranjal Ray\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n", " warn(\n" ] }, { "data": { "text/html": [ "
GridSearchCV(cv=3, estimator=RandomForestClassifier(), n_jobs=-1,\n",
       "             param_grid={'criterion': ['entropy'], 'max_depth': [10],\n",
       "                         'max_features': ['auto'],\n",
       "                         'min_samples_leaf': [1, 3, 5, 7],\n",
       "                         'min_samples_split': [2, 4, 6, 8],\n",
       "                         'n_estimators': [1000]},\n",
       "             scoring='f1', verbose=1)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "GridSearchCV(cv=3, estimator=RandomForestClassifier(), n_jobs=-1,\n", " param_grid={'criterion': ['entropy'], 'max_depth': [10],\n", " 'max_features': ['auto'],\n", " 'min_samples_leaf': [1, 3, 5, 7],\n", " 'min_samples_split': [2, 4, 6, 8],\n", " 'n_estimators': [1000]},\n", " scoring='f1', verbose=1)" ] }, "execution_count": 101, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Search 3: leaf / split size limits with the other settings held fixed.\n",
 "# 'sqrt' replaces the deprecated 'auto' (removed in scikit-learn 1.3); for\n",
 "# RandomForestClassifier the two select the same number of features per split.\n",
 "param_grid3 = {'max_features':['sqrt'],\n",
 "               'n_estimators':[1000],\n",
 "               'criterion': ['entropy'],\n",
 "               'max_depth': [10],\n",
 "               'min_samples_leaf': [1, 3, 5, 7],\n",
 "               'min_samples_split': [2, 4, 6, 8]\n",
 "              }\n",
 "\n",
 "rf_model = RandomForestClassifier()\n",
 "grid3 = GridSearchCV(estimator=rf_model, param_grid=param_grid3, n_jobs=-1, cv=3, verbose=1, scoring = 'f1')\n",
 "grid3.fit(train_x, train_y)" ] }, { "cell_type": "code", "execution_count": 102, "id": "f16490c3", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.5771951048339532" ] }, "execution_count": 102, "metadata": {}, "output_type": "execute_result" } ], "source": [ "grid3.best_score_" ] }, { "cell_type": "code", "execution_count": 103, "id": "bfc98bad", "metadata": { "collapsed": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Fitting 3 folds for each of 3 candidates, totalling 9 fits\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\Pranjal Ray\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n", " warn(\n" ] }, { "data": { "text/html": [ "
GridSearchCV(cv=3, estimator=RandomForestClassifier(), n_jobs=-1,\n",
       "             param_grid={'class_weight': [{0: 1, 1: 1}, {0: 1, 1: 2},\n",
       "                                          {0: 1, 1: 3}],\n",
       "                         'criterion': ['entropy'], 'max_depth': [10],\n",
       "                         'max_features': ['auto'], 'min_samples_leaf': [1],\n",
       "                         'min_samples_split': [8], 'n_estimators': [1000]},\n",
       "             scoring='f1', verbose=1)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "GridSearchCV(cv=3, estimator=RandomForestClassifier(), n_jobs=-1,\n", " param_grid={'class_weight': [{0: 1, 1: 1}, {0: 1, 1: 2},\n", " {0: 1, 1: 3}],\n", " 'criterion': ['entropy'], 'max_depth': [10],\n", " 'max_features': ['auto'], 'min_samples_leaf': [1],\n", " 'min_samples_split': [8], 'n_estimators': [1000]},\n", " scoring='f1', verbose=1)" ] }, "execution_count": 103, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Search 4: weight given to the churn class (1) relative to the non-churn class (0).\n",
 "# 'sqrt' replaces the deprecated 'auto' (removed in scikit-learn 1.3); for\n",
 "# RandomForestClassifier the two select the same number of features per split.\n",
 "param_grid4 = {'class_weight':[{0:1, 1:1}, {0:1, 1:2}, {0:1, 1:3}],\n",
 "               'max_features':['sqrt'],\n",
 "               'n_estimators':[1000],\n",
 "               'criterion': ['entropy'],\n",
 "               'max_depth': [10],\n",
 "               'min_samples_leaf': [1],\n",
 "               'min_samples_split': [8]\n",
 "              }\n",
 "\n",
 "rf_model = RandomForestClassifier()\n",
 "grid4 = GridSearchCV(estimator=rf_model, param_grid=param_grid4, n_jobs=-1, cv=3, verbose=1, scoring = 'f1')\n",
 "grid4.fit(train_x, train_y)" ] }, { "cell_type": "code", "execution_count": 104, "id": "f47323d5", "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
RandomForestClassifier(class_weight={0: 1, 1: 3}, criterion='entropy',\n",
       "                       max_depth=10, max_features='auto', min_samples_split=8,\n",
       "                       n_estimators=1000)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "RandomForestClassifier(class_weight={0: 1, 1: 3}, criterion='entropy',\n", " max_depth=10, max_features='auto', min_samples_split=8,\n", " n_estimators=1000)" ] }, "execution_count": 104, "metadata": {}, "output_type": "execute_result" } ], "source": [ "grid4.best_estimator_" ] }, { "cell_type": "code", "execution_count": 105, "id": "e01b1e14", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.6329993304864453" ] }, "execution_count": 105, "metadata": {}, "output_type": "execute_result" } ], "source": [ "grid4.best_score_" ] }, { "cell_type": "code", "execution_count": 106, "id": "7bfcc1c1", "metadata": { "collapsed": true }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 106, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "dt = pd.DataFrame(grid4.cv_results_)\n", "dt.param_class_weight = dt.param_class_weight.astype(str)\n", "table = pd.pivot_table(dt, values='mean_test_score', index='param_class_weight')\n", " \n", "sns.heatmap(table)" ] }, { "cell_type": "code", "execution_count": 108, "id": "900034bc", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\Pranjal Ray\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n", " warn(\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "RandomForestClassifier(class_weight={0: 1, 1: 2}, criterion='entropy',\n", " max_depth=10, max_features='auto', min_samples_split=8,\n", " n_estimators=1000)\n", "\n", " Classification report : \n", " precision recall f1-score support\n", "\n", " 0 0.87 0.83 0.85 1035\n", " 1 0.59 0.66 0.63 374\n", "\n", " accuracy 0.79 1409\n", " macro avg 0.73 0.75 0.74 1409\n", "weighted avg 0.80 0.79 0.79 1409\n", "\n", "Accuracy Score : 0.7892122072391767\n", "Area under curve : 0.848934356351236 \n", "\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
 "# Final model. max_features='sqrt' replaces the deprecated 'auto' (removed in\n",
 "# scikit-learn 1.3); for RandomForestClassifier both mean sqrt(n_features).\n",
 "# NOTE(review): the saved grid4.best_estimator_ output shows class_weight {0: 1, 1: 3},\n",
 "# but {0: 1, 1: 2} is used here - confirm this is intentional.\n",
 "# NOTE(review): random_state=None means the fitted (and later pickled) model differs\n",
 "# between runs; consider fixing a seed.\n",
 "model = RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight={0: 1, 1: 2},\n",
 "                               criterion='entropy', max_depth=10, max_features='sqrt',\n",
 "                               max_leaf_nodes=None, max_samples=None,\n",
 "                               min_impurity_decrease=0.0,\n",
 "                               min_samples_leaf=1, min_samples_split=8,\n",
 "                               min_weight_fraction_leaf=0.0, n_estimators=1000,\n",
 "                               n_jobs=None, oob_score=False, random_state=None,\n",
 "                               verbose=0, warm_start=False)\n",
 "churn_prediction(model, train_x, train_y, test_x, test_y, x,\"features\")" ] }, { "cell_type": "code", "execution_count": 109, "id": "e2715af6", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\Pranjal Ray\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n", " warn(\n", "C:\\Users\\Pranjal Ray\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n", " warn(\n", "C:\\Users\\Pranjal Ray\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. 
To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n", " warn(\n", "C:\\Users\\Pranjal Ray\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n", " warn(\n", "C:\\Users\\Pranjal Ray\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n", " warn(\n" ] }, { "data": { "text/plain": [ "array([0.60615385, 0.66452648, 0.61538462, 0.58139535, 0.64050235])" ] }, "execution_count": 109, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_scores = cross_val_score(model, train_x, train_y, cv = 5, scoring='f1')\n", "train_scores" ] }, { "cell_type": "code", "execution_count": 110, "id": "acae271c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.6215925299829888" ] }, "execution_count": 110, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.mean(train_scores)" ] }, { "cell_type": "code", "execution_count": 112, "id": "de0e0c7c", "metadata": {}, "outputs": [], "source": [
 "import pickle\n",
 "# Persist the fitted model. The with-block closes the file even if dump() raises;\n",
 "# the previous bare open() left the handle to be closed by garbage collection.\n",
 "with open('model.pkl','wb') as model_file:\n",
 "    pickle.dump(model, model_file)" ] }, { "cell_type": "code", "execution_count": 117, "id": "b121638c", "metadata": {}, "outputs": [], "source": [ "import eli5\n", "from eli5.sklearn import PermutationImportance" ] }, { "cell_type": "code", "execution_count": 118, "id": "7ce2de5c", 
"metadata": { "collapsed": true }, "outputs": [ { "data": { "text/html": [ "\n", " \n", "\n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
WeightFeature
\n", " 0.0197\n", " \n", " ± 0.0065\n", " \n", " \n", " InternetService_Fiber optic\n", "
\n", " 0.0075\n", " \n", " ± 0.0095\n", " \n", " \n", " Contract_Two year\n", "
\n", " 0.0048\n", " \n", " ± 0.0053\n", " \n", " \n", " InternetService_No\n", "
\n", " 0.0044\n", " \n", " ± 0.0128\n", " \n", " \n", " Contract_One year\n", "
\n", " 0.0035\n", " \n", " ± 0.0070\n", " \n", " \n", " PaymentMethod_Electronic check\n", "
\n", " 0.0033\n", " \n", " ± 0.0060\n", " \n", " \n", " OnlineSecurity\n", "
\n", " 0.0033\n", " \n", " ± 0.0083\n", " \n", " \n", " MonthlyCharges\n", "
\n", " 0.0030\n", " \n", " ± 0.0094\n", " \n", " \n", " PaperlessBilling\n", "
\n", " 0.0009\n", " \n", " ± 0.0014\n", " \n", " \n", " OnlineBackup\n", "
\n", " 0.0009\n", " \n", " ± 0.0035\n", " \n", " \n", " gender\n", "
\n", " 0.0004\n", " \n", " ± 0.0128\n", " \n", " \n", " tenure\n", "
\n", " 0.0004\n", " \n", " ± 0.0038\n", " \n", " \n", " SeniorCitizen\n", "
\n", " 0.0004\n", " \n", " ± 0.0021\n", " \n", " \n", " TechSupport\n", "
\n", " 0.0001\n", " \n", " ± 0.0011\n", " \n", " \n", " DeviceProtection\n", "
\n", " 0.0000\n", " \n", " ± 0.0016\n", " \n", " \n", " PhoneService\n", "
\n", " -0.0004\n", " \n", " ± 0.0014\n", " \n", " \n", " MultipleLines\n", "
\n", " -0.0009\n", " \n", " ± 0.0030\n", " \n", " \n", " StreamingMovies\n", "
\n", " -0.0016\n", " \n", " ± 0.0014\n", " \n", " \n", " StreamingTV\n", "
\n", " -0.0021\n", " \n", " ± 0.0024\n", " \n", " \n", " Dependents\n", "
\n", " -0.0024\n", " \n", " ± 0.0014\n", " \n", " \n", " PaymentMethod_Credit card (automatic)\n", "
\n", " … 3 more …\n", "
\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "execution_count": 118, "metadata": {}, "output_type": "execute_result" } ], "source": [ "perm = PermutationImportance(model, random_state=1).fit(test_x, test_y)\n", "eli5.show_weights(perm, feature_names = test_x.columns.tolist())" ] }, { "cell_type": "code", "execution_count": 120, "id": "c5174f4d", "metadata": { "collapsed": true }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\Pranjal Ray\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\tqdm\\auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] }, { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import shap \n", "shap.initjs()\n", "\n", "import joblib" ] }, { "cell_type": "code", "execution_count": 121, "id": "a44271d4", "metadata": { "collapsed": true }, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", " Visualization omitted, Javascript library not loaded!
\n", " Have you run `initjs()` in this notebook? If this notebook was from another\n", " user you must also trust this notebook (File -> Trust notebook). If you are viewing\n", " this notebook on github the Javascript has been stripped for security. If you are using\n", " JupyterLab this error is because a JupyterLab extension has not yet been written.\n", "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 121, "metadata": {}, "output_type": "execute_result" } ], "source": [ "explainer = shap.TreeExplainer(model)\n", "\n", "shap_values = explainer.shap_values(np.array(test_x.iloc[0]))\n", "shap.force_plot(explainer.expected_value[1], shap_values[1], test_x.iloc[0])" ] }, { "cell_type": "code", "execution_count": 122, "id": "38982986", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['explainer.bz2']" ] }, "execution_count": 122, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ex_filename = 'explainer.bz2'\n", "joblib.dump(explainer, filename=ex_filename, compress=('bz2', 9))" ] }, { "cell_type": "code", "execution_count": 126, "id": "6b0ba38b", "metadata": { "collapsed": true }, "outputs": [ { "ename": "NameError", "evalue": "name 'bbox_plot' is not defined", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", "Input \u001b[1;32mIn [126]\u001b[0m, in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 1\u001b[0m explainer \u001b[38;5;241m=\u001b[39m joblib\u001b[38;5;241m.\u001b[39mload(filename\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mexplainer.bz2\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 2\u001b[0m shap_values \u001b[38;5;241m=\u001b[39m explainer\u001b[38;5;241m.\u001b[39mshap_values(np\u001b[38;5;241m.\u001b[39marray(test_x\u001b[38;5;241m.\u001b[39miloc[\u001b[38;5;241m0\u001b[39m]))\n\u001b[1;32m----> 3\u001b[0m shap\u001b[38;5;241m.\u001b[39mforce_plot(explainer\u001b[38;5;241m.\u001b[39mexpected_value[\u001b[38;5;241m1\u001b[39m], shap_values[\u001b[38;5;241m1\u001b[39m], \u001b[38;5;28mlist\u001b[39m(test_x\u001b[38;5;241m.\u001b[39mcolumns), matplotlib \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m, show \u001b[38;5;241m=\u001b[39m 
\u001b[38;5;28;01mFalse\u001b[39;00m)\u001b[38;5;241m.\u001b[39msavefig(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mstatic/images/shap.png\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[43mbbox_plot\u001b[49m)\n", "\u001b[1;31mNameError\u001b[0m: name 'bbox_plot' is not defined" ] }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
 "explainer = joblib.load(filename=\"explainer.bz2\")\n",
 "shap_values = explainer.shap_values(np.array(test_x.iloc[0]))\n",
 "# `bbox_plot` was an undefined name and raised NameError; bbox_inches='tight' is\n",
 "# presumably what was intended - confirm. The static/images directory must already exist.\n",
 "shap.force_plot(explainer.expected_value[1], shap_values[1], list(test_x.columns), matplotlib = True, show = False).savefig('static/images/shap.png', bbox_inches='tight')" ] }, { "cell_type": "code", "execution_count": 127, "id": "40b5bcd7", "metadata": {}, "outputs": [], "source": [
 "from matplotlib.patches import Circle, Wedge, Rectangle\n",
 "\n",
 "def degree_range(n):\n",
 "    \"\"\"Split the 0-180 degree half-circle into `n` equal sectors.\n",
 "\n",
 "    Returns an (n, 2) array of [start, end] angles and an array of the n mid-points.\n",
 "    \"\"\"\n",
 "    edges = np.linspace(0, 180, n + 1, endpoint=True)\n",
 "    start, end = edges[:-1], edges[1:]\n",
 "    mid_points = start + ((end - start) / 2.)\n",
 "    return np.c_[start, end], mid_points\n",
 "\n",
 "def rot_text(ang):\n",
 "    \"\"\"Return `ang` shifted by -90 degrees, used as the rotation of a sector label.\"\"\"\n",
 "    # The former `* np.pi / np.pi` factor cancelled out and has been dropped.\n",
 "    rotation = np.degrees(np.radians(ang) - np.radians(90))\n",
 "    return rotation\n",
 "\n",
 "def gauge(labels=['LOW','MEDIUM','HIGH','EXTREME'],\n",
 "          colors=['#007A00','#0063BF','#FFCC00','#ED1C24'], Probability=1, fname=False):\n",
 "    \"\"\"Draw a half-circle gauge whose needle marks a churn probability.\n",
 "\n",
 "    labels, colors: one entry per sector; both are reversed before drawing and the\n",
 "        sectors are laid out from 0 to 180 degrees, so the first entry lands on the left.\n",
 "    Probability: needle position; the needle angle is (1 - Probability) * 180 degrees.\n",
 "    fname: when truthy, the figure is also saved to this path at 200 dpi.\n",
 "\n",
 "    Raises ValueError if labels and colors differ in length.\n",
 "    \"\"\"\n",
 "    N = len(labels)\n",
 "    if len(colors) != N:\n",
 "        raise ValueError(\"labels and colors must have the same length\")\n",
 "    colors = colors[::-1]\n",
 "    fig, ax = plt.subplots()\n",
 "    # One sector per label; this was hard-coded to 4 and ignored N.\n",
 "    ang_range, mid_points = degree_range(N)\n",
 "    labels = labels[::-1]\n",
 "    patches = []\n",
 "    for ang, c in zip(ang_range, colors):\n",
 "        # sectors\n",
 "        patches.append(Wedge((0.,0.), .4, *ang, facecolor='w', lw=2))\n",
 "        # arcs\n",
 "        patches.append(Wedge((0.,0.), .4, *ang, width=0.10, facecolor=c, lw=2, alpha=0.5))\n",
 "    for p in patches:\n",
 "        ax.add_patch(p)\n",
 "    for mid, lab in zip(mid_points, labels):\n",
 "        ax.text(0.35 * np.cos(np.radians(mid)), 0.35 * np.sin(np.radians(mid)), lab,\n",
 "                horizontalalignment='center', verticalalignment='center', fontsize=14,\n",
 "                fontweight='bold', rotation = rot_text(mid))\n",
 "    # White banner under the dial that carries the numeric read-out.\n",
 "    r = Rectangle((-0.4,-0.1),0.8,0.1, facecolor='w', lw=2)\n",
 "    ax.add_patch(r)\n",
 "    ax.text(0, -0.05, 'Churn Probability ' + np.round(Probability,2).astype(str), horizontalalignment='center',\n",
 "            verticalalignment='center', fontsize=22, fontweight='bold')\n",
 "    # Probability 0 points the needle at 180 degrees (left), 1 at 0 degrees (right).\n",
 "    pos = (1-Probability)*180\n",
 "    ax.arrow(0, 0, 0.225 * np.cos(np.radians(pos)), 0.225 * np.sin(np.radians(pos)),\n",
 "             width=0.04, head_width=0.09, head_length=0.1, fc='k', ec='k')\n",
 "    ax.add_patch(Circle((0, 0), radius=0.02, facecolor='k'))\n",
 "    ax.add_patch(Circle((0, 0), radius=0.01, facecolor='w', zorder=11))\n",
 "    ax.set_frame_on(False)\n",
 "    ax.axes.set_xticks([])\n",
 "    ax.axes.set_yticks([])\n",
 "    ax.axis('equal')\n",
 "    plt.tight_layout()\n",
 "    if fname:\n",
 "        fig.savefig(fname, dpi=200)" ] }, { "cell_type": "code", "execution_count": 128, "id": "7c7086e9", "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "gauge(Probability=model.predict_proba(test_x.iloc[0:1])[0,1])" ] }, { "cell_type": "code", "execution_count": 129, "id": "3e328e35", "metadata": { "collapsed": true }, "outputs": [ { "data": { "text/plain": [ "Index(['gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure',\n", " 'PhoneService', 'MultipleLines', 'OnlineSecurity', 'OnlineBackup',\n", " 'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies',\n", " 'PaperlessBilling', 'MonthlyCharges', 'TotalCharges',\n", " 'InternetService_Fiber optic', 'InternetService_No',\n", " 'Contract_One year', 'Contract_Two year',\n", " 'PaymentMethod_Credit card (automatic)',\n", " 'PaymentMethod_Electronic check', 'PaymentMethod_Mailed check'],\n", " dtype='object')" ] }, "execution_count": 129, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test_x.columns" ] }, { "cell_type": "code", "execution_count": null, "id": "73bead14", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.4" } }, "nbformat": 4, "nbformat_minor": 5 }