{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "url = 'https://raw.githubusercontent.com/nikhil-xyz/datasets/main/insurance.csv'" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "data = pd.read_csv(url)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(1338, 7)" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.shape" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
agesexbmichildrensmokerregionexpenses
019female27.90yessouthwest16884.92
118male33.81nosoutheast1725.55
228male33.03nosoutheast4449.46
333male22.70nonorthwest21984.47
432male28.90nonorthwest3866.86
\n", "
" ], "text/plain": [ " age sex bmi children smoker region expenses\n", "0 19 female 27.9 0 yes southwest 16884.92\n", "1 18 male 33.8 1 no southeast 1725.55\n", "2 28 male 33.0 3 no southeast 4449.46\n", "3 33 male 22.7 0 no northwest 21984.47\n", "4 32 male 28.9 0 no northwest 3866.86" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.head()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "age 0\n", "sex 0\n", "bmi 0\n", "children 0\n", "smoker 0\n", "region 0\n", "expenses 0\n", "dtype: int64" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.isna().sum()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.duplicated().sum()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "data.drop_duplicates(inplace=True)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.duplicated().sum()" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Index: 1337 entries, 0 to 1337\n", "Data columns (total 7 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 age 1337 non-null int64 \n", " 1 sex 1337 non-null object \n", " 2 bmi 1337 non-null float64\n", " 3 children 1337 non-null int64 \n", " 4 smoker 1337 non-null object \n", " 5 region 1337 non-null object \n", " 6 expenses 1337 non-null float64\n", "dtypes: float64(2), int64(2), object(3)\n", "memory usage: 83.6+ KB\n" ] } ], "source": [ "data.info()" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
agebmichildrenexpenses
count1337.0000001337.0000001337.0000001337.000000
mean39.22213930.6655201.09573713279.121638
std14.0443336.1006641.20557112110.359657
min18.00000016.0000000.0000001121.870000
25%27.00000026.3000000.0000004746.340000
50%39.00000030.4000001.0000009386.160000
75%51.00000034.7000002.00000016657.720000
max64.00000053.1000005.00000063770.430000
\n", "
" ], "text/plain": [ " age bmi children expenses\n", "count 1337.000000 1337.000000 1337.000000 1337.000000\n", "mean 39.222139 30.665520 1.095737 13279.121638\n", "std 14.044333 6.100664 1.205571 12110.359657\n", "min 18.000000 16.000000 0.000000 1121.870000\n", "25% 27.000000 26.300000 0.000000 4746.340000\n", "50% 39.000000 30.400000 1.000000 9386.160000\n", "75% 51.000000 34.700000 2.000000 16657.720000\n", "max 64.000000 53.100000 5.000000 63770.430000" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.describe()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- all candidated have age between 18 and 64\n", "- 50% of candidated either have one kid or no kid" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**categories inside the categorical data**" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Categories in 'sex' variable: ['female' 'male']\n", "Categories in 'smoker' variable: ['yes' 'no']\n", "Categories in 'region' variable: ['southwest' 'southeast' 'northwest' 'northeast']\n" ] } ], "source": [ "print(\"Categories in 'sex' variable: \",end=\" \" )\n", "print(data['sex'].unique())\n", "\n", "print(\"Categories in 'smoker' variable: \",end=\" \" )\n", "print(data['smoker'].unique())\n", "\n", "print(\"Categories in 'region' variable: \",end=\" \" )\n", "print(data['region'].unique())" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.19" } }, "nbformat": 4, "nbformat_minor": 2 }