Spaces:
Sleeping
Sleeping
File size: 51,999 Bytes
6f00d18 |
|
{
"cells": [
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import seaborn as sns"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>in_sf</th>\n",
" <th>beds</th>\n",
" <th>bath</th>\n",
" <th>price</th>\n",
" <th>year_built</th>\n",
" <th>sqft</th>\n",
" <th>price_per_sqft</th>\n",
" <th>elevation</th>\n",
" <th>city</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" <td>999000</td>\n",
" <td>1960</td>\n",
" <td>1000</td>\n",
" <td>999</td>\n",
" <td>10</td>\n",
" <td>NY</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>2750000</td>\n",
" <td>2006</td>\n",
" <td>1418</td>\n",
" <td>1939</td>\n",
" <td>0</td>\n",
" <td>NY</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>1350000</td>\n",
" <td>1900</td>\n",
" <td>2150</td>\n",
" <td>628</td>\n",
" <td>9</td>\n",
" <td>NY</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>629000</td>\n",
" <td>1903</td>\n",
" <td>500</td>\n",
" <td>1258</td>\n",
" <td>9</td>\n",
" <td>NY</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>439000</td>\n",
" <td>1930</td>\n",
" <td>500</td>\n",
" <td>878</td>\n",
" <td>10</td>\n",
" <td>NY</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" in_sf beds bath price year_built sqft price_per_sqft elevation \\\n",
"0 0 2.0 1.0 999000 1960 1000 999 10 \n",
"1 0 2.0 2.0 2750000 2006 1418 1939 0 \n",
"2 0 2.0 2.0 1350000 1900 2150 628 9 \n",
"3 0 1.0 1.0 629000 1903 500 1258 9 \n",
"4 0 0.0 1.0 439000 1930 500 878 10 \n",
"\n",
" city \n",
"0 NY \n",
"1 NY \n",
"2 NY \n",
"3 NY \n",
"4 NY "
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Read the NY-vs-SF housing CSV and preview its first five rows.\n",
"df = pd.read_csv('../data/ny-vs-sf-houses.csv')\n",
"df.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([<Axes: title={'center': 'NY'}>, <Axes: title={'center': 'SF'}>],\n",
" dtype=object)"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Per-city elevation histograms; the trailing semicolon hides the Axes-array repr.\n",
"df.hist(column='elevation', by='city');"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Stacked elevation histogram coloured by city.\n",
"# sns.histplot returns a matplotlib Axes, so the handle is named `ax`.\n",
"ax = sns.histplot(df, x='elevation', hue='city', multiple='stack')\n",
"ax.set_title('Elevation by city');"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>in_sf</th>\n",
" <th>beds</th>\n",
" <th>bath</th>\n",
" <th>price</th>\n",
" <th>year_built</th>\n",
" <th>sqft</th>\n",
" <th>price_per_sqft</th>\n",
" <th>elevation</th>\n",
" <th>city</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>999000</td>\n",
" <td>1982</td>\n",
" <td>784</td>\n",
" <td>1274</td>\n",
" <td>5</td>\n",
" <td>NY</td>\n",
" </tr>\n",
" <tr>\n",
" <th>58</th>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>775000</td>\n",
" <td>2009</td>\n",
" <td>546</td>\n",
" <td>1419</td>\n",
" <td>6</td>\n",
" <td>NY</td>\n",
" </tr>\n",
" <tr>\n",
" <th>44</th>\n",
" <td>0</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>3995000</td>\n",
" <td>1906</td>\n",
" <td>2400</td>\n",
" <td>1665</td>\n",
" <td>10</td>\n",
" <td>NY</td>\n",
" </tr>\n",
" <tr>\n",
" <th>439</th>\n",
" <td>1</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>849000</td>\n",
" <td>1947</td>\n",
" <td>1622</td>\n",
" <td>523</td>\n",
" <td>106</td>\n",
" <td>SF</td>\n",
" </tr>\n",
" <tr>\n",
" <th>220</th>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>529000</td>\n",
" <td>1986</td>\n",
" <td>650</td>\n",
" <td>814</td>\n",
" <td>0</td>\n",
" <td>NY</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" in_sf beds bath price year_built sqft price_per_sqft elevation \\\n",
"11 0 1.0 1.0 999000 1982 784 1274 5 \n",
"58 0 0.0 1.0 775000 2009 546 1419 6 \n",
"44 0 2.0 2.0 3995000 1906 2400 1665 10 \n",
"439 1 3.0 2.0 849000 1947 1622 523 106 \n",
"220 0 1.0 1.0 529000 1986 650 814 0 \n",
"\n",
" city \n",
"11 NY \n",
"58 NY \n",
"44 NY \n",
"439 SF \n",
"220 NY "
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Mean listing price per city, rounded to whole units. display() is used\n",
"# because a notebook cell only renders its last expression.\n",
"display(df.groupby('city')['price'].mean().round(0))\n",
"\n",
"# Fixed random_state so the same five rows are sampled on every run.\n",
"df.sample(n=5, random_state=42)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>-0.288585</td>\n",
" <td>-1.823887</td>\n",
" <td>-0.930694</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>-2.020028</td>\n",
" <td>0.322731</td>\n",
" <td>1.634198</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>-0.551412</td>\n",
" <td>0.966280</td>\n",
" <td>0.689169</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.548931</td>\n",
" <td>-0.416653</td>\n",
" <td>0.088240</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>-1.180181</td>\n",
" <td>-0.218380</td>\n",
" <td>0.350026</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>0.984890</td>\n",
" <td>-0.620657</td>\n",
" <td>0.218497</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>-1.174211</td>\n",
" <td>0.985980</td>\n",
" <td>0.591793</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>-0.461553</td>\n",
" <td>2.075740</td>\n",
" <td>0.371126</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>-1.692228</td>\n",
" <td>1.191046</td>\n",
" <td>0.863126</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>-0.354696</td>\n",
" <td>-0.853733</td>\n",
" <td>1.799386</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>1.165250</td>\n",
" <td>2.035038</td>\n",
" <td>-0.953814</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>-0.899864</td>\n",
" <td>-0.469766</td>\n",
" <td>0.531577</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>-0.608545</td>\n",
" <td>-0.576878</td>\n",
" <td>0.674811</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>-1.193892</td>\n",
" <td>-0.498491</td>\n",
" <td>-2.542653</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>-1.729489</td>\n",
" <td>0.637867</td>\n",
" <td>-1.093362</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>-0.994024</td>\n",
" <td>-0.714704</td>\n",
" <td>-0.166540</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>0.098802</td>\n",
" <td>1.637319</td>\n",
" <td>0.922935</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>1.326211</td>\n",
" <td>0.282339</td>\n",
" <td>1.709445</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>-0.955390</td>\n",
" <td>1.292335</td>\n",
" <td>-0.140798</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>0.019700</td>\n",
" <td>-2.386171</td>\n",
" <td>-0.452989</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a b c\n",
"0 -0.288585 -1.823887 -0.930694\n",
"1 -2.020028 0.322731 1.634198\n",
"2 -0.551412 0.966280 0.689169\n",
"3 0.548931 -0.416653 0.088240\n",
"4 -1.180181 -0.218380 0.350026\n",
"5 0.984890 -0.620657 0.218497\n",
"6 -1.174211 0.985980 0.591793\n",
"7 -0.461553 2.075740 0.371126\n",
"8 -1.692228 1.191046 0.863126\n",
"9 -0.354696 -0.853733 1.799386\n",
"10 1.165250 2.035038 -0.953814\n",
"11 -0.899864 -0.469766 0.531577\n",
"12 -0.608545 -0.576878 0.674811\n",
"13 -1.193892 -0.498491 -2.542653\n",
"14 -1.729489 0.637867 -1.093362\n",
"15 -0.994024 -0.714704 -0.166540\n",
"16 0.098802 1.637319 0.922935\n",
"17 1.326211 0.282339 1.709445\n",
"18 -0.955390 1.292335 -0.140798\n",
"19 0.019700 -2.386171 -0.452989"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 20x3 frame of standard-normal draws; the generator is seeded so the values\n",
"# are the same on every run. numpy/pandas come from the imports cell.\n",
"rng = np.random.default_rng(42)\n",
"chart_data = pd.DataFrame(rng.standard_normal((20, 3)), columns=[\"a\", \"b\", \"c\"])\n",
"chart_data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
|