{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "ad7592e7", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_2459435/230780042.py:2: DtypeWarning: Columns (25) have mixed types. Specify dtype option on import or set low_memory=False.\n", " data = pd.read_csv('/home/xj/toolAugEnv/code/toolConstraint/database/hotels/Airbnb_Open_Data.csv')\n" ] } ], "source": [ "import pandas as pd\n", "data = pd.read_csv('/home/xj/toolAugEnv/code/toolConstraint/database/hotels/Airbnb_Open_Data.csv')" ] }, { "cell_type": "code", "execution_count": 2, "id": "f97916a9", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idNAMEhost idhost_identity_verifiedhost nameneighbourhood groupneighbourhoodlatlongcountry...service feeminimum nightsnumber of reviewslast reviewreviews per monthreview rate numbercalculated host listings countavailability 365house_ruleslicense
01001254Clean & quiet apt home by the park80014485718unconfirmedMadalineBrooklynKensington40.64749-73.97237United States...$19310.09.010/19/20210.214.06.0286.0Clean up and treat the home the way you'd like...NaN
11002102Skylit Midtown Castle52335172823verifiedJennaManhattanMidtown40.75362-73.98377United States...$2830.045.05/21/20220.384.02.0228.0Pet friendly but please confirm with me if the...NaN
21002403THE VILLAGE OF HARLEM....NEW YORK !78829239556NaNEliseManhattanHarlem40.80902-73.94190United States...$1243.00.0NaNNaN5.01.0352.0I encourage you to use my kitchen, cooking and...NaN
31002755NaN85098326012unconfirmedGarryBrooklynClinton Hill40.68514-73.95976United States...$7430.0270.07/5/20194.644.01.0322.0NaNNaN
41003689Entire Apt: Spacious Studio/Loft by central park92037596077verifiedLyndonManhattanEast Harlem40.79851-73.94399United States...$4110.09.011/19/20180.103.01.0289.0Please no smoking in the house, porch or on th...NaN
..................................................................
1025946092437Spare room in Williamsburg12312296767verifiedKrikBrooklynWilliamsburg40.70862-73.94651United States...$1691.00.0NaNNaN3.01.0227.0No Smoking No Parties or Events of any kind Pl...NaN
1025956092990Best Location near Columbia U77864383453unconfirmedMifanManhattanMorningside Heights40.80460-73.96545United States...$1671.01.07/6/20150.022.02.0395.0House rules: Guests agree to the following ter...NaN
1025966093542Comfy, bright room in Brooklyn69050334417unconfirmedMeganBrooklynPark Slope40.67505-73.98045United States...$1983.00.0NaNNaN5.01.0342.0NaNNaN
1025976094094Big Studio-One Stop from Midtown11160591270unconfirmedChristopherQueensLong Island City40.74989-73.93777United States...$1092.05.010/11/20150.103.01.0386.0NaNNaN
1025986094647585 sf Luxury Studio68170633372unconfirmedRebeccaManhattanUpper West Side40.76807-73.98342United States...$2061.00.0NaNNaN3.01.069.0NaNNaN
\n", "

102599 rows × 26 columns

\n", "
" ], "text/plain": [ " id NAME \n", "0 1001254 Clean & quiet apt home by the park \\\n", "1 1002102 Skylit Midtown Castle \n", "2 1002403 THE VILLAGE OF HARLEM....NEW YORK ! \n", "3 1002755 NaN \n", "4 1003689 Entire Apt: Spacious Studio/Loft by central park \n", "... ... ... \n", "102594 6092437 Spare room in Williamsburg \n", "102595 6092990 Best Location near Columbia U \n", "102596 6093542 Comfy, bright room in Brooklyn \n", "102597 6094094 Big Studio-One Stop from Midtown \n", "102598 6094647 585 sf Luxury Studio \n", "\n", " host id host_identity_verified host name neighbourhood group \n", "0 80014485718 unconfirmed Madaline Brooklyn \\\n", "1 52335172823 verified Jenna Manhattan \n", "2 78829239556 NaN Elise Manhattan \n", "3 85098326012 unconfirmed Garry Brooklyn \n", "4 92037596077 verified Lyndon Manhattan \n", "... ... ... ... ... \n", "102594 12312296767 verified Krik Brooklyn \n", "102595 77864383453 unconfirmed Mifan Manhattan \n", "102596 69050334417 unconfirmed Megan Brooklyn \n", "102597 11160591270 unconfirmed Christopher Queens \n", "102598 68170633372 unconfirmed Rebecca Manhattan \n", "\n", " neighbourhood lat long country ... \n", "0 Kensington 40.64749 -73.97237 United States ... \\\n", "1 Midtown 40.75362 -73.98377 United States ... \n", "2 Harlem 40.80902 -73.94190 United States ... \n", "3 Clinton Hill 40.68514 -73.95976 United States ... \n", "4 East Harlem 40.79851 -73.94399 United States ... \n", "... ... ... ... ... ... \n", "102594 Williamsburg 40.70862 -73.94651 United States ... \n", "102595 Morningside Heights 40.80460 -73.96545 United States ... \n", "102596 Park Slope 40.67505 -73.98045 United States ... \n", "102597 Long Island City 40.74989 -73.93777 United States ... \n", "102598 Upper West Side 40.76807 -73.98342 United States ... \n", "\n", " service fee minimum nights number of reviews last review \n", "0 $193 10.0 9.0 10/19/2021 \\\n", "1 $28 30.0 45.0 5/21/2022 \n", "2 $124 3.0 0.0 NaN \n", "3 $74 30.0 270.0 7/5/2019 \n", "4 $41 10.0 9.0 11/19/2018 \n", "... ... ... ... ... \n", "102594 $169 1.0 0.0 NaN \n", "102595 $167 1.0 1.0 7/6/2015 \n", "102596 $198 3.0 0.0 NaN \n", "102597 $109 2.0 5.0 10/11/2015 \n", "102598 $206 1.0 0.0 NaN \n", "\n", " reviews per month review rate number calculated host listings count \n", "0 0.21 4.0 6.0 \\\n", "1 0.38 4.0 2.0 \n", "2 NaN 5.0 1.0 \n", "3 4.64 4.0 1.0 \n", "4 0.10 3.0 1.0 \n", "... ... ... ... \n", "102594 NaN 3.0 1.0 \n", "102595 0.02 2.0 2.0 \n", "102596 NaN 5.0 1.0 \n", "102597 0.10 3.0 1.0 \n", "102598 NaN 3.0 1.0 \n", "\n", " availability 365 house_rules \n", "0 286.0 Clean up and treat the home the way you'd like... \\\n", "1 228.0 Pet friendly but please confirm with me if the... \n", "2 352.0 I encourage you to use my kitchen, cooking and... \n", "3 322.0 NaN \n", "4 289.0 Please no smoking in the house, porch or on th... \n", "... ... ... \n", "102594 227.0 No Smoking No Parties or Events of any kind Pl... \n", "102595 395.0 House rules: Guests agree to the following ter... \n", "102596 342.0 NaN \n", "102597 386.0 NaN \n", "102598 69.0 NaN \n", "\n", " license \n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "... ... \n", "102594 NaN \n", "102595 NaN \n", "102596 NaN \n", "102597 NaN \n", "102598 NaN \n", "\n", "[102599 rows x 26 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data" ] }, { "cell_type": "code", "execution_count": 3, "id": "e21af5d1", "metadata": {}, "outputs": [], "source": [ "flight = pd.read_csv('/home/xj/toolAugEnv/code/toolConstraint/database/flights/clean_Flights_2022.csv')" ] }, { "cell_type": "code", "execution_count": 4, "id": "966feef9", "metadata": {}, "outputs": [], "source": [ "flight = flight.to_dict(orient = 'split')" ] }, { "cell_type": "code", "execution_count": 5, "id": "3f4fe062", "metadata": {}, "outputs": [], "source": [ "data_dict = data.to_dict(orient = 'split')" ] }, { "cell_type": "code", "execution_count": 6, "id": "33213ac0", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[2, '2022-04-04', '15:14', '16:36', 251.0, 'Durango', 'Denver', 100]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "flight['data'][2]" ] }, { "cell_type": "code", "execution_count": 8, "id": "9cef6161", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "nan\n" ] } ], "source": [ "print(str(data_dict['data'][3][24]))" ] }, { "cell_type": "code", "execution_count": 9, "id": "c5f81f43", "metadata": {}, "outputs": [], "source": [ "city_set = set()\n", "cnt = 0\n", "for unit in data_dict['data']:\n", " if str(unit[24]) != 'nan':\n", " cnt += 1" ] }, { "cell_type": "code", "execution_count": 10, "id": "533a5aa6", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "50468" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 11, "id": "bfce5f56", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "set()" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "city_set" ] }, { "cell_type": "code", "execution_count": 12, "id": "230b760c", "metadata": {}, "outputs": [ { "ename": "ValueError", "evalue": "Sample larger than population or is negative", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[12], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mrandom\u001b[39;00m\n\u001b[1;32m 2\u001b[0m city_set \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(city_set)\n\u001b[0;32m----> 3\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[43mrandom\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msample\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcity_set\u001b[49m\u001b[43m,\u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m)\n", "File \u001b[0;32m~/miniconda3/envs/py39/lib/python3.9/random.py:449\u001b[0m, in \u001b[0;36mRandom.sample\u001b[0;34m(self, population, k, counts)\u001b[0m\n\u001b[1;32m 447\u001b[0m randbelow \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_randbelow\n\u001b[1;32m 448\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;241m0\u001b[39m \u001b[38;5;241m<\u001b[39m\u001b[38;5;241m=\u001b[39m k \u001b[38;5;241m<\u001b[39m\u001b[38;5;241m=\u001b[39m n:\n\u001b[0;32m--> 449\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSample larger than population or is negative\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 450\u001b[0m result \u001b[38;5;241m=\u001b[39m [\u001b[38;5;28;01mNone\u001b[39;00m] \u001b[38;5;241m*\u001b[39m k\n\u001b[1;32m 451\u001b[0m setsize \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m21\u001b[39m \u001b[38;5;66;03m# size of a small set minus size of an empty list\u001b[39;00m\n", "\u001b[0;31mValueError\u001b[0m: Sample larger than population or is negative" ] } ], "source": [ "import random\n", "city_set = list(city_set)\n", "print(random.sample(city_set,1))" ] }, { "cell_type": "code", "execution_count": 12, "id": "61eddd5f", "metadata": {}, "outputs": [ { "ename": "AttributeError", "evalue": "'dict' object has no attribute 'to_dict'", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[12], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m data_dict \u001b[38;5;241m=\u001b[39m \u001b[43mdata\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_dict\u001b[49m(orient \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124msplit\u001b[39m\u001b[38;5;124m'\u001b[39m)\n", "\u001b[0;31mAttributeError\u001b[0m: 'dict' object has no attribute 'to_dict'" ] } ], "source": [ "data_dict = data.to_dict(orient = 'split')" ] }, { "cell_type": "code", "execution_count": 35, "id": "3292c450", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['Unnamed: 0',\n", " 'NAME',\n", " 'room type',\n", " 'price',\n", " 'minimum nights',\n", " 'review rate number',\n", " 'house_rules',\n", " 'maximum occupancy',\n", " 'city']" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data_dict['columns']" ] }, { "cell_type": "code", "execution_count": 38, "id": "cfaa21d9", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "5047" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(data_dict['data'])" ] }, { "cell_type": "code", "execution_count": 36, "id": "2980362d", "metadata": {}, "outputs": [], "source": [ "type_set = set()\n", "for unit in data_dict['data']:\n", " type_set.add(unit[2])" ] }, { "cell_type": "code", "execution_count": 37, "id": "f5e36fbb", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'Entire home/apt', 'Private room', 'Shared room'}" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type_set" ] }, { "cell_type": "code", "execution_count": 15, "id": "bf1231c4", "metadata": {}, "outputs": [ { "ename": "NameError", "evalue": "name 'data_dict' is not defined", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[15], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mdata_dict\u001b[49m[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdata\u001b[39m\u001b[38;5;124m'\u001b[39m][\u001b[38;5;241m147\u001b[39m]\n", "\u001b[0;31mNameError\u001b[0m: name 'data_dict' is not defined" ] } ], "source": [ "data_dict['data'][147]" ] }, { "cell_type": "code", "execution_count": 14, "id": "f993b894", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "set()" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type_set" ] }, { "cell_type": "code", "execution_count": 10, "id": "916e9470", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1 NAME\n", "7 lat\n", "8 long\n", "13 room type\n", "15 price\n", "17 minimum nights\n", "21 review rate number\n", "24 house_rules\n" ] } ], "source": [ "for idx, unit in enumerate(data_dict['columns']):\n", " if unit in ['NAME','lat', 'long', 'room type', 'price','minimum nights','review rate number','house_rules']:\n", " print(idx,unit)" ] }, { "cell_type": "code", "execution_count": 73, "id": "1213484d", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "51764c1a3739416289913ec613816cc7", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_3241846/557604333.py:23: DeprecationWarning: Sampling from a set deprecated\n", "since Python 3.9 and will be removed in a subsequent version.\n", " tmp_dict[\"city\"] = random.sample(city_set,1)[0]\n" ] }, { "ename": "ValueError", "evalue": "Sample larger than population or is negative", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[73], line 23\u001b[0m\n\u001b[1;32m 21\u001b[0m tmp_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mreview rate number\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m unit[\u001b[38;5;241m21\u001b[39m]\n\u001b[1;32m 22\u001b[0m tmp_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhouse_rules\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m unit[\u001b[38;5;241m24\u001b[39m]\n\u001b[0;32m---> 23\u001b[0m tmp_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcity\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[43mrandom\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msample\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcity_set\u001b[49m\u001b[43m,\u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m 24\u001b[0m new_data\u001b[38;5;241m.\u001b[39mappend(tmp_dict)\n", "File \u001b[0;32m~/miniconda3/envs/py39/lib/python3.9/random.py:449\u001b[0m, in \u001b[0;36mRandom.sample\u001b[0;34m(self, population, k, counts)\u001b[0m\n\u001b[1;32m 447\u001b[0m randbelow \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_randbelow\n\u001b[1;32m 448\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;241m0\u001b[39m \u001b[38;5;241m<\u001b[39m\u001b[38;5;241m=\u001b[39m k \u001b[38;5;241m<\u001b[39m\u001b[38;5;241m=\u001b[39m n:\n\u001b[0;32m--> 449\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSample larger than population or is negative\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 450\u001b[0m result \u001b[38;5;241m=\u001b[39m [\u001b[38;5;28;01mNone\u001b[39;00m] \u001b[38;5;241m*\u001b[39m k\n\u001b[1;32m 451\u001b[0m setsize \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m21\u001b[39m \u001b[38;5;66;03m# size of a small set minus size of an empty list\u001b[39;00m\n", "\u001b[0;31mValueError\u001b[0m: Sample larger than population or is negative" ] } ], "source": [ "from tqdm.autonotebook import tqdm\n", "import random\n", "new_data = []\n", "for idx, unit in tqdm(enumerate(data_dict['data'])):\n", " tmp_dict = {k:\"\" for k in ['NAME','room type', 'price','minimum nights','review rate number','house_rules']}\n", " tmp_dict[\"NAME\"] = unit[1]\n", " tmp_dict[\"room type\"] = unit[13]\n", " if unit[13] == \"Shared room\":\n", " tmp_dict[\"maximum occupancy\"] = 1\n", " elif unit[13] == \"Hotel room\":\n", " tmp_dict[\"maximum occupancy\"] = random.randint(1, 2)\n", " elif unit[13] == \"Private room\":\n", " tmp_dict[\"maximum occupancy\"] = random.randint(1, 2)\n", " elif unit[13] == \"Entire home/apt\":\n", " try:\n", " tmp_dict[\"maximum occupancy\"] = random.randint(2, max(3,eval(unit[15].replace(\"$\",\"\").replace(\",\",\"\"))//100))\n", " except:\n", " tmp_dict[\"maximum occupancy\"] = random.randint(2, max(3,unit[15]//100))\n", " tmp_dict[\"price\"] = unit[15].replace(\"$\",\"\").replace(\",\",\"\")\n", " tmp_dict[\"minimum nights\"] = unit[17]\n", " tmp_dict[\"review rate number\"] = unit[21]\n", " tmp_dict[\"house_rules\"] = unit[24]\n", " tmp_dict[\"city\"] = random.sample(city_set,1)[0]\n", " new_data.append(tmp_dict)" ] }, { "cell_type": "code", "execution_count": 20, "id": "fd3e8257", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "102599" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(new_data)" ] }, { "cell_type": "code", "execution_count": 21, "id": "bfb243c0", "metadata": {}, "outputs": [], "source": [ "df = pd.DataFrame(new_data)" ] }, { "cell_type": "code", "execution_count": 23, "id": "af7e3411", "metadata": {}, "outputs": [], "source": [ "df.to_csv('/home/xj/toolAugEnv/code/toolConstraint/database/hotels/clean_hotels_2022.csv')" ] }, { "cell_type": "code", "execution_count": 22, "id": "71d21fea", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NAMEroom typepriceminimum nightsreview rate numberhouse_rulesmaximum occupancycity
0Clean & quiet apt home by the parkPrivate room$96610.04.0Clean up and treat the home the way you'd like...1Des Moines
1Skylit Midtown CastleEntire home/apt$14230.04.0Pet friendly but please confirm with me if the...2Wilmington
2THE VILLAGE OF HARLEM....NEW YORK !Private room$6203.05.0I encourage you to use my kitchen, cooking and...2St. George
3NaNEntire home/apt$36830.04.0NaN2Kalamazoo
4Entire Apt: Spacious Studio/Loft by central parkEntire home/apt$20410.03.0Please no smoking in the house, porch or on th...3Cheyenne
...........................
102594Spare room in WilliamsburgPrivate room$8441.03.0No Smoking No Parties or Events of any kind Pl...1White Plains
102595Best Location near Columbia UPrivate room$8371.02.0House rules: Guests agree to the following ter...2Mosinee
102596Comfy, bright room in BrooklynPrivate room$9883.05.0NaN2Amarillo
102597Big Studio-One Stop from MidtownEntire home/apt$5462.03.0NaN4Binghamton
102598585 sf Luxury StudioEntire home/apt$1,0321.03.0NaN7Flint
\n", "

102599 rows × 8 columns

\n", "
" ], "text/plain": [ " NAME room type \n", "0 Clean & quiet apt home by the park Private room \\\n", "1 Skylit Midtown Castle Entire home/apt \n", "2 THE VILLAGE OF HARLEM....NEW YORK ! Private room \n", "3 NaN Entire home/apt \n", "4 Entire Apt: Spacious Studio/Loft by central park Entire home/apt \n", "... ... ... \n", "102594 Spare room in Williamsburg Private room \n", "102595 Best Location near Columbia U Private room \n", "102596 Comfy, bright room in Brooklyn Private room \n", "102597 Big Studio-One Stop from Midtown Entire home/apt \n", "102598 585 sf Luxury Studio Entire home/apt \n", "\n", " price minimum nights review rate number \n", "0 $966 10.0 4.0 \\\n", "1 $142 30.0 4.0 \n", "2 $620 3.0 5.0 \n", "3 $368 30.0 4.0 \n", "4 $204 10.0 3.0 \n", "... ... ... ... \n", "102594 $844 1.0 3.0 \n", "102595 $837 1.0 2.0 \n", "102596 $988 3.0 5.0 \n", "102597 $546 2.0 3.0 \n", "102598 $1,032 1.0 3.0 \n", "\n", " house_rules maximum occupancy \n", "0 Clean up and treat the home the way you'd like... 1 \\\n", "1 Pet friendly but please confirm with me if the... 2 \n", "2 I encourage you to use my kitchen, cooking and... 2 \n", "3 NaN 2 \n", "4 Please no smoking in the house, porch or on th... 3 \n", "... ... ... \n", "102594 No Smoking No Parties or Events of any kind Pl... 1 \n", "102595 House rules: Guests agree to the following ter... 2 \n", "102596 NaN 2 \n", "102597 NaN 4 \n", "102598 NaN 7 \n", "\n", " city \n", "0 Des Moines \n", "1 Wilmington \n", "2 St. George \n", "3 Kalamazoo \n", "4 Cheyenne \n", "... ... \n", "102594 White Plains \n", "102595 Mosinee \n", "102596 Amarillo \n", "102597 Binghamton \n", "102598 Flint \n", "\n", "[102599 rows x 8 columns]" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 50, "id": "0ec56283", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "data = pd.read_csv('/home/xj/toolAugEnv/code/toolConstraint/database/hotels/clean_hotels_2022.csv')" ] }, { "cell_type": "code", "execution_count": 52, "id": "5dc27048", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0NAMEroom typepriceminimum nightsreview rate numberhouse_rulesmaximum occupancycity
00Clean & quiet apt home by the parkPrivate room$96610.04.0Clean up and treat the home the way you'd like...1Des Moines
11Skylit Midtown CastleEntire home/apt$14230.04.0Pet friendly but please confirm with me if the...2Wilmington
22THE VILLAGE OF HARLEM....NEW YORK !Private room$6203.05.0I encourage you to use my kitchen, cooking and...2St. George
33NaNEntire home/apt$36830.04.0NaN2Kalamazoo
44Entire Apt: Spacious Studio/Loft by central parkEntire home/apt$20410.03.0Please no smoking in the house, porch or on th...3Cheyenne
..............................
102594102594Spare room in WilliamsburgPrivate room$8441.03.0No Smoking No Parties or Events of any kind Pl...1White Plains
102595102595Best Location near Columbia UPrivate room$8371.02.0House rules: Guests agree to the following ter...2Mosinee
102596102596Comfy, bright room in BrooklynPrivate room$9883.05.0NaN2Amarillo
102597102597Big Studio-One Stop from MidtownEntire home/apt$5462.03.0NaN4Binghamton
102598102598585 sf Luxury StudioEntire home/apt$1,0321.03.0NaN7Flint
\n", "

102599 rows × 9 columns

\n", "
" ], "text/plain": [ " Unnamed: 0 NAME \n", "0 0 Clean & quiet apt home by the park \\\n", "1 1 Skylit Midtown Castle \n", "2 2 THE VILLAGE OF HARLEM....NEW YORK ! \n", "3 3 NaN \n", "4 4 Entire Apt: Spacious Studio/Loft by central park \n", "... ... ... \n", "102594 102594 Spare room in Williamsburg \n", "102595 102595 Best Location near Columbia U \n", "102596 102596 Comfy, bright room in Brooklyn \n", "102597 102597 Big Studio-One Stop from Midtown \n", "102598 102598 585 sf Luxury Studio \n", "\n", " room type price minimum nights review rate number \n", "0 Private room $966 10.0 4.0 \\\n", "1 Entire home/apt $142 30.0 4.0 \n", "2 Private room $620 3.0 5.0 \n", "3 Entire home/apt $368 30.0 4.0 \n", "4 Entire home/apt $204 10.0 3.0 \n", "... ... ... ... ... \n", "102594 Private room $844 1.0 3.0 \n", "102595 Private room $837 1.0 2.0 \n", "102596 Private room $988 3.0 5.0 \n", "102597 Entire home/apt $546 2.0 3.0 \n", "102598 Entire home/apt $1,032 1.0 3.0 \n", "\n", " house_rules maximum occupancy \n", "0 Clean up and treat the home the way you'd like... 1 \\\n", "1 Pet friendly but please confirm with me if the... 2 \n", "2 I encourage you to use my kitchen, cooking and... 2 \n", "3 NaN 2 \n", "4 Please no smoking in the house, porch or on th... 3 \n", "... ... ... \n", "102594 No Smoking No Parties or Events of any kind Pl... 1 \n", "102595 House rules: Guests agree to the following ter... 2 \n", "102596 NaN 2 \n", "102597 NaN 4 \n", "102598 NaN 7 \n", "\n", " city \n", "0 Des Moines \n", "1 Wilmington \n", "2 St. George \n", "3 Kalamazoo \n", "4 Cheyenne \n", "... ... \n", "102594 White Plains \n", "102595 Mosinee \n", "102596 Amarillo \n", "102597 Binghamton \n", "102598 Flint \n", "\n", "[102599 rows x 9 columns]" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data" ] }, { "cell_type": "code", "execution_count": 63, "id": "bebb9c93", "metadata": {}, "outputs": [], "source": [ "filtered_data = data[data.iloc[:, -3].notna()]" ] }, { "cell_type": "code", "execution_count": 64, "id": "bd010fc9", "metadata": {}, "outputs": [], "source": [ "dict_representation = filtered_data.to_dict(orient='split')" ] }, { "cell_type": "code", "execution_count": 71, "id": "e84db5c4", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "50468" ] }, "execution_count": 71, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(dict_representation['data'])" ] }, { "cell_type": "code", "execution_count": 67, "id": "31eaadf3", "metadata": {}, "outputs": [], "source": [ "sample_df = filtered_data.sample(frac=0.1)" ] }, { "cell_type": "code", "execution_count": 69, "id": "33998ec6", "metadata": {}, "outputs": [], "source": [ "sample_df.to_csv('/home/xj/toolAugEnv/code/toolConstraint/database/hotels/clean_hotels_2022.csv')" ] }, { "cell_type": "code", "execution_count": 72, "id": "25396015", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "5047" ] }, "execution_count": 72, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(sample_df)" ] }, { "cell_type": "code", "execution_count": 3, "id": "17d054b5", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "data = pd.read_csv('/home/xj/toolAugEnv/code/toolConstraint/database/hotels/clean_hotels_2022.csv')" ] }, { "cell_type": "code", "execution_count": 4, "id": "64db8d6c", "metadata": {}, "outputs": [], "source": [ "data_dict = data.to_dict(orient = 'split')" ] }, { "cell_type": "code", "execution_count": 21, "id": "b32b2f0c", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0 Unnamed: 0.1\n", "1 Unnamed: 0\n", "2 NAME\n", "3 room type\n", "4 price\n", "5 minimum nights\n", "6 review rate number\n", "7 house_rules\n", "8 maximum occupancy\n", "9 city\n" ] } ], "source": [ "for idx, unit in enumerate(data_dict['columns']):\n", " print(idx,unit)" ] }, { "cell_type": "code", "execution_count": 8, "id": "fe415c1c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[0,\n", " 'Beautiful room upper manhttn.',\n", " 'Private room',\n", " 131.0,\n", " 1.0,\n", " 2.0,\n", " 'No smoking. No pets. ',\n", " 1,\n", " 'Christiansted']" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data_dict['data'][0]" ] }, { "cell_type": "code", "execution_count": 40, "id": "38cb5c5a", "metadata": {}, "outputs": [], "source": [ "import random\n", "new_data = []\n", "for idx, unit in enumerate(data_dict['data']):\n", " tmp_dict = {k:j for k,j in zip(['NAME','room type', 'price','minimum nights','review rate number','house_rules','maximum occupancy','city'],unit[1:])}\n", " if type(unit[4]) == str:\n", " tmp_dict[\"price\"] = eval(unit[4].replace(\"$\",\"\").replace(\",\",\"\"))\n", " house_rules_number = random.choice([0,1,1,1,2,2,3])\n", " tmp_dict['house_rules'] = \" & \".join(x for x in random.sample([\"No parties\",\"No smoking\",\"No children under 10\",\"No pets\",\"No visitors\"],house_rules_number))\n", " tmp_dict['city'] = tmp_dict['city'].split('/')[0]\n", " new_data.append(tmp_dict)" ] }, { "cell_type": "code", "execution_count": 41, "id": "ae3d551e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'NAME': 'BIG room with bath & balcony in BK!',\n", " 'room type': 'Private room',\n", " 'price': 1123.0,\n", " 'minimum nights': 1.0,\n", " 'review rate number': 4.0,\n", " 'house_rules': 'No parties',\n", " 'maximum occupancy': 2,\n", " 'city': 'Louisville'}" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_data[2]" ] }, { "cell_type": "code", "execution_count": 42, "id": "6fac856c", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "----------\n", "No pets & No visitors & No smoking\n", "----------\n", "No parties & No visitors\n", "----------\n", "No children under 10 & No pets & No smoking\n", "----------\n", "No parties & No pets & No visitors\n", "----------\n", "No pets & No children under 10\n", "----------\n", "No children under 10 & No parties & No pets\n", "----------\n", "No visitors\n", "----------\n", "No parties & No children under 10\n", "----------\n", "No children under 10 & No smoking & No visitors\n", "----------\n", "No children under 10 & No parties & No smoking\n", "----------\n", "No pets & No smoking & No children under 10\n", "----------\n", "No pets & No visitors\n", "----------\n", "No visitors & No pets\n", "----------\n", "No children under 10 & No smoking & No pets\n", "----------\n", "No smoking & No parties & No pets\n", "----------\n", "No visitors & No children under 10 & No parties\n", "----------\n", "No parties & No children under 10 & No smoking\n", "----------\n", "No visitors & No children under 10 & No smoking\n", "----------\n", "No pets & No parties\n", "----------\n", "No smoking & No parties\n", "----------\n", "No smoking & No children under 10\n", "----------\n", "No parties & No children under 10 & No visitors\n", "----------\n", "No children under 10 & No smoking\n", "----------\n", "No visitors & No pets & No smoking\n", "----------\n", "No pets\n", "----------\n", "No children under 10 & No pets\n", "----------\n", "No visitors & No smoking\n", "----------\n", "No smoking\n", "----------\n", "No parties & No smoking & No children under 10\n", "----------\n", "No parties & No smoking\n", "----------\n", "No smoking & No visitors & No parties\n", "----------\n", "No pets & No smoking\n", "----------\n", "No pets & No smoking & No parties\n", "----------\n", "No smoking & No children under 10 & No visitors\n", "----------\n", "No parties & No smoking & No visitors\n", "----------\n", "No visitors & No parties\n", "----------\n", "No visitors & No children under 10\n", "----------\n", "No parties & No smoking & No pets\n", "----------\n", "No children under 10 & No pets & No visitors\n", "----------\n", "No smoking & No pets & No parties\n", "----------\n", "No children under 10 & No smoking & No parties\n", "----------\n", "No visitors & No children under 10 & No pets\n", "----------\n", "No children under 10 & No parties\n", "----------\n", "No pets & No parties & No visitors\n", "----------\n", "No children under 10 & No visitors & No parties\n", "----------\n", "No parties & No pets\n", "----------\n", "No visitors & No parties & No pets\n", "----------\n", "No smoking & No pets & No visitors\n", "----------\n", "No smoking & No pets\n", "----------\n", "No visitors & No smoking & No children under 10\n", "----------\n", "No pets & No children under 10 & No parties\n", "----------\n", "No visitors & No pets & No children under 10\n", "----------\n", "No pets & No children under 10 & No smoking\n", "----------\n", "No parties & No visitors & No children under 10\n", "----------\n", "No pets & No smoking & No visitors\n", "----------\n", "No pets & No parties & No smoking\n", "----------\n", "No parties & No visitors & No smoking\n", "----------\n", "No pets & No visitors & No children under 10\n", "----------\n", "No parties & No visitors & No pets\n", "----------\n", "No children under 10\n", "----------\n", "No children under 10 & No pets & No parties\n", "----------\n", "No children under 10 & No visitors & No smoking\n", "----------\n", "No smoking & No children under 10 & No parties\n", "----------\n", "No pets & No parties & No children under 10\n", "----------\n", "No children under 10 & No visitors & No pets\n", "----------\n", "No parties & No pets & No smoking\n", "----------\n", "No pets & No children under 10 & No visitors\n", "----------\n", "No parties & No children under 10 & No pets\n", "----------\n", "No parties & No pets & No children under 10\n", "----------\n", "No smoking & No parties & No visitors\n", "----------\n", "No parties\n", "----------\n", "No visitors & No pets & No parties\n", "----------\n", "No children under 10 & No visitors\n", "----------\n", "No smoking & No children under 10 & No pets\n", "----------\n", "No smoking & No parties & No children under 10\n", "----------\n", "No visitors & No smoking & No parties\n", "----------\n", "No pets & No visitors & No parties\n", "----------\n", "No smoking & No visitors\n", "----------\n", "No smoking & No visitors & No children under 10\n", "----------\n", "No visitors & No smoking & No pets\n", "----------\n", "No smoking & No visitors & No pets\n", "----------\n", "No visitors & No parties & No smoking\n", "----------\n", "No smoking & No pets & No children under 10\n", "----------\n", "No children under 10 & No parties & No visitors\n", "----------\n", "No visitors & No parties & No children under 10\n", "----------\n" ] } ], "source": [ "maximum_occupancy_set = set()\n", "for unit in new_data:\n", " maximum_occupancy_set.add(unit['house_rules'])\n", "for unit in maximum_occupancy_set:\n", " print(unit)\n", " print(\"----------\")" ] }, { "cell_type": "code", "execution_count": 45, "id": "8056052a", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NAMEroom typepriceminimum nightsreview rate numberhouse_rulesmaximum occupancycity
0Beautiful room upper manhttn.Private room131.01.02.0No smoking1Christiansted
1Roomy and Comftable RoomPrivate room548.010.05.0No children under 10 & No parties2Laredo
2BIG room with bath & balcony in BK!Private room1123.01.04.0No parties2Louisville
34A-Entire home/apt225.030.04.0No pets3Greensboro
4Nice and Comfortable Private RoomPrivate room761.02.01.0No smoking & No parties2Cape Girardeau
...........................
5042Amazing LOFT in Prime WilliamsburgPrivate room249.05.05.0No pets2Trenton
5043Private Queen Bedroom in BrooklynPrivate room1032.01.01.0No pets1Des Moines
5044Bushwick / Bed Sty RetreatPrivate room546.02.04.0No children under 10 & No visitors & No smoking2Scottsbluff
5045Charming Mid-Century StudioEntire home/apt1115.02.05.0No pets & No children under 107Butte
50463 Bed/ 2 Bath Full Apt. BK HeightsEntire home/apt396.02.01.0No smoking3Norfolk
\n", "

5047 rows × 8 columns

\n", "
" ], "text/plain": [ " NAME room type price \n", "0 Beautiful room upper manhttn. Private room 131.0 \\\n", "1 Roomy and Comftable Room Private room 548.0 \n", "2 BIG room with bath & balcony in BK! Private room 1123.0 \n", "3 4A- Entire home/apt 225.0 \n", "4 Nice and Comfortable Private Room Private room 761.0 \n", "... ... ... ... \n", "5042 Amazing LOFT in Prime Williamsburg Private room 249.0 \n", "5043 Private Queen Bedroom in Brooklyn Private room 1032.0 \n", "5044 Bushwick / Bed Sty Retreat Private room 546.0 \n", "5045 Charming Mid-Century Studio Entire home/apt 1115.0 \n", "5046 3 Bed/ 2 Bath Full Apt. BK Heights Entire home/apt 396.0 \n", "\n", " minimum nights review rate number \n", "0 1.0 2.0 \\\n", "1 10.0 5.0 \n", "2 1.0 4.0 \n", "3 30.0 4.0 \n", "4 2.0 1.0 \n", "... ... ... \n", "5042 5.0 5.0 \n", "5043 1.0 1.0 \n", "5044 2.0 4.0 \n", "5045 2.0 5.0 \n", "5046 2.0 1.0 \n", "\n", " house_rules maximum occupancy \n", "0 No smoking 1 \\\n", "1 No children under 10 & No parties 2 \n", "2 No parties 2 \n", "3 No pets 3 \n", "4 No smoking & No parties 2 \n", "... ... ... \n", "5042 No pets 2 \n", "5043 No pets 1 \n", "5044 No children under 10 & No visitors & No smoking 2 \n", "5045 No pets & No children under 10 7 \n", "5046 No smoking 3 \n", "\n", " city \n", "0 Christiansted \n", "1 Laredo \n", "2 Louisville \n", "3 Greensboro \n", "4 Cape Girardeau \n", "... ... \n", "5042 Trenton \n", "5043 Des Moines \n", "5044 Scottsbluff \n", "5045 Butte \n", "5046 Norfolk \n", "\n", "[5047 rows x 8 columns]" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 44, "id": "54423e0d", "metadata": {}, "outputs": [], "source": [ "df = pd.DataFrame(new_data)\n", "df.to_csv('/home/xj/toolAugEnv/code/toolConstraint/database/hotels/clean_hotels_2022.csv')" ] }, { "cell_type": "code", "execution_count": null, "id": "5767aa80", "metadata": {}, "outputs": [], "source": [ "df.rename(columns={'old_name1': 'new_name1', 'old_name2': 'new_name2'}, inplace=True)\n", "df.to_csv('/home/xj/toolAugEnv/code/toolConstraint/database/hotels/clean_hotels_2022.csv')" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.16" } }, "nbformat": 4, "nbformat_minor": 5 }