hsaest commited on
Commit
23868ad
1 Parent(s): 4b07d40

Delete tools/accommodations/test.ipynb

Browse files
Files changed (1) hide show
  1. tools/accommodations/test.ipynb +0 -2037
tools/accommodations/test.ipynb DELETED
@@ -1,2037 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 1,
6
- "id": "ad7592e7",
7
- "metadata": {},
8
- "outputs": [
9
- {
10
- "name": "stderr",
11
- "output_type": "stream",
12
- "text": [
13
- "/tmp/ipykernel_2459435/230780042.py:2: DtypeWarning: Columns (25) have mixed types. Specify dtype option on import or set low_memory=False.\n",
14
- " data = pd.read_csv('/home/xj/toolAugEnv/code/toolConstraint/database/hotels/Airbnb_Open_Data.csv')\n"
15
- ]
16
- }
17
- ],
18
- "source": [
19
- "import pandas as pd\n",
20
- "data = pd.read_csv('/home/xj/toolAugEnv/code/toolConstraint/database/hotels/Airbnb_Open_Data.csv')"
21
- ]
22
- },
23
- {
24
- "cell_type": "code",
25
- "execution_count": 2,
26
- "id": "f97916a9",
27
- "metadata": {},
28
- "outputs": [
29
- {
30
- "data": {
31
- "text/html": [
32
- "<div>\n",
33
- "<style scoped>\n",
34
- " .dataframe tbody tr th:only-of-type {\n",
35
- " vertical-align: middle;\n",
36
- " }\n",
37
- "\n",
38
- " .dataframe tbody tr th {\n",
39
- " vertical-align: top;\n",
40
- " }\n",
41
- "\n",
42
- " .dataframe thead th {\n",
43
- " text-align: right;\n",
44
- " }\n",
45
- "</style>\n",
46
- "<table border=\"1\" class=\"dataframe\">\n",
47
- " <thead>\n",
48
- " <tr style=\"text-align: right;\">\n",
49
- " <th></th>\n",
50
- " <th>id</th>\n",
51
- " <th>NAME</th>\n",
52
- " <th>host id</th>\n",
53
- " <th>host_identity_verified</th>\n",
54
- " <th>host name</th>\n",
55
- " <th>neighbourhood group</th>\n",
56
- " <th>neighbourhood</th>\n",
57
- " <th>lat</th>\n",
58
- " <th>long</th>\n",
59
- " <th>country</th>\n",
60
- " <th>...</th>\n",
61
- " <th>service fee</th>\n",
62
- " <th>minimum nights</th>\n",
63
- " <th>number of reviews</th>\n",
64
- " <th>last review</th>\n",
65
- " <th>reviews per month</th>\n",
66
- " <th>review rate number</th>\n",
67
- " <th>calculated host listings count</th>\n",
68
- " <th>availability 365</th>\n",
69
- " <th>house_rules</th>\n",
70
- " <th>license</th>\n",
71
- " </tr>\n",
72
- " </thead>\n",
73
- " <tbody>\n",
74
- " <tr>\n",
75
- " <th>0</th>\n",
76
- " <td>1001254</td>\n",
77
- " <td>Clean &amp; quiet apt home by the park</td>\n",
78
- " <td>80014485718</td>\n",
79
- " <td>unconfirmed</td>\n",
80
- " <td>Madaline</td>\n",
81
- " <td>Brooklyn</td>\n",
82
- " <td>Kensington</td>\n",
83
- " <td>40.64749</td>\n",
84
- " <td>-73.97237</td>\n",
85
- " <td>United States</td>\n",
86
- " <td>...</td>\n",
87
- " <td>$193</td>\n",
88
- " <td>10.0</td>\n",
89
- " <td>9.0</td>\n",
90
- " <td>10/19/2021</td>\n",
91
- " <td>0.21</td>\n",
92
- " <td>4.0</td>\n",
93
- " <td>6.0</td>\n",
94
- " <td>286.0</td>\n",
95
- " <td>Clean up and treat the home the way you'd like...</td>\n",
96
- " <td>NaN</td>\n",
97
- " </tr>\n",
98
- " <tr>\n",
99
- " <th>1</th>\n",
100
- " <td>1002102</td>\n",
101
- " <td>Skylit Midtown Castle</td>\n",
102
- " <td>52335172823</td>\n",
103
- " <td>verified</td>\n",
104
- " <td>Jenna</td>\n",
105
- " <td>Manhattan</td>\n",
106
- " <td>Midtown</td>\n",
107
- " <td>40.75362</td>\n",
108
- " <td>-73.98377</td>\n",
109
- " <td>United States</td>\n",
110
- " <td>...</td>\n",
111
- " <td>$28</td>\n",
112
- " <td>30.0</td>\n",
113
- " <td>45.0</td>\n",
114
- " <td>5/21/2022</td>\n",
115
- " <td>0.38</td>\n",
116
- " <td>4.0</td>\n",
117
- " <td>2.0</td>\n",
118
- " <td>228.0</td>\n",
119
- " <td>Pet friendly but please confirm with me if the...</td>\n",
120
- " <td>NaN</td>\n",
121
- " </tr>\n",
122
- " <tr>\n",
123
- " <th>2</th>\n",
124
- " <td>1002403</td>\n",
125
- " <td>THE VILLAGE OF HARLEM....NEW YORK !</td>\n",
126
- " <td>78829239556</td>\n",
127
- " <td>NaN</td>\n",
128
- " <td>Elise</td>\n",
129
- " <td>Manhattan</td>\n",
130
- " <td>Harlem</td>\n",
131
- " <td>40.80902</td>\n",
132
- " <td>-73.94190</td>\n",
133
- " <td>United States</td>\n",
134
- " <td>...</td>\n",
135
- " <td>$124</td>\n",
136
- " <td>3.0</td>\n",
137
- " <td>0.0</td>\n",
138
- " <td>NaN</td>\n",
139
- " <td>NaN</td>\n",
140
- " <td>5.0</td>\n",
141
- " <td>1.0</td>\n",
142
- " <td>352.0</td>\n",
143
- " <td>I encourage you to use my kitchen, cooking and...</td>\n",
144
- " <td>NaN</td>\n",
145
- " </tr>\n",
146
- " <tr>\n",
147
- " <th>3</th>\n",
148
- " <td>1002755</td>\n",
149
- " <td>NaN</td>\n",
150
- " <td>85098326012</td>\n",
151
- " <td>unconfirmed</td>\n",
152
- " <td>Garry</td>\n",
153
- " <td>Brooklyn</td>\n",
154
- " <td>Clinton Hill</td>\n",
155
- " <td>40.68514</td>\n",
156
- " <td>-73.95976</td>\n",
157
- " <td>United States</td>\n",
158
- " <td>...</td>\n",
159
- " <td>$74</td>\n",
160
- " <td>30.0</td>\n",
161
- " <td>270.0</td>\n",
162
- " <td>7/5/2019</td>\n",
163
- " <td>4.64</td>\n",
164
- " <td>4.0</td>\n",
165
- " <td>1.0</td>\n",
166
- " <td>322.0</td>\n",
167
- " <td>NaN</td>\n",
168
- " <td>NaN</td>\n",
169
- " </tr>\n",
170
- " <tr>\n",
171
- " <th>4</th>\n",
172
- " <td>1003689</td>\n",
173
- " <td>Entire Apt: Spacious Studio/Loft by central park</td>\n",
174
- " <td>92037596077</td>\n",
175
- " <td>verified</td>\n",
176
- " <td>Lyndon</td>\n",
177
- " <td>Manhattan</td>\n",
178
- " <td>East Harlem</td>\n",
179
- " <td>40.79851</td>\n",
180
- " <td>-73.94399</td>\n",
181
- " <td>United States</td>\n",
182
- " <td>...</td>\n",
183
- " <td>$41</td>\n",
184
- " <td>10.0</td>\n",
185
- " <td>9.0</td>\n",
186
- " <td>11/19/2018</td>\n",
187
- " <td>0.10</td>\n",
188
- " <td>3.0</td>\n",
189
- " <td>1.0</td>\n",
190
- " <td>289.0</td>\n",
191
- " <td>Please no smoking in the house, porch or on th...</td>\n",
192
- " <td>NaN</td>\n",
193
- " </tr>\n",
194
- " <tr>\n",
195
- " <th>...</th>\n",
196
- " <td>...</td>\n",
197
- " <td>...</td>\n",
198
- " <td>...</td>\n",
199
- " <td>...</td>\n",
200
- " <td>...</td>\n",
201
- " <td>...</td>\n",
202
- " <td>...</td>\n",
203
- " <td>...</td>\n",
204
- " <td>...</td>\n",
205
- " <td>...</td>\n",
206
- " <td>...</td>\n",
207
- " <td>...</td>\n",
208
- " <td>...</td>\n",
209
- " <td>...</td>\n",
210
- " <td>...</td>\n",
211
- " <td>...</td>\n",
212
- " <td>...</td>\n",
213
- " <td>...</td>\n",
214
- " <td>...</td>\n",
215
- " <td>...</td>\n",
216
- " <td>...</td>\n",
217
- " </tr>\n",
218
- " <tr>\n",
219
- " <th>102594</th>\n",
220
- " <td>6092437</td>\n",
221
- " <td>Spare room in Williamsburg</td>\n",
222
- " <td>12312296767</td>\n",
223
- " <td>verified</td>\n",
224
- " <td>Krik</td>\n",
225
- " <td>Brooklyn</td>\n",
226
- " <td>Williamsburg</td>\n",
227
- " <td>40.70862</td>\n",
228
- " <td>-73.94651</td>\n",
229
- " <td>United States</td>\n",
230
- " <td>...</td>\n",
231
- " <td>$169</td>\n",
232
- " <td>1.0</td>\n",
233
- " <td>0.0</td>\n",
234
- " <td>NaN</td>\n",
235
- " <td>NaN</td>\n",
236
- " <td>3.0</td>\n",
237
- " <td>1.0</td>\n",
238
- " <td>227.0</td>\n",
239
- " <td>No Smoking No Parties or Events of any kind Pl...</td>\n",
240
- " <td>NaN</td>\n",
241
- " </tr>\n",
242
- " <tr>\n",
243
- " <th>102595</th>\n",
244
- " <td>6092990</td>\n",
245
- " <td>Best Location near Columbia U</td>\n",
246
- " <td>77864383453</td>\n",
247
- " <td>unconfirmed</td>\n",
248
- " <td>Mifan</td>\n",
249
- " <td>Manhattan</td>\n",
250
- " <td>Morningside Heights</td>\n",
251
- " <td>40.80460</td>\n",
252
- " <td>-73.96545</td>\n",
253
- " <td>United States</td>\n",
254
- " <td>...</td>\n",
255
- " <td>$167</td>\n",
256
- " <td>1.0</td>\n",
257
- " <td>1.0</td>\n",
258
- " <td>7/6/2015</td>\n",
259
- " <td>0.02</td>\n",
260
- " <td>2.0</td>\n",
261
- " <td>2.0</td>\n",
262
- " <td>395.0</td>\n",
263
- " <td>House rules: Guests agree to the following ter...</td>\n",
264
- " <td>NaN</td>\n",
265
- " </tr>\n",
266
- " <tr>\n",
267
- " <th>102596</th>\n",
268
- " <td>6093542</td>\n",
269
- " <td>Comfy, bright room in Brooklyn</td>\n",
270
- " <td>69050334417</td>\n",
271
- " <td>unconfirmed</td>\n",
272
- " <td>Megan</td>\n",
273
- " <td>Brooklyn</td>\n",
274
- " <td>Park Slope</td>\n",
275
- " <td>40.67505</td>\n",
276
- " <td>-73.98045</td>\n",
277
- " <td>United States</td>\n",
278
- " <td>...</td>\n",
279
- " <td>$198</td>\n",
280
- " <td>3.0</td>\n",
281
- " <td>0.0</td>\n",
282
- " <td>NaN</td>\n",
283
- " <td>NaN</td>\n",
284
- " <td>5.0</td>\n",
285
- " <td>1.0</td>\n",
286
- " <td>342.0</td>\n",
287
- " <td>NaN</td>\n",
288
- " <td>NaN</td>\n",
289
- " </tr>\n",
290
- " <tr>\n",
291
- " <th>102597</th>\n",
292
- " <td>6094094</td>\n",
293
- " <td>Big Studio-One Stop from Midtown</td>\n",
294
- " <td>11160591270</td>\n",
295
- " <td>unconfirmed</td>\n",
296
- " <td>Christopher</td>\n",
297
- " <td>Queens</td>\n",
298
- " <td>Long Island City</td>\n",
299
- " <td>40.74989</td>\n",
300
- " <td>-73.93777</td>\n",
301
- " <td>United States</td>\n",
302
- " <td>...</td>\n",
303
- " <td>$109</td>\n",
304
- " <td>2.0</td>\n",
305
- " <td>5.0</td>\n",
306
- " <td>10/11/2015</td>\n",
307
- " <td>0.10</td>\n",
308
- " <td>3.0</td>\n",
309
- " <td>1.0</td>\n",
310
- " <td>386.0</td>\n",
311
- " <td>NaN</td>\n",
312
- " <td>NaN</td>\n",
313
- " </tr>\n",
314
- " <tr>\n",
315
- " <th>102598</th>\n",
316
- " <td>6094647</td>\n",
317
- " <td>585 sf Luxury Studio</td>\n",
318
- " <td>68170633372</td>\n",
319
- " <td>unconfirmed</td>\n",
320
- " <td>Rebecca</td>\n",
321
- " <td>Manhattan</td>\n",
322
- " <td>Upper West Side</td>\n",
323
- " <td>40.76807</td>\n",
324
- " <td>-73.98342</td>\n",
325
- " <td>United States</td>\n",
326
- " <td>...</td>\n",
327
- " <td>$206</td>\n",
328
- " <td>1.0</td>\n",
329
- " <td>0.0</td>\n",
330
- " <td>NaN</td>\n",
331
- " <td>NaN</td>\n",
332
- " <td>3.0</td>\n",
333
- " <td>1.0</td>\n",
334
- " <td>69.0</td>\n",
335
- " <td>NaN</td>\n",
336
- " <td>NaN</td>\n",
337
- " </tr>\n",
338
- " </tbody>\n",
339
- "</table>\n",
340
- "<p>102599 rows × 26 columns</p>\n",
341
- "</div>"
342
- ],
343
- "text/plain": [
344
- " id NAME \n",
345
- "0 1001254 Clean & quiet apt home by the park \\\n",
346
- "1 1002102 Skylit Midtown Castle \n",
347
- "2 1002403 THE VILLAGE OF HARLEM....NEW YORK ! \n",
348
- "3 1002755 NaN \n",
349
- "4 1003689 Entire Apt: Spacious Studio/Loft by central park \n",
350
- "... ... ... \n",
351
- "102594 6092437 Spare room in Williamsburg \n",
352
- "102595 6092990 Best Location near Columbia U \n",
353
- "102596 6093542 Comfy, bright room in Brooklyn \n",
354
- "102597 6094094 Big Studio-One Stop from Midtown \n",
355
- "102598 6094647 585 sf Luxury Studio \n",
356
- "\n",
357
- " host id host_identity_verified host name neighbourhood group \n",
358
- "0 80014485718 unconfirmed Madaline Brooklyn \\\n",
359
- "1 52335172823 verified Jenna Manhattan \n",
360
- "2 78829239556 NaN Elise Manhattan \n",
361
- "3 85098326012 unconfirmed Garry Brooklyn \n",
362
- "4 92037596077 verified Lyndon Manhattan \n",
363
- "... ... ... ... ... \n",
364
- "102594 12312296767 verified Krik Brooklyn \n",
365
- "102595 77864383453 unconfirmed Mifan Manhattan \n",
366
- "102596 69050334417 unconfirmed Megan Brooklyn \n",
367
- "102597 11160591270 unconfirmed Christopher Queens \n",
368
- "102598 68170633372 unconfirmed Rebecca Manhattan \n",
369
- "\n",
370
- " neighbourhood lat long country ... \n",
371
- "0 Kensington 40.64749 -73.97237 United States ... \\\n",
372
- "1 Midtown 40.75362 -73.98377 United States ... \n",
373
- "2 Harlem 40.80902 -73.94190 United States ... \n",
374
- "3 Clinton Hill 40.68514 -73.95976 United States ... \n",
375
- "4 East Harlem 40.79851 -73.94399 United States ... \n",
376
- "... ... ... ... ... ... \n",
377
- "102594 Williamsburg 40.70862 -73.94651 United States ... \n",
378
- "102595 Morningside Heights 40.80460 -73.96545 United States ... \n",
379
- "102596 Park Slope 40.67505 -73.98045 United States ... \n",
380
- "102597 Long Island City 40.74989 -73.93777 United States ... \n",
381
- "102598 Upper West Side 40.76807 -73.98342 United States ... \n",
382
- "\n",
383
- " service fee minimum nights number of reviews last review \n",
384
- "0 $193 10.0 9.0 10/19/2021 \\\n",
385
- "1 $28 30.0 45.0 5/21/2022 \n",
386
- "2 $124 3.0 0.0 NaN \n",
387
- "3 $74 30.0 270.0 7/5/2019 \n",
388
- "4 $41 10.0 9.0 11/19/2018 \n",
389
- "... ... ... ... ... \n",
390
- "102594 $169 1.0 0.0 NaN \n",
391
- "102595 $167 1.0 1.0 7/6/2015 \n",
392
- "102596 $198 3.0 0.0 NaN \n",
393
- "102597 $109 2.0 5.0 10/11/2015 \n",
394
- "102598 $206 1.0 0.0 NaN \n",
395
- "\n",
396
- " reviews per month review rate number calculated host listings count \n",
397
- "0 0.21 4.0 6.0 \\\n",
398
- "1 0.38 4.0 2.0 \n",
399
- "2 NaN 5.0 1.0 \n",
400
- "3 4.64 4.0 1.0 \n",
401
- "4 0.10 3.0 1.0 \n",
402
- "... ... ... ... \n",
403
- "102594 NaN 3.0 1.0 \n",
404
- "102595 0.02 2.0 2.0 \n",
405
- "102596 NaN 5.0 1.0 \n",
406
- "102597 0.10 3.0 1.0 \n",
407
- "102598 NaN 3.0 1.0 \n",
408
- "\n",
409
- " availability 365 house_rules \n",
410
- "0 286.0 Clean up and treat the home the way you'd like... \\\n",
411
- "1 228.0 Pet friendly but please confirm with me if the... \n",
412
- "2 352.0 I encourage you to use my kitchen, cooking and... \n",
413
- "3 322.0 NaN \n",
414
- "4 289.0 Please no smoking in the house, porch or on th... \n",
415
- "... ... ... \n",
416
- "102594 227.0 No Smoking No Parties or Events of any kind Pl... \n",
417
- "102595 395.0 House rules: Guests agree to the following ter... \n",
418
- "102596 342.0 NaN \n",
419
- "102597 386.0 NaN \n",
420
- "102598 69.0 NaN \n",
421
- "\n",
422
- " license \n",
423
- "0 NaN \n",
424
- "1 NaN \n",
425
- "2 NaN \n",
426
- "3 NaN \n",
427
- "4 NaN \n",
428
- "... ... \n",
429
- "102594 NaN \n",
430
- "102595 NaN \n",
431
- "102596 NaN \n",
432
- "102597 NaN \n",
433
- "102598 NaN \n",
434
- "\n",
435
- "[102599 rows x 26 columns]"
436
- ]
437
- },
438
- "execution_count": 2,
439
- "metadata": {},
440
- "output_type": "execute_result"
441
- }
442
- ],
443
- "source": [
444
- "data"
445
- ]
446
- },
447
- {
448
- "cell_type": "code",
449
- "execution_count": 3,
450
- "id": "e21af5d1",
451
- "metadata": {},
452
- "outputs": [],
453
- "source": [
454
- "flight = pd.read_csv('/home/xj/toolAugEnv/code/toolConstraint/database/flights/clean_Flights_2022.csv')"
455
- ]
456
- },
457
- {
458
- "cell_type": "code",
459
- "execution_count": 4,
460
- "id": "966feef9",
461
- "metadata": {},
462
- "outputs": [],
463
- "source": [
464
- "flight = flight.to_dict(orient = 'split')"
465
- ]
466
- },
467
- {
468
- "cell_type": "code",
469
- "execution_count": 5,
470
- "id": "3f4fe062",
471
- "metadata": {},
472
- "outputs": [],
473
- "source": [
474
- "data_dict = data.to_dict(orient = 'split')"
475
- ]
476
- },
477
- {
478
- "cell_type": "code",
479
- "execution_count": 6,
480
- "id": "33213ac0",
481
- "metadata": {},
482
- "outputs": [
483
- {
484
- "data": {
485
- "text/plain": [
486
- "[2, '2022-04-04', '15:14', '16:36', 251.0, 'Durango', 'Denver', 100]"
487
- ]
488
- },
489
- "execution_count": 6,
490
- "metadata": {},
491
- "output_type": "execute_result"
492
- }
493
- ],
494
- "source": [
495
- "flight['data'][2]"
496
- ]
497
- },
498
- {
499
- "cell_type": "code",
500
- "execution_count": 8,
501
- "id": "9cef6161",
502
- "metadata": {},
503
- "outputs": [
504
- {
505
- "name": "stdout",
506
- "output_type": "stream",
507
- "text": [
508
- "nan\n"
509
- ]
510
- }
511
- ],
512
- "source": [
513
- "print(str(data_dict['data'][3][24]))"
514
- ]
515
- },
516
- {
517
- "cell_type": "code",
518
- "execution_count": 9,
519
- "id": "c5f81f43",
520
- "metadata": {},
521
- "outputs": [],
522
- "source": [
523
- "city_set = set()\n",
524
- "cnt = 0\n",
525
- "for unit in data_dict['data']:\n",
526
- " if str(unit[24]) != 'nan':\n",
527
- " cnt += 1"
528
- ]
529
- },
530
- {
531
- "cell_type": "code",
532
- "execution_count": 10,
533
- "id": "533a5aa6",
534
- "metadata": {},
535
- "outputs": [
536
- {
537
- "data": {
538
- "text/plain": [
539
- "50468"
540
- ]
541
- },
542
- "execution_count": 10,
543
- "metadata": {},
544
- "output_type": "execute_result"
545
- }
546
- ],
547
- "source": [
548
- "cnt"
549
- ]
550
- },
551
- {
552
- "cell_type": "code",
553
- "execution_count": 11,
554
- "id": "bfce5f56",
555
- "metadata": {},
556
- "outputs": [
557
- {
558
- "data": {
559
- "text/plain": [
560
- "set()"
561
- ]
562
- },
563
- "execution_count": 11,
564
- "metadata": {},
565
- "output_type": "execute_result"
566
- }
567
- ],
568
- "source": [
569
- "city_set"
570
- ]
571
- },
572
- {
573
- "cell_type": "code",
574
- "execution_count": 12,
575
- "id": "230b760c",
576
- "metadata": {},
577
- "outputs": [
578
- {
579
- "ename": "ValueError",
580
- "evalue": "Sample larger than population or is negative",
581
- "output_type": "error",
582
- "traceback": [
583
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
584
- "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
585
- "Cell \u001b[0;32mIn[12], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mrandom\u001b[39;00m\n\u001b[1;32m 2\u001b[0m city_set \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(city_set)\n\u001b[0;32m----> 3\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[43mrandom\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msample\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcity_set\u001b[49m\u001b[43m,\u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m)\n",
586
- "File \u001b[0;32m~/miniconda3/envs/py39/lib/python3.9/random.py:449\u001b[0m, in \u001b[0;36mRandom.sample\u001b[0;34m(self, population, k, counts)\u001b[0m\n\u001b[1;32m 447\u001b[0m randbelow \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_randbelow\n\u001b[1;32m 448\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;241m0\u001b[39m \u001b[38;5;241m<\u001b[39m\u001b[38;5;241m=\u001b[39m k \u001b[38;5;241m<\u001b[39m\u001b[38;5;241m=\u001b[39m n:\n\u001b[0;32m--> 449\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSample larger than population or is negative\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 450\u001b[0m result \u001b[38;5;241m=\u001b[39m [\u001b[38;5;28;01mNone\u001b[39;00m] \u001b[38;5;241m*\u001b[39m k\n\u001b[1;32m 451\u001b[0m setsize \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m21\u001b[39m \u001b[38;5;66;03m# size of a small set minus size of an empty list\u001b[39;00m\n",
587
- "\u001b[0;31mValueError\u001b[0m: Sample larger than population or is negative"
588
- ]
589
- }
590
- ],
591
- "source": [
592
- "import random\n",
593
- "city_set = list(city_set)\n",
594
- "print(random.sample(city_set,1))"
595
- ]
596
- },
597
- {
598
- "cell_type": "code",
599
- "execution_count": 12,
600
- "id": "61eddd5f",
601
- "metadata": {},
602
- "outputs": [
603
- {
604
- "ename": "AttributeError",
605
- "evalue": "'dict' object has no attribute 'to_dict'",
606
- "output_type": "error",
607
- "traceback": [
608
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
609
- "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
610
- "Cell \u001b[0;32mIn[12], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m data_dict \u001b[38;5;241m=\u001b[39m \u001b[43mdata\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_dict\u001b[49m(orient \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124msplit\u001b[39m\u001b[38;5;124m'\u001b[39m)\n",
611
- "\u001b[0;31mAttributeError\u001b[0m: 'dict' object has no attribute 'to_dict'"
612
- ]
613
- }
614
- ],
615
- "source": [
616
- "data_dict = data.to_dict(orient = 'split')"
617
- ]
618
- },
619
- {
620
- "cell_type": "code",
621
- "execution_count": 35,
622
- "id": "3292c450",
623
- "metadata": {},
624
- "outputs": [
625
- {
626
- "data": {
627
- "text/plain": [
628
- "['Unnamed: 0',\n",
629
- " 'NAME',\n",
630
- " 'room type',\n",
631
- " 'price',\n",
632
- " 'minimum nights',\n",
633
- " 'review rate number',\n",
634
- " 'house_rules',\n",
635
- " 'maximum occupancy',\n",
636
- " 'city']"
637
- ]
638
- },
639
- "execution_count": 35,
640
- "metadata": {},
641
- "output_type": "execute_result"
642
- }
643
- ],
644
- "source": [
645
- "data_dict['columns']"
646
- ]
647
- },
648
- {
649
- "cell_type": "code",
650
- "execution_count": 38,
651
- "id": "cfaa21d9",
652
- "metadata": {},
653
- "outputs": [
654
- {
655
- "data": {
656
- "text/plain": [
657
- "5047"
658
- ]
659
- },
660
- "execution_count": 38,
661
- "metadata": {},
662
- "output_type": "execute_result"
663
- }
664
- ],
665
- "source": [
666
- "len(data_dict['data'])"
667
- ]
668
- },
669
- {
670
- "cell_type": "code",
671
- "execution_count": 36,
672
- "id": "2980362d",
673
- "metadata": {},
674
- "outputs": [],
675
- "source": [
676
- "type_set = set()\n",
677
- "for unit in data_dict['data']:\n",
678
- " type_set.add(unit[2])"
679
- ]
680
- },
681
- {
682
- "cell_type": "code",
683
- "execution_count": 37,
684
- "id": "f5e36fbb",
685
- "metadata": {},
686
- "outputs": [
687
- {
688
- "data": {
689
- "text/plain": [
690
- "{'Entire home/apt', 'Private room', 'Shared room'}"
691
- ]
692
- },
693
- "execution_count": 37,
694
- "metadata": {},
695
- "output_type": "execute_result"
696
- }
697
- ],
698
- "source": [
699
- "type_set"
700
- ]
701
- },
702
- {
703
- "cell_type": "code",
704
- "execution_count": 15,
705
- "id": "bf1231c4",
706
- "metadata": {},
707
- "outputs": [
708
- {
709
- "ename": "NameError",
710
- "evalue": "name 'data_dict' is not defined",
711
- "output_type": "error",
712
- "traceback": [
713
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
714
- "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
715
- "Cell \u001b[0;32mIn[15], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mdata_dict\u001b[49m[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdata\u001b[39m\u001b[38;5;124m'\u001b[39m][\u001b[38;5;241m147\u001b[39m]\n",
716
- "\u001b[0;31mNameError\u001b[0m: name 'data_dict' is not defined"
717
- ]
718
- }
719
- ],
720
- "source": [
721
- "data_dict['data'][147]"
722
- ]
723
- },
724
- {
725
- "cell_type": "code",
726
- "execution_count": 14,
727
- "id": "f993b894",
728
- "metadata": {},
729
- "outputs": [
730
- {
731
- "data": {
732
- "text/plain": [
733
- "set()"
734
- ]
735
- },
736
- "execution_count": 14,
737
- "metadata": {},
738
- "output_type": "execute_result"
739
- }
740
- ],
741
- "source": [
742
- "type_set"
743
- ]
744
- },
745
- {
746
- "cell_type": "code",
747
- "execution_count": 10,
748
- "id": "916e9470",
749
- "metadata": {},
750
- "outputs": [
751
- {
752
- "name": "stdout",
753
- "output_type": "stream",
754
- "text": [
755
- "1 NAME\n",
756
- "7 lat\n",
757
- "8 long\n",
758
- "13 room type\n",
759
- "15 price\n",
760
- "17 minimum nights\n",
761
- "21 review rate number\n",
762
- "24 house_rules\n"
763
- ]
764
- }
765
- ],
766
- "source": [
767
- "for idx, unit in enumerate(data_dict['columns']):\n",
768
- " if unit in ['NAME','lat', 'long', 'room type', 'price','minimum nights','review rate number','house_rules']:\n",
769
- " print(idx,unit)"
770
- ]
771
- },
772
- {
773
- "cell_type": "code",
774
- "execution_count": 73,
775
- "id": "1213484d",
776
- "metadata": {},
777
- "outputs": [
778
- {
779
- "data": {
780
- "application/vnd.jupyter.widget-view+json": {
781
- "model_id": "51764c1a3739416289913ec613816cc7",
782
- "version_major": 2,
783
- "version_minor": 0
784
- },
785
- "text/plain": [
786
- "0it [00:00, ?it/s]"
787
- ]
788
- },
789
- "metadata": {},
790
- "output_type": "display_data"
791
- },
792
- {
793
- "name": "stderr",
794
- "output_type": "stream",
795
- "text": [
796
- "/tmp/ipykernel_3241846/557604333.py:23: DeprecationWarning: Sampling from a set deprecated\n",
797
- "since Python 3.9 and will be removed in a subsequent version.\n",
798
- " tmp_dict[\"city\"] = random.sample(city_set,1)[0]\n"
799
- ]
800
- },
801
- {
802
- "ename": "ValueError",
803
- "evalue": "Sample larger than population or is negative",
804
- "output_type": "error",
805
- "traceback": [
806
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
807
- "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
808
- "Cell \u001b[0;32mIn[73], line 23\u001b[0m\n\u001b[1;32m 21\u001b[0m tmp_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mreview rate number\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m unit[\u001b[38;5;241m21\u001b[39m]\n\u001b[1;32m 22\u001b[0m tmp_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhouse_rules\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m unit[\u001b[38;5;241m24\u001b[39m]\n\u001b[0;32m---> 23\u001b[0m tmp_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcity\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[43mrandom\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msample\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcity_set\u001b[49m\u001b[43m,\u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m 24\u001b[0m new_data\u001b[38;5;241m.\u001b[39mappend(tmp_dict)\n",
809
- "File \u001b[0;32m~/miniconda3/envs/py39/lib/python3.9/random.py:449\u001b[0m, in \u001b[0;36mRandom.sample\u001b[0;34m(self, population, k, counts)\u001b[0m\n\u001b[1;32m 447\u001b[0m randbelow \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_randbelow\n\u001b[1;32m 448\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;241m0\u001b[39m \u001b[38;5;241m<\u001b[39m\u001b[38;5;241m=\u001b[39m k \u001b[38;5;241m<\u001b[39m\u001b[38;5;241m=\u001b[39m n:\n\u001b[0;32m--> 449\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSample larger than population or is negative\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 450\u001b[0m result \u001b[38;5;241m=\u001b[39m [\u001b[38;5;28;01mNone\u001b[39;00m] \u001b[38;5;241m*\u001b[39m k\n\u001b[1;32m 451\u001b[0m setsize \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m21\u001b[39m \u001b[38;5;66;03m# size of a small set minus size of an empty list\u001b[39;00m\n",
810
- "\u001b[0;31mValueError\u001b[0m: Sample larger than population or is negative"
811
- ]
812
- }
813
- ],
814
- "source": [
815
- "from tqdm.autonotebook import tqdm\n",
816
- "import random\n",
817
- "new_data = []\n",
818
- "for idx, unit in tqdm(enumerate(data_dict['data'])):\n",
819
- " tmp_dict = {k:\"\" for k in ['NAME','room type', 'price','minimum nights','review rate number','house_rules']}\n",
820
- " tmp_dict[\"NAME\"] = unit[1]\n",
821
- " tmp_dict[\"room type\"] = unit[13]\n",
822
- " if unit[13] == \"Shared room\":\n",
823
- " tmp_dict[\"maximum occupancy\"] = 1\n",
824
- " elif unit[13] == \"Hotel room\":\n",
825
- " tmp_dict[\"maximum occupancy\"] = random.randint(1, 2)\n",
826
- " elif unit[13] == \"Private room\":\n",
827
- " tmp_dict[\"maximum occupancy\"] = random.randint(1, 2)\n",
828
- " elif unit[13] == \"Entire home/apt\":\n",
829
- " try:\n",
830
- " tmp_dict[\"maximum occupancy\"] = random.randint(2, max(3,eval(unit[15].replace(\"$\",\"\").replace(\",\",\"\"))//100))\n",
831
- " except:\n",
832
- " tmp_dict[\"maximum occupancy\"] = random.randint(2, max(3,unit[15]//100))\n",
833
- " tmp_dict[\"price\"] = unit[15].replace(\"$\",\"\").replace(\",\",\"\")\n",
834
- " tmp_dict[\"minimum nights\"] = unit[17]\n",
835
- " tmp_dict[\"review rate number\"] = unit[21]\n",
836
- " tmp_dict[\"house_rules\"] = unit[24]\n",
837
- " tmp_dict[\"city\"] = random.sample(city_set,1)[0]\n",
838
- " new_data.append(tmp_dict)"
839
- ]
840
- },
841
- {
842
- "cell_type": "code",
843
- "execution_count": 20,
844
- "id": "fd3e8257",
845
- "metadata": {},
846
- "outputs": [
847
- {
848
- "data": {
849
- "text/plain": [
850
- "102599"
851
- ]
852
- },
853
- "execution_count": 20,
854
- "metadata": {},
855
- "output_type": "execute_result"
856
- }
857
- ],
858
- "source": [
859
- "len(new_data)"
860
- ]
861
- },
862
- {
863
- "cell_type": "code",
864
- "execution_count": 21,
865
- "id": "bfb243c0",
866
- "metadata": {},
867
- "outputs": [],
868
- "source": [
869
- "df = pd.DataFrame(new_data)"
870
- ]
871
- },
872
- {
873
- "cell_type": "code",
874
- "execution_count": 23,
875
- "id": "af7e3411",
876
- "metadata": {},
877
- "outputs": [],
878
- "source": [
879
- "df.to_csv('/home/xj/toolAugEnv/code/toolConstraint/database/hotels/clean_hotels_2022.csv')"
880
- ]
881
- },
882
- {
883
- "cell_type": "code",
884
- "execution_count": 22,
885
- "id": "71d21fea",
886
- "metadata": {},
887
- "outputs": [
888
- {
889
- "data": {
890
- "text/html": [
891
- "<div>\n",
892
- "<style scoped>\n",
893
- " .dataframe tbody tr th:only-of-type {\n",
894
- " vertical-align: middle;\n",
895
- " }\n",
896
- "\n",
897
- " .dataframe tbody tr th {\n",
898
- " vertical-align: top;\n",
899
- " }\n",
900
- "\n",
901
- " .dataframe thead th {\n",
902
- " text-align: right;\n",
903
- " }\n",
904
- "</style>\n",
905
- "<table border=\"1\" class=\"dataframe\">\n",
906
- " <thead>\n",
907
- " <tr style=\"text-align: right;\">\n",
908
- " <th></th>\n",
909
- " <th>NAME</th>\n",
910
- " <th>room type</th>\n",
911
- " <th>price</th>\n",
912
- " <th>minimum nights</th>\n",
913
- " <th>review rate number</th>\n",
914
- " <th>house_rules</th>\n",
915
- " <th>maximum occupancy</th>\n",
916
- " <th>city</th>\n",
917
- " </tr>\n",
918
- " </thead>\n",
919
- " <tbody>\n",
920
- " <tr>\n",
921
- " <th>0</th>\n",
922
- " <td>Clean &amp; quiet apt home by the park</td>\n",
923
- " <td>Private room</td>\n",
924
- " <td>$966</td>\n",
925
- " <td>10.0</td>\n",
926
- " <td>4.0</td>\n",
927
- " <td>Clean up and treat the home the way you'd like...</td>\n",
928
- " <td>1</td>\n",
929
- " <td>Des Moines</td>\n",
930
- " </tr>\n",
931
- " <tr>\n",
932
- " <th>1</th>\n",
933
- " <td>Skylit Midtown Castle</td>\n",
934
- " <td>Entire home/apt</td>\n",
935
- " <td>$142</td>\n",
936
- " <td>30.0</td>\n",
937
- " <td>4.0</td>\n",
938
- " <td>Pet friendly but please confirm with me if the...</td>\n",
939
- " <td>2</td>\n",
940
- " <td>Wilmington</td>\n",
941
- " </tr>\n",
942
- " <tr>\n",
943
- " <th>2</th>\n",
944
- " <td>THE VILLAGE OF HARLEM....NEW YORK !</td>\n",
945
- " <td>Private room</td>\n",
946
- " <td>$620</td>\n",
947
- " <td>3.0</td>\n",
948
- " <td>5.0</td>\n",
949
- " <td>I encourage you to use my kitchen, cooking and...</td>\n",
950
- " <td>2</td>\n",
951
- " <td>St. George</td>\n",
952
- " </tr>\n",
953
- " <tr>\n",
954
- " <th>3</th>\n",
955
- " <td>NaN</td>\n",
956
- " <td>Entire home/apt</td>\n",
957
- " <td>$368</td>\n",
958
- " <td>30.0</td>\n",
959
- " <td>4.0</td>\n",
960
- " <td>NaN</td>\n",
961
- " <td>2</td>\n",
962
- " <td>Kalamazoo</td>\n",
963
- " </tr>\n",
964
- " <tr>\n",
965
- " <th>4</th>\n",
966
- " <td>Entire Apt: Spacious Studio/Loft by central park</td>\n",
967
- " <td>Entire home/apt</td>\n",
968
- " <td>$204</td>\n",
969
- " <td>10.0</td>\n",
970
- " <td>3.0</td>\n",
971
- " <td>Please no smoking in the house, porch or on th...</td>\n",
972
- " <td>3</td>\n",
973
- " <td>Cheyenne</td>\n",
974
- " </tr>\n",
975
- " <tr>\n",
976
- " <th>...</th>\n",
977
- " <td>...</td>\n",
978
- " <td>...</td>\n",
979
- " <td>...</td>\n",
980
- " <td>...</td>\n",
981
- " <td>...</td>\n",
982
- " <td>...</td>\n",
983
- " <td>...</td>\n",
984
- " <td>...</td>\n",
985
- " </tr>\n",
986
- " <tr>\n",
987
- " <th>102594</th>\n",
988
- " <td>Spare room in Williamsburg</td>\n",
989
- " <td>Private room</td>\n",
990
- " <td>$844</td>\n",
991
- " <td>1.0</td>\n",
992
- " <td>3.0</td>\n",
993
- " <td>No Smoking No Parties or Events of any kind Pl...</td>\n",
994
- " <td>1</td>\n",
995
- " <td>White Plains</td>\n",
996
- " </tr>\n",
997
- " <tr>\n",
998
- " <th>102595</th>\n",
999
- " <td>Best Location near Columbia U</td>\n",
1000
- " <td>Private room</td>\n",
1001
- " <td>$837</td>\n",
1002
- " <td>1.0</td>\n",
1003
- " <td>2.0</td>\n",
1004
- " <td>House rules: Guests agree to the following ter...</td>\n",
1005
- " <td>2</td>\n",
1006
- " <td>Mosinee</td>\n",
1007
- " </tr>\n",
1008
- " <tr>\n",
1009
- " <th>102596</th>\n",
1010
- " <td>Comfy, bright room in Brooklyn</td>\n",
1011
- " <td>Private room</td>\n",
1012
- " <td>$988</td>\n",
1013
- " <td>3.0</td>\n",
1014
- " <td>5.0</td>\n",
1015
- " <td>NaN</td>\n",
1016
- " <td>2</td>\n",
1017
- " <td>Amarillo</td>\n",
1018
- " </tr>\n",
1019
- " <tr>\n",
1020
- " <th>102597</th>\n",
1021
- " <td>Big Studio-One Stop from Midtown</td>\n",
1022
- " <td>Entire home/apt</td>\n",
1023
- " <td>$546</td>\n",
1024
- " <td>2.0</td>\n",
1025
- " <td>3.0</td>\n",
1026
- " <td>NaN</td>\n",
1027
- " <td>4</td>\n",
1028
- " <td>Binghamton</td>\n",
1029
- " </tr>\n",
1030
- " <tr>\n",
1031
- " <th>102598</th>\n",
1032
- " <td>585 sf Luxury Studio</td>\n",
1033
- " <td>Entire home/apt</td>\n",
1034
- " <td>$1,032</td>\n",
1035
- " <td>1.0</td>\n",
1036
- " <td>3.0</td>\n",
1037
- " <td>NaN</td>\n",
1038
- " <td>7</td>\n",
1039
- " <td>Flint</td>\n",
1040
- " </tr>\n",
1041
- " </tbody>\n",
1042
- "</table>\n",
1043
- "<p>102599 rows × 8 columns</p>\n",
1044
- "</div>"
1045
- ],
1046
- "text/plain": [
1047
- " NAME room type \n",
1048
- "0 Clean & quiet apt home by the park Private room \\\n",
1049
- "1 Skylit Midtown Castle Entire home/apt \n",
1050
- "2 THE VILLAGE OF HARLEM....NEW YORK ! Private room \n",
1051
- "3 NaN Entire home/apt \n",
1052
- "4 Entire Apt: Spacious Studio/Loft by central park Entire home/apt \n",
1053
- "... ... ... \n",
1054
- "102594 Spare room in Williamsburg Private room \n",
1055
- "102595 Best Location near Columbia U Private room \n",
1056
- "102596 Comfy, bright room in Brooklyn Private room \n",
1057
- "102597 Big Studio-One Stop from Midtown Entire home/apt \n",
1058
- "102598 585 sf Luxury Studio Entire home/apt \n",
1059
- "\n",
1060
- " price minimum nights review rate number \n",
1061
- "0 $966 10.0 4.0 \\\n",
1062
- "1 $142 30.0 4.0 \n",
1063
- "2 $620 3.0 5.0 \n",
1064
- "3 $368 30.0 4.0 \n",
1065
- "4 $204 10.0 3.0 \n",
1066
- "... ... ... ... \n",
1067
- "102594 $844 1.0 3.0 \n",
1068
- "102595 $837 1.0 2.0 \n",
1069
- "102596 $988 3.0 5.0 \n",
1070
- "102597 $546 2.0 3.0 \n",
1071
- "102598 $1,032 1.0 3.0 \n",
1072
- "\n",
1073
- " house_rules maximum occupancy \n",
1074
- "0 Clean up and treat the home the way you'd like... 1 \\\n",
1075
- "1 Pet friendly but please confirm with me if the... 2 \n",
1076
- "2 I encourage you to use my kitchen, cooking and... 2 \n",
1077
- "3 NaN 2 \n",
1078
- "4 Please no smoking in the house, porch or on th... 3 \n",
1079
- "... ... ... \n",
1080
- "102594 No Smoking No Parties or Events of any kind Pl... 1 \n",
1081
- "102595 House rules: Guests agree to the following ter... 2 \n",
1082
- "102596 NaN 2 \n",
1083
- "102597 NaN 4 \n",
1084
- "102598 NaN 7 \n",
1085
- "\n",
1086
- " city \n",
1087
- "0 Des Moines \n",
1088
- "1 Wilmington \n",
1089
- "2 St. George \n",
1090
- "3 Kalamazoo \n",
1091
- "4 Cheyenne \n",
1092
- "... ... \n",
1093
- "102594 White Plains \n",
1094
- "102595 Mosinee \n",
1095
- "102596 Amarillo \n",
1096
- "102597 Binghamton \n",
1097
- "102598 Flint \n",
1098
- "\n",
1099
- "[102599 rows x 8 columns]"
1100
- ]
1101
- },
1102
- "execution_count": 22,
1103
- "metadata": {},
1104
- "output_type": "execute_result"
1105
- }
1106
- ],
1107
- "source": [
1108
- "df"
1109
- ]
1110
- },
1111
- {
1112
- "cell_type": "code",
1113
- "execution_count": 50,
1114
- "id": "0ec56283",
1115
- "metadata": {},
1116
- "outputs": [],
1117
- "source": [
1118
- "import pandas as pd\n",
1119
- "data = pd.read_csv('/home/xj/toolAugEnv/code/toolConstraint/database/hotels/clean_hotels_2022.csv')"
1120
- ]
1121
- },
1122
- {
1123
- "cell_type": "code",
1124
- "execution_count": 52,
1125
- "id": "5dc27048",
1126
- "metadata": {},
1127
- "outputs": [
1128
- {
1129
- "data": {
1130
- "text/html": [
1131
- "<div>\n",
1132
- "<style scoped>\n",
1133
- " .dataframe tbody tr th:only-of-type {\n",
1134
- " vertical-align: middle;\n",
1135
- " }\n",
1136
- "\n",
1137
- " .dataframe tbody tr th {\n",
1138
- " vertical-align: top;\n",
1139
- " }\n",
1140
- "\n",
1141
- " .dataframe thead th {\n",
1142
- " text-align: right;\n",
1143
- " }\n",
1144
- "</style>\n",
1145
- "<table border=\"1\" class=\"dataframe\">\n",
1146
- " <thead>\n",
1147
- " <tr style=\"text-align: right;\">\n",
1148
- " <th></th>\n",
1149
- " <th>Unnamed: 0</th>\n",
1150
- " <th>NAME</th>\n",
1151
- " <th>room type</th>\n",
1152
- " <th>price</th>\n",
1153
- " <th>minimum nights</th>\n",
1154
- " <th>review rate number</th>\n",
1155
- " <th>house_rules</th>\n",
1156
- " <th>maximum occupancy</th>\n",
1157
- " <th>city</th>\n",
1158
- " </tr>\n",
1159
- " </thead>\n",
1160
- " <tbody>\n",
1161
- " <tr>\n",
1162
- " <th>0</th>\n",
1163
- " <td>0</td>\n",
1164
- " <td>Clean &amp; quiet apt home by the park</td>\n",
1165
- " <td>Private room</td>\n",
1166
- " <td>$966</td>\n",
1167
- " <td>10.0</td>\n",
1168
- " <td>4.0</td>\n",
1169
- " <td>Clean up and treat the home the way you'd like...</td>\n",
1170
- " <td>1</td>\n",
1171
- " <td>Des Moines</td>\n",
1172
- " </tr>\n",
1173
- " <tr>\n",
1174
- " <th>1</th>\n",
1175
- " <td>1</td>\n",
1176
- " <td>Skylit Midtown Castle</td>\n",
1177
- " <td>Entire home/apt</td>\n",
1178
- " <td>$142</td>\n",
1179
- " <td>30.0</td>\n",
1180
- " <td>4.0</td>\n",
1181
- " <td>Pet friendly but please confirm with me if the...</td>\n",
1182
- " <td>2</td>\n",
1183
- " <td>Wilmington</td>\n",
1184
- " </tr>\n",
1185
- " <tr>\n",
1186
- " <th>2</th>\n",
1187
- " <td>2</td>\n",
1188
- " <td>THE VILLAGE OF HARLEM....NEW YORK !</td>\n",
1189
- " <td>Private room</td>\n",
1190
- " <td>$620</td>\n",
1191
- " <td>3.0</td>\n",
1192
- " <td>5.0</td>\n",
1193
- " <td>I encourage you to use my kitchen, cooking and...</td>\n",
1194
- " <td>2</td>\n",
1195
- " <td>St. George</td>\n",
1196
- " </tr>\n",
1197
- " <tr>\n",
1198
- " <th>3</th>\n",
1199
- " <td>3</td>\n",
1200
- " <td>NaN</td>\n",
1201
- " <td>Entire home/apt</td>\n",
1202
- " <td>$368</td>\n",
1203
- " <td>30.0</td>\n",
1204
- " <td>4.0</td>\n",
1205
- " <td>NaN</td>\n",
1206
- " <td>2</td>\n",
1207
- " <td>Kalamazoo</td>\n",
1208
- " </tr>\n",
1209
- " <tr>\n",
1210
- " <th>4</th>\n",
1211
- " <td>4</td>\n",
1212
- " <td>Entire Apt: Spacious Studio/Loft by central park</td>\n",
1213
- " <td>Entire home/apt</td>\n",
1214
- " <td>$204</td>\n",
1215
- " <td>10.0</td>\n",
1216
- " <td>3.0</td>\n",
1217
- " <td>Please no smoking in the house, porch or on th...</td>\n",
1218
- " <td>3</td>\n",
1219
- " <td>Cheyenne</td>\n",
1220
- " </tr>\n",
1221
- " <tr>\n",
1222
- " <th>...</th>\n",
1223
- " <td>...</td>\n",
1224
- " <td>...</td>\n",
1225
- " <td>...</td>\n",
1226
- " <td>...</td>\n",
1227
- " <td>...</td>\n",
1228
- " <td>...</td>\n",
1229
- " <td>...</td>\n",
1230
- " <td>...</td>\n",
1231
- " <td>...</td>\n",
1232
- " </tr>\n",
1233
- " <tr>\n",
1234
- " <th>102594</th>\n",
1235
- " <td>102594</td>\n",
1236
- " <td>Spare room in Williamsburg</td>\n",
1237
- " <td>Private room</td>\n",
1238
- " <td>$844</td>\n",
1239
- " <td>1.0</td>\n",
1240
- " <td>3.0</td>\n",
1241
- " <td>No Smoking No Parties or Events of any kind Pl...</td>\n",
1242
- " <td>1</td>\n",
1243
- " <td>White Plains</td>\n",
1244
- " </tr>\n",
1245
- " <tr>\n",
1246
- " <th>102595</th>\n",
1247
- " <td>102595</td>\n",
1248
- " <td>Best Location near Columbia U</td>\n",
1249
- " <td>Private room</td>\n",
1250
- " <td>$837</td>\n",
1251
- " <td>1.0</td>\n",
1252
- " <td>2.0</td>\n",
1253
- " <td>House rules: Guests agree to the following ter...</td>\n",
1254
- " <td>2</td>\n",
1255
- " <td>Mosinee</td>\n",
1256
- " </tr>\n",
1257
- " <tr>\n",
1258
- " <th>102596</th>\n",
1259
- " <td>102596</td>\n",
1260
- " <td>Comfy, bright room in Brooklyn</td>\n",
1261
- " <td>Private room</td>\n",
1262
- " <td>$988</td>\n",
1263
- " <td>3.0</td>\n",
1264
- " <td>5.0</td>\n",
1265
- " <td>NaN</td>\n",
1266
- " <td>2</td>\n",
1267
- " <td>Amarillo</td>\n",
1268
- " </tr>\n",
1269
- " <tr>\n",
1270
- " <th>102597</th>\n",
1271
- " <td>102597</td>\n",
1272
- " <td>Big Studio-One Stop from Midtown</td>\n",
1273
- " <td>Entire home/apt</td>\n",
1274
- " <td>$546</td>\n",
1275
- " <td>2.0</td>\n",
1276
- " <td>3.0</td>\n",
1277
- " <td>NaN</td>\n",
1278
- " <td>4</td>\n",
1279
- " <td>Binghamton</td>\n",
1280
- " </tr>\n",
1281
- " <tr>\n",
1282
- " <th>102598</th>\n",
1283
- " <td>102598</td>\n",
1284
- " <td>585 sf Luxury Studio</td>\n",
1285
- " <td>Entire home/apt</td>\n",
1286
- " <td>$1,032</td>\n",
1287
- " <td>1.0</td>\n",
1288
- " <td>3.0</td>\n",
1289
- " <td>NaN</td>\n",
1290
- " <td>7</td>\n",
1291
- " <td>Flint</td>\n",
1292
- " </tr>\n",
1293
- " </tbody>\n",
1294
- "</table>\n",
1295
- "<p>102599 rows × 9 columns</p>\n",
1296
- "</div>"
1297
- ],
1298
- "text/plain": [
1299
- " Unnamed: 0 NAME \n",
1300
- "0 0 Clean & quiet apt home by the park \\\n",
1301
- "1 1 Skylit Midtown Castle \n",
1302
- "2 2 THE VILLAGE OF HARLEM....NEW YORK ! \n",
1303
- "3 3 NaN \n",
1304
- "4 4 Entire Apt: Spacious Studio/Loft by central park \n",
1305
- "... ... ... \n",
1306
- "102594 102594 Spare room in Williamsburg \n",
1307
- "102595 102595 Best Location near Columbia U \n",
1308
- "102596 102596 Comfy, bright room in Brooklyn \n",
1309
- "102597 102597 Big Studio-One Stop from Midtown \n",
1310
- "102598 102598 585 sf Luxury Studio \n",
1311
- "\n",
1312
- " room type price minimum nights review rate number \n",
1313
- "0 Private room $966 10.0 4.0 \\\n",
1314
- "1 Entire home/apt $142 30.0 4.0 \n",
1315
- "2 Private room $620 3.0 5.0 \n",
1316
- "3 Entire home/apt $368 30.0 4.0 \n",
1317
- "4 Entire home/apt $204 10.0 3.0 \n",
1318
- "... ... ... ... ... \n",
1319
- "102594 Private room $844 1.0 3.0 \n",
1320
- "102595 Private room $837 1.0 2.0 \n",
1321
- "102596 Private room $988 3.0 5.0 \n",
1322
- "102597 Entire home/apt $546 2.0 3.0 \n",
1323
- "102598 Entire home/apt $1,032 1.0 3.0 \n",
1324
- "\n",
1325
- " house_rules maximum occupancy \n",
1326
- "0 Clean up and treat the home the way you'd like... 1 \\\n",
1327
- "1 Pet friendly but please confirm with me if the... 2 \n",
1328
- "2 I encourage you to use my kitchen, cooking and... 2 \n",
1329
- "3 NaN 2 \n",
1330
- "4 Please no smoking in the house, porch or on th... 3 \n",
1331
- "... ... ... \n",
1332
- "102594 No Smoking No Parties or Events of any kind Pl... 1 \n",
1333
- "102595 House rules: Guests agree to the following ter... 2 \n",
1334
- "102596 NaN 2 \n",
1335
- "102597 NaN 4 \n",
1336
- "102598 NaN 7 \n",
1337
- "\n",
1338
- " city \n",
1339
- "0 Des Moines \n",
1340
- "1 Wilmington \n",
1341
- "2 St. George \n",
1342
- "3 Kalamazoo \n",
1343
- "4 Cheyenne \n",
1344
- "... ... \n",
1345
- "102594 White Plains \n",
1346
- "102595 Mosinee \n",
1347
- "102596 Amarillo \n",
1348
- "102597 Binghamton \n",
1349
- "102598 Flint \n",
1350
- "\n",
1351
- "[102599 rows x 9 columns]"
1352
- ]
1353
- },
1354
- "execution_count": 52,
1355
- "metadata": {},
1356
- "output_type": "execute_result"
1357
- }
1358
- ],
1359
- "source": [
1360
- "data"
1361
- ]
1362
- },
1363
- {
1364
- "cell_type": "code",
1365
- "execution_count": 63,
1366
- "id": "bebb9c93",
1367
- "metadata": {},
1368
- "outputs": [],
1369
- "source": [
1370
- "filtered_data = data[data.iloc[:, -3].notna()]"
1371
- ]
1372
- },
1373
- {
1374
- "cell_type": "code",
1375
- "execution_count": 64,
1376
- "id": "bd010fc9",
1377
- "metadata": {},
1378
- "outputs": [],
1379
- "source": [
1380
- "dict_representation = filtered_data.to_dict(orient='split')"
1381
- ]
1382
- },
1383
- {
1384
- "cell_type": "code",
1385
- "execution_count": 71,
1386
- "id": "e84db5c4",
1387
- "metadata": {},
1388
- "outputs": [
1389
- {
1390
- "data": {
1391
- "text/plain": [
1392
- "50468"
1393
- ]
1394
- },
1395
- "execution_count": 71,
1396
- "metadata": {},
1397
- "output_type": "execute_result"
1398
- }
1399
- ],
1400
- "source": [
1401
- "len(dict_representation['data'])"
1402
- ]
1403
- },
1404
- {
1405
- "cell_type": "code",
1406
- "execution_count": 67,
1407
- "id": "31eaadf3",
1408
- "metadata": {},
1409
- "outputs": [],
1410
- "source": [
1411
- "sample_df = filtered_data.sample(frac=0.1)"
1412
- ]
1413
- },
1414
- {
1415
- "cell_type": "code",
1416
- "execution_count": 69,
1417
- "id": "33998ec6",
1418
- "metadata": {},
1419
- "outputs": [],
1420
- "source": [
1421
- "sample_df.to_csv('/home/xj/toolAugEnv/code/toolConstraint/database/hotels/clean_hotels_2022.csv')"
1422
- ]
1423
- },
1424
- {
1425
- "cell_type": "code",
1426
- "execution_count": 72,
1427
- "id": "25396015",
1428
- "metadata": {},
1429
- "outputs": [
1430
- {
1431
- "data": {
1432
- "text/plain": [
1433
- "5047"
1434
- ]
1435
- },
1436
- "execution_count": 72,
1437
- "metadata": {},
1438
- "output_type": "execute_result"
1439
- }
1440
- ],
1441
- "source": [
1442
- "len(sample_df)"
1443
- ]
1444
- },
1445
- {
1446
- "cell_type": "code",
1447
- "execution_count": 3,
1448
- "id": "17d054b5",
1449
- "metadata": {},
1450
- "outputs": [],
1451
- "source": [
1452
- "import pandas as pd\n",
1453
- "data = pd.read_csv('/home/xj/toolAugEnv/code/toolConstraint/database/hotels/clean_hotels_2022.csv')"
1454
- ]
1455
- },
1456
- {
1457
- "cell_type": "code",
1458
- "execution_count": 4,
1459
- "id": "64db8d6c",
1460
- "metadata": {},
1461
- "outputs": [],
1462
- "source": [
1463
- "data_dict = data.to_dict(orient = 'split')"
1464
- ]
1465
- },
1466
- {
1467
- "cell_type": "code",
1468
- "execution_count": 21,
1469
- "id": "b32b2f0c",
1470
- "metadata": {},
1471
- "outputs": [
1472
- {
1473
- "name": "stdout",
1474
- "output_type": "stream",
1475
- "text": [
1476
- "0 Unnamed: 0.1\n",
1477
- "1 Unnamed: 0\n",
1478
- "2 NAME\n",
1479
- "3 room type\n",
1480
- "4 price\n",
1481
- "5 minimum nights\n",
1482
- "6 review rate number\n",
1483
- "7 house_rules\n",
1484
- "8 maximum occupancy\n",
1485
- "9 city\n"
1486
- ]
1487
- }
1488
- ],
1489
- "source": [
1490
- "for idx, unit in enumerate(data_dict['columns']):\n",
1491
- " print(idx,unit)"
1492
- ]
1493
- },
1494
- {
1495
- "cell_type": "code",
1496
- "execution_count": 8,
1497
- "id": "fe415c1c",
1498
- "metadata": {},
1499
- "outputs": [
1500
- {
1501
- "data": {
1502
- "text/plain": [
1503
- "[0,\n",
1504
- " 'Beautiful room upper manhttn.',\n",
1505
- " 'Private room',\n",
1506
- " 131.0,\n",
1507
- " 1.0,\n",
1508
- " 2.0,\n",
1509
- " 'No smoking. No pets. ',\n",
1510
- " 1,\n",
1511
- " 'Christiansted']"
1512
- ]
1513
- },
1514
- "execution_count": 8,
1515
- "metadata": {},
1516
- "output_type": "execute_result"
1517
- }
1518
- ],
1519
- "source": [
1520
- "data_dict['data'][0]"
1521
- ]
1522
- },
1523
- {
1524
- "cell_type": "code",
1525
- "execution_count": 40,
1526
- "id": "38cb5c5a",
1527
- "metadata": {},
1528
- "outputs": [],
1529
- "source": [
1530
- "import random\n",
1531
- "new_data = []\n",
1532
- "for idx, unit in enumerate(data_dict['data']):\n",
1533
- " tmp_dict = {k:j for k,j in zip(['NAME','room type', 'price','minimum nights','review rate number','house_rules','maximum occupancy','city'],unit[1:])}\n",
1534
- " if type(unit[4]) == str:\n",
1535
- " tmp_dict[\"price\"] = eval(unit[4].replace(\"$\",\"\").replace(\",\",\"\"))\n",
1536
- " house_rules_number = random.choice([0,1,1,1,2,2,3])\n",
1537
- " tmp_dict['house_rules'] = \" & \".join(x for x in random.sample([\"No parties\",\"No smoking\",\"No children under 10\",\"No pets\",\"No visitors\"],house_rules_number))\n",
1538
- " tmp_dict['city'] = tmp_dict['city'].split('/')[0]\n",
1539
- " new_data.append(tmp_dict)"
1540
- ]
1541
- },
1542
- {
1543
- "cell_type": "code",
1544
- "execution_count": 41,
1545
- "id": "ae3d551e",
1546
- "metadata": {},
1547
- "outputs": [
1548
- {
1549
- "data": {
1550
- "text/plain": [
1551
- "{'NAME': 'BIG room with bath & balcony in BK!',\n",
1552
- " 'room type': 'Private room',\n",
1553
- " 'price': 1123.0,\n",
1554
- " 'minimum nights': 1.0,\n",
1555
- " 'review rate number': 4.0,\n",
1556
- " 'house_rules': 'No parties',\n",
1557
- " 'maximum occupancy': 2,\n",
1558
- " 'city': 'Louisville'}"
1559
- ]
1560
- },
1561
- "execution_count": 41,
1562
- "metadata": {},
1563
- "output_type": "execute_result"
1564
- }
1565
- ],
1566
- "source": [
1567
- "new_data[2]"
1568
- ]
1569
- },
1570
- {
1571
- "cell_type": "code",
1572
- "execution_count": 42,
1573
- "id": "6fac856c",
1574
- "metadata": {},
1575
- "outputs": [
1576
- {
1577
- "name": "stdout",
1578
- "output_type": "stream",
1579
- "text": [
1580
- "\n",
1581
- "----------\n",
1582
- "No pets & No visitors & No smoking\n",
1583
- "----------\n",
1584
- "No parties & No visitors\n",
1585
- "----------\n",
1586
- "No children under 10 & No pets & No smoking\n",
1587
- "----------\n",
1588
- "No parties & No pets & No visitors\n",
1589
- "----------\n",
1590
- "No pets & No children under 10\n",
1591
- "----------\n",
1592
- "No children under 10 & No parties & No pets\n",
1593
- "----------\n",
1594
- "No visitors\n",
1595
- "----------\n",
1596
- "No parties & No children under 10\n",
1597
- "----------\n",
1598
- "No children under 10 & No smoking & No visitors\n",
1599
- "----------\n",
1600
- "No children under 10 & No parties & No smoking\n",
1601
- "----------\n",
1602
- "No pets & No smoking & No children under 10\n",
1603
- "----------\n",
1604
- "No pets & No visitors\n",
1605
- "----------\n",
1606
- "No visitors & No pets\n",
1607
- "----------\n",
1608
- "No children under 10 & No smoking & No pets\n",
1609
- "----------\n",
1610
- "No smoking & No parties & No pets\n",
1611
- "----------\n",
1612
- "No visitors & No children under 10 & No parties\n",
1613
- "----------\n",
1614
- "No parties & No children under 10 & No smoking\n",
1615
- "----------\n",
1616
- "No visitors & No children under 10 & No smoking\n",
1617
- "----------\n",
1618
- "No pets & No parties\n",
1619
- "----------\n",
1620
- "No smoking & No parties\n",
1621
- "----------\n",
1622
- "No smoking & No children under 10\n",
1623
- "----------\n",
1624
- "No parties & No children under 10 & No visitors\n",
1625
- "----------\n",
1626
- "No children under 10 & No smoking\n",
1627
- "----------\n",
1628
- "No visitors & No pets & No smoking\n",
1629
- "----------\n",
1630
- "No pets\n",
1631
- "----------\n",
1632
- "No children under 10 & No pets\n",
1633
- "----------\n",
1634
- "No visitors & No smoking\n",
1635
- "----------\n",
1636
- "No smoking\n",
1637
- "----------\n",
1638
- "No parties & No smoking & No children under 10\n",
1639
- "----------\n",
1640
- "No parties & No smoking\n",
1641
- "----------\n",
1642
- "No smoking & No visitors & No parties\n",
1643
- "----------\n",
1644
- "No pets & No smoking\n",
1645
- "----------\n",
1646
- "No pets & No smoking & No parties\n",
1647
- "----------\n",
1648
- "No smoking & No children under 10 & No visitors\n",
1649
- "----------\n",
1650
- "No parties & No smoking & No visitors\n",
1651
- "----------\n",
1652
- "No visitors & No parties\n",
1653
- "----------\n",
1654
- "No visitors & No children under 10\n",
1655
- "----------\n",
1656
- "No parties & No smoking & No pets\n",
1657
- "----------\n",
1658
- "No children under 10 & No pets & No visitors\n",
1659
- "----------\n",
1660
- "No smoking & No pets & No parties\n",
1661
- "----------\n",
1662
- "No children under 10 & No smoking & No parties\n",
1663
- "----------\n",
1664
- "No visitors & No children under 10 & No pets\n",
1665
- "----------\n",
1666
- "No children under 10 & No parties\n",
1667
- "----------\n",
1668
- "No pets & No parties & No visitors\n",
1669
- "----------\n",
1670
- "No children under 10 & No visitors & No parties\n",
1671
- "----------\n",
1672
- "No parties & No pets\n",
1673
- "----------\n",
1674
- "No visitors & No parties & No pets\n",
1675
- "----------\n",
1676
- "No smoking & No pets & No visitors\n",
1677
- "----------\n",
1678
- "No smoking & No pets\n",
1679
- "----------\n",
1680
- "No visitors & No smoking & No children under 10\n",
1681
- "----------\n",
1682
- "No pets & No children under 10 & No parties\n",
1683
- "----------\n",
1684
- "No visitors & No pets & No children under 10\n",
1685
- "----------\n",
1686
- "No pets & No children under 10 & No smoking\n",
1687
- "----------\n",
1688
- "No parties & No visitors & No children under 10\n",
1689
- "----------\n",
1690
- "No pets & No smoking & No visitors\n",
1691
- "----------\n",
1692
- "No pets & No parties & No smoking\n",
1693
- "----------\n",
1694
- "No parties & No visitors & No smoking\n",
1695
- "----------\n",
1696
- "No pets & No visitors & No children under 10\n",
1697
- "----------\n",
1698
- "No parties & No visitors & No pets\n",
1699
- "----------\n",
1700
- "No children under 10\n",
1701
- "----------\n",
1702
- "No children under 10 & No pets & No parties\n",
1703
- "----------\n",
1704
- "No children under 10 & No visitors & No smoking\n",
1705
- "----------\n",
1706
- "No smoking & No children under 10 & No parties\n",
1707
- "----------\n",
1708
- "No pets & No parties & No children under 10\n",
1709
- "----------\n",
1710
- "No children under 10 & No visitors & No pets\n",
1711
- "----------\n",
1712
- "No parties & No pets & No smoking\n",
1713
- "----------\n",
1714
- "No pets & No children under 10 & No visitors\n",
1715
- "----------\n",
1716
- "No parties & No children under 10 & No pets\n",
1717
- "----------\n",
1718
- "No parties & No pets & No children under 10\n",
1719
- "----------\n",
1720
- "No smoking & No parties & No visitors\n",
1721
- "----------\n",
1722
- "No parties\n",
1723
- "----------\n",
1724
- "No visitors & No pets & No parties\n",
1725
- "----------\n",
1726
- "No children under 10 & No visitors\n",
1727
- "----------\n",
1728
- "No smoking & No children under 10 & No pets\n",
1729
- "----------\n",
1730
- "No smoking & No parties & No children under 10\n",
1731
- "----------\n",
1732
- "No visitors & No smoking & No parties\n",
1733
- "----------\n",
1734
- "No pets & No visitors & No parties\n",
1735
- "----------\n",
1736
- "No smoking & No visitors\n",
1737
- "----------\n",
1738
- "No smoking & No visitors & No children under 10\n",
1739
- "----------\n",
1740
- "No visitors & No smoking & No pets\n",
1741
- "----------\n",
1742
- "No smoking & No visitors & No pets\n",
1743
- "----------\n",
1744
- "No visitors & No parties & No smoking\n",
1745
- "----------\n",
1746
- "No smoking & No pets & No children under 10\n",
1747
- "----------\n",
1748
- "No children under 10 & No parties & No visitors\n",
1749
- "----------\n",
1750
- "No visitors & No parties & No children under 10\n",
1751
- "----------\n"
1752
- ]
1753
- }
1754
- ],
1755
- "source": [
1756
- "maximum_occupancy_set = set()\n",
1757
- "for unit in new_data:\n",
1758
- " maximum_occupancy_set.add(unit['house_rules'])\n",
1759
- "for unit in maximum_occupancy_set:\n",
1760
- " print(unit)\n",
1761
- " print(\"----------\")"
1762
- ]
1763
- },
1764
- {
1765
- "cell_type": "code",
1766
- "execution_count": 45,
1767
- "id": "8056052a",
1768
- "metadata": {},
1769
- "outputs": [
1770
- {
1771
- "data": {
1772
- "text/html": [
1773
- "<div>\n",
1774
- "<style scoped>\n",
1775
- " .dataframe tbody tr th:only-of-type {\n",
1776
- " vertical-align: middle;\n",
1777
- " }\n",
1778
- "\n",
1779
- " .dataframe tbody tr th {\n",
1780
- " vertical-align: top;\n",
1781
- " }\n",
1782
- "\n",
1783
- " .dataframe thead th {\n",
1784
- " text-align: right;\n",
1785
- " }\n",
1786
- "</style>\n",
1787
- "<table border=\"1\" class=\"dataframe\">\n",
1788
- " <thead>\n",
1789
- " <tr style=\"text-align: right;\">\n",
1790
- " <th></th>\n",
1791
- " <th>NAME</th>\n",
1792
- " <th>room type</th>\n",
1793
- " <th>price</th>\n",
1794
- " <th>minimum nights</th>\n",
1795
- " <th>review rate number</th>\n",
1796
- " <th>house_rules</th>\n",
1797
- " <th>maximum occupancy</th>\n",
1798
- " <th>city</th>\n",
1799
- " </tr>\n",
1800
- " </thead>\n",
1801
- " <tbody>\n",
1802
- " <tr>\n",
1803
- " <th>0</th>\n",
1804
- " <td>Beautiful room upper manhttn.</td>\n",
1805
- " <td>Private room</td>\n",
1806
- " <td>131.0</td>\n",
1807
- " <td>1.0</td>\n",
1808
- " <td>2.0</td>\n",
1809
- " <td>No smoking</td>\n",
1810
- " <td>1</td>\n",
1811
- " <td>Christiansted</td>\n",
1812
- " </tr>\n",
1813
- " <tr>\n",
1814
- " <th>1</th>\n",
1815
- " <td>Roomy and Comftable Room</td>\n",
1816
- " <td>Private room</td>\n",
1817
- " <td>548.0</td>\n",
1818
- " <td>10.0</td>\n",
1819
- " <td>5.0</td>\n",
1820
- " <td>No children under 10 &amp; No parties</td>\n",
1821
- " <td>2</td>\n",
1822
- " <td>Laredo</td>\n",
1823
- " </tr>\n",
1824
- " <tr>\n",
1825
- " <th>2</th>\n",
1826
- " <td>BIG room with bath &amp; balcony in BK!</td>\n",
1827
- " <td>Private room</td>\n",
1828
- " <td>1123.0</td>\n",
1829
- " <td>1.0</td>\n",
1830
- " <td>4.0</td>\n",
1831
- " <td>No parties</td>\n",
1832
- " <td>2</td>\n",
1833
- " <td>Louisville</td>\n",
1834
- " </tr>\n",
1835
- " <tr>\n",
1836
- " <th>3</th>\n",
1837
- " <td>4A-</td>\n",
1838
- " <td>Entire home/apt</td>\n",
1839
- " <td>225.0</td>\n",
1840
- " <td>30.0</td>\n",
1841
- " <td>4.0</td>\n",
1842
- " <td>No pets</td>\n",
1843
- " <td>3</td>\n",
1844
- " <td>Greensboro</td>\n",
1845
- " </tr>\n",
1846
- " <tr>\n",
1847
- " <th>4</th>\n",
1848
- " <td>Nice and Comfortable Private Room</td>\n",
1849
- " <td>Private room</td>\n",
1850
- " <td>761.0</td>\n",
1851
- " <td>2.0</td>\n",
1852
- " <td>1.0</td>\n",
1853
- " <td>No smoking &amp; No parties</td>\n",
1854
- " <td>2</td>\n",
1855
- " <td>Cape Girardeau</td>\n",
1856
- " </tr>\n",
1857
- " <tr>\n",
1858
- " <th>...</th>\n",
1859
- " <td>...</td>\n",
1860
- " <td>...</td>\n",
1861
- " <td>...</td>\n",
1862
- " <td>...</td>\n",
1863
- " <td>...</td>\n",
1864
- " <td>...</td>\n",
1865
- " <td>...</td>\n",
1866
- " <td>...</td>\n",
1867
- " </tr>\n",
1868
- " <tr>\n",
1869
- " <th>5042</th>\n",
1870
- " <td>Amazing LOFT in Prime Williamsburg</td>\n",
1871
- " <td>Private room</td>\n",
1872
- " <td>249.0</td>\n",
1873
- " <td>5.0</td>\n",
1874
- " <td>5.0</td>\n",
1875
- " <td>No pets</td>\n",
1876
- " <td>2</td>\n",
1877
- " <td>Trenton</td>\n",
1878
- " </tr>\n",
1879
- " <tr>\n",
1880
- " <th>5043</th>\n",
1881
- " <td>Private Queen Bedroom in Brooklyn</td>\n",
1882
- " <td>Private room</td>\n",
1883
- " <td>1032.0</td>\n",
1884
- " <td>1.0</td>\n",
1885
- " <td>1.0</td>\n",
1886
- " <td>No pets</td>\n",
1887
- " <td>1</td>\n",
1888
- " <td>Des Moines</td>\n",
1889
- " </tr>\n",
1890
- " <tr>\n",
1891
- " <th>5044</th>\n",
1892
- " <td>Bushwick / Bed Sty Retreat</td>\n",
1893
- " <td>Private room</td>\n",
1894
- " <td>546.0</td>\n",
1895
- " <td>2.0</td>\n",
1896
- " <td>4.0</td>\n",
1897
- " <td>No children under 10 &amp; No visitors &amp; No smoking</td>\n",
1898
- " <td>2</td>\n",
1899
- " <td>Scottsbluff</td>\n",
1900
- " </tr>\n",
1901
- " <tr>\n",
1902
- " <th>5045</th>\n",
1903
- " <td>Charming Mid-Century Studio</td>\n",
1904
- " <td>Entire home/apt</td>\n",
1905
- " <td>1115.0</td>\n",
1906
- " <td>2.0</td>\n",
1907
- " <td>5.0</td>\n",
1908
- " <td>No pets &amp; No children under 10</td>\n",
1909
- " <td>7</td>\n",
1910
- " <td>Butte</td>\n",
1911
- " </tr>\n",
1912
- " <tr>\n",
1913
- " <th>5046</th>\n",
1914
- " <td>3 Bed/ 2 Bath Full Apt. BK Heights</td>\n",
1915
- " <td>Entire home/apt</td>\n",
1916
- " <td>396.0</td>\n",
1917
- " <td>2.0</td>\n",
1918
- " <td>1.0</td>\n",
1919
- " <td>No smoking</td>\n",
1920
- " <td>3</td>\n",
1921
- " <td>Norfolk</td>\n",
1922
- " </tr>\n",
1923
- " </tbody>\n",
1924
- "</table>\n",
1925
- "<p>5047 rows × 8 columns</p>\n",
1926
- "</div>"
1927
- ],
1928
- "text/plain": [
1929
- " NAME room type price \n",
1930
- "0 Beautiful room upper manhttn. Private room 131.0 \\\n",
1931
- "1 Roomy and Comftable Room Private room 548.0 \n",
1932
- "2 BIG room with bath & balcony in BK! Private room 1123.0 \n",
1933
- "3 4A- Entire home/apt 225.0 \n",
1934
- "4 Nice and Comfortable Private Room Private room 761.0 \n",
1935
- "... ... ... ... \n",
1936
- "5042 Amazing LOFT in Prime Williamsburg Private room 249.0 \n",
1937
- "5043 Private Queen Bedroom in Brooklyn Private room 1032.0 \n",
1938
- "5044 Bushwick / Bed Sty Retreat Private room 546.0 \n",
1939
- "5045 Charming Mid-Century Studio Entire home/apt 1115.0 \n",
1940
- "5046 3 Bed/ 2 Bath Full Apt. BK Heights Entire home/apt 396.0 \n",
1941
- "\n",
1942
- " minimum nights review rate number \n",
1943
- "0 1.0 2.0 \\\n",
1944
- "1 10.0 5.0 \n",
1945
- "2 1.0 4.0 \n",
1946
- "3 30.0 4.0 \n",
1947
- "4 2.0 1.0 \n",
1948
- "... ... ... \n",
1949
- "5042 5.0 5.0 \n",
1950
- "5043 1.0 1.0 \n",
1951
- "5044 2.0 4.0 \n",
1952
- "5045 2.0 5.0 \n",
1953
- "5046 2.0 1.0 \n",
1954
- "\n",
1955
- " house_rules maximum occupancy \n",
1956
- "0 No smoking 1 \\\n",
1957
- "1 No children under 10 & No parties 2 \n",
1958
- "2 No parties 2 \n",
1959
- "3 No pets 3 \n",
1960
- "4 No smoking & No parties 2 \n",
1961
- "... ... ... \n",
1962
- "5042 No pets 2 \n",
1963
- "5043 No pets 1 \n",
1964
- "5044 No children under 10 & No visitors & No smoking 2 \n",
1965
- "5045 No pets & No children under 10 7 \n",
1966
- "5046 No smoking 3 \n",
1967
- "\n",
1968
- " city \n",
1969
- "0 Christiansted \n",
1970
- "1 Laredo \n",
1971
- "2 Louisville \n",
1972
- "3 Greensboro \n",
1973
- "4 Cape Girardeau \n",
1974
- "... ... \n",
1975
- "5042 Trenton \n",
1976
- "5043 Des Moines \n",
1977
- "5044 Scottsbluff \n",
1978
- "5045 Butte \n",
1979
- "5046 Norfolk \n",
1980
- "\n",
1981
- "[5047 rows x 8 columns]"
1982
- ]
1983
- },
1984
- "execution_count": 45,
1985
- "metadata": {},
1986
- "output_type": "execute_result"
1987
- }
1988
- ],
1989
- "source": [
1990
- "df"
1991
- ]
1992
- },
1993
- {
1994
- "cell_type": "code",
1995
- "execution_count": 44,
1996
- "id": "54423e0d",
1997
- "metadata": {},
1998
- "outputs": [],
1999
- "source": [
2000
- "df = pd.DataFrame(new_data)\n",
2001
- "df.to_csv('/home/xj/toolAugEnv/code/toolConstraint/database/hotels/clean_hotels_2022.csv')"
2002
- ]
2003
- },
2004
- {
2005
- "cell_type": "code",
2006
- "execution_count": null,
2007
- "id": "5767aa80",
2008
- "metadata": {},
2009
- "outputs": [],
2010
- "source": [
2011
- "df.rename(columns={'old_name1': 'new_name1', 'old_name2': 'new_name2'}, inplace=True)\n",
2012
- "df.to_csv('/home/xj/toolAugEnv/code/toolConstraint/database/hotels/clean_hotels_2022.csv')"
2013
- ]
2014
- }
2015
- ],
2016
- "metadata": {
2017
- "kernelspec": {
2018
- "display_name": "Python 3 (ipykernel)",
2019
- "language": "python",
2020
- "name": "python3"
2021
- },
2022
- "language_info": {
2023
- "codemirror_mode": {
2024
- "name": "ipython",
2025
- "version": 3
2026
- },
2027
- "file_extension": ".py",
2028
- "mimetype": "text/x-python",
2029
- "name": "python",
2030
- "nbconvert_exporter": "python",
2031
- "pygments_lexer": "ipython3",
2032
- "version": "3.9.16"
2033
- }
2034
- },
2035
- "nbformat": 4,
2036
- "nbformat_minor": 5
2037
- }