hsaest committed on
Commit
e6a4db6
1 Parent(s): d278fbb

Delete tools/restaurants/test.ipynb

Browse files
Files changed (1) hide show
  1. tools/restaurants/test.ipynb +0 -1152
tools/restaurants/test.ipynb DELETED
@@ -1,1152 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 50,
6
- "id": "1f939e73",
7
- "metadata": {},
8
- "outputs": [],
9
- "source": [
10
- "import pandas as pd\n",
11
- "data = pd.read_csv('/home/xj/toolAugEnv/code/toolConstraint/database/restaurants/zomato.csv')"
12
- ]
13
- },
14
- {
15
- "cell_type": "code",
16
- "execution_count": 51,
17
- "id": "876e4fff",
18
- "metadata": {},
19
- "outputs": [],
20
- "source": [
21
- "data_dict = data.to_dict(orient = 'split')"
22
- ]
23
- },
24
- {
25
- "cell_type": "code",
26
- "execution_count": 52,
27
- "id": "dbaee06c",
28
- "metadata": {},
29
- "outputs": [
30
- {
31
- "data": {
32
- "text/plain": [
33
- "['Restaurant ID',\n",
34
- " 'Restaurant Name',\n",
35
- " 'Country Code',\n",
36
- " 'City',\n",
37
- " 'Address',\n",
38
- " 'Locality',\n",
39
- " 'Locality Verbose',\n",
40
- " 'Longitude',\n",
41
- " 'Latitude',\n",
42
- " 'Cuisines',\n",
43
- " 'Average Cost for two',\n",
44
- " 'Currency',\n",
45
- " 'Has Table booking',\n",
46
- " 'Has Online delivery',\n",
47
- " 'Is delivering now',\n",
48
- " 'Switch to order menu',\n",
49
- " 'Price range',\n",
50
- " 'Aggregate rating',\n",
51
- " 'Rating color',\n",
52
- " 'Rating text',\n",
53
- " 'Votes']"
54
- ]
55
- },
56
- "execution_count": 52,
57
- "metadata": {},
58
- "output_type": "execute_result"
59
- }
60
- ],
61
- "source": [
62
- "data_dict['columns']"
63
- ]
64
- },
65
- {
66
- "cell_type": "code",
67
- "execution_count": 53,
68
- "id": "cb540128",
69
- "metadata": {},
70
- "outputs": [
71
- {
72
- "data": {
73
- "text/plain": [
74
- "9551"
75
- ]
76
- },
77
- "execution_count": 53,
78
- "metadata": {},
79
- "output_type": "execute_result"
80
- }
81
- ],
82
- "source": [
83
- "len(data_dict['data'])"
84
- ]
85
- },
86
- {
87
- "cell_type": "code",
88
- "execution_count": 14,
89
- "id": "ea9858c5",
90
- "metadata": {},
91
- "outputs": [
92
- {
93
- "data": {
94
- "text/plain": [
95
- "[6600970,\n",
96
- " 'Pizza 礞 Bessa',\n",
97
- " 30,\n",
98
- " 'Bras韄lia',\n",
99
- " 'SCS 214, Bloco C, Loja 40, Asa Sul, Bras韄lia',\n",
100
- " 'Asa Sul',\n",
101
- " 'Asa Sul, Bras韄lia',\n",
102
- " -47.91566667,\n",
103
- " -15.83116667,\n",
104
- " 'Pizza',\n",
105
- " 50,\n",
106
- " 'Brazilian Real(R$)',\n",
107
- " 'No',\n",
108
- " 'No',\n",
109
- " 'No',\n",
110
- " 'No',\n",
111
- " 2,\n",
112
- " 3.2,\n",
113
- " 'Orange',\n",
114
- " 'Average',\n",
115
- " 11]"
116
- ]
117
- },
118
- "execution_count": 14,
119
- "metadata": {},
120
- "output_type": "execute_result"
121
- }
122
- ],
123
- "source": [
124
- "data_dict['data'][26]"
125
- ]
126
- },
127
- {
128
- "cell_type": "code",
129
- "execution_count": 9,
130
- "id": "e21af5d1",
131
- "metadata": {},
132
- "outputs": [],
133
- "source": [
134
- "flight = pd.read_csv('/home/xj/toolAugEnv/code/toolConstraint/database/flights/clean_Flights_2022.csv')"
135
- ]
136
- },
137
- {
138
- "cell_type": "code",
139
- "execution_count": 10,
140
- "id": "966feef9",
141
- "metadata": {},
142
- "outputs": [],
143
- "source": [
144
- "flight = flight.to_dict(orient = 'split')"
145
- ]
146
- },
147
- {
148
- "cell_type": "code",
149
- "execution_count": 93,
150
- "id": "c5f81f43",
151
- "metadata": {},
152
- "outputs": [],
153
- "source": [
154
- "# Read the city list (one name per line); the with-block closes the file handle promptly.\n",
- "with open('/home/xj/toolAugEnv/code/toolConstraint/database/background/citySet.txt','r') as city_file:\n",
- "    city_set = city_file.read().strip().split('\\n')"
155
- ]
156
- },
157
- {
158
- "cell_type": "code",
159
- "execution_count": 94,
160
- "id": "bfce5f56",
161
- "metadata": {},
162
- "outputs": [
163
- {
164
- "data": {
165
- "text/plain": [
166
- "['San Diego',\n",
167
- " 'Pellston',\n",
168
- " 'Buffalo',\n",
169
- " 'Charlotte Amalie',\n",
170
- " 'Flagstaff',\n",
171
- " 'Evansville',\n",
172
- " 'Hilo',\n",
173
- " 'Twin Falls',\n",
174
- " 'Newark',\n",
175
- " 'State College',\n",
176
- " 'Johnstown',\n",
177
- " 'Charleston',\n",
178
- " 'Montgomery',\n",
179
- " 'Redding',\n",
180
- " 'Lynchburg',\n",
181
- " 'South Bend',\n",
182
- " 'Sarasota',\n",
183
- " 'Sioux Falls',\n",
184
- " 'Paducah',\n",
185
- " 'Kahului',\n",
186
- " 'Atlantic City',\n",
187
- " 'Bemidji',\n",
188
- " 'Toledo',\n",
189
- " 'Abilene',\n",
190
- " 'Sacramento',\n",
191
- " 'Amarillo',\n",
192
- " 'Moline',\n",
193
- " 'Hilton Head',\n",
194
- " 'Manhattan',\n",
195
- " 'Minneapolis',\n",
196
- " 'Fort Myers',\n",
197
- " 'Roswell',\n",
198
- " 'Harlingen',\n",
199
- " 'Seattle',\n",
200
- " 'Manchester',\n",
201
- " 'Gulfport',\n",
202
- " 'Gainesville',\n",
203
- " 'Pago Pago',\n",
204
- " 'Wrangell',\n",
205
- " 'Augusta',\n",
206
- " 'Waterloo',\n",
207
- " 'Yuma',\n",
208
- " 'Saipan',\n",
209
- " 'Christiansted',\n",
210
- " 'North Bend',\n",
211
- " 'Richmond',\n",
212
- " 'Albuquerque',\n",
213
- " 'Nashville',\n",
214
- " 'Aberdeen',\n",
215
- " 'Harrisburg',\n",
216
- " 'Fort Wayne',\n",
217
- " 'Green Bay',\n",
218
- " 'Wenatchee',\n",
219
- " 'Santa Fe',\n",
220
- " 'St. Petersburg',\n",
221
- " 'Belleville',\n",
222
- " 'Greensboro',\n",
223
- " 'Lake Charles',\n",
224
- " 'Traverse City',\n",
225
- " 'Erie',\n",
226
- " 'Niagara Falls',\n",
227
- " 'Pocatello',\n",
228
- " 'Idaho Falls',\n",
229
- " 'Alpena',\n",
230
- " 'Wilmington',\n",
231
- " 'Ontario',\n",
232
- " 'Iron Mountain',\n",
233
- " 'Lubbock',\n",
234
- " 'Helena',\n",
235
- " 'Kalamazoo',\n",
236
- " 'Cleveland',\n",
237
- " 'Grand Island',\n",
238
- " 'Bishop',\n",
239
- " 'New Bern',\n",
240
- " 'Melbourne',\n",
241
- " 'Bristol',\n",
242
- " 'Orlando',\n",
243
- " 'Bismarck',\n",
244
- " 'Fresno',\n",
245
- " 'Billings',\n",
246
- " 'Jackson',\n",
247
- " 'Daytona Beach',\n",
248
- " 'College Station',\n",
249
- " 'Jacksonville',\n",
250
- " 'Salt Lake City',\n",
251
- " 'Corpus Christi',\n",
252
- " 'Florence',\n",
253
- " 'Moab',\n",
254
- " 'Grand Forks',\n",
255
- " 'Las Vegas',\n",
256
- " 'Fairbanks',\n",
257
- " 'Petersburg',\n",
258
- " 'Wichita',\n",
259
- " 'Rhinelander',\n",
260
- " 'Kansas City',\n",
261
- " 'Dothan',\n",
262
- " 'Alamosa',\n",
263
- " 'Adak Island',\n",
264
- " 'Islip',\n",
265
- " 'Wichita Falls',\n",
266
- " 'Presque Isle',\n",
267
- " 'San Luis Obispo',\n",
268
- " 'Dayton',\n",
269
- " 'Brunswick',\n",
270
- " 'Fort Smith',\n",
271
- " \"Martha's Vineyard\",\n",
272
- " 'Portland',\n",
273
- " 'Waco',\n",
274
- " 'New York',\n",
275
- " 'Columbus',\n",
276
- " 'Tampa',\n",
277
- " 'Dallas',\n",
278
- " 'Little Rock',\n",
279
- " 'Kona',\n",
280
- " 'Clarksburg',\n",
281
- " 'San Angelo',\n",
282
- " 'Saginaw',\n",
283
- " 'Houston',\n",
284
- " 'Duluth',\n",
285
- " 'Valparaiso',\n",
286
- " 'Phoenix',\n",
287
- " 'Oakland',\n",
288
- " 'Watertown',\n",
289
- " 'Ogden',\n",
290
- " 'Cedar Rapids',\n",
291
- " 'Cape Girardeau',\n",
292
- " 'Sun Valley',\n",
293
- " 'Sault Ste. Marie',\n",
294
- " 'Trenton',\n",
295
- " 'Missoula',\n",
296
- " 'Pasco',\n",
297
- " 'Brainerd',\n",
298
- " 'Newburgh',\n",
299
- " 'Gustavus',\n",
300
- " 'Branson',\n",
301
- " 'Providence',\n",
302
- " 'Minot',\n",
303
- " 'Huntsville',\n",
304
- " 'San Antonio',\n",
305
- " 'Marquette',\n",
306
- " 'Owensboro',\n",
307
- " 'Del Rio',\n",
308
- " 'Portsmouth',\n",
309
- " 'Bloomington',\n",
310
- " 'Lexington',\n",
311
- " 'Santa Barbara',\n",
312
- " 'Baltimore',\n",
313
- " 'Panama City',\n",
314
- " 'Kodiak',\n",
315
- " 'Jacksonville',\n",
316
- " 'Yakima',\n",
317
- " 'Vernal',\n",
318
- " 'Salisbury',\n",
319
- " 'Mission',\n",
320
- " 'Newport News',\n",
321
- " 'Charlottesville',\n",
322
- " 'Grand Junction',\n",
323
- " 'Baton Rouge',\n",
324
- " 'Beaumont',\n",
325
- " 'Staunton',\n",
326
- " 'Kalispell',\n",
327
- " 'Key West',\n",
328
- " 'Worcester',\n",
329
- " 'West Palm Beach',\n",
330
- " 'Boise',\n",
331
- " 'Grand Rapids',\n",
332
- " 'Salina',\n",
333
- " 'Fort Leonard Wood',\n",
334
- " 'Walla Walla',\n",
335
- " 'Everett',\n",
336
- " 'Dillingham',\n",
337
- " 'Bellingham',\n",
338
- " 'Lansing',\n",
339
- " 'Madison',\n",
340
- " 'Victoria',\n",
341
- " 'Sioux City',\n",
342
- " 'Hattiesburg',\n",
343
- " 'Stockton',\n",
344
- " 'Anchorage',\n",
345
- " 'Charlotte',\n",
346
- " 'Jamestown',\n",
347
- " 'Laramie',\n",
348
- " 'Decatur',\n",
349
- " 'Durango',\n",
350
- " 'Longview',\n",
351
- " 'Syracuse',\n",
352
- " 'St. Cloud',\n",
353
- " 'Santa Rosa',\n",
354
- " 'Bakersfield',\n",
355
- " 'North Platte',\n",
356
- " 'La Crosse',\n",
357
- " 'Plattsburgh',\n",
358
- " 'Concord',\n",
359
- " 'Atlanta',\n",
360
- " 'Provo',\n",
361
- " 'Ogdensburg',\n",
362
- " 'Ithaca',\n",
363
- " 'Colorado Springs',\n",
364
- " 'Washington',\n",
365
- " 'Williston',\n",
366
- " 'Tulsa',\n",
367
- " 'Midland',\n",
368
- " 'Champaign',\n",
369
- " 'Devils Lake',\n",
370
- " 'Greer',\n",
371
- " 'Muskegon',\n",
372
- " 'Hibbing',\n",
373
- " 'Santa Ana',\n",
374
- " 'Ponce',\n",
375
- " 'Prescott',\n",
376
- " 'Indianapolis',\n",
377
- " 'International Falls',\n",
378
- " 'Rapid City',\n",
379
- " 'Ketchikan',\n",
380
- " 'St. Louis',\n",
381
- " 'Santa Maria',\n",
382
- " 'Elmira',\n",
383
- " 'Alexandria',\n",
384
- " 'San Jose',\n",
385
- " 'Tucson',\n",
386
- " 'San Juan',\n",
387
- " 'Dubuque',\n",
388
- " 'Burbank',\n",
389
- " 'Gunnison',\n",
390
- " 'Cedar City',\n",
391
- " 'Hyannis',\n",
392
- " 'Raleigh',\n",
393
- " 'Norfolk',\n",
394
- " 'New Orleans',\n",
395
- " 'Medford',\n",
396
- " 'White Plains',\n",
397
- " 'Oklahoma City',\n",
398
- " 'Chicago',\n",
399
- " 'El Paso',\n",
400
- " 'Rockford',\n",
401
- " 'Aguadilla',\n",
402
- " 'Omaha',\n",
403
- " 'Scottsbluff',\n",
404
- " 'Yakutat',\n",
405
- " 'Arcata',\n",
406
- " 'Spokane',\n",
407
- " 'Brownsville',\n",
408
- " 'Bend',\n",
409
- " 'Hagerstown',\n",
410
- " 'Peoria',\n",
411
- " 'Appleton',\n",
412
- " 'Roanoke',\n",
413
- " 'Eugene',\n",
414
- " 'Rock Springs',\n",
415
- " 'Dodge City',\n",
416
- " 'Austin',\n",
417
- " 'Miami',\n",
418
- " 'Dallas',\n",
419
- " 'Mosinee',\n",
420
- " 'Killeen',\n",
421
- " 'Lihue',\n",
422
- " 'Pittsburgh',\n",
423
- " 'Tallahassee',\n",
424
- " 'Butte',\n",
425
- " 'Lawton',\n",
426
- " 'Honolulu',\n",
427
- " 'Greenville',\n",
428
- " 'Juneau',\n",
429
- " 'Myrtle Beach',\n",
430
- " 'Boston',\n",
431
- " 'Charleston',\n",
432
- " 'Latrobe',\n",
433
- " 'Knoxville',\n",
434
- " 'Denver',\n",
435
- " 'Bangor',\n",
436
- " 'Albany',\n",
437
- " 'Punta Gorda',\n",
438
- " 'Fort Lauderdale',\n",
439
- " 'Philadelphia',\n",
440
- " 'Binghamton',\n",
441
- " 'Great Falls',\n",
442
- " 'Shreveport',\n",
443
- " 'Asheville',\n",
444
- " 'Cheyenne',\n",
445
- " 'Milwaukee',\n",
446
- " 'Nome',\n",
447
- " 'Laredo',\n",
448
- " 'Des Moines',\n",
449
- " 'Fayetteville',\n",
450
- " 'Lewisburg',\n",
451
- " 'Fort Dodge',\n",
452
- " 'Cody',\n",
453
- " 'Chattanooga',\n",
454
- " 'Deadhorse',\n",
455
- " 'Kotzebue',\n",
456
- " 'Sitka',\n",
457
- " 'Bozeman',\n",
458
- " 'Palm Springs',\n",
459
- " 'Memphis',\n",
460
- " 'Nantucket',\n",
461
- " 'Texarkana',\n",
462
- " 'Lewiston',\n",
463
- " 'Valdosta',\n",
464
- " 'Birmingham',\n",
465
- " 'Scranton',\n",
466
- " 'Pensacola',\n",
467
- " 'Hancock',\n",
468
- " 'Los Angeles',\n",
469
- " 'Mason City',\n",
470
- " 'Savannah',\n",
471
- " 'West Yellowstone',\n",
472
- " 'Long Beach',\n",
473
- " 'Reno',\n",
474
- " 'Akron',\n",
475
- " 'Louisville',\n",
476
- " 'Hartford',\n",
477
- " 'Cincinnati',\n",
478
- " 'Rochester',\n",
479
- " 'San Francisco',\n",
480
- " 'Detroit',\n",
481
- " 'Monterey',\n",
482
- " 'Escanaba',\n",
483
- " 'Eau Claire']"
484
- ]
485
- },
486
- "execution_count": 94,
487
- "metadata": {},
488
- "output_type": "execute_result"
489
- }
490
- ],
491
- "source": [
492
- "city_set"
493
- ]
494
- },
495
- {
496
- "cell_type": "code",
497
- "execution_count": 16,
498
- "id": "cd0f41fb",
499
- "metadata": {},
500
- "outputs": [
501
- {
502
- "name": "stdout",
503
- "output_type": "stream",
504
- "text": [
505
- "1 Restaurant Name\n",
506
- "3 City\n",
507
- "9 Cuisines\n",
508
- "10 Average Cost for two\n",
509
- "11 Currency\n",
510
- "17 Aggregate rating\n"
511
- ]
512
- }
513
- ],
514
- "source": [
515
- "for idx, unit in enumerate(data_dict['columns']):\n",
516
- " if unit in ['Restaurant Name', 'City', 'Cuisines', 'Average Cost for two','Aggregate rating','Currency']:\n",
517
- " print(idx,unit)"
518
- ]
519
- },
520
- {
521
- "cell_type": "code",
522
- "execution_count": 17,
523
- "id": "04fe71b7",
524
- "metadata": {},
525
- "outputs": [],
526
- "source": [
527
- "# Distinct values found at position 11 of each row (the 'Currency' column).\n",
528
- "currency_set = {\n",
529
- "    row[11] for row in data_dict['data']}"
530
- ]
531
- },
532
- {
533
- "cell_type": "code",
534
- "execution_count": 18,
535
- "id": "3988186d",
536
- "metadata": {},
537
- "outputs": [
538
- {
539
- "data": {
540
- "text/plain": [
541
- "{'Botswana Pula(P)',\n",
542
- " 'Brazilian Real(R$)',\n",
543
- " 'Dollar($)',\n",
544
- " 'Emirati Diram(AED)',\n",
545
- " 'Indian Rupees(Rs.)',\n",
546
- " 'Indonesian Rupiah(IDR)',\n",
547
- " 'NewZealand($)',\n",
548
- " 'Pounds(專)',\n",
549
- " 'Qatari Rial(QR)',\n",
550
- " 'Rand(R)',\n",
551
- " 'Sri Lankan Rupee(LKR)',\n",
552
- " 'Turkish Lira(TL)'}"
553
- ]
554
- },
555
- "execution_count": 18,
556
- "metadata": {},
557
- "output_type": "execute_result"
558
- }
559
- ],
560
- "source": [
561
- "currency_set"
562
- ]
563
- },
564
- {
565
- "cell_type": "code",
566
- "execution_count": 20,
567
- "id": "257e6a76",
568
- "metadata": {},
569
- "outputs": [],
570
- "source": [
571
- "exchange_rate = {\"Botswana Pula(P)\":0.074,\n",
572
- " \"Brazilian Real(R$)\":0.21, \n",
573
- " 'Dollar($)':1, \n",
574
- " 'Emirati Diram(AED)':0.27,\n",
575
- " \"Indian Rupees(Rs.)\":0.012087,\n",
576
- " \"Indonesian Rupiah(IDR)\":0.000066,\n",
577
- " 'NewZealand($)':0.61,\n",
578
- " \"Pounds(專)\":1.28,\n",
579
- " \"Qatari Rial(QR)\":0.27,\n",
580
- " 'Rand(R)': 0.054,\n",
581
- " \"Sri Lankan Rupee(LKR)\":0.0031,\n",
582
- " 'Turkish Lira(TL)':0.037\n",
583
- " }"
584
- ]
585
- },
586
- {
587
- "cell_type": "code",
588
- "execution_count": 136,
589
- "id": "c6b2691e",
590
- "metadata": {},
591
- "outputs": [
592
- {
593
- "data": {
594
- "application/vnd.jupyter.widget-view+json": {
595
- "model_id": "b7890e2caa7340d1870e641ada3249e1",
596
- "version_major": 2,
597
- "version_minor": 0
598
- },
599
- "text/plain": [
600
- "0it [00:00, ?it/s]"
601
- ]
602
- },
603
- "metadata": {},
604
- "output_type": "display_data"
605
- }
606
- ],
607
- "source": [
608
- "from tqdm.autonotebook import tqdm\n",
609
- "import random\n",
610
- "new_data = []\n",
611
- "\n",
612
- "# Iterate the list itself (not enumerate) so tqdm can read its length and show real progress.\n",
613
- "for unit in tqdm(data_dict['data']):\n",
614
- "    # Cost: half of column 10 scaled by exchange_rate, but never below a random 10-100 draw.\n",
615
- "    new_data.append({\"Name\": unit[1],\n",
616
- "                     \"City\": random.choice(city_set),\n",
617
- "                     \"Cuisines\": unit[9],\n",
618
- "                     \"Average Cost\": max(random.randint(10,100),int(unit[10] / 2 * exchange_rate[unit[11]])),\n",
619
- "                     \"Aggregate Rating\": unit[17]})"
620
- ]
621
- },
622
- {
623
- "cell_type": "code",
624
- "execution_count": 137,
625
- "id": "f27aaff1",
626
- "metadata": {},
627
- "outputs": [],
628
- "source": [
629
- "countries = [\"Chinese\", \"American\", \"Italian\", \"Mexican\", \"Indian\",\"Mediterranean\",\"French\"]\n",
630
- "cuisine = [\"Tea\",\"Seafood\",\"Bakery\",\"Desserts\",\"BBQ\",\"Fast Food\",\"Cafe\",\"Pizza\"]\n",
631
- "total_cuisine = countries + cuisine\n",
632
- "# Overwrite each record's cuisines with random labels: 0-2 entries from countries\n",
633
- "# (1 is listed twice, so it is the most likely count) plus 2-4 entries from cuisine.\n",
634
- "for unit in new_data:\n",
635
- "    final_cuisine = set()\n",
636
- "    final_cuisine.update(random.sample(countries, random.choice([0,1,1,2])))\n",
637
- "    final_cuisine.update(random.sample(cuisine, random.choice([2,3,4])))\n",
638
- "    # Set iteration order is arbitrary, so label order within the string is not stable.\n",
639
- "    unit['Cuisines'] = \", \".join(final_cuisine)"
645
- ]
646
- },
647
- {
648
- "cell_type": "code",
649
- "execution_count": 134,
650
- "id": "8388274c",
651
- "metadata": {},
652
- "outputs": [
653
- {
654
- "name": "stdout",
655
- "output_type": "stream",
656
- "text": [
657
- "1\n"
658
- ]
659
- }
660
- ],
661
- "source": [
662
- "choice_number = random.choices([1,1,2])[0]\n",
663
- "print(choice_number)"
664
- ]
665
- },
666
- {
667
- "cell_type": "code",
668
- "execution_count": 149,
669
- "id": "6eb0520a",
670
- "metadata": {},
671
- "outputs": [
672
- {
673
- "data": {
674
- "text/plain": [
675
- "[1]"
676
- ]
677
- },
678
- "execution_count": 149,
679
- "metadata": {},
680
- "output_type": "execute_result"
681
- }
682
- ],
683
- "source": [
684
- "random.choices([1,1,2])"
685
- ]
686
- },
687
- {
688
- "cell_type": "code",
689
- "execution_count": 148,
690
- "id": "9e3afb30",
691
- "metadata": {},
692
- "outputs": [
693
- {
694
- "data": {
695
- "text/plain": [
696
- "{'Name': 'Gurgaon Hights',\n",
697
- " 'City': 'New York',\n",
698
- " 'Cuisines': 'Cafe, American, Indian, Fast Food',\n",
699
- " 'Average Cost': 46,\n",
700
- " 'Aggregate Rating': 2.5}"
701
- ]
702
- },
703
- "execution_count": 148,
704
- "metadata": {},
705
- "output_type": "execute_result"
706
- }
707
- ],
708
- "source": [
709
- "new_data[1357]"
710
- ]
711
- },
712
- {
713
- "cell_type": "code",
714
- "execution_count": 143,
715
- "id": "bfb243c0",
716
- "metadata": {},
717
- "outputs": [],
718
- "source": [
719
- "df = pd.DataFrame(new_data)"
720
- ]
721
- },
722
- {
723
- "cell_type": "code",
724
- "execution_count": 144,
725
- "id": "af7e3411",
726
- "metadata": {},
727
- "outputs": [],
728
- "source": [
729
- "df.to_csv('/home/xj/toolAugEnv/code/toolConstraint/database/restaurants/clean_restaurant_2022.csv')"
730
- ]
731
- },
732
- {
733
- "cell_type": "code",
734
- "execution_count": 128,
735
- "id": "dad9bf9f",
736
- "metadata": {},
737
- "outputs": [
738
- {
739
- "data": {
740
- "text/html": [
741
- "<div>\n",
742
- "<style scoped>\n",
743
- " .dataframe tbody tr th:only-of-type {\n",
744
- " vertical-align: middle;\n",
745
- " }\n",
746
- "\n",
747
- " .dataframe tbody tr th {\n",
748
- " vertical-align: top;\n",
749
- " }\n",
750
- "\n",
751
- " .dataframe thead th {\n",
752
- " text-align: right;\n",
753
- " }\n",
754
- "</style>\n",
755
- "<table border=\"1\" class=\"dataframe\">\n",
756
- " <thead>\n",
757
- " <tr style=\"text-align: right;\">\n",
758
- " <th></th>\n",
759
- " <th>Name</th>\n",
760
- " <th>City</th>\n",
761
- " <th>Cuisines</th>\n",
762
- " <th>Average Cost</th>\n",
763
- " <th>Aggregate Rating</th>\n",
764
- " </tr>\n",
765
- " </thead>\n",
766
- " <tbody>\n",
767
- " <tr>\n",
768
- " <th>0</th>\n",
769
- " <td>Le Petit Souffle</td>\n",
770
- " <td>Concord</td>\n",
771
- " <td>French, BBQ, Desserts, Fast Food</td>\n",
772
- " <td>45</td>\n",
773
- " <td>4.8</td>\n",
774
- " </tr>\n",
775
- " <tr>\n",
776
- " <th>1</th>\n",
777
- " <td>Izakaya Kikufuji</td>\n",
778
- " <td>Niagara Falls</td>\n",
779
- " <td>Mediterranean, Desserts, Seafood</td>\n",
780
- " <td>44</td>\n",
781
- " <td>4.5</td>\n",
782
- " </tr>\n",
783
- " <tr>\n",
784
- " <th>2</th>\n",
785
- " <td>Heat - Edsa Shangri-La</td>\n",
786
- " <td>Walla Walla</td>\n",
787
- " <td>Italian, BBQ, Fast Food, Cafe, Indian, Seafood</td>\n",
788
- " <td>148</td>\n",
789
- " <td>4.4</td>\n",
790
- " </tr>\n",
791
- " <tr>\n",
792
- " <th>3</th>\n",
793
- " <td>Ooma</td>\n",
794
- " <td>Salt Lake City</td>\n",
795
- " <td>Pizza, Italian, Bakery, Cafe, Seafood</td>\n",
796
- " <td>55</td>\n",
797
- " <td>4.9</td>\n",
798
- " </tr>\n",
799
- " <tr>\n",
800
- " <th>4</th>\n",
801
- " <td>Sambo Kojin</td>\n",
802
- " <td>Rochester</td>\n",
803
- " <td>Tea, Pizza, French, Cafe, Mediterranean, Seafood</td>\n",
804
- " <td>88</td>\n",
805
- " <td>4.8</td>\n",
806
- " </tr>\n",
807
- " <tr>\n",
808
- " <th>...</th>\n",
809
- " <td>...</td>\n",
810
- " <td>...</td>\n",
811
- " <td>...</td>\n",
812
- " <td>...</td>\n",
813
- " <td>...</td>\n",
814
- " </tr>\n",
815
- " <tr>\n",
816
- " <th>9546</th>\n",
817
- " <td>Naml郾 Gurme</td>\n",
818
- " <td>Minneapolis</td>\n",
819
- " <td>Tea, American, Desserts</td>\n",
820
- " <td>84</td>\n",
821
- " <td>4.1</td>\n",
822
- " </tr>\n",
823
- " <tr>\n",
824
- " <th>9547</th>\n",
825
- " <td>Ceviz A埕ac郾</td>\n",
826
- " <td>Waco</td>\n",
827
- " <td>Tea, Cafe, BBQ, Mediterranean</td>\n",
828
- " <td>58</td>\n",
829
- " <td>4.2</td>\n",
830
- " </tr>\n",
831
- " <tr>\n",
832
- " <th>9548</th>\n",
833
- " <td>Huqqa</td>\n",
834
- " <td>Chicago</td>\n",
835
- " <td>Tea, Chinese, Bakery, Italian</td>\n",
836
- " <td>13</td>\n",
837
- " <td>3.7</td>\n",
838
- " </tr>\n",
839
- " <tr>\n",
840
- " <th>9549</th>\n",
841
- " <td>A侓侓k Kahve</td>\n",
842
- " <td>Grand Rapids</td>\n",
843
- " <td>Cafe, French, Bakery, Fast Food</td>\n",
844
- " <td>30</td>\n",
845
- " <td>4.0</td>\n",
846
- " </tr>\n",
847
- " <tr>\n",
848
- " <th>9550</th>\n",
849
- " <td>Walter's Coffee Roastery</td>\n",
850
- " <td>Hibbing</td>\n",
851
- " <td>Pizza, Mexican, Bakery, Cafe, Seafood</td>\n",
852
- " <td>20</td>\n",
853
- " <td>4.0</td>\n",
854
- " </tr>\n",
855
- " </tbody>\n",
856
- "</table>\n",
857
- "<p>9551 rows × 5 columns</p>\n",
858
- "</div>"
859
- ],
860
- "text/plain": [
861
- " Name City \\\n",
862
- "0 Le Petit Souffle Concord \n",
863
- "1 Izakaya Kikufuji Niagara Falls \n",
864
- "2 Heat - Edsa Shangri-La Walla Walla \n",
865
- "3 Ooma Salt Lake City \n",
866
- "4 Sambo Kojin Rochester \n",
867
- "... ... ... \n",
868
- "9546 Naml郾 Gurme Minneapolis \n",
869
- "9547 Ceviz A埕ac郾 Waco \n",
870
- "9548 Huqqa Chicago \n",
871
- "9549 A侓侓k Kahve Grand Rapids \n",
872
- "9550 Walter's Coffee Roastery Hibbing \n",
873
- "\n",
874
- " Cuisines Average Cost \\\n",
875
- "0 French, BBQ, Desserts, Fast Food 45 \n",
876
- "1 Mediterranean, Desserts, Seafood 44 \n",
877
- "2 Italian, BBQ, Fast Food, Cafe, Indian, Seafood 148 \n",
878
- "3 Pizza, Italian, Bakery, Cafe, Seafood 55 \n",
879
- "4 Tea, Pizza, French, Cafe, Mediterranean, Seafood 88 \n",
880
- "... ... ... \n",
881
- "9546 Tea, American, Desserts 84 \n",
882
- "9547 Tea, Cafe, BBQ, Mediterranean 58 \n",
883
- "9548 Tea, Chinese, Bakery, Italian 13 \n",
884
- "9549 Cafe, French, Bakery, Fast Food 30 \n",
885
- "9550 Pizza, Mexican, Bakery, Cafe, Seafood 20 \n",
886
- "\n",
887
- " Aggregate Rating \n",
888
- "0 4.8 \n",
889
- "1 4.5 \n",
890
- "2 4.4 \n",
891
- "3 4.9 \n",
892
- "4 4.8 \n",
893
- "... ... \n",
894
- "9546 4.1 \n",
895
- "9547 4.2 \n",
896
- "9548 3.7 \n",
897
- "9549 4.0 \n",
898
- "9550 4.0 \n",
899
- "\n",
900
- "[9551 rows x 5 columns]"
901
- ]
902
- },
903
- "execution_count": 128,
904
- "metadata": {},
905
- "output_type": "execute_result"
906
- }
907
- ],
908
- "source": [
909
- "df"
910
- ]
911
- },
912
- {
913
- "cell_type": "code",
914
- "execution_count": 48,
915
- "id": "e168b1c5",
916
- "metadata": {},
917
- "outputs": [],
918
- "source": [
919
- "cuisine_dict = {}\n",
920
- "for record in new_data:\n",
921
- "    # str() lets non-string values (they show up as 'nan' in the tally) be split like any label.\n",
922
- "    for label in str(record['Cuisines']).split(', '):\n",
923
- "        cuisine_dict[label] = cuisine_dict.get(label, 0) + 1"
926
- ]
927
- },
928
- {
929
- "cell_type": "code",
930
- "execution_count": 49,
931
- "id": "564d4bda",
932
- "metadata": {},
933
- "outputs": [
934
- {
935
- "name": "stdout",
936
- "output_type": "stream",
937
- "text": [
938
- "French 29\n",
939
- "Japanese 135\n",
940
- "Desserts 653\n",
941
- "Seafood 174\n",
942
- "Asian 233\n",
943
- "Filipino 10\n",
944
- "Indian 70\n",
945
- "Sushi 75\n",
946
- "Korean 21\n",
947
- "Chinese 2735\n",
948
- "European 148\n",
949
- "Mexican 181\n",
950
- "American 390\n",
951
- "Ice Cream 226\n",
952
- "Cafe 703\n",
953
- "Italian 764\n",
954
- "Pizza 381\n",
955
- "Bakery 745\n",
956
- "Mediterranean 112\n",
957
- "Fast Food 1986\n",
958
- "Brazilian 28\n",
959
- "Arabian 28\n",
960
- "Bar Food 39\n",
961
- "Grill 21\n",
962
- "International 21\n",
963
- "Peruvian 1\n",
964
- "Latin American 11\n",
965
- "Burger 251\n",
966
- "Juices 29\n",
967
- "Healthy Food 150\n",
968
- "Beverages 229\n",
969
- "Lebanese 69\n",
970
- "Sandwich 53\n",
971
- "Steak 62\n",
972
- "BBQ 33\n",
973
- "Gourmet Fast Food 1\n",
974
- "Mineira 1\n",
975
- "North Eastern 9\n",
976
- "nan 9\n",
977
- "Coffee and Tea 19\n",
978
- "Vegetarian 23\n",
979
- "Tapas 19\n",
980
- "Breakfast 41\n",
981
- "Diner 6\n",
982
- "Southern 24\n",
983
- "Southwestern 7\n",
984
- "Spanish 16\n",
985
- "Argentine 2\n",
986
- "Caribbean 7\n",
987
- "German 10\n",
988
- "Vietnamese 21\n",
989
- "Thai 234\n",
990
- "Modern Australian 11\n",
991
- "Teriyaki 2\n",
992
- "Cajun 10\n",
993
- "Canadian 1\n",
994
- "Tex-Mex 19\n",
995
- "Middle Eastern 22\n",
996
- "Greek 15\n",
997
- "Bubble Tea 1\n",
998
- "Tea 48\n",
999
- "Australian 5\n",
1000
- "Fusion 4\n",
1001
- "Cuban 2\n",
1002
- "Hawaiian 8\n",
1003
- "Salad 93\n",
1004
- "Irish 1\n",
1005
- "New American 2\n",
1006
- "Soul Food 1\n",
1007
- "Turkish 15\n",
1008
- "Pub Food 2\n",
1009
- "Persian 2\n",
1010
- "Continental 736\n",
1011
- "Singaporean 4\n",
1012
- "Malay 1\n",
1013
- "Cantonese 2\n",
1014
- "Dim Sum 3\n",
1015
- "Western 10\n",
1016
- "Finger Food 114\n",
1017
- "British 16\n",
1018
- "Deli 3\n",
1019
- "Indonesian 14\n",
1020
- "North Indian 3960\n",
1021
- "Mughlai 995\n",
1022
- "Biryani 177\n",
1023
- "South Indian 636\n",
1024
- "Pakistani 12\n",
1025
- "Afghani 14\n",
1026
- "Hyderabadi 26\n",
1027
- "Rajasthani 21\n",
1028
- "Street Food 562\n",
1029
- "Goan 20\n",
1030
- "African 8\n",
1031
- "Portuguese 7\n",
1032
- "Gujarati 11\n",
1033
- "Armenian 3\n",
1034
- "Mithai 380\n",
1035
- "Maharashtrian 10\n",
1036
- "Modern Indian 16\n",
1037
- "Charcoal Grill 4\n",
1038
- "Malaysian 22\n",
1039
- "Burmese 10\n",
1040
- "Chettinad 11\n",
1041
- "Parsi 8\n",
1042
- "Tibetan 44\n",
1043
- "Raw Meats 114\n",
1044
- "Kerala 23\n",
1045
- "Belgian 2\n",
1046
- "Kashmiri 20\n",
1047
- "South American 2\n",
1048
- "Bengali 29\n",
1049
- "Iranian 3\n",
1050
- "Lucknowi 13\n",
1051
- "Awadhi 11\n",
1052
- "Nepalese 9\n",
1053
- "Drinks Only 2\n",
1054
- "Oriya 2\n",
1055
- "Bihari 6\n",
1056
- "Assamese 4\n",
1057
- "Andhra 10\n",
1058
- "Mangalorean 4\n",
1059
- "Malwani 1\n",
1060
- "Cuisine Varies 1\n",
1061
- "Moroccan 5\n",
1062
- "Naga 8\n",
1063
- "Sri Lankan 5\n",
1064
- "Peranakan 1\n",
1065
- "Sunda 3\n",
1066
- "Ramen 2\n",
1067
- "Kiwi 6\n",
1068
- "Asian Fusion 2\n",
1069
- "Taiwanese 2\n",
1070
- "Fish and Chips 1\n",
1071
- "Contemporary 9\n",
1072
- "Scottish 3\n",
1073
- "Curry 6\n",
1074
- "Patisserie 4\n",
1075
- "South African 6\n",
1076
- "Durban 1\n",
1077
- "Kebab 10\n",
1078
- "Turkish Pizza 8\n",
1079
- "Izgara 2\n",
1080
- "World Cuisine 4\n",
1081
- "D韄ner 1\n",
1082
- "Restaurant Cafe 4\n",
1083
- "B韄rek 1\n"
1084
- ]
1085
- }
1086
- ],
1087
- "source": [
1088
- "for unit in cuisine_dict:\n",
1089
- " print(unit,cuisine_dict[unit])"
1090
- ]
1091
- },
1092
- {
1093
- "cell_type": "code",
1094
- "execution_count": null,
1095
- "id": "967426f0",
1096
- "metadata": {},
1097
- "outputs": [],
1098
- "source": [
1099
- "cuisine = [\"Chinese\", \"American\", \"Italian\", \"Mexican\", \"Indian\",\"Mediterranean\",\"Middle Eastern\",\"Breakfast\",\"Korean\",\"Asian\",\"French\",\"Tea\",\"Seafood\",\"Bakery\",\"Street Food\"]"
1100
- ]
1101
- },
1102
- {
1103
- "cell_type": "code",
1104
- "execution_count": 67,
1105
- "id": "880dd6bf",
1106
- "metadata": {},
1107
- "outputs": [],
1108
- "source": [
1109
- "countries = [\"Chinese\", \"American\", \"Italian\", \"Mexican\", \"Indian\",\"Mediterranean\",\"Middle Eastern\",\"Korean\",\"Asian\",\"French\"]"
1110
- ]
1111
- },
1112
- {
1113
- "cell_type": "code",
1114
- "execution_count": 68,
1115
- "id": "89d9aba9",
1116
- "metadata": {},
1117
- "outputs": [],
1118
- "source": [
1119
- "cuisine = [\"Tea\",\"Seafood\",\"Bakery\",\"Street Food\",\"Desserts\",\"BBQ\",\"Fast Food\",\"Cafe\",\"Pizza\"]"
1120
- ]
1121
- },
1122
- {
1123
- "cell_type": "code",
1124
- "execution_count": null,
1125
- "id": "ff103725",
1126
- "metadata": {},
1127
- "outputs": [],
1128
- "source": []
1129
- }
1130
- ],
1131
- "metadata": {
1132
- "kernelspec": {
1133
- "display_name": "Python 3 (ipykernel)",
1134
- "language": "python",
1135
- "name": "python3"
1136
- },
1137
- "language_info": {
1138
- "codemirror_mode": {
1139
- "name": "ipython",
1140
- "version": 3
1141
- },
1142
- "file_extension": ".py",
1143
- "mimetype": "text/x-python",
1144
- "name": "python",
1145
- "nbconvert_exporter": "python",
1146
- "pygments_lexer": "ipython3",
1147
- "version": "3.9.16"
1148
- }
1149
- },
1150
- "nbformat": 4,
1151
- "nbformat_minor": 5
1152
- }