{ "cells": [ { "cell_type": "code", "execution_count": 50, "id": "1f939e73", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "data = pd.read_csv('/home/xj/toolAugEnv/code/toolConstraint/database/restaurants/zomato.csv')" ] }, { "cell_type": "code", "execution_count": 51, "id": "876e4fff", "metadata": {}, "outputs": [], "source": [ "data_dict = data.to_dict(orient = 'split')" ] }, { "cell_type": "code", "execution_count": 52, "id": "dbaee06c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['Restaurant ID',\n", " 'Restaurant Name',\n", " 'Country Code',\n", " 'City',\n", " 'Address',\n", " 'Locality',\n", " 'Locality Verbose',\n", " 'Longitude',\n", " 'Latitude',\n", " 'Cuisines',\n", " 'Average Cost for two',\n", " 'Currency',\n", " 'Has Table booking',\n", " 'Has Online delivery',\n", " 'Is delivering now',\n", " 'Switch to order menu',\n", " 'Price range',\n", " 'Aggregate rating',\n", " 'Rating color',\n", " 'Rating text',\n", " 'Votes']" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data_dict['columns']" ] }, { "cell_type": "code", "execution_count": 53, "id": "cb540128", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "9551" ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(data_dict['data'])" ] }, { "cell_type": "code", "execution_count": 14, "id": "ea9858c5", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[6600970,\n", " 'Pizza 礞 Bessa',\n", " 30,\n", " 'Bras韄lia',\n", " 'SCS 214, Bloco C, Loja 40, Asa Sul, Bras韄lia',\n", " 'Asa Sul',\n", " 'Asa Sul, Bras韄lia',\n", " -47.91566667,\n", " -15.83116667,\n", " 'Pizza',\n", " 50,\n", " 'Brazilian Real(R$)',\n", " 'No',\n", " 'No',\n", " 'No',\n", " 'No',\n", " 2,\n", " 3.2,\n", " 'Orange',\n", " 'Average',\n", " 11]" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data_dict['data'][26]" ] }, { "cell_type": "code", "execution_count": 9, "id": "e21af5d1", "metadata": {}, "outputs": [], "source": [ "flight = pd.read_csv('/home/xj/toolAugEnv/code/toolConstraint/database/flights/clean_Flights_2022.csv')" ] }, { "cell_type": "code", "execution_count": 10, "id": "966feef9", "metadata": {}, "outputs": [], "source": [ "flight = flight.to_dict(orient = 'split')" ] }, { "cell_type": "code", "execution_count": 93, "id": "c5f81f43", "metadata": {}, "outputs": [], "source": [ "city_set = open('/home/xj/toolAugEnv/code/toolConstraint/database/background/citySet.txt','r').read().strip().split('\\n')" ] }, { "cell_type": "code", "execution_count": 94, "id": "bfce5f56", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['San Diego',\n", " 'Pellston',\n", " 'Buffalo',\n", " 'Charlotte Amalie',\n", " 'Flagstaff',\n", " 'Evansville',\n", " 'Hilo',\n", " 'Twin Falls',\n", " 'Newark',\n", " 'State College',\n", " 'Johnstown',\n", " 'Charleston',\n", " 'Montgomery',\n", " 'Redding',\n", " 'Lynchburg',\n", " 'South Bend',\n", " 'Sarasota',\n", " 'Sioux Falls',\n", " 'Paducah',\n", " 'Kahului',\n", " 'Atlantic City',\n", " 'Bemidji',\n", " 'Toledo',\n", " 'Abilene',\n", " 'Sacramento',\n", " 'Amarillo',\n", " 'Moline',\n", " 'Hilton Head',\n", " 'Manhattan',\n", " 'Minneapolis',\n", " 'Fort Myers',\n", " 'Roswell',\n", " 'Harlingen',\n", " 'Seattle',\n", " 'Manchester',\n", " 'Gulfport',\n", " 'Gainesville',\n", " 'Pago Pago',\n", " 'Wrangell',\n", " 'Augusta',\n", " 'Waterloo',\n", " 'Yuma',\n", " 'Saipan',\n", " 'Christiansted',\n", " 'North Bend',\n", " 'Richmond',\n", " 'Albuquerque',\n", " 'Nashville',\n", " 'Aberdeen',\n", " 'Harrisburg',\n", " 'Fort Wayne',\n", " 'Green Bay',\n", " 'Wenatchee',\n", " 'Santa Fe',\n", " 'St. Petersburg',\n", " 'Belleville',\n", " 'Greensboro',\n", " 'Lake Charles',\n", " 'Traverse City',\n", " 'Erie',\n", " 'Niagara Falls',\n", " 'Pocatello',\n", " 'Idaho Falls',\n", " 'Alpena',\n", " 'Wilmington',\n", " 'Ontario',\n", " 'Iron Mountain',\n", " 'Lubbock',\n", " 'Helena',\n", " 'Kalamazoo',\n", " 'Cleveland',\n", " 'Grand Island',\n", " 'Bishop',\n", " 'New Bern',\n", " 'Melbourne',\n", " 'Bristol',\n", " 'Orlando',\n", " 'Bismarck',\n", " 'Fresno',\n", " 'Billings',\n", " 'Jackson',\n", " 'Daytona Beach',\n", " 'College Station',\n", " 'Jacksonville',\n", " 'Salt Lake City',\n", " 'Corpus Christi',\n", " 'Florence',\n", " 'Moab',\n", " 'Grand Forks',\n", " 'Las Vegas',\n", " 'Fairbanks',\n", " 'Petersburg',\n", " 'Wichita',\n", " 'Rhinelander',\n", " 'Kansas City',\n", " 'Dothan',\n", " 'Alamosa',\n", " 'Adak Island',\n", " 'Islip',\n", " 'Wichita Falls',\n", " 'Presque Isle',\n", " 'San Luis Obispo',\n", " 'Dayton',\n", " 'Brunswick',\n", " 'Fort Smith',\n", " \"Martha's Vineyard\",\n", " 'Portland',\n", " 'Waco',\n", " 'New York',\n", " 'Columbus',\n", " 'Tampa',\n", " 'Dallas',\n", " 'Little Rock',\n", " 'Kona',\n", " 'Clarksburg',\n", " 'San Angelo',\n", " 'Saginaw',\n", " 'Houston',\n", " 'Duluth',\n", " 'Valparaiso',\n", " 'Phoenix',\n", " 'Oakland',\n", " 'Watertown',\n", " 'Ogden',\n", " 'Cedar Rapids',\n", " 'Cape Girardeau',\n", " 'Sun Valley',\n", " 'Sault Ste. Marie',\n", " 'Trenton',\n", " 'Missoula',\n", " 'Pasco',\n", " 'Brainerd',\n", " 'Newburgh',\n", " 'Gustavus',\n", " 'Branson',\n", " 'Providence',\n", " 'Minot',\n", " 'Huntsville',\n", " 'San Antonio',\n", " 'Marquette',\n", " 'Owensboro',\n", " 'Del Rio',\n", " 'Portsmouth',\n", " 'Bloomington',\n", " 'Lexington',\n", " 'Santa Barbara',\n", " 'Baltimore',\n", " 'Panama City',\n", " 'Kodiak',\n", " 'Jacksonville',\n", " 'Yakima',\n", " 'Vernal',\n", " 'Salisbury',\n", " 'Mission',\n", " 'Newport News',\n", " 'Charlottesville',\n", " 'Grand Junction',\n", " 'Baton Rouge',\n", " 'Beaumont',\n", " 'Staunton',\n", " 'Kalispell',\n", " 'Key West',\n", " 'Worcester',\n", " 'West Palm Beach',\n", " 'Boise',\n", " 'Grand Rapids',\n", " 'Salina',\n", " 'Fort Leonard Wood',\n", " 'Walla Walla',\n", " 'Everett',\n", " 'Dillingham',\n", " 'Bellingham',\n", " 'Lansing',\n", " 'Madison',\n", " 'Victoria',\n", " 'Sioux City',\n", " 'Hattiesburg',\n", " 'Stockton',\n", " 'Anchorage',\n", " 'Charlotte',\n", " 'Jamestown',\n", " 'Laramie',\n", " 'Decatur',\n", " 'Durango',\n", " 'Longview',\n", " 'Syracuse',\n", " 'St. Cloud',\n", " 'Santa Rosa',\n", " 'Bakersfield',\n", " 'North Platte',\n", " 'La Crosse',\n", " 'Plattsburgh',\n", " 'Concord',\n", " 'Atlanta',\n", " 'Provo',\n", " 'Ogdensburg',\n", " 'Ithaca',\n", " 'Colorado Springs',\n", " 'Washington',\n", " 'Williston',\n", " 'Tulsa',\n", " 'Midland',\n", " 'Champaign',\n", " 'Devils Lake',\n", " 'Greer',\n", " 'Muskegon',\n", " 'Hibbing',\n", " 'Santa Ana',\n", " 'Ponce',\n", " 'Prescott',\n", " 'Indianapolis',\n", " 'International Falls',\n", " 'Rapid City',\n", " 'Ketchikan',\n", " 'St. Louis',\n", " 'Santa Maria',\n", " 'Elmira',\n", " 'Alexandria',\n", " 'San Jose',\n", " 'Tucson',\n", " 'San Juan',\n", " 'Dubuque',\n", " 'Burbank',\n", " 'Gunnison',\n", " 'Cedar City',\n", " 'Hyannis',\n", " 'Raleigh',\n", " 'Norfolk',\n", " 'New Orleans',\n", " 'Medford',\n", " 'White Plains',\n", " 'Oklahoma City',\n", " 'Chicago',\n", " 'El Paso',\n", " 'Rockford',\n", " 'Aguadilla',\n", " 'Omaha',\n", " 'Scottsbluff',\n", " 'Yakutat',\n", " 'Arcata',\n", " 'Spokane',\n", " 'Brownsville',\n", " 'Bend',\n", " 'Hagerstown',\n", " 'Peoria',\n", " 'Appleton',\n", " 'Roanoke',\n", " 'Eugene',\n", " 'Rock Springs',\n", " 'Dodge City',\n", " 'Austin',\n", " 'Miami',\n", " 'Dallas',\n", " 'Mosinee',\n", " 'Killeen',\n", " 'Lihue',\n", " 'Pittsburgh',\n", " 'Tallahassee',\n", " 'Butte',\n", " 'Lawton',\n", " 'Honolulu',\n", " 'Greenville',\n", " 'Juneau',\n", " 'Myrtle Beach',\n", " 'Boston',\n", " 'Charleston',\n", " 'Latrobe',\n", " 'Knoxville',\n", " 'Denver',\n", " 'Bangor',\n", " 'Albany',\n", " 'Punta Gorda',\n", " 'Fort Lauderdale',\n", " 'Philadelphia',\n", " 'Binghamton',\n", " 'Great Falls',\n", " 'Shreveport',\n", " 'Asheville',\n", " 'Cheyenne',\n", " 'Milwaukee',\n", " 'Nome',\n", " 'Laredo',\n", " 'Des Moines',\n", " 'Fayetteville',\n", " 'Lewisburg',\n", " 'Fort Dodge',\n", " 'Cody',\n", " 'Chattanooga',\n", " 'Deadhorse',\n", " 'Kotzebue',\n", " 'Sitka',\n", " 'Bozeman',\n", " 'Palm Springs',\n", " 'Memphis',\n", " 'Nantucket',\n", " 'Texarkana',\n", " 'Lewiston',\n", " 'Valdosta',\n", " 'Birmingham',\n", " 'Scranton',\n", " 'Pensacola',\n", " 'Hancock',\n", " 'Los Angeles',\n", " 'Mason City',\n", " 'Savannah',\n", " 'West Yellowstone',\n", " 'Long Beach',\n", " 'Reno',\n", " 'Akron',\n", " 'Louisville',\n", " 'Hartford',\n", " 'Cincinnati',\n", " 'Rochester',\n", " 'San Francisco',\n", " 'Detroit',\n", " 'Monterey',\n", " 'Escanaba',\n", " 'Eau Claire']" ] }, "execution_count": 94, "metadata": {}, "output_type": "execute_result" } ], "source": [ "city_set" ] }, { "cell_type": "code", "execution_count": 16, "id": "cd0f41fb", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1 Restaurant Name\n", "3 City\n", "9 Cuisines\n", "10 Average Cost for two\n", "11 Currency\n", "17 Aggregate rating\n" ] } ], "source": [ "for idx, unit in enumerate(data_dict['columns']):\n", " if unit in ['Restaurant Name', 'City', 'Cuisines', 'Average Cost for two','Aggregate rating','Currency']:\n", " print(idx,unit)" ] }, { "cell_type": "code", "execution_count": 17, "id": "04fe71b7", "metadata": {}, "outputs": [], "source": [ "currency_set = set()\n", "for unit in data_dict['data']:\n", " currency_set.add(unit[11])" ] }, { "cell_type": "code", "execution_count": 18, "id": "3988186d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'Botswana Pula(P)',\n", " 'Brazilian Real(R$)',\n", " 'Dollar($)',\n", " 'Emirati Diram(AED)',\n", " 'Indian Rupees(Rs.)',\n", " 'Indonesian Rupiah(IDR)',\n", " 'NewZealand($)',\n", " 'Pounds(專)',\n", " 'Qatari Rial(QR)',\n", " 'Rand(R)',\n", " 'Sri Lankan Rupee(LKR)',\n", " 'Turkish Lira(TL)'}" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "currency_set" ] }, { "cell_type": "code", "execution_count": 20, "id": "257e6a76", "metadata": {}, "outputs": [], "source": [ "exchange_rate = {\"Botswana Pula(P)\":0.074,\n", " \"Brazilian Real(R$)\":0.21, \n", " 'Dollar($)':1, \n", " 'Emirati Diram(AED)':0.27,\n", " \"Indian Rupees(Rs.)\":0.012087,\n", " \"Indonesian Rupiah(IDR)\":0.000066,\n", " 'NewZealand($)':0.61,\n", " \"Pounds(專)\":1.28,\n", " \"Qatari Rial(QR)\":0.27,\n", " 'Rand(R)': 0.054,\n", " \"Sri Lankan Rupee(LKR)\":0.0031,\n", " 'Turkish Lira(TL)':0.037\n", " }" ] }, { "cell_type": "code", "execution_count": 119, "id": "c6b2691e", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "3984855550f54090b3264d7adc859433", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from tqdm.autonotebook import tqdm\n", "import random\n", "new_data = []\n", "\n", "for idx, unit in tqdm(enumerate(data_dict['data'])):\n", " tmp_dict = {k:\"\" for k in ['Name', 'City', 'Cuisines', 'Average Cost','Aggregate Rating']}\n", " tmp_dict[\"Name\"] = unit[1]\n", " tmp_dict[\"City\"] = random.sample(city_set,1)[0]\n", " tmp_dict[\"Cuisines\"] = unit[9]\n", " tmp_dict[\"Average Cost\"] = max(random.randint(10,100),int(unit[10] / 2 * exchange_rate[unit[11]]))\n", " tmp_dict[\"Aggregate Rating\"] = unit[17]\n", " new_data.append(tmp_dict)" ] }, { "cell_type": "code", "execution_count": 120, "id": "f27aaff1", "metadata": {}, "outputs": [], "source": [ "countries = [\"Chinese\", \"American\", \"Italian\", \"Mexican\", \"Indian\",\"Mediterranean\",\"French\"]\n", "cuisine = [\"Tea\",\"Seafood\",\"Bakery\",\"Desserts\",\"BBQ\",\"Fast Food\",\"Cafe\",\"Pizza\"]\n", "total_cuisine = countries + cuisine\n", "for unit in new_data:\n", " flag = False\n", " final_cuisine = set()\n", "# for c in total_cuisine:\n", "# if c in str(unit['Cuisines']):\n", "# final_cuisine.add(c)\n", " choice_number = random.choices([1,1,2])[0]\n", " for x in random.sample(countries,choice_number):\n", " final_cuisine.add(x)\n", " choice_number = random.choices([2,3,4])[0]\n", " for x in random.sample(cuisine,choice_number):\n", " final_cuisine.add(x)\n", " unit['Cuisines'] = \", \".join(x for x in final_cuisine)" ] }, { "cell_type": "code", "execution_count": 121, "id": "9e3afb30", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'Name': 'Karak韄y G韄ll韄o埕lu',\n", " 'City': 'Phoenix',\n", " 'Cuisines': 'Bakery, Indian, Desserts, Seafood',\n", " 'Average Cost': 75,\n", " 'Aggregate Rating': 4.7}" ] }, "execution_count": 121, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_data[-7]" ] }, { "cell_type": "code", "execution_count": 122, "id": "bfb243c0", "metadata": {}, "outputs": [], "source": [ "df = pd.DataFrame(new_data)" ] }, { "cell_type": "code", "execution_count": 123, "id": "af7e3411", "metadata": {}, "outputs": [], "source": [ "df.to_csv('/home/xj/toolAugEnv/code/toolConstraint/database/restaurants/clean_restaurant_2022.csv')" ] }, { "cell_type": "code", "execution_count": 92, "id": "dad9bf9f", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | Name | \n", "City | \n", "Cuisines | \n", "Average Cost | \n", "Aggregate Rating | \n", "
---|---|---|---|---|---|
0 | \n", "Le Petit Souffle | \n", "Eagle | \n", "Desserts, French, Fast Food, Chinese, Indian | \n", "40 | \n", "4.8 | \n", "
1 | \n", "Izakaya Kikufuji | \n", "Hilton Head | \n", "Mexican, BBQ, Mediterranean, Pizza | \n", "95 | \n", "4.5 | \n", "
2 | \n", "Heat - Edsa Shangri-La | \n", "Trenton | \n", "Tea, Pizza, French, Indian, Mediterranean, Sea... | \n", "148 | \n", "4.4 | \n", "
3 | \n", "Ooma | \n", "Portland | \n", "Tea, Pizza, French, BBQ, Cafe | \n", "89 | \n", "4.9 | \n", "
4 | \n", "Sambo Kojin | \n", "Milwaukee | \n", "Desserts, Tea, Italian, Cafe, Mediterranean | \n", "60 | \n", "4.8 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
9546 | \n", "Naml郾 Gurme | \n", "Worcester | \n", "Tea, BBQ, Fast Food, Chinese, American, Medite... | \n", "72 | \n", "4.1 | \n", "
9547 | \n", "Ceviz A埕ac郾 | \n", "San Francisco | \n", "Cafe, Mexican, Pizza, Bakery | \n", "14 | \n", "4.2 | \n", "
9548 | \n", "Huqqa | \n", "Guam | \n", "Pizza, Italian, French, Mexican, BBQ, Chinese,... | \n", "25 | \n", "3.7 | \n", "
9549 | \n", "A侓侓k Kahve | \n", "Louisville | \n", "Chinese, Tea, Mexican, Cafe, Indian | \n", "96 | \n", "4.0 | \n", "
9550 | \n", "Walter's Coffee Roastery | \n", "Monroe | \n", "Pizza, Italian, Cafe, Indian, Mediterranean | \n", "79 | \n", "4.0 | \n", "
9551 rows × 5 columns
\n", "