hsaest commited on
Commit
9be4956
1 Parent(s): 9179e9f

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. __pycache__/content.cpython-39.pyc +0 -0
  3. annotation/.DS_Store +0 -0
  4. annotation/src/__pycache__/utils.cpython-39.pyc +0 -0
  5. annotation/src/utils.py +186 -0
  6. app.py +197 -0
  7. content.py +70 -0
  8. database/.DS_Store +0 -0
  9. database/accommodations/.DS_Store +0 -0
  10. database/accommodations/clean_accommodations_2022.csv +0 -0
  11. database/attractions/attractions.csv +0 -0
  12. database/background/attractions.csv +0 -0
  13. database/background/citySet.txt +311 -0
  14. database/background/citySet_with_states.txt +312 -0
  15. database/background/clean_data.py +14 -0
  16. database/background/get_state_set.py +22 -0
  17. database/background/stateSet.txt +65 -0
  18. database/background/test.py +8 -0
  19. database/flights/.DS_Store +0 -0
  20. database/flights/clean_Flights_2022.csv +3 -0
  21. database/googleDistanceMatrix/clean_data.py +17 -0
  22. database/googleDistanceMatrix/distance.csv +0 -0
  23. database/googleDistanceMatrix/distance_org.csv +0 -0
  24. database/restaurants/.DS_Store +0 -0
  25. database/restaurants/clean_restaurant_2022.csv +0 -0
  26. evaluation/.DS_Store +0 -0
  27. evaluation/__pycache__/commonsenseConstraint.cpython-39.pyc +0 -0
  28. evaluation/__pycache__/eval.cpython-39.pyc +0 -0
  29. evaluation/__pycache__/hardConstraint.cpython-39.pyc +0 -0
  30. evaluation/commonsenseConstraint.py +735 -0
  31. evaluation/eval.py +181 -0
  32. evaluation/hardConstraint.py +266 -0
  33. evaluation/scored/1_validation_two-stage_1.jsonl +1 -0
  34. evaluation/scored/textbox_validation_two-stage_1.jsonl +1 -0
  35. requirements.txt +3 -0
  36. tools/__init__.py +0 -0
  37. tools/__pycache__/__init__.cpython-39.pyc +0 -0
  38. tools/accommodations/.ipynb_checkpoints/test-checkpoint.ipynb +0 -0
  39. tools/accommodations/__init__.py +0 -0
  40. tools/accommodations/__pycache__/__init__.cpython-39.pyc +0 -0
  41. tools/accommodations/__pycache__/apis.cpython-39.pyc +0 -0
  42. tools/accommodations/apis.py +91 -0
  43. tools/accommodations/test.ipynb +2037 -0
  44. tools/accommodations/test.py +12 -0
  45. tools/attractions/__pycache__/apis.cpython-39.pyc +0 -0
  46. tools/attractions/apis.py +34 -0
  47. tools/attractions/test.py +17 -0
  48. tools/cities/__pycache__/apis.cpython-39.pyc +0 -0
  49. tools/cities/apis.py +23 -0
  50. tools/cities/test.py +0 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ database/flights/clean_Flights_2022.csv filter=lfs diff=lfs merge=lfs -text
__pycache__/content.cpython-39.pyc ADDED
Binary file (4.84 kB). View file
 
annotation/.DS_Store ADDED
Binary file (8.2 kB). View file
 
annotation/src/__pycache__/utils.cpython-39.pyc ADDED
Binary file (6.95 kB). View file
 
annotation/src/utils.py ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import re
3
+ import os
4
+ import gradio as gr
5
+
6
def load_line_json_data(filename):
    """Read a JSON-lines file and return its records as a list of dicts."""
    with open(filename, 'r', encoding='utf-8') as handle:
        content = handle.read().strip()
    return [json.loads(row) for row in content.split('\n')]
13
+
14
def extract_query_number(query_string):
    """Return the integer N from a string containing "Query N", or None.

    Handles both plain labels ("Query 7") and annotated ones
    ("Query 7 --- Done").
    """
    found = re.search(r"Query (\d+)", query_string)
    if found is None:
        return None
    return int(found.group(1))
27
+
28
def create_data_display(css_content,data,annotation_idx):
    """Render one annotation query as an HTML summary card.

    Args:
        css_content: Raw CSS injected verbatim into a <style> tag.
        data: List of query dicts; accessed with index annotation_idx - 1.
        annotation_idx: 1-based index of the query to display.

    Returns:
        An HTML string: a <style> block followed by a <div> of
        highlighted query fields.
    """
    # NOTE(review): the rendered diff lost the indentation inside this
    # f-string; the exact leading whitespace of the HTML lines should be
    # confirmed against the raw file.
    return f"""
<style>
{css_content}
</style>
<div>
<span class="query-highlighted"><strong>Query {annotation_idx}:</strong> {data[annotation_idx-1]['query']}</span><br>
<span class="highlighted"><strong>Day:</strong> {data[annotation_idx-1]['days']}</span>
<span class="highlighted"><strong>Visiting City Number:</strong> {data[annotation_idx-1]['visiting_city_number']}</span>
<span class="highlighted"><strong>Date:</strong> {data[annotation_idx-1]['date']}</span>
<span class="highlighted"><strong>Departure:</strong> {data[annotation_idx-1]['org']}</span>
<span class="highlighted"><strong>Destination:</strong> {data[annotation_idx-1]['dest']}</span><br>
<span class="highlighted-alt"><strong>People Number:</strong> {data[annotation_idx-1]['people_number']}</span>
<span class="highlighted-alt"><strong>Budget:</strong> {data[annotation_idx-1]['budget']}</span>
<span class="highlighted-alt"><strong>Hotel Rule:</strong> {data[annotation_idx-1]['local_constraint']['house rule']}</span>
<span class="highlighted-alt"><strong>Cuisine:</strong> {data[annotation_idx-1]['local_constraint']['cuisine']}</span>
<span class="highlighted-alt"><strong>Room Type:</strong> {data[annotation_idx-1]['local_constraint']['room type']}</span>
<span class="highlighted-alt"><strong>Transportation:</strong> {data[annotation_idx-1]['local_constraint']['transportation']}</span><br>
</div>
"""
48
+
49
def judge_valid_info(info):
    """Return True unless *info* is empty/falsy or the day-not-needed placeholder."""
    placeholder = "You don't need to fill in the information for this or later days."
    return bool(info) and info != placeholder
53
+
54
def judge_submit_info(info, current_day, label, annotation_data, *tested_data):
    """Validate one submitted field for a given day.

    Raises gr.Error with a day/label-specific message when the field is
    empty or violates the query's transportation / accommodation
    constraints; returns True otherwise. "-" (nothing planned) is always
    accepted.
    """
    if not info:
        raise gr.Error("Day {} {} is empty!".format(current_day, label))
    if info == "-":
        return True
    if label == "transportation" and not judge_valid_transportation(info, annotation_data):
        raise gr.Error("Day {} {} is invalid! Please note the transportation.".format(current_day, label))
    if label == "accommodation":
        if not judge_valid_room_type(info, annotation_data, tested_data[0]):
            raise gr.Error("Day {} {} is invalid! Please note the room type.".format(current_day, label))
        if not judge_valid_room_rule(info, annotation_data, tested_data[0]):
            raise gr.Error("Day {} {} is invalid! Please note the house rules.".format(current_day, label))
    return True
69
+
70
+
71
def judge_valid_transportation(info, annotation_data):
    """Check *info* against the query's transportation constraint.

    Returns False when a forbidden mode ("Flight" / "Self-driving")
    appears in the submitted text; True otherwise.
    """
    constraint = annotation_data['local_constraint']['transportation']
    if constraint == 'no flight':
        return 'Flight' not in info
    if constraint == 'no self-driving':
        return 'Self-driving' not in info
    return True
77
+
78
def judge_valid_room_type(info, annotation_data, accommodation_data_all):
    """Check the chosen accommodation's room type against the query constraint.

    Looks up the accommodation named in *info* and compares its
    'room type' column to the constraint. Constraint vocabulary:
    "shared room", "not shared room", "private room", "entire room".
    """
    filtered = get_filtered_data(info, accommodation_data_all)
    actual = filtered['room type'].values[0]
    wanted = annotation_data['local_constraint']['room type']
    if wanted == 'not shared room':
        return actual != 'Shared room'
    if wanted == 'shared room':
        return actual == 'Shared room'
    if wanted == 'private room':
        return actual == 'Private room'
    if wanted == 'entire room':
        return actual == 'Entire home/apt'
    return True
93
+
94
def judge_valid_room_rule(info, annotation_data, accommodation_data_all):
    """Check the chosen accommodation's house rules against the query constraint.

    Returns False when the listing explicitly forbids the activity the
    query requires (e.g. constraint 'smoking' vs. listing text
    'No smoking'); True otherwise.
    """
    filtered = get_filtered_data(info, accommodation_data_all)
    house_rules = str(filtered['house_rules'].values[0])
    constraint = annotation_data['local_constraint']['house rule']
    # Map each constraint value to the listing text that violates it.
    # BUG FIX: the original compared against the misspelling 'parities',
    # so the no-parties rule was never enforced for the correctly spelled
    # constraint value 'parties'. Both spellings are accepted here for
    # backward compatibility.
    forbidden = {
        'smoking': 'No smoking',
        'parties': 'No parties',
        'parities': 'No parties',
        'children under 10': 'No children under 10',
        'visitors': 'No visitors',
        'pets': 'No pets',
    }
    banned_text = forbidden.get(constraint)
    if banned_text is not None and banned_text in house_rules:
        return False
    return True
108
+
109
def judge_valid_cuisine(info, annotation_data, restaurant_data_all, cuisine_set: set):
    """Accumulate which required cuisines the restaurant in *info* satisfies.

    Skips "-" entries, queries without a cuisine constraint, and
    restaurants in the departure city; otherwise adds every constrained
    cuisine found in the restaurant's 'Cuisines' column to *cuisine_set*.
    """
    if info != "-" and annotation_data['local_constraint']['cuisine'] is not None and annotation_data['org'] not in info:
        restaurant_data_filtered = get_filtered_data(info, restaurant_data_all,('Name','City'))
        for cuisine in annotation_data['local_constraint']['cuisine']:
            if cuisine in restaurant_data_filtered.iloc[0]['Cuisines']:
                cuisine_set.add(cuisine)
    # NOTE(review): the rendered diff lost indentation; this return is assumed
    # to sit at function level (the set is returned on every path) — confirm
    # against the raw file.
    return cuisine_set
116
+
117
+
118
+
119
+
120
def get_valid_name_city(info):
    """Split "Name, City" (optionally "Name, City (State)") into (name, city).

    The name may itself contain commas, so the match is lazy and the last
    comma-separated chunk is taken as the city. Falls back to ("-", "-")
    with a console warning when parsing fails.
    """
    parsed = re.search(r'(.*?),\s*([^,]+)(\(\w[\w\s]*\))?$', info)
    if parsed is None:
        print(f"{info} can not be parsed, '-' will be used instead.")
        return "-", "-"
    name = parsed.group(1).strip()
    city = extract_before_parenthesis(parsed.group(2).strip()).strip()
    return name, city
129
+
130
+
131
def extract_numbers_from_filenames(directory):
    """Return the annotation indices present in *directory*.

    Scans for files named exactly ``annotation_<N>.json`` and returns the
    N values as a list of ints (in os.listdir order).
    """
    # re.fullmatch with an escaped dot: the original pattern
    # (r'annotation_(\d+).json' tested via re.match) also accepted names
    # such as 'annotation_3_json' or 'annotation_3.jsonl'.
    pattern = re.compile(r'annotation_(\d+)\.json')
    numbers = []
    for file in os.listdir(directory):
        match = pattern.fullmatch(file)
        if match:
            numbers.append(int(match.group(1)))
    return numbers
142
+
143
def get_city_list(days, deparure_city, destination):
    """Return the candidate city list for a plan.

    For days == 3 the destination is used directly as a city; otherwise
    the destination is treated as a state and every city of that state
    (except the departure city) is offered, suffixed with "(State)".
    Note: the parameter name 'deparure_city' keeps the original
    (misspelled) public signature for caller compatibility.
    """
    city_list = [deparure_city]
    if days == 3:
        city_list.append(destination)
        return city_list
    # Build a state -> [cities] mapping from the tab-separated background file.
    state_city_map = {}
    for unit in open('../database/background/citySet_with_states.txt').read().split('\n'):
        city, state = unit.split('\t')
        state_city_map.setdefault(state, []).append(city)
    for city in state_city_map[destination]:
        if city != deparure_city:
            city_list.append(city + f"({destination})")
    return city_list
160
+
161
def get_filtered_data(component, data, column_name=('NAME','city')):
    """Return the rows of *data* whose name/city columns match *component*.

    *component* is a "Name, City" string; *column_name* names the
    (name, city) columns to filter on.
    """
    name, city = get_valid_name_city(component)
    name_col, city_col = column_name
    return data[(data[name_col] == name) & (data[city_col] == city)]
164
+
165
def extract_before_parenthesis(s):
    """Return the text of *s* preceding its first "(...)" group, or *s* itself."""
    found = re.search(r'^(.*?)\([^)]*\)', s)
    return s if found is None else found.group(1)
168
+
169
def count_consecutive_values(lst):
    """Run-length encode *lst* into [(value, count), ...] preserving order."""
    if not lst:
        return []
    runs = []
    previous = lst[0]
    run_length = 1
    for item in lst[1:]:
        if item == previous:
            run_length += 1
        else:
            runs.append((previous, run_length))
            previous = item
            run_length = 1
    # Close out the final run.
    runs.append((previous, run_length))
    return runs
app.py ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "./leaderboard/evaluation")))
4
+ sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "./leaderboard")))
5
+ os.chdir(os.path.dirname(os.path.abspath(__file__)))
6
+ import json
7
+ import datetime
8
+ from email.utils import parseaddr
9
+
10
+ import gradio as gr
11
+ import pandas as pd
12
+ import numpy as np
13
+
14
+ from datasets import load_dataset
15
+ from apscheduler.schedulers.background import BackgroundScheduler
16
+ from huggingface_hub import HfApi
17
+
18
+ # InfoStrings
19
+ # from scorer import question_scorer
20
+ from content import format_error, format_warning, format_log, TITLE, INTRODUCTION_TEXT, CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT, model_hyperlink
21
+ from evaluation.eval import eval_score
22
+
23
+ TOKEN = os.environ.get("TOKEN", None)
24
+
25
+ OWNER="osunlp"
26
+ DATA_DATASET = f"{OWNER}/TravelBench"
27
+ EVAL_DATASET = f"{OWNER}/TravelBenchEval"
28
+
29
+ api = HfApi()
30
+
31
+ YEAR_VERSION = "2024"
32
+
33
+ os.makedirs("scored", exist_ok=True)
34
+
35
+ # # Display the results
36
+ eval_results = load_dataset(EVAL_DATASET, 'scores', token=TOKEN, download_mode="force_redownload", ignore_verifications=True)
37
def get_dataframe_from_results(eval_results, split):
    """Turn one split of the scores dataset into a display-ready DataFrame.

    Drops the Mail column (never shown publicly), sorts by Final Pass
    Rate descending, and converts every *Rate column from a fraction to
    a percentage rounded to two decimals.
    """
    split_data = eval_results[split]
    split_data = split_data.remove_columns(["Mail"])
    frame = pd.DataFrame(split_data)
    frame = frame.sort_values(by=["Final Pass Rate"], ascending=False)
    rate_columns = [c for c in split_data.column_names if "Rate" in c]
    frame[rate_columns] = frame[rate_columns].multiply(100).round(decimals=2)
    return frame
45
+
46
+
47
+ eval_dataframe_val = get_dataframe_from_results(eval_results=eval_results, split="validation")
48
+ eval_dataframe_test = get_dataframe_from_results(eval_results=eval_results, split="test")
49
+
50
+
51
+
52
+ # def restart_space():
53
+ # api.restart_space(repo_id=LEADERBOARD_PATH, token=TOKEN)
54
+
55
+
56
def load_line_json_data(filename):
    """Load a JSON-lines file into a list of dicts (one per line)."""
    with open(filename, 'r', encoding='utf-8') as fp:
        body = fp.read().strip()
    records = []
    for row in body.split('\n'):
        records.append(json.loads(row))
    return records
63
+
64
+
65
def add_new_eval(
    val_or_test: str,
    eval_mode: str,
    model: str,
    planning_strategy: str,
    organization: str,
    mail: str,
    path_to_file: str,
):
    """Handle a leaderboard submission end to end.

    Steps: validate the contact e-mail, upload the raw submission file to
    the eval dataset repo, score it with eval_score, write and upload the
    scored result, append a row to the in-memory eval_results for the
    chosen split, and push the updated scores back to the Hub.

    Returns an HTML status string (format_warning on validation failure,
    format_log on success).
    """
    # Very basic email parsing
    _, parsed_mail = parseaddr(mail)
    if not "@" in parsed_mail:
        return format_warning("Please provide a valid email adress.")

    print("Adding new eval")

    if path_to_file is None:
        return format_warning("Please attach a file.")

    # Save submitted file
    # NOTE(review): the raw upload path is namespaced only by organization,
    # while the scored upload below uses {organization}/{model}/ — confirm
    # this asymmetry is intended.
    api.upload_file(
        repo_id=EVAL_DATASET,
        path_or_fileobj=path_to_file.name,
        path_in_repo=f"{organization}/{val_or_test}_{eval_mode}_{planning_strategy}_raw_{datetime.datetime.today()}.jsonl",
        repo_type="dataset",
        token=TOKEN
    )

    # Compute score
    file_path = path_to_file.name
    result = eval_score(val_or_test,file_path=file_path,TOKEN=TOKEN)
    # Persist the scored result locally (the "scored" dir is created at import).
    with open(f"scored/{organization}_{val_or_test}_{eval_mode}_{planning_strategy}.jsonl", "w") as scored_file:
        scored_file.write(json.dumps(result) + "\n")

    # Save scored file
    api.upload_file(
        repo_id=EVAL_DATASET,
        path_or_fileobj=f"scored/{organization}_{val_or_test}_{eval_mode}_{planning_strategy}.jsonl",
        path_in_repo=f"{organization}/{model}/{val_or_test}_{eval_mode}_{planning_strategy}_scored_{datetime.datetime.today()}.jsonl",
        repo_type="dataset",
        token=TOKEN
    )

    # Actual submission
    eval_entry = {
        "Model": model,
        "Planning Strategy": planning_strategy,
        "Organization": organization,
        "Mail": mail,
        "Delivery Rate": result['Delivery Rate'],
        "Commonsense Constraint Micro Pass Rate":result['Commonsense Constraint Micro Pass Rate'],
        "Commonsense Constraint Macro Pass Rate":result['Commonsense Constraint Macro Pass Rate'],
        "Hard Constraint Micro Pass Rate":result['Hard Constraint Micro Pass Rate'],
        "Hard Constraint Macro Pass Rate":result['Hard Constraint Macro Pass Rate'],
        "Final Pass Rate":result['Final Pass Rate']
    }

    # Mutates the module-level eval_results DatasetDict for the chosen split.
    eval_results[val_or_test] = eval_results[val_or_test].add_item(eval_entry)

    print(eval_results)

    eval_results.push_to_hub(EVAL_DATASET, config_name = 'scores', token=TOKEN)

    return format_log(f"Model {model} submitted by {organization} successfully. \nPlease refresh the leaderboard, and wait a bit to see the score displayed")
129
+
130
+
131
def refresh():
    """Re-download the scores dataset and rebuild both leaderboard tables."""
    results = load_dataset(EVAL_DATASET, 'scores', token=TOKEN, download_mode="force_redownload", ignore_verifications=True)
    val_table = get_dataframe_from_results(eval_results=results, split="validation")
    test_table = get_dataframe_from_results(eval_results=results, split="test")
    return val_table, test_table
136
+
137
+ # def upload_file(files):
138
+ # file_paths = [file.name for file in files]
139
+ # return file_paths
140
+
141
+
142
# ---------------------------------------------------------------------------
# Gradio UI: two read-only result tabs (validation / test), a refresh button
# that re-pulls scores from the Hub, and a submission form wired to
# add_new_eval.
# ---------------------------------------------------------------------------
demo = gr.Blocks()
with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tab("Results: Validation"):
        leaderboard_table_val = gr.components.Dataframe(
            value=eval_dataframe_val, interactive=False,
        )
    with gr.Tab("Results: Test"):
        leaderboard_table_test = gr.components.Dataframe(
            value=eval_dataframe_test, interactive=False,
        )

    # refresh() returns (validation_df, test_df); the outputs list must stay
    # in that order.
    refresh_button = gr.Button("Refresh")
    refresh_button.click(
        refresh,
        inputs=[],
        outputs=[
            leaderboard_table_val,
            leaderboard_table_test,
        ],
    )
    with gr.Accordion("Submit a new file for evaluation"):
        with gr.Row():
            with gr.Column():
                level_of_test = gr.Radio(["validation", "test"], value="validation", label="Split")
                eval_mode = gr.Radio(["two-stage", "sole-planning"], value="two-stage", label="Eval Mode")
                model = gr.Textbox(label="Foundation Model")
                planning_strategy = gr.Textbox(label="Planning Strategy")
            with gr.Column():
                organization = gr.Textbox(label="Organization")
                mail = gr.Textbox(label="Contact email")
                file_output = gr.File()


        # Input order must match add_new_eval's positional parameters.
        submit_button = gr.Button("Submit Eval")
        submission_result = gr.Markdown()
        submit_button.click(
            add_new_eval,
            [
                level_of_test,
                eval_mode,
                model,
                planning_strategy,
                organization,
                mail,
                file_output,
            ],
            submission_result,
        )

# Periodic space restart is currently disabled.
# scheduler = BackgroundScheduler()
# scheduler.add_job(restart_space, "interval", seconds=3600)
# scheduler.start()
demo.launch(debug=True)
content.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ TITLE = """<h1 align="center" id="space-title">TravelBench Leaderboard</h1>"""
2
+
3
+ INTRODUCTION_TEXT = """
4
+ TravelBench is a benchmark crafted for evaluating language agents in tool-use and complex planning within multiple constraints. (See our [paper](https://arxiv.org/abs/2311.12983) for more details.)
5
+
6
+ ## Data
7
+ In TravelBench, for a given query, language agents are expected to formulate a comprehensive plan that includes transportation, daily meals, attractions, and accommodation for each day.
8
+ For constraints, from the perspective of real world applications, we design three types of them: Environment Constraint, Commonsense Constraint, and Hard Constraint.
9
+ TravelBench comprises 1,225 queries in total. The number of days and hard constraints are designed to test agents' abilities across both the breadth and depth of complex planning.
10
+
11
+ TravelBench data can be found in [this dataset](https://huggingface.co/datasets/osunlp/TravelBench).
12
+
13
+ ## Submission Guidelines for TravelBench
14
+ Participants are invited to submit results for both validation and testing phases. The submissions will be evaluated based on several metrics: delivery rate, commonsense constraint pass rate (micro/macro), hard constraint pass rate (micro/macro), and the final pass rate.
15
+
16
+ ### Format of Submission:
17
+ Submissions must be in the form of a JSON-line file. Each line should adhere to the following structure:
18
+ ```
19
+ {"idx":0,"query":"Natural Language Query","plan":[{"day": 1, "current_city": "from [City A] to [City B]", "transportation": "Flight Number: XXX, from A to B", "breakfast": "Name, City", "attraction": "Name, City;Name, City;...;Name, City;", "lunch": "Name, City", "dinner": "Name, City", "accommodation": "Name, City"}, {"day": 2, "current_city": "City B", "transportation": "-", "breakfast": "Name, City", "attraction": "Name, City;Name, City;", "lunch": "Name, City", "dinner": "Name, City", "accommodation": "Name, City"}, ...]}
20
+ ```
21
+ Explanation of Fields:
22
+ #### day:
23
+ Description: Indicates the specific day in the itinerary.
24
+ Format: Enter the numerical value representing the sequence of the day within the travel plan. For instance, '1' for the first day, '2' for the second day, and so on.
25
+
26
+ #### current city:
27
+ Description: Indicates the city where the traveler is currently located.
28
+ Format: When there is a change in location, use "from [City A] to [City B]" to denote the transition. If remaining in the same city, simply use the city's name (e.g., "City A").
29
+
30
+ #### transportation:
31
+ Description: Specifies the mode of transportation used.
32
+ Format: For flights, include the details in the format "Flight Number: XXX, from [City A] to [City B]". For self-driven or taxi travel, use "self-driving/taxi, from [City A] to [City B]". If there is no travel between cities on that day, use "-".
33
+
34
+ #### breakfast, lunch, and dinner:
35
+ Description: Details about dining arrangements.
36
+ Format: Use "Name, City" to specify the chosen restaurant and its location. If a meal is not planned, use "-".
37
+
38
+ #### attraction:
39
+ Description: Information about attractions visited.
40
+ Format: List attractions as "Name, City". If visiting multiple attractions, separate them with a semicolon ";". If no attraction is planned, use "-".
41
+
42
+ Please refer to [this](https://huggingface.co/datasets/osunlp/TravelBench/resolve/main/example_submission.jsonl?download=true) for example submission file.
43
+
44
+ Submission made by our team are labelled "TravelBench authors". Each submission will be automatically evaluated and scored based on the predefined metrics. The scores and rankings will be updated and displayed on the leaderboard.
45
+
46
+ """
47
+
48
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
# NOTE(review): this BibTeX entry is a placeholder — title/author are empty
# and the eprint field is malformed ("eprint={," has no value or closing
# brace). Fill these in before release; the string is left unchanged here.
CITATION_BUTTON_TEXT = r"""@misc{Xie2024TravelBench,
title={},
author={},
year={2024},
eprint={,
archivePrefix={arXiv},
primaryClass={cs.CL}
}"""
57
+
58
+
59
def format_error(msg):
    """Render *msg* as the red, centered status paragraph used for errors."""
    style = "color: red; font-size: 20px; text-align: center;"
    return f"<p style='{style}'>{msg}</p>"
61
+
62
def format_warning(msg):
    """Render *msg* as the orange, centered status paragraph used for warnings."""
    style = "color: orange; font-size: 20px; text-align: center;"
    return f"<p style='{style}'>{msg}</p>"
64
+
65
def format_log(msg):
    """Render *msg* as the green, centered status paragraph used for success."""
    style = "color: green; font-size: 20px; text-align: center;"
    return f"<p style='{style}'>{msg}</p>"
67
+
68
def model_hyperlink(link, model_name):
    """Render *model_name* as a dotted-underline link that opens in a new tab."""
    attrs = (
        f'target="_blank" href="{link}" '
        'style="color: var(--link-text-color); text-decoration: underline;'
        'text-decoration-style: dotted;"'
    )
    return f'<a {attrs}>{model_name}</a>'
70
+
database/.DS_Store ADDED
Binary file (8.2 kB). View file
 
database/accommodations/.DS_Store ADDED
Binary file (6.15 kB). View file
 
database/accommodations/clean_accommodations_2022.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/attractions/attractions.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/background/attractions.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/background/citySet.txt ADDED
@@ -0,0 +1,311 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ San Diego
2
+ Pellston
3
+ Buffalo
4
+ Charlotte Amalie
5
+ Flagstaff
6
+ Evansville
7
+ Hilo
8
+ Twin Falls
9
+ Newark
10
+ State College
11
+ Johnstown
12
+ Montgomery
13
+ Redding
14
+ Lynchburg
15
+ South Bend
16
+ Sarasota
17
+ Sioux Falls
18
+ Paducah
19
+ Kahului
20
+ Atlantic City
21
+ Bemidji
22
+ Toledo
23
+ Abilene
24
+ Sacramento
25
+ Amarillo
26
+ Moline
27
+ Hilton Head
28
+ Manhattan
29
+ Minneapolis
30
+ Fort Myers
31
+ Roswell
32
+ Harlingen
33
+ Seattle
34
+ Manchester
35
+ Gulfport
36
+ Gainesville
37
+ Pago Pago
38
+ Wrangell
39
+ Augusta
40
+ Waterloo
41
+ Yuma
42
+ Saipan
43
+ Christiansted
44
+ North Bend
45
+ Richmond
46
+ Albuquerque
47
+ Nashville
48
+ Aberdeen
49
+ Harrisburg
50
+ Fort Wayne
51
+ Green Bay
52
+ Wenatchee
53
+ Santa Fe
54
+ St. Petersburg
55
+ Belleville
56
+ Greensboro
57
+ Lake Charles
58
+ Traverse City
59
+ Erie
60
+ Niagara Falls
61
+ Pocatello
62
+ Idaho Falls
63
+ Alpena
64
+ Wilmington
65
+ Ontario
66
+ Iron Mountain
67
+ Lubbock
68
+ Helena
69
+ Kalamazoo
70
+ Cleveland
71
+ Grand Island
72
+ New Bern
73
+ Melbourne
74
+ Bristol
75
+ Orlando
76
+ Bismarck
77
+ Fresno
78
+ Billings
79
+ Daytona Beach
80
+ College Station
81
+ Jacksonville
82
+ Salt Lake City
83
+ Corpus Christi
84
+ Florence
85
+ Moab
86
+ Grand Forks
87
+ Las Vegas
88
+ Fairbanks
89
+ Petersburg
90
+ Wichita
91
+ Rhinelander
92
+ Kansas City
93
+ Dothan
94
+ Alamosa
95
+ Adak Island
96
+ Islip
97
+ Wichita Falls
98
+ Presque Isle
99
+ San Luis Obispo
100
+ Dayton
101
+ Fort Smith
102
+ Martha's Vineyard
103
+ Portland
104
+ Waco
105
+ New York
106
+ Columbus
107
+ Tampa
108
+ Little Rock
109
+ Kona
110
+ Clarksburg
111
+ San Angelo
112
+ Saginaw
113
+ Houston
114
+ Duluth
115
+ Valparaiso
116
+ Phoenix
117
+ Oakland
118
+ Watertown
119
+ Ogden
120
+ Cedar Rapids
121
+ Cape Girardeau
122
+ Sun Valley
123
+ Sault Ste. Marie
124
+ Trenton
125
+ Missoula
126
+ Pasco
127
+ Brainerd
128
+ Newburgh
129
+ Gustavus
130
+ Branson
131
+ Providence
132
+ Minot
133
+ Huntsville
134
+ San Antonio
135
+ Marquette
136
+ Owensboro
137
+ Del Rio
138
+ Portsmouth
139
+ Bloomington
140
+ Lexington
141
+ Santa Barbara
142
+ Baltimore
143
+ Panama City
144
+ Kodiak
145
+ Yakima
146
+ Vernal
147
+ Salisbury
148
+ Mission
149
+ Newport News
150
+ Charlottesville
151
+ Grand Junction
152
+ Baton Rouge
153
+ Beaumont
154
+ Staunton
155
+ Kalispell
156
+ Key West
157
+ Worcester
158
+ West Palm Beach
159
+ Boise
160
+ Grand Rapids
161
+ Salina
162
+ Fort Leonard Wood
163
+ Walla Walla
164
+ Everett
165
+ Dillingham
166
+ Lansing
167
+ Madison
168
+ Victoria
169
+ Sioux City
170
+ Hattiesburg
171
+ Stockton
172
+ Anchorage
173
+ Charlotte
174
+ Jamestown
175
+ Laramie
176
+ Decatur
177
+ Durango
178
+ Longview
179
+ Syracuse
180
+ St. Cloud
181
+ Santa Rosa
182
+ Bakersfield
183
+ North Platte
184
+ La Crosse
185
+ Plattsburgh
186
+ Concord
187
+ Atlanta
188
+ Provo
189
+ Ogdensburg
190
+ Ithaca
191
+ Colorado Springs
192
+ Washington
193
+ Williston
194
+ Tulsa
195
+ Midland
196
+ Champaign
197
+ Devils Lake
198
+ Greer
199
+ Muskegon
200
+ Hibbing
201
+ Santa Ana
202
+ Ponce
203
+ Prescott
204
+ Indianapolis
205
+ International Falls
206
+ Rapid City
207
+ Ketchikan
208
+ St. Louis
209
+ Santa Maria
210
+ Elmira
211
+ Alexandria
212
+ San Jose
213
+ Tucson
214
+ San Juan
215
+ Dubuque
216
+ Burbank
217
+ Gunnison
218
+ Cedar City
219
+ Hyannis
220
+ Raleigh
221
+ Norfolk
222
+ New Orleans
223
+ Medford
224
+ White Plains
225
+ Oklahoma City
226
+ Chicago
227
+ El Paso
228
+ Rockford
229
+ Aguadilla
230
+ Omaha
231
+ Scottsbluff
232
+ Yakutat
233
+ Arcata
234
+ Spokane
235
+ Brownsville
236
+ Bend
237
+ Hagerstown
238
+ Peoria
239
+ Appleton
240
+ Roanoke
241
+ Eugene
242
+ Rock Springs
243
+ Dodge City
244
+ Austin
245
+ Miami
246
+ Dallas
247
+ Mosinee
248
+ Killeen
249
+ Lihue
250
+ Pittsburgh
251
+ Tallahassee
252
+ Butte
253
+ Lawton
254
+ Honolulu
255
+ Greenville
256
+ Juneau
257
+ Myrtle Beach
258
+ Boston
259
+ Charleston
260
+ Latrobe
261
+ Knoxville
262
+ Denver
263
+ Bangor
264
+ Albany
265
+ Punta Gorda
266
+ Fort Lauderdale
267
+ Philadelphia
268
+ Binghamton
269
+ Great Falls
270
+ Shreveport
271
+ Asheville
272
+ Cheyenne
273
+ Milwaukee
274
+ Nome
275
+ Laredo
276
+ Des Moines
277
+ Fayetteville
278
+ Lewisburg
279
+ Fort Dodge
280
+ Cody
281
+ Chattanooga
282
+ Deadhorse
283
+ Kotzebue
284
+ Sitka
285
+ Bozeman
286
+ Palm Springs
287
+ Memphis
288
+ Nantucket
289
+ Texarkana
290
+ Lewiston
291
+ Valdosta
292
+ Birmingham
293
+ Scranton
294
+ Pensacola
295
+ Hancock
296
+ Los Angeles
297
+ Mason City
298
+ Savannah
299
+ West Yellowstone
300
+ Long Beach
301
+ Reno
302
+ Akron
303
+ Louisville
304
+ Hartford
305
+ Cincinnati
306
+ Rochester
307
+ San Francisco
308
+ Detroit
309
+ Monterey
310
+ Escanaba
311
+ Eau Claire
database/background/citySet_with_states.txt ADDED
@@ -0,0 +1,312 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ San Diego California
2
+ Pellston Michigan
3
+ Buffalo New York
4
+ Charlotte Amalie St. Thomas
5
+ Flagstaff Arizona
6
+ Evansville Indiana
7
+ Hilo Hawaii
8
+ Twin Falls Idaho
9
+ Newark New Jersey
10
+ State College Pennsylvania
11
+ Johnstown Pennsylvania
12
+ Charleston South Carolina
13
+ Montgomery Alabama
14
+ Redding California
15
+ Lynchburg Virginia
16
+ South Bend Indiana
17
+ Sarasota Florida
18
+ Sioux Falls South Dakota
19
+ Paducah Kentucky
20
+ Kahului Hawaii
21
+ Atlantic City New Jersey
22
+ Bemidji Minnesota
23
+ Toledo Ohio
24
+ Abilene Texas
25
+ Sacramento California
26
+ Amarillo Texas
27
+ Moline Illinois
28
+ Hilton Head South Carolina
29
+ Manhattan New York
30
+ Minneapolis Minnesota
31
+ Fort Myers Florida
32
+ Roswell New Mexico
33
+ Harlingen Texas
34
+ Seattle Washington
35
+ Manchester England
36
+ Gulfport Mississippi
37
+ Gainesville Florida
38
+ Pago Pago Eastern District
39
+ Wrangell Alaska
40
+ Augusta Georgia
41
+ Waterloo Wallonia
42
+ Yuma Arizona
43
+ Saipan Saipan
44
+ Christiansted St. Croix
45
+ North Bend Oregon
46
+ Richmond Virginia
47
+ Albuquerque New Mexico
48
+ Nashville Tennessee
49
+ Aberdeen Scotland
50
+ Harrisburg Pennsylvania
51
+ Fort Wayne Indiana
52
+ Green Bay Wisconsin
53
+ Wenatchee Washington
54
+ Santa Fe New Mexico
55
+ St. Petersburg Saint Petersburg
56
+ Belleville Illinois
57
+ Greensboro North Carolina
58
+ Lake Charles Louisiana
59
+ Traverse City Michigan
60
+ Erie Pennsylvania
61
+ Niagara Falls New York
62
+ Pocatello Idaho
63
+ Idaho Falls Idaho
64
+ Alpena Michigan
65
+ Wilmington North Carolina
66
+ Ontario Ontario
67
+ Iron Mountain Michigan
68
+ Lubbock Texas
69
+ Helena Montana
70
+ Kalamazoo Michigan
71
+ Cleveland Ohio
72
+ Grand Island Nebraska
73
+ New Bern North Carolina
74
+ Melbourne Victoria
75
+ Bristol Tennessee
76
+ Orlando Florida
77
+ Bismarck North Dakota
78
+ Fresno California
79
+ Billings Montana
80
+ Jackson Mississippi
81
+ Daytona Beach Florida
82
+ College Station Texas
83
+ Jacksonville Florida
84
+ Salt Lake City Utah
85
+ Corpus Christi Texas
86
+ Florence Tuscany
87
+ Moab Utah
88
+ Grand Forks North Dakota
89
+ Las Vegas Nevada
90
+ Fairbanks Alaska
91
+ Petersburg Virginia
92
+ Wichita Kansas
93
+ Rhinelander Wisconsin
94
+ Kansas City Missouri
95
+ Dothan Alabama
96
+ Alamosa Colorado
97
+ Adak Island Alaska
98
+ Islip New York
99
+ Wichita Falls Texas
100
+ Presque Isle Maine
101
+ San Luis Obispo California
102
+ Dayton Ohio
103
+ Fort Smith Arkansas
104
+ Martha's Vineyard Massachusetts
105
+ Portland Oregon
106
+ Waco Texas
107
+ New York New York
108
+ Columbus Ohio
109
+ Tampa Florida
110
+ Little Rock Arkansas
111
+ Kona Hawaii
112
+ Clarksburg West Virginia
113
+ San Angelo Texas
114
+ Saginaw Michigan
115
+ Houston Texas
116
+ Duluth Minnesota
117
+ Valparaiso Indiana
118
+ Phoenix Arizona
119
+ Oakland California
120
+ Watertown New York
121
+ Ogden Utah
122
+ Cedar Rapids Iowa
123
+ Cape Girardeau Missouri
124
+ Sun Valley Idaho
125
+ Sault Ste. Marie Ontario
126
+ Trenton New Jersey
127
+ Missoula Montana
128
+ Pasco Washington
129
+ Brainerd Minnesota
130
+ Newburgh New York
131
+ Gustavus Minnesota
132
+ Branson Missouri
133
+ Providence Rhode Island
134
+ Minot North Dakota
135
+ Huntsville Alabama
136
+ San Antonio Texas
137
+ Marquette Wisconsin
138
+ Owensboro Kentucky
139
+ Del Rio Texas
140
+ Portsmouth England
141
+ Bloomington Illinois
142
+ Lexington Kentucky
143
+ Santa Barbara California
144
+ Baltimore Maryland
145
+ Panama City Florida
146
+ Kodiak Alaska
147
+ Yakima Washington
148
+ Vernal Utah
149
+ Salisbury Maryland
150
+ Mission Texas
151
+ Newport News Virginia
152
+ Charlottesville Virginia
153
+ Grand Junction Colorado
154
+ Baton Rouge Louisiana
155
+ Beaumont Texas
156
+ Staunton Virginia
157
+ Kalispell Montana
158
+ Key West Florida
159
+ Worcester England
160
+ West Palm Beach Florida
161
+ Boise Idaho
162
+ Grand Rapids Michigan
163
+ Salina Kansas
164
+ Fort Leonard Wood Missouri
165
+ Walla Walla Washington
166
+ Everett Washington
167
+ Dillingham Alaska
168
+ Lansing Michigan
169
+ Madison Wisconsin
170
+ Victoria Victoria
171
+ Sioux City Iowa
172
+ Hattiesburg Mississippi
173
+ Stockton California
174
+ Anchorage Alaska
175
+ Charlotte North Carolina
176
+ Jamestown Virginia
177
+ Laramie Wyoming
178
+ Decatur Georgia
179
+ Durango Colorado
180
+ Longview Texas
181
+ Syracuse New York
182
+ St. Cloud Minnesota
183
+ Santa Rosa California
184
+ Bakersfield California
185
+ North Platte Nebraska
186
+ La Crosse Wisconsin
187
+ Plattsburgh New York
188
+ Concord New Hampshire
189
+ Atlanta Georgia
190
+ Provo Utah
191
+ Ogdensburg New York
192
+ Ithaca New York
193
+ Colorado Springs Colorado
194
+ Washington District of Columbia
195
+ Williston North Dakota
196
+ Tulsa Oklahoma
197
+ Midland Texas
198
+ Champaign Illinois
199
+ Devils Lake Wisconsin
200
+ Greer South Carolina
201
+ Muskegon Michigan
202
+ Hibbing Minnesota
203
+ Santa Ana California
204
+ Ponce Ponce
205
+ Prescott Arizona
206
+ Indianapolis Indiana
207
+ International Falls Minnesota
208
+ Rapid City South Dakota
209
+ Ketchikan Alaska
210
+ St. Louis Missouri
211
+ Santa Maria California
212
+ Elmira New York
213
+ Alexandria Alexandria Governorate
214
+ San Jose California
215
+ Tucson Arizona
216
+ San Juan San Juan
217
+ Dubuque Iowa
218
+ Burbank California
219
+ Gunnison Colorado
220
+ Cedar City Utah
221
+ Hyannis Massachusetts
222
+ Raleigh North Carolina
223
+ Norfolk Virginia
224
+ New Orleans Louisiana
225
+ Medford Oregon
226
+ White Plains New York
227
+ Oklahoma City Oklahoma
228
+ Chicago Illinois
229
+ El Paso Texas
230
+ Rockford Illinois
231
+ Aguadilla Aguadilla
232
+ Omaha Nebraska
233
+ Scottsbluff Nebraska
234
+ Yakutat Alaska
235
+ Arcata California
236
+ Spokane Washington
237
+ Brownsville Texas
238
+ Bend Oregon
239
+ Hagerstown Maryland
240
+ Peoria Illinois
241
+ Appleton Wisconsin
242
+ Roanoke Virginia
243
+ Eugene Oregon
244
+ Rock Springs Wyoming
245
+ Dodge City Kansas
246
+ Austin Texas
247
+ Miami Florida
248
+ Dallas Texas
249
+ Mosinee Wisconsin
250
+ Killeen Texas
251
+ Lihue Hawaii
252
+ Pittsburgh Pennsylvania
253
+ Tallahassee Florida
254
+ Butte California
255
+ Lawton Oklahoma
256
+ Honolulu Hawaii
257
+ Greenville South Carolina
258
+ Juneau Alaska
259
+ Myrtle Beach South Carolina
260
+ Boston Massachusetts
261
+ Latrobe Pennsylvania
262
+ Knoxville Tennessee
263
+ Denver Colorado
264
+ Bangor Maine
265
+ Albany New York
266
+ Punta Gorda Florida
267
+ Fort Lauderdale Florida
268
+ Philadelphia Pennsylvania
269
+ Binghamton New York
270
+ Great Falls Montana
271
+ Shreveport Louisiana
272
+ Asheville North Carolina
273
+ Cheyenne Wyoming
274
+ Milwaukee Wisconsin
275
+ Nome Alaska
276
+ Laredo Texas
277
+ Des Moines Iowa
278
+ Fayetteville North Carolina
279
+ Lewisburg Pennsylvania
280
+ Fort Dodge Iowa
281
+ Cody Wyoming
282
+ Chattanooga Tennessee
283
+ Deadhorse Alaska
284
+ Kotzebue Alaska
285
+ Sitka Alaska
286
+ Bozeman Montana
287
+ Palm Springs California
288
+ Memphis Tennessee
289
+ Nantucket Massachusetts
290
+ Texarkana Texas
291
+ Lewiston Idaho
292
+ Valdosta Georgia
293
+ Birmingham England
294
+ Scranton Pennsylvania
295
+ Pensacola Florida
296
+ Hancock Michigan
297
+ Los Angeles California
298
+ Mason City Iowa
299
+ Savannah Georgia
300
+ West Yellowstone Montana
301
+ Long Beach California
302
+ Reno Nevada
303
+ Akron Ohio
304
+ Louisville Kentucky
305
+ Hartford Connecticut
306
+ Cincinnati Ohio
307
+ Rochester New York
308
+ San Francisco California
309
+ Detroit Michigan
310
+ Monterey California
311
+ Escanaba Michigan
312
+ Eau Claire Wisconsin
database/background/clean_data.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ with open('database/background/citySet.txt','r') as f:
2
+ city_set = f.read().strip().split('\n')
3
+
4
+ with open('database/background/citySet_with_states.txt','r') as f:
5
+ lines = f.read().strip().split('\n')
6
+ data = []
7
+ for unit in lines:
8
+ if unit.split('\t')[0] in city_set:
9
+ data.append(unit)
10
+
11
+ with open('database/background/citySet_with_states.txt','w') as f:
12
+ for unit in data:
13
+ f.write(unit + '\n')
14
+ f.close()
database/background/get_state_set.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ # print now directory
3
+ print(os.getcwd())
4
+ state_set = set()
5
+ city_set = set()
6
+ with open('database/background/citySet_with_states.txt','r') as f:
7
+ city_set = f.read().strip().split('\n')
8
+ for city in city_set:
9
+ city_name = city.split('\t')[0]
10
+ state_name = city.split('\t')[1]
11
+ state_set.add(state_name)
12
+ city_set.add(city_name)
13
+ # write to new file
14
+ f.close()
15
+ # with open('database/background/stateSet.txt', 'a') as f:
16
+ # for state_name in state_set:
17
+ # f.write(state_name.split('\\')[0] + '\n')
18
+ # f.close()
19
+ with open('database/background/citySet_2.txt', 'a') as f:
20
+ for city_name in city_set:
21
+ f.write(city_name.split('\\')[0] + '\n')
22
+ f.close()
database/background/stateSet.txt ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Wallonia
2
+ St. Thomas
3
+ Alaska
4
+ Washington
5
+ Kansas
6
+ Scotland
7
+ Michigan
8
+ Eastern District
9
+ New Jersey
10
+ Utah
11
+ Alexandria Governorate
12
+ North Dakota
13
+ Connecticut
14
+ West Virginia
15
+ Aguadilla
16
+ North Carolina
17
+ Ohio
18
+ Colorado
19
+ Arkansas
20
+ New York
21
+ Mississippi
22
+ San Juan
23
+ Minnesota
24
+ California
25
+ Maine
26
+ Nebraska
27
+ Idaho
28
+ Alabama
29
+ Texas
30
+ Maryland
31
+ England
32
+ New Mexico
33
+ South Carolina
34
+ Montana
35
+ Ponce
36
+ Tennessee
37
+ Florida
38
+ Oklahoma
39
+ Hawaii
40
+ New Hampshire
41
+ Iowa
42
+ Oregon
43
+ Wyoming
44
+ Pennsylvania
45
+ Tuscany
46
+ Virginia
47
+ Indiana
48
+ Missouri
49
+ District of Columbia
50
+ Saint Petersburg
51
+ Nevada
52
+ Massachusetts
53
+ Louisiana
54
+ Wisconsin
55
+ Saipan
56
+ Ontario
57
+ St. Croix
58
+ Kentucky
59
+ South Dakota
60
+ Arizona
61
+ Georgia
62
+ Rhode Island
63
+ Illinois
64
+ None
65
+ Victoria
database/background/test.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+
2
+ f = open('/home/xj/toolAugEnv/code/toolConstraint/database/background/citySet.txt','r').read().strip().split('\n')
3
+ citySet = []
4
+ for line in f:
5
+ if line not in citySet:
6
+ citySet.append(line.strip())
7
+ else:
8
+ print(line)
database/flights/.DS_Store ADDED
Binary file (6.15 kB). View file
 
database/flights/clean_Flights_2022.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8dafdb0e3f8b79ce599a1e612a772865295bc226b46e5fb278368f7255b11cee
3
+ size 304807007
database/googleDistanceMatrix/clean_data.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import pandas as pd
3
+ import csv
4
+
5
+ def extract_before_parenthesis(s):
6
+ match = re.search(r'^(.*?)\([^)]*\)', s)
7
+ return match.group(1) if match else s
8
+
9
+ if __name__ == '__main__':
10
+ data = pd.read_csv('/home/xj/toolAugEnv/code/toolConstraint/database/googleDistanceMatrix/distance.csv')
11
+ data = data.to_dict(orient = 'split')
12
+ fieldnames = ['origin', 'destination', 'cost', 'duration', 'distance']
13
+ with open('/home/xj/toolAugEnv/code/toolConstraint/database/googleDistanceMatrix/distance2.csv', 'w', newline='') as csvfile:
14
+ writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
15
+ writer.writeheader()
16
+ for row in data['data']:
17
+ writer.writerow({'origin': extract_before_parenthesis(row[0]), 'destination': extract_before_parenthesis(row[1]), 'cost': row[2], 'duration': row[3], 'distance': row[4]})
database/googleDistanceMatrix/distance.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/googleDistanceMatrix/distance_org.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/restaurants/.DS_Store ADDED
Binary file (6.15 kB). View file
 
database/restaurants/clean_restaurant_2022.csv ADDED
The diff for this file is too large to render. See raw diff
 
evaluation/.DS_Store ADDED
Binary file (6.15 kB). View file
 
evaluation/__pycache__/commonsenseConstraint.cpython-39.pyc ADDED
Binary file (14 kB). View file
 
evaluation/__pycache__/eval.cpython-39.pyc ADDED
Binary file (7.05 kB). View file
 
evaluation/__pycache__/hardConstraint.cpython-39.pyc ADDED
Binary file (8.13 kB). View file
 
evaluation/commonsenseConstraint.py ADDED
@@ -0,0 +1,735 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from annotation.src.utils import get_valid_name_city,extract_before_parenthesis,extract_numbers_from_filenames
2
+ from tools.flights.apis import Flights
3
+ from tools.accommodations.apis import Accommodations
4
+ from tools.restaurants.apis import Restaurants
5
+ from tools.googleDistanceMatrix.apis import GoogleDistanceMatrix
6
+ from tools.attractions.apis import Attractions
7
+ import math
8
+ import json
9
+ import re
10
+ import os
11
+ import sys
12
+ from tqdm import tqdm
13
+ import argparse
14
+
15
+ sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))
16
+ os.chdir(os.path.dirname(os.path.abspath(__file__)))
17
+
18
+ flight = Flights()
19
+ accommodation = Accommodations()
20
+ restaurants = Restaurants()
21
+ googleDistanceMatrix = GoogleDistanceMatrix()
22
+ attractions = Attractions()
23
+
24
+ city_state_set = open('../database/background/citySet_with_states.txt','r').read().split('\n')
25
+ city_state_map = {x:y for x,y in [unit.split('\t') for unit in city_state_set]}
26
+
27
+
28
+ def load_line_json_data(filename):
29
+ data = []
30
+ with open(filename, 'r', encoding='utf-8') as f:
31
+ for line in f.read().strip().split('\n'):
32
+ unit = json.loads(line)
33
+ data.append(unit)
34
+ return data
35
+
36
+
37
+ def count_consecutive_values(lst):
38
+ if not lst:
39
+ return []
40
+
41
+ result = []
42
+ current_string = lst[0]
43
+ count = 1
44
+
45
+ for i in range(1, len(lst)):
46
+ if lst[i] == current_string:
47
+ count += 1
48
+ else:
49
+ result.append((current_string, count))
50
+ current_string = lst[i]
51
+ count = 1
52
+
53
+ result.append((current_string, count)) # Add the last group of values
54
+ return result
55
+
56
+
57
+ def transportation_match(text: str):
58
+
59
+ if 'taxi' in text.lower():
60
+ return 'Taxi'
61
+
62
+ elif 'self-driving' in text.lower():
63
+ return 'Self-driving'
64
+
65
+ elif 'flight' in text.lower():
66
+ return 'Flight'
67
+
68
+
69
+ def extract_from_to(text: str):
70
+ """
71
+ Extracts 'A' and 'B' from the format "from A to B" in the given text, with B ending at a comma or the end of the string.
72
+
73
+ Args:
74
+ - text (str): The input string.
75
+
76
+ Returns:
77
+ - tuple: A tuple containing 'A' and 'B'. If no match is found, returns (None, None).
78
+ """
79
+ pattern = r"from\s+(.+?)\s+to\s+([^,]+)(?=[,\s]|$)"
80
+ matches = re.search(pattern, text)
81
+ return matches.groups() if matches else (None, None)
82
+
83
+
84
+
85
+ def is_valid_city_sequence(city_list):
86
+ """
87
+ Checks if the city sequence is valid. A valid sequence has every city (except the first and last)
88
+ appearing consecutively, and no city should appear again once its sequence is over.
89
+
90
+ Args:
91
+ - city_list (list): List of cities.
92
+
93
+ Returns:
94
+ - bool: True if the sequence is valid, False otherwise.
95
+ """
96
+
97
+ # If the list has less than 3 cities, it's invalid.
98
+ if len(city_list) < 3:
99
+ return False
100
+
101
+ # Set to keep track of visited cities
102
+ visited_cities = set()
103
+
104
+ i = 0
105
+ while i < len(city_list):
106
+ city = city_list[i]
107
+
108
+ # If the city was already visited, it's invalid.
109
+ if city in visited_cities and (i != 0 and i != len(city_list) - 1):
110
+ return False
111
+
112
+ # Count the consecutive occurrences of the city
113
+ count = 0
114
+ while i < len(city_list) and city_list[i] == city:
115
+ count += 1
116
+ i += 1
117
+
118
+ # If the city appeared only once in the medium, it's invalid.
119
+ if count == 1 and 0 < i - 1 < len(city_list) - 1:
120
+ return False
121
+
122
+ visited_cities.add(city)
123
+
124
+ return True
125
+
126
+
127
+
128
+ def is_reasonalbe_visiting_city(question, tested_data):
129
+
130
+ city_list = []
131
+
132
+ # print(tested_data)
133
+ for i in range(min(question['days'],len(tested_data))):
134
+ city_value = tested_data[i]['current_city']
135
+
136
+ if 'from' in city_value:
137
+ city1, city2 = extract_from_to(city_value)
138
+ city1 = extract_before_parenthesis(city1)
139
+ city2 = extract_before_parenthesis(city2)
140
+ if i==0 and city1 != question['org']:
141
+ return False, f"The first day's city should be {question['org']}."
142
+
143
+ city_list += [city1, city2]
144
+
145
+ else:
146
+ city_list.append(extract_before_parenthesis(city_value))
147
+
148
+ if city_list[0] != city_list[-1]:
149
+ return False, "The trip should be a closed circle."
150
+
151
+ if not is_valid_city_sequence(city_list):
152
+ return False, "The city sequence is invalid."
153
+
154
+ for idx, city in enumerate(city_list):
155
+ if city not in city_state_map:
156
+ return False, f"{city} is not a valid city."
157
+ if idx not in [0,len(city_list)-1] and question['days'] >3 and city_state_map[city] != question['dest']:
158
+ return False, f"{city} is not in {question['dest']}."
159
+
160
+ return True, None
161
+
162
+
163
+ def is_valid_restaurants(question, tested_data):
164
+
165
+ restaurants_list = []
166
+
167
+ for i in range(min(question['days'],len(tested_data))):
168
+ unit = tested_data[i]
169
+
170
+ if 'breakfast' in unit and unit['breakfast'] and unit['breakfast'] != '-':
171
+ if unit['breakfast'] not in restaurants_list:
172
+ restaurants_list.append(unit['breakfast'])
173
+ else:
174
+ return False, f"The restaurant in day {i+1} breakfast is repeated."
175
+ # elif 'breakfast' not in unit :
176
+ # return False, f"No Breakfast Info."
177
+
178
+ if 'lunch' in unit and unit['lunch'] and unit['lunch'] != '-':
179
+ if unit['lunch'] not in restaurants_list:
180
+ restaurants_list.append(unit['lunch'])
181
+ else:
182
+ return False, f"The restaurant in day {i+1} lunch {unit['lunch']} is repeated."
183
+ # elif 'lunch' not in unit:
184
+ # return False, f"No Lunch Info."
185
+
186
+ if 'dinner' in unit and unit['dinner'] and unit['dinner'] != '-':
187
+ if unit['dinner'] not in restaurants_list:
188
+ restaurants_list.append(unit['dinner'])
189
+ else:
190
+ return False, f"The restaurant in day {i+1} dinner is repeated."
191
+ # elif 'dinner' not in unit:
192
+ # return False, f"No Dinner Info."
193
+
194
+ return True, None
195
+
196
+ def is_valid_attractions(question, tested_data):
197
+
198
+ attractions_list = []
199
+
200
+ for i in range(min(question['days'],len(tested_data))):
201
+ unit = tested_data[i]
202
+
203
+ if 'attraction' in unit and unit['attraction'] and unit['attraction'] != '-':
204
+ for attraction in unit['attraction'].split(';')[:-1]:
205
+ if attraction not in attractions_list:
206
+ attractions_list.append(attraction)
207
+ else:
208
+ return False, f"The attraction '{attraction}' in day {i+1} is repeated."
209
+
210
+ # elif 'attraction' not in unit:
211
+ # return False, f"No Attraction Info."
212
+
213
+ return True, None
214
+
215
+ def is_valid_transportation(question, tested_data):
216
+
217
+ if tested_data[0]['transportation'] and tested_data[0]['transportation'] != '-':
218
+ transportation_list = [transportation_match(tested_data[0]['transportation'])]
219
+
220
+ else:
221
+ return False, "The transportation in day 1 should not be empty."
222
+
223
+ for i in range(min(question['days'],len(tested_data))):
224
+ unit = tested_data[i]
225
+
226
+ if 'transportation' in unit and unit['transportation'] and unit['transportation'] != '-':
227
+ transportation_list.append(transportation_match(unit['transportation']))
228
+ # elif 'transportation' not in unit:
229
+ # return False, f"No Transportation Info."
230
+
231
+ if (('Self-driving' in transportation_list) and ('Flight' in transportation_list)) or (('Taxi' in transportation_list) and ('Self-driving' in transportation_list)):
232
+ return False, "The transportation is conflicting."
233
+
234
+ return True, None
235
+
236
+ def is_valid_information_in_current_city(question, tested_data):
237
+
238
+ for i in range(min(question['days'],len(tested_data))):
239
+ unit = tested_data[i]
240
+ current_city = unit['current_city']
241
+ final_city_list = []
242
+
243
+ if 'from' in current_city:
244
+ city1, city2 = extract_from_to(current_city)
245
+ city1 = extract_before_parenthesis(city1)
246
+ city2 = extract_before_parenthesis(city2)
247
+ final_city_list = [city1, city2]
248
+ else:
249
+ final_city_list = extract_before_parenthesis(current_city)
250
+
251
+ if 'transportation' in unit and unit['transportation'] and unit['transportation'] != '-':
252
+ for city in final_city_list:
253
+ if city not in unit['transportation']:
254
+ # print(city)
255
+ return False, f"The transportation in day {i+1} is invalid city choice."
256
+ # elif 'transportation' not in unit:
257
+ # return False, f"No Transportation Info."
258
+
259
+ if 'breakfast' in unit and unit['breakfast'] and unit['breakfast'] != '-':
260
+
261
+ flag = False
262
+
263
+ for city in final_city_list:
264
+ if city in unit['breakfast']:
265
+ flag = True
266
+
267
+ if not flag:
268
+ return False, f"The breakfast in day {i+1} is invalid city choice."
269
+ # elif 'breakfast' not in unit:
270
+ # return False, f"No Breakfast Info."
271
+
272
+ if 'lunch' in unit and unit['lunch'] and unit['lunch'] != '-':
273
+ flag = False
274
+
275
+ for city in final_city_list:
276
+ if city in unit['lunch']:
277
+ flag = True
278
+
279
+ if not flag:
280
+ return False, f"The lunch in day {i+1} is invalid city choice."
281
+ # elif 'lunch' not in unit:
282
+ # return False, f"No Lunch Info."
283
+
284
+ if 'dinner' in unit and unit['dinner'] and unit['dinner'] != '-':
285
+ flag = False
286
+
287
+ for city in final_city_list:
288
+ if city in unit['dinner']:
289
+ flag = True
290
+
291
+ if not flag:
292
+ return False, f"The dinner in day {i+1} is invalid city choice."
293
+ # elif 'dinner' not in unit:
294
+ # return False, f"No Dinner Info."
295
+
296
+ if 'attraction' in unit and unit['attraction'] and unit['attraction'] != '-':
297
+
298
+ attraction_list = unit['attraction'].split(';')[:-1]
299
+
300
+ for attraction in attraction_list:
301
+ flag = False
302
+ for city in final_city_list:
303
+ if city in attraction:
304
+ flag = True
305
+ if not flag:
306
+ return False, f"The attraction in day {i+1} is invalid city choice."
307
+
308
+ # elif 'attraction' not in unit:
309
+ # return False, f"No Attraction Info."
310
+
311
+
312
+ if 'accommodation' in unit and unit['accommodation'] and unit['accommodation'] != '-':
313
+
314
+ if final_city_list[-1] not in unit['accommodation']:
315
+ return False, f"The accommodation in day {i+1} is invalid city choice."
316
+
317
+ # elif 'accommodation' not in unit:
318
+ # return False, f"No Accommodation Info."
319
+
320
+ return True, None
321
+
322
+ # hallucination
323
+ def is_valid_information_in_sandbox(question, tested_data):
324
+
325
+ for i in range(min(question['days'],len(tested_data))):
326
+ unit = tested_data[i]
327
+
328
+ if unit['transportation'] and unit['transportation'] != '-':
329
+ value = unit['transportation']
330
+ org_city, dest_city = extract_from_to(value)
331
+ if org_city == None or dest_city == None:
332
+ org_city, dest_city = extract_from_to(unit['current_city'])
333
+ if 'flight number' in value.lower():
334
+ try:
335
+ org_city = extract_before_parenthesis(org_city)
336
+ dest_city = extract_before_parenthesis(dest_city)
337
+ except TypeError:
338
+ raise ValueError("The transportation {} in day {} can not be parsed.".format(value,i+1))
339
+ # print(value)
340
+ if len(flight.data[(flight.data['Flight Number'] == value.split('Flight Number: ')[1].split(',')[0]) & (flight.data['OriginCityName']==org_city) & (flight.data['DestCityName']==dest_city)]) < 1:
341
+ return False, f"The flight number in day {i+1} is invalid in the sandbox."
342
+
343
+ elif 'self-driving' in value.lower() or 'taxi' in value.lower():
344
+ try:
345
+ org_city = extract_before_parenthesis(org_city)
346
+ dest_city = extract_before_parenthesis(dest_city)
347
+ except TypeError:
348
+ org_city = '-'
349
+ dest_city = '-'
350
+ print("The transportation {} in day {} can not be parsed and '-' will be used instead.".format(value,i+1))
351
+
352
+ if 'self-driving' in value.lower():
353
+ if googleDistanceMatrix.run_for_evaluation(org_city, dest_city, mode='self-driving')['cost'] == None:
354
+ return False, f"The self-driving in day {i+1} is invalid in the sandbox."
355
+ else:
356
+ if googleDistanceMatrix.run_for_evaluation(org_city, dest_city, mode='taxi')['cost'] == None:
357
+ return False, f"The taxi in day {i+1} is invalid in the sandbox."
358
+
359
+ if 'breakfast' in unit and unit['breakfast'] and unit['breakfast'] != '-':
360
+ name, city = get_valid_name_city(unit['breakfast'])
361
+ if len(restaurants.data[(restaurants.data['Name'].astype(str).str.contains(re.escape(name))) & (restaurants.data['City'] == city)]) < 1:
362
+ return False, f"The breakfast in day {i+1} is invalid in the sandbox."
363
+ # elif 'breakfast' not in unit:
364
+ # return False, f"No Breakfast Info."
365
+
366
+ if 'lunch' in unit and unit['lunch'] and unit['lunch'] != '-':
367
+ name, city = get_valid_name_city(unit['lunch'])
368
+ if len(restaurants.data[(restaurants.data['Name'].astype(str).str.contains(re.escape(name))) & (restaurants.data['City'] == city)]) < 1:
369
+ return False, f"The lunch in day {i+1} is invalid in the sandbox."
370
+ # elif 'lunch' not in unit:
371
+ # return False, f"No Lunch Info."
372
+
373
+ if 'dinner' in unit and unit['dinner'] and unit['dinner'] != '-':
374
+ name, city = get_valid_name_city(unit['dinner'])
375
+ if len(restaurants.data[(restaurants.data['Name'].astype(str).str.contains(re.escape(name))) & (restaurants.data['City'] == city)]) < 1:
376
+ return False, f"The dinner in day {i+1} is invalid in the sandbox."
377
+ # elif 'dinner' not in unit:
378
+ # return False, f"No Dinner Info."
379
+
380
+ if 'attraction' in unit and unit['attraction'] and unit['attraction'] != '-':
381
+ attractions_list = unit['attraction'].split(';')[:-1]
382
+ for attraction in attractions_list:
383
+ name, city = get_valid_name_city(attraction)
384
+ if len(attractions.data[(attractions.data['Name'].astype(str).str.contains(re.escape(name))) & (attractions.data['City'] == city)]) < 1:
385
+ return False, f"The attraction {attraction} in day {i+1} is invalid in the sandbox."
386
+ # elif 'attraction' not in unit:
387
+ # return False, f"No Attraction Info."
388
+
389
+ if 'accommodation' in unit and unit['accommodation'] and unit['accommodation'] != '-':
390
+ name, city = get_valid_name_city(unit['accommodation'])
391
+ # print(name,city)
392
+ # print(accommodation.data[accommodation.data['NAME'].astype(str).str.contains(re.escape(name))])
393
+ if len(accommodation.data[(accommodation.data['NAME'].astype(str).str.contains(re.escape(name))) & (accommodation.data['city'] == city)]) < 1:
394
+ return False, f"The accommodation in day {i+1} is invalid in the sandbox."
395
+ # elif 'accommodation' not in unit:
396
+ # return False, f"No Accommodation Info."
397
+
398
+ return True, None
399
+
400
+
401
+ def is_valid_accommodaton(question, tested_data):
402
+ data = []
403
+ for i in range(min(question['days'],len(tested_data))):
404
+ unit = tested_data[i]
405
+
406
+ if 'accommodation' not in unit:
407
+ return False, f"No Accommodation Info."
408
+
409
+ data.append(unit['accommodation'])
410
+ # data = [unit['accommodation'] for unit in tested_data]
411
+ consectutive_accommodation = count_consecutive_values(data)
412
+ for unit in consectutive_accommodation:
413
+ # print(unit)
414
+ if unit and unit[0] not in ['-',''] :
415
+ name, city = get_valid_name_city(unit[0])
416
+ # print(unit[0],name,city)
417
+ # try:
418
+ if len(accommodation.data[(accommodation.data['NAME'].astype(str).str.contains(re.escape(name))) & (accommodation.data['city'] == city)]) == 1 and unit[1] < accommodation.data[(accommodation.data['NAME'].astype(str).str.contains(re.escape(name))) & (accommodation.data['city'] == city)].iloc[0]['minimum nights']:
419
+ return False, f"The accommodation {unit[0]} do not obey the minumum nights rule."
420
+ # can not parse data
421
+ # except re.error:
422
+ # continue
423
+
424
+ return True, None
425
+
426
+ def is_valid_visiting_city_number(question, tested_data):
427
+
428
+ city_set = set()
429
+
430
+
431
+ for i in range(min(question['days'],len(tested_data))):
432
+ city_value = tested_data[i]['current_city']
433
+
434
+ if 'from' in city_value:
435
+ city1, city2 = extract_from_to(city_value)
436
+ city1 = extract_before_parenthesis(city1)
437
+ city2 = extract_before_parenthesis(city2)
438
+ if i==0 and city1 != question['org']:
439
+ return False, f"The first day's city should be {question['org']}."
440
+
441
+ city_set.add(city1)
442
+ city_set.add(city2)
443
+
444
+ else:
445
+ city_set.add(extract_before_parenthesis(city_value))
446
+
447
+ city_set.discard(question['org'])
448
+
449
+ if len(city_set) != question['visiting_city_number']:
450
+ return False, f"The number of visiting cities should be {question['visiting_city_number']}."
451
+
452
+ return True, None
453
+
454
def is_valid_days(question, tested_data):
    """Check that the number of filled-in day entries matches ``question['days']``.

    A day counts as filled when its record is non-empty and its
    ``current_city`` is not the "no need to fill in" placeholder.

    Returns:
        (True, None) on success, otherwise (False, reason).
    """
    placeholder = "You don't need to fill in the information for this or later days."
    filled = sum(
        1
        for day_idx in range(min(question['days'], len(tested_data)))
        if tested_data[day_idx] != {}
        and tested_data[day_idx]['current_city'] != placeholder
    )

    if filled != question['days']:
        return False, f"The number of days should be {question['days']}."
    return True, None
465
+
466
def is_not_absent(question, tested_data):
    """Check the plan for missing fields and insufficient information.

    Verifies the day and city counts, the presence of every per-day field,
    a set of "must not be empty" rules (transportation on travel days,
    attractions on stay days, accommodation except on the last day, meals on
    non-departure days), and finally that at least half of all expected
    entries are filled in.

    Returns:
        (True, None) on success, otherwise (False, reason).
    """
    needed_info = 6 * question['days']
    total_valid_info = 0

    if not is_valid_days(question, tested_data)[0]:
        return False, "Invalid Days"

    if not is_valid_visiting_city_number(question, tested_data)[0]:
        return False, "Invalid City Number"

    # Field name -> error message used when the field is missing entirely.
    required_fields = [
        ('transportation', "No Transportation Info."),
        ('breakfast', "No Breakfast Info."),
        ('lunch', "No Lunch Info."),
        ('dinner', "No Dinner Info."),
        ('attraction', "No Attraction Info."),
        ('accommodation', "No Accommodation Info."),
    ]
    empty_values = ['', '-']

    for day_idx in range(min(question['days'], len(tested_data))):
        unit = tested_data[day_idx]

        for field, message in required_fields:
            if field not in unit:
                return False, message

        city = unit['current_city']

        # Travel days must specify a means of transportation.
        if ('from ' in city or 'to ' in city) and unit['transportation'] in empty_values:
            return False, f"No transportation in day {day_idx+1} is not allowed."

        # Days spent inside a single city must include an attraction.
        if ('from ' not in city and ' to ' not in city) and unit['attraction'] in empty_values:
            return False, f"No attaction in day {day_idx+1} is not allowed."

        # Every day except the last needs an accommodation entry.
        if day_idx != question['days'] - 1 and unit['accommodation'] in empty_values:
            return False, f"No accommodation in day {day_idx+1} is not allowed."

        # All three meals are required on non-departure days.
        if (unit['breakfast'] in empty_values
                or unit['lunch'] in empty_values
                or unit['dinner'] in empty_values) and 'from ' not in city:
            return False, f"No meal in day {day_idx+1} is not allowed."

        # Count every non-empty entry of the day towards the fill ratio.
        total_valid_info += sum(1 for key in unit if unit[key] and unit[key] != '-')

    if total_valid_info * 1.0 / needed_info < 0.5:
        return False, f"The absent information is more than 50%."

    return True, None
519
+
520
+
521
def evaluation(query_data, tested_data):
    """Run every commonsense constraint check against a plan.

    Returns:
        dict mapping check name -> (passed, reason-or-None) tuple.
    """
    checks = [
        ('is_reasonalbe_visiting_city', is_reasonalbe_visiting_city),
        ('is_valid_restaurants', is_valid_restaurants),
        ('is_valid_attractions', is_valid_attractions),
        ('is_valid_accommodation', is_valid_accommodaton),
        ('is_valid_transportation', is_valid_transportation),
        ('is_valid_information_in_current_city', is_valid_information_in_current_city),
        ('is_valid_information_in_sandbox', is_valid_information_in_sandbox),
        ('is_not_absent', is_not_absent),
    ]
    return {name: check(query_data, tested_data) for name, check in checks}
532
+
533
def boolean_evaluation(query_data, tested_data):
    """Run every commonsense check and collapse the results to one boolean.

    The reason string of the first failing check is printed before returning.

    Returns:
        True when all checks pass, False otherwise.
    """
    checks = [
        ('is_reasonalbe_visiting_city', is_reasonalbe_visiting_city),
        ('is_valid_restaurants', is_valid_restaurants),
        ('is_valid_accommodation', is_valid_accommodaton),
        ('is_valid_attractions', is_valid_attractions),
        ('is_valid_transportation', is_valid_transportation),
        ('is_valid_information_in_current_city', is_valid_information_in_current_city),
        ('is_valid_information_in_sandbox', is_valid_information_in_sandbox),
        ('is_not_absent', is_not_absent),
    ]
    return_info = {name: check(query_data, tested_data) for name, check in checks}
    for name in return_info:
        if return_info[name][0] == False:
            print(return_info[name][1])
            return False
    return True
548
+
549
+ # if __name__ == '__main__':
550
+ # number_list = extract_numbers_from_filenames('/home/xj/toolAugEnv/code/toolConstraint/data/annotation/lrz')
551
+ # # json_data = json.load(open('/home/xj/toolAugEnv/code/toolConstraint/data/annotation/x/annotation_4.json'))
552
+ # query_data = load_line_json_data('/home/xj/toolAugEnv/code/toolConstraint/data/query/lrz.jsonl')
553
+ # for idx in number_list:
554
+ # json_data = json.load(open(f'/home/xj/toolAugEnv/code/toolConstraint/data/annotation/lrz/annotation_{idx}.json'))
555
+ # print(str(idx), evaluation(query_data[idx-1], json_data))
556
+ # # json_data = json.load(open(f'/home/xj/toolAugEnv/code/toolConstraint/results/turbo16k-turbo16k/plan_{idx}.json'))
557
+ # # query_data = load_line_json_data('/home/xj/toolAugEnv/code/toolConstraint/data/query/test.jsonl')[idx-1]
558
+ # # help me write all function name in this file, just the name
559
+ # #
560
+ # # list all function name in this file
561
+ # # ['is_reasonalbe_visiting_city', 'is_valiable_restaurants', 'is_valiable_attractions', 'is_valiable_transportation', 'is_valid_information_in_current_city', 'is_valid_information_in_sandbox']
562
+ # # print(is_valiable_restaurants(query_data, json_data))
563
+
564
+ # if __name__ == "__main__":
565
+ # user = 'zk'
566
+ # query_data_list = load_line_json_data(f'/home/xj/toolAugEnv/code/toolConstraint/data/query/{user}.jsonl')
567
+ # idx_number_list = extract_numbers_from_filenames(f'/home/xj/toolAugEnv/code/toolConstraint/data/annotation/{user}')
568
+ # commonsense_statistic= {level:{day:[] for day in [3,5,7]} for level in ['easy','medium','hard']}
569
+ # for idx in idx_number_list:
570
+ # print(idx)
571
+ # query_data = query_data_list[idx-1]
572
+ # generated_plan = json.load(open(f'/home/xj/toolAugEnv/code/toolConstraint/results/turbo16k-turbo16k/{user}/plan_{idx}.json'))
573
+ # # generated_plan = generated_plan[:-1]
574
+ # if generated_plan[-1]['gpt-3.5-turbo-16k-result'] != 'Plan Fail':
575
+ # info_box = evaluation(query_data, generated_plan[-1]['gpt-3.5-turbo-16k-result'])
576
+ # generated_plan[-1]['toolAug-commonsense'] = info_box
577
+ # else:
578
+ # generated_plan[-1]['toolAug-commonsense'] = None
579
+ # info_box = None
580
+ # commonsense_statistic[query_data['level']][query_data['days']].append(info_box)
581
+ # with open(f'/home/xj/toolAugEnv/code/toolConstraint/results/turbo16k-turbo16k/{user}/plan_{idx}.json','w') as f:
582
+ # json.dump(generated_plan,f)
583
+
584
+ # with open(f'/home/xj/toolAugEnv/code/toolConstraint/results/turbo16k-turbo16k/{user}/commonsense_statistic.json','w') as f:
585
+ # json.dump(commonsense_statistic,f)
586
+
587
+ # if __name__ == "__main__":
588
+ # user = 'all'
589
+ # model_type = ['chatgpt','gpt4','greedy_search'][2]
590
+ # query_data_list = load_line_json_data(f'/home/xj/toolAugEnv/code/toolConstraint/data/query/{user}.jsonl')
591
+ # # idx_number_list = extract_numbers_from_filenames(f'/home/xj/toolAugEnv/code/toolConstraint/data/annotation/{user}')
592
+ # idx_number_list = [i for i in range(1,501)]
593
+ # commonsense_statistic= {level:{day:[] for day in [3,5,7]} for level in ['easy','medium','hard']}
594
+
595
+ # for idx in idx_number_list:
596
+ # print(idx)
597
+ # query_data = query_data_list[idx-1]
598
+ # generated_plan = json.load(open(f'/home/xj/toolAugEnv/code/toolConstraint/results/pre2/{user}/plan_{idx}.json'))
599
+ # # generated_plan = generated_plan[:-1]
600
+ # if model_type == 'greedy_search':
601
+ # info_box = evaluation(query_data, generated_plan[-1][f'greedy_search_plan'])
602
+ # else:
603
+ # info_box = evaluation(query_data, generated_plan[-1][f'{model_type}_human_collected_info_results_parsed'])
604
+ # generated_plan[-1][f'{model_type}_with_human_collected_commonsense'] = info_box
605
+ # commonsense_statistic[query_data['level']][query_data['days']].append(info_box)
606
+
607
+ # with open(f'/home/xj/toolAugEnv/code/toolConstraint/results/pre2/{user}/plan_{idx}.json','w') as f:
608
+ # json.dump(generated_plan,f)
609
+
610
+ # with open(f'/home/xj/toolAugEnv/code/toolConstraint/results/pre2/{user}/{model_type}_with_human_collected_commonsense_statistic.json','w') as f:
611
+ # json.dump(commonsense_statistic,f)
612
+
613
+
614
+ # if __name__ == "__main__":
615
+ # user = 'all'
616
+ # query_data_list = load_line_json_data(f'/home/xj/toolAugEnv/code/toolConstraint/data/query/{user}.jsonl')
617
+ # idx_number_list = extract_numbers_from_filenames(f'/home/xj/toolAugEnv/code/toolConstraint/data/annotation/{user}')
618
+ # hardConstraint_statistic= {level:{day:[] for day in [3,5,7]} for level in ['easy','medium','hard']}
619
+ # not_satified = []
620
+ # for idx in tqdm(idx_number_list):
621
+ # # print(idx)
622
+ # query_data = query_data_list[idx-1]
623
+ # generated_plan = json.load(open(f'/home/xj/toolAugEnv/code/toolConstraint/data/annotation/{user}/annotation_{idx}.json'))
624
+
625
+ # if not boolean_evaluation(query_data, generated_plan):
626
+ # not_satified.append(idx)
627
+ # print(idx)
628
+ # generated_plan = generated_plan[:-1]
629
+ # print(not_satified)
630
+
631
if __name__ == "__main__":
    # Batch-score the human-annotated plans of one data split with the
    # commonsense evaluator, attach the per-plan result to the stored result
    # file, and dump the aggregate per-level/per-day statistics.
    # NOTE(review): hard-coded to the 'train' split via the [0] index.
    set_type = ["train",'dev','test'][0]
    query_data_list = load_line_json_data(f'/home/xj/toolAugEnv/code/toolConstraint/data/final_data/{set_type}/query/query.jsonl')
    commonsense_statistic= {level:{day:[] for day in [3,5,7]} for level in ['easy','medium','hard']}
    # NOTE(review): not_satified is never appended to — the print below always
    # shows an empty list.
    not_satified = []
    for idx in tqdm(range(1,len(query_data_list)+1)):
        # Queries are 1-indexed on disk, 0-indexed in the loaded list.
        query_data = query_data_list[idx-1]
        generated_plan = json.load(open(f'/home/xj/toolAugEnv/code/toolConstraint/data/final_data/{set_type}/plan/plan_{idx}.json'))
        try:
            store_plan = json.load(open(f'/home/xj/toolAugEnv/code/toolConstraint/results/{set_type}/plan_{idx}.json'))
        except FileNotFoundError:
            # No prior results file: start a fresh single-entry record.
            store_plan = [{}]
        # Element [1] of the annotation file holds the plan itself —
        # presumably element [0] is metadata; verify against the data files.
        info_box = evaluation(query_data,generated_plan[1])
        store_plan[-1][f'human_anno_commonsense_constraint'] = info_box
        with open(f'/home/xj/toolAugEnv/code/toolConstraint/results/{set_type}/plan_{idx}.json','w') as f:
            json.dump(store_plan,f)
        commonsense_statistic[query_data['level']][query_data['days']].append(info_box)
    print(not_satified)
    with open(f'/home/xj/toolAugEnv/code/toolConstraint/results/{set_type}/human_anno_commonsense_constraint.json','w') as f:
        json.dump(commonsense_statistic,f)
658
+
659
+ # if __name__ == "__main__":
660
+ # user = 'all'
661
+ # model_type = ['chatgpt','gpt4'][1]
662
+ # query_data_list = load_line_json_data(f'/home/xj/toolAugEnv/code/toolConstraint/data/query/{user}.jsonl')
663
+ # # idx_number_list = extract_numbers_from_filenames(f'/home/xj/toolAugEnv/code/toolConstraint/data/annotation/{user}')
664
+ # idx_number_list = [i for i in range(1,501)]
665
+ # commonsense_statistic= {level:{day:[] for day in [3,5,7]} for level in ['easy','medium','hard']}
666
+ # cnt = 0
667
+ # for idx in idx_number_list:
668
+ # # print(idx)
669
+ # query_data = query_data_list[idx-1]
670
+ # generated_plan = json.load(open(f'/home/xj/toolAugEnv/code/toolConstraint/results/pre/{user}/plan_{idx}.json'))[-1]['gpt4_human_collected_info_results_parsed']
671
+ # # generated_plan = generated_plan[:-1]
672
+
673
+ # if not boolean_evaluation(query_data, generated_plan):
674
+ # cnt += 1
675
+ # print(idx)
676
+ # print(cnt)
677
+
678
+ # if __name__ == "__main__":
679
+ # parser = argparse.ArgumentParser(description="")
680
+ # # model_type = ['gpt-3.5-turbo-1106','gpt-4-1106-preview','greedy_search','mistral-7B-32K','gemini2','mixtral','gpt-3.5-turbo-11062'][-1]
681
+ # # method = ['direct','cot','react','reflexion','tool-use'][-1]
682
+ # # set_type = ['dev','test'][0]
683
+ # parser.add_argument("--model_type", type=str, default="gpt-3.5-turbo-1106")
684
+ # parser.add_argument("--method", type=str, default="direct")
685
+ # parser.add_argument("--set_type", type=str, default="dev")
686
+ # args = parser.parse_args()
687
+ # directory = f'/home/xj/toolAugEnv/code/toolConstraint/data/final_data/{args.set_type}'
688
+ # query_data_list = load_line_json_data(os.path.join(directory, 'query/query.jsonl'))
689
+ # # idx_number_list = extract_numbers_from_filenames(f'/home/xj/toolAugEnv/code/toolConstraint/data/annotation/{user}')
690
+ # idx_number_list = [i for i in range(1,len(query_data_list)+1)]
691
+ # commonsense_statistic= {level:{day:[] for day in [3,5,7]} for level in ['easy','medium','hard']}
692
+ # deliver_cnt = 0
693
+ # if args.method == 'tool-use':
694
+ # suffix = ''
695
+ # else:
696
+ # suffix = '_with_human_info'
697
+ # for idx in tqdm(idx_number_list):
698
+ # # print(idx)
699
+ # query_data = query_data_list[idx-1]
700
+ # generated_plan = json.load(open(f'/home/xj/toolAugEnv/code/toolConstraint/results/{args.set_type}/plan_{idx}.json'))
701
+ # # generated_plan = generated_plan[:-1]
702
+ # if args.model_type == 'greedy_search':
703
+ # info_box = evaluation(query_data, generated_plan[-1][f'greedy_search_plan'])
704
+ # else:
705
+ # if args.method == 'tool-use':
706
+ # suffix2 = ''
707
+ # else:
708
+ # suffix2 = '_collected'
709
+ # if generated_plan[-1][f'{args.model_type}_{args.method}{suffix2}_info_results'] and generated_plan[-1][f'{args.model_type}_{args.method}{suffix2}_info_results']!='Max Token Length Exceeded.':
710
+ # try:
711
+ # info_box = evaluation(query_data, generated_plan[-1][f'{args.model_type}_{args.method}{suffix}_results_parsed'])
712
+ # except KeyError:
713
+ # info_box = None
714
+ # generated_plan[-1][f'{args.model_type}_{args.method}{suffix2}_info_results'] = ""
715
+ # except IndexError:
716
+ # info_box = None
717
+ # generated_plan[-1][f'{args.model_type}_{args.method}{suffix2}_info_results'] = ""
718
+ # else:
719
+ # info_box = None
720
+ # if info_box:
721
+ # deliver_cnt += 1
722
+ # generated_plan[-1][f'{args.model_type}_{args.method}{suffix}_commonsense_constraint'] = info_box
723
+ # commonsense_statistic[query_data['level']][query_data['days']].append(info_box)
724
+
725
+ # with open(f'/home/xj/toolAugEnv/code/toolConstraint/results/{args.set_type}/plan_{idx}.json','w') as f:
726
+ # json.dump(generated_plan,f)
727
+
728
+ # with open(f'/home/xj/toolAugEnv/code/toolConstraint/results/{args.set_type}/{args.model_type}_{args.method}{suffix}_commonsense_constraint.json','w') as f:
729
+ # json.dump(commonsense_statistic,f)
730
+
731
+ # if args.set_type == 'dev':
732
+ # print(f"Model:{args.model_type} Method:{args.method} Set: {args.set_type} \nDeliver Rate: {deliver_cnt/180}" )
733
+ # elif args.set_type == 'test':
734
+ # print(f"Model:{args.model_type} Method:{args.method} Set: {args.set_type} \nDeliver Rate: {deliver_cnt/1000}" )
735
+
evaluation/eval.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from commonsenseConstraint import evaluation as commonsense_eval
2
+ from hardConstraint import evaluation as hard_eval
3
+ import json
4
+ from tqdm import tqdm
5
+ from datasets import load_dataset
6
+
7
+
8
def load_line_json_data(filename):
    """Read a JSON-Lines file and return its records as a list of objects."""
    with open(filename, 'r', encoding='utf-8') as fh:
        lines = fh.read().strip().split('\n')
    return [json.loads(line) for line in lines]
15
+
16
def count_true_false(data):
    """Return ``(true_count, false_count)`` for the values in *data*.

    Counting uses equality, matching ``list.count`` semantics.
    """
    return data.count(True), data.count(False)
21
+
22
def statistics(commonsense_statistic):
    """Aggregate per-constraint true/false totals for every level/day bucket.

    Args:
        commonsense_statistic: {level: {day: [result-dict or None, ...]}},
            where each result dict maps a check name to a container of
            booleans (e.g. a (passed, reason) tuple).

    Returns:
        {level: {day: {check: {"true": n, "false": m}}}} with per-check
        tallies summed over all plans in that bucket.
    """
    result = {}
    for level, days in commonsense_statistic.items():
        result[level] = {}
        for day, dicts in days.items():
            bucket = result[level].setdefault(day, {})
            for dct in dicts:
                if not dct:
                    continue  # skip undelivered plans recorded as None
                for key, data in dct.items():
                    trues, falses = count_true_false(data)
                    entry = bucket.setdefault(key, {"true": 0, "false": 0})
                    entry["true"] += trues
                    entry["false"] += falses
    return result
38
+
39
+
40
def eval_score(validation_or_test: str, file_path: str, TOKEN):
    """Score a file of submitted plans against the TravelBench eval split.

    Runs the commonsense evaluator on every delivered plan, runs the hard
    constraint evaluator only on plans complete enough to score, and
    aggregates micro (per individual check) and macro (per plan) pass rates.

    Args:
        validation_or_test: Which split to score, 'validation' or 'test'.
        file_path: JSON-Lines file with one tested plan per query; each
            record carries a 'plan' entry.
        TOKEN: HuggingFace token used to download 'osunlp/TravelBenchEval'.

    Returns:
        dict with 'Delivery Rate', commonsense and hard-constraint
        micro/macro pass rates, and 'Final Pass Rate'.
    """

    if validation_or_test == 'validation':
        query_data_list = load_dataset('osunlp/TravelBenchEval','validation',token=TOKEN)['validation']
    elif validation_or_test == 'test':
        query_data_list = load_dataset('osunlp/TravelBenchEval','test',token=TOKEN)['test']

    query_data_list = [x for x in query_data_list]
    hardConstraint_statistic= {level:{day:[] for day in [3,5,7]} for level in ['easy','medium','hard']}
    commonsenseConstraint_statistic = {level:{day:[] for day in [3,5,7]} for level in ['easy','medium','hard']}
    tested_plans = load_line_json_data(file_path)
    delivery_cnt = 0
    plan_constraint_store = []
    for idx in tqdm(range(0,len(query_data_list))):
        query_data = query_data_list[idx]
        tested_plan = tested_plans[idx]
        # HACK: stringified records are rehydrated with eval(), which executes
        # arbitrary expressions — inputs must be trusted
        # (ast.literal_eval would be the safe replacement).
        if type(query_data) == str:
            query_data = eval(query_data)
        if type(tested_plan) == str:
            tested_plan = eval(tested_plan)
        if type(query_data['local_constraint']) == str:
            query_data['local_constraint'] = eval(query_data['local_constraint'])

        # A non-empty plan counts as "delivered" and gets commonsense-scored.
        if tested_plan['plan']:
            delivery_cnt += 1
            commonsense_info_box = commonsense_eval(query_data,tested_plan['plan'])
        else:
            commonsense_info_box = None

        # Hard constraints are checked only for plans complete enough to
        # score: nothing absent and all entries found in the sandbox.
        if commonsense_info_box and commonsense_info_box['is_not_absent'][0] and commonsense_info_box['is_valid_information_in_sandbox'][0]:
            hard_info_box = hard_eval(query_data,tested_plan['plan'])
        else:
            hard_info_box = None

        plan_constraint_store.append({'commonsense_constraint':commonsense_info_box,'hard_constraint':hard_info_box})

        commonsenseConstraint_statistic[query_data['level']][query_data['days']].append(commonsense_info_box)
        hardConstraint_statistic[query_data['level']][query_data['days']].append(hard_info_box)

    # Collapse per-plan results into true/false tallies per level/day/check.
    commonsenseConstraint_statistic_processed = statistics(commonsenseConstraint_statistic)
    hardConstraint_statistic_processed = statistics(hardConstraint_statistic)
    # print(commonsenseConstraint_statistic_processed)
    # print(hardConstraint_statistic_processed)
    # Count how many queries of each level/day actually carry each hard
    # constraint. Only medium/hard buckets exist here — presumably easy
    # queries never set a local constraint; verify against the dataset.
    constraint_record = {key: {day: {'house rule':0, 'cuisine':0, 'room type':0, 'transportation':0} for day in [3,5,7]} for key in ['medium','hard']}
    constraint_mapping = {'house rule':'valid_room_rule','cuisine':'valid_cuisine','room type':'valid_room_type','transportation':'valid_transportation'}
    mapping_constraint_record = {key: {day: {'valid_room_rule':0, 'valid_cuisine':0, 'valid_room_type':0, 'valid_transportation':0} for day in [3,5,7]} for key in ['medium','hard']}
    count_record = {key:{day:0 for day in [3,5,7]} for key in ['easy','medium','hard']}

    for unit in query_data_list:
        count_record[unit['level']][unit['days']] += 1
        for key in constraint_record['medium'][3]:
            if unit['local_constraint'][key] != None:
                constraint_record[unit['level']][unit['days']][key] += 1
                mapping_constraint_record[unit['level']][unit['days']][constraint_mapping[key]] += 1

    data_record = {key:{day:[] for day in [3,5,7]} for key in ['easy','medium','hard']}

    # Micro pass/total tallies across every individual constraint check.
    constraint_dis_record = {"commonsense":{"pass":0,"total":0},"hard":{"pass":0,"total":0}}

    for constraint in ['commonsense','hard']:
        if constraint == 'commonsense':
            constraint_statistic = commonsenseConstraint_statistic_processed
        elif constraint == 'hard':
            constraint_statistic = hardConstraint_statistic_processed

        key_dict = {'commonsense':['is_valid_information_in_current_city','is_valid_information_in_sandbox','is_reasonalbe_visiting_city','is_valid_restaurants','is_valid_transportation','is_valid_attractions','is_valid_accommodation','is_not_absent'],'hard':['valid_cost','valid_room_rule','valid_cuisine','valid_room_type','valid_transportation']}

        for key in constraint_statistic:
            # key: difficulty level
            for key2 in constraint_statistic[key]:
                # key2: trip length in days
                if key2 == -1:
                    print(constraint_statistic[key])
                    exit(0)
                for key3 in key_dict[constraint]:
                    # Record "passed/applicable" per check; '0/0' is the
                    # placeholder when the check never appeared in this bucket.
                    data_record[key][key2].append('0/0')
                    if key3 in constraint_statistic[key][key2]:
                        constraint_dis_record[constraint]['pass'] += constraint_statistic[key][key2][key3]['true']
                        if constraint == 'hard':
                            # Hard constraints are counted only against the
                            # queries that actually request them.
                            if key == 'hard' and key3 in ['valid_room_rule','valid_cuisine','valid_room_type','valid_transportation']:
                                data_record[key][key2][-1] = f"{constraint_statistic[key][key2][key3]['true']}/{mapping_constraint_record[key][key2][key3]}"
                                constraint_dis_record[constraint]['total'] += mapping_constraint_record[key][key2][key3]
                            elif key == 'medium' and key3 in ['valid_room_rule','valid_cuisine','valid_room_type']:
                                data_record[key][key2][-1] = f"{constraint_statistic[key][key2][key3]['true']}/{mapping_constraint_record[key][key2][key3]}"
                                constraint_dis_record[constraint]['total'] += mapping_constraint_record[key][key2][key3]
                            else:
                                data_record[key][key2][-1] = f"{constraint_statistic[key][key2][key3]['true']}/{count_record[key][key2]}"
                                if key3 in ['valid_cost','valid_visitng_city_number','valid_days']:
                                    constraint_dis_record[constraint]['total'] += count_record[key][key2]
                        else:
                            data_record[key][key2][-1] = f"{constraint_statistic[key][key2][key3]['true']}/{count_record[key][key2]}"
                            constraint_dis_record[constraint]['total'] += count_record[key][key2]

    # Macro pass counts: a plan passes a constraint family only when every
    # individual check in that family passed.
    final_all_cnt = 0
    final_commonsense_cnt = 0
    final_hardConstraint_cnt = 0
    final_all_cnt_map = {level:0 for level in ['easy','medium','hard']}
    for idx in (range(0,len(query_data_list))):
        if plan_constraint_store[idx]['commonsense_constraint']:
            final_commonsense_pass = True
            final_hardConstraint_pass = True
            for item in plan_constraint_store[idx]['commonsense_constraint']:
                if plan_constraint_store[idx]['commonsense_constraint'][item][0] is not None and not plan_constraint_store[idx]['commonsense_constraint'][item][0]:
                    final_commonsense_pass = False
                    break
            # Plans with no hard-constraint result are skipped entirely —
            # they contribute to neither macro counter.
            if plan_constraint_store[idx]['hard_constraint'] is None:
                continue
            for item in plan_constraint_store[idx]['hard_constraint']:
                if plan_constraint_store[idx]['hard_constraint'][item][0] is not None and plan_constraint_store[idx]['hard_constraint'][item][0] == False:
                    final_hardConstraint_pass = False
                    break

            if final_commonsense_pass:
                final_commonsense_cnt += 1
            if final_hardConstraint_pass:
                final_hardConstraint_cnt += 1
            if final_commonsense_pass and final_hardConstraint_pass:
                final_all_cnt += 1
                final_all_cnt_map[query_data_list[idx]['level']] += 1

    result = {}

    # Denominators are fixed split sizes: 180 validation / 1000 test queries,
    # 8 commonsense checks per query (1440 / 8000), and the split's fixed
    # number of hard-constraint instances (420 / 2290).
    if validation_or_test == 'validation':
        result['Delivery Rate'] = delivery_cnt / 180
        result['Commonsense Constraint Micro Pass Rate'] = constraint_dis_record['commonsense']['pass'] / 1440
        result['Commonsense Constraint Macro Pass Rate'] = final_commonsense_cnt / 180
        result['Hard Constraint Micro Pass Rate'] = constraint_dis_record['hard']['pass'] / 420
        result['Hard Constraint Macro Pass Rate'] = final_hardConstraint_cnt / 180
        result['Final Pass Rate'] = final_all_cnt / 180

    elif validation_or_test == 'test':
        result['Delivery Rate'] = delivery_cnt / 1000
        result['Commonsense Constraint Micro Pass Rate'] = constraint_dis_record['commonsense']['pass'] / 8000
        result['Commonsense Constraint Macro Pass Rate'] = final_commonsense_cnt / 1000
        result['Hard Constraint Micro Pass Rate'] = constraint_dis_record['hard']['pass'] / 2290
        result['Hard Constraint Macro Pass Rate'] = final_hardConstraint_cnt / 1000
        result['Final Pass Rate'] = final_all_cnt / 1000

    return result
181
+
evaluation/hardConstraint.py ADDED
@@ -0,0 +1,266 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from annotation.src.utils import get_valid_name_city,extract_before_parenthesis,extract_numbers_from_filenames
2
+ from tools.flights.apis import Flights
3
+ from tools.accommodations.apis import Accommodations
4
+ from tools.restaurants.apis import Restaurants
5
+ from tools.googleDistanceMatrix.apis import GoogleDistanceMatrix
6
+ from tools.attractions.apis import Attractions
7
+ import math
8
+ import json
9
+ import re
10
+ import numpy as np
11
+ import os
12
+ import sys
13
+ from tqdm import tqdm
14
+ import argparse
15
+
16
# Make the repository root importable and anchor relative paths at this
# file's directory so the tool data files resolve regardless of the CWD the
# script is launched from.
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))
os.chdir(os.path.dirname(os.path.abspath(__file__)))


# Module-level sandbox tool instances shared by every evaluation helper
# below. Each loads its backing CSV once at import time.
flight = Flights()
accommodation = Accommodations()
restaurants = Restaurants()
googleDistanceMatrix = GoogleDistanceMatrix()
attractions = Attractions()
25
+
26
+
27
def load_line_json_data(filename):
    """Load a JSON-Lines file into a list of parsed records."""
    with open(filename, 'r', encoding='utf-8') as source:
        content = source.read().strip()
    return [json.loads(record) for record in content.split('\n')]
34
+
35
+
36
def convert_bool_values(item):
    """Recursively replace numpy ``bool_`` values with plain Python bools.

    Dicts, lists and tuples are rebuilt with converted contents so the
    structure becomes JSON-serializable; any other value passes through
    unchanged.
    """
    # Leaf case first: numpy booleans become native bools.
    if isinstance(item, np.bool_):
        return bool(item)
    # Container cases: rebuild with every element converted.
    if isinstance(item, dict):
        return {key: convert_bool_values(value) for key, value in item.items()}
    if isinstance(item, list):
        return [convert_bool_values(value) for value in item]
    if isinstance(item, tuple):
        return tuple(convert_bool_values(value) for value in item)
    # Everything else is returned as-is.
    return item
52
+
53
+
54
+
55
+
56
def extract_from_to(text: str):
    """Pull the origin and destination out of a "from A to B" phrase.

    The destination ends at the first comma or at the end of the string.

    Args:
        text: Free-form text possibly containing "from A to B".

    Returns:
        tuple: (origin, destination), or (None, None) when the pattern is
        absent.
    """
    match = re.search(r"from\s+(.+?)\s+to\s+([^,]+)(?=[,\s]|$)", text)
    if match is None:
        return (None, None)
    return match.groups()
69
+
70
+
71
def get_total_cost(question, tested_data):
    """Compute a plan's total cost: transportation, meals and accommodation.

    Costs are looked up in the sandbox tool dataframes by name/city (meals,
    lodging), by flight number (flights), or via the distance matrix
    (self-driving / taxi). Entries that cannot be matched contribute nothing.

    Args:
        question: Query dict; 'days' and 'people_number' are read here.
        tested_data: List of per-day plan dicts.

    Returns:
        The accumulated total cost (numeric).
    """
    total_cost = 0
    for i in range(min(question['days'],len(tested_data))):
        unit = tested_data[i]
        # transportation
        if unit['transportation'] and unit['transportation'] != '-':
            value = unit['transportation']
            org_city, dest_city = extract_from_to(value)
            # Fall back to the day's current_city when the transportation
            # string itself does not carry a "from A to B" phrase.
            if org_city == None or dest_city == None:
                org_city, dest_city = extract_from_to(unit['current_city'])

            if org_city == None or dest_city == None:
                # Unparseable route: skip transportation cost for this day.
                pass
            else:
                if 'flight number' in value.lower():
                    # Flights are priced per person by flight number lookup.
                    res = flight.data[flight.data['Flight Number'] == value.split('Flight Number: ')[1].split(',')[0]]
                    if len(res) > 0:
                        total_cost += res['Price'].values[0] * question['people_number']

                elif 'self-driving' in value.lower() or 'taxi' in value.lower():
                    if 'self-driving' in value.lower():
                        # One car carries up to 5 people.
                        cost = googleDistanceMatrix.run_for_evaluation(org_city,dest_city,'self-driving')['cost']
                        total_cost += cost * math.ceil(question['people_number'] * 1.0 / 5)
                    else:
                        # One taxi carries up to 4 people.
                        cost = googleDistanceMatrix.run_for_evaluation(org_city,dest_city,'taxi')['cost']
                        total_cost += cost * math.ceil(question['people_number'] * 1.0 / 4)

        # breakfast — restaurant average cost is per person.
        if unit['breakfast'] and unit['breakfast'] != '-':
            name, city = get_valid_name_city(unit['breakfast'])
            res = restaurants.data[(restaurants.data['Name'].astype(str).str.contains(re.escape(name))) & (restaurants.data['City'] == city)]
            if len(res) > 0:
                total_cost += res['Average Cost'].values[0] * question['people_number']


        # lunch — same per-person pricing as breakfast.
        if unit['lunch'] and unit['lunch'] != '-':
            name, city = get_valid_name_city(unit['lunch'])
            res = restaurants.data[(restaurants.data['Name'].astype(str).str.contains(re.escape(name))) & (restaurants.data['City'] == city)]
            if len(res) > 0:
                total_cost += res['Average Cost'].values[0] * question['people_number']

        # dinner — same per-person pricing as breakfast.
        if unit['dinner'] and unit['dinner'] != '-':
            name, city = get_valid_name_city(unit['dinner'])
            res = restaurants.data[(restaurants.data['Name'].astype(str).str.contains(re.escape(name))) & (restaurants.data['City'] == city)]
            if len(res) > 0:
                total_cost += res['Average Cost'].values[0] * question['people_number']

        # accommodation — price is per room; rooms needed is people divided
        # by the listing's maximum occupancy, rounded up.
        if unit['accommodation'] and unit['accommodation'] != '-':
            name, city = get_valid_name_city(unit['accommodation'])
            res = accommodation.data[(accommodation.data['NAME'].astype(str).str.contains(re.escape(name))) & (accommodation.data['city'] == city)]
            if len(res) > 0:
                total_cost += res['price'].values[0] * math.ceil(question['people_number'] * 1.0 / res['maximum occupancy'].values[0])
    # print(total_cost)
    return total_cost
129
+
130
+
131
def is_valid_room_rule(question, tested_data):
    """Check the planned accommodations against the queried house rule.

    Returns:
        (None, None) when the query has no house-rule constraint,
        (False, reason) when a booked room forbids the requested rule,
        (True, None) otherwise.
    """
    requested_rule = question['local_constraint']['house rule']
    if requested_rule is None:
        return None,None

    # Requested rule -> phrase in a listing's house_rules that forbids it.
    forbidding_phrase = {
        'smoking': 'No smoking',
        # NOTE(review): 'parities' spelling matches the query data as-is.
        'parities': 'No parties',
        'children under 10': 'No children under 10',
        'visitors': 'No visitors',
        'pets': 'No pets',
    }

    for day_idx in range(min(question['days'], len(tested_data))):
        unit = tested_data[day_idx]
        if unit['accommodation'] and unit['accommodation'] != '-':
            name, city = get_valid_name_city(unit['accommodation'])
            res = accommodation.data[(accommodation.data['NAME'].astype(str).str.contains(re.escape(name))) & (accommodation.data['city'] == city)]
            if len(res) > 0:
                phrase = forbidding_phrase.get(requested_rule)
                if phrase and phrase in str(res['house_rules'].values[0]):
                    return False, f"The house rule should be {requested_rule}."

    return True, None
155
+
156
+
157
+
158
def is_valid_cuisine(question, tested_data):
    """Check that every requested cuisine is covered by at least one restaurant
    visited outside the origin city.

    Bug fix: the original used `continue` after the origin-city check for
    breakfast (and lunch), which skipped the *remaining meals of the same day*
    instead of only that meal, so lunch/dinner cuisines could go uncounted.
    Each meal is now evaluated independently.

    Returns:
        (None, None)  when no cuisine constraint is set,
        (True, None)  when all requested cuisines were encountered,
        (False, msg)  naming the first missing cuisine otherwise.
    """
    required = question['local_constraint']['cuisine']
    if not required:
        return None, None

    cuisine_set = set()
    for i in range(min(question['days'], len(tested_data))):
        unit = tested_data[i]
        for meal in ('breakfast', 'lunch', 'dinner'):
            entry = unit[meal]
            if not entry or entry == '-':
                continue
            name, city = get_valid_name_city(entry)
            # Meals taken in the origin city do not count toward the constraint.
            if city == question['org']:
                continue
            res = restaurants.data[(restaurants.data['Name'].astype(str).str.contains(re.escape(name))) & (restaurants.data['City'] == city)]
            if len(res) > 0:
                for cuisine in required:
                    if cuisine in res.iloc[0]['Cuisines']:
                        cuisine_set.add(cuisine)

    if len(cuisine_set) == len(required):
        return True, None
    # Report the first cuisine that was never satisfied.
    for cuisine in required:
        if cuisine not in cuisine_set:
            return False, f"The cuisine {cuisine} is not satisfied."
204
+
205
+
206
def is_valid_transportation(question, tested_data):
    """Verify the plan never uses a transport mode banned by the constraint.

    'no flight' bans entries containing 'Flight'; 'no self-driving' bans
    entries containing 'Self-driving'.

    Returns (None, None) when no constraint is set, (False, msg) on the first
    banned mode encountered, and (True, None) otherwise.
    """
    constraint = question['local_constraint']['transportation']
    if constraint is None:
        return None, None

    # Keyword in the plan text that identifies the banned mode.
    banned_keyword = {'no flight': 'Flight', 'no self-driving': 'Self-driving'}.get(constraint)

    for day_idx in range(min(question['days'], len(tested_data))):
        mode = tested_data[day_idx]['transportation']
        if mode and mode != '-' and banned_keyword is not None and banned_keyword in mode:
            return False, f"The transportation should not be {constraint}."

    return True, None
219
+
220
+
221
def is_valid_room_type(question, tested_data):
    """Check each booked accommodation's room type against the constraint.

    'not shared room' excludes exactly the 'Shared room' type; 'shared room',
    'private room' and 'entire room' each require one exact listing type
    ('Shared room', 'Private room', 'Entire home/apt' respectively).

    Returns (None, None) when no constraint is set, (False, msg) on the first
    violating accommodation, and (True, None) otherwise.
    """
    constraint = question['local_constraint']['room type']
    if constraint is None:
        return None, None

    for day_idx in range(min(question['days'], len(tested_data))):
        entry = tested_data[day_idx]['accommodation']
        if not entry or entry == '-':
            continue
        name, city = get_valid_name_city(entry)
        res = accommodation.data[(accommodation.data['NAME'].astype(str).str.contains(re.escape(name))) & (accommodation.data['city'] == city)]
        if len(res) == 0:
            continue
        actual = res['room type'].values[0]
        violated = (
            (constraint == 'not shared room' and actual == 'Shared room')
            or (constraint == 'shared room' and actual != 'Shared room')
            or (constraint == 'private room' and actual != 'Private room')
            or (constraint == 'entire room' and actual != 'Entire home/apt')
        )
        if violated:
            return False, f"The room type should be {constraint}."

    return True, None
243
+
244
+
245
def evaluation(query_data, tested_data):
    """Run every hard-constraint check on a plan.

    Returns a dict mapping check name -> (status, reason), where status is
    True/False/None (None meaning the constraint was not set) and reason is a
    message for failures.
    """
    checks = (
        ('valid_cuisine', is_valid_cuisine),
        ('valid_room_rule', is_valid_room_rule),
        ('valid_transportation', is_valid_transportation),
        ('valid_room_type', is_valid_room_type),
    )
    return_info = {key: check(query_data, tested_data) for key, check in checks}
    # Budget check: total plan cost must not exceed the stated budget.
    return_info['valid_cost'] = (bool(get_total_cost(query_data, tested_data) <= query_data['budget']), None)
    return return_info
253
+
254
def boolean_evaluation(query_data, tested_data):
    """Return True only if every hard constraint passes (or is unset).

    Improvement: the original duplicated evaluation()'s body verbatim; it now
    delegates to evaluation() so the two can never drift apart. The legacy
    behavior of printing the first failing constraint's key is preserved.
    """
    return_info = evaluation(query_data, tested_data)
    for key, (status, _reason) in return_info.items():
        if status is False:
            print(key)  # legacy debug output naming the first failed constraint
            return False
    return True
266
+
evaluation/scored/1_validation_two-stage_1.jsonl ADDED
@@ -0,0 +1 @@
 
 
1
+ {"Delivery Rate": 0.8944444444444445, "Commonsense Constraint Micro Pass Rate": 0.6111111111111112, "Commonsense Constraint Macro Pass Rate": 0.027777777777777776, "Hard Constraint Micro Pass Rate": 0.1523809523809524, "Hard Constraint Macro Pass Rate": 0.10555555555555556, "Final Pass Rate": 0.005555555555555556}
evaluation/scored/textbox_validation_two-stage_1.jsonl ADDED
@@ -0,0 +1 @@
 
 
1
+ {"Delivery Rate": 0.8944444444444445, "Commonsense Constraint Micro Pass Rate": 0.6111111111111112, "Commonsense Constraint Macro Pass Rate": 0.027777777777777776, "Hard Constraint Micro Pass Rate": 0.1523809523809524, "Hard Constraint Macro Pass Rate": 0.10555555555555556, "Final Pass Rate": 0.005555555555555556}
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ datasets==2.16.1
2
+ gradio==3.50.2
3
+ huggingface-hub==0.20.2
tools/__init__.py ADDED
File without changes
tools/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (170 Bytes). View file
 
tools/accommodations/.ipynb_checkpoints/test-checkpoint.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
tools/accommodations/__init__.py ADDED
File without changes
tools/accommodations/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (185 Bytes). View file
 
tools/accommodations/__pycache__/apis.cpython-39.pyc ADDED
Binary file (1.57 kB). View file
 
tools/accommodations/apis.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from pandas import DataFrame
3
+ from typing import Optional
4
+ from annotation.src.utils import extract_before_parenthesis
5
+
6
+
7
+ class Accommodations:
8
+ def __init__(self, path="../database/accommodations/clean_accommodations_2022.csv"):
9
+ self.path = path
10
+ self.data = pd.read_csv(self.path).dropna()[['NAME','price','room type', 'house_rules', 'minimum nights', 'maximum occupancy', 'review rate number', 'city']]
11
+ print("Accommodations loaded.")
12
+
13
+ def load_db(self):
14
+ self.data = pd.read_csv(self.path).dropna()
15
+
16
+ def run(self,
17
+ city: str,
18
+ ) -> DataFrame:
19
+ """Search for accommodations by city."""
20
+ results = self.data[self.data["city"] == city]
21
+ # results = results[results["date"] == date]
22
+ # if order == "ascPrice":
23
+ # results = results.sort_values(by=["price"], ascending=True)
24
+ # elif order == "descPrice":
25
+ # results = results.sort_values(by=["price"], ascending=False)
26
+ # elif order == "ascRate":
27
+ # results = results.sort_values(by=["review rate number"], ascending=True)
28
+ # elif order == "descRate":
29
+ # results = results.sort_values(by=["review rate number"], ascending=False)
30
+ # elif order == "ascMinumNights":
31
+ # results = results.sort_values(by=["minimum nights"], ascending=True)
32
+ # elif order == "descMinumNights":
33
+ # results = results.sort_values(by=["minimum nights"], ascending=False)
34
+ # elif order == "ascMaximumOccupancy":
35
+ # results = results.sort_values(by=["maximum occupancy"], ascending=True)
36
+ # elif order == "descMaximumOccupancy":
37
+ # results = results.sort_values(by=["maximum occupancy"], ascending=False)
38
+
39
+ # if room_type == "all":
40
+ # return results
41
+ # elif room_type == "Entire home/apt":
42
+ # return results[results["room type"]=="Entire home/apt"]
43
+ # elif room_type == "Hotel room":
44
+ # return results[results["room type"]=="Hotel room"]
45
+ # elif room_type == "Private room":
46
+ # return results[results["room type"]=="Private room"]
47
+ # elif room_type == "Shared room":
48
+ # return results[results["room type"]=="Shared room"]
49
+ # else:
50
+ # return None
51
+ if len(results) == 0:
52
+ return "There is no attraction in this city."
53
+
54
+ return results
55
+
56
+ def run_for_annotation(self,
57
+ city: str,
58
+ ) -> DataFrame:
59
+ """Search for accommodations by city."""
60
+ results = self.data[self.data["city"] == extract_before_parenthesis(city)]
61
+ # results = results[results["date"] == date]
62
+ # if order == "ascPrice":
63
+ # results = results.sort_values(by=["price"], ascending=True)
64
+ # elif order == "descPrice":
65
+ # results = results.sort_values(by=["price"], ascending=False)
66
+ # elif order == "ascRate":
67
+ # results = results.sort_values(by=["review rate number"], ascending=True)
68
+ # elif order == "descRate":
69
+ # results = results.sort_values(by=["review rate number"], ascending=False)
70
+ # elif order == "ascMinumNights":
71
+ # results = results.sort_values(by=["minimum nights"], ascending=True)
72
+ # elif order == "descMinumNights":
73
+ # results = results.sort_values(by=["minimum nights"], ascending=False)
74
+ # elif order == "ascMaximumOccupancy":
75
+ # results = results.sort_values(by=["maximum occupancy"], ascending=True)
76
+ # elif order == "descMaximumOccupancy":
77
+ # results = results.sort_values(by=["maximum occupancy"], ascending=False)
78
+
79
+ # if room_type == "all":
80
+ # return results
81
+ # elif room_type == "Entire home/apt":
82
+ # return results[results["room type"]=="Entire home/apt"]
83
+ # elif room_type == "Hotel room":
84
+ # return results[results["room type"]=="Hotel room"]
85
+ # elif room_type == "Private room":
86
+ # return results[results["room type"]=="Private room"]
87
+ # elif room_type == "Shared room":
88
+ # return results[results["room type"]=="Shared room"]
89
+ # else:
90
+ # return None
91
+ return results
tools/accommodations/test.ipynb ADDED
@@ -0,0 +1,2037 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "ad7592e7",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "name": "stderr",
11
+ "output_type": "stream",
12
+ "text": [
13
+ "/tmp/ipykernel_2459435/230780042.py:2: DtypeWarning: Columns (25) have mixed types. Specify dtype option on import or set low_memory=False.\n",
14
+ " data = pd.read_csv('/home/xj/toolAugEnv/code/toolConstraint/database/hotels/Airbnb_Open_Data.csv')\n"
15
+ ]
16
+ }
17
+ ],
18
+ "source": [
19
+ "import pandas as pd\n",
20
+ "data = pd.read_csv('/home/xj/toolAugEnv/code/toolConstraint/database/hotels/Airbnb_Open_Data.csv')"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "execution_count": 2,
26
+ "id": "f97916a9",
27
+ "metadata": {},
28
+ "outputs": [
29
+ {
30
+ "data": {
31
+ "text/html": [
32
+ "<div>\n",
33
+ "<style scoped>\n",
34
+ " .dataframe tbody tr th:only-of-type {\n",
35
+ " vertical-align: middle;\n",
36
+ " }\n",
37
+ "\n",
38
+ " .dataframe tbody tr th {\n",
39
+ " vertical-align: top;\n",
40
+ " }\n",
41
+ "\n",
42
+ " .dataframe thead th {\n",
43
+ " text-align: right;\n",
44
+ " }\n",
45
+ "</style>\n",
46
+ "<table border=\"1\" class=\"dataframe\">\n",
47
+ " <thead>\n",
48
+ " <tr style=\"text-align: right;\">\n",
49
+ " <th></th>\n",
50
+ " <th>id</th>\n",
51
+ " <th>NAME</th>\n",
52
+ " <th>host id</th>\n",
53
+ " <th>host_identity_verified</th>\n",
54
+ " <th>host name</th>\n",
55
+ " <th>neighbourhood group</th>\n",
56
+ " <th>neighbourhood</th>\n",
57
+ " <th>lat</th>\n",
58
+ " <th>long</th>\n",
59
+ " <th>country</th>\n",
60
+ " <th>...</th>\n",
61
+ " <th>service fee</th>\n",
62
+ " <th>minimum nights</th>\n",
63
+ " <th>number of reviews</th>\n",
64
+ " <th>last review</th>\n",
65
+ " <th>reviews per month</th>\n",
66
+ " <th>review rate number</th>\n",
67
+ " <th>calculated host listings count</th>\n",
68
+ " <th>availability 365</th>\n",
69
+ " <th>house_rules</th>\n",
70
+ " <th>license</th>\n",
71
+ " </tr>\n",
72
+ " </thead>\n",
73
+ " <tbody>\n",
74
+ " <tr>\n",
75
+ " <th>0</th>\n",
76
+ " <td>1001254</td>\n",
77
+ " <td>Clean &amp; quiet apt home by the park</td>\n",
78
+ " <td>80014485718</td>\n",
79
+ " <td>unconfirmed</td>\n",
80
+ " <td>Madaline</td>\n",
81
+ " <td>Brooklyn</td>\n",
82
+ " <td>Kensington</td>\n",
83
+ " <td>40.64749</td>\n",
84
+ " <td>-73.97237</td>\n",
85
+ " <td>United States</td>\n",
86
+ " <td>...</td>\n",
87
+ " <td>$193</td>\n",
88
+ " <td>10.0</td>\n",
89
+ " <td>9.0</td>\n",
90
+ " <td>10/19/2021</td>\n",
91
+ " <td>0.21</td>\n",
92
+ " <td>4.0</td>\n",
93
+ " <td>6.0</td>\n",
94
+ " <td>286.0</td>\n",
95
+ " <td>Clean up and treat the home the way you'd like...</td>\n",
96
+ " <td>NaN</td>\n",
97
+ " </tr>\n",
98
+ " <tr>\n",
99
+ " <th>1</th>\n",
100
+ " <td>1002102</td>\n",
101
+ " <td>Skylit Midtown Castle</td>\n",
102
+ " <td>52335172823</td>\n",
103
+ " <td>verified</td>\n",
104
+ " <td>Jenna</td>\n",
105
+ " <td>Manhattan</td>\n",
106
+ " <td>Midtown</td>\n",
107
+ " <td>40.75362</td>\n",
108
+ " <td>-73.98377</td>\n",
109
+ " <td>United States</td>\n",
110
+ " <td>...</td>\n",
111
+ " <td>$28</td>\n",
112
+ " <td>30.0</td>\n",
113
+ " <td>45.0</td>\n",
114
+ " <td>5/21/2022</td>\n",
115
+ " <td>0.38</td>\n",
116
+ " <td>4.0</td>\n",
117
+ " <td>2.0</td>\n",
118
+ " <td>228.0</td>\n",
119
+ " <td>Pet friendly but please confirm with me if the...</td>\n",
120
+ " <td>NaN</td>\n",
121
+ " </tr>\n",
122
+ " <tr>\n",
123
+ " <th>2</th>\n",
124
+ " <td>1002403</td>\n",
125
+ " <td>THE VILLAGE OF HARLEM....NEW YORK !</td>\n",
126
+ " <td>78829239556</td>\n",
127
+ " <td>NaN</td>\n",
128
+ " <td>Elise</td>\n",
129
+ " <td>Manhattan</td>\n",
130
+ " <td>Harlem</td>\n",
131
+ " <td>40.80902</td>\n",
132
+ " <td>-73.94190</td>\n",
133
+ " <td>United States</td>\n",
134
+ " <td>...</td>\n",
135
+ " <td>$124</td>\n",
136
+ " <td>3.0</td>\n",
137
+ " <td>0.0</td>\n",
138
+ " <td>NaN</td>\n",
139
+ " <td>NaN</td>\n",
140
+ " <td>5.0</td>\n",
141
+ " <td>1.0</td>\n",
142
+ " <td>352.0</td>\n",
143
+ " <td>I encourage you to use my kitchen, cooking and...</td>\n",
144
+ " <td>NaN</td>\n",
145
+ " </tr>\n",
146
+ " <tr>\n",
147
+ " <th>3</th>\n",
148
+ " <td>1002755</td>\n",
149
+ " <td>NaN</td>\n",
150
+ " <td>85098326012</td>\n",
151
+ " <td>unconfirmed</td>\n",
152
+ " <td>Garry</td>\n",
153
+ " <td>Brooklyn</td>\n",
154
+ " <td>Clinton Hill</td>\n",
155
+ " <td>40.68514</td>\n",
156
+ " <td>-73.95976</td>\n",
157
+ " <td>United States</td>\n",
158
+ " <td>...</td>\n",
159
+ " <td>$74</td>\n",
160
+ " <td>30.0</td>\n",
161
+ " <td>270.0</td>\n",
162
+ " <td>7/5/2019</td>\n",
163
+ " <td>4.64</td>\n",
164
+ " <td>4.0</td>\n",
165
+ " <td>1.0</td>\n",
166
+ " <td>322.0</td>\n",
167
+ " <td>NaN</td>\n",
168
+ " <td>NaN</td>\n",
169
+ " </tr>\n",
170
+ " <tr>\n",
171
+ " <th>4</th>\n",
172
+ " <td>1003689</td>\n",
173
+ " <td>Entire Apt: Spacious Studio/Loft by central park</td>\n",
174
+ " <td>92037596077</td>\n",
175
+ " <td>verified</td>\n",
176
+ " <td>Lyndon</td>\n",
177
+ " <td>Manhattan</td>\n",
178
+ " <td>East Harlem</td>\n",
179
+ " <td>40.79851</td>\n",
180
+ " <td>-73.94399</td>\n",
181
+ " <td>United States</td>\n",
182
+ " <td>...</td>\n",
183
+ " <td>$41</td>\n",
184
+ " <td>10.0</td>\n",
185
+ " <td>9.0</td>\n",
186
+ " <td>11/19/2018</td>\n",
187
+ " <td>0.10</td>\n",
188
+ " <td>3.0</td>\n",
189
+ " <td>1.0</td>\n",
190
+ " <td>289.0</td>\n",
191
+ " <td>Please no smoking in the house, porch or on th...</td>\n",
192
+ " <td>NaN</td>\n",
193
+ " </tr>\n",
194
+ " <tr>\n",
195
+ " <th>...</th>\n",
196
+ " <td>...</td>\n",
197
+ " <td>...</td>\n",
198
+ " <td>...</td>\n",
199
+ " <td>...</td>\n",
200
+ " <td>...</td>\n",
201
+ " <td>...</td>\n",
202
+ " <td>...</td>\n",
203
+ " <td>...</td>\n",
204
+ " <td>...</td>\n",
205
+ " <td>...</td>\n",
206
+ " <td>...</td>\n",
207
+ " <td>...</td>\n",
208
+ " <td>...</td>\n",
209
+ " <td>...</td>\n",
210
+ " <td>...</td>\n",
211
+ " <td>...</td>\n",
212
+ " <td>...</td>\n",
213
+ " <td>...</td>\n",
214
+ " <td>...</td>\n",
215
+ " <td>...</td>\n",
216
+ " <td>...</td>\n",
217
+ " </tr>\n",
218
+ " <tr>\n",
219
+ " <th>102594</th>\n",
220
+ " <td>6092437</td>\n",
221
+ " <td>Spare room in Williamsburg</td>\n",
222
+ " <td>12312296767</td>\n",
223
+ " <td>verified</td>\n",
224
+ " <td>Krik</td>\n",
225
+ " <td>Brooklyn</td>\n",
226
+ " <td>Williamsburg</td>\n",
227
+ " <td>40.70862</td>\n",
228
+ " <td>-73.94651</td>\n",
229
+ " <td>United States</td>\n",
230
+ " <td>...</td>\n",
231
+ " <td>$169</td>\n",
232
+ " <td>1.0</td>\n",
233
+ " <td>0.0</td>\n",
234
+ " <td>NaN</td>\n",
235
+ " <td>NaN</td>\n",
236
+ " <td>3.0</td>\n",
237
+ " <td>1.0</td>\n",
238
+ " <td>227.0</td>\n",
239
+ " <td>No Smoking No Parties or Events of any kind Pl...</td>\n",
240
+ " <td>NaN</td>\n",
241
+ " </tr>\n",
242
+ " <tr>\n",
243
+ " <th>102595</th>\n",
244
+ " <td>6092990</td>\n",
245
+ " <td>Best Location near Columbia U</td>\n",
246
+ " <td>77864383453</td>\n",
247
+ " <td>unconfirmed</td>\n",
248
+ " <td>Mifan</td>\n",
249
+ " <td>Manhattan</td>\n",
250
+ " <td>Morningside Heights</td>\n",
251
+ " <td>40.80460</td>\n",
252
+ " <td>-73.96545</td>\n",
253
+ " <td>United States</td>\n",
254
+ " <td>...</td>\n",
255
+ " <td>$167</td>\n",
256
+ " <td>1.0</td>\n",
257
+ " <td>1.0</td>\n",
258
+ " <td>7/6/2015</td>\n",
259
+ " <td>0.02</td>\n",
260
+ " <td>2.0</td>\n",
261
+ " <td>2.0</td>\n",
262
+ " <td>395.0</td>\n",
263
+ " <td>House rules: Guests agree to the following ter...</td>\n",
264
+ " <td>NaN</td>\n",
265
+ " </tr>\n",
266
+ " <tr>\n",
267
+ " <th>102596</th>\n",
268
+ " <td>6093542</td>\n",
269
+ " <td>Comfy, bright room in Brooklyn</td>\n",
270
+ " <td>69050334417</td>\n",
271
+ " <td>unconfirmed</td>\n",
272
+ " <td>Megan</td>\n",
273
+ " <td>Brooklyn</td>\n",
274
+ " <td>Park Slope</td>\n",
275
+ " <td>40.67505</td>\n",
276
+ " <td>-73.98045</td>\n",
277
+ " <td>United States</td>\n",
278
+ " <td>...</td>\n",
279
+ " <td>$198</td>\n",
280
+ " <td>3.0</td>\n",
281
+ " <td>0.0</td>\n",
282
+ " <td>NaN</td>\n",
283
+ " <td>NaN</td>\n",
284
+ " <td>5.0</td>\n",
285
+ " <td>1.0</td>\n",
286
+ " <td>342.0</td>\n",
287
+ " <td>NaN</td>\n",
288
+ " <td>NaN</td>\n",
289
+ " </tr>\n",
290
+ " <tr>\n",
291
+ " <th>102597</th>\n",
292
+ " <td>6094094</td>\n",
293
+ " <td>Big Studio-One Stop from Midtown</td>\n",
294
+ " <td>11160591270</td>\n",
295
+ " <td>unconfirmed</td>\n",
296
+ " <td>Christopher</td>\n",
297
+ " <td>Queens</td>\n",
298
+ " <td>Long Island City</td>\n",
299
+ " <td>40.74989</td>\n",
300
+ " <td>-73.93777</td>\n",
301
+ " <td>United States</td>\n",
302
+ " <td>...</td>\n",
303
+ " <td>$109</td>\n",
304
+ " <td>2.0</td>\n",
305
+ " <td>5.0</td>\n",
306
+ " <td>10/11/2015</td>\n",
307
+ " <td>0.10</td>\n",
308
+ " <td>3.0</td>\n",
309
+ " <td>1.0</td>\n",
310
+ " <td>386.0</td>\n",
311
+ " <td>NaN</td>\n",
312
+ " <td>NaN</td>\n",
313
+ " </tr>\n",
314
+ " <tr>\n",
315
+ " <th>102598</th>\n",
316
+ " <td>6094647</td>\n",
317
+ " <td>585 sf Luxury Studio</td>\n",
318
+ " <td>68170633372</td>\n",
319
+ " <td>unconfirmed</td>\n",
320
+ " <td>Rebecca</td>\n",
321
+ " <td>Manhattan</td>\n",
322
+ " <td>Upper West Side</td>\n",
323
+ " <td>40.76807</td>\n",
324
+ " <td>-73.98342</td>\n",
325
+ " <td>United States</td>\n",
326
+ " <td>...</td>\n",
327
+ " <td>$206</td>\n",
328
+ " <td>1.0</td>\n",
329
+ " <td>0.0</td>\n",
330
+ " <td>NaN</td>\n",
331
+ " <td>NaN</td>\n",
332
+ " <td>3.0</td>\n",
333
+ " <td>1.0</td>\n",
334
+ " <td>69.0</td>\n",
335
+ " <td>NaN</td>\n",
336
+ " <td>NaN</td>\n",
337
+ " </tr>\n",
338
+ " </tbody>\n",
339
+ "</table>\n",
340
+ "<p>102599 rows × 26 columns</p>\n",
341
+ "</div>"
342
+ ],
343
+ "text/plain": [
344
+ " id NAME \n",
345
+ "0 1001254 Clean & quiet apt home by the park \\\n",
346
+ "1 1002102 Skylit Midtown Castle \n",
347
+ "2 1002403 THE VILLAGE OF HARLEM....NEW YORK ! \n",
348
+ "3 1002755 NaN \n",
349
+ "4 1003689 Entire Apt: Spacious Studio/Loft by central park \n",
350
+ "... ... ... \n",
351
+ "102594 6092437 Spare room in Williamsburg \n",
352
+ "102595 6092990 Best Location near Columbia U \n",
353
+ "102596 6093542 Comfy, bright room in Brooklyn \n",
354
+ "102597 6094094 Big Studio-One Stop from Midtown \n",
355
+ "102598 6094647 585 sf Luxury Studio \n",
356
+ "\n",
357
+ " host id host_identity_verified host name neighbourhood group \n",
358
+ "0 80014485718 unconfirmed Madaline Brooklyn \\\n",
359
+ "1 52335172823 verified Jenna Manhattan \n",
360
+ "2 78829239556 NaN Elise Manhattan \n",
361
+ "3 85098326012 unconfirmed Garry Brooklyn \n",
362
+ "4 92037596077 verified Lyndon Manhattan \n",
363
+ "... ... ... ... ... \n",
364
+ "102594 12312296767 verified Krik Brooklyn \n",
365
+ "102595 77864383453 unconfirmed Mifan Manhattan \n",
366
+ "102596 69050334417 unconfirmed Megan Brooklyn \n",
367
+ "102597 11160591270 unconfirmed Christopher Queens \n",
368
+ "102598 68170633372 unconfirmed Rebecca Manhattan \n",
369
+ "\n",
370
+ " neighbourhood lat long country ... \n",
371
+ "0 Kensington 40.64749 -73.97237 United States ... \\\n",
372
+ "1 Midtown 40.75362 -73.98377 United States ... \n",
373
+ "2 Harlem 40.80902 -73.94190 United States ... \n",
374
+ "3 Clinton Hill 40.68514 -73.95976 United States ... \n",
375
+ "4 East Harlem 40.79851 -73.94399 United States ... \n",
376
+ "... ... ... ... ... ... \n",
377
+ "102594 Williamsburg 40.70862 -73.94651 United States ... \n",
378
+ "102595 Morningside Heights 40.80460 -73.96545 United States ... \n",
379
+ "102596 Park Slope 40.67505 -73.98045 United States ... \n",
380
+ "102597 Long Island City 40.74989 -73.93777 United States ... \n",
381
+ "102598 Upper West Side 40.76807 -73.98342 United States ... \n",
382
+ "\n",
383
+ " service fee minimum nights number of reviews last review \n",
384
+ "0 $193 10.0 9.0 10/19/2021 \\\n",
385
+ "1 $28 30.0 45.0 5/21/2022 \n",
386
+ "2 $124 3.0 0.0 NaN \n",
387
+ "3 $74 30.0 270.0 7/5/2019 \n",
388
+ "4 $41 10.0 9.0 11/19/2018 \n",
389
+ "... ... ... ... ... \n",
390
+ "102594 $169 1.0 0.0 NaN \n",
391
+ "102595 $167 1.0 1.0 7/6/2015 \n",
392
+ "102596 $198 3.0 0.0 NaN \n",
393
+ "102597 $109 2.0 5.0 10/11/2015 \n",
394
+ "102598 $206 1.0 0.0 NaN \n",
395
+ "\n",
396
+ " reviews per month review rate number calculated host listings count \n",
397
+ "0 0.21 4.0 6.0 \\\n",
398
+ "1 0.38 4.0 2.0 \n",
399
+ "2 NaN 5.0 1.0 \n",
400
+ "3 4.64 4.0 1.0 \n",
401
+ "4 0.10 3.0 1.0 \n",
402
+ "... ... ... ... \n",
403
+ "102594 NaN 3.0 1.0 \n",
404
+ "102595 0.02 2.0 2.0 \n",
405
+ "102596 NaN 5.0 1.0 \n",
406
+ "102597 0.10 3.0 1.0 \n",
407
+ "102598 NaN 3.0 1.0 \n",
408
+ "\n",
409
+ " availability 365 house_rules \n",
410
+ "0 286.0 Clean up and treat the home the way you'd like... \\\n",
411
+ "1 228.0 Pet friendly but please confirm with me if the... \n",
412
+ "2 352.0 I encourage you to use my kitchen, cooking and... \n",
413
+ "3 322.0 NaN \n",
414
+ "4 289.0 Please no smoking in the house, porch or on th... \n",
415
+ "... ... ... \n",
416
+ "102594 227.0 No Smoking No Parties or Events of any kind Pl... \n",
417
+ "102595 395.0 House rules: Guests agree to the following ter... \n",
418
+ "102596 342.0 NaN \n",
419
+ "102597 386.0 NaN \n",
420
+ "102598 69.0 NaN \n",
421
+ "\n",
422
+ " license \n",
423
+ "0 NaN \n",
424
+ "1 NaN \n",
425
+ "2 NaN \n",
426
+ "3 NaN \n",
427
+ "4 NaN \n",
428
+ "... ... \n",
429
+ "102594 NaN \n",
430
+ "102595 NaN \n",
431
+ "102596 NaN \n",
432
+ "102597 NaN \n",
433
+ "102598 NaN \n",
434
+ "\n",
435
+ "[102599 rows x 26 columns]"
436
+ ]
437
+ },
438
+ "execution_count": 2,
439
+ "metadata": {},
440
+ "output_type": "execute_result"
441
+ }
442
+ ],
443
+ "source": [
444
+ "data"
445
+ ]
446
+ },
447
+ {
448
+ "cell_type": "code",
449
+ "execution_count": 3,
450
+ "id": "e21af5d1",
451
+ "metadata": {},
452
+ "outputs": [],
453
+ "source": [
454
+ "flight = pd.read_csv('/home/xj/toolAugEnv/code/toolConstraint/database/flights/clean_Flights_2022.csv')"
455
+ ]
456
+ },
457
+ {
458
+ "cell_type": "code",
459
+ "execution_count": 4,
460
+ "id": "966feef9",
461
+ "metadata": {},
462
+ "outputs": [],
463
+ "source": [
464
+ "flight = flight.to_dict(orient = 'split')"
465
+ ]
466
+ },
467
+ {
468
+ "cell_type": "code",
469
+ "execution_count": 5,
470
+ "id": "3f4fe062",
471
+ "metadata": {},
472
+ "outputs": [],
473
+ "source": [
474
+ "data_dict = data.to_dict(orient = 'split')"
475
+ ]
476
+ },
477
+ {
478
+ "cell_type": "code",
479
+ "execution_count": 6,
480
+ "id": "33213ac0",
481
+ "metadata": {},
482
+ "outputs": [
483
+ {
484
+ "data": {
485
+ "text/plain": [
486
+ "[2, '2022-04-04', '15:14', '16:36', 251.0, 'Durango', 'Denver', 100]"
487
+ ]
488
+ },
489
+ "execution_count": 6,
490
+ "metadata": {},
491
+ "output_type": "execute_result"
492
+ }
493
+ ],
494
+ "source": [
495
+ "flight['data'][2]"
496
+ ]
497
+ },
498
+ {
499
+ "cell_type": "code",
500
+ "execution_count": 8,
501
+ "id": "9cef6161",
502
+ "metadata": {},
503
+ "outputs": [
504
+ {
505
+ "name": "stdout",
506
+ "output_type": "stream",
507
+ "text": [
508
+ "nan\n"
509
+ ]
510
+ }
511
+ ],
512
+ "source": [
513
+ "print(str(data_dict['data'][3][24]))"
514
+ ]
515
+ },
516
+ {
517
+ "cell_type": "code",
518
+ "execution_count": 9,
519
+ "id": "c5f81f43",
520
+ "metadata": {},
521
+ "outputs": [],
522
+ "source": [
523
+ "city_set = set()\n",
524
+ "cnt = 0\n",
525
+ "for unit in data_dict['data']:\n",
526
+ " if str(unit[24]) != 'nan':\n",
527
+ " cnt += 1"
528
+ ]
529
+ },
530
+ {
531
+ "cell_type": "code",
532
+ "execution_count": 10,
533
+ "id": "533a5aa6",
534
+ "metadata": {},
535
+ "outputs": [
536
+ {
537
+ "data": {
538
+ "text/plain": [
539
+ "50468"
540
+ ]
541
+ },
542
+ "execution_count": 10,
543
+ "metadata": {},
544
+ "output_type": "execute_result"
545
+ }
546
+ ],
547
+ "source": [
548
+ "cnt"
549
+ ]
550
+ },
551
+ {
552
+ "cell_type": "code",
553
+ "execution_count": 11,
554
+ "id": "bfce5f56",
555
+ "metadata": {},
556
+ "outputs": [
557
+ {
558
+ "data": {
559
+ "text/plain": [
560
+ "set()"
561
+ ]
562
+ },
563
+ "execution_count": 11,
564
+ "metadata": {},
565
+ "output_type": "execute_result"
566
+ }
567
+ ],
568
+ "source": [
569
+ "city_set"
570
+ ]
571
+ },
572
+ {
573
+ "cell_type": "code",
574
+ "execution_count": 12,
575
+ "id": "230b760c",
576
+ "metadata": {},
577
+ "outputs": [
578
+ {
579
+ "ename": "ValueError",
580
+ "evalue": "Sample larger than population or is negative",
581
+ "output_type": "error",
582
+ "traceback": [
583
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
584
+ "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
585
+ "Cell \u001b[0;32mIn[12], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mrandom\u001b[39;00m\n\u001b[1;32m 2\u001b[0m city_set \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(city_set)\n\u001b[0;32m----> 3\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[43mrandom\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msample\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcity_set\u001b[49m\u001b[43m,\u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m)\n",
586
+ "File \u001b[0;32m~/miniconda3/envs/py39/lib/python3.9/random.py:449\u001b[0m, in \u001b[0;36mRandom.sample\u001b[0;34m(self, population, k, counts)\u001b[0m\n\u001b[1;32m 447\u001b[0m randbelow \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_randbelow\n\u001b[1;32m 448\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;241m0\u001b[39m \u001b[38;5;241m<\u001b[39m\u001b[38;5;241m=\u001b[39m k \u001b[38;5;241m<\u001b[39m\u001b[38;5;241m=\u001b[39m n:\n\u001b[0;32m--> 449\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSample larger than population or is negative\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 450\u001b[0m result \u001b[38;5;241m=\u001b[39m [\u001b[38;5;28;01mNone\u001b[39;00m] \u001b[38;5;241m*\u001b[39m k\n\u001b[1;32m 451\u001b[0m setsize \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m21\u001b[39m \u001b[38;5;66;03m# size of a small set minus size of an empty list\u001b[39;00m\n",
587
+ "\u001b[0;31mValueError\u001b[0m: Sample larger than population or is negative"
588
+ ]
589
+ }
590
+ ],
591
+ "source": [
592
+ "import random\n",
593
+ "city_set = list(city_set)\n",
594
+ "print(random.sample(city_set,1))"
595
+ ]
596
+ },
597
+ {
598
+ "cell_type": "code",
599
+ "execution_count": 12,
600
+ "id": "61eddd5f",
601
+ "metadata": {},
602
+ "outputs": [
603
+ {
604
+ "ename": "AttributeError",
605
+ "evalue": "'dict' object has no attribute 'to_dict'",
606
+ "output_type": "error",
607
+ "traceback": [
608
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
609
+ "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
610
+ "Cell \u001b[0;32mIn[12], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m data_dict \u001b[38;5;241m=\u001b[39m \u001b[43mdata\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_dict\u001b[49m(orient \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124msplit\u001b[39m\u001b[38;5;124m'\u001b[39m)\n",
611
+ "\u001b[0;31mAttributeError\u001b[0m: 'dict' object has no attribute 'to_dict'"
612
+ ]
613
+ }
614
+ ],
615
+ "source": [
616
+ "data_dict = data.to_dict(orient = 'split')"
617
+ ]
618
+ },
619
+ {
620
+ "cell_type": "code",
621
+ "execution_count": 35,
622
+ "id": "3292c450",
623
+ "metadata": {},
624
+ "outputs": [
625
+ {
626
+ "data": {
627
+ "text/plain": [
628
+ "['Unnamed: 0',\n",
629
+ " 'NAME',\n",
630
+ " 'room type',\n",
631
+ " 'price',\n",
632
+ " 'minimum nights',\n",
633
+ " 'review rate number',\n",
634
+ " 'house_rules',\n",
635
+ " 'maximum occupancy',\n",
636
+ " 'city']"
637
+ ]
638
+ },
639
+ "execution_count": 35,
640
+ "metadata": {},
641
+ "output_type": "execute_result"
642
+ }
643
+ ],
644
+ "source": [
645
+ "data_dict['columns']"
646
+ ]
647
+ },
648
+ {
649
+ "cell_type": "code",
650
+ "execution_count": 38,
651
+ "id": "cfaa21d9",
652
+ "metadata": {},
653
+ "outputs": [
654
+ {
655
+ "data": {
656
+ "text/plain": [
657
+ "5047"
658
+ ]
659
+ },
660
+ "execution_count": 38,
661
+ "metadata": {},
662
+ "output_type": "execute_result"
663
+ }
664
+ ],
665
+ "source": [
666
+ "len(data_dict['data'])"
667
+ ]
668
+ },
669
+ {
670
+ "cell_type": "code",
671
+ "execution_count": 36,
672
+ "id": "2980362d",
673
+ "metadata": {},
674
+ "outputs": [],
675
+ "source": [
676
+ "type_set = set()\n",
677
+ "for unit in data_dict['data']:\n",
678
+ " type_set.add(unit[2])"
679
+ ]
680
+ },
681
+ {
682
+ "cell_type": "code",
683
+ "execution_count": 37,
684
+ "id": "f5e36fbb",
685
+ "metadata": {},
686
+ "outputs": [
687
+ {
688
+ "data": {
689
+ "text/plain": [
690
+ "{'Entire home/apt', 'Private room', 'Shared room'}"
691
+ ]
692
+ },
693
+ "execution_count": 37,
694
+ "metadata": {},
695
+ "output_type": "execute_result"
696
+ }
697
+ ],
698
+ "source": [
699
+ "type_set"
700
+ ]
701
+ },
702
+ {
703
+ "cell_type": "code",
704
+ "execution_count": 15,
705
+ "id": "bf1231c4",
706
+ "metadata": {},
707
+ "outputs": [
708
+ {
709
+ "ename": "NameError",
710
+ "evalue": "name 'data_dict' is not defined",
711
+ "output_type": "error",
712
+ "traceback": [
713
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
714
+ "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
715
+ "Cell \u001b[0;32mIn[15], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mdata_dict\u001b[49m[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdata\u001b[39m\u001b[38;5;124m'\u001b[39m][\u001b[38;5;241m147\u001b[39m]\n",
716
+ "\u001b[0;31mNameError\u001b[0m: name 'data_dict' is not defined"
717
+ ]
718
+ }
719
+ ],
720
+ "source": [
721
+ "data_dict['data'][147]"
722
+ ]
723
+ },
724
+ {
725
+ "cell_type": "code",
726
+ "execution_count": 14,
727
+ "id": "f993b894",
728
+ "metadata": {},
729
+ "outputs": [
730
+ {
731
+ "data": {
732
+ "text/plain": [
733
+ "set()"
734
+ ]
735
+ },
736
+ "execution_count": 14,
737
+ "metadata": {},
738
+ "output_type": "execute_result"
739
+ }
740
+ ],
741
+ "source": [
742
+ "type_set"
743
+ ]
744
+ },
745
+ {
746
+ "cell_type": "code",
747
+ "execution_count": 10,
748
+ "id": "916e9470",
749
+ "metadata": {},
750
+ "outputs": [
751
+ {
752
+ "name": "stdout",
753
+ "output_type": "stream",
754
+ "text": [
755
+ "1 NAME\n",
756
+ "7 lat\n",
757
+ "8 long\n",
758
+ "13 room type\n",
759
+ "15 price\n",
760
+ "17 minimum nights\n",
761
+ "21 review rate number\n",
762
+ "24 house_rules\n"
763
+ ]
764
+ }
765
+ ],
766
+ "source": [
767
+ "for idx, unit in enumerate(data_dict['columns']):\n",
768
+ " if unit in ['NAME','lat', 'long', 'room type', 'price','minimum nights','review rate number','house_rules']:\n",
769
+ " print(idx,unit)"
770
+ ]
771
+ },
772
+ {
773
+ "cell_type": "code",
774
+ "execution_count": 73,
775
+ "id": "1213484d",
776
+ "metadata": {},
777
+ "outputs": [
778
+ {
779
+ "data": {
780
+ "application/vnd.jupyter.widget-view+json": {
781
+ "model_id": "51764c1a3739416289913ec613816cc7",
782
+ "version_major": 2,
783
+ "version_minor": 0
784
+ },
785
+ "text/plain": [
786
+ "0it [00:00, ?it/s]"
787
+ ]
788
+ },
789
+ "metadata": {},
790
+ "output_type": "display_data"
791
+ },
792
+ {
793
+ "name": "stderr",
794
+ "output_type": "stream",
795
+ "text": [
796
+ "/tmp/ipykernel_3241846/557604333.py:23: DeprecationWarning: Sampling from a set deprecated\n",
797
+ "since Python 3.9 and will be removed in a subsequent version.\n",
798
+ " tmp_dict[\"city\"] = random.sample(city_set,1)[0]\n"
799
+ ]
800
+ },
801
+ {
802
+ "ename": "ValueError",
803
+ "evalue": "Sample larger than population or is negative",
804
+ "output_type": "error",
805
+ "traceback": [
806
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
807
+ "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
808
+ "Cell \u001b[0;32mIn[73], line 23\u001b[0m\n\u001b[1;32m 21\u001b[0m tmp_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mreview rate number\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m unit[\u001b[38;5;241m21\u001b[39m]\n\u001b[1;32m 22\u001b[0m tmp_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhouse_rules\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m unit[\u001b[38;5;241m24\u001b[39m]\n\u001b[0;32m---> 23\u001b[0m tmp_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcity\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[43mrandom\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msample\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcity_set\u001b[49m\u001b[43m,\u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m 24\u001b[0m new_data\u001b[38;5;241m.\u001b[39mappend(tmp_dict)\n",
809
+ "File \u001b[0;32m~/miniconda3/envs/py39/lib/python3.9/random.py:449\u001b[0m, in \u001b[0;36mRandom.sample\u001b[0;34m(self, population, k, counts)\u001b[0m\n\u001b[1;32m 447\u001b[0m randbelow \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_randbelow\n\u001b[1;32m 448\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;241m0\u001b[39m \u001b[38;5;241m<\u001b[39m\u001b[38;5;241m=\u001b[39m k \u001b[38;5;241m<\u001b[39m\u001b[38;5;241m=\u001b[39m n:\n\u001b[0;32m--> 449\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSample larger than population or is negative\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 450\u001b[0m result \u001b[38;5;241m=\u001b[39m [\u001b[38;5;28;01mNone\u001b[39;00m] \u001b[38;5;241m*\u001b[39m k\n\u001b[1;32m 451\u001b[0m setsize \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m21\u001b[39m \u001b[38;5;66;03m# size of a small set minus size of an empty list\u001b[39;00m\n",
810
+ "\u001b[0;31mValueError\u001b[0m: Sample larger than population or is negative"
811
+ ]
812
+ }
813
+ ],
814
+ "source": [
815
+ "from tqdm.autonotebook import tqdm\n",
816
+ "import random\n",
817
+ "new_data = []\n",
818
+ "for idx, unit in tqdm(enumerate(data_dict['data'])):\n",
819
+ " tmp_dict = {k:\"\" for k in ['NAME','room type', 'price','minimum nights','review rate number','house_rules']}\n",
820
+ " tmp_dict[\"NAME\"] = unit[1]\n",
821
+ " tmp_dict[\"room type\"] = unit[13]\n",
822
+ " if unit[13] == \"Shared room\":\n",
823
+ " tmp_dict[\"maximum occupancy\"] = 1\n",
824
+ " elif unit[13] == \"Hotel room\":\n",
825
+ " tmp_dict[\"maximum occupancy\"] = random.randint(1, 2)\n",
826
+ " elif unit[13] == \"Private room\":\n",
827
+ " tmp_dict[\"maximum occupancy\"] = random.randint(1, 2)\n",
828
+ " elif unit[13] == \"Entire home/apt\":\n",
829
+ " try:\n",
830
+ " tmp_dict[\"maximum occupancy\"] = random.randint(2, max(3,eval(unit[15].replace(\"$\",\"\").replace(\",\",\"\"))//100))\n",
831
+ " except:\n",
832
+ " tmp_dict[\"maximum occupancy\"] = random.randint(2, max(3,unit[15]//100))\n",
833
+ " tmp_dict[\"price\"] = unit[15].replace(\"$\",\"\").replace(\",\",\"\")\n",
834
+ " tmp_dict[\"minimum nights\"] = unit[17]\n",
835
+ " tmp_dict[\"review rate number\"] = unit[21]\n",
836
+ " tmp_dict[\"house_rules\"] = unit[24]\n",
837
+ " tmp_dict[\"city\"] = random.sample(city_set,1)[0]\n",
838
+ " new_data.append(tmp_dict)"
839
+ ]
840
+ },
841
+ {
842
+ "cell_type": "code",
843
+ "execution_count": 20,
844
+ "id": "fd3e8257",
845
+ "metadata": {},
846
+ "outputs": [
847
+ {
848
+ "data": {
849
+ "text/plain": [
850
+ "102599"
851
+ ]
852
+ },
853
+ "execution_count": 20,
854
+ "metadata": {},
855
+ "output_type": "execute_result"
856
+ }
857
+ ],
858
+ "source": [
859
+ "len(new_data)"
860
+ ]
861
+ },
862
+ {
863
+ "cell_type": "code",
864
+ "execution_count": 21,
865
+ "id": "bfb243c0",
866
+ "metadata": {},
867
+ "outputs": [],
868
+ "source": [
869
+ "df = pd.DataFrame(new_data)"
870
+ ]
871
+ },
872
+ {
873
+ "cell_type": "code",
874
+ "execution_count": 23,
875
+ "id": "af7e3411",
876
+ "metadata": {},
877
+ "outputs": [],
878
+ "source": [
879
+ "df.to_csv('/home/xj/toolAugEnv/code/toolConstraint/database/hotels/clean_hotels_2022.csv')"
880
+ ]
881
+ },
882
+ {
883
+ "cell_type": "code",
884
+ "execution_count": 22,
885
+ "id": "71d21fea",
886
+ "metadata": {},
887
+ "outputs": [
888
+ {
889
+ "data": {
890
+ "text/html": [
891
+ "<div>\n",
892
+ "<style scoped>\n",
893
+ " .dataframe tbody tr th:only-of-type {\n",
894
+ " vertical-align: middle;\n",
895
+ " }\n",
896
+ "\n",
897
+ " .dataframe tbody tr th {\n",
898
+ " vertical-align: top;\n",
899
+ " }\n",
900
+ "\n",
901
+ " .dataframe thead th {\n",
902
+ " text-align: right;\n",
903
+ " }\n",
904
+ "</style>\n",
905
+ "<table border=\"1\" class=\"dataframe\">\n",
906
+ " <thead>\n",
907
+ " <tr style=\"text-align: right;\">\n",
908
+ " <th></th>\n",
909
+ " <th>NAME</th>\n",
910
+ " <th>room type</th>\n",
911
+ " <th>price</th>\n",
912
+ " <th>minimum nights</th>\n",
913
+ " <th>review rate number</th>\n",
914
+ " <th>house_rules</th>\n",
915
+ " <th>maximum occupancy</th>\n",
916
+ " <th>city</th>\n",
917
+ " </tr>\n",
918
+ " </thead>\n",
919
+ " <tbody>\n",
920
+ " <tr>\n",
921
+ " <th>0</th>\n",
922
+ " <td>Clean &amp; quiet apt home by the park</td>\n",
923
+ " <td>Private room</td>\n",
924
+ " <td>$966</td>\n",
925
+ " <td>10.0</td>\n",
926
+ " <td>4.0</td>\n",
927
+ " <td>Clean up and treat the home the way you'd like...</td>\n",
928
+ " <td>1</td>\n",
929
+ " <td>Des Moines</td>\n",
930
+ " </tr>\n",
931
+ " <tr>\n",
932
+ " <th>1</th>\n",
933
+ " <td>Skylit Midtown Castle</td>\n",
934
+ " <td>Entire home/apt</td>\n",
935
+ " <td>$142</td>\n",
936
+ " <td>30.0</td>\n",
937
+ " <td>4.0</td>\n",
938
+ " <td>Pet friendly but please confirm with me if the...</td>\n",
939
+ " <td>2</td>\n",
940
+ " <td>Wilmington</td>\n",
941
+ " </tr>\n",
942
+ " <tr>\n",
943
+ " <th>2</th>\n",
944
+ " <td>THE VILLAGE OF HARLEM....NEW YORK !</td>\n",
945
+ " <td>Private room</td>\n",
946
+ " <td>$620</td>\n",
947
+ " <td>3.0</td>\n",
948
+ " <td>5.0</td>\n",
949
+ " <td>I encourage you to use my kitchen, cooking and...</td>\n",
950
+ " <td>2</td>\n",
951
+ " <td>St. George</td>\n",
952
+ " </tr>\n",
953
+ " <tr>\n",
954
+ " <th>3</th>\n",
955
+ " <td>NaN</td>\n",
956
+ " <td>Entire home/apt</td>\n",
957
+ " <td>$368</td>\n",
958
+ " <td>30.0</td>\n",
959
+ " <td>4.0</td>\n",
960
+ " <td>NaN</td>\n",
961
+ " <td>2</td>\n",
962
+ " <td>Kalamazoo</td>\n",
963
+ " </tr>\n",
964
+ " <tr>\n",
965
+ " <th>4</th>\n",
966
+ " <td>Entire Apt: Spacious Studio/Loft by central park</td>\n",
967
+ " <td>Entire home/apt</td>\n",
968
+ " <td>$204</td>\n",
969
+ " <td>10.0</td>\n",
970
+ " <td>3.0</td>\n",
971
+ " <td>Please no smoking in the house, porch or on th...</td>\n",
972
+ " <td>3</td>\n",
973
+ " <td>Cheyenne</td>\n",
974
+ " </tr>\n",
975
+ " <tr>\n",
976
+ " <th>...</th>\n",
977
+ " <td>...</td>\n",
978
+ " <td>...</td>\n",
979
+ " <td>...</td>\n",
980
+ " <td>...</td>\n",
981
+ " <td>...</td>\n",
982
+ " <td>...</td>\n",
983
+ " <td>...</td>\n",
984
+ " <td>...</td>\n",
985
+ " </tr>\n",
986
+ " <tr>\n",
987
+ " <th>102594</th>\n",
988
+ " <td>Spare room in Williamsburg</td>\n",
989
+ " <td>Private room</td>\n",
990
+ " <td>$844</td>\n",
991
+ " <td>1.0</td>\n",
992
+ " <td>3.0</td>\n",
993
+ " <td>No Smoking No Parties or Events of any kind Pl...</td>\n",
994
+ " <td>1</td>\n",
995
+ " <td>White Plains</td>\n",
996
+ " </tr>\n",
997
+ " <tr>\n",
998
+ " <th>102595</th>\n",
999
+ " <td>Best Location near Columbia U</td>\n",
1000
+ " <td>Private room</td>\n",
1001
+ " <td>$837</td>\n",
1002
+ " <td>1.0</td>\n",
1003
+ " <td>2.0</td>\n",
1004
+ " <td>House rules: Guests agree to the following ter...</td>\n",
1005
+ " <td>2</td>\n",
1006
+ " <td>Mosinee</td>\n",
1007
+ " </tr>\n",
1008
+ " <tr>\n",
1009
+ " <th>102596</th>\n",
1010
+ " <td>Comfy, bright room in Brooklyn</td>\n",
1011
+ " <td>Private room</td>\n",
1012
+ " <td>$988</td>\n",
1013
+ " <td>3.0</td>\n",
1014
+ " <td>5.0</td>\n",
1015
+ " <td>NaN</td>\n",
1016
+ " <td>2</td>\n",
1017
+ " <td>Amarillo</td>\n",
1018
+ " </tr>\n",
1019
+ " <tr>\n",
1020
+ " <th>102597</th>\n",
1021
+ " <td>Big Studio-One Stop from Midtown</td>\n",
1022
+ " <td>Entire home/apt</td>\n",
1023
+ " <td>$546</td>\n",
1024
+ " <td>2.0</td>\n",
1025
+ " <td>3.0</td>\n",
1026
+ " <td>NaN</td>\n",
1027
+ " <td>4</td>\n",
1028
+ " <td>Binghamton</td>\n",
1029
+ " </tr>\n",
1030
+ " <tr>\n",
1031
+ " <th>102598</th>\n",
1032
+ " <td>585 sf Luxury Studio</td>\n",
1033
+ " <td>Entire home/apt</td>\n",
1034
+ " <td>$1,032</td>\n",
1035
+ " <td>1.0</td>\n",
1036
+ " <td>3.0</td>\n",
1037
+ " <td>NaN</td>\n",
1038
+ " <td>7</td>\n",
1039
+ " <td>Flint</td>\n",
1040
+ " </tr>\n",
1041
+ " </tbody>\n",
1042
+ "</table>\n",
1043
+ "<p>102599 rows × 8 columns</p>\n",
1044
+ "</div>"
1045
+ ],
1046
+ "text/plain": [
1047
+ " NAME room type \n",
1048
+ "0 Clean & quiet apt home by the park Private room \\\n",
1049
+ "1 Skylit Midtown Castle Entire home/apt \n",
1050
+ "2 THE VILLAGE OF HARLEM....NEW YORK ! Private room \n",
1051
+ "3 NaN Entire home/apt \n",
1052
+ "4 Entire Apt: Spacious Studio/Loft by central park Entire home/apt \n",
1053
+ "... ... ... \n",
1054
+ "102594 Spare room in Williamsburg Private room \n",
1055
+ "102595 Best Location near Columbia U Private room \n",
1056
+ "102596 Comfy, bright room in Brooklyn Private room \n",
1057
+ "102597 Big Studio-One Stop from Midtown Entire home/apt \n",
1058
+ "102598 585 sf Luxury Studio Entire home/apt \n",
1059
+ "\n",
1060
+ " price minimum nights review rate number \n",
1061
+ "0 $966 10.0 4.0 \\\n",
1062
+ "1 $142 30.0 4.0 \n",
1063
+ "2 $620 3.0 5.0 \n",
1064
+ "3 $368 30.0 4.0 \n",
1065
+ "4 $204 10.0 3.0 \n",
1066
+ "... ... ... ... \n",
1067
+ "102594 $844 1.0 3.0 \n",
1068
+ "102595 $837 1.0 2.0 \n",
1069
+ "102596 $988 3.0 5.0 \n",
1070
+ "102597 $546 2.0 3.0 \n",
1071
+ "102598 $1,032 1.0 3.0 \n",
1072
+ "\n",
1073
+ " house_rules maximum occupancy \n",
1074
+ "0 Clean up and treat the home the way you'd like... 1 \\\n",
1075
+ "1 Pet friendly but please confirm with me if the... 2 \n",
1076
+ "2 I encourage you to use my kitchen, cooking and... 2 \n",
1077
+ "3 NaN 2 \n",
1078
+ "4 Please no smoking in the house, porch or on th... 3 \n",
1079
+ "... ... ... \n",
1080
+ "102594 No Smoking No Parties or Events of any kind Pl... 1 \n",
1081
+ "102595 House rules: Guests agree to the following ter... 2 \n",
1082
+ "102596 NaN 2 \n",
1083
+ "102597 NaN 4 \n",
1084
+ "102598 NaN 7 \n",
1085
+ "\n",
1086
+ " city \n",
1087
+ "0 Des Moines \n",
1088
+ "1 Wilmington \n",
1089
+ "2 St. George \n",
1090
+ "3 Kalamazoo \n",
1091
+ "4 Cheyenne \n",
1092
+ "... ... \n",
1093
+ "102594 White Plains \n",
1094
+ "102595 Mosinee \n",
1095
+ "102596 Amarillo \n",
1096
+ "102597 Binghamton \n",
1097
+ "102598 Flint \n",
1098
+ "\n",
1099
+ "[102599 rows x 8 columns]"
1100
+ ]
1101
+ },
1102
+ "execution_count": 22,
1103
+ "metadata": {},
1104
+ "output_type": "execute_result"
1105
+ }
1106
+ ],
1107
+ "source": [
1108
+ "df"
1109
+ ]
1110
+ },
1111
+ {
1112
+ "cell_type": "code",
1113
+ "execution_count": 50,
1114
+ "id": "0ec56283",
1115
+ "metadata": {},
1116
+ "outputs": [],
1117
+ "source": [
1118
+ "import pandas as pd\n",
1119
+ "data = pd.read_csv('/home/xj/toolAugEnv/code/toolConstraint/database/hotels/clean_hotels_2022.csv')"
1120
+ ]
1121
+ },
1122
+ {
1123
+ "cell_type": "code",
1124
+ "execution_count": 52,
1125
+ "id": "5dc27048",
1126
+ "metadata": {},
1127
+ "outputs": [
1128
+ {
1129
+ "data": {
1130
+ "text/html": [
1131
+ "<div>\n",
1132
+ "<style scoped>\n",
1133
+ " .dataframe tbody tr th:only-of-type {\n",
1134
+ " vertical-align: middle;\n",
1135
+ " }\n",
1136
+ "\n",
1137
+ " .dataframe tbody tr th {\n",
1138
+ " vertical-align: top;\n",
1139
+ " }\n",
1140
+ "\n",
1141
+ " .dataframe thead th {\n",
1142
+ " text-align: right;\n",
1143
+ " }\n",
1144
+ "</style>\n",
1145
+ "<table border=\"1\" class=\"dataframe\">\n",
1146
+ " <thead>\n",
1147
+ " <tr style=\"text-align: right;\">\n",
1148
+ " <th></th>\n",
1149
+ " <th>Unnamed: 0</th>\n",
1150
+ " <th>NAME</th>\n",
1151
+ " <th>room type</th>\n",
1152
+ " <th>price</th>\n",
1153
+ " <th>minimum nights</th>\n",
1154
+ " <th>review rate number</th>\n",
1155
+ " <th>house_rules</th>\n",
1156
+ " <th>maximum occupancy</th>\n",
1157
+ " <th>city</th>\n",
1158
+ " </tr>\n",
1159
+ " </thead>\n",
1160
+ " <tbody>\n",
1161
+ " <tr>\n",
1162
+ " <th>0</th>\n",
1163
+ " <td>0</td>\n",
1164
+ " <td>Clean &amp; quiet apt home by the park</td>\n",
1165
+ " <td>Private room</td>\n",
1166
+ " <td>$966</td>\n",
1167
+ " <td>10.0</td>\n",
1168
+ " <td>4.0</td>\n",
1169
+ " <td>Clean up and treat the home the way you'd like...</td>\n",
1170
+ " <td>1</td>\n",
1171
+ " <td>Des Moines</td>\n",
1172
+ " </tr>\n",
1173
+ " <tr>\n",
1174
+ " <th>1</th>\n",
1175
+ " <td>1</td>\n",
1176
+ " <td>Skylit Midtown Castle</td>\n",
1177
+ " <td>Entire home/apt</td>\n",
1178
+ " <td>$142</td>\n",
1179
+ " <td>30.0</td>\n",
1180
+ " <td>4.0</td>\n",
1181
+ " <td>Pet friendly but please confirm with me if the...</td>\n",
1182
+ " <td>2</td>\n",
1183
+ " <td>Wilmington</td>\n",
1184
+ " </tr>\n",
1185
+ " <tr>\n",
1186
+ " <th>2</th>\n",
1187
+ " <td>2</td>\n",
1188
+ " <td>THE VILLAGE OF HARLEM....NEW YORK !</td>\n",
1189
+ " <td>Private room</td>\n",
1190
+ " <td>$620</td>\n",
1191
+ " <td>3.0</td>\n",
1192
+ " <td>5.0</td>\n",
1193
+ " <td>I encourage you to use my kitchen, cooking and...</td>\n",
1194
+ " <td>2</td>\n",
1195
+ " <td>St. George</td>\n",
1196
+ " </tr>\n",
1197
+ " <tr>\n",
1198
+ " <th>3</th>\n",
1199
+ " <td>3</td>\n",
1200
+ " <td>NaN</td>\n",
1201
+ " <td>Entire home/apt</td>\n",
1202
+ " <td>$368</td>\n",
1203
+ " <td>30.0</td>\n",
1204
+ " <td>4.0</td>\n",
1205
+ " <td>NaN</td>\n",
1206
+ " <td>2</td>\n",
1207
+ " <td>Kalamazoo</td>\n",
1208
+ " </tr>\n",
1209
+ " <tr>\n",
1210
+ " <th>4</th>\n",
1211
+ " <td>4</td>\n",
1212
+ " <td>Entire Apt: Spacious Studio/Loft by central park</td>\n",
1213
+ " <td>Entire home/apt</td>\n",
1214
+ " <td>$204</td>\n",
1215
+ " <td>10.0</td>\n",
1216
+ " <td>3.0</td>\n",
1217
+ " <td>Please no smoking in the house, porch or on th...</td>\n",
1218
+ " <td>3</td>\n",
1219
+ " <td>Cheyenne</td>\n",
1220
+ " </tr>\n",
1221
+ " <tr>\n",
1222
+ " <th>...</th>\n",
1223
+ " <td>...</td>\n",
1224
+ " <td>...</td>\n",
1225
+ " <td>...</td>\n",
1226
+ " <td>...</td>\n",
1227
+ " <td>...</td>\n",
1228
+ " <td>...</td>\n",
1229
+ " <td>...</td>\n",
1230
+ " <td>...</td>\n",
1231
+ " <td>...</td>\n",
1232
+ " </tr>\n",
1233
+ " <tr>\n",
1234
+ " <th>102594</th>\n",
1235
+ " <td>102594</td>\n",
1236
+ " <td>Spare room in Williamsburg</td>\n",
1237
+ " <td>Private room</td>\n",
1238
+ " <td>$844</td>\n",
1239
+ " <td>1.0</td>\n",
1240
+ " <td>3.0</td>\n",
1241
+ " <td>No Smoking No Parties or Events of any kind Pl...</td>\n",
1242
+ " <td>1</td>\n",
1243
+ " <td>White Plains</td>\n",
1244
+ " </tr>\n",
1245
+ " <tr>\n",
1246
+ " <th>102595</th>\n",
1247
+ " <td>102595</td>\n",
1248
+ " <td>Best Location near Columbia U</td>\n",
1249
+ " <td>Private room</td>\n",
1250
+ " <td>$837</td>\n",
1251
+ " <td>1.0</td>\n",
1252
+ " <td>2.0</td>\n",
1253
+ " <td>House rules: Guests agree to the following ter...</td>\n",
1254
+ " <td>2</td>\n",
1255
+ " <td>Mosinee</td>\n",
1256
+ " </tr>\n",
1257
+ " <tr>\n",
1258
+ " <th>102596</th>\n",
1259
+ " <td>102596</td>\n",
1260
+ " <td>Comfy, bright room in Brooklyn</td>\n",
1261
+ " <td>Private room</td>\n",
1262
+ " <td>$988</td>\n",
1263
+ " <td>3.0</td>\n",
1264
+ " <td>5.0</td>\n",
1265
+ " <td>NaN</td>\n",
1266
+ " <td>2</td>\n",
1267
+ " <td>Amarillo</td>\n",
1268
+ " </tr>\n",
1269
+ " <tr>\n",
1270
+ " <th>102597</th>\n",
1271
+ " <td>102597</td>\n",
1272
+ " <td>Big Studio-One Stop from Midtown</td>\n",
1273
+ " <td>Entire home/apt</td>\n",
1274
+ " <td>$546</td>\n",
1275
+ " <td>2.0</td>\n",
1276
+ " <td>3.0</td>\n",
1277
+ " <td>NaN</td>\n",
1278
+ " <td>4</td>\n",
1279
+ " <td>Binghamton</td>\n",
1280
+ " </tr>\n",
1281
+ " <tr>\n",
1282
+ " <th>102598</th>\n",
1283
+ " <td>102598</td>\n",
1284
+ " <td>585 sf Luxury Studio</td>\n",
1285
+ " <td>Entire home/apt</td>\n",
1286
+ " <td>$1,032</td>\n",
1287
+ " <td>1.0</td>\n",
1288
+ " <td>3.0</td>\n",
1289
+ " <td>NaN</td>\n",
1290
+ " <td>7</td>\n",
1291
+ " <td>Flint</td>\n",
1292
+ " </tr>\n",
1293
+ " </tbody>\n",
1294
+ "</table>\n",
1295
+ "<p>102599 rows × 9 columns</p>\n",
1296
+ "</div>"
1297
+ ],
1298
+ "text/plain": [
1299
+ " Unnamed: 0 NAME \n",
1300
+ "0 0 Clean & quiet apt home by the park \\\n",
1301
+ "1 1 Skylit Midtown Castle \n",
1302
+ "2 2 THE VILLAGE OF HARLEM....NEW YORK ! \n",
1303
+ "3 3 NaN \n",
1304
+ "4 4 Entire Apt: Spacious Studio/Loft by central park \n",
1305
+ "... ... ... \n",
1306
+ "102594 102594 Spare room in Williamsburg \n",
1307
+ "102595 102595 Best Location near Columbia U \n",
1308
+ "102596 102596 Comfy, bright room in Brooklyn \n",
1309
+ "102597 102597 Big Studio-One Stop from Midtown \n",
1310
+ "102598 102598 585 sf Luxury Studio \n",
1311
+ "\n",
1312
+ " room type price minimum nights review rate number \n",
1313
+ "0 Private room $966 10.0 4.0 \\\n",
1314
+ "1 Entire home/apt $142 30.0 4.0 \n",
1315
+ "2 Private room $620 3.0 5.0 \n",
1316
+ "3 Entire home/apt $368 30.0 4.0 \n",
1317
+ "4 Entire home/apt $204 10.0 3.0 \n",
1318
+ "... ... ... ... ... \n",
1319
+ "102594 Private room $844 1.0 3.0 \n",
1320
+ "102595 Private room $837 1.0 2.0 \n",
1321
+ "102596 Private room $988 3.0 5.0 \n",
1322
+ "102597 Entire home/apt $546 2.0 3.0 \n",
1323
+ "102598 Entire home/apt $1,032 1.0 3.0 \n",
1324
+ "\n",
1325
+ " house_rules maximum occupancy \n",
1326
+ "0 Clean up and treat the home the way you'd like... 1 \\\n",
1327
+ "1 Pet friendly but please confirm with me if the... 2 \n",
1328
+ "2 I encourage you to use my kitchen, cooking and... 2 \n",
1329
+ "3 NaN 2 \n",
1330
+ "4 Please no smoking in the house, porch or on th... 3 \n",
1331
+ "... ... ... \n",
1332
+ "102594 No Smoking No Parties or Events of any kind Pl... 1 \n",
1333
+ "102595 House rules: Guests agree to the following ter... 2 \n",
1334
+ "102596 NaN 2 \n",
1335
+ "102597 NaN 4 \n",
1336
+ "102598 NaN 7 \n",
1337
+ "\n",
1338
+ " city \n",
1339
+ "0 Des Moines \n",
1340
+ "1 Wilmington \n",
1341
+ "2 St. George \n",
1342
+ "3 Kalamazoo \n",
1343
+ "4 Cheyenne \n",
1344
+ "... ... \n",
1345
+ "102594 White Plains \n",
1346
+ "102595 Mosinee \n",
1347
+ "102596 Amarillo \n",
1348
+ "102597 Binghamton \n",
1349
+ "102598 Flint \n",
1350
+ "\n",
1351
+ "[102599 rows x 9 columns]"
1352
+ ]
1353
+ },
1354
+ "execution_count": 52,
1355
+ "metadata": {},
1356
+ "output_type": "execute_result"
1357
+ }
1358
+ ],
1359
+ "source": [
1360
+ "data"
1361
+ ]
1362
+ },
1363
+ {
1364
+ "cell_type": "code",
1365
+ "execution_count": 63,
1366
+ "id": "bebb9c93",
1367
+ "metadata": {},
1368
+ "outputs": [],
1369
+ "source": [
1370
+ "filtered_data = data[data.iloc[:, -3].notna()]"
1371
+ ]
1372
+ },
1373
+ {
1374
+ "cell_type": "code",
1375
+ "execution_count": 64,
1376
+ "id": "bd010fc9",
1377
+ "metadata": {},
1378
+ "outputs": [],
1379
+ "source": [
1380
+ "dict_representation = filtered_data.to_dict(orient='split')"
1381
+ ]
1382
+ },
1383
+ {
1384
+ "cell_type": "code",
1385
+ "execution_count": 71,
1386
+ "id": "e84db5c4",
1387
+ "metadata": {},
1388
+ "outputs": [
1389
+ {
1390
+ "data": {
1391
+ "text/plain": [
1392
+ "50468"
1393
+ ]
1394
+ },
1395
+ "execution_count": 71,
1396
+ "metadata": {},
1397
+ "output_type": "execute_result"
1398
+ }
1399
+ ],
1400
+ "source": [
1401
+ "len(dict_representation['data'])"
1402
+ ]
1403
+ },
1404
+ {
1405
+ "cell_type": "code",
1406
+ "execution_count": 67,
1407
+ "id": "31eaadf3",
1408
+ "metadata": {},
1409
+ "outputs": [],
1410
+ "source": [
1411
+ "sample_df = filtered_data.sample(frac=0.1)"
1412
+ ]
1413
+ },
1414
+ {
1415
+ "cell_type": "code",
1416
+ "execution_count": 69,
1417
+ "id": "33998ec6",
1418
+ "metadata": {},
1419
+ "outputs": [],
1420
+ "source": [
1421
+ "sample_df.to_csv('/home/xj/toolAugEnv/code/toolConstraint/database/hotels/clean_hotels_2022.csv')"
1422
+ ]
1423
+ },
1424
+ {
1425
+ "cell_type": "code",
1426
+ "execution_count": 72,
1427
+ "id": "25396015",
1428
+ "metadata": {},
1429
+ "outputs": [
1430
+ {
1431
+ "data": {
1432
+ "text/plain": [
1433
+ "5047"
1434
+ ]
1435
+ },
1436
+ "execution_count": 72,
1437
+ "metadata": {},
1438
+ "output_type": "execute_result"
1439
+ }
1440
+ ],
1441
+ "source": [
1442
+ "len(sample_df)"
1443
+ ]
1444
+ },
1445
+ {
1446
+ "cell_type": "code",
1447
+ "execution_count": 3,
1448
+ "id": "17d054b5",
1449
+ "metadata": {},
1450
+ "outputs": [],
1451
+ "source": [
1452
+ "import pandas as pd\n",
1453
+ "data = pd.read_csv('/home/xj/toolAugEnv/code/toolConstraint/database/hotels/clean_hotels_2022.csv')"
1454
+ ]
1455
+ },
1456
+ {
1457
+ "cell_type": "code",
1458
+ "execution_count": 4,
1459
+ "id": "64db8d6c",
1460
+ "metadata": {},
1461
+ "outputs": [],
1462
+ "source": [
1463
+ "data_dict = data.to_dict(orient = 'split')"
1464
+ ]
1465
+ },
1466
+ {
1467
+ "cell_type": "code",
1468
+ "execution_count": 21,
1469
+ "id": "b32b2f0c",
1470
+ "metadata": {},
1471
+ "outputs": [
1472
+ {
1473
+ "name": "stdout",
1474
+ "output_type": "stream",
1475
+ "text": [
1476
+ "0 Unnamed: 0.1\n",
1477
+ "1 Unnamed: 0\n",
1478
+ "2 NAME\n",
1479
+ "3 room type\n",
1480
+ "4 price\n",
1481
+ "5 minimum nights\n",
1482
+ "6 review rate number\n",
1483
+ "7 house_rules\n",
1484
+ "8 maximum occupancy\n",
1485
+ "9 city\n"
1486
+ ]
1487
+ }
1488
+ ],
1489
+ "source": [
1490
+ "for idx, unit in enumerate(data_dict['columns']):\n",
1491
+ " print(idx,unit)"
1492
+ ]
1493
+ },
1494
+ {
1495
+ "cell_type": "code",
1496
+ "execution_count": 8,
1497
+ "id": "fe415c1c",
1498
+ "metadata": {},
1499
+ "outputs": [
1500
+ {
1501
+ "data": {
1502
+ "text/plain": [
1503
+ "[0,\n",
1504
+ " 'Beautiful room upper manhttn.',\n",
1505
+ " 'Private room',\n",
1506
+ " 131.0,\n",
1507
+ " 1.0,\n",
1508
+ " 2.0,\n",
1509
+ " 'No smoking. No pets. ',\n",
1510
+ " 1,\n",
1511
+ " 'Christiansted']"
1512
+ ]
1513
+ },
1514
+ "execution_count": 8,
1515
+ "metadata": {},
1516
+ "output_type": "execute_result"
1517
+ }
1518
+ ],
1519
+ "source": [
1520
+ "data_dict['data'][0]"
1521
+ ]
1522
+ },
1523
+ {
1524
+ "cell_type": "code",
1525
+ "execution_count": 40,
1526
+ "id": "38cb5c5a",
1527
+ "metadata": {},
1528
+ "outputs": [],
1529
+ "source": [
1530
+ "import random\n",
1531
+ "new_data = []\n",
1532
+ "for idx, unit in enumerate(data_dict['data']):\n",
1533
+ " tmp_dict = {k:j for k,j in zip(['NAME','room type', 'price','minimum nights','review rate number','house_rules','maximum occupancy','city'],unit[1:])}\n",
1534
+ " if type(unit[4]) == str:\n",
1535
+ " tmp_dict[\"price\"] = eval(unit[4].replace(\"$\",\"\").replace(\",\",\"\"))\n",
1536
+ " house_rules_number = random.choice([0,1,1,1,2,2,3])\n",
1537
+ " tmp_dict['house_rules'] = \" & \".join(x for x in random.sample([\"No parties\",\"No smoking\",\"No children under 10\",\"No pets\",\"No visitors\"],house_rules_number))\n",
1538
+ " tmp_dict['city'] = tmp_dict['city'].split('/')[0]\n",
1539
+ " new_data.append(tmp_dict)"
1540
+ ]
1541
+ },
1542
+ {
1543
+ "cell_type": "code",
1544
+ "execution_count": 41,
1545
+ "id": "ae3d551e",
1546
+ "metadata": {},
1547
+ "outputs": [
1548
+ {
1549
+ "data": {
1550
+ "text/plain": [
1551
+ "{'NAME': 'BIG room with bath & balcony in BK!',\n",
1552
+ " 'room type': 'Private room',\n",
1553
+ " 'price': 1123.0,\n",
1554
+ " 'minimum nights': 1.0,\n",
1555
+ " 'review rate number': 4.0,\n",
1556
+ " 'house_rules': 'No parties',\n",
1557
+ " 'maximum occupancy': 2,\n",
1558
+ " 'city': 'Louisville'}"
1559
+ ]
1560
+ },
1561
+ "execution_count": 41,
1562
+ "metadata": {},
1563
+ "output_type": "execute_result"
1564
+ }
1565
+ ],
1566
+ "source": [
1567
+ "new_data[2]"
1568
+ ]
1569
+ },
1570
+ {
1571
+ "cell_type": "code",
1572
+ "execution_count": 42,
1573
+ "id": "6fac856c",
1574
+ "metadata": {},
1575
+ "outputs": [
1576
+ {
1577
+ "name": "stdout",
1578
+ "output_type": "stream",
1579
+ "text": [
1580
+ "\n",
1581
+ "----------\n",
1582
+ "No pets & No visitors & No smoking\n",
1583
+ "----------\n",
1584
+ "No parties & No visitors\n",
1585
+ "----------\n",
1586
+ "No children under 10 & No pets & No smoking\n",
1587
+ "----------\n",
1588
+ "No parties & No pets & No visitors\n",
1589
+ "----------\n",
1590
+ "No pets & No children under 10\n",
1591
+ "----------\n",
1592
+ "No children under 10 & No parties & No pets\n",
1593
+ "----------\n",
1594
+ "No visitors\n",
1595
+ "----------\n",
1596
+ "No parties & No children under 10\n",
1597
+ "----------\n",
1598
+ "No children under 10 & No smoking & No visitors\n",
1599
+ "----------\n",
1600
+ "No children under 10 & No parties & No smoking\n",
1601
+ "----------\n",
1602
+ "No pets & No smoking & No children under 10\n",
1603
+ "----------\n",
1604
+ "No pets & No visitors\n",
1605
+ "----------\n",
1606
+ "No visitors & No pets\n",
1607
+ "----------\n",
1608
+ "No children under 10 & No smoking & No pets\n",
1609
+ "----------\n",
1610
+ "No smoking & No parties & No pets\n",
1611
+ "----------\n",
1612
+ "No visitors & No children under 10 & No parties\n",
1613
+ "----------\n",
1614
+ "No parties & No children under 10 & No smoking\n",
1615
+ "----------\n",
1616
+ "No visitors & No children under 10 & No smoking\n",
1617
+ "----------\n",
1618
+ "No pets & No parties\n",
1619
+ "----------\n",
1620
+ "No smoking & No parties\n",
1621
+ "----------\n",
1622
+ "No smoking & No children under 10\n",
1623
+ "----------\n",
1624
+ "No parties & No children under 10 & No visitors\n",
1625
+ "----------\n",
1626
+ "No children under 10 & No smoking\n",
1627
+ "----------\n",
1628
+ "No visitors & No pets & No smoking\n",
1629
+ "----------\n",
1630
+ "No pets\n",
1631
+ "----------\n",
1632
+ "No children under 10 & No pets\n",
1633
+ "----------\n",
1634
+ "No visitors & No smoking\n",
1635
+ "----------\n",
1636
+ "No smoking\n",
1637
+ "----------\n",
1638
+ "No parties & No smoking & No children under 10\n",
1639
+ "----------\n",
1640
+ "No parties & No smoking\n",
1641
+ "----------\n",
1642
+ "No smoking & No visitors & No parties\n",
1643
+ "----------\n",
1644
+ "No pets & No smoking\n",
1645
+ "----------\n",
1646
+ "No pets & No smoking & No parties\n",
1647
+ "----------\n",
1648
+ "No smoking & No children under 10 & No visitors\n",
1649
+ "----------\n",
1650
+ "No parties & No smoking & No visitors\n",
1651
+ "----------\n",
1652
+ "No visitors & No parties\n",
1653
+ "----------\n",
1654
+ "No visitors & No children under 10\n",
1655
+ "----------\n",
1656
+ "No parties & No smoking & No pets\n",
1657
+ "----------\n",
1658
+ "No children under 10 & No pets & No visitors\n",
1659
+ "----------\n",
1660
+ "No smoking & No pets & No parties\n",
1661
+ "----------\n",
1662
+ "No children under 10 & No smoking & No parties\n",
1663
+ "----------\n",
1664
+ "No visitors & No children under 10 & No pets\n",
1665
+ "----------\n",
1666
+ "No children under 10 & No parties\n",
1667
+ "----------\n",
1668
+ "No pets & No parties & No visitors\n",
1669
+ "----------\n",
1670
+ "No children under 10 & No visitors & No parties\n",
1671
+ "----------\n",
1672
+ "No parties & No pets\n",
1673
+ "----------\n",
1674
+ "No visitors & No parties & No pets\n",
1675
+ "----------\n",
1676
+ "No smoking & No pets & No visitors\n",
1677
+ "----------\n",
1678
+ "No smoking & No pets\n",
1679
+ "----------\n",
1680
+ "No visitors & No smoking & No children under 10\n",
1681
+ "----------\n",
1682
+ "No pets & No children under 10 & No parties\n",
1683
+ "----------\n",
1684
+ "No visitors & No pets & No children under 10\n",
1685
+ "----------\n",
1686
+ "No pets & No children under 10 & No smoking\n",
1687
+ "----------\n",
1688
+ "No parties & No visitors & No children under 10\n",
1689
+ "----------\n",
1690
+ "No pets & No smoking & No visitors\n",
1691
+ "----------\n",
1692
+ "No pets & No parties & No smoking\n",
1693
+ "----------\n",
1694
+ "No parties & No visitors & No smoking\n",
1695
+ "----------\n",
1696
+ "No pets & No visitors & No children under 10\n",
1697
+ "----------\n",
1698
+ "No parties & No visitors & No pets\n",
1699
+ "----------\n",
1700
+ "No children under 10\n",
1701
+ "----------\n",
1702
+ "No children under 10 & No pets & No parties\n",
1703
+ "----------\n",
1704
+ "No children under 10 & No visitors & No smoking\n",
1705
+ "----------\n",
1706
+ "No smoking & No children under 10 & No parties\n",
1707
+ "----------\n",
1708
+ "No pets & No parties & No children under 10\n",
1709
+ "----------\n",
1710
+ "No children under 10 & No visitors & No pets\n",
1711
+ "----------\n",
1712
+ "No parties & No pets & No smoking\n",
1713
+ "----------\n",
1714
+ "No pets & No children under 10 & No visitors\n",
1715
+ "----------\n",
1716
+ "No parties & No children under 10 & No pets\n",
1717
+ "----------\n",
1718
+ "No parties & No pets & No children under 10\n",
1719
+ "----------\n",
1720
+ "No smoking & No parties & No visitors\n",
1721
+ "----------\n",
1722
+ "No parties\n",
1723
+ "----------\n",
1724
+ "No visitors & No pets & No parties\n",
1725
+ "----------\n",
1726
+ "No children under 10 & No visitors\n",
1727
+ "----------\n",
1728
+ "No smoking & No children under 10 & No pets\n",
1729
+ "----------\n",
1730
+ "No smoking & No parties & No children under 10\n",
1731
+ "----------\n",
1732
+ "No visitors & No smoking & No parties\n",
1733
+ "----------\n",
1734
+ "No pets & No visitors & No parties\n",
1735
+ "----------\n",
1736
+ "No smoking & No visitors\n",
1737
+ "----------\n",
1738
+ "No smoking & No visitors & No children under 10\n",
1739
+ "----------\n",
1740
+ "No visitors & No smoking & No pets\n",
1741
+ "----------\n",
1742
+ "No smoking & No visitors & No pets\n",
1743
+ "----------\n",
1744
+ "No visitors & No parties & No smoking\n",
1745
+ "----------\n",
1746
+ "No smoking & No pets & No children under 10\n",
1747
+ "----------\n",
1748
+ "No children under 10 & No parties & No visitors\n",
1749
+ "----------\n",
1750
+ "No visitors & No parties & No children under 10\n",
1751
+ "----------\n"
1752
+ ]
1753
+ }
1754
+ ],
1755
+ "source": [
1756
+ "maximum_occupancy_set = set()\n",
1757
+ "for unit in new_data:\n",
1758
+ " maximum_occupancy_set.add(unit['house_rules'])\n",
1759
+ "for unit in maximum_occupancy_set:\n",
1760
+ " print(unit)\n",
1761
+ " print(\"----------\")"
1762
+ ]
1763
+ },
1764
+ {
1765
+ "cell_type": "code",
1766
+ "execution_count": 45,
1767
+ "id": "8056052a",
1768
+ "metadata": {},
1769
+ "outputs": [
1770
+ {
1771
+ "data": {
1772
+ "text/html": [
1773
+ "<div>\n",
1774
+ "<style scoped>\n",
1775
+ " .dataframe tbody tr th:only-of-type {\n",
1776
+ " vertical-align: middle;\n",
1777
+ " }\n",
1778
+ "\n",
1779
+ " .dataframe tbody tr th {\n",
1780
+ " vertical-align: top;\n",
1781
+ " }\n",
1782
+ "\n",
1783
+ " .dataframe thead th {\n",
1784
+ " text-align: right;\n",
1785
+ " }\n",
1786
+ "</style>\n",
1787
+ "<table border=\"1\" class=\"dataframe\">\n",
1788
+ " <thead>\n",
1789
+ " <tr style=\"text-align: right;\">\n",
1790
+ " <th></th>\n",
1791
+ " <th>NAME</th>\n",
1792
+ " <th>room type</th>\n",
1793
+ " <th>price</th>\n",
1794
+ " <th>minimum nights</th>\n",
1795
+ " <th>review rate number</th>\n",
1796
+ " <th>house_rules</th>\n",
1797
+ " <th>maximum occupancy</th>\n",
1798
+ " <th>city</th>\n",
1799
+ " </tr>\n",
1800
+ " </thead>\n",
1801
+ " <tbody>\n",
1802
+ " <tr>\n",
1803
+ " <th>0</th>\n",
1804
+ " <td>Beautiful room upper manhttn.</td>\n",
1805
+ " <td>Private room</td>\n",
1806
+ " <td>131.0</td>\n",
1807
+ " <td>1.0</td>\n",
1808
+ " <td>2.0</td>\n",
1809
+ " <td>No smoking</td>\n",
1810
+ " <td>1</td>\n",
1811
+ " <td>Christiansted</td>\n",
1812
+ " </tr>\n",
1813
+ " <tr>\n",
1814
+ " <th>1</th>\n",
1815
+ " <td>Roomy and Comftable Room</td>\n",
1816
+ " <td>Private room</td>\n",
1817
+ " <td>548.0</td>\n",
1818
+ " <td>10.0</td>\n",
1819
+ " <td>5.0</td>\n",
1820
+ " <td>No children under 10 &amp; No parties</td>\n",
1821
+ " <td>2</td>\n",
1822
+ " <td>Laredo</td>\n",
1823
+ " </tr>\n",
1824
+ " <tr>\n",
1825
+ " <th>2</th>\n",
1826
+ " <td>BIG room with bath &amp; balcony in BK!</td>\n",
1827
+ " <td>Private room</td>\n",
1828
+ " <td>1123.0</td>\n",
1829
+ " <td>1.0</td>\n",
1830
+ " <td>4.0</td>\n",
1831
+ " <td>No parties</td>\n",
1832
+ " <td>2</td>\n",
1833
+ " <td>Louisville</td>\n",
1834
+ " </tr>\n",
1835
+ " <tr>\n",
1836
+ " <th>3</th>\n",
1837
+ " <td>4A-</td>\n",
1838
+ " <td>Entire home/apt</td>\n",
1839
+ " <td>225.0</td>\n",
1840
+ " <td>30.0</td>\n",
1841
+ " <td>4.0</td>\n",
1842
+ " <td>No pets</td>\n",
1843
+ " <td>3</td>\n",
1844
+ " <td>Greensboro</td>\n",
1845
+ " </tr>\n",
1846
+ " <tr>\n",
1847
+ " <th>4</th>\n",
1848
+ " <td>Nice and Comfortable Private Room</td>\n",
1849
+ " <td>Private room</td>\n",
1850
+ " <td>761.0</td>\n",
1851
+ " <td>2.0</td>\n",
1852
+ " <td>1.0</td>\n",
1853
+ " <td>No smoking &amp; No parties</td>\n",
1854
+ " <td>2</td>\n",
1855
+ " <td>Cape Girardeau</td>\n",
1856
+ " </tr>\n",
1857
+ " <tr>\n",
1858
+ " <th>...</th>\n",
1859
+ " <td>...</td>\n",
1860
+ " <td>...</td>\n",
1861
+ " <td>...</td>\n",
1862
+ " <td>...</td>\n",
1863
+ " <td>...</td>\n",
1864
+ " <td>...</td>\n",
1865
+ " <td>...</td>\n",
1866
+ " <td>...</td>\n",
1867
+ " </tr>\n",
1868
+ " <tr>\n",
1869
+ " <th>5042</th>\n",
1870
+ " <td>Amazing LOFT in Prime Williamsburg</td>\n",
1871
+ " <td>Private room</td>\n",
1872
+ " <td>249.0</td>\n",
1873
+ " <td>5.0</td>\n",
1874
+ " <td>5.0</td>\n",
1875
+ " <td>No pets</td>\n",
1876
+ " <td>2</td>\n",
1877
+ " <td>Trenton</td>\n",
1878
+ " </tr>\n",
1879
+ " <tr>\n",
1880
+ " <th>5043</th>\n",
1881
+ " <td>Private Queen Bedroom in Brooklyn</td>\n",
1882
+ " <td>Private room</td>\n",
1883
+ " <td>1032.0</td>\n",
1884
+ " <td>1.0</td>\n",
1885
+ " <td>1.0</td>\n",
1886
+ " <td>No pets</td>\n",
1887
+ " <td>1</td>\n",
1888
+ " <td>Des Moines</td>\n",
1889
+ " </tr>\n",
1890
+ " <tr>\n",
1891
+ " <th>5044</th>\n",
1892
+ " <td>Bushwick / Bed Sty Retreat</td>\n",
1893
+ " <td>Private room</td>\n",
1894
+ " <td>546.0</td>\n",
1895
+ " <td>2.0</td>\n",
1896
+ " <td>4.0</td>\n",
1897
+ " <td>No children under 10 &amp; No visitors &amp; No smoking</td>\n",
1898
+ " <td>2</td>\n",
1899
+ " <td>Scottsbluff</td>\n",
1900
+ " </tr>\n",
1901
+ " <tr>\n",
1902
+ " <th>5045</th>\n",
1903
+ " <td>Charming Mid-Century Studio</td>\n",
1904
+ " <td>Entire home/apt</td>\n",
1905
+ " <td>1115.0</td>\n",
1906
+ " <td>2.0</td>\n",
1907
+ " <td>5.0</td>\n",
1908
+ " <td>No pets &amp; No children under 10</td>\n",
1909
+ " <td>7</td>\n",
1910
+ " <td>Butte</td>\n",
1911
+ " </tr>\n",
1912
+ " <tr>\n",
1913
+ " <th>5046</th>\n",
1914
+ " <td>3 Bed/ 2 Bath Full Apt. BK Heights</td>\n",
1915
+ " <td>Entire home/apt</td>\n",
1916
+ " <td>396.0</td>\n",
1917
+ " <td>2.0</td>\n",
1918
+ " <td>1.0</td>\n",
1919
+ " <td>No smoking</td>\n",
1920
+ " <td>3</td>\n",
1921
+ " <td>Norfolk</td>\n",
1922
+ " </tr>\n",
1923
+ " </tbody>\n",
1924
+ "</table>\n",
1925
+ "<p>5047 rows × 8 columns</p>\n",
1926
+ "</div>"
1927
+ ],
1928
+ "text/plain": [
1929
+ " NAME room type price \n",
1930
+ "0 Beautiful room upper manhttn. Private room 131.0 \\\n",
1931
+ "1 Roomy and Comftable Room Private room 548.0 \n",
1932
+ "2 BIG room with bath & balcony in BK! Private room 1123.0 \n",
1933
+ "3 4A- Entire home/apt 225.0 \n",
1934
+ "4 Nice and Comfortable Private Room Private room 761.0 \n",
1935
+ "... ... ... ... \n",
1936
+ "5042 Amazing LOFT in Prime Williamsburg Private room 249.0 \n",
1937
+ "5043 Private Queen Bedroom in Brooklyn Private room 1032.0 \n",
1938
+ "5044 Bushwick / Bed Sty Retreat Private room 546.0 \n",
1939
+ "5045 Charming Mid-Century Studio Entire home/apt 1115.0 \n",
1940
+ "5046 3 Bed/ 2 Bath Full Apt. BK Heights Entire home/apt 396.0 \n",
1941
+ "\n",
1942
+ " minimum nights review rate number \n",
1943
+ "0 1.0 2.0 \\\n",
1944
+ "1 10.0 5.0 \n",
1945
+ "2 1.0 4.0 \n",
1946
+ "3 30.0 4.0 \n",
1947
+ "4 2.0 1.0 \n",
1948
+ "... ... ... \n",
1949
+ "5042 5.0 5.0 \n",
1950
+ "5043 1.0 1.0 \n",
1951
+ "5044 2.0 4.0 \n",
1952
+ "5045 2.0 5.0 \n",
1953
+ "5046 2.0 1.0 \n",
1954
+ "\n",
1955
+ " house_rules maximum occupancy \n",
1956
+ "0 No smoking 1 \\\n",
1957
+ "1 No children under 10 & No parties 2 \n",
1958
+ "2 No parties 2 \n",
1959
+ "3 No pets 3 \n",
1960
+ "4 No smoking & No parties 2 \n",
1961
+ "... ... ... \n",
1962
+ "5042 No pets 2 \n",
1963
+ "5043 No pets 1 \n",
1964
+ "5044 No children under 10 & No visitors & No smoking 2 \n",
1965
+ "5045 No pets & No children under 10 7 \n",
1966
+ "5046 No smoking 3 \n",
1967
+ "\n",
1968
+ " city \n",
1969
+ "0 Christiansted \n",
1970
+ "1 Laredo \n",
1971
+ "2 Louisville \n",
1972
+ "3 Greensboro \n",
1973
+ "4 Cape Girardeau \n",
1974
+ "... ... \n",
1975
+ "5042 Trenton \n",
1976
+ "5043 Des Moines \n",
1977
+ "5044 Scottsbluff \n",
1978
+ "5045 Butte \n",
1979
+ "5046 Norfolk \n",
1980
+ "\n",
1981
+ "[5047 rows x 8 columns]"
1982
+ ]
1983
+ },
1984
+ "execution_count": 45,
1985
+ "metadata": {},
1986
+ "output_type": "execute_result"
1987
+ }
1988
+ ],
1989
+ "source": [
1990
+ "df"
1991
+ ]
1992
+ },
1993
+ {
1994
+ "cell_type": "code",
1995
+ "execution_count": 44,
1996
+ "id": "54423e0d",
1997
+ "metadata": {},
1998
+ "outputs": [],
1999
+ "source": [
2000
+ "df = pd.DataFrame(new_data)\n",
2001
+ "df.to_csv('/home/xj/toolAugEnv/code/toolConstraint/database/hotels/clean_hotels_2022.csv')"
2002
+ ]
2003
+ },
2004
+ {
2005
+ "cell_type": "code",
2006
+ "execution_count": null,
2007
+ "id": "5767aa80",
2008
+ "metadata": {},
2009
+ "outputs": [],
2010
+ "source": [
2011
+ "df.rename(columns={'old_name1': 'new_name1', 'old_name2': 'new_name2'}, inplace=True)\n",
2012
+ "df.to_csv('/home/xj/toolAugEnv/code/toolConstraint/database/hotels/clean_hotels_2022.csv')"
2013
+ ]
2014
+ }
2015
+ ],
2016
+ "metadata": {
2017
+ "kernelspec": {
2018
+ "display_name": "Python 3 (ipykernel)",
2019
+ "language": "python",
2020
+ "name": "python3"
2021
+ },
2022
+ "language_info": {
2023
+ "codemirror_mode": {
2024
+ "name": "ipython",
2025
+ "version": 3
2026
+ },
2027
+ "file_extension": ".py",
2028
+ "mimetype": "text/x-python",
2029
+ "name": "python",
2030
+ "nbconvert_exporter": "python",
2031
+ "pygments_lexer": "ipython3",
2032
+ "version": "3.9.16"
2033
+ }
2034
+ },
2035
+ "nbformat": 4,
2036
+ "nbformat_minor": 5
2037
+ }
tools/accommodations/test.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Smoke-test script: load the cleaned hotels database and print the
# accommodation results for a single city.
from tools.accommodations.apis import Hotels
import pandas as pd

# Show up to 100 columns when printing DataFrames.
pd.set_option('display.max_columns', 100)

# Show up to 100 rows when printing DataFrames.
pd.set_option('display.max_rows', 100)

hotel = Hotels('/home/xj/toolAugEnv/code/toolConstraint/database/hotels/clean_hotels_2022.csv')
data = hotel.run('New York')
print(data)
tools/attractions/__pycache__/apis.cpython-39.pyc ADDED
Binary file (1.55 kB). View file
 
tools/attractions/apis.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from pandas import DataFrame
3
+ from typing import Optional
4
+ from annotation.src.utils import extract_before_parenthesis
5
+
6
+
7
class Attractions:
    """Lookup table of tourist attractions, filterable by city.

    Loads the attractions CSV once at construction, keeping only the
    columns the planner needs and dropping rows with missing values.
    """

    def __init__(self, path="../database/attractions/attractions.csv"):
        self.path = path
        # Keep only the columns used downstream; dropna() removes any
        # attraction with an incomplete record.
        self.data = pd.read_csv(self.path).dropna()[
            ['Name', 'Latitude', 'Longitude', 'Address', 'Phone', 'Website', 'City']
        ]
        print("Attractions loaded.")

    def load_db(self):
        # Reload the raw CSV (all columns, NaN rows included), replacing
        # the filtered table built in __init__.
        self.data = pd.read_csv(self.path)

    def run(self,
            city: str,
            ) -> DataFrame:
        """Search for attractions in the given city.

        Returns a DataFrame of matches re-indexed from 0, or an
        explanatory string when the city has no attractions.
        """
        results = self.data[self.data["City"] == city]
        # Re-index so callers see row numbers starting at 0.
        results = results.reset_index(drop=True)
        if len(results) == 0:
            return "There is no attraction in this city."
        return results

    def run_for_annotation(self,
                           city: str,
                           ) -> DataFrame:
        """Search for attractions for the annotation tool.

        Strips any parenthesised suffix from *city* (via
        extract_before_parenthesis) before matching; always returns a
        DataFrame, possibly empty.
        """
        results = self.data[self.data["City"] == extract_before_parenthesis(city)]
        # Re-index so callers see row numbers starting at 0.
        results = results.reset_index(drop=True)
        return results
tools/attractions/test.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# One-off cleaning script: drop attractions whose city is not in the
# reference city set, then write the filtered table to attractions2.csv.
from tools.attractions.apis import Attractions
import pandas as pd
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))
os.chdir(os.path.dirname(os.path.abspath(__file__)))

# Load the attractions table once (the original constructed Attractions
# twice, reading the CSV from disk both times).
df = Attractions(path="../../database/attractions/attractions.csv").data
print(len(df))

# Read the reference city list; a set gives O(1) membership tests and
# the `with` block closes the file handle promptly.
with open('../../database/background/citySet.txt', 'r') as f:
    city_set = set(f.read().split('\n'))

# Keep only rows whose city appears in the reference list (equivalent to
# the original per-city filtering loop, in a single vectorised pass).
df = df[df['City'].isin(city_set)]
print(len(df))

df.to_csv('../../database/attractions/attractions2.csv', index=False)
tools/cities/__pycache__/apis.cpython-39.pyc ADDED
Binary file (1.1 kB). View file
 
tools/cities/apis.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pandas import DataFrame
2
+
3
class Cities:
    """Maps a US state name to the list of cities recorded for it.

    Backed by a tab-separated "city<TAB>state" text file, one pair per
    line.
    """

    def __init__(self, path="../database/background/citySet_with_states.txt") -> None:
        self.path = path
        self.load_data()
        print("Cities loaded.")

    def load_data(self):
        """Parse the mapping file into ``self.data`` ({state: [cities]})."""
        # `with` closes the file handle promptly (the original leaked it).
        with open(self.path, "r") as f:
            city_state_mapping = f.read().strip().split("\n")
        self.data = {}
        for line in city_state_mapping:
            city, state = line.split("\t")
            # Group each city under its state.
            self.data.setdefault(state, []).append(city)

    def run(self, state) -> list:
        """Return the list of cities in *state*.

        Raises:
            ValueError: if *state* is not in the mapping.  (The original
            code *returned* the ValueError instance instead of raising
            it, silently handing callers an exception object as data.)
        """
        if state not in self.data:
            raise ValueError("Invalid State")
        return self.data[state]
tools/cities/test.py ADDED
File without changes