# TravelPlannerLeaderboard / utils /query_element_selection.py
# hsaest's picture
# Upload folder using huggingface_hub
# 6159f52 verified
# raw
# history blame
# 10.5 kB
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))
os.chdir(os.path.dirname(os.path.abspath(__file__)))
import random
from utils.budget_estimation import budget_calc
import json
from datetime import datetime, timedelta
from tools.googleDistanceMatrix.apis import GoogleDistanceMatrix
import numpy as np
# Module-level Google Distance Matrix client, instantiated once at import time.
google_distance = GoogleDistanceMatrix()

# Every line of the city file has the form "<city>\t<state>". Read it through a
# context manager so the file handle is closed as soon as the content is loaded.
with open('/home/user/app/database/background/citySet_with_states.txt') as city_file:
    city_set = city_file.read().strip().split('\n')

# Map each state to the list of its cities, preserving file order.
state_city_map = {}
for city in city_set:
    fields = city.split('\t')
    state = fields[1]
    state_city_map.setdefault(state, []).append(fields[0])

# Number of cities visited for each supported trip length (in days).
visiting_city_map = {3: 1, 5: 2, 7: 3}
def round_to_hundreds(num):
    """Return ``num`` rounded to the nearest multiple of 100.

    Rounding is delegated to the built-in ``round``, so exact halves go to the
    even hundred (e.g. 250 -> 200, 350 -> 400).
    """
    hundreds = round(num / 100)
    return hundreds * 100
def select_consecutive_dates(num_days, start_date=datetime(2022, 3, 1), end_date=datetime(2022, 4, 1)):
    """
    Selects consecutive dates within the given range.

    The candidate days are ``start_date`` (inclusive) up to ``end_date``
    (exclusive). A starting day is drawn uniformly at random among those that
    leave room for ``num_days`` consecutive days inside the range.

    Args:
        num_days: Number of consecutive days to return (``0`` yields ``[]``).
        start_date: First day that may be selected.
        end_date: Day right after the last day that may be selected.

    Returns:
        A list of ``num_days`` ``datetime`` objects, one day apart, in
        ascending order.

    Raises:
        ValueError: If ``num_days`` is negative or larger than the number of
            days available in the range.
    """
    # Whole days available in [start_date, end_date); an inverted range has none.
    available_days = max((end_date - start_date).days, 0)
    if num_days < 0:
        raise ValueError(f"num_days must be non-negative, got {num_days}")
    if num_days > available_days:
        raise ValueError(
            f"cannot select {num_days} consecutive days from a range of {available_days} days"
        )
    # Latest offset from start_date that still leaves num_days inside the range.
    latest_start = available_days - num_days
    # randint is inclusive on both ends, so every valid offset is equally likely.
    start_index = random.randint(0, latest_start)
    first_day = start_date + timedelta(days=start_index)
    # Build only the requested days instead of materialising the whole range.
    return [first_day + timedelta(days=offset) for offset in range(num_days)]
def get_org_dest(days:int):
    """Randomly pick an origin city and a destination for a trip of ``days`` days.

    Candidates are read from ``citySet_with_states.txt``, whose lines have the
    form ``<city>\\t<state>``.

    * ``days == 3``: the destination is a city located in a different state
      than the origin city.
    * ``days in (5, 7)``: the destination is a *state* different from the
      origin's state. The drawn entry must not contain the text ``"None"`` and
      its state must list more than three cities in ``state_city_map``.

    Args:
        days: Trip length in days; only 3, 5 and 7 are supported.

    Returns:
        ``(origin_city, destination)``, where ``destination`` is a city name
        for 3-day trips and a state name for 5- and 7-day trips.

    Raises:
        NotImplementedError: If ``days`` is not 3, 5 or 7.
    """
    if days not in (3, 5, 7):
        # Previously this case crashed with an accidental UnboundLocalError.
        # ValueError is avoided on purpose: generate_elements() retries on
        # ValueError and would loop forever on an input that can never succeed.
        raise NotImplementedError(f"unsupported trip length: {days} days")

    # Both trip kinds draw from the same file; read it once and close it promptly.
    with open('/home/user/app/database/background/citySet_with_states.txt') as city_file:
        entries = city_file.read().strip().split('\n')

    org = random.choice(entries)
    org_fields = org.split('\t')
    org_city, org_state = org_fields[0], org_fields[1]

    # Rejection sampling: keep drawing until the candidate satisfies the rules.
    while True:
        dest = random.choice(entries)
        dest_fields = dest.split('\t')
        dest_city, dest_state = dest_fields[0], dest_fields[1]
        if dest_state == org_state:
            # Same state (which also covers drawing the origin itself): retry.
            continue
        if days == 3:
            return org_city, dest_city
        # Multi-city trips target a whole state that offers enough cities.
        if "None" not in dest and len(state_city_map[dest_state]) > 3:
            return org_city, dest_state
def easy_level_element_selection(day_list):
    """Build the element dictionary for one easy-level query.

    A trip length is drawn from ``day_list``, then consecutive dates and an
    origin/destination pair are drawn for it. Easy queries are for a single
    traveller and carry no local constraint (every constraint slot is ``None``).

    The budget is derived from the mapping returned by ``budget_calc`` (keys
    ``"lowest"``, ``"average"``, ``"highest"``) and rounded to hundreds:
    midpoint of lowest/average for 3 days, average for 5 days, midpoint of
    average/highest for 7 days.

    Args:
        day_list: Sequence of candidate trip lengths (3, 5 and/or 7).

    Returns:
        dict: The query elements, with ``"query"`` left as ``None`` and
        ``"level"`` set to ``"easy"``.
    """
    days = random.choice(day_list)
    trip_days = select_consecutive_dates(days)
    date = [day.strftime('%Y-%m-%d') for day in trip_days]
    final_org, final_des = get_org_dest(days)
    budget = budget_calc(final_org, final_des, date=date, days=days)

    # No local constraint is set at this level; every slot stays None.
    local_constrain_record = dict.fromkeys(
        ["house rule", "cuisine", "room type", 'transportation']
    )

    if days == 3:
        low_mid = (budget["average"] + budget["lowest"]) / 2
        final_budget = round_to_hundreds(low_mid)
    elif days == 5:
        final_budget = round_to_hundreds(budget["average"])
    elif days == 7:
        high_mid = (budget["average"] + budget["highest"]) / 2
        # The outer rounding is a no-op on an already rounded value; it is
        # retained to mirror the original computation exactly.
        final_budget = round_to_hundreds(round_to_hundreds(high_mid))

    return {
        "org": final_org,
        "dest": final_des,
        "days": days,
        "visiting_city_number": visiting_city_map[days],
        "date": date,
        "people_number": 1,
        "local_constraint": local_constrain_record,
        "budget": final_budget,
        "query": None,
        "level": "easy",
    }
def middle_level_element_selection(day_list):
    """Build the element dictionary for one middle-level query.

    Compared with the easy level, the group has 2-8 travellers and exactly one
    local constraint is set, chosen among ``"house rule"``, ``"cuisine"`` and
    ``"room type"``. ``"transportation"`` is always present in the record but
    left as ``None``.

    The budget comes from the mapping returned by ``budget_calc`` (keys
    ``"lowest"``, ``"average"``, ``"highest"``), scaled by the number of
    people and a factor of 0.75, then rounded to hundreds.

    Args:
        day_list: Sequence of candidate trip lengths (3, 5 and/or 7).

    Returns:
        dict: The query elements, with ``"query"`` left as ``None`` and
        ``"level"`` set to ``"middle"``.
    """
    days = random.choice(day_list)
    date = [day.strftime('%Y-%m-%d') for day in select_consecutive_dates(days)]
    # Two-stage draw: half of the time a group of 2, otherwise uniform in 3-8.
    people_number = random.choice(random.choice([[2], [3, 4, 5, 6, 7, 8]]))

    local_constraint_list = ["house rule", "cuisine", "room type"]
    local_constrain_record = {key: None for key in local_constraint_list}
    local_constrain_record['transportation'] = None

    final_org, final_des = get_org_dest(days)

    # Exactly one constraint type is filled in. "flight time" is never a
    # candidate, so the former branch handling it was unreachable and is gone.
    local_constraint_type = random.choice(local_constraint_list)
    if local_constraint_type == "room type":
        if people_number <= 2:
            local_constraint = random.choice(["shared room", "not shared room", "private room", "entire room"])
        else:
            # Groups of three or more are only offered private/entire rooms.
            local_constraint = random.choice(["private room", "entire room"])
        local_constrain_record["room type"] = local_constraint
    elif local_constraint_type == "house rule":
        local_constraint = random.choice(["parties", "smoking", "children under 10", "visitors", "pets"])
        local_constrain_record["house rule"] = local_constraint
    elif local_constraint_type == "cuisine":
        # Two distinct cuisines.
        local_constraint = random.sample(["Chinese", "American", "Italian", "Mexican", "Indian", "Mediterranean", "French"], 2)
        local_constrain_record["cuisine"] = local_constraint

    budget = budget_calc(final_org, final_des, days=days, date=date, people_number=people_number)
    if days == 3:
        final_budget = round_to_hundreds((budget["average"] + budget["lowest"]) / 2 * people_number * 0.75)
    elif days == 5:
        final_budget = round_to_hundreds(budget["average"] * people_number * 0.75)
    elif days == 7:
        # The per-person midpoint is rounded first, then scaled and rounded again.
        final_budget = round_to_hundreds(round_to_hundreds((budget["average"] + budget["highest"]) / 2) * people_number * 0.75)

    query_dict = {
        "org": final_org,
        "dest": final_des,
        "days": days,
        "visiting_city_number": visiting_city_map[days],
        "date": date,
        "people_number": people_number,
        "local_constraint": local_constrain_record,
        "budget": final_budget,
        "query": None,
        "level": "middle",
    }
    return query_dict
def hard_level_element_selection(day_list):
    """Build the element dictionary for one hard-level query.

    The group has 2-8 travellers and three distinct local constraints are set,
    drawn without replacement from ``"house rule"``, ``"cuisine"``,
    ``"room type"`` and ``"transportation"`` with weights 0.3 / 0.1 / 0.3 / 0.3.
    The one type that is not drawn stays ``None`` in the record.

    The budget comes from the mapping returned by ``budget_calc`` (keys
    ``"lowest"``, ``"average"``, ``"highest"``; the constraint record is
    passed along), scaled by the number of people and a factor of 0.5, then
    rounded to hundreds.

    Args:
        day_list: Sequence of candidate trip lengths (3, 5 and/or 7).

    Returns:
        dict: The query elements, with ``"query"`` left as ``None`` and
        ``"level"`` set to ``"hard"``.
    """
    days = random.choice(day_list)
    date = [day.strftime('%Y-%m-%d') for day in select_consecutive_dates(days)]
    # Two-stage draw: half of the time a group of 2, otherwise uniform in 3-8.
    people_number = random.choice(random.choice([[2], [3, 4, 5, 6, 7, 8]]))

    # probabilities[i] is the sampling weight of local_constraint_list[i].
    local_constraint_list = ["house rule", "cuisine", "room type", "transportation"]
    probabilities = [0.3, 0.1, 0.3, 0.3]

    final_org, final_des = get_org_dest(days)

    local_constrain_record = {key: None for key in local_constraint_list}
    # Three distinct constraint types; .tolist() converts NumPy strings to str.
    local_constraint_type_list = np.random.choice(local_constraint_list, size=3, replace=False, p=probabilities).tolist()

    # "flight time" is never a candidate, so the former branch handling it was
    # unreachable and has been removed.
    for local_constraint_type in local_constraint_type_list:
        if local_constraint_type == "transportation":
            local_constraint = random.choice(["no flight", "no self-driving"])
            local_constrain_record["transportation"] = local_constraint
        elif local_constraint_type == "room type":
            if people_number <= 2:
                local_constraint = random.choice(["shared room", "not shared room", "private room", "entire room"])
            else:
                # Groups of three or more are only offered private/entire rooms.
                local_constraint = random.choice(["private room", "entire room"])
            local_constrain_record["room type"] = local_constraint
        elif local_constraint_type == "house rule":
            local_constraint = random.choice(["parties", "smoking", "children under 10", "visitors", "pets"])
            local_constrain_record["house rule"] = local_constraint
        elif local_constraint_type == "cuisine":
            # Four distinct cuisines.
            local_constraint = random.sample(["Chinese", "American", "Italian", "Mexican", "Indian", "Mediterranean", "French"], 4)
            local_constrain_record["cuisine"] = local_constraint

    budget = budget_calc(final_org, final_des, days=days, date=date, people_number=people_number, local_constraint=local_constrain_record)
    if days == 3:
        final_budget = round_to_hundreds((budget["average"] + budget["lowest"]) / 2 * people_number * 0.5)
    elif days == 5:
        final_budget = round_to_hundreds(budget["average"] * people_number * 0.5)
    elif days == 7:
        # The per-person midpoint is rounded first, then scaled and rounded again.
        final_budget = round_to_hundreds(round_to_hundreds((budget["average"] + budget["highest"]) / 2) * people_number * 0.5)

    query_dict = {
        "org": final_org,
        "dest": final_des,
        "days": days,
        "visiting_city_number": visiting_city_map[days],
        "date": date,
        "people_number": people_number,
        "local_constraint": local_constrain_record,
        "budget": final_budget,
        "query": None,
        "level": "hard",
    }
    return query_dict
def generate_elements(number:int, level="easy", day_list=None):
    """Generate ``number`` distinct query element dictionaries for one level.

    Candidates are produced by the selection function matching ``level`` and
    appended only if an identical dictionary is not already in the result.
    A candidate whose generation raises ``ValueError`` is skipped and retried.
    The current result size is printed before every attempt as a progress
    indicator.

    Args:
        number: How many distinct queries to produce; ``0`` or less yields ``[]``.
        level: One of ``"easy"``, ``"middle"`` or ``"hard"``.
        day_list: Candidate trip lengths; defaults to ``[3, 5, 7]``.

    Returns:
        list: ``number`` unique query dictionaries.

    Raises:
        ValueError: If ``level`` is not a known level. Previously an unknown
            level made the loop spin forever without producing anything.
    """
    if level not in ("easy", "middle", "hard"):
        raise ValueError(f"unknown level: {level!r}")
    if day_list is None:
        # Fresh list per call instead of a shared mutable default argument.
        day_list = [3, 5, 7]

    query_list = []
    while len(query_list) < number:
        print(len(query_list))
        try:
            if level == "easy":
                query = easy_level_element_selection(day_list)
            elif level == "middle":
                query = middle_level_element_selection(day_list)
            else:
                query = hard_level_element_selection(day_list)
        except ValueError:
            # Best-effort retry: this draw could not be turned into a query.
            continue
        # Dictionaries are unhashable, so uniqueness is a linear membership test.
        if query not in query_list:
            query_list.append(query)
    return query_list
def main():
    """Generate middle-level queries and append them to a JSON-lines file.

    For each trip length (3, 5 and 7 days) 160 distinct queries are generated
    and appended, one JSON object per line, to
    ``../data/query/final_annotation_middle.jsonl``. The relative path is
    resolved against the current working directory, which this module sets to
    its own directory at import time.
    """
    for num, day_list in zip([160, 160, 160], [[3], [5], [7]]):
        query_list = generate_elements(num, "middle", day_list=day_list)
        # Append mode keeps the batches written for the previous trip lengths.
        # The with statement closes the file, so no explicit close() is needed.
        with open('../data/query/final_annotation_middle.jsonl', 'a+') as f:
            for query in query_list:
                json.dump(query, f)
                f.write('\n')


if __name__ == "__main__":
    main()