import sys
import os

# Put the parent of the current working directory on the import path.
# This has to run before the ``utils`` / ``tools`` imports below (presumably
# that is what lets them resolve) and before the ``os.chdir`` call, because
# the path is derived from the working directory at this point.
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))
# Switch to this script's own directory so the relative output path used in
# ``main`` ('../data/query/...') does not depend on where the script is run from.
os.chdir(os.path.dirname(os.path.abspath(__file__)))

import random
from utils.budget_estimation import budget_calc
import json
from datetime import datetime, timedelta
from tools.googleDistanceMatrix.apis import GoogleDistanceMatrix
import numpy as np


# Instantiated once at import time. Nothing in the visible part of this file
# reads it -- presumably kept for other code paths (TODO confirm).
google_distance = GoogleDistanceMatrix()

# One entry per line of the city file; each entry is "<city>\t<state>".
# The context manager closes the handle as soon as the file has been read.
with open('/home/user/app/database/background/citySet_with_states.txt') as city_file:
    city_set = city_file.read().strip().split('\n')

# state -> list of the cities listed for that state, in file order.
state_city_map = {}
for city in city_set:
    fields = city.split('\t')
    state = fields[1]
    state_city_map.setdefault(state, []).append(fields[0])

# Trip length in days -> value stored as "visiting_city_number" in a query.
visiting_city_map = {3: 1, 5: 2, 7: 3}


def round_to_hundreds(num):
    """Round ``num`` to the nearest multiple of 100.

    The built-in ``round`` is used, so an exact half goes to the even
    multiple (250 -> 200, 350 -> 400).
    """
    return round(num / 100) * 100


def select_consecutive_dates(num_days, start_date=datetime(2022, 3, 1), end_date=datetime(2022, 4, 1)):
    """Pick a random run of ``num_days`` consecutive days inside a date window.

    Args:
        num_days: Length of the run, in days.
        start_date: First day that may be selected (inclusive).
        end_date: End of the window (exclusive); the last selectable day is
            the day before it.

    Returns:
        A list of ``num_days`` ``datetime`` objects in ascending order, each
        one day after the previous. Empty when ``num_days`` is 0.

    Raises:
        ValueError: If ``num_days`` is negative, or the window contains
            fewer than ``num_days`` days.
    """
    if num_days < 0:
        raise ValueError("num_days must be non-negative, got {!r}".format(num_days))

    # Number of selectable days; an inverted window counts as empty.
    window_days = max((end_date - start_date).days, 0)
    latest_start = window_days - num_days
    if latest_start < 0:
        raise ValueError(
            "cannot fit {} consecutive days into a {}-day window".format(num_days, window_days)
        )

    # Offset of the first selected day from start_date (both bounds inclusive).
    start_index = random.randint(0, latest_start)
    return [start_date + timedelta(days=start_index + offset) for offset in range(num_days)]


def get_org_dest(days: int):
    """Sample a random origin city and a destination for a ``days``-day trip.

    Entries are drawn from the module-level ``city_set`` (lines of the form
    "<city>\\t<state>"), which holds the same data this function previously
    re-read from disk on every call.

    * 3 days: the destination is a city whose state differs from the
      origin's state.
    * 5 or 7 days: the destination is a *state* that differs from the
      origin's state, comes from an entry not containing "None", and has
      more than three cities in ``state_city_map``.

    Args:
        days: Trip length; 3, 5 and 7 are supported.

    Returns:
        ``(origin_city, destination)`` where ``destination`` is a city name
        for 3-day trips and a state name for 5- and 7-day trips.

    Raises:
        NotImplementedError: If ``days`` is not 3, 5 or 7.
    """
    if days == 3:
        org = random.choice(city_set)
        org_state = org.split('\t')[1]
        # Resample until the destination lies in another state.
        while True:
            dest = random.choice(city_set)
            if dest.split('\t')[1] != org_state:
                break
        return org.split('\t')[0], dest.split('\t')[0]

    if days in (5, 7):
        org = random.choice(city_set)
        org_state = org.split('\t')[1]
        # Resample until the entry names a usable destination state.
        while True:
            dest = random.choice(city_set)
            dest_state = dest.split('\t')[1]
            if (
                dest != org
                and "None" not in dest
                and dest_state != org_state
                and len(state_city_map[dest_state]) > 3
            ):
                break
        return org.split('\t')[0], dest_state

    # Deliberately not ValueError: generate_elements treats ValueError as
    # "resample and retry", which would spin forever on an unsupported length.
    raise NotImplementedError("unsupported trip length: {!r} (expected 3, 5 or 7)".format(days))


def easy_level_element_selection(day_list):
    """Build the element dictionary for one easy-level query.

    A trip length is drawn from ``day_list``, then dates, origin and
    destination are sampled and a budget is derived from ``budget_calc``:

    * 3 days: midpoint of the "average" and "lowest" estimates,
    * 5 days: the "average" estimate,
    * 7 days: midpoint of the "average" and "highest" estimates,

    each rounded to the nearest hundred.

    Args:
        day_list: Sequence of candidate trip lengths in days.

    Returns:
        A dict with the keys "org", "dest", "days", "visiting_city_number",
        "date", "people_number" (always 1), "local_constraint" (every slot
        ``None``), "budget", "query" (``None``) and "level" ("easy").
    """
    days = random.choice(day_list)
    date = [day.strftime('%Y-%m-%d') for day in select_consecutive_dates(days)]
    final_org, final_des = get_org_dest(days)
    budget = budget_calc(final_org, final_des, date=date, days=days)

    # Easy queries carry no local constraints; every slot stays empty.
    local_constraint_list = ["house rule", "cuisine", "room type", 'transportation']
    local_constrain_record = {key: None for key in local_constraint_list}

    if days == 3:
        final_budget = round_to_hundreds((budget["average"] + budget["lowest"]) / 2)
    elif days == 5:
        final_budget = round_to_hundreds(budget["average"])
    elif days == 7:
        # A single rounding is enough: rounding an already-rounded value
        # to hundreds returns it unchanged.
        final_budget = round_to_hundreds((budget["average"] + budget["highest"]) / 2)

    return {
        "org": final_org,
        "dest": final_des,
        "days": days,
        "visiting_city_number": visiting_city_map[days],
        "date": date,
        "people_number": 1,
        "local_constraint": local_constrain_record,
        "budget": final_budget,
        "query": None,
        "level": "easy",
    }


def middle_level_element_selection(day_list):
    """Build the element dictionary for one middle-level query.

    Compared with the easy level, the group size is random (2, or 3-8 with
    equal weight on the two buckets) and exactly one local constraint is
    set, chosen among "house rule", "cuisine" and "room type". The
    "transportation" slot is present but always ``None``.

    Args:
        day_list: Sequence of candidate trip lengths in days.

    Returns:
        A dict with the keys "org", "dest", "days", "visiting_city_number",
        "date", "people_number", "local_constraint", "budget", "query"
        (``None``) and "level" ("middle").
    """
    days = random.choice(day_list)
    date = [day.strftime('%Y-%m-%d') for day in select_consecutive_dates(days)]
    # First pick a bucket (pair vs. larger group), then a size inside it.
    people_number = random.choice(random.choice([[2], [3, 4, 5, 6, 7, 8]]))

    local_constraint_list = ["house rule", "cuisine", "room type"]
    local_constrain_record = {key: None for key in local_constraint_list}
    local_constrain_record['transportation'] = None
    final_org, final_des = get_org_dest(days)

    local_constraint_type = random.choice(local_constraint_list)

    if local_constraint_type == "room type":
        # Groups of more than two are never offered the shared-room options.
        if people_number <= 2:
            room_options = ["shared room", "not shared room", "private room", "entire room"]
        else:
            room_options = ["private room", "entire room"]
        local_constrain_record["room type"] = random.choice(room_options)

    elif local_constraint_type == "house rule":
        local_constrain_record["house rule"] = random.choice(
            ["parties", "smoking", "children under 10", "visitors", "pets"]
        )

    elif local_constraint_type == "cuisine":
        # Two distinct cuisines.
        local_constrain_record["cuisine"] = random.sample(
            ["Chinese", "American", "Italian", "Mexican", "Indian", "Mediterranean", "French"], 2
        )

    budget = budget_calc(final_org, final_des, days=days, date=date, people_number=people_number)

    # NOTE(review): budget_calc already receives people_number and the result
    # is multiplied by it again below -- confirm this scaling is intended.
    if days == 3:
        final_budget = round_to_hundreds((budget["average"] + budget["lowest"]) / 2 * people_number * 0.75)
    elif days == 5:
        final_budget = round_to_hundreds(budget["average"] * people_number * 0.75)
    elif days == 7:
        final_budget = round_to_hundreds(
            round_to_hundreds((budget["average"] + budget["highest"]) / 2) * people_number * 0.75
        )

    return {
        "org": final_org,
        "dest": final_des,
        "days": days,
        "visiting_city_number": visiting_city_map[days],
        "date": date,
        "people_number": people_number,
        "local_constraint": local_constrain_record,
        "budget": final_budget,
        "query": None,
        "level": "middle",
    }


def hard_level_element_selection(day_list):
    """Build the element dictionary for one hard-level query.

    Three of the four local-constraint types ("house rule", "cuisine",
    "room type", "transportation") are drawn without replacement using the
    weights 0.3 / 0.1 / 0.3 / 0.3, and a random value is set for each. The
    filled constraint record is also passed to ``budget_calc``.

    Args:
        day_list: Sequence of candidate trip lengths in days.

    Returns:
        A dict with the keys "org", "dest", "days", "visiting_city_number",
        "date", "people_number", "local_constraint", "budget", "query"
        (``None``) and "level" ("hard").
    """
    days = random.choice(day_list)
    date = [day.strftime('%Y-%m-%d') for day in select_consecutive_dates(days)]
    # First pick a bucket (pair vs. larger group), then a size inside it.
    people_number = random.choice(random.choice([[2], [3, 4, 5, 6, 7, 8]]))

    local_constraint_list = ["house rule", "cuisine", "room type", "transportation"]
    # Selection weights, aligned with local_constraint_list.
    probabilities = [0.3, 0.1, 0.3, 0.3]
    final_org, final_des = get_org_dest(days)

    local_constrain_record = {key: None for key in local_constraint_list}

    local_constraint_type_list = np.random.choice(
        local_constraint_list, size=3, replace=False, p=probabilities
    ).tolist()

    for local_constraint_type in local_constraint_type_list:
        if local_constraint_type == "transportation":
            local_constrain_record["transportation"] = random.choice(["no flight", "no self-driving"])

        elif local_constraint_type == "room type":
            # Groups of more than two are never offered the shared-room options.
            if people_number <= 2:
                room_options = ["shared room", "not shared room", "private room", "entire room"]
            else:
                room_options = ["private room", "entire room"]
            local_constrain_record["room type"] = random.choice(room_options)

        elif local_constraint_type == "house rule":
            local_constrain_record["house rule"] = random.choice(
                ["parties", "smoking", "children under 10", "visitors", "pets"]
            )

        elif local_constraint_type == "cuisine":
            # Four distinct cuisines.
            local_constrain_record["cuisine"] = random.sample(
                ["Chinese", "American", "Italian", "Mexican", "Indian", "Mediterranean", "French"], 4
            )

    budget = budget_calc(
        final_org,
        final_des,
        days=days,
        date=date,
        people_number=people_number,
        local_constraint=local_constrain_record,
    )

    # NOTE(review): budget_calc already receives people_number and the result
    # is multiplied by it again below -- confirm this scaling is intended.
    if days == 3:
        final_budget = round_to_hundreds((budget["average"] + budget["lowest"]) / 2 * people_number * 0.5)
    elif days == 5:
        final_budget = round_to_hundreds(budget["average"] * people_number * 0.5)
    elif days == 7:
        final_budget = round_to_hundreds(
            round_to_hundreds((budget["average"] + budget["highest"]) / 2) * people_number * 0.5
        )

    return {
        "org": final_org,
        "dest": final_des,
        "days": days,
        "visiting_city_number": visiting_city_map[days],
        "date": date,
        "people_number": people_number,
        "local_constraint": local_constrain_record,
        "budget": final_budget,
        "query": None,
        "level": "hard",
    }


def generate_elements(number: int, level="easy", day_list=(3, 5, 7)):
    """Generate ``number`` distinct query-element dictionaries for one level.

    Args:
        number: How many distinct queries to collect.
        level: Difficulty level: "easy", "middle" or "hard".
        day_list: Candidate trip lengths in days handed to the selector.

    Returns:
        A list of ``number`` query dictionaries with no duplicates.

    Raises:
        ValueError: If ``level`` is not one of the supported levels.
    """
    # Resolve the selector once; an unknown level used to make the loop
    # below spin forever because no branch ever produced a query.
    if level == "easy":
        select = easy_level_element_selection
    elif level == "middle":
        select = middle_level_element_selection
    elif level == "hard":
        select = hard_level_element_selection
    else:
        raise ValueError(
            "unknown level {!r}; expected 'easy', 'middle' or 'hard'".format(level)
        )

    query_list = []
    while len(query_list) < number:
        # Progress indicator: number of queries collected so far.
        print(len(query_list))
        try:
            query = select(day_list)
        except ValueError:
            # A ValueError from the selector is treated as a failed sample
            # (presumably a combination that cannot be budgeted); draw again.
            continue
        if query not in query_list:
            query_list.append(query)
    return query_list


def main():
    """Generate middle-level query elements and append them to a JSONL file.

    For each trip length (3, 5 and 7 days) 160 queries are generated and
    written, one JSON object per line, to
    '../data/query/final_annotation_middle.jsonl'.
    """
    for num, day_list in zip([160, 160, 160], [[3], [5], [7]]):
        query_list = generate_elements(num, "middle", day_list=day_list)

        # Append mode, so each batch is added after the previous one. The
        # context manager closes the file; no explicit close() is needed.
        with open('../data/query/final_annotation_middle.jsonl', 'a+') as f:
            for query in query_list:
                json.dump(query, f)
                f.write('\n')


if __name__ == "__main__":
    main()