"""Sample structured travel-query elements at easy / middle / hard difficulty.

Each query element is a dict holding an origin city, a destination (a city for
3-day trips, a state for 5- and 7-day trips), consecutive travel dates, a group
size, optional local constraints and a budget derived from ``budget_calc``.
Running the module as a script appends middle-level elements to a JSONL file.
"""
import json
import os
import random
import sys
from datetime import datetime, timedelta

import numpy as np

# Path setup has to run before the project-local imports below: the parent of
# the launch directory is added to the import path, then the working directory
# is switched to this file's directory (``main`` writes to a relative path).
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))
os.chdir(os.path.dirname(os.path.abspath(__file__)))

from utils.budget_estimation import budget_calc  # noqa: E402
from tools.googleDistanceMatrix.apis import GoogleDistanceMatrix  # noqa: E402

_CITY_SET_PATH = '/home/user/app/database/background/citySet_with_states.txt'


def _read_city_lines(path=_CITY_SET_PATH):
    """Return the lines of the city/state file, closing the file afterwards.

    Each line is used by this module as ``"<city>\\t<state>"``.
    """
    with open(path) as handle:
        return handle.read().strip().split('\n')


google_distance = GoogleDistanceMatrix()

# Raw "<city>\t<state>" lines, loaded once and reused by ``get_org_dest``.
city_set = _read_city_lines()

# state name -> list of city names located in that state
state_city_map = {}
for _line in city_set:
    _fields = _line.split('\t')
    state_city_map.setdefault(_fields[1], []).append(_fields[0])

# trip length in days -> number of cities visited on the trip
visiting_city_map = {3: 1, 5: 2, 7: 3}


def round_to_hundreds(num):
    """Round *num* to the nearest multiple of 100 (using built-in ``round``)."""
    return round(num / 100) * 100


def select_consecutive_dates(num_days, start_date=datetime(2022, 3, 1), end_date=datetime(2022, 4, 1)):
    """Select ``num_days`` consecutive dates at random within a date window.

    Args:
        num_days: Number of consecutive days to select.
        start_date: First selectable date (inclusive).
        end_date: End of the window (exclusive).

    Returns:
        A list of ``num_days`` consecutive ``datetime`` objects.

    Raises:
        ValueError: If the window holds fewer than ``num_days`` days.
    """
    window = end_date - start_date
    all_dates = [start_date + timedelta(days=offset) for offset in range(window.days)]

    # Latest index at which a run of ``num_days`` still fits in the window.
    latest_start = len(all_dates) - num_days
    if latest_start < 0:
        raise ValueError(
            f"cannot select {num_days} consecutive days from a window of "
            f"{len(all_dates)} days"
        )

    start_index = random.randint(0, latest_start)
    return all_dates[start_index:start_index + num_days]


def get_org_dest(days: int):
    """Randomly pick an origin city and a destination for a trip of ``days`` days.

    For 3-day trips the destination is a city located in a different state
    than the origin. For 5- and 7-day trips the destination is a *state*
    different from the origin's state that has more than three cities in
    ``state_city_map``.

    Args:
        days: Trip length; must be 3, 5 or 7.

    Returns:
        A ``(origin_city, destination)`` tuple of strings.

    Raises:
        ValueError: If ``days`` is not 3, 5 or 7.
    """
    if days == 3:
        org = random.choice(city_set)
        org_state = org.split('\t')[1]
        while True:
            dest = random.choice(city_set)
            if dest.split('\t')[1] != org_state:
                break
        return org.split('\t')[0], dest.split('\t')[0]

    if days in [5, 7]:
        org = random.choice(city_set)
        org_state = org.split('\t')[1]
        while True:
            dest = random.choice(city_set)
            # Skip lines containing "None" and require a destination state
            # with more than three known cities.
            if (
                dest != org
                and "None" not in dest
                and dest.split('\t')[1] != org_state
                and len(state_city_map[dest.split('\t')[1]]) > 3
            ):
                break
        return org.split('\t')[0], dest.split('\t')[1]

    raise ValueError(f"unsupported trip length: {days!r} (expected 3, 5 or 7)")


def _random_trip_dates(days):
    """Return ``days`` consecutive random dates formatted as ``YYYY-MM-DD``."""
    return [day.strftime('%Y-%m-%d') for day in select_consecutive_dates(days)]


def _random_group_size():
    """Pick a group size: first choose between {2} and {3..8}, then a member."""
    return random.choice(random.choice([[2], [3, 4, 5, 6, 7, 8]]))


def _base_budget(budget, days):
    """Return the per-trip base budget before group-size scaling and rounding.

    ``budget`` is the mapping returned by ``budget_calc``; the keys read here
    are "average", "lowest" and "highest". The 7-day base is already rounded
    to hundreds, matching the historical computation.
    """
    if days == 3:
        return (budget["average"] + budget["lowest"]) / 2
    if days == 5:
        return budget["average"]
    if days == 7:
        return round_to_hundreds((budget["average"] + budget["highest"]) / 2)
    raise ValueError(f"unsupported trip length: {days!r} (expected 3, 5 or 7)")


def _sample_local_constraint(constraint_type, people_number, cuisine_count):
    """Draw a random value for one local-constraint type.

    "flight time" is supported here although no level currently lists it
    among its candidate constraint types.
    """
    if constraint_type == "flight time":
        return random.choice(["morning", "afternoon", "evening"])
    if constraint_type == "transportation":
        return random.choice(["no flight", "no self-driving"])
    if constraint_type == "room type":
        if people_number <= 2:
            return random.choice(["shared room", "not shared room", "private room", "entire room"])
        # Groups larger than two are never offered the shared-room options.
        return random.choice(["private room", "entire room"])
    if constraint_type == "house rule":
        return random.choice(["parties", "smoking", "children under 10", "visitors", "pets"])
    if constraint_type == "cuisine":
        return random.sample(
            ["Chinese", "American", "Italian", "Mexican", "Indian", "Mediterranean", "French"],
            cuisine_count,
        )
    # Deliberately not ValueError: ``generate_elements`` retries on ValueError
    # and would otherwise spin forever on a programming error.
    raise NotImplementedError(f"no sampler for constraint type {constraint_type!r}")


def _build_query(org, dest, days, date, people_number, local_constraint, budget, level):
    """Assemble the query-element dict with the established key order."""
    return {
        "org": org,
        "dest": dest,
        "days": days,
        "visiting_city_number": visiting_city_map[days],
        "date": date,
        "people_number": people_number,
        "local_constraint": local_constraint,
        "budget": budget,
        "query": None,
        "level": level,
    }


def easy_level_element_selection(day_list):
    """Select the elements of an easy-level query.

    Easy queries are for a single traveller, carry no local constraints
    (every constraint slot is ``None``) and use the unscaled base budget.

    Args:
        day_list: Candidate trip lengths to choose from (each 3, 5 or 7).

    Returns:
        The query-element dict with ``"level": "easy"``.
    """
    days = random.choice(day_list)
    date = _random_trip_dates(days)
    final_org, final_des = get_org_dest(days)
    budget = budget_calc(final_org, final_des, date=date, days=days)

    local_constraint_list = ["house rule", "cuisine", "room type", 'transportation']
    local_constrain_record = {key: None for key in local_constraint_list}

    final_budget = round_to_hundreds(_base_budget(budget, days))
    return _build_query(
        final_org, final_des, days, date, 1, local_constrain_record, final_budget, "easy"
    )


def middle_level_element_selection(day_list):
    """Select the elements of a middle-level query.

    Middle queries have a group of 2-8 people, exactly one local constraint
    (house rule, cuisine with two entries, or room type) and a budget of
    0.75 x base budget x group size.

    Args:
        day_list: Candidate trip lengths to choose from (each 3, 5 or 7).

    Returns:
        The query-element dict with ``"level": "middle"``.
    """
    days = random.choice(day_list)
    date = _random_trip_dates(days)
    people_number = _random_group_size()

    local_constraint_list = ["house rule", "cuisine", "room type"]
    local_constrain_record = {key: None for key in local_constraint_list}
    # Transportation is never constrained at this level but keeps its slot.
    local_constrain_record['transportation'] = None

    final_org, final_des = get_org_dest(days)

    local_constraint_type = random.choice(local_constraint_list)
    local_constrain_record[local_constraint_type] = _sample_local_constraint(
        local_constraint_type, people_number, cuisine_count=2
    )

    budget = budget_calc(final_org, final_des, days=days, date=date, people_number=people_number)
    final_budget = round_to_hundreds(_base_budget(budget, days) * people_number * 0.75)
    return _build_query(
        final_org, final_des, days, date, people_number, local_constrain_record,
        final_budget, "middle",
    )


def hard_level_element_selection(day_list):
    """Select the elements of a hard-level query.

    Hard queries have a group of 2-8 people, three distinct local constraints
    drawn with weights 0.3 / 0.1 / 0.3 / 0.3 over house rule / cuisine /
    room type / transportation (cuisine with four entries), and a budget of
    0.5 x base budget x group size. The constraint record is also passed to
    ``budget_calc``.

    Args:
        day_list: Candidate trip lengths to choose from (each 3, 5 or 7).

    Returns:
        The query-element dict with ``"level": "hard"``.
    """
    days = random.choice(day_list)
    date = _random_trip_dates(days)
    people_number = _random_group_size()

    local_constraint_list = ["house rule", "cuisine", "room type", "transportation"]
    probabilities = [0.3, 0.1, 0.3, 0.3]

    final_org, final_des = get_org_dest(days)

    local_constrain_record = {key: None for key in local_constraint_list}
    local_constraint_type_list = np.random.choice(
        local_constraint_list, size=3, replace=False, p=probabilities
    ).tolist()

    for local_constraint_type in local_constraint_type_list:
        local_constrain_record[local_constraint_type] = _sample_local_constraint(
            local_constraint_type, people_number, cuisine_count=4
        )

    budget = budget_calc(
        final_org, final_des, days=days, date=date, people_number=people_number,
        local_constraint=local_constrain_record,
    )
    final_budget = round_to_hundreds(_base_budget(budget, days) * people_number * 0.5)
    return _build_query(
        final_org, final_des, days, date, people_number, local_constrain_record,
        final_budget, "hard",
    )


def generate_elements(number: int, level="easy", day_list=None):
    """Generate ``number`` distinct query elements of the given level.

    Candidates that raise ``ValueError`` during selection are discarded and
    re-drawn, as are duplicates of elements already collected. The current
    count is printed before every attempt as a progress indicator.

    Args:
        number: How many distinct elements to produce.
        level: One of "easy", "middle" or "hard".
        day_list: Candidate trip lengths; defaults to ``[3, 5, 7]``.

    Returns:
        A list of ``number`` query-element dicts.

    Raises:
        ValueError: If ``level`` is unknown or ``day_list`` contains a trip
            length other than 3, 5 or 7 (previously these cases looped
            forever or crashed mid-run).
    """
    if day_list is None:
        day_list = [3, 5, 7]

    selectors = {
        "easy": easy_level_element_selection,
        "middle": middle_level_element_selection,
        "hard": hard_level_element_selection,
    }
    if level not in selectors:
        raise ValueError(f"unknown level {level!r}; expected one of {sorted(selectors)}")

    unsupported = [day for day in day_list if day not in visiting_city_map]
    if unsupported:
        raise ValueError(f"unsupported trip lengths in day_list: {unsupported}")

    select = selectors[level]
    query_list = []
    while len(query_list) < number:
        print(len(query_list))
        try:
            query = select(day_list)
        except ValueError:
            # Selection failed for this draw; try again with fresh randomness.
            continue
        if query not in query_list:
            query_list.append(query)
    return query_list


def main():
    """Generate middle-level elements for 3-, 5- and 7-day trips.

    Produces 160 elements per trip length and appends them, one JSON object
    per line, to ``../data/query/final_annotation_middle.jsonl``.
    """
    for num, day_list in zip([160, 160, 160], [[3], [5], [7]]):
        query_list = generate_elements(num, "middle", day_list=day_list)
        # The context manager closes the file; no explicit close() is needed.
        with open('../data/query/final_annotation_middle.jsonl', 'a+') as f:
            for query in query_list:
                json.dump(query, f)
                f.write('\n')


if __name__ == "__main__":
    main()