|
from annotation.src.utils import get_valid_name_city,extract_before_parenthesis,extract_numbers_from_filenames |
|
from tools.flights.apis import Flights |
|
from tools.accommodations.apis import Accommodations |
|
from tools.restaurants.apis import Restaurants |
|
from tools.googleDistanceMatrix.apis import GoogleDistanceMatrix |
|
from tools.attractions.apis import Attractions |
|
import math |
|
import json |
|
import re |
|
import numpy as np |
|
import os |
|
import sys |
|
from tqdm import tqdm |
|
import argparse |
|
|
|
# Make the parent of the current working directory importable.
# NOTE(review): this runs after the project imports at the top of the file,
# so it cannot influence those imports — confirm the intended ordering.
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

# Switch the process working directory to the folder that contains this file.
# presumably so the tool classes below can resolve relative data paths — TODO confirm
os.chdir(os.path.dirname(os.path.abspath(__file__)))


# Shared tool instances created once at import time. The checks in this module
# read the `.data` tables of `flight`, `accommodation` and `restaurants`, and
# call `googleDistanceMatrix.run_for_evaluation`.
flight = Flights()

accommodation = Accommodations()

restaurants = Restaurants()

googleDistanceMatrix = GoogleDistanceMatrix()

# Not referenced by the functions visible in this part of the file.
attractions = Attractions()
|
|
|
|
|
def load_line_json_data(filename):
    """
    Load a JSON Lines file (one JSON document per line) into a list.

    Args:
    - filename (str): Path of a UTF-8 encoded text file.

    Returns:
    - list: The decoded objects in file order. Blank or whitespace-only lines
      are skipped, so an empty file yields an empty list.

    Raises:
    - json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    data = []
    with open(filename, 'r', encoding='utf-8') as f:
        # Stream line by line instead of reading and splitting the whole file.
        for line in f:
            line = line.strip()
            if not line:
                # json.loads('') raises, so blank lines must not reach it.
                continue
            data.append(json.loads(line))
    return data
|
|
|
|
|
def convert_bool_values(item):
    """
    Return a copy of `item` in which every `np.bool_` is a plain Python `bool`.

    Dicts, lists and tuples are rebuilt recursively (dict keys are left as
    they are); any other value is returned unchanged.

    Args:
    - item: Arbitrary value, possibly a nested dict/list/tuple structure.

    Returns:
    - The converted value.
    """
    # Leaf case first: a NumPy boolean becomes a built-in bool.
    if isinstance(item, np.bool_):
        return bool(item)
    if isinstance(item, dict):
        return {k: convert_bool_values(v) for k, v in item.items()}
    if isinstance(item, list):
        return list(map(convert_bool_values, item))
    if isinstance(item, tuple):
        return tuple(map(convert_bool_values, item))
    # Anything else passes through untouched.
    return item
|
|
|
|
|
|
|
|
|
def extract_from_to(text: str):
    """
    Pull 'A' and 'B' out of the first "from A to B" phrase found in the text.

    The search is case-sensitive. 'B' runs up to the first comma that follows
    "to", or to the end of the string when there is no comma.

    Args:
    - text (str): The input string.

    Returns:
    - tuple: ('A', 'B') for the first match; (None, None) if nothing matches.
    """
    found = re.search(r"from\s+(.+?)\s+to\s+([^,]+)(?=[,\s]|$)", text)
    if found is None:
        return (None, None)
    return found.groups()
|
|
|
|
|
def get_total_cost(question, tested_data):
    """
    Sum the cost of transportation, meals and accommodation in a plan.

    Only the first min(question['days'], len(tested_data)) day entries are
    considered. Entries that are empty, '-' or cannot be found in the
    corresponding data table contribute nothing.

    Args:
    - question (dict): Query; 'days' is always read, 'people_number' is read
      whenever an item is priced.
    - tested_data (list): Per-day dicts with the keys 'transportation',
      'current_city', 'breakfast', 'lunch', 'dinner' and 'accommodation'.

    Returns:
    - The accumulated cost (0 when nothing could be priced).
    """
    total_cost = 0
    for i in range(min(question['days'], len(tested_data))):
        unit = tested_data[i]

        if unit['transportation'] and unit['transportation'] != '-':
            total_cost += _transportation_cost(unit, question)

        for meal in ('breakfast', 'lunch', 'dinner'):
            if unit[meal] and unit[meal] != '-':
                total_cost += _restaurant_cost(unit[meal], question)

        if unit['accommodation'] and unit['accommodation'] != '-':
            total_cost += _accommodation_cost(unit['accommodation'], question)

    return total_cost


def _transportation_cost(unit, question):
    """Price one day's transportation entry; 0 if it cannot be priced."""
    value = unit['transportation']
    org_city, dest_city = extract_from_to(value)
    if org_city is None or dest_city is None:
        # Fall back to a "from A to B" phrase in the day's current_city field.
        org_city, dest_city = extract_from_to(unit['current_city'])
    if org_city is None or dest_city is None:
        return 0

    lowered = value.lower()
    if 'flight number' in lowered:
        # Parse case-insensitively so the extraction agrees with the
        # case-insensitive test above (an exact-case split raised IndexError
        # for e.g. "flight number: F1").
        found = re.search(r'flight number:\s*([^,]*)', value, flags=re.IGNORECASE)
        if found is None:
            return 0
        res = flight.data[flight.data['Flight Number'] == found.group(1).strip()]
        if len(res) > 0:
            # Flight price is per person.
            return res['Price'].values[0] * question['people_number']
        return 0

    if 'self-driving' in lowered:
        cost = googleDistanceMatrix.run_for_evaluation(org_city, dest_city, 'self-driving')['cost']
        # One vehicle per started group of 5 people.
        return cost * math.ceil(question['people_number'] * 1.0 / 5)

    if 'taxi' in lowered:
        cost = googleDistanceMatrix.run_for_evaluation(org_city, dest_city, 'taxi')['cost']
        # One taxi per started group of 4 people.
        return cost * math.ceil(question['people_number'] * 1.0 / 4)

    return 0


def _restaurant_cost(entry, question):
    """Price one meal entry: first matching restaurant's average cost per person."""
    name, city = get_valid_name_city(entry)
    res = restaurants.data[(restaurants.data['Name'].astype(str).str.contains(re.escape(name))) & (restaurants.data['City'] == city)]
    if len(res) > 0:
        return res['Average Cost'].values[0] * question['people_number']
    return 0


def _accommodation_cost(entry, question):
    """Price one night: first matching listing's price times the rooms needed."""
    name, city = get_valid_name_city(entry)
    res = accommodation.data[(accommodation.data['NAME'].astype(str).str.contains(re.escape(name))) & (accommodation.data['city'] == city)]
    if len(res) > 0:
        # Rooms needed = people divided by the listing's maximum occupancy, rounded up.
        return res['price'].values[0] * math.ceil(question['people_number'] * 1.0 / res['maximum occupancy'].values[0])
    return 0
|
|
|
|
|
def is_valid_room_rule(question, tested_data):
    """
    Check that no booked accommodation forbids the requested house rule.

    Only the first min(question['days'], len(tested_data)) day entries are
    inspected. For each non-empty accommodation entry, the first matching
    row of the accommodation table is examined; entries with no match are
    ignored.

    Args:
    - question (dict): Query; reads 'days' and
      question['local_constraint']['house rule'].
    - tested_data (list): Per-day dicts with an 'accommodation' key.

    Returns:
    - tuple: (None, None) if no house rule is requested; (False, message) for
      the first listing whose house rules contain the forbidding phrase;
      (True, None) otherwise.
    """
    rule = question['local_constraint']['house rule']
    if rule is None:
        return None, None

    # Phrase in a listing's house rules that forbids each requested allowance.
    # 'parities' is kept as an alias: the check was previously keyed on that
    # misspelling, which meant a 'parties' constraint was never enforced.
    forbidden_phrases = {
        'smoking': 'No smoking',
        'parties': 'No parties',
        'parities': 'No parties',
        'children under 10': 'No children under 10',
        'visitors': 'No visitors',
        'pets': 'No pets',
    }
    forbidden = forbidden_phrases.get(rule) if isinstance(rule, str) else None

    for i in range(min(question['days'], len(tested_data))):
        unit = tested_data[i]
        if unit['accommodation'] and unit['accommodation'] != '-':
            name, city = get_valid_name_city(unit['accommodation'])
            res = accommodation.data[(accommodation.data['NAME'].astype(str).str.contains(re.escape(name))) & (accommodation.data['city'] == city)]
            if len(res) > 0:
                if forbidden is not None and forbidden in str(res['house_rules'].values[0]):
                    return False, f"The house rule should be {rule}."

    return True, None
|
|
|
|
|
|
|
def is_valid_cuisine(question, tested_data):
    """
    Check that every requested cuisine is covered by some meal in the plan.

    Only the first min(question['days'], len(tested_data)) day entries are
    inspected. For each non-empty breakfast/lunch/dinner entry, the first
    matching row of the restaurant table is examined and each requested
    cuisine found in its 'Cuisines' value is recorded.

    Args:
    - question (dict): Query; reads 'days', 'org' and
      question['local_constraint']['cuisine'].
    - tested_data (list): Per-day dicts with 'breakfast', 'lunch', 'dinner'.

    Returns:
    - tuple: (None, None) if no cuisine is requested; (False, message) naming
      the first requested cuisine that is not covered; (True, None) otherwise.
      A tuple is always returned, including when the requested list contains
      duplicates.
    """
    required = question['local_constraint']['cuisine']
    if not required:
        return None, None

    cuisine_set = set()
    for i in range(min(question['days'], len(tested_data))):
        unit = tested_data[i]
        for meal in ('breakfast', 'lunch', 'dinner'):
            if not unit[meal] or unit[meal] == '-':
                continue
            name, city = get_valid_name_city(unit[meal])
            if city == question['org']:
                # NOTE(review): behaviour preserved from the previous code — a
                # meal located in the origin city ends processing of the whole
                # day, so later meals that day are not inspected. Confirm that
                # skipping only this meal was not the intent.
                break
            res = restaurants.data[(restaurants.data['Name'].astype(str).str.contains(re.escape(name))) & (restaurants.data['City'] == city)]
            if len(res) > 0:
                for cuisine in required:
                    if cuisine in res.iloc[0]['Cuisines']:
                        cuisine_set.add(cuisine)

    # Report the first missing cuisine; if none is missing the constraint
    # holds. Checking membership (rather than comparing lengths) avoids falling
    # through without a return value when `required` has repeated entries.
    for cuisine in required:
        if cuisine not in cuisine_set:
            return False, f"The cuisine {cuisine} is not satisfied."
    return True, None
|
|
|
|
|
def is_valid_transportation(question, tested_data):
    """
    Check that the plan does not use a transportation mode the query forbids.

    Only the first min(question['days'], len(tested_data)) day entries are
    inspected; empty or '-' transportation entries are ignored. Matching is
    case-insensitive, in line with how get_total_cost recognises flights and
    self-driving.

    Args:
    - question (dict): Query; reads 'days' and
      question['local_constraint']['transportation'] (None, 'no flight' or
      'no self-driving'; any other value forbids nothing).
    - tested_data (list): Per-day dicts with a 'transportation' key.

    Returns:
    - tuple: (None, None) if there is no constraint; (False, message) for the
      first entry that uses the forbidden mode; (True, None) otherwise.
    """
    constraint = question['local_constraint']['transportation']
    if constraint is None:
        return None, None

    # Lower-case keyword whose presence in an entry violates each constraint.
    banned_keywords = {'no flight': 'flight', 'no self-driving': 'self-driving'}
    banned = banned_keywords.get(constraint) if isinstance(constraint, str) else None

    for i in range(min(question['days'], len(tested_data))):
        value = tested_data[i]['transportation']
        if value and value != '-':
            if banned is not None and banned in value.lower():
                # The constraint text already carries the negation ("no ..."),
                # so it is stated as the requirement rather than negated again.
                return False, f"The transportation should be {constraint}."

    return True, None
|
|
|
|
|
def is_valid_room_type(question, tested_data):
    """
    Check that every booked accommodation has the requested room type.

    Only the first min(question['days'], len(tested_data)) day entries are
    inspected. For each non-empty accommodation entry, the 'room type' of the
    first matching row in the accommodation table is compared with the
    constraint; entries with no match are ignored.

    Args:
    - question (dict): Query; reads 'days' and
      question['local_constraint']['room type'].
    - tested_data (list): Per-day dicts with an 'accommodation' key.

    Returns:
    - tuple: (None, None) if no room type is requested; (False, message) for
      the first listing that violates it; (True, None) otherwise.
    """
    required = question['local_constraint']['room type']
    if required is None:
        return None, None

    # Constraints that demand one exact table value.
    exact_types = (
        ('shared room', 'Shared room'),
        ('private room', 'Private room'),
        ('entire room', 'Entire home/apt'),
    )

    day_count = min(question['days'], len(tested_data))
    for idx in range(day_count):
        stay = tested_data[idx]['accommodation']
        if not stay or stay == '-':
            continue

        name, city = get_valid_name_city(stay)
        listings = accommodation.data
        name_hit = listings['NAME'].astype(str).str.contains(re.escape(name))
        res = listings[name_hit & (listings['city'] == city)]
        if len(res) == 0:
            continue

        actual = res['room type'].values[0]
        if required == 'not shared room':
            # The only negative constraint: anything but a shared room passes.
            violated = actual == 'Shared room'
        else:
            violated = any(
                required == label and actual != expected
                for label, expected in exact_types
            )
        if violated:
            return False, f"The room type should be {required}."

    return True, None
|
|
|
|
|
def evaluation(query_data, tested_data):
    """
    Run every hard-constraint check on a plan and collect the results.

    Args:
    - query_data (dict): The query; passed to each check, and its 'budget'
      is compared with the plan's total cost.
    - tested_data (list): The per-day plan entries.

    Returns:
    - dict: Maps 'valid_cuisine', 'valid_room_rule', 'valid_transportation',
      'valid_room_type' and 'valid_cost' to the tuple produced by the
      corresponding check. 'valid_cost' is (bool, None), where the bool says
      whether the total cost is within the budget.
    """
    # The checks run in the order listed; the dict keeps that order.
    return {
        'valid_cuisine': is_valid_cuisine(query_data, tested_data),
        'valid_room_rule': is_valid_room_rule(query_data, tested_data),
        'valid_transportation': is_valid_transportation(query_data, tested_data),
        'valid_room_type': is_valid_room_type(query_data, tested_data),
        'valid_cost': (
            bool(get_total_cost(query_data, tested_data) <= query_data['budget']),
            None,
        ),
    }
|
|
|
def boolean_evaluation(query_data, tested_data):
    """
    Report whether a plan passes all hard constraints.

    Runs the same checks as evaluation(). A check whose status is None
    (constraint not requested) does not count as a failure.

    Args:
    - query_data (dict): The query, including 'budget'.
    - tested_data (list): The per-day plan entries.

    Returns:
    - bool: False as soon as one check reports False (its key is printed),
      True otherwise.
    """
    # Reuse evaluation() rather than repeating the list of checks here, so the
    # two entry points cannot drift apart.
    return_info = evaluation(query_data, tested_data)
    for key, (status, _message) in return_info.items():
        # Every check yields a literal True/False/None, so an identity test is
        # enough and keeps None from being treated as a failure.
        if status is False:
            print(key)
            return False
    return True
|
|
|
|