hsaest committed on
Commit
3a3b852
1 Parent(s): 02421e8

Upload folder using huggingface_hub

Files changed (5)
  1. app.py +1 -2
  2. commonsenseConstraint.py +735 -0
  3. eval.py +181 -0
  4. hardConstraint.py +266 -0
  5. requirements.txt +1 -2
app.py CHANGED
@@ -2,7 +2,6 @@ import os
  import sys
  sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "./leaderboard/evaluation")))
  sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "./leaderboard")))
- print(sys.path)
  os.chdir(os.path.dirname(os.path.abspath(__file__)))
  import json
  import datetime
@@ -19,7 +18,7 @@ from huggingface_hub import HfApi
  # InfoStrings
  # from scorer import question_scorer
  from content import format_error, format_warning, format_log, TITLE, INTRODUCTION_TEXT, CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT, model_hyperlink
- from evaluation.eval import eval_score
+ from eval import eval_score

  TOKEN = os.environ.get("TOKEN", None)
 
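(For reference: with the import switched from evaluation.eval to the local eval module added in this commit, the app can score an uploaded plan file directly. A minimal sketch of that wiring is below; the handler name and file path are hypothetical, and only eval_score(validation_or_test, file_path, TOKEN) from eval.py and the TOKEN environment variable come from the files in this commit.)

    import os
    from eval import eval_score

    def score_submission(file_path: str, split: str = "validation") -> dict:
        # Hypothetical helper: the actual Gradio callback in app.py is not shown in this diff.
        token = os.environ.get("TOKEN", None)
        # eval_score returns a dict of Delivery / Commonsense / Hard-constraint pass rates.
        return eval_score(split, file_path, token)

    # e.g. score_submission("submission.jsonl", "validation")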
commonsenseConstraint.py ADDED
@@ -0,0 +1,735 @@
1
+ from annotation.src.utils import get_valid_name_city,extract_before_parenthesis,extract_numbers_from_filenames
2
+ from tools.flights.apis import Flights
3
+ from tools.accommodations.apis import Accommodations
4
+ from tools.restaurants.apis import Restaurants
5
+ from tools.googleDistanceMatrix.apis import GoogleDistanceMatrix
6
+ from tools.attractions.apis import Attractions
7
+ import math
8
+ import json
9
+ import re
10
+ import os
11
+ import sys
12
+ from tqdm import tqdm
13
+ import argparse
14
+
15
+ sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))
16
+ os.chdir(os.path.dirname(os.path.abspath(__file__)))
17
+
18
+ flight = Flights()
19
+ accommodation = Accommodations()
20
+ restaurants = Restaurants()
21
+ googleDistanceMatrix = GoogleDistanceMatrix()
22
+ attractions = Attractions()
23
+
24
+ city_state_set = open('../database/background/citySet_with_states.txt','r').read().split('\n')
25
+ city_state_map = {x:y for x,y in [unit.split('\t') for unit in city_state_set]}
26
+
27
+
28
+ def load_line_json_data(filename):
29
+ data = []
30
+ with open(filename, 'r', encoding='utf-8') as f:
31
+ for line in f.read().strip().split('\n'):
32
+ unit = json.loads(line)
33
+ data.append(unit)
34
+ return data
35
+
36
+
37
+ def count_consecutive_values(lst):
38
+ if not lst:
39
+ return []
40
+
41
+ result = []
42
+ current_string = lst[0]
43
+ count = 1
44
+
45
+ for i in range(1, len(lst)):
46
+ if lst[i] == current_string:
47
+ count += 1
48
+ else:
49
+ result.append((current_string, count))
50
+ current_string = lst[i]
51
+ count = 1
52
+
53
+ result.append((current_string, count)) # Add the last group of values
54
+ return result
55
+
56
+
57
+ def transportation_match(text: str):
58
+
59
+ if 'taxi' in text.lower():
60
+ return 'Taxi'
61
+
62
+ elif 'self-driving' in text.lower():
63
+ return 'Self-driving'
64
+
65
+ elif 'flight' in text.lower():
66
+ return 'Flight'
67
+
68
+
69
+ def extract_from_to(text: str):
70
+ """
71
+ Extracts 'A' and 'B' from the format "from A to B" in the given text, with B ending at a comma or the end of the string.
72
+
73
+ Args:
74
+ - text (str): The input string.
75
+
76
+ Returns:
77
+ - tuple: A tuple containing 'A' and 'B'. If no match is found, returns (None, None).
78
+ """
79
+ pattern = r"from\s+(.+?)\s+to\s+([^,]+)(?=[,\s]|$)"
80
+ matches = re.search(pattern, text)
81
+ return matches.groups() if matches else (None, None)
82
+
83
+
84
+
85
+ def is_valid_city_sequence(city_list):
86
+ """
87
+ Checks if the city sequence is valid. A valid sequence has every city (except the first and last)
88
+ appearing consecutively, and no city should appear again once its sequence is over.
89
+
90
+ Args:
91
+ - city_list (list): List of cities.
92
+
93
+ Returns:
94
+ - bool: True if the sequence is valid, False otherwise.
95
+ """
96
+
97
+ # If the list has less than 3 cities, it's invalid.
98
+ if len(city_list) < 3:
99
+ return False
100
+
101
+ # Set to keep track of visited cities
102
+ visited_cities = set()
103
+
104
+ i = 0
105
+ while i < len(city_list):
106
+ city = city_list[i]
107
+
108
+ # If the city was already visited, it's invalid.
109
+ if city in visited_cities and (i != 0 and i != len(city_list) - 1):
110
+ return False
111
+
112
+ # Count the consecutive occurrences of the city
113
+ count = 0
114
+ while i < len(city_list) and city_list[i] == city:
115
+ count += 1
116
+ i += 1
117
+
118
+ # If the city appeared only once in the middle of the sequence, it's invalid.
119
+ if count == 1 and 0 < i - 1 < len(city_list) - 1:
120
+ return False
121
+
122
+ visited_cities.add(city)
123
+
124
+ return True
125
+
126
+
127
+
128
+ def is_reasonalbe_visiting_city(question, tested_data):
129
+
130
+ city_list = []
131
+
132
+ # print(tested_data)
133
+ for i in range(min(question['days'],len(tested_data))):
134
+ city_value = tested_data[i]['current_city']
135
+
136
+ if 'from' in city_value:
137
+ city1, city2 = extract_from_to(city_value)
138
+ city1 = extract_before_parenthesis(city1)
139
+ city2 = extract_before_parenthesis(city2)
140
+ if i==0 and city1 != question['org']:
141
+ return False, f"The first day's city should be {question['org']}."
142
+
143
+ city_list += [city1, city2]
144
+
145
+ else:
146
+ city_list.append(extract_before_parenthesis(city_value))
147
+
148
+ if city_list[0] != city_list[-1]:
149
+ return False, "The trip should be a closed circle."
150
+
151
+ if not is_valid_city_sequence(city_list):
152
+ return False, "The city sequence is invalid."
153
+
154
+ for idx, city in enumerate(city_list):
155
+ if city not in city_state_map:
156
+ return False, f"{city} is not a valid city."
157
+ if idx not in [0,len(city_list)-1] and question['days'] >3 and city_state_map[city] != question['dest']:
158
+ return False, f"{city} is not in {question['dest']}."
159
+
160
+ return True, None
161
+
162
+
163
+ def is_valid_restaurants(question, tested_data):
164
+
165
+ restaurants_list = []
166
+
167
+ for i in range(min(question['days'],len(tested_data))):
168
+ unit = tested_data[i]
169
+
170
+ if 'breakfast' in unit and unit['breakfast'] and unit['breakfast'] != '-':
171
+ if unit['breakfast'] not in restaurants_list:
172
+ restaurants_list.append(unit['breakfast'])
173
+ else:
174
+ return False, f"The restaurant in day {i+1} breakfast is repeated."
175
+ # elif 'breakfast' not in unit :
176
+ # return False, f"No Breakfast Info."
177
+
178
+ if 'lunch' in unit and unit['lunch'] and unit['lunch'] != '-':
179
+ if unit['lunch'] not in restaurants_list:
180
+ restaurants_list.append(unit['lunch'])
181
+ else:
182
+ return False, f"The restaurant in day {i+1} lunch {unit['lunch']} is repeated."
183
+ # elif 'lunch' not in unit:
184
+ # return False, f"No Lunch Info."
185
+
186
+ if 'dinner' in unit and unit['dinner'] and unit['dinner'] != '-':
187
+ if unit['dinner'] not in restaurants_list:
188
+ restaurants_list.append(unit['dinner'])
189
+ else:
190
+ return False, f"The restaurant in day {i+1} dinner is repeated."
191
+ # elif 'dinner' not in unit:
192
+ # return False, f"No Dinner Info."
193
+
194
+ return True, None
195
+
196
+ def is_valid_attractions(question, tested_data):
197
+
198
+ attractions_list = []
199
+
200
+ for i in range(min(question['days'],len(tested_data))):
201
+ unit = tested_data[i]
202
+
203
+ if 'attraction' in unit and unit['attraction'] and unit['attraction'] != '-':
204
+ for attraction in unit['attraction'].split(';')[:-1]:
205
+ if attraction not in attractions_list:
206
+ attractions_list.append(attraction)
207
+ else:
208
+ return False, f"The attraction '{attraction}' in day {i+1} is repeated."
209
+
210
+ # elif 'attraction' not in unit:
211
+ # return False, f"No Attraction Info."
212
+
213
+ return True, None
214
+
215
+ def is_valid_transportation(question, tested_data):
216
+
217
+ if tested_data[0]['transportation'] and tested_data[0]['transportation'] != '-':
218
+ transportation_list = [transportation_match(tested_data[0]['transportation'])]
219
+
220
+ else:
221
+ return False, "The transportation in day 1 should not be empty."
222
+
223
+ for i in range(min(question['days'],len(tested_data))):
224
+ unit = tested_data[i]
225
+
226
+ if 'transportation' in unit and unit['transportation'] and unit['transportation'] != '-':
227
+ transportation_list.append(transportation_match(unit['transportation']))
228
+ # elif 'transportation' not in unit:
229
+ # return False, f"No Transportation Info."
230
+
231
+ if (('Self-driving' in transportation_list) and ('Flight' in transportation_list)) or (('Taxi' in transportation_list) and ('Self-driving' in transportation_list)):
232
+ return False, "The transportation is conflicting."
233
+
234
+ return True, None
235
+
236
+ def is_valid_information_in_current_city(question, tested_data):
237
+
238
+ for i in range(min(question['days'],len(tested_data))):
239
+ unit = tested_data[i]
240
+ current_city = unit['current_city']
241
+ final_city_list = []
242
+
243
+ if 'from' in current_city:
244
+ city1, city2 = extract_from_to(current_city)
245
+ city1 = extract_before_parenthesis(city1)
246
+ city2 = extract_before_parenthesis(city2)
247
+ final_city_list = [city1, city2]
248
+ else:
249
+ final_city_list = [extract_before_parenthesis(current_city)]
250
+
251
+ if 'transportation' in unit and unit['transportation'] and unit['transportation'] != '-':
252
+ for city in final_city_list:
253
+ if city not in unit['transportation']:
254
+ # print(city)
255
+ return False, f"The transportation in day {i+1} is invalid city choice."
256
+ # elif 'transportation' not in unit:
257
+ # return False, f"No Transportation Info."
258
+
259
+ if 'breakfast' in unit and unit['breakfast'] and unit['breakfast'] != '-':
260
+
261
+ flag = False
262
+
263
+ for city in final_city_list:
264
+ if city in unit['breakfast']:
265
+ flag = True
266
+
267
+ if not flag:
268
+ return False, f"The breakfast in day {i+1} is invalid city choice."
269
+ # elif 'breakfast' not in unit:
270
+ # return False, f"No Breakfast Info."
271
+
272
+ if 'lunch' in unit and unit['lunch'] and unit['lunch'] != '-':
273
+ flag = False
274
+
275
+ for city in final_city_list:
276
+ if city in unit['lunch']:
277
+ flag = True
278
+
279
+ if not flag:
280
+ return False, f"The lunch in day {i+1} is invalid city choice."
281
+ # elif 'lunch' not in unit:
282
+ # return False, f"No Lunch Info."
283
+
284
+ if 'dinner' in unit and unit['dinner'] and unit['dinner'] != '-':
285
+ flag = False
286
+
287
+ for city in final_city_list:
288
+ if city in unit['dinner']:
289
+ flag = True
290
+
291
+ if not flag:
292
+ return False, f"The dinner in day {i+1} is invalid city choice."
293
+ # elif 'dinner' not in unit:
294
+ # return False, f"No Dinner Info."
295
+
296
+ if 'attraction' in unit and unit['attraction'] and unit['attraction'] != '-':
297
+
298
+ attraction_list = unit['attraction'].split(';')[:-1]
299
+
300
+ for attraction in attraction_list:
301
+ flag = False
302
+ for city in final_city_list:
303
+ if city in attraction:
304
+ flag = True
305
+ if not flag:
306
+ return False, f"The attraction in day {i+1} is invalid city choice."
307
+
308
+ # elif 'attraction' not in unit:
309
+ # return False, f"No Attraction Info."
310
+
311
+
312
+ if 'accommodation' in unit and unit['accommodation'] and unit['accommodation'] != '-':
313
+
314
+ if final_city_list[-1] not in unit['accommodation']:
315
+ return False, f"The accommodation in day {i+1} is invalid city choice."
316
+
317
+ # elif 'accommodation' not in unit:
318
+ # return False, f"No Accommodation Info."
319
+
320
+ return True, None
321
+
322
+ # hallucination
323
+ def is_valid_information_in_sandbox(question, tested_data):
324
+
325
+ for i in range(min(question['days'],len(tested_data))):
326
+ unit = tested_data[i]
327
+
328
+ if unit['transportation'] and unit['transportation'] != '-':
329
+ value = unit['transportation']
330
+ org_city, dest_city = extract_from_to(value)
331
+ if org_city == None or dest_city == None:
332
+ org_city, dest_city = extract_from_to(unit['current_city'])
333
+ if 'flight number' in value.lower():
334
+ try:
335
+ org_city = extract_before_parenthesis(org_city)
336
+ dest_city = extract_before_parenthesis(dest_city)
337
+ except TypeError:
338
+ raise ValueError("The transportation {} in day {} can not be parsed.".format(value,i+1))
339
+ # print(value)
340
+ if len(flight.data[(flight.data['Flight Number'] == value.split('Flight Number: ')[1].split(',')[0]) & (flight.data['OriginCityName']==org_city) & (flight.data['DestCityName']==dest_city)]) < 1:
341
+ return False, f"The flight number in day {i+1} is invalid in the sandbox."
342
+
343
+ elif 'self-driving' in value.lower() or 'taxi' in value.lower():
344
+ try:
345
+ org_city = extract_before_parenthesis(org_city)
346
+ dest_city = extract_before_parenthesis(dest_city)
347
+ except TypeError:
348
+ org_city = '-'
349
+ dest_city = '-'
350
+ print("The transportation {} in day {} can not be parsed and '-' will be used instead.".format(value,i+1))
351
+
352
+ if 'self-driving' in value.lower():
353
+ if googleDistanceMatrix.run_for_evaluation(org_city, dest_city, mode='self-driving')['cost'] == None:
354
+ return False, f"The self-driving in day {i+1} is invalid in the sandbox."
355
+ else:
356
+ if googleDistanceMatrix.run_for_evaluation(org_city, dest_city, mode='taxi')['cost'] == None:
357
+ return False, f"The taxi in day {i+1} is invalid in the sandbox."
358
+
359
+ if 'breakfast' in unit and unit['breakfast'] and unit['breakfast'] != '-':
360
+ name, city = get_valid_name_city(unit['breakfast'])
361
+ if len(restaurants.data[(restaurants.data['Name'].astype(str).str.contains(re.escape(name))) & (restaurants.data['City'] == city)]) < 1:
362
+ return False, f"The breakfast in day {i+1} is invalid in the sandbox."
363
+ # elif 'breakfast' not in unit:
364
+ # return False, f"No Breakfast Info."
365
+
366
+ if 'lunch' in unit and unit['lunch'] and unit['lunch'] != '-':
367
+ name, city = get_valid_name_city(unit['lunch'])
368
+ if len(restaurants.data[(restaurants.data['Name'].astype(str).str.contains(re.escape(name))) & (restaurants.data['City'] == city)]) < 1:
369
+ return False, f"The lunch in day {i+1} is invalid in the sandbox."
370
+ # elif 'lunch' not in unit:
371
+ # return False, f"No Lunch Info."
372
+
373
+ if 'dinner' in unit and unit['dinner'] and unit['dinner'] != '-':
374
+ name, city = get_valid_name_city(unit['dinner'])
375
+ if len(restaurants.data[(restaurants.data['Name'].astype(str).str.contains(re.escape(name))) & (restaurants.data['City'] == city)]) < 1:
376
+ return False, f"The dinner in day {i+1} is invalid in the sandbox."
377
+ # elif 'dinner' not in unit:
378
+ # return False, f"No Dinner Info."
379
+
380
+ if 'attraction' in unit and unit['attraction'] and unit['attraction'] != '-':
381
+ attractions_list = unit['attraction'].split(';')[:-1]
382
+ for attraction in attractions_list:
383
+ name, city = get_valid_name_city(attraction)
384
+ if len(attractions.data[(attractions.data['Name'].astype(str).str.contains(re.escape(name))) & (attractions.data['City'] == city)]) < 1:
385
+ return False, f"The attraction {attraction} in day {i+1} is invalid in the sandbox."
386
+ # elif 'attraction' not in unit:
387
+ # return False, f"No Attraction Info."
388
+
389
+ if 'accommodation' in unit and unit['accommodation'] and unit['accommodation'] != '-':
390
+ name, city = get_valid_name_city(unit['accommodation'])
391
+ # print(name,city)
392
+ # print(accommodation.data[accommodation.data['NAME'].astype(str).str.contains(re.escape(name))])
393
+ if len(accommodation.data[(accommodation.data['NAME'].astype(str).str.contains(re.escape(name))) & (accommodation.data['city'] == city)]) < 1:
394
+ return False, f"The accommodation in day {i+1} is invalid in the sandbox."
395
+ # elif 'accommodation' not in unit:
396
+ # return False, f"No Accommodation Info."
397
+
398
+ return True, None
399
+
400
+
401
+ def is_valid_accommodaton(question, tested_data):
402
+ data = []
403
+ for i in range(min(question['days'],len(tested_data))):
404
+ unit = tested_data[i]
405
+
406
+ if 'accommodation' not in unit:
407
+ return False, f"No Accommodation Info."
408
+
409
+ data.append(unit['accommodation'])
410
+ # data = [unit['accommodation'] for unit in tested_data]
411
+ consectutive_accommodation = count_consecutive_values(data)
412
+ for unit in consectutive_accommodation:
413
+ # print(unit)
414
+ if unit and unit[0] not in ['-',''] :
415
+ name, city = get_valid_name_city(unit[0])
416
+ # print(unit[0],name,city)
417
+ # try:
418
+ if len(accommodation.data[(accommodation.data['NAME'].astype(str).str.contains(re.escape(name))) & (accommodation.data['city'] == city)]) == 1 and unit[1] < accommodation.data[(accommodation.data['NAME'].astype(str).str.contains(re.escape(name))) & (accommodation.data['city'] == city)].iloc[0]['minimum nights']:
419
+ return False, f"The accommodation {unit[0]} does not obey the minimum nights rule."
420
+ # can not parse data
421
+ # except re.error:
422
+ # continue
423
+
424
+ return True, None
425
+
426
+ def is_valid_visiting_city_number(question, tested_data):
427
+
428
+ city_set = set()
429
+
430
+
431
+ for i in range(min(question['days'],len(tested_data))):
432
+ city_value = tested_data[i]['current_city']
433
+
434
+ if 'from' in city_value:
435
+ city1, city2 = extract_from_to(city_value)
436
+ city1 = extract_before_parenthesis(city1)
437
+ city2 = extract_before_parenthesis(city2)
438
+ if i==0 and city1 != question['org']:
439
+ return False, f"The first day's city should be {question['org']}."
440
+
441
+ city_set.add(city1)
442
+ city_set.add(city2)
443
+
444
+ else:
445
+ city_set.add(extract_before_parenthesis(city_value))
446
+
447
+ city_set.discard(question['org'])
448
+
449
+ if len(city_set) != question['visiting_city_number']:
450
+ return False, f"The number of visiting cities should be {question['visiting_city_number']}."
451
+
452
+ return True, None
453
+
454
+ def is_valid_days(question, tested_data):
455
+ lens = 0
456
+ for i in range(min(question['days'],len(tested_data))):
457
+ if tested_data[i] != {} and tested_data[i]['current_city'] != "You don't need to fill in the information for this or later days.":
458
+ lens += 1
459
+
460
+ if lens != question['days']:
461
+ # print(lens)
462
+ return False, f"The number of days should be {question['days']}."
463
+ else:
464
+ return True, None
465
+
466
+ def is_not_absent(question, tested_data):
467
+ needed_info = 6 * question['days']
468
+ total_valid_info = 0
469
+
470
+ if not is_valid_days(question, tested_data)[0]:
471
+ return False, "Invalid Days"
472
+
473
+ if not is_valid_visiting_city_number(question, tested_data)[0]:
474
+ return False, "Invalid City Number"
475
+
476
+ for i in range(min(question['days'],len(tested_data))):
477
+ unit = tested_data[i]
478
+
479
+ if 'transportation' not in unit:
480
+ return False, f"No Transportation Info."
481
+
482
+ if 'breakfast' not in unit:
483
+ return False, f"No Breakfast Info."
484
+
485
+ if 'lunch' not in unit:
486
+ return False, f"No Lunch Info."
487
+
488
+ if 'dinner' not in unit:
489
+ return False, f"No Dinner Info."
490
+
491
+ if 'attraction' not in unit:
492
+ return False, f"No Attraction Info."
493
+
494
+ if 'accommodation' not in unit:
495
+ return False, f"No Accommodation Info."
496
+
497
+ if ('from ' in unit['current_city'] or 'to ' in unit['current_city']) and unit['transportation'] in ['','-']:
498
+ return False, f"No transportation in day {i+1} is not allowed."
499
+
500
+ if ('from ' not in unit['current_city'] and ' to ' not in unit['current_city']) and unit['attraction'] in ['','-']:
501
+ return False, f"No attraction in day {i+1} is not allowed."
502
+
503
+ if i != question['days'] - 1 and unit['accommodation'] in ['','-']:
504
+ return False, f"No accommodation in day {i+1} is not allowed."
505
+
506
+ if (unit['breakfast'] in ['','-'] or unit['lunch'] in ['','-'] or unit['dinner'] in ['','-']) and 'from ' not in unit['current_city']:
507
+ return False, f"No meal in day {i+1} is not allowed."
508
+
509
+
510
+ for key in unit:
511
+ if unit[key] and unit[key] != '-':
512
+ total_valid_info += 1
513
+
514
+
515
+ if total_valid_info * 1.0 / needed_info < 0.5:
516
+ return False, f"The absent information is more than 50%."
517
+
518
+ return True, None
519
+
520
+
521
+ def evaluation(query_data, tested_data):
522
+ return_info = {}
523
+ return_info['is_reasonalbe_visiting_city'] = is_reasonalbe_visiting_city(query_data, tested_data)
524
+ return_info['is_valid_restaurants'] = is_valid_restaurants(query_data, tested_data)
525
+ return_info['is_valid_attractions'] = is_valid_attractions(query_data, tested_data)
526
+ return_info['is_valid_accommodation'] = is_valid_accommodaton(query_data, tested_data)
527
+ return_info['is_valid_transportation'] = is_valid_transportation(query_data, tested_data)
528
+ return_info['is_valid_information_in_current_city'] = is_valid_information_in_current_city(query_data, tested_data)
529
+ return_info['is_valid_information_in_sandbox'] = is_valid_information_in_sandbox(query_data, tested_data)
530
+ return_info['is_not_absent'] = is_not_absent(query_data, tested_data)
531
+ return return_info
532
+
533
+ def boolean_evaluation(query_data, tested_data):
534
+ return_info = {}
535
+ return_info['is_reasonalbe_visiting_city'] = is_reasonalbe_visiting_city(query_data, tested_data)
536
+ return_info['is_valid_restaurants'] = is_valid_restaurants(query_data, tested_data)
537
+ return_info['is_valid_accommodation'] = is_valid_accommodaton(query_data, tested_data)
538
+ return_info['is_valid_attractions'] = is_valid_attractions(query_data, tested_data)
539
+ return_info['is_valid_transportation'] = is_valid_transportation(query_data, tested_data)
540
+ return_info['is_valid_information_in_current_city'] = is_valid_information_in_current_city(query_data, tested_data)
541
+ return_info['is_valid_information_in_sandbox'] = is_valid_information_in_sandbox(query_data, tested_data)
542
+ return_info['is_not_absent'] = is_not_absent(query_data, tested_data)
543
+ for key in return_info:
544
+ if return_info[key][0] == False:
545
+ print(return_info[key][1])
546
+ return False
547
+ return True
548
+
549
+ # if __name__ == '__main__':
550
+ # number_list = extract_numbers_from_filenames('/home/xj/toolAugEnv/code/toolConstraint/data/annotation/lrz')
551
+ # # json_data = json.load(open('/home/xj/toolAugEnv/code/toolConstraint/data/annotation/x/annotation_4.json'))
552
+ # query_data = load_line_json_data('/home/xj/toolAugEnv/code/toolConstraint/data/query/lrz.jsonl')
553
+ # for idx in number_list:
554
+ # json_data = json.load(open(f'/home/xj/toolAugEnv/code/toolConstraint/data/annotation/lrz/annotation_{idx}.json'))
555
+ # print(str(idx), evaluation(query_data[idx-1], json_data))
556
+ # # json_data = json.load(open(f'/home/xj/toolAugEnv/code/toolConstraint/results/turbo16k-turbo16k/plan_{idx}.json'))
557
+ # # query_data = load_line_json_data('/home/xj/toolAugEnv/code/toolConstraint/data/query/test.jsonl')[idx-1]
558
+ # # help me write all function name in this file, just the name
559
+ # #
560
+ # # list all function name in this file
561
+ # # ['is_reasonalbe_visiting_city', 'is_valiable_restaurants', 'is_valiable_attractions', 'is_valiable_transportation', 'is_valid_information_in_current_city', 'is_valid_information_in_sandbox']
562
+ # # print(is_valiable_restaurants(query_data, json_data))
563
+
564
+ # if __name__ == "__main__":
565
+ # user = 'zk'
566
+ # query_data_list = load_line_json_data(f'/home/xj/toolAugEnv/code/toolConstraint/data/query/{user}.jsonl')
567
+ # idx_number_list = extract_numbers_from_filenames(f'/home/xj/toolAugEnv/code/toolConstraint/data/annotation/{user}')
568
+ # commonsense_statistic= {level:{day:[] for day in [3,5,7]} for level in ['easy','medium','hard']}
569
+ # for idx in idx_number_list:
570
+ # print(idx)
571
+ # query_data = query_data_list[idx-1]
572
+ # generated_plan = json.load(open(f'/home/xj/toolAugEnv/code/toolConstraint/results/turbo16k-turbo16k/{user}/plan_{idx}.json'))
573
+ # # generated_plan = generated_plan[:-1]
574
+ # if generated_plan[-1]['gpt-3.5-turbo-16k-result'] != 'Plan Fail':
575
+ # info_box = evaluation(query_data, generated_plan[-1]['gpt-3.5-turbo-16k-result'])
576
+ # generated_plan[-1]['toolAug-commonsense'] = info_box
577
+ # else:
578
+ # generated_plan[-1]['toolAug-commonsense'] = None
579
+ # info_box = None
580
+ # commonsense_statistic[query_data['level']][query_data['days']].append(info_box)
581
+ # with open(f'/home/xj/toolAugEnv/code/toolConstraint/results/turbo16k-turbo16k/{user}/plan_{idx}.json','w') as f:
582
+ # json.dump(generated_plan,f)
583
+
584
+ # with open(f'/home/xj/toolAugEnv/code/toolConstraint/results/turbo16k-turbo16k/{user}/commonsense_statistic.json','w') as f:
585
+ # json.dump(commonsense_statistic,f)
586
+
587
+ # if __name__ == "__main__":
588
+ # user = 'all'
589
+ # model_type = ['chatgpt','gpt4','greedy_search'][2]
590
+ # query_data_list = load_line_json_data(f'/home/xj/toolAugEnv/code/toolConstraint/data/query/{user}.jsonl')
591
+ # # idx_number_list = extract_numbers_from_filenames(f'/home/xj/toolAugEnv/code/toolConstraint/data/annotation/{user}')
592
+ # idx_number_list = [i for i in range(1,501)]
593
+ # commonsense_statistic= {level:{day:[] for day in [3,5,7]} for level in ['easy','medium','hard']}
594
+
595
+ # for idx in idx_number_list:
596
+ # print(idx)
597
+ # query_data = query_data_list[idx-1]
598
+ # generated_plan = json.load(open(f'/home/xj/toolAugEnv/code/toolConstraint/results/pre2/{user}/plan_{idx}.json'))
599
+ # # generated_plan = generated_plan[:-1]
600
+ # if model_type == 'greedy_search':
601
+ # info_box = evaluation(query_data, generated_plan[-1][f'greedy_search_plan'])
602
+ # else:
603
+ # info_box = evaluation(query_data, generated_plan[-1][f'{model_type}_human_collected_info_results_parsed'])
604
+ # generated_plan[-1][f'{model_type}_with_human_collected_commonsense'] = info_box
605
+ # commonsense_statistic[query_data['level']][query_data['days']].append(info_box)
606
+
607
+ # with open(f'/home/xj/toolAugEnv/code/toolConstraint/results/pre2/{user}/plan_{idx}.json','w') as f:
608
+ # json.dump(generated_plan,f)
609
+
610
+ # with open(f'/home/xj/toolAugEnv/code/toolConstraint/results/pre2/{user}/{model_type}_with_human_collected_commonsense_statistic.json','w') as f:
611
+ # json.dump(commonsense_statistic,f)
612
+
613
+
614
+ # if __name__ == "__main__":
615
+ # user = 'all'
616
+ # query_data_list = load_line_json_data(f'/home/xj/toolAugEnv/code/toolConstraint/data/query/{user}.jsonl')
617
+ # idx_number_list = extract_numbers_from_filenames(f'/home/xj/toolAugEnv/code/toolConstraint/data/annotation/{user}')
618
+ # hardConstraint_statistic= {level:{day:[] for day in [3,5,7]} for level in ['easy','medium','hard']}
619
+ # not_satified = []
620
+ # for idx in tqdm(idx_number_list):
621
+ # # print(idx)
622
+ # query_data = query_data_list[idx-1]
623
+ # generated_plan = json.load(open(f'/home/xj/toolAugEnv/code/toolConstraint/data/annotation/{user}/annotation_{idx}.json'))
624
+
625
+ # if not boolean_evaluation(query_data, generated_plan):
626
+ # not_satified.append(idx)
627
+ # print(idx)
628
+ # generated_plan = generated_plan[:-1]
629
+ # print(not_satified)
630
+
631
+ if __name__ == "__main__":
632
+ set_type = ["train",'dev','test'][0]
633
+ query_data_list = load_line_json_data(f'/home/xj/toolAugEnv/code/toolConstraint/data/final_data/{set_type}/query/query.jsonl')
634
+ # idx_number_list = extract_numbers_from_filenames(f'/home/xj/toolAugEnv/code/toolConstraint/data/final_data/{set_type}/plan')
635
+ commonsense_statistic= {level:{day:[] for day in [3,5,7]} for level in ['easy','medium','hard']}
636
+ not_satified = []
637
+ # print( idx_number_list)
638
+ for idx in tqdm(range(1,len(query_data_list)+1)):
639
+ # print(idx)
640
+ query_data = query_data_list[idx-1]
641
+ generated_plan = json.load(open(f'/home/xj/toolAugEnv/code/toolConstraint/data/final_data/{set_type}/plan/plan_{idx}.json'))
642
+ try:
643
+ store_plan = json.load(open(f'/home/xj/toolAugEnv/code/toolConstraint/results/{set_type}/plan_{idx}.json'))
644
+ except FileNotFoundError:
645
+ store_plan = [{}]
646
+ info_box = evaluation(query_data,generated_plan[1])
647
+ # if not boolean_evaluation(query_data, generated_plan[1]):
648
+ # not_satified.append(idx)
649
+ # print(idx)
650
+ # print(store_plan[-1])
651
+ store_plan[-1][f'human_anno_commonsense_constraint'] = info_box
652
+ with open(f'/home/xj/toolAugEnv/code/toolConstraint/results/{set_type}/plan_{idx}.json','w') as f:
653
+ json.dump(store_plan,f)
654
+ commonsense_statistic[query_data['level']][query_data['days']].append(info_box)
655
+ print(not_satified)
656
+ with open(f'/home/xj/toolAugEnv/code/toolConstraint/results/{set_type}/human_anno_commonsense_constraint.json','w') as f:
657
+ json.dump(commonsense_statistic,f)
658
+
659
+ # if __name__ == "__main__":
660
+ # user = 'all'
661
+ # model_type = ['chatgpt','gpt4'][1]
662
+ # query_data_list = load_line_json_data(f'/home/xj/toolAugEnv/code/toolConstraint/data/query/{user}.jsonl')
663
+ # # idx_number_list = extract_numbers_from_filenames(f'/home/xj/toolAugEnv/code/toolConstraint/data/annotation/{user}')
664
+ # idx_number_list = [i for i in range(1,501)]
665
+ # commonsense_statistic= {level:{day:[] for day in [3,5,7]} for level in ['easy','medium','hard']}
666
+ # cnt = 0
667
+ # for idx in idx_number_list:
668
+ # # print(idx)
669
+ # query_data = query_data_list[idx-1]
670
+ # generated_plan = json.load(open(f'/home/xj/toolAugEnv/code/toolConstraint/results/pre/{user}/plan_{idx}.json'))[-1]['gpt4_human_collected_info_results_parsed']
671
+ # # generated_plan = generated_plan[:-1]
672
+
673
+ # if not boolean_evaluation(query_data, generated_plan):
674
+ # cnt += 1
675
+ # print(idx)
676
+ # print(cnt)
677
+
678
+ # if __name__ == "__main__":
679
+ # parser = argparse.ArgumentParser(description="")
680
+ # # model_type = ['gpt-3.5-turbo-1106','gpt-4-1106-preview','greedy_search','mistral-7B-32K','gemini2','mixtral','gpt-3.5-turbo-11062'][-1]
681
+ # # method = ['direct','cot','react','reflexion','tool-use'][-1]
682
+ # # set_type = ['dev','test'][0]
683
+ # parser.add_argument("--model_type", type=str, default="gpt-3.5-turbo-1106")
684
+ # parser.add_argument("--method", type=str, default="direct")
685
+ # parser.add_argument("--set_type", type=str, default="dev")
686
+ # args = parser.parse_args()
687
+ # directory = f'/home/xj/toolAugEnv/code/toolConstraint/data/final_data/{args.set_type}'
688
+ # query_data_list = load_line_json_data(os.path.join(directory, 'query/query.jsonl'))
689
+ # # idx_number_list = extract_numbers_from_filenames(f'/home/xj/toolAugEnv/code/toolConstraint/data/annotation/{user}')
690
+ # idx_number_list = [i for i in range(1,len(query_data_list)+1)]
691
+ # commonsense_statistic= {level:{day:[] for day in [3,5,7]} for level in ['easy','medium','hard']}
692
+ # deliver_cnt = 0
693
+ # if args.method == 'tool-use':
694
+ # suffix = ''
695
+ # else:
696
+ # suffix = '_with_human_info'
697
+ # for idx in tqdm(idx_number_list):
698
+ # # print(idx)
699
+ # query_data = query_data_list[idx-1]
700
+ # generated_plan = json.load(open(f'/home/xj/toolAugEnv/code/toolConstraint/results/{args.set_type}/plan_{idx}.json'))
701
+ # # generated_plan = generated_plan[:-1]
702
+ # if args.model_type == 'greedy_search':
703
+ # info_box = evaluation(query_data, generated_plan[-1][f'greedy_search_plan'])
704
+ # else:
705
+ # if args.method == 'tool-use':
706
+ # suffix2 = ''
707
+ # else:
708
+ # suffix2 = '_collected'
709
+ # if generated_plan[-1][f'{args.model_type}_{args.method}{suffix2}_info_results'] and generated_plan[-1][f'{args.model_type}_{args.method}{suffix2}_info_results']!='Max Token Length Exceeded.':
710
+ # try:
711
+ # info_box = evaluation(query_data, generated_plan[-1][f'{args.model_type}_{args.method}{suffix}_results_parsed'])
712
+ # except KeyError:
713
+ # info_box = None
714
+ # generated_plan[-1][f'{args.model_type}_{args.method}{suffix2}_info_results'] = ""
715
+ # except IndexError:
716
+ # info_box = None
717
+ # generated_plan[-1][f'{args.model_type}_{args.method}{suffix2}_info_results'] = ""
718
+ # else:
719
+ # info_box = None
720
+ # if info_box:
721
+ # deliver_cnt += 1
722
+ # generated_plan[-1][f'{args.model_type}_{args.method}{suffix}_commonsense_constraint'] = info_box
723
+ # commonsense_statistic[query_data['level']][query_data['days']].append(info_box)
724
+
725
+ # with open(f'/home/xj/toolAugEnv/code/toolConstraint/results/{args.set_type}/plan_{idx}.json','w') as f:
726
+ # json.dump(generated_plan,f)
727
+
728
+ # with open(f'/home/xj/toolAugEnv/code/toolConstraint/results/{args.set_type}/{args.model_type}_{args.method}{suffix}_commonsense_constraint.json','w') as f:
729
+ # json.dump(commonsense_statistic,f)
730
+
731
+ # if args.set_type == 'dev':
732
+ # print(f"Model:{args.model_type} Method:{args.method} Set: {args.set_type} \nDeliver Rate: {deliver_cnt/180}" )
733
+ # elif args.set_type == 'test':
734
+ # print(f"Model:{args.model_type} Method:{args.method} Set: {args.set_type} \nDeliver Rate: {deliver_cnt/1000}" )
735
+
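(For reference: every checker in commonsenseConstraint.py indexes the same per-day dictionary keys, so the expected input shape is worth spelling out. Below is a sketch of one query/plan pair in that shape; the field names and formats follow the checks above, while the concrete cities, venues, and flight number are invented, and the "Name, City" convention for venues is inferred from get_valid_name_city.)

    # Illustrative shapes for evaluation(query_data, tested_data); values are invented.
    query_data = {
        "org": "City A", "dest": "Some State", "days": 3,
        "visiting_city_number": 1, "people_number": 1, "level": "easy",
    }
    tested_data = [
        {
            "current_city": "from City A to City B",
            "transportation": "Flight Number: F0000001, from City A to City B",
            "breakfast": "-",                        # '-' marks an empty slot
            "lunch": "Restaurant X, City B",
            "dinner": "Restaurant Y, City B",
            "attraction": "Attraction Z, City B;",   # ';'-separated, ';'-terminated
            "accommodation": "Hotel W, City B",
        },
        # ... one dict per day; the last day's current_city returns to the origin,
        # since is_reasonalbe_visiting_city requires a closed circle.
    ]
    # Each checker returns (True, None) or (False, "reason"); evaluation() returns
    # a dict mapping checker names to those tuples.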
eval.py ADDED
@@ -0,0 +1,181 @@
1
+ from commonsenseConstraint import evaluation as commonsense_eval
2
+ from hardConstraint import evaluation as hard_eval
3
+ import json
4
+ from tqdm import tqdm
5
+ from datasets import load_dataset
6
+
7
+
8
+ def load_line_json_data(filename):
9
+ data = []
10
+ with open(filename, 'r', encoding='utf-8') as f:
11
+ for line in f.read().strip().split('\n'):
12
+ unit = json.loads(line)
13
+ data.append(unit)
14
+ return data
15
+
16
+ def count_true_false(data):
17
+ """Count the number of true and false values in a list."""
18
+ true_count = data.count(True)
19
+ false_count = data.count(False)
20
+ return true_count, false_count
21
+
22
+ def statistics(commonsense_statistic):
23
+ """Generate statistics for each level and day in the given data with a different structure."""
24
+ result = {level: {day: {} for day in commonsense_statistic[level]} for level in commonsense_statistic}
25
+
26
+ for level, days in commonsense_statistic.items():
27
+ for day, dicts in days.items():
28
+ for dct in dicts:
29
+ if dct:
30
+ for key, data in dct.items():
31
+ true_count, false_count = count_true_false(data)
32
+ if key not in result[level][day]:
33
+ result[level][day][key] = {"true": 0, "false": 0}
34
+ result[level][day][key]["true"] += true_count
35
+ result[level][day][key]["false"] += false_count
36
+
37
+ return result
38
+
39
+
40
+ def eval_score(validation_or_test: str, file_path: str, TOKEN):
41
+
42
+ if validation_or_test == 'validation':
43
+ query_data_list = load_dataset('osunlp/TravelBenchEval','validation',token=TOKEN)['validation']
44
+ elif validation_or_test == 'test':
45
+ query_data_list = load_dataset('osunlp/TravelBenchEval','test',token=TOKEN)['test']
46
+
47
+ query_data_list = [x for x in query_data_list]
48
+ hardConstraint_statistic= {level:{day:[] for day in [3,5,7]} for level in ['easy','medium','hard']}
49
+ commonsenseConstraint_statistic = {level:{day:[] for day in [3,5,7]} for level in ['easy','medium','hard']}
50
+ tested_plans = load_line_json_data(file_path)
51
+ delivery_cnt = 0
52
+ plan_constraint_store = []
53
+ for idx in tqdm(range(0,len(query_data_list))):
54
+ query_data = query_data_list[idx]
55
+ tested_plan = tested_plans[idx]
56
+ if type(query_data) == str:
57
+ query_data = eval(query_data)
58
+ if type(tested_plan) == str:
59
+ tested_plan = eval(tested_plan)
60
+ if type(query_data['local_constraint']) == str:
61
+ query_data['local_constraint'] = eval(query_data['local_constraint'])
62
+
63
+ if tested_plan['plan']:
64
+ delivery_cnt += 1
65
+ commonsense_info_box = commonsense_eval(query_data,tested_plan['plan'])
66
+ else:
67
+ commonsense_info_box = None
68
+
69
+ if commonsense_info_box and commonsense_info_box['is_not_absent'][0] and commonsense_info_box['is_valid_information_in_sandbox'][0]:
70
+ hard_info_box = hard_eval(query_data,tested_plan['plan'])
71
+ else:
72
+ hard_info_box = None
73
+
74
+ plan_constraint_store.append({'commonsense_constraint':commonsense_info_box,'hard_constraint':hard_info_box})
75
+
76
+ commonsenseConstraint_statistic[query_data['level']][query_data['days']].append(commonsense_info_box)
77
+ hardConstraint_statistic[query_data['level']][query_data['days']].append(hard_info_box)
78
+
79
+ commonsenseConstraint_statistic_processed = statistics(commonsenseConstraint_statistic)
80
+ hardConstraint_statistic_processed = statistics(hardConstraint_statistic)
81
+ # print(commonsenseConstraint_statistic_processed)
82
+ # print(hardConstraint_statistic_processed)
83
+ constraint_record = {key: {day: {'house rule':0, 'cuisine':0, 'room type':0, 'transportation':0} for day in [3,5,7]} for key in ['medium','hard']}
84
+ constraint_mapping = {'house rule':'valid_room_rule','cuisine':'valid_cuisine','room type':'valid_room_type','transportation':'valid_transportation'}
85
+ mapping_constraint_record = {key: {day: {'valid_room_rule':0, 'valid_cuisine':0, 'valid_room_type':0, 'valid_transportation':0} for day in [3,5,7]} for key in ['medium','hard']}
86
+ count_record = {key:{day:0 for day in [3,5,7]} for key in ['easy','medium','hard']}
87
+
88
+ for unit in query_data_list:
89
+ count_record[unit['level']][unit['days']] += 1
90
+ for key in constraint_record['medium'][3]:
91
+ if unit['local_constraint'][key] != None:
92
+ constraint_record[unit['level']][unit['days']][key] += 1
93
+ mapping_constraint_record[unit['level']][unit['days']][constraint_mapping[key]] += 1
94
+
95
+ data_record = {key:{day:[] for day in [3,5,7]} for key in ['easy','medium','hard']}
96
+
97
+ constraint_dis_record = {"commonsense":{"pass":0,"total":0},"hard":{"pass":0,"total":0}}
98
+
99
+ for constraint in ['commonsense','hard']:
100
+ if constraint == 'commonsense':
101
+ constraint_statistic = commonsenseConstraint_statistic_processed
102
+ elif constraint == 'hard':
103
+ constraint_statistic = hardConstraint_statistic_processed
104
+
105
+ key_dict = {'commonsense':['is_valid_information_in_current_city','is_valid_information_in_sandbox','is_reasonalbe_visiting_city','is_valid_restaurants','is_valid_transportation','is_valid_attractions','is_valid_accommodation','is_not_absent'],'hard':['valid_cost','valid_room_rule','valid_cuisine','valid_room_type','valid_transportation']}
106
+
107
+ for key in constraint_statistic:
108
+ # level
109
+ for key2 in constraint_statistic[key]:
110
+ # day
111
+ # print(key2)
112
+ # key2 = eval(key2)
113
+ if key2 == -1:
114
+ print(constraint_statistic[key])
115
+ exit(0)
116
+ for key3 in key_dict[constraint]:
117
+ data_record[key][key2].append('0/0')
118
+ if key3 in constraint_statistic[key][key2]:
119
+ constraint_dis_record[constraint]['pass'] += constraint_statistic[key][key2][key3]['true']
120
+ if constraint == 'hard':
121
+ if key == 'hard' and key3 in ['valid_room_rule','valid_cuisine','valid_room_type','valid_transportation']:
122
+ data_record[key][key2][-1] = f"{constraint_statistic[key][key2][key3]['true']}/{mapping_constraint_record[key][key2][key3]}"
123
+ constraint_dis_record[constraint]['total'] += mapping_constraint_record[key][key2][key3]
124
+ elif key == 'medium' and key3 in ['valid_room_rule','valid_cuisine','valid_room_type']:
125
+ data_record[key][key2][-1] = f"{constraint_statistic[key][key2][key3]['true']}/{mapping_constraint_record[key][key2][key3]}"
126
+ constraint_dis_record[constraint]['total'] += mapping_constraint_record[key][key2][key3]
127
+ else:
128
+ data_record[key][key2][-1] = f"{constraint_statistic[key][key2][key3]['true']}/{count_record[key][key2]}"
129
+ if key3 in ['valid_cost','valid_visitng_city_number','valid_days']:
130
+ constraint_dis_record[constraint]['total'] += count_record[key][key2]
131
+ else:
132
+ data_record[key][key2][-1] = f"{constraint_statistic[key][key2][key3]['true']}/{count_record[key][key2]}"
133
+ constraint_dis_record[constraint]['total'] += count_record[key][key2]
134
+
135
+ final_all_cnt = 0
136
+ final_commonsense_cnt = 0
137
+ final_hardConstraint_cnt = 0
138
+ final_all_cnt_map = {level:0 for level in ['easy','medium','hard']}
139
+ for idx in (range(0,len(query_data_list))):
140
+ if plan_constraint_store[idx]['commonsense_constraint']:
141
+ final_commonsense_pass = True
142
+ final_hardConstraint_pass = True
143
+ for item in plan_constraint_store[idx]['commonsense_constraint']:
144
+ if plan_constraint_store[idx]['commonsense_constraint'][item][0] is not None and not plan_constraint_store[idx]['commonsense_constraint'][item][0]:
145
+ final_commonsense_pass = False
146
+ break
147
+ if plan_constraint_store[idx]['hard_constraint'] is None:
148
+ continue
149
+ for item in plan_constraint_store[idx]['hard_constraint']:
150
+ if plan_constraint_store[idx]['hard_constraint'][item][0] is not None and plan_constraint_store[idx]['hard_constraint'][item][0] == False:
151
+ final_hardConstraint_pass = False
152
+ break
153
+
154
+ if final_commonsense_pass:
155
+ final_commonsense_cnt += 1
156
+ if final_hardConstraint_pass:
157
+ final_hardConstraint_cnt += 1
158
+ if final_commonsense_pass and final_hardConstraint_pass:
159
+ final_all_cnt += 1
160
+ final_all_cnt_map[query_data_list[idx]['level']] += 1
161
+
162
+ result = {}
163
+
164
+ if validation_or_test == 'validation':
165
+ result['Delivery Rate'] = delivery_cnt / 180
166
+ result['Commonsense Constraint Micro Pass Rate'] = constraint_dis_record['commonsense']['pass'] / 1440
167
+ result['Commonsense Constraint Macro Pass Rate'] = final_commonsense_cnt / 180
168
+ result['Hard Constraint Micro Pass Rate'] = constraint_dis_record['hard']['pass'] / 420
169
+ result['Hard Constraint Macro Pass Rate'] = final_hardConstraint_cnt / 180
170
+ result['Final Pass Rate'] = final_all_cnt / 180
171
+
172
+ elif validation_or_test == 'test':
173
+ result['Delivery Rate'] = delivery_cnt / 1000
174
+ result['Commonsense Constraint Micro Pass Rate'] = constraint_dis_record['commonsense']['pass'] / 8000
175
+ result['Commonsense Constraint Macro Pass Rate'] = final_commonsense_cnt / 1000
176
+ result['Hard Constraint Micro Pass Rate'] = constraint_dis_record['hard']['pass'] / 2290
177
+ result['Hard Constraint Macro Pass Rate'] = final_hardConstraint_cnt / 1000
178
+ result['Final Pass Rate'] = final_all_cnt / 1000
179
+
180
+ return result
181
+
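(For reference: eval_score loads the submission with load_line_json_data and indexes it in the same order as the osunlp/TravelBenchEval split, so a submission is a JSONL file with one record per query and a 'plan' field in each record. A minimal sketch of writing such a file follows; the file name and plan payload are placeholders, with the per-day shape described in the commonsenseConstraint.py section above.)

    import json

    # One JSON object per line, in the same order as the evaluation split.
    records = [
        {"plan": [{"current_city": "from City A to City B", "transportation": "...",
                   "breakfast": "-", "lunch": "-", "dinner": "-",
                   "attraction": "-", "accommodation": "-"}]},
        {"plan": None},  # a query with no delivered plan still needs its own line
    ]
    with open("submission.jsonl", "w", encoding="utf-8") as f:
        for record in records:
            f.write(json.dumps(record) + "\n")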
hardConstraint.py ADDED
@@ -0,0 +1,266 @@
1
+ from annotation.src.utils import get_valid_name_city,extract_before_parenthesis,extract_numbers_from_filenames
2
+ from tools.flights.apis import Flights
3
+ from tools.accommodations.apis import Accommodations
4
+ from tools.restaurants.apis import Restaurants
5
+ from tools.googleDistanceMatrix.apis import GoogleDistanceMatrix
6
+ from tools.attractions.apis import Attractions
7
+ import math
8
+ import json
9
+ import re
10
+ import numpy as np
11
+ import os
12
+ import sys
13
+ from tqdm import tqdm
14
+ import argparse
15
+
16
+ sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))
17
+ os.chdir(os.path.dirname(os.path.abspath(__file__)))
18
+
19
+
20
+ flight = Flights()
21
+ accommodation = Accommodations()
22
+ restaurants = Restaurants()
23
+ googleDistanceMatrix = GoogleDistanceMatrix()
24
+ attractions = Attractions()
25
+
26
+
27
+ def load_line_json_data(filename):
28
+ data = []
29
+ with open(filename, 'r', encoding='utf-8') as f:
30
+ for line in f.read().strip().split('\n'):
31
+ unit = json.loads(line)
32
+ data.append(unit)
33
+ return data
34
+
35
+
36
+ def convert_bool_values(item):
37
+ if isinstance(item, dict):
38
+ # If the item is a dictionary, recurse on each value
39
+ return {key: convert_bool_values(value) for key, value in item.items()}
40
+ elif isinstance(item, list):
41
+ # If the item is a list, recurse on each item in the list
42
+ return [convert_bool_values(value) for value in item]
43
+ elif isinstance(item, tuple):
44
+ # If the item is a tuple, recurse on each item in the tuple and repackage as a tuple
45
+ return tuple(convert_bool_values(value) for value in item)
46
+ elif isinstance(item, np.bool_): # Here we check for numpy's bool_ type
47
+ # If the item is a numpy bool_, convert it to a standard Python bool
48
+ return bool(item)
49
+ else:
50
+ # If the item is any other type, return it unchanged
51
+ return item
52
+
53
+
54
+
55
+
56
+ def extract_from_to(text: str):
57
+ """
58
+ Extracts 'A' and 'B' from the format "from A to B" in the given text, with B ending at a comma or the end of the string.
59
+
60
+ Args:
61
+ - text (str): The input string.
62
+
63
+ Returns:
64
+ - tuple: A tuple containing 'A' and 'B'. If no match is found, returns (None, None).
65
+ """
66
+ pattern = r"from\s+(.+?)\s+to\s+([^,]+)(?=[,\s]|$)"
67
+ matches = re.search(pattern, text)
68
+ return matches.groups() if matches else (None, None)
69
+
70
+
71
+ def get_total_cost(question, tested_data):
72
+ total_cost = 0
73
+ for i in range(min(question['days'],len(tested_data))):
74
+ unit = tested_data[i]
75
+ # transportation
76
+ if unit['transportation'] and unit['transportation'] != '-':
77
+ value = unit['transportation']
78
+ org_city, dest_city = extract_from_to(value)
79
+ if org_city == None or dest_city == None:
80
+ org_city, dest_city = extract_from_to(unit['current_city'])
81
+
82
+ if org_city == None or dest_city == None:
83
+ pass
84
+ else:
85
+ if 'flight number' in value.lower():
86
+ res = flight.data[flight.data['Flight Number'] == value.split('Flight Number: ')[1].split(',')[0]]
87
+ if len(res) > 0:
88
+ total_cost += res['Price'].values[0] * question['people_number']
89
+
90
+ elif 'self-driving' in value.lower() or 'taxi' in value.lower():
91
+ if 'self-driving' in value.lower():
92
+ # print(org_city,dest_city)
93
+ cost = googleDistanceMatrix.run_for_evaluation(org_city,dest_city,'self-driving')['cost']
94
+ total_cost += cost * math.ceil(question['people_number'] * 1.0 / 5)
95
+ else:
96
+ cost = googleDistanceMatrix.run_for_evaluation(org_city,dest_city,'taxi')['cost']
97
+ total_cost += cost * math.ceil(question['people_number'] * 1.0 / 4)
98
+
99
+ # breakfast
100
+ if unit['breakfast'] and unit['breakfast'] != '-':
101
+ name, city = get_valid_name_city(unit['breakfast'])
102
+ res = restaurants.data[(restaurants.data['Name'].astype(str).str.contains(re.escape(name))) & (restaurants.data['City'] == city)]
103
+ if len(res) > 0:
104
+ total_cost += res['Average Cost'].values[0] * question['people_number']
105
+
106
+
107
+ # lunch
108
+ if unit['lunch'] and unit['lunch'] != '-':
109
+ name, city = get_valid_name_city(unit['lunch'])
110
+ res = restaurants.data[(restaurants.data['Name'].astype(str).str.contains(re.escape(name))) & (restaurants.data['City'] == city)]
111
+ if len(res) > 0:
112
+ total_cost += res['Average Cost'].values[0] * question['people_number']
113
+
114
+ # dinner
115
+ if unit['dinner'] and unit['dinner'] != '-':
116
+ name, city = get_valid_name_city(unit['dinner'])
117
+ res = restaurants.data[(restaurants.data['Name'].astype(str).str.contains(re.escape(name))) & (restaurants.data['City'] == city)]
118
+ if len(res) > 0:
119
+ total_cost += res['Average Cost'].values[0] * question['people_number']
120
+
121
+ # accommodation
122
+ if unit['accommodation'] and unit['accommodation'] != '-':
123
+ name, city = get_valid_name_city(unit['accommodation'])
124
+ res = accommodation.data[(accommodation.data['NAME'].astype(str).str.contains(re.escape(name))) & (accommodation.data['city'] == city)]
125
+ if len(res) > 0:
126
+ total_cost += res['price'].values[0] * math.ceil(question['people_number'] * 1.0 / res['maximum occupancy'].values[0])
127
+ # print(total_cost)
128
+ return total_cost
129
+
130
+
131
+ def is_valid_room_rule(question, tested_data):
132
+
133
+ if question['local_constraint']['house rule'] is None:
134
+ return None,None
135
+
136
+ for i in range(min(question['days'],len(tested_data))):
137
+ unit = tested_data[i]
138
+ if unit['accommodation'] and unit['accommodation'] != '-':
139
+ name, city = get_valid_name_city(unit['accommodation'])
140
+ res = accommodation.data[(accommodation.data['NAME'].astype(str).str.contains(re.escape(name))) & (accommodation.data['city'] == city)]
141
+ if len(res) > 0:
142
+ if question['local_constraint']['house rule'] == 'smoking' and 'No smoking' in str(res['house_rules'].values[0]):
143
+ return False, f"The house rule should be {question['local_constraint']['house rule']}."
144
+ if question['local_constraint']['house rule'] == 'parities' and 'No parties' in str(res['house_rules'].values[0]):
145
+ return False, f"The house rule should be {question['local_constraint']['house rule']}."
146
+ if question['local_constraint']['house rule'] == 'children under 10' and 'No children under 10' in str(res['house_rules'].values[0]):
147
+ return False, f"The house rule should be {question['local_constraint']['house rule']}."
148
+ if question['local_constraint']['house rule'] == 'visitors' and 'No visitors' in str(res['house_rules'].values[0]):
149
+ return False, f"The house rule should be {question['local_constraint']['house rule']}."
150
+ if question['local_constraint']['house rule'] == 'pets' and 'No pets' in str(res['house_rules'].values[0]):
151
+ return False, f"The house rule should be {question['local_constraint']['house rule']}."
152
+
153
+
154
+ return True, None
155
+
156
+
157
+
158
+ def is_valid_cuisine(question, tested_data):
159
+ cuisine_set = set()
160
+ if question['local_constraint']['cuisine']:
161
+ for i in range(min(question['days'],len(tested_data))):
162
+ unit = tested_data[i]
163
+
164
+ if unit['breakfast'] and unit['breakfast'] != '-':
165
+ name, city = get_valid_name_city(unit['breakfast'])
166
+ if city == question['org']:
167
+ continue
168
+ res = restaurants.data[(restaurants.data['Name'].astype(str).str.contains(re.escape(name))) & (restaurants.data['City'] == city)]
169
+ if len(res) > 0:
170
+ for cuisine in question['local_constraint']['cuisine']:
171
+ if cuisine in res.iloc[0]['Cuisines']:
172
+ cuisine_set.add(cuisine)
173
+
174
+ if unit['lunch'] and unit['lunch'] != '-':
175
+ name, city = get_valid_name_city(unit['lunch'])
176
+ if city == question['org']:
177
+ continue
178
+ res = restaurants.data[(restaurants.data['Name'].astype(str).str.contains(re.escape(name))) & (restaurants.data['City'] == city)]
179
+ if len(res) > 0:
180
+ for cuisine in question['local_constraint']['cuisine']:
181
+ if cuisine in res.iloc[0]['Cuisines']:
182
+ cuisine_set.add(cuisine)
183
+
184
+ if unit['dinner'] and unit['dinner'] != '-':
185
+ name, city = get_valid_name_city(unit['dinner'])
186
+ if city == question['org']:
187
+ continue
188
+ res = restaurants.data[(restaurants.data['Name'].astype(str).str.contains(re.escape(name))) & (restaurants.data['City'] == city)]
189
+ if len(res) > 0:
190
+ for cuisine in question['local_constraint']['cuisine']:
191
+ if cuisine in res.iloc[0]['Cuisines']:
192
+ cuisine_set.add(cuisine)
193
+
194
+ if len(cuisine_set) == len(question['local_constraint']['cuisine']):
195
+ return True, None
196
+ else:
197
+ # judge which cuisine is not satisfied
198
+ for cuisine in question['local_constraint']['cuisine']:
199
+ if cuisine not in cuisine_set:
200
+ return False, f"The cuisine {cuisine} is not satisfied."
201
+ # return False, f"The cuisine should be {question['local_constraint']['cuisine']}."
202
+ else:
203
+ return None,None
204
+
205
+
206
+ def is_valid_transportation(question, tested_data):
207
+ if question['local_constraint']['transportation'] is None:
208
+ return None,None
209
+ for i in range(min(question['days'],len(tested_data))):
210
+ unit = tested_data[i]
211
+ if unit['transportation'] and unit['transportation'] != '-':
212
+ value = unit['transportation']
213
+ if question['local_constraint']['transportation'] == 'no flight' and 'Flight' in value:
214
+ return False, f"The transportation should not be {question['local_constraint']['transportation']}."
215
+ elif question['local_constraint']['transportation'] == 'no self-driving' and 'Self-driving' in value:
216
+ return False, f"The transportation should not be {question['local_constraint']['transportation']}."
217
+
218
+ return True, None
219
+
220
+
221
+ def is_valid_room_type(question, tested_data):
222
+ if question['local_constraint']['room type'] is None:
223
+ return None,None
224
+ for i in range(min(question['days'],len(tested_data))):
225
+ unit = tested_data[i]
226
+ if unit['accommodation'] and unit['accommodation'] != '-':
227
+ name, city = get_valid_name_city(unit['accommodation'])
228
+ res = accommodation.data[(accommodation.data['NAME'].astype(str).str.contains(re.escape(name))) & (accommodation.data['city'] == city)]
229
+ if len(res) > 0:
230
+ if question['local_constraint']['room type'] == 'not shared room' and res['room type'].values[0] == 'Shared room':
231
+ return False, f"The room type should be {question['local_constraint']['room type']}."
232
+ # "shared room", "not shared room", "private room", "entire room"
233
+ elif question['local_constraint']['room type'] == 'shared room' and res['room type'].values[0] != 'Shared room':
234
+ return False, f"The room type should be {question['local_constraint']['room type']}."
235
+
236
+ elif question['local_constraint']['room type'] == 'private room' and res['room type'].values[0] != 'Private room':
237
+ return False, f"The room type should be {question['local_constraint']['room type']}."
238
+
239
+ elif question['local_constraint']['room type'] == 'entire room' and res['room type'].values[0] != 'Entire home/apt':
240
+ return False, f"The room type should be {question['local_constraint']['room type']}."
241
+
242
+ return True, None
243
+
244
+
245
+ def evaluation(query_data, tested_data):
246
+ return_info = {}
247
+ return_info['valid_cuisine'] = is_valid_cuisine(query_data, tested_data)
248
+ return_info['valid_room_rule'] = is_valid_room_rule(query_data, tested_data)
249
+ return_info['valid_transportation'] = is_valid_transportation(query_data, tested_data)
250
+ return_info['valid_room_type'] = is_valid_room_type(query_data, tested_data)
251
+ return_info['valid_cost'] = (bool(get_total_cost(query_data, tested_data) <= query_data['budget']), None)
252
+ return return_info
253
+
254
+ def boolean_evaluation(query_data, tested_data):
255
+ return_info = {}
256
+ return_info['valid_cuisine'] = is_valid_cuisine(query_data, tested_data)
257
+ return_info['valid_room_rule'] = is_valid_room_rule(query_data, tested_data)
258
+ return_info['valid_transportation'] = is_valid_transportation(query_data, tested_data)
259
+ return_info['valid_room_type'] = is_valid_room_type(query_data, tested_data)
260
+ return_info['valid_cost'] = (bool(get_total_cost(query_data, tested_data) <= query_data['budget']), None)
261
+ for key in return_info:
262
+ if return_info[key][0] == False:
263
+ print(key)
264
+ return False
265
+ return True
266
+
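(For reference: unlike the commonsense checks, a hard-constraint checker returns (None, None) when the query simply does not set that constraint, so None and False have to be distinguished when reading the result. A short sketch of inspecting the dict returned by evaluation() above; it assumes the hardConstraint.py module context, and query_data / tested_data are placeholders with the shapes used elsewhere in this commit.)

    # result is a dict of (status, reason) pairs, where status is True, False, or
    # None (constraint not present in the query), e.g.:
    #   {'valid_cuisine': (None, None), 'valid_room_rule': (True, None),
    #    'valid_transportation': (False, 'The transportation should not be no flight.'),
    #    'valid_room_type': (None, None), 'valid_cost': (True, None)}
    result = evaluation(query_data, tested_data)
    failed = {key: reason for key, (status, reason) in result.items() if status is False}
    print("all hard constraints satisfied" if not failed else failed)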
requirements.txt CHANGED
@@ -1,4 +1,3 @@
  datasets==2.16.1
  gradio==3.50.2
- huggingface-hub==0.20.2
- APScheduler==3.10.1
+ huggingface-hub==0.20.2