File size: 8,369 Bytes
acc4386
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
db3f915
acc4386
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
import json
import re
import os
import gradio as gr

def load_line_json_data(filename):
    """
    Load a JSON Lines file into a list of decoded objects.

    Args:
    - filename (str): Path to a UTF-8 text file holding one JSON document per line.

    Returns:
    - list: The decoded value of every non-blank line, in file order. An empty
      or whitespace-only file yields an empty list.

    Raises:
    - json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    data = []
    with open(filename, 'r', encoding='utf-8') as f:
        # Stream the file line by line instead of read().split('\n'), and skip
        # blank lines so an empty file or a stray empty line never reaches
        # json.loads('') (which raises).
        for line in f:
            line = line.strip()
            if line:
                data.append(json.loads(line))
    return data

def extract_query_number(query_string):
    """
    Pull the numeric id out of a label such as "Query X" or "Query X --- Done".

    Args:
    - query_string (str): Text searched for "Query " followed by digits.

    Returns:
    - int: The first number that follows "Query ", or None when there is none.
    """
    found = re.search(r"Query (\d+)", query_string)
    if found is None:
        return None
    return int(found.group(1))

def create_data_display(css_content,data,annotation_idx):
    """
    Render one query record as an HTML snippet with an inline stylesheet.

    Args:
    - css_content (str): CSS text placed inside the <style> element.
    - data (list): Query records; the record at position annotation_idx-1 is shown.
    - annotation_idx (int): 1-based query number, also printed in the heading.

    Returns:
    - str: HTML containing the query text and its trip / constraint fields.
    """
    # Look the record up once; annotation_idx is 1-based while data is 0-based.
    record = data[annotation_idx-1]
    return f"""
    <style>
    {css_content}
    </style>
    <div>
        <span class="query-highlighted"><strong>Query {annotation_idx}:</strong> {record['query']}</span><br>
        <span class="highlighted"><strong>Day:</strong> {record['days']}</span>
        <span class="highlighted"><strong>Visiting City Number:</strong> {record['visiting_city_number']}</span>
        <span class="highlighted"><strong>Date:</strong> {record['date']}</span>
        <span class="highlighted"><strong>Departure:</strong> {record['org']}</span>
        <span class="highlighted"><strong>Destination:</strong> {record['dest']}</span><br>
        <span class="highlighted-alt"><strong>People Number:</strong> {record['people_number']}</span>
        <span class="highlighted-alt"><strong>Budget:</strong> {record['budget']}</span>
        <span class="highlighted-alt"><strong>House Rule:</strong> {record['local_constraint']['house rule']}</span>
        <span class="highlighted-alt"><strong>Cuisine:</strong> {record['local_constraint']['cuisine']}</span>
        <span class="highlighted-alt"><strong>Room Type:</strong> {record['local_constraint']['room type']}</span>
        <span class="highlighted-alt"><strong>Transportation:</strong> {record['local_constraint']['transportation']}</span><br>
    </div>
    """

def judge_valid_info(info):
    """
    Tell whether a field holds real content.

    Args:
    - info: The field value (normally a string, possibly None).

    Returns:
    - bool: False for an empty/falsy value or for the "no need to fill in"
      placeholder sentence, True for anything else.
    """
    skip_notice = "You don't need to fill in the information for this or later days."
    if info == "" or not info:
        return False
    return not (info == skip_notice)

def judge_submit_info(info, current_day, label, annotation_data, *tested_data):
    """
    Validate one submitted field of a day's plan against the query constraints.

    Args:
    - info (str): The submitted value; "-" is accepted without further checks.
    - current_day: Day number, used only in error messages.
    - label (str): Field name; "transportation" and "accommodation" get extra checks.
    - annotation_data (dict): Query record handed to the constraint checkers.
    - *tested_data: Extra data; the first item is passed to the accommodation checkers.

    Returns:
    - bool: True when no check fails.

    Raises:
    - gr.Error: If the value is empty or breaks a transportation / room type /
      house rule constraint.
    """
    if info == "" or not info:
        raise gr.Error("Day {} {} is empty!".format(current_day, label))

    # "-" is accepted as-is, so no constraint check applies to it.
    if info == "-":
        return True

    if label == "transportation":
        transportation_ok = judge_valid_transportation(info, annotation_data)
        if not transportation_ok:
            raise gr.Error("Day {} {} is invalid! Please note the transportation.".format(current_day, label))
    elif label == "accommodation":
        # Room type is checked before house rules, so its error wins when both fail.
        if not judge_valid_room_type(info, annotation_data, tested_data[0]):
            raise gr.Error("Day {} {} is invalid! Please note the room type.".format(current_day, label))
        if not judge_valid_room_rule(info, annotation_data, tested_data[0]):
            raise gr.Error("Day {} {} is invalid! Please note the house rules.".format(current_day, label))

    return True


def judge_valid_transportation(info, annotation_data):
    """
    Check a transportation entry against the query's transportation constraint.

    Args:
    - info (str): The transportation entry text.
    - annotation_data (dict): Query record; ['local_constraint']['transportation'] is read.

    Returns:
    - bool: False when the constraint is 'no flight' and the entry mentions
      'Flight', or is 'no self-driving' and the entry mentions 'Self-driving';
      True otherwise.
    """
    constraint = annotation_data['local_constraint']['transportation']
    # (constraint value, keyword whose presence in the entry violates it)
    banned_keywords = (
        ('no flight', 'Flight'),
        ('no self-driving', 'Self-driving'),
    )
    for banned_rule, keyword in banned_keywords:
        if constraint == banned_rule and keyword in info:
            return False
    return True

def judge_valid_room_type(info, annotation_data, accommodation_data_all):
    """
    Check an accommodation entry against the query's room type constraint.

    Args:
    - info (str): The accommodation entry, resolved to a listing via get_filtered_data.
    - annotation_data (dict): Query record; ['local_constraint']['room type'] is read.
      Handled values: "shared room", "not shared room", "private room", "entire room".
    - accommodation_data_all: Accommodation table with a 'room type' column
      (presumably a pandas DataFrame -- confirm against the caller).

    Returns:
    - bool: False when the listing's room type contradicts the constraint,
      True otherwise (including for any other constraint value).
    """
    listing = get_filtered_data(info, accommodation_data_all)
    wanted = annotation_data['local_constraint']['room type']

    # The listing's room type is read only inside a matching branch, exactly as
    # the constraint comparisons short-circuit, so an unconstrained query never
    # indexes into the filtered rows.
    if wanted == 'not shared room' and listing['room type'].values[0] == 'Shared room':
        return False

    # Constraints that demand one specific room type value in the listing.
    required_types = (
        ('shared room', 'Shared room'),
        ('private room', 'Private room'),
        ('entire room', 'Entire home/apt'),
    )
    for constraint, required in required_types:
        if wanted == constraint and listing['room type'].values[0] != required:
            return False

    return True

def judge_valid_room_rule(info, annotation_data, accommodation_data_all):
    """
    Check that the chosen accommodation does not forbid what the query requires.

    Args:
    - info (str): The accommodation entry, resolved to a listing via get_filtered_data.
    - annotation_data (dict): Query record; ['local_constraint']['house rule'] is read.
    - accommodation_data_all: Accommodation table with a 'house_rules' column
      (presumably a pandas DataFrame -- confirm against the caller).

    Returns:
    - bool: False when the listing's house rules text contains the "No ..."
      phrase that contradicts the requested house rule, True otherwise
      (including when the constraint is None or not one of the known values).
    """
    # Requested house rule -> phrase in the listing's rules that rules it out.
    # 'parities' is the misspelling this check historically compared against,
    # which meant a 'parties' constraint was never enforced. The correct
    # spelling is now handled; the old one stays as an alias so any data that
    # carries it keeps its previous outcome.
    forbidding_phrases = (
        ('smoking', 'No smoking'),
        ('parties', 'No parties'),
        ('parities', 'No parties'),
        ('children under 10', 'No children under 10'),
        ('visitors', 'No visitors'),
        ('pets', 'No pets'),
    )

    accommodation_data_filtered = get_filtered_data(info, accommodation_data_all)
    house_rule = annotation_data['local_constraint']['house rule']

    for constraint, phrase in forbidding_phrases:
        if house_rule == constraint:
            # The listing is only inspected when a known constraint applies,
            # so unconstrained queries never index into the filtered rows.
            # NOTE(review): an entry that matches no listing presumably makes
            # .values[0] fail here -- confirm how callers want that handled.
            listing_rules = str(accommodation_data_filtered['house_rules'].values[0])
            return phrase not in listing_rules

    return True

def judge_valid_cuisine(info, annotation_data, restaurant_data_all, cuisine_set: set):
    """
    Record which requested cuisines a restaurant entry covers.

    Args:
    - info (str): The restaurant entry; "-" means nothing was chosen.
    - annotation_data (dict): Query record; ['local_constraint']['cuisine'] and ['org'] are read.
    - restaurant_data_all: Restaurant table with 'Name', 'City' and 'Cuisines' columns
      (presumably a pandas DataFrame -- confirm against the caller).
    - cuisine_set (set): Cuisines found so far; updated in place.

    Returns:
    - set: The same cuisine_set object, with every requested cuisine that appears
      in the matched restaurant's 'Cuisines' value added.
    """
    if info == "-":
        return cuisine_set

    requested = annotation_data['local_constraint']['cuisine']
    if requested is None:
        return cuisine_set

    # Entries that mention the departure city are skipped.
    if annotation_data['org'] in info:
        return cuisine_set

    matches = get_filtered_data(info, restaurant_data_all,('Name','City'))
    # The first matching row is read once per requested cuisine, so an empty
    # request list never touches the filtered rows.
    cuisine_set.update(
        wanted for wanted in requested
        if wanted in matches.iloc[0]['Cuisines']
    )
    return cuisine_set




def get_valid_name_city(info):
    """
    Split an entry of the form "<name>, <city>" into its name and city.

    The city part cannot contain a comma, so an entry with several commas is
    split at the last one. Both parts are stripped of surrounding whitespace
    and anything from a "(...)" group onwards is dropped from the city.

    Args:
    - info (str): The entry text.

    Returns:
    - tuple: (name, city), or ("-", "-") after printing a notice when the
      entry does not match the expected form.
    """
    parsed = re.search(r'(.*?),\s*([^,]+)(\(\w[\w\s]*\))?$', info)
    if parsed is None:
        print(f"{info} can not be parsed, '-' will be used instead.")
        return "-","-"

    name = parsed.group(1).strip()
    raw_city = parsed.group(2).strip()
    city = extract_before_parenthesis(raw_city).strip()
    return name, city

    
def extract_numbers_from_filenames(directory):
    """
    Collect the numbers of all "annotation_<number>.json" files in a directory.

    Args:
    - directory (str): Path of the directory to scan (not recursive).

    Returns:
    - list: The integer from each file named exactly "annotation_<digits>.json",
      in the order os.listdir reports the files.
    """
    # The dot is escaped and the whole name must match, so look-alikes such as
    # "annotation_3xjson" or "annotation_4.json.bak" are no longer counted.
    pattern = re.compile(r'annotation_(\d+)\.json')

    # Match each name once and reuse the match object for the number.
    matches = (pattern.fullmatch(file) for file in os.listdir(directory))
    return [int(match.group(1)) for match in matches if match]

def get_city_list(days, deparure_city, destination,
                  city_set_path='/home/user/app/database/background/citySet_with_states.txt'):
    """
    Build the list of cities offered for a trip, starting with the departure city.

    For a 3-day trip `destination` is appended as-is. For any other length
    `destination` is used as a state name: every city listed for that state in
    the city/state file is appended as "<city>(<state>)", except the departure city.

    Args:
    - days (int): Trip length in days.
    - deparure_city (str): Departure city; always the first element of the result.
    - destination (str): Destination city (3-day trips) or state (otherwise).
    - city_set_path (str): Tab-separated "city<TAB>state" file, one pair per line.
      Defaults to the path the application has always used.

    Returns:
    - list: The departure city followed by the destination entries.

    Raises:
    - KeyError: If `destination` is not a state present in the file (non-3-day trips).
    - ValueError: If a non-blank line does not consist of exactly two tab-separated fields.
    """
    city_list = [deparure_city]
    if days == 3:
        city_list.append(destination)
        return city_list

    state_city_map = {}
    # `with` closes the file handle, which was previously left open.
    with open(city_set_path, encoding='utf-8') as f:
        for line in f:
            line = line.rstrip('\n')
            # A trailing newline or empty line would otherwise break the
            # two-field unpacking below.
            if not line.strip():
                continue
            city, state = line.split('\t')
            state_city_map.setdefault(state, []).append(city)

    city_list.extend(
        city + f"({destination})"
        for city in state_city_map[destination]
        if city != deparure_city
    )
    return city_list

def get_filtered_data(component,data, column_name=('NAME','city')):
    """
    Select the rows of a table whose name and city match a "<name>, <city>" entry.

    Args:
    - component (str): The entry text, parsed with get_valid_name_city.
    - data: Table supporting column lookup and boolean-mask selection
      (presumably a pandas DataFrame -- confirm against the caller).
    - column_name (tuple): Labels of the name column and the city column, in that order.

    Returns:
    - The rows of `data` where both columns equal the parsed name and city.
    """
    name, city = get_valid_name_city(component)
    name_matches = data[column_name[0]] == name
    city_matches = data[column_name[1]] == city
    return data[name_matches & city_matches]

def extract_before_parenthesis(s):
    """
    Return the text that precedes the first "(...)" group in a string.

    Args:
    - s (str): The input text.

    Returns:
    - str: Everything before the first parenthesised group, or `s` unchanged
      when it contains no such group.
    """
    found = re.search(r'^(.*?)\([^)]*\)', s)
    if found is None:
        return s
    return found.group(1)

def count_consecutive_values(lst):
    """
    Run-length encode a list.

    Args:
    - lst (list): The values to group; may be empty or None.

    Returns:
    - list: One (value, count) tuple per run of equal neighbouring values,
      in order of appearance. An empty or None input yields [].
    """
    if not lst:
        return []

    # Each run is kept as a mutable [value, count] pair while scanning.
    runs = []
    for value in lst:
        if runs and value == runs[-1][0]:
            runs[-1][1] += 1
        else:
            runs.append([value, 1])

    return [(value, count) for value, count in runs]