File size: 4,757 Bytes
879567b
 
 
e5e8c50
879567b
 
e5e8c50
 
879567b
e5e8c50
 
 
 
 
879567b
e5e8c50
 
c834b91
e5e8c50
 
c834b91
e5e8c50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
879567b
 
 
 
e5e8c50
879567b
 
 
1707604
 
b6add69
 
 
 
 
 
879567b
 
 
 
b6add69
879567b
 
b6add69
 
 
 
 
879567b
b6add69
 
 
 
 
879567b
 
e5e8c50
879567b
 
1707604
879567b
e5e8c50
879567b
 
 
 
 
 
8f7d1c9
b6add69
879567b
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import html
from urllib.parse import quote_plus

import gradio as gr
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Load the sentence-embedding model used to encode search queries.
# This runs at import time.
model = SentenceTransformer('sentence-transformers/distiluse-base-multilingual-cased-v2')

# Load the precomputed embeddings and their source tables (replace these with your paths).
# search_in_combined indexes each DataFrame with row positions taken from the
# matching embeddings array, so row i of an array must describe row i of its table.
# NOTE(review): presumably the embeddings were produced with the same model as
# above — confirm, otherwise the similarity scores are meaningless.
embeddings_hotels = np.load("embeddings_hotels.npy")
embeddings_ar = np.load("embeddings_ar.npy")
df_hotels = pd.read_csv("hotels.csv")
df_ar = pd.read_csv("arabic_data.csv")

def search_in_combined(query_text, model, k=5):
    """Return the ``k`` rows most similar to ``query_text`` across both datasets.

    The query is encoded with ``model`` and compared, by cosine similarity,
    against the module-level ``embeddings_hotels`` and ``embeddings_ar``
    arrays. The top ``k`` rows of each table are merged, sorted by similarity
    and truncated to ``k`` rows overall.

    Args:
        query_text: Free-text search query.
        model: Object whose ``encode(text, convert_to_tensor=True)`` returns a
            tensor supporting ``.cpu().numpy()``.
        k: Maximum number of rows to return.

    Returns:
        A DataFrame with the columns of both source tables plus
        ``similarity`` and ``google_maps_url``. The URL is built from
        ``hotel_name`` or, when that is missing, from ``name``; it is an
        empty string when neither is available.
    """
    query_embedding = model.encode(query_text, convert_to_tensor=True).cpu().numpy().reshape(1, -1)

    # Both datasets go through the same rank-and-slice pipeline.
    candidates = []
    for frame, embeddings in ((df_hotels, embeddings_hotels), (df_ar, embeddings_ar)):
        similarities = cosine_similarity(query_embedding, embeddings).flatten()
        top_indices = np.argsort(similarities)[::-1][:k]
        top_rows = frame.iloc[top_indices].copy()
        top_rows['similarity'] = similarities[top_indices]
        candidates.append(top_rows)

    combined_top_results = pd.concat(candidates, ignore_index=True)
    combined_top_results = combined_top_results.sort_values(by='similarity', ascending=False)

    def maps_url(row):
        # Rows from the second table carry their name in 'name', not 'hotel_name'.
        for column in ('hotel_name', 'name'):
            value = row.get(column)
            if value is not None and not pd.isna(value):
                # quote_plus percent-encodes '&', '#', non-ASCII, etc., which a
                # plain space-to-plus replacement would leave to corrupt the query.
                return "https://www.google.com/maps/search/?api=1&query=" + quote_plus(str(value))
        return ''

    # A list (rather than DataFrame.apply) keeps the assignment valid when the
    # combined frame is empty.
    combined_top_results['google_maps_url'] = [
        maps_url(row) for _, row in combined_top_results.iterrows()
    ]

    return combined_top_results.head(k)

def _is_missing(value):
    """Return True when ``value`` is None or a pandas/NumPy missing marker."""
    if value is None:
        return True
    try:
        return bool(pd.isna(value))
    except (TypeError, ValueError):
        # Non-scalar cell (e.g. a list): treat it as present.
        return False


def _text(row, column):
    """Return the HTML-escaped cell text, or 'N/A' when the cell is missing."""
    value = row.get(column)
    return "N/A" if _is_missing(value) else html.escape(str(value))


def _link(row, column, label):
    """Return an anchor to the URL stored in ``column``, or 'N/A' if missing."""
    value = row.get(column)
    if _is_missing(value):
        return "N/A"
    return f"<a href='{html.escape(str(value))}' target='_blank'>{label}</a>"


def _maps_link(name):
    """Return a Google Maps search anchor for ``name``, or 'N/A' if missing."""
    if _is_missing(name):
        return "N/A"
    url = "https://www.google.com/maps/search/?api=1&query=" + quote_plus(str(name))
    # The URL itself contains '&', which must be escaped inside an attribute.
    return f"<a href='{html.escape(url)}' target='_blank'>View on Maps</a>"


def format_results(results):
    """Render search results as an HTML fragment.

    Rows with a non-missing ``hotel_name`` use the hotel/review layout; all
    other rows use the room-listing layout keyed on ``name``. Missing cells
    (absent column, None or NaN) are shown as ``N/A`` and all cell text is
    HTML-escaped.

    Args:
        results: DataFrame of result rows, e.g. from ``search_in_combined``.

    Returns:
        One HTML string with the per-row fragments joined by ``<br><br>``;
        an empty string when ``results`` has no rows.
    """
    formatted_results = []
    for _, row in results.iterrows():
        hotel_name = row.get('hotel_name')
        if not _is_missing(hotel_name):
            image = row.get('hotel_image')
            if _is_missing(image):
                image_html = "N/A"
            else:
                image_html = f"<img src='{html.escape(str(image))}' width='200' />"
            # The description line is omitted entirely when there is none.
            if _is_missing(row.get('hotel_description')):
                description_html = ""
            else:
                description_html = f"<b>Description</b>: {_text(row, 'hotel_description')}<br>"
            result = (
                f"<b>Hotel Name</b>: {_text(row, 'hotel_name')}<br>"
                f"{description_html}"
                f"<b>Review Title</b>: {_text(row, 'review_title')}<br>"
                f"<b>Review Text</b>: {_text(row, 'review_text')}<br>"
                f"<b>Rating</b>: {_text(row, 'rate')}<br>"
                f"<b>Trip Date</b>: {_text(row, 'tripdate')}<br>"
                f"<b>Price Range</b>: {_text(row, 'price_range')}<br>"
                f"<b>Location</b>: {_text(row, 'locality')} , {_text(row, 'country')}<br>"
                f"<b>Hotel Website URL</b>: {_link(row, 'hotel_url', 'Link')}<br>"
                f"<b>Google Maps</b>: {_maps_link(hotel_name)}<br>"
                f"<b>Image</b>: {image_html}<br>"
            )
        else:
            result = (
                f"<b>Name</b>: {_text(row, 'name')}<br>"
                f"<b>Location</b>: {_text(row, 'location')}<br>"
                f"<b>Price</b>: {_text(row, 'price')}<br>"
                f"<b>Price For</b>: {_text(row, 'price_for')}<br>"
                f"<b>Room Type</b>: {_text(row, 'room_type')}<br>"
                f"<b>Beds</b>: {_text(row, 'beds')}<br>"
                f"<b>Rating</b>: {_text(row, 'rating')}<br>"
                f"<b>Rating Title</b>: {_text(row, 'rating_title')}<br>"
                f"<b>Google Maps</b>: {_maps_link(row.get('name'))}<br>"
                f"<b>Number of Ratings</b>: {_text(row, 'number_of_ratings')}<br>"
                f"<b>Hotel Website URL</b>: {_link(row, 'url', 'Link')}<br>"
                f"<b>Additional Info</b>: {_text(row, 'cm')}<br>"
            )
        formatted_results.append(result)

    return "<br><br>".join(formatted_results)


def search_interface(query_text):
    """Gradio callback: search for ``query_text`` and return the results as HTML.

    Looks up the five best matches with the module-level ``model`` and hands
    them to ``format_results`` for rendering.
    """
    return format_results(search_in_combined(query_text, model, k=5))

# Build the web UI: a single text box in, rendered HTML results out.
iface = gr.Interface(
    fn=search_interface,
    inputs=gr.Textbox(label="Enter your search query"),
    outputs=gr.HTML(label="Search Results"),
    title="Hotel Search Beta(v0.1)",
    description="Enter a query to search for a suitable hotel. The results will show the top matches based on similarity and provide a Google Maps URL for hotel locations, and other info about the hotel.",
    examples=["Riyadh", "Deluxe Room"]
)

# Start the Gradio server as soon as the script is run.
iface.launch()