"""Gradio demo: restaurant recommendations with MongoDB Atlas Vector Search.

A search runs in three steps: restaurants within a radius of the chosen
location are copied into a temporary collection, a vector search is run over
that subset with an OpenAI embedding of the query, and the hits are handed to
a chat model that writes the recommendation.
"""

import json
import os
from time import sleep

import gradio as gr
from bson import ObjectId
from openai import OpenAI
from pymongo import MongoClient

openai_client = OpenAI()

DB_NAME = 'whatscooking'
RESTAURANTS_COLLECTION = 'restaurants'
TRIPS_COLLECTION = 'smart_trips'

ERROR_MESSAGE = "Your query caused an error, please retry with allowed input only ..."

# Kept flush-left so the heading is rendered as a heading regardless of how
# the Markdown component treats leading indentation.
INTRO_MARKDOWN = """
# MongoDB's Vector Restaurant planner
Start typing below to see the results. You can search a specific cuisine for you and choose 3 predefined locations. The radius specify the distance from the start search location.
This space uses the dataset called [whatscooking.restaurants](https://huggingface.co/datasets/AIatMongoDB/whatscooking.restaurants)
"""

# (label, GeoJSON point) pairs offered as search origins.
LOCATION_CHOICES = [
    ("Timesquare Manhattan", {
        "type": "Point",
        "coordinates": [-73.98527039999999, 40.7589099]
    }),
    ("Westside Manhattan", {
        "type": "Point",
        "coordinates": [-74.013686, 40.701975]
    }),
    ("Downtown Manhattan", {
        "type": "Point",
        "coordinates": [-74.000468, 40.720777]
    }),
]


## Get the restaurants based on the search and location
def get_restaurants(search, location, meters):
    """Recommend restaurants for a free-text query near a location.

    Args:
        search: Free-text description of what the user is looking for.
        location: GeoJSON point used as the centre of the geo search.
        meters: Search radius around ``location``, in meters.

    Returns:
        A 4-tuple of strings: the recommendation text (or a "not found" /
        error message), the HTML for the map panel, the stringified
        pre-aggregation pipeline and the stringified vector query. The last
        two are empty strings when the failure happened before they were
        built.
    """
    # Filled in as soon as each stage is built, so every return path
    # (including the error handler) has something safe to show.
    pipeline_text = ''
    query_text = ''
    new_trip = None

    try:
        uri = os.environ.get('MONGODB_ATLAS_URI')
        client = MongoClient(uri)
        restaurants_collection = client[DB_NAME][RESTAURANTS_COLLECTION]
        trips_collection = client[DB_NAME][TRIPS_COLLECTION]
    except Exception as e:
        # Without a client nothing below can work, so stop here instead of
        # falling through to code that would fail on undefined names.
        print("Error Connecting to the MongoDB Atlas Cluster")
        print(e)
        return ERROR_MESSAGE, '', pipeline_text, query_text

    try:
        # Pre aggregate restaurants collection based on chosen location and
        # radius, the output is stored into trips_collection
        new_trip, pre_agg = pre_aggregate_meters(restaurants_collection, location, meters)
        pipeline_text = str(pre_agg)

        ## Get openai embeddings
        response = openai_client.embeddings.create(
            input=search,
            model="text-embedding-3-small",
            dimensions=256
        )

        ## prepare the similarity search on current trip
        vector_query = {
            "$vectorSearch": {
                "index": "vector_index",
                "queryVector": response.data[0].embedding,
                "path": "embedding",
                "numCandidates": 10,
                "limit": 3,
                "filter": {"searchTrip": new_trip}
            }}
        query_text = str(vector_query)

        restaurant_docs = list(trips_collection.aggregate([
            vector_query,
            {"$project": {"_id": 0, "embedding": 0}},
        ]))

        # Nothing to recommend: skip the chat completion entirely.
        if not restaurant_docs:
            return "No restaurants found", '', pipeline_text, query_text

        ## Run the retrieved documents through a RAG system.
        chat_response = openai_client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a helpful restaurant assistant. You will get a context if the context is not relevat to the user query please address that and not provide by default the restaurants as is."},
                {"role": "user", "content": f"Find me the 2 best restaurant and why based on {search} and {restaurant_docs}. explain trades offs and why I should go to each one. You can mention the third option as a possible alternative."}
            ]
        )

        ## Build the map filter
        # Use however many hits came back; the search may return fewer than
        # three restaurants for a small radius.
        restaurant_string = ", ".join(
            f"'{doc.get('restaurant_id')}'" for doc in restaurant_docs
        )
        # NOTE(review): the map markup is empty in this revision, so
        # restaurant_string is currently unused - confirm whether the map
        # embed should be restored here.
        iframe = ''

        return chat_response.choices[0].message.content, iframe, pipeline_text, query_text
    except Exception as e:
        print(e)
        return ERROR_MESSAGE, '', pipeline_text, query_text
    finally:
        # Always remove the temporary documents and release the connection,
        # whichever way the request ended.
        try:
            if new_trip is not None:
                trips_collection.delete_many({"searchTrip": new_trip})
        except Exception as cleanup_error:
            print(cleanup_error)
        finally:
            client.close()


def pre_aggregate_meters(restaurants_collection, location, meters):
    """Copy the restaurants near ``location`` into the trips collection.

    Each copied document is tagged with a fresh trip id so the later vector
    search can be restricted to this request's documents.

    Args:
        restaurants_collection: Collection to run the geo query against.
        location: GeoJSON point used as the ``$geoNear`` origin.
        meters: Maximum distance from ``location``, in meters.

    Returns:
        A ``(trip_id, pipeline)`` tuple: the ``ObjectId`` assigned to this
        search and the aggregation pipeline that was executed.
    """
    ## Do the geo location preaggregate and assign the search trip id.
    trip_id = ObjectId()

    pre_aggregate_pipeline = [
        {
            "$geoNear": {
                "near": location,
                "distanceField": "distance",
                "maxDistance": meters,
                "spherical": True,
            },
        },
        {
            "$addFields": {
                "searchTrip": trip_id,
                "date": trip_id.generation_time
            }
        },
        {
            "$merge": {
                "into": "smart_trips"
            }
        },
    ]

    # NOTE(review): $merge keys on _id by default, so two overlapping
    # searches running at once may retag each other's documents - confirm.
    restaurants_collection.aggregate(pre_aggregate_pipeline)

    # Presumably gives the vector index time to pick up the merged
    # documents before they are searched - TODO confirm the delay needed.
    sleep(3)

    return trip_id, pre_aggregate_pipeline


with gr.Blocks() as demo:
    gr.Markdown(INTRO_MARKDOWN)

    # Create the interface
    gr.Interface(
        get_restaurants,
        [
            gr.Textbox(placeholder="What type of dinner are you looking for?"),
            gr.Radio(choices=LOCATION_CHOICES, label="Location", info="What location you need?"),
            gr.Slider(minimum=500, maximum=10000, randomize=False, step=5, label="Radius in meters"),
        ],
        [
            gr.Textbox(label="MongoDB Vector Recommendations", placeholder="Results will be displayed here"),
            "html",
            gr.Code(label="Pre-aggregate pipeline", language="json"),
            gr.Code(label="Vector Query", language="json"),
        ],
    )

if __name__ == "__main__":
    demo.launch()