# HomeMatch_Udacity_GenAI / Code / vector_database.py
# Fra-Berkeley-account's picture
# Fixed typo in vector_database.py
# 03197ad
"""
Module: vector_database
Description: Handles storing and searching real estate listings using ChromaDB and OpenAI embeddings.
"""
import json
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain_core.documents import Document
class VectorDatabase:
    """Handles vector-based storage and retrieval of real estate listings using ChromaDB.

    On construction the listings JSON file is read, every listing is turned
    into a ``Document`` (description as content, remaining fields as
    metadata) and a persistent ``Chroma`` collection named
    ``real_estate_listings`` is opened. Nothing is written to the collection
    until ``store_listings`` is called.
    """

    # Maps each metadata key stored with a document to the key it is read
    # from in the raw listing dictionary.
    _METADATA_FIELDS = {
        "id": "id",
        "property_type": "Property Type",
        "neighborhood": "Neighborhood",
        "city": "City",
        "state": "State",
        "price": "Price",
        "house_size": "House Size",
        "bedrooms": "Bedrooms",
        "bathrooms": "Bathrooms",
        "neighborhood_description": "Neighborhood Description",
        "image_path": "image_path",
    }

    def __init__(self, listings_path="Data/listings.json", db_path="Data/chroma_langchain_db"):
        """
        Initializes the vector store by loading real estate listings and setting up ChromaDB.

        Args:
            listings_path (str): Path to the JSON file containing real estate listings.
            db_path (str): Path to the directory where ChromaDB stores embeddings.

        Raises:
            KeyError: If a loaded listing lacks one of the expected fields
                (see ``_prepare_documents``).
        """
        self.listings_path = listings_path
        self.db_path = db_path
        self.embedding_model = OpenAIEmbeddings(model="text-embedding-3-large")

        # Load and process listings
        self.listings = self._load_listings()
        self.documents = self._prepare_documents()

        # Initialize ChromaDB for storage
        self.vector_store = Chroma(
            collection_name="real_estate_listings",
            embedding_function=self.embedding_model,
            persist_directory=self.db_path,
        )

    def _load_listings(self):
        """
        Loads real estate listings from a JSON file.

        The file is read as UTF-8. Any failure to open, decode or parse the
        file, or a JSON root that is not an array, is reported on stdout and
        results in an empty list instead of an exception.

        Returns:
            list: A list of real estate listings (empty on error).
        """
        try:
            with open(self.listings_path, "r", encoding="utf-8") as f:
                listings = json.load(f)
        except (OSError, ValueError) as e:
            # OSError covers missing/unreadable paths; ValueError covers both
            # json.JSONDecodeError and UnicodeDecodeError.
            print(f"❌ Error loading listings file: {e}")
            return []

        if not isinstance(listings, list):
            # Iterating a dict/str root later would yield keys/characters,
            # not listings, so reject it here.
            print(
                "❌ Error loading listings file: expected a JSON array of listings, "
                f"got {type(listings).__name__}"
            )
            return []
        return listings

    def _prepare_documents(self):
        """
        Converts listings into Document objects with metadata.

        Returns:
            list: A list of Document objects with structured metadata.

        Raises:
            KeyError: If a listing is missing ``"Description"`` or any source
                key listed in ``_METADATA_FIELDS``.
        """
        return [
            Document(
                page_content=listing["Description"],  # Store property description
                metadata={
                    meta_key: listing[source_key]
                    for meta_key, source_key in self._METADATA_FIELDS.items()
                },
            )
            for listing in self.listings
        ]

    def store_listings(self):
        """
        Stores real estate listings in ChromaDB.

        When every document carries a distinct ``id`` in its metadata, those
        ids (as strings) are passed to the vector store so that repeated
        calls do not add the same listings again under fresh ids. Otherwise
        the documents are added without explicit ids. Failures are reported
        on stdout rather than raised.
        """
        if not self.documents:
            print("❌ Error storing listings: no listings were loaded.")
            return

        raw_ids = [doc.metadata.get("id") for doc in self.documents]
        ids = [str(raw_id) for raw_id in raw_ids]
        ids_usable = None not in raw_ids and len(set(ids)) == len(ids)

        try:
            if ids_usable:
                self.vector_store.add_documents(self.documents, ids=ids)
            else:
                # Missing or duplicate ids: let the store generate its own.
                self.vector_store.add_documents(self.documents)
            print("✅ Listings successfully stored in ChromaDB!")
        except Exception as e:
            print(f"❌ Error storing listings: {e}")

    @staticmethod
    def _join_values(value):
        """Renders a preference value (string, iterable or scalar) as comma-separated text."""
        if value is None:
            return ""
        if isinstance(value, str):
            # A bare string must not be split into its characters.
            return value
        try:
            return ", ".join(str(item) for item in value)
        except TypeError:
            # Not iterable (e.g. a number): fall back to its string form.
            return str(value)

    def format_user_prefs(self, user_prefs):
        """
        Converts structured user preferences into a readable search query.

        ``city``, ``state`` and ``amenities`` may each be a list of values or
        a single string; list items are converted with ``str``.

        Args:
            user_prefs (dict): Dictionary containing user preferences.

        Returns:
            str: A natural language query string for embedding, or an empty
            string if the preferences could not be read.
        """
        try:
            cities = self._join_values(user_prefs.get("city", []))
            states = self._join_values(user_prefs.get("state", []))
            amenities = self._join_values(user_prefs.get("amenities", []))
            return (
                f"Looking for a property in {cities}, {states}. "
                f"House size preference: {user_prefs.get('house_size', 'any size')}. "
                f"Maximum price: {user_prefs.get('max_price', '100000')}. "
                f"Number of Bedrooms: {user_prefs.get('num_bedrooms', 3)}. "
                f"Number of Bathrooms: {user_prefs.get('num_bathrooms', 3)}. "
                f"Amenities: {amenities}. "
                f"Property description: {user_prefs.get('description', 'no preference')}."
            )
        except (AttributeError, TypeError, ValueError) as e:
            print(f"❌ Error formatting user preferences: {e}")
            return ""

    @staticmethod
    def _doc_to_listing(doc):
        """Flattens one search hit into the dictionary shape returned by ``search``."""
        metadata = doc.metadata
        return {
            "description": doc.page_content,
            "id": metadata.get("id"),
            "city": metadata.get("city", "Unknown"),
            "state": metadata.get("state", "Unknown"),
            "price": metadata.get("price", "N/A"),
            "bedrooms": metadata.get("bedrooms", "N/A"),
            "bathrooms": metadata.get("bathrooms", "N/A"),
            "house_size": metadata.get("house_size", "N/A"),
            "neighborhood": metadata.get("neighborhood", "Unknown"),
            "neighborhood_description": metadata.get("neighborhood_description", ""),
            # Ensure image path is included
            "image_path": metadata.get("image_path", "❌ No image available"),
        }

    def search(self, user_prefs, k=5):
        """
        Performs a similarity search based on user preferences and retrieves matching listings with images.

        Args:
            user_prefs (dict): Dictionary containing user search preferences.
            k (int): Number of top matches to return.

        Returns:
            list: A list of dictionaries containing listing details and image
            paths. Empty when ``k`` is not positive, when no query could be
            built, or when the search fails (the error is printed).
        """
        try:
            if k <= 0:
                return []

            # Convert user preferences into a natural language query
            query = self.format_user_prefs(user_prefs)
            if not query:
                # Formatting failed; do not embed and search an empty string.
                print("❌ Error during search: could not build a query from the user preferences.")
                return []

            # Generate embeddings for the query
            query_embedding = self.embedding_model.embed_query(query)
            if not isinstance(query_embedding, list):
                raise ValueError("❌ Embedding function did not return a valid vector list.")

            # Perform similarity search using the embedding
            results = self.vector_store.similarity_search_by_vector(query_embedding, k=k)

            # Extract relevant metadata, including image paths
            return [self._doc_to_listing(doc) for doc in results]
        except Exception as e:
            # Boundary around external embedding/vector-store calls: report
            # the failure and return no matches instead of raising.
            print(f"❌ Error during search: {e}")
            return []