search-alchemy / src /search /vector_search.py
RobertoBarrosoLuque
Add reranking
75361de
from pathlib import Path
from typing import Any, Dict, List

import faiss
import numpy as np

from constants.constants import FAISS_INDEX, PRODUCTS_DF
from src.fireworks.inference import expand_query, get_embedding, rerank_results
# Third ancestor directory of this file (parents[0] is the containing
# directory); presumably the repository root - TODO confirm. Not referenced
# by the functions visible in this module.
_FILE_PATH = Path(__file__).parents[2]
def search_vector(query: str, top_k: int = 5) -> List[Dict[str, Any]]:
    """
    Search products using vector embeddings and FAISS for semantic search.

    This is Stage 2: semantic search using vector embeddings to understand
    query meaning and intent beyond exact keyword matching.

    Args:
        query: Search query string
        top_k: Maximum number of results to return (default: 5)

    Returns:
        List of dictionaries with the keys "product_name", "description",
        "main_category", "secondary_category" and "score", in the order
        FAISS returned the neighbours. The list may hold fewer than
        ``top_k`` entries when the index cannot supply that many matches.
    """
    query_embedding = get_embedding(query)
    query_vector = np.array([query_embedding], dtype=np.float32)
    # Normalizes the query in place (the return value is not used).
    faiss.normalize_L2(query_vector)

    faiss_index = FAISS_INDEX[0]
    distances, indices = faiss_index.search(query_vector, top_k)

    # Convert distances to cosine similarity. For unit-length vectors the
    # *squared* L2 distance equals 2 * (1 - cosine_similarity), hence
    # cosine_similarity = 1 - (distance / 2), which lies in [-1, 1].
    # NOTE(review): this assumes the index reports squared L2 distances over
    # normalized vectors - confirm against the code that builds FAISS_INDEX.
    similarity_scores = 1 - (distances[0] / 2)

    results = []
    for idx, score in zip(indices[0], similarity_scores):
        # FAISS pads missing neighbours with label -1; indexing the frame
        # with -1 would silently return its last row as a bogus hit.
        if idx < 0:
            continue
        row = PRODUCTS_DF.iloc[idx]
        results.append(
            {
                "product_name": row["Product Name"],
                "description": row["Description"],
                "main_category": row["MainCategory"],
                "secondary_category": row["SecondaryCategory"],
                "score": float(score),
            }
        )
    return results
def search_vector_with_expansion(query: str, top_k: int = 5) -> List[Dict[str, Any]]:
    """
    Search products using vector embeddings and FAISS for semantic search with query expansion.

    This is Stage 3: semantic search using vector embeddings to understand
    query meaning and intent beyond exact keyword matching, with query expansion.

    Args:
        query: Search query string
        top_k: Maximum number of results to return (default: 5)

    Returns:
        List of dictionaries containing product information and scores,
        exactly as produced by ``search_vector`` for the expanded query
    """
    expanded_query = expand_query(query)
    # Print both forms so the effect of the expansion step can be inspected.
    print(f"Original: {query}")
    print(f"Expanded: {expanded_query}")
    # Retrieval runs on the expanded text only; the original query is not searched.
    return search_vector(expanded_query, top_k)
def search_vector_with_reranking(query: str, top_k: int = 5) -> List[Dict[str, Any]]:
    """
    Search products using vector embeddings and FAISS for semantic search with reranking.

    This is Stage 4: semantic search using vector embeddings to understand
    query meaning and intent beyond exact keyword matching, with reranking.

    Args:
        query: Search query string
        top_k: Number of candidates to retrieve before reranking (default: 5)

    Returns:
        The entries returned by ``rerank_results``, each with its "score"
        overwritten by the cosine score from the retrieval step. An empty
        list is returned when retrieval finds nothing.
    """
    results = search_vector_with_expansion(query, top_k)
    if not results:
        # Nothing to reorder, so skip the reranker call entirely.
        return []

    # Remember the retrieval scores so they can be restored after reranking.
    # NOTE(review): keyed by product name, so candidates sharing a name share
    # one entry (the last one wins) - confirm names are unique in the catalog.
    cosine_scores = {r["product_name"]: r["score"] for r in results}

    # The reranker is given the original query, not the expanded one.
    reranked_results = rerank_results(query=query, results=results)

    # Overwrite each entry's score in place with the cosine score.
    # A name the reranker returns that was not among the candidates raises KeyError.
    for r in reranked_results:
        r["score"] = cosine_scores[r["product_name"]]
    return reranked_results