Spaces:
Sleeping
Sleeping
from pathlib import Path
from typing import Any, Dict, List

import faiss
import numpy as np

from constants.constants import FAISS_INDEX, PRODUCTS_DF
from src.fireworks.inference import expand_query, get_embedding, rerank_results
# Third ancestor of this file's path (parents[0] is the containing directory).
# NOTE(review): presumably the repository root — confirm against the project
# layout. It is not referenced by any function visible in this section.
_FILE_PATH = Path(__file__).parents[2]
def search_vector(query: str, top_k: int = 5) -> List[Dict[str, Any]]:
    """
    Search products using vector embeddings and FAISS for semantic search.

    This is Stage 2: semantic search using vector embeddings to understand
    query meaning and intent beyond exact keyword matching.

    Args:
        query: Search query string
        top_k: Number of top results to return (default: 5)

    Returns:
        List of dictionaries with the keys "product_name", "description",
        "main_category", "secondary_category" and "score", in the order
        FAISS returned the neighbours. The list can be shorter than top_k
        when the index cannot supply that many neighbours.
    """
    query_embedding = get_embedding(query)
    # FAISS expects a 2-D float32 array: one row per query vector.
    query_vector = np.array([query_embedding], dtype=np.float32)
    # normalize_L2 modifies the array in place, so there is nothing to rebind.
    faiss.normalize_L2(query_vector)

    faiss_index = FAISS_INDEX[0]
    distances, indices = faiss_index.search(query_vector, top_k)

    # Convert the reported distances to cosine similarity.
    # For unit-length vectors, squared L2 distance = 2 * (1 - cosine_similarity),
    # so cosine_similarity = 1 - (distance / 2).
    # NOTE(review): this assumes the index reports squared L2 distances (as
    # FAISS L2 indexes do) and that the stored vectors were L2-normalised when
    # the index was built — confirm against the index-building code.
    similarity_scores = 1 - (distances[0] / 2)

    results: List[Dict[str, Any]] = []
    for idx, score in zip(indices[0], similarity_scores):
        # FAISS fills missing neighbours with label -1 when fewer than top_k
        # vectors are available; iloc[-1] would silently return the LAST
        # product with a meaningless score, so skip those slots.
        if idx < 0:
            continue
        # Fetch the row once instead of once per column.
        row = PRODUCTS_DF.iloc[idx]
        results.append(
            {
                "product_name": row["Product Name"],
                "description": row["Description"],
                "main_category": row["MainCategory"],
                "secondary_category": row["SecondaryCategory"],
                "score": float(score),
            }
        )
    return results
def search_vector_with_expansion(query: str, top_k: int = 5) -> List[Dict[str, any]]:
    """
    Stage 3: semantic vector search preceded by query expansion.

    The query is first rewritten by expand_query; the rewritten text is then
    handed to search_vector in place of the original. Both the original and
    the expanded query are printed to stdout.

    Args:
        query: Search query string
        top_k: Number of top results to return (default: 5)

    Returns:
        Whatever search_vector returns for the expanded query: a list of
        dictionaries containing product information and scores
    """
    expanded_query = expand_query(query)

    # Show both forms of the query so the effect of expansion is visible.
    print(f"Original: {query}")
    print(f"Expanded: {expanded_query}")

    hits = search_vector(expanded_query, top_k)
    return hits
def search_vector_with_reranking(query: str, top_k: int = 5) -> List[Dict[str, any]]:
    """
    Stage 4: expanded-query vector search followed by reranking.

    Candidates come from search_vector_with_expansion. They are passed to
    rerank_results together with the original (unexpanded) query, and each
    reranked item's "score" is then overwritten with the cosine score that
    was recorded for its product name before reranking.

    Args:
        query: Search query string
        top_k: Number of top results to return (default: 5)

    Returns:
        The reranked results, updated in place so that "score" holds the
        preserved cosine score

    Raises:
        KeyError: if a reranked item carries a product name that was not
            among the initial search results
    """
    candidates = search_vector_with_expansion(query, top_k)

    # Record the cosine score of every candidate before reranking. Scores are
    # keyed by product name, so candidates sharing a name keep the last one.
    score_by_name = {}
    for hit in candidates:
        score_by_name[hit["product_name"]] = hit["score"]

    reranked = rerank_results(query=query, results=candidates)

    # Put the original cosine score back on each reranked item.
    for item in reranked:
        name = item["product_name"]
        item["score"] = score_by_name[name]

    return reranked