search-alchemy / src /search /bm25_lexical_search.py
RobertoBarrosoLuque
Add stage 2 embeddings
385bc37
from pathlib import Path
from typing import Any, Dict, List

import bm25s

from constants.constants import BM25_INDEX, PRODUCTS_DF
# Third ancestor directory of this module file (parents[0] is the directory
# containing the file). Presumably the repository root -- confirm against the
# project layout.
_FILE_PATH = Path(__file__).parents[2]
def search_bm25(query: str, top_k: int = 5) -> List[Dict[str, Any]]:
    """
    Search products using BM25 lexical search (keyword matching).

    This is Stage 1: traditional keyword-based ranking without semantic
    understanding. Fast but misses semantic meaning and intent.

    Args:
        query: Search query string
        top_k: Maximum number of results to return (default: 5). Values
            larger than the number of rows in PRODUCTS_DF are clamped to
            that number; values <= 0 yield an empty list.

    Returns:
        List of dictionaries, in the order returned by the index, each with
        the keys "product_name", "description", "main_category",
        "secondary_category" and "score" (a float).
    """
    # bm25s rejects a k larger than the indexed corpus, so clamp it.
    # Assumes PRODUCTS_DF holds one row per indexed document -- the positional
    # lookups below already rely on that alignment.
    k = min(top_k, len(PRODUCTS_DF))
    if k <= 0:
        return []

    query_tokens = bm25s.tokenize(query, stopwords="en")
    results, scores = BM25_INDEX.retrieve(query_tokens, k=k)

    hits: List[Dict[str, Any]] = []
    # A single query was passed, so only the first row of each result is used.
    for idx, score in zip(results[0], scores[0]):
        # Fetch the row once instead of re-materialising it for every field.
        row = PRODUCTS_DF.iloc[idx]
        hits.append(
            {
                "product_name": row["Product Name"],
                "description": row["Description"],
                "main_category": row["MainCategory"],
                "secondary_category": row["SecondaryCategory"],
                "score": float(score),
            }
        )
    return hits