Spaces:
Running
Running
RobertoBarrosoLuque
committed on
Commit
·
4cba650
1
Parent(s):
15dacd4
Make queries very ambiguous
Browse files- configs/prompt_library.yaml +41 -0
- src/app.py +23 -35
- src/config.py +11 -11
- src/fireworks/inference.py +53 -4
- src/search/vector_search.py +21 -6
configs/prompt_library.yaml
CHANGED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Prompt Library for Search Alchemy
|
| 2 |
+
# Contains system prompts and few-shot examples for various search enhancement stages
|
| 3 |
+
|
| 4 |
+
query_expansion:
|
| 5 |
+
system_prompt: |
|
| 6 |
+
You are a query expansion expert for e-commerce product search across categories: Toys & Games, Home & Kitchen, Clothing Shoes & Jewelry, Sports & Outdoors, and Baby Products.
|
| 7 |
+
|
| 8 |
+
Your task is to expand queries with relevant terms, synonyms, and product attributes that improve search quality.
|
| 9 |
+
|
| 10 |
+
Rules:
|
| 11 |
+
- Keep the original query terms
|
| 12 |
+
- Add 3-5 relevant synonyms, related terms, or product attributes
|
| 13 |
+
- Include age groups, materials, sizes, or usage contexts when relevant
|
| 14 |
+
- Use domain-specific terminology (STEM, montessori, organic, etc.)
|
| 15 |
+
- Return ONLY the expanded query text, no explanations
|
| 16 |
+
- Keep it concise (max 25 words)
|
| 17 |
+
|
| 18 |
+
Examples across categories:
|
| 19 |
+
|
| 20 |
+
Query: "learning toy for preschool kids"
|
| 21 |
+
Expanded: "learning educational toy preschool kids toddlers children ages 3-5 developmental montessori STEM activities play"
|
| 22 |
+
|
| 23 |
+
Query: "fun gift for child"
|
| 24 |
+
Expanded: "fun entertaining gift present toy for child kids children boy girl birthday holiday surprise"
|
| 25 |
+
|
| 26 |
+
Query: "cute nursery items"
|
| 27 |
+
Expanded: "cute nursery items baby room decor decorations accessories wall art bedding essentials infant newborn"
|
| 28 |
+
|
| 29 |
+
Query: "backyard play equipment"
|
| 30 |
+
Expanded: "backyard outdoor play equipment playground set swing slide jungle gym kids children active sports"
|
| 31 |
+
|
| 32 |
+
Query: "dress up outfit"
|
| 33 |
+
Expanded: "dress up costume outfit pretend play kids children toddler princess character role play accessories"
|
| 34 |
+
|
| 35 |
+
Query: "bedroom decoration items"
|
| 36 |
+
Expanded: "bedroom decoration items wall decor art accessories furniture bedding curtains lighting home design"
|
| 37 |
+
|
| 38 |
+
Query: "comfortable running shoes"
|
| 39 |
+
Expanded: "comfortable running shoes athletic sneakers jogging trainers cushioned lightweight breathable sports footwear"
|
| 40 |
+
|
| 41 |
+
Now expand the following query:
|
src/app.py
CHANGED
|
@@ -9,7 +9,7 @@ from config import (
|
|
| 9 |
EXAMPLE_QUERIES_BY_CATEGORY,
|
| 10 |
)
|
| 11 |
from src.search.bm25_lexical_search import search_bm25
|
| 12 |
-
from src.search.vector_search import search_vector
|
| 13 |
from src.data_prep.data_prep import load_clean_amazon_product_data
|
| 14 |
from src.constants.code_snippets import (
|
| 15 |
CODE_STAGE_1,
|
|
@@ -65,10 +65,8 @@ def format_results(results: List[Dict], stage_name: str, metrics: Dict) -> str:
|
|
| 65 |
stage_name: Name of the search stage
|
| 66 |
metrics: Dict with keys: semantic_match, diversity, latency_ms
|
| 67 |
"""
|
| 68 |
-
html_parts = [
|
| 69 |
-
|
| 70 |
-
# Performance metrics at the top with prominent styling
|
| 71 |
-
html_parts.append(
|
| 72 |
f"""
|
| 73 |
<div style="display: flex; gap: 20px; margin-bottom: 28px;">
|
| 74 |
<div class="metric-box" style="flex: 1;">
|
|
@@ -82,11 +80,13 @@ def format_results(results: List[Dict], stage_name: str, metrics: Dict) -> str:
|
|
| 82 |
<div style="color: #64748B; font-size: 0.8em; margin-top: 4px;">Response time</div>
|
| 83 |
</div>
|
| 84 |
</div>
|
| 85 |
-
"""
|
| 86 |
-
|
|
|
|
|
|
|
|
|
|
| 87 |
|
| 88 |
# Results section
|
| 89 |
-
html_parts.append('<div style="margin-top: 20px;">\n\n')
|
| 90 |
for idx, result in enumerate(results, 1):
|
| 91 |
category = f"{result.get('main_category', 'N/A')} > {result.get('secondary_category', 'N/A')}"
|
| 92 |
html_parts.append(
|
|
@@ -103,19 +103,22 @@ def format_results(results: List[Dict], stage_name: str, metrics: Dict) -> str:
|
|
| 103 |
return "".join(html_parts)
|
| 104 |
|
| 105 |
|
| 106 |
-
def run_search_function_and_time(query: str, func: Callable):
|
| 107 |
start = time.time()
|
| 108 |
results = func(query)
|
| 109 |
latency = int((time.time() - start) * 1000)
|
| 110 |
-
return results, latency
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
|
| 112 |
|
| 113 |
def search_stage_1(query: str) -> Tuple[str, Dict]:
|
| 114 |
"""Stage 1: Baseline BM25 keyword search."""
|
| 115 |
results, latency = run_search_function_and_time(query, search_bm25)
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
semantic_match = min(1.0, avg_score / 10.0)
|
| 119 |
|
| 120 |
metrics = {
|
| 121 |
"semantic_match": semantic_match,
|
|
@@ -129,9 +132,7 @@ def search_stage_1(query: str) -> Tuple[str, Dict]:
|
|
| 129 |
def search_stage_2(query: str) -> Tuple[str, Dict]:
|
| 130 |
"""Stage 2: Vector Embeddings using FAISS."""
|
| 131 |
results, latency = run_search_function_and_time(query, search_vector)
|
| 132 |
-
|
| 133 |
-
avg_score = sum(r["score"] for r in results) / len(results) if results else 0
|
| 134 |
-
semantic_match = avg_score
|
| 135 |
|
| 136 |
metrics = {
|
| 137 |
"semantic_match": semantic_match,
|
|
@@ -139,34 +140,21 @@ def search_stage_2(query: str) -> Tuple[str, Dict]:
|
|
| 139 |
}
|
| 140 |
print(f"Searched vector embeddings for '{query}' in {latency}ms")
|
| 141 |
|
| 142 |
-
# Return top 5 for display
|
| 143 |
return format_results(results[:5], "Stage 2: Vector Embeddings", metrics), metrics
|
| 144 |
|
| 145 |
|
| 146 |
def search_stage_3(query: str) -> Tuple[str, Dict]:
|
| 147 |
-
"""Stage 3:
|
| 148 |
-
start_time = time.time()
|
| 149 |
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
{
|
| 153 |
-
"product_name": product["title"],
|
| 154 |
-
"description": product["description"],
|
| 155 |
-
"main_category": product["category"],
|
| 156 |
-
"secondary_category": "Placeholder",
|
| 157 |
-
"score": 0.78 + (idx * 0.03),
|
| 158 |
-
}
|
| 159 |
-
for idx, product in enumerate(SAMPLE_PRODUCTS[:5])
|
| 160 |
-
]
|
| 161 |
-
|
| 162 |
-
latency = int((time.time() - start_time) * 1000)
|
| 163 |
|
| 164 |
metrics = {
|
| 165 |
-
"semantic_match":
|
| 166 |
-
"latency_ms":
|
| 167 |
}
|
| 168 |
|
| 169 |
-
return format_results(results, "Stage 3:
|
| 170 |
|
| 171 |
|
| 172 |
def search_stage_4(query: str) -> Tuple[str, Dict]:
|
|
|
|
| 9 |
EXAMPLE_QUERIES_BY_CATEGORY,
|
| 10 |
)
|
| 11 |
from src.search.bm25_lexical_search import search_bm25
|
| 12 |
+
from src.search.vector_search import search_vector, search_vector_with_expansion
|
| 13 |
from src.data_prep.data_prep import load_clean_amazon_product_data
|
| 14 |
from src.constants.code_snippets import (
|
| 15 |
CODE_STAGE_1,
|
|
|
|
| 65 |
stage_name: Name of the search stage
|
| 66 |
metrics: Dict with keys: semantic_match, diversity, latency_ms
|
| 67 |
"""
|
| 68 |
+
html_parts = [
|
| 69 |
+
f"## 🔍 {stage_name}\n\n",
|
|
|
|
|
|
|
| 70 |
f"""
|
| 71 |
<div style="display: flex; gap: 20px; margin-bottom: 28px;">
|
| 72 |
<div class="metric-box" style="flex: 1;">
|
|
|
|
| 80 |
<div style="color: #64748B; font-size: 0.8em; margin-top: 4px;">Response time</div>
|
| 81 |
</div>
|
| 82 |
</div>
|
| 83 |
+
""",
|
| 84 |
+
'<div style="margin-top: 20px;">\n\n',
|
| 85 |
+
]
|
| 86 |
+
|
| 87 |
+
# Performance metrics at the top with prominent styling
|
| 88 |
|
| 89 |
# Results section
|
|
|
|
| 90 |
for idx, result in enumerate(results, 1):
|
| 91 |
category = f"{result.get('main_category', 'N/A')} > {result.get('secondary_category', 'N/A')}"
|
| 92 |
html_parts.append(
|
|
|
|
| 103 |
return "".join(html_parts)
|
| 104 |
|
| 105 |
|
| 106 |
+
def run_search_function_and_time(query: str, func: Callable, top_n: int = 5):
    """
    Run a search function on a query and measure how long the call takes.

    Args:
        query: Search query string, passed to ``func`` unchanged
        func: Callable invoked as ``func(query)``; its result must support slicing
        top_n: Maximum number of leading results to keep (default: 5)

    Returns:
        Tuple of (first ``top_n`` results, latency in whole milliseconds)
    """
    # perf_counter is monotonic and high-resolution, so the measured interval
    # cannot be skewed (or go negative) if the wall clock is adjusted mid-call.
    start = time.perf_counter()
    results = func(query)
    latency = int((time.perf_counter() - start) * 1000)
    return results[:top_n], latency
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
def get_average_score(results: List[Dict]) -> float:
    """
    Compute the mean of the "score" values across search results.

    Args:
        results: List of result dicts, each with a numeric "score" entry

    Returns:
        Mean score, or 0.0 when ``results`` is empty
    """
    if not results:
        # Return a float (not the int 0) so the empty case matches the annotation.
        return 0.0
    return sum(r["score"] for r in results) / len(results)
|
| 115 |
|
| 116 |
|
| 117 |
def search_stage_1(query: str) -> Tuple[str, Dict]:
|
| 118 |
"""Stage 1: Baseline BM25 keyword search."""
|
| 119 |
results, latency = run_search_function_and_time(query, search_bm25)
|
| 120 |
+
avg_score = get_average_score(results)
|
| 121 |
+
semantic_match = min(1.0, avg_score / len(results))
|
|
|
|
| 122 |
|
| 123 |
metrics = {
|
| 124 |
"semantic_match": semantic_match,
|
|
|
|
| 132 |
def search_stage_2(query: str) -> Tuple[str, Dict]:
|
| 133 |
"""Stage 2: Vector Embeddings using FAISS."""
|
| 134 |
results, latency = run_search_function_and_time(query, search_vector)
|
| 135 |
+
semantic_match = get_average_score(results)
|
|
|
|
|
|
|
| 136 |
|
| 137 |
metrics = {
|
| 138 |
"semantic_match": semantic_match,
|
|
|
|
| 140 |
}
|
| 141 |
print(f"Searched vector embeddings for '{query}' in {latency}ms")
|
| 142 |
|
|
|
|
| 143 |
return format_results(results[:5], "Stage 2: Vector Embeddings", metrics), metrics
|
| 144 |
|
| 145 |
|
| 146 |
def search_stage_3(query: str) -> Tuple[str, Dict]:
    """
    Stage 3: Query Expansion + Vector Embeddings.

    Runs the expansion-backed vector search through the timing helper, then
    reports the average result score as the semantic match metric.

    Args:
        query: Search query string

    Returns:
        Tuple of (formatted results for display, metrics dict with
        "semantic_match" and "latency_ms")
    """
    hits, elapsed_ms = run_search_function_and_time(query, search_vector_with_expansion)

    metrics = {
        "semantic_match": get_average_score(hits),
        "latency_ms": elapsed_ms,
    }

    rendered = format_results(hits[:5], "Stage 3: Query Expansion", metrics)
    return rendered, metrics
|
| 158 |
|
| 159 |
|
| 160 |
def search_stage_4(query: str) -> Tuple[str, Dict]:
|
src/config.py
CHANGED
|
@@ -2,7 +2,7 @@ import gradio as gr
|
|
| 2 |
|
| 3 |
# Fireworks AI Model Configuration
|
| 4 |
EMBEDDING_MODEL = "accounts/fireworks/models/qwen3-embedding-8b"
|
| 5 |
-
LLM_MODEL = "accounts/fireworks/models/
|
| 6 |
RERANKER_MODEL = "fireworks/qwen3-reranker-8b"
|
| 7 |
|
| 8 |
GRADIO_THEME = gr.themes.Base(
|
|
@@ -187,27 +187,27 @@ summary:hover {
|
|
| 187 |
EXAMPLE_QUERIES_BY_CATEGORY = {
|
| 188 |
"Toys & Games": {
|
| 189 |
"clear": "magnetic construction building blocks educational toy",
|
| 190 |
-
"somewhat_ambiguous": "
|
| 191 |
-
"ambiguous": "
|
| 192 |
},
|
| 193 |
"Home & Kitchen": {
|
| 194 |
"clear": "kids octopus comforter bedding set full size",
|
| 195 |
-
"somewhat_ambiguous": "
|
| 196 |
-
"ambiguous": "bedroom
|
| 197 |
},
|
| 198 |
"Clothing, Shoes & Jewelry": {
|
| 199 |
"clear": "star wars stormtrooper halloween costume kids",
|
| 200 |
-
"somewhat_ambiguous": "
|
| 201 |
-
"ambiguous": "
|
| 202 |
},
|
| 203 |
"Sports & Outdoors": {
|
| 204 |
"clear": "55 inch trampoline with safety net enclosure",
|
| 205 |
-
"somewhat_ambiguous": "
|
| 206 |
-
"ambiguous": "
|
| 207 |
},
|
| 208 |
"Baby Products": {
|
| 209 |
"clear": "nursery wall decor quotes motivational stickers",
|
| 210 |
-
"somewhat_ambiguous": "
|
| 211 |
-
"ambiguous": "
|
| 212 |
},
|
| 213 |
}
|
|
|
|
| 2 |
|
| 3 |
# Fireworks AI Model Configuration
|
| 4 |
EMBEDDING_MODEL = "accounts/fireworks/models/qwen3-embedding-8b"
|
| 5 |
+
LLM_MODEL = "accounts/fireworks/models/qwen3-8b"
|
| 6 |
RERANKER_MODEL = "fireworks/qwen3-reranker-8b"
|
| 7 |
|
| 8 |
GRADIO_THEME = gr.themes.Base(
|
|
|
|
| 187 |
EXAMPLE_QUERIES_BY_CATEGORY = {
|
| 188 |
"Toys & Games": {
|
| 189 |
"clear": "magnetic construction building blocks educational toy",
|
| 190 |
+
"somewhat_ambiguous": "creative play for young children",
|
| 191 |
+
"ambiguous": "keep kids busy",
|
| 192 |
},
|
| 193 |
"Home & Kitchen": {
|
| 194 |
"clear": "kids octopus comforter bedding set full size",
|
| 195 |
+
"somewhat_ambiguous": "cozy items for child's room",
|
| 196 |
+
"ambiguous": "make bedroom nicer",
|
| 197 |
},
|
| 198 |
"Clothing, Shoes & Jewelry": {
|
| 199 |
"clear": "star wars stormtrooper halloween costume kids",
|
| 200 |
+
"somewhat_ambiguous": "pretend play clothing",
|
| 201 |
+
"ambiguous": "halloween party",
|
| 202 |
},
|
| 203 |
"Sports & Outdoors": {
|
| 204 |
"clear": "55 inch trampoline with safety net enclosure",
|
| 205 |
+
"somewhat_ambiguous": "active outdoor toy",
|
| 206 |
+
"ambiguous": "yard activity",
|
| 207 |
},
|
| 208 |
"Baby Products": {
|
| 209 |
"clear": "nursery wall decor quotes motivational stickers",
|
| 210 |
+
"somewhat_ambiguous": "baby room essentials",
|
| 211 |
+
"ambiguous": "expecting soon",
|
| 212 |
},
|
| 213 |
}
|
src/fireworks/inference.py
CHANGED
|
@@ -1,11 +1,21 @@
|
|
| 1 |
import os
|
|
|
|
| 2 |
from openai import OpenAI
|
| 3 |
from dotenv import load_dotenv
|
| 4 |
from typing import List
|
| 5 |
-
from
|
|
|
|
| 6 |
|
| 7 |
load_dotenv()
|
| 8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
def create_client(api_key: str = None) -> OpenAI:
|
| 11 |
"""
|
|
@@ -19,10 +29,49 @@ def create_client(api_key: str = None) -> OpenAI:
|
|
| 19 |
)
|
| 20 |
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
def get_embedding(text: str) -> List[float]:
|
| 23 |
"""
|
| 24 |
-
Get embedding for a given text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
"""
|
| 26 |
-
|
| 27 |
-
response = client.embeddings.create(model=EMBEDDING_MODEL, input=text)
|
| 28 |
return response.data[0].embedding
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
+
import yaml
|
| 3 |
from openai import OpenAI
|
| 4 |
from dotenv import load_dotenv
|
| 5 |
from typing import List
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
from src.config import EMBEDDING_MODEL, LLM_MODEL
|
| 8 |
|
| 9 |
load_dotenv()
|
| 10 |
|
| 11 |
+
_FILE_PATH = Path(__file__).parents[2]
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def load_prompt_library():
    """
    Load prompts from YAML configuration.

    Reads ``configs/prompt_library.yaml`` under ``_FILE_PATH`` and parses it
    with ``yaml.safe_load``.

    Returns:
        The parsed YAML content
    """
    prompt_path = _FILE_PATH / "configs" / "prompt_library.yaml"
    # Decode explicitly as UTF-8 so parsing does not depend on the platform's
    # default locale encoding.
    with open(prompt_path, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)
|
| 18 |
+
|
| 19 |
|
| 20 |
def create_client(api_key: str = None) -> OpenAI:
|
| 21 |
"""
|
|
|
|
| 29 |
)
|
| 30 |
|
| 31 |
|
| 32 |
+
CLIENT = create_client()
|
| 33 |
+
PROMPT_LIBRARY = load_prompt_library()
|
| 34 |
+
|
| 35 |
+
|
| 36 |
def get_embedding(text: str) -> List[float]:
    """
    Return the embedding for a piece of text from the embeddings endpoint.

    Args:
        text: Input text to embed

    Returns:
        The embedding of the first item in the response
    """
    embedding_response = CLIENT.embeddings.create(input=text, model=EMBEDDING_MODEL)
    first_item = embedding_response.data[0]
    return first_item.embedding
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def expand_query(query: str) -> str:
    """
    Expand a search query using LLM with few-shot prompting.

    Sends the query as the user message, together with the "query_expansion"
    system prompt from the prompt library, to the chat completions endpoint.

    Args:
        query: Original search query

    Returns:
        Expanded query string with surrounding whitespace stripped, or the
        original query when the model returns no text
    """
    system_prompt = PROMPT_LIBRARY["query_expansion"]["system_prompt"]

    response = CLIENT.chat.completions.create(
        model=LLM_MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": query},
        ],
        temperature=0.3,
        max_tokens=100,
        reasoning_effort="none",
    )

    # message.content may be None or blank; calling .strip() on None would
    # raise, and an empty expansion would give the embedding step nothing to
    # work with, so fall back to the caller's original query.
    content = response.choices[0].message.content
    expanded = content.strip() if content else ""
    return expanded or query
|
src/search/vector_search.py
CHANGED
|
@@ -2,7 +2,7 @@ import numpy as np
|
|
| 2 |
import faiss
|
| 3 |
from typing import List, Dict
|
| 4 |
from pathlib import Path
|
| 5 |
-
from src.fireworks.inference import get_embedding
|
| 6 |
from constants.constants import FAISS_INDEX, PRODUCTS_DF
|
| 7 |
|
| 8 |
_FILE_PATH = Path(__file__).parents[2]
|
|
@@ -25,13 +25,8 @@ def search_vector(query: str, top_k: int = 10) -> List[Dict[str, any]]:
|
|
| 25 |
query_embedding = get_embedding(query)
|
| 26 |
query_vector = np.array([query_embedding], dtype=np.float32)
|
| 27 |
|
| 28 |
-
# Normalize query vector for cosine similarity
|
| 29 |
faiss.normalize_L2(query_vector)
|
| 30 |
-
|
| 31 |
-
# Unpack FAISS index tuple (index, embeddings)
|
| 32 |
faiss_index = FAISS_INDEX[0]
|
| 33 |
-
|
| 34 |
-
# Search FAISS index
|
| 35 |
distances, indices = faiss_index.search(query_vector, top_k)
|
| 36 |
|
| 37 |
# Convert L2 distances to similarity scores (0-1 range)
|
|
@@ -49,3 +44,23 @@ def search_vector(query: str, top_k: int = 10) -> List[Dict[str, any]]:
|
|
| 49 |
}
|
| 50 |
for idx, score in zip(indices[0], similarity_scores)
|
| 51 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
import faiss
|
| 3 |
from typing import List, Dict
|
| 4 |
from pathlib import Path
|
| 5 |
+
from src.fireworks.inference import get_embedding, expand_query
|
| 6 |
from constants.constants import FAISS_INDEX, PRODUCTS_DF
|
| 7 |
|
| 8 |
_FILE_PATH = Path(__file__).parents[2]
|
|
|
|
| 25 |
query_embedding = get_embedding(query)
|
| 26 |
query_vector = np.array([query_embedding], dtype=np.float32)
|
| 27 |
|
|
|
|
| 28 |
faiss.normalize_L2(query_vector)
|
|
|
|
|
|
|
| 29 |
faiss_index = FAISS_INDEX[0]
|
|
|
|
|
|
|
| 30 |
distances, indices = faiss_index.search(query_vector, top_k)
|
| 31 |
|
| 32 |
# Convert L2 distances to similarity scores (0-1 range)
|
|
|
|
| 44 |
}
|
| 45 |
for idx, score in zip(indices[0], similarity_scores)
|
| 46 |
]
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def search_vector_with_expansion(query: str, top_k: int = 10) -> List[Dict[str, any]]:
    """
    Stage 3 search: expand the query, then run the vector search on the result.

    The original and expanded queries are printed before searching.

    Args:
        query: Search query string
        top_k: Number of top results to request from the vector search (default: 10)

    Returns:
        Whatever ``search_vector`` returns for the expanded query
    """
    expanded_query = expand_query(query)

    print(f"Original: {query}")
    print(f"Expanded: {expanded_query}")

    matches = search_vector(expanded_query, top_k)
    return matches
|