# Source: Hugging Face Hub repository "traning", file searchHybridAsync.py
# (commit 9760f3f "Update searchHybridAsync.py", uploaded by aikobay, 32.7 kB).
import os
import torch
import pandas as pd
import logging
import faiss
import numpy as np
import time
import gensim
from fastapi import FastAPI, HTTPException, BackgroundTasks
from pydantic import BaseModel
from datasets import load_dataset
from huggingface_hub import login, hf_hub_download, HfApi, create_repo
from keybert import KeyBERT
from sentence_transformers import SentenceTransformer
from joblib import Parallel, delayed
from tqdm import tqdm
import tempfile
import re
import sys
import asyncio
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
# Logging configuration.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Shared thread pool used to keep blocking work off the event loop.
thread_pool = ThreadPoolExecutor(max_workers=os.cpu_count() or 4)

# FastAPI application instance.
app = FastAPI(title="๐Ÿš€ KeyBERT + Word2Vec ๊ธฐ๋ฐ˜ FAISS ๊ฒ€์ƒ‰ API", version="1.2")

# Report whether CUDA is available (informational only in this section).
device = "cuda" if torch.cuda.is_available() else "cpu"
logger.info(f"๐Ÿš€ ์‹คํ–‰ ๋””๋ฐ”์ด์Šค: {device.upper()}")

# Hugging Face login; without a token the service still starts but Hub
# access may be limited.
HF_API_TOKEN = os.getenv("HF_API_TOKEN")
if HF_API_TOKEN:
    logger.info("๐Ÿ”‘ Hugging Face API ๋กœ๊ทธ์ธ ์ค‘...")
    login(token=HF_API_TOKEN)
else:
    logger.error("โŒ HF_API_TOKEN์ด ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. ์ผ๋ถ€ ๊ธฐ๋Šฅ์ด ์ œํ•œ๋  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.")

# Word2Vec vectors used for keyword expansion. A failed load leaves
# ``word2vec_model`` as None, which disables expansion instead of aborting.
word2vec_model = None
try:
    logger.info("๐Ÿ”„ Word2Vec ๋ชจ๋ธ ๋กœ๋“œ ์ค‘...")
    MODEL_REPO = "aikobay/item-model"
    model_path = hf_hub_download(repo_id=MODEL_REPO, filename="item_vectors.bin", repo_type="dataset")
    word2vec_model = gensim.models.KeyedVectors.load_word2vec_format(model_path, binary=True)
    logger.info(f"โœ… Word2Vec ๋ชจ๋ธ ๋กœ๋“œ ์™„๋ฃŒ! ๋‹จ์–ด ์ˆ˜: {len(word2vec_model.key_to_index)}")
except Exception as e:
    logger.error(f"โŒ Word2Vec ๋ชจ๋ธ ๋กœ๋“œ ์‹คํŒจ: {e}")

# KeyBERT keyword extractor. The multilingual MiniLM model is loaded once and
# the same instance is handed to KeyBERT, instead of letting KeyBERT load a
# second copy of the identical model.
logger.info("๐Ÿ”„ KeyBERT ๋ชจ๋ธ ๋กœ๋“œ ์ค‘...")
original_embedding_model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")
kw_model = KeyBERT(model=original_embedding_model)
logger.info("โœ… KeyBERT ๋ชจ๋ธ ๋กœ๋“œ ์™„๋ฃŒ!")

# Embedding model used for indexing and search: try the Korean-specific
# model first and fall back to the multilingual model when it cannot load.
embedding_model = None
try:
    logger.info("๐Ÿ”„ ํ•œ๊ตญ์–ด ํŠนํ™” ์ž„๋ฒ ๋”ฉ ๋ชจ๋ธ๋กœ ๊ต์ฒด ์‹œ๋„...")
    embedding_model = SentenceTransformer("jhgan/ko-sroberta-multitask")
    logger.info("โœ… ํ•œ๊ตญ์–ด ํŠนํ™” ์ž„๋ฒ ๋”ฉ ๋ชจ๋ธ ๋กœ๋“œ ์™„๋ฃŒ!")
except Exception as e:
    logger.warning(f"โš ๏ธ ํ•œ๊ตญ์–ด ํŠนํ™” ๋ชจ๋ธ ๋กœ๋“œ ์‹คํŒจ, ๊ธฐ์กด ๋ชจ๋ธ ์œ ์ง€: {e}")
    embedding_model = original_embedding_model
# Load the data for sale items whose auctions are currently in progress
async def load_huggingface_jsonl(dataset_name, split="train"):
    """Fetch ``aikobay/<dataset_name>`` from the Hugging Face Hub as a DataFrame.

    The blocking download and conversion run on ``thread_pool`` so the event
    loop stays responsive.

    Args:
        dataset_name: Dataset name under the ``aikobay`` namespace.
        split: Dataset split to load.

    Returns:
        The split as a pandas DataFrame with every row that contains a
        missing value dropped, or an empty DataFrame when anything fails
        (the error is logged, not raised).
    """
    def _fetch_frame():
        hub_dataset = load_dataset(f"aikobay/{dataset_name}", split=split)
        frame = hub_dataset.to_pandas()
        return frame.dropna()

    try:
        running_loop = asyncio.get_event_loop()
        return await running_loop.run_in_executor(thread_pool, _fetch_frame)
    except Exception as e:
        logger.error(f"โŒ ๋ฐ์ดํ„ฐ ๋กœ๋“œ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
        return pd.DataFrame()
# Initial data load: the async loader is driven synchronously on a temporary
# event loop so the dataset is in memory before the app starts serving.
active_sale_items = None
try:
    loop = asyncio.new_event_loop()
    try:
        active_sale_items = loop.run_until_complete(
            load_huggingface_jsonl("initial_saleitem_dataset")
        )
    finally:
        # Release the temporary loop even when loading raises.
        loop.close()
    if active_sale_items.empty:
        logger.error("โŒ ๋ฐ์ดํ„ฐ์…‹์ด ๋น„์–ด ์žˆ์Šต๋‹ˆ๋‹ค. ํ”„๋กœ๊ทธ๋žจ์„ ์ข…๋ฃŒํ•ฉ๋‹ˆ๋‹ค.")
        # sys.exit rather than the interactive ``exit`` helper, which is only
        # installed by the ``site`` module. SystemExit is not an ``Exception``,
        # so it passes through the handler below.
        sys.exit(1)
    logger.info(f"โœ… ๊ฒฝ๋งค ์ƒํ’ˆ ๋ฐ์ดํ„ฐ ๋กœ๋“œ ์™„๋ฃŒ! ์ด {len(active_sale_items)}๊ฐœ ์ƒํ’ˆ")
except Exception as e:
    logger.error(f"โŒ ์ƒํ’ˆ ๋ฐ์ดํ„ฐ ๋กœ๋“œ ์‹คํŒจ: {e}")
    sys.exit(1)

# FAISS index state: the index object and the item names it was built from.
# Both are filled in later by load_faiss_index() / rebuild_faiss_index().
faiss_index = None
indexed_items = []
# Multi-core vectorization function
async def encode_texts_parallel(texts, batch_size=512):
    """Encode ``texts`` into float32 embedding vectors off the event loop.

    The texts are cut into slices of ``batch_size``; each slice is encoded by
    ``embedding_model`` through a joblib ``Parallel`` pool sized to the CPU
    count, and the whole job is submitted to ``thread_pool``.

    Args:
        texts: Sequence of strings to embed.
        batch_size: Number of texts handed to each ``encode`` call.

    Returns:
        A 2-D float32 array with one row per input text. Empty input yields a
        0-row array instead of failing inside ``np.vstack``.
    """
    if len(texts) == 0:
        # np.vstack([]) raises; return an empty matrix of the model's width.
        dim = embedding_model.get_sentence_embedding_dimension() or 0
        return np.empty((0, dim), dtype="float32")

    num_cores = os.cpu_count()
    logger.info(f"๐Ÿ”„ ๋ฉ€ํ‹ฐ์ฝ”์–ด ๋ฒกํ„ฐํ™” ์ง„ํ–‰ (์ฝ”์–ด ์ˆ˜: {num_cores})")

    def encode_batch(batch):
        return embedding_model.encode(batch, convert_to_numpy=True)

    text_batches = [texts[i:i + batch_size] for i in range(0, len(texts), batch_size)]

    def _encode_all():
        return Parallel(n_jobs=num_cores)(delayed(encode_batch)(batch) for batch in text_batches)

    loop = asyncio.get_running_loop()
    embeddings = await loop.run_in_executor(thread_pool, _encode_all)
    return np.vstack(embeddings).astype("float32")
# FAISS index save function (Hugging Face Hub)
async def save_faiss_index():
    """Upload the in-memory FAISS index and its item list to the Hugging Face Hub.

    The target dataset repository comes from the ``HF_INDEX_REPO`` environment
    variable (default ``aikobay/saleitem_faiss_index``); it is created as a
    private repository when ``repo_info`` fails. Three files are uploaded:
    ``faiss_index.bin``, ``indexed_items.txt`` (one item name per line) and a
    generated ``README.md``. All blocking work runs on ``thread_pool``.

    When the Hub path raises, the index and item list are written to the
    current working directory as a backup instead.

    Returns:
        ``True`` when the Hub upload or the local backup succeeded, ``False``
        when there is nothing to save or both attempts failed.
    """
    global faiss_index, indexed_items

    nothing_to_save = faiss_index is None or not indexed_items
    if nothing_to_save:
        logger.error("โŒ ์ €์žฅํ•  FAISS ์ธ๋ฑ์Šค๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
        return False

    def _push_to_hub(repo_id):
        api = HfApi()

        # Probe for the repository; any failure is treated as "missing".
        try:
            api.repo_info(repo_id=repo_id, repo_type="dataset")
            logger.info(f"โœ… ๊ธฐ์กด ๋ ˆํฌ์ง€ํ† ๋ฆฌ ์‚ฌ์šฉ: {repo_id}")
        except Exception:
            logger.info(f"๐Ÿ”„ ๋ ˆํฌ์ง€ํ† ๋ฆฌ๊ฐ€ ์กด์žฌํ•˜์ง€ ์•Š์•„ ์ƒˆ๋กœ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค: {repo_id}")
            create_repo(repo_id=repo_id, repo_type="dataset", private=True, exist_ok=True)
            logger.info(f"โœ… ๋ ˆํฌ์ง€ํ† ๋ฆฌ ์ƒ์„ฑ ์™„๋ฃŒ: {repo_id}")

        # Stage all three files in a scratch directory, then upload them.
        with tempfile.TemporaryDirectory() as temp_dir:
            faiss.write_index(faiss_index, os.path.join(temp_dir, "faiss_index.bin"))

            with open(os.path.join(temp_dir, "indexed_items.txt"), "w", encoding="utf-8") as items_file:
                items_file.write("\n".join(indexed_items))

            with open(os.path.join(temp_dir, "README.md"), "w", encoding="utf-8") as readme_file:
                readme_file.write(f"""# FAISS ์ธ๋ฑ์Šค ์ €์žฅ์†Œ
์ด ์ €์žฅ์†Œ๋Š” ์ƒํ’ˆ ๊ฒ€์ƒ‰์„ ์œ„ํ•œ FAISS ์ธ๋ฑ์Šค์™€ ๊ด€๋ จ ๋ฐ์ดํ„ฐ๋ฅผ ํฌํ•จํ•˜๊ณ  ์žˆ์Šต๋‹ˆ๋‹ค.
- ์ตœ์ข… ์—…๋ฐ์ดํŠธ: {pd.Timestamp.now()}
- ์ธ๋ฑ์Šค ํ•ญ๋ชฉ ์ˆ˜: {len(indexed_items)}
- ๋ชจ๋ธ: KeyBERT + Word2Vec
์ด ์ €์žฅ์†Œ๋Š” 'aikobay/initial_saleitem_dataset'์˜ ์ƒํ’ˆ ๋ฐ์ดํ„ฐ๋ฅผ ๊ธฐ๋ฐ˜์œผ๋กœ ์ƒ์„ฑ๋œ ๋ฒกํ„ฐ ์ธ๋ฑ์Šค๋ฅผ ์ €์žฅํ•˜๊ธฐ ์œ„ํ•ด ์ž๋™ ์ƒ์„ฑ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.
""")

            # Each file keeps its own name inside the repository.
            for file_name in ("faiss_index.bin", "indexed_items.txt", "README.md"):
                api.upload_file(
                    path_or_fileobj=os.path.join(temp_dir, file_name),
                    path_in_repo=file_name,
                    repo_id=repo_id,
                    repo_type="dataset",
                )

            logger.info(f"โœ… FAISS ์ธ๋ฑ์Šค๊ฐ€ Hugging Face Hub์— ์ €์žฅ๋˜์—ˆ์Šต๋‹ˆ๋‹ค. ๋ ˆํฌ: {repo_id}")
            return True

    def _backup_locally():
        local_path = os.path.join(os.getcwd(), "faiss_index.bin")
        faiss.write_index(faiss_index, local_path)
        with open("indexed_items.txt", "w", encoding="utf-8") as items_file:
            items_file.write("\n".join(indexed_items))
        logger.info(f"โœ… FAISS ์ธ๋ฑ์Šค๊ฐ€ ๋กœ์ปฌ์— ๋ฐฑ์—… ์ €์žฅ๋˜์—ˆ์Šต๋‹ˆ๋‹ค: {local_path}")
        return True

    try:
        target_repo = os.getenv("HF_INDEX_REPO", "aikobay/saleitem_faiss_index")
        running_loop = asyncio.get_event_loop()
        return await running_loop.run_in_executor(thread_pool, _push_to_hub, target_repo)
    except Exception as e:
        logger.error(f"โŒ FAISS ์ธ๋ฑ์Šค Hub ์ €์žฅ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
        # Hub upload failed: fall back to a backup in the working directory.
        try:
            running_loop = asyncio.get_event_loop()
            return await running_loop.run_in_executor(thread_pool, _backup_locally)
        except Exception as local_err:
            logger.error(f"โŒ ๋กœ์ปฌ ๋ฐฑ์—… ์ €์žฅ๋„ ์‹คํŒจ: {local_err}")
            return False
# FAISS index load function (Hugging Face Hub)
async def load_faiss_index():
    """Load the FAISS index and item list from the Hub, else from local files.

    The Hub repository is ``HF_INDEX_REPO`` (default
    ``aikobay/saleitem_faiss_index``). If anything on the Hub path raises,
    ``faiss_index.bin`` and ``indexed_items.txt`` in the working directory
    are tried instead. On success the module globals ``faiss_index`` and
    ``indexed_items`` are replaced.

    Returns:
        ``True`` when an index was loaded from either source, else ``False``.
    """
    global faiss_index, indexed_items

    repo_id = os.getenv("HF_INDEX_REPO", "aikobay/saleitem_faiss_index")

    def _read_pair(index_file, items_file):
        # The item list is stored as one name per line.
        index_obj = faiss.read_index(index_file)
        with open(items_file, "r", encoding="utf-8") as f:
            names = f.read().splitlines()
        return index_obj, names

    def _from_hub():
        api = HfApi()
        # Fail fast with a clear error when the repository is unreachable.
        try:
            api.repo_info(repo_id=repo_id, repo_type="dataset")
            logger.info(f"โœ… FAISS ์ธ๋ฑ์Šค ๋ ˆํฌ์ง€ํ† ๋ฆฌ ํ™•์ธ: {repo_id}")
        except Exception as repo_err:
            logger.warning(f"โš ๏ธ ๋ ˆํฌ์ง€ํ† ๋ฆฌ๊ฐ€ ์กด์žฌํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค: {repo_err}")
            raise FileNotFoundError("Hub ๋ ˆํฌ์ง€ํ† ๋ฆฌ๊ฐ€ ์กด์žฌํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค")

        downloaded = [
            hf_hub_download(repo_id=repo_id, filename=file_name, repo_type="dataset")
            for file_name in ("faiss_index.bin", "indexed_items.txt")
        ]
        return _read_pair(*downloaded)

    def _from_disk():
        local_index_path, local_items_path = "faiss_index.bin", "indexed_items.txt"
        if not (os.path.exists(local_index_path) and os.path.exists(local_items_path)):
            logger.warning("โš ๏ธ ๋กœ์ปฌ FAISS ์ธ๋ฑ์Šค ํŒŒ์ผ์ด ์กด์žฌํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค.")
            return None, None
        return _read_pair(local_index_path, local_items_path)

    try:
        running_loop = asyncio.get_event_loop()
        hub_index, hub_items = await running_loop.run_in_executor(thread_pool, _from_hub)
        faiss_index, indexed_items = hub_index, hub_items
        logger.info(f"โœ… FAISS ์ธ๋ฑ์Šค๊ฐ€ Hub์—์„œ ๋กœ๋“œ๋˜์—ˆ์Šต๋‹ˆ๋‹ค. ์ด {len(indexed_items)}๊ฐœ ์ƒํ’ˆ")
        return True
    except Exception as e:
        logger.warning(f"โš ๏ธ Hub์—์„œ FAISS ์ธ๋ฑ์Šค ๋กœ๋“œ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
        # Hub unavailable: look for files in the working directory.
        try:
            running_loop = asyncio.get_event_loop()
            disk_index, disk_items = await running_loop.run_in_executor(thread_pool, _from_disk)
            if disk_index is None:
                return False
            faiss_index, indexed_items = disk_index, disk_items
            logger.info(f"โœ… ๋กœ์ปฌ FAISS ์ธ๋ฑ์Šค ๋กœ๋“œ ์„ฑ๊ณต. ์ด {len(indexed_items)}๊ฐœ ์ƒํ’ˆ")
            return True
        except Exception as local_err:
            logger.error(f"โŒ ๋กœ์ปฌ FAISS ์ธ๋ฑ์Šค ๋กœ๋“œ ์ค‘ ์˜ค๋ฅ˜: {local_err}")
            return False
# Build the FAISS index
async def rebuild_faiss_index():
    """Rebuild the FAISS index from a fresh copy of the sale-item dataset.

    Item names are embedded, L2-normalised and added to an inner-product
    index, so search scores are cosine similarities. Everything is built in
    local variables; the module globals ``active_sale_items``,
    ``indexed_items`` and ``faiss_index`` are replaced together only once the
    new index exists. A failed reload or encode therefore leaves the
    previous data in place, and searches running during a rebuild never see
    new item names paired with the old index. The new index is then saved
    through ``save_faiss_index``.

    Returns:
        ``True`` when the index was rebuilt.

    Raises:
        RuntimeError: If the dataset could not be loaded (empty DataFrame).
    """
    global faiss_index, indexed_items, active_sale_items

    logger.info("๐Ÿ”„ FAISS ์ธ๋ฑ์Šค๋ฅผ ์žฌ๊ตฌ์ถ• ์ค‘...")

    # Load into a local first so a failed reload cannot wipe the live data.
    fresh_items = await load_huggingface_jsonl("initial_saleitem_dataset")
    if fresh_items.empty:
        logger.error("โŒ ์ƒํ’ˆ ๋ฐ์ดํ„ฐ๋ฅผ ๋กœ๋“œํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")
        raise RuntimeError("์ƒํ’ˆ ๋ฐ์ดํ„ฐ ๋กœ๋“œ ์‹คํŒจ")

    item_names = fresh_items["ITEMNAME"].tolist()
    logger.info(f"๐Ÿ”น ์ด {len(item_names)}๊ฐœ ์ƒํ’ˆ ๋ฒกํ„ฐํ™” ์‹œ์ž‘...")

    item_vectors = await encode_texts_parallel(item_names)

    # Normalise rows so inner product equals cosine similarity. A zero-norm
    # row is divided by 1 so it stays all-zero instead of becoming NaN.
    norms = np.linalg.norm(item_vectors, axis=1, keepdims=True)
    norms[norms == 0] = 1.0
    normalized_vectors = item_vectors / norms

    def _build_index():
        index = faiss.IndexFlatIP(normalized_vectors.shape[1])
        index.add(normalized_vectors)
        return index

    loop = asyncio.get_running_loop()
    new_index = await loop.run_in_executor(thread_pool, _build_index)

    # Publish the new state in one step (no await between the assignments).
    active_sale_items = fresh_items
    indexed_items = item_names
    faiss_index = new_index
    logger.info(f"โœ… FAISS ์ธ๋ฑ์Šค ๊ตฌ์ถ• ์™„๋ฃŒ! ์ด {len(indexed_items)}๊ฐœ ํ•ญ๋ชฉ.")

    # Persist the freshly built index.
    await save_faiss_index()
    return True
# Check the FAISS index state and build it only when necessary
async def check_faiss_index():
    """Make sure a FAISS index is available, loading or building one if needed.

    Does nothing when ``faiss_index`` is already set. Otherwise a stored
    index is loaded via ``load_faiss_index``; if that fails the index is
    rebuilt from the dataset.

    Raises:
        RuntimeError: If the index is still missing after both attempts.
    """
    global faiss_index

    if faiss_index is not None:
        return

    loaded = await load_faiss_index()
    if not loaded:
        logger.warning("โš ๏ธ ์ €์žฅ๋œ ์ธ๋ฑ์Šค๊ฐ€ ์—†์–ด ์ƒˆ๋กœ ๊ตฌ์ถ•ํ•ฉ๋‹ˆ๋‹ค.")
        await rebuild_faiss_index()

    # Neither loading nor rebuilding produced an index.
    if faiss_index is None:
        raise RuntimeError("FAISS ์ธ๋ฑ์Šค ์ดˆ๊ธฐํ™”์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค.")
# KeyBERT-based core keyword extraction
async def extract_keywords(query: str, top_n: int = 3):
    """Extract up to ``top_n`` key phrases (1-2 words) from ``query`` with KeyBERT.

    The extraction runs on ``thread_pool``. Only the phrases are returned;
    the value paired with each phrase by KeyBERT is discarded.
    """
    def _run_keybert():
        return kw_model.extract_keywords(query, keyphrase_ngram_range=(1, 2), top_n=top_n)

    scored_phrases = await asyncio.get_event_loop().run_in_executor(thread_pool, _run_keybert)
    return [entry[0] for entry in scored_phrases]
# Word2Vec-based keyword expansion function
async def expand_keywords_with_word2vec(keywords: list, max_new=5):
    """Extend ``keywords`` with similar words from the Word2Vec model.

    A keyword found in the model contributes up to ``max_new`` neighbours.
    A multi-word keyword that is not in the model is split on whitespace and
    each part longer than one character that is in the model contributes two
    neighbours.

    Args:
        keywords: Keywords to expand.
        max_new: Number of neighbours requested per whole keyword.

    Returns:
        The original keywords followed by the newly found words, with
        duplicates removed and first-seen order preserved, so the result is
        the same from run to run. ``keywords`` is returned unchanged when
        the model is not loaded or the expansion fails.
    """
    if word2vec_model is None:
        logger.warning("โš ๏ธ Word2Vec ๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•„ ํ™•์žฅ์„ ์ˆ˜ํ–‰ํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค.")
        return keywords

    def _expand():
        result = list(keywords)
        for keyword in keywords:
            if keyword in word2vec_model:
                similar_words = word2vec_model.most_similar(keyword, topn=max_new)
                result.extend(word for word, _ in similar_words)
                continue
            parts = keyword.split()
            if len(parts) > 1:
                # Compound phrase unknown to the model: try its words.
                for part in parts:
                    if len(part) > 1 and part in word2vec_model:
                        similar_words = word2vec_model.most_similar(part, topn=2)
                        result.extend(w for w, _ in similar_words)
        # dict.fromkeys removes duplicates while keeping insertion order
        # (a plain set would reorder the words unpredictably).
        return list(dict.fromkeys(result))

    try:
        loop = asyncio.get_running_loop()
        expanded_keywords = await loop.run_in_executor(thread_pool, _expand)
        logger.info(f"๐Ÿ” Word2Vec ํ™•์žฅ ํ‚ค์›Œ๋“œ: {expanded_keywords}")
        return expanded_keywords
    except Exception as e:
        logger.error(f"โŒ Word2Vec ํ‚ค์›Œ๋“œ ํ™•์žฅ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
        return keywords
# FAISS search function
async def search_faiss_with_keywords(query: str, top_k: int = 5, keywords=None, expanded_keywords=None):
    """Rank sale items for ``query`` using the query and its keywords.

    The query and every expanded keyword are embedded in one batch and
    L2-normalised. The query vector is searched for ``top_k * 2`` neighbours
    (weight 2.0) and each keyword vector for ``top_k`` neighbours
    (weight 0.5); weighted scores are summed per item name. Items scoring
    below 0.3 are dropped. If fewer than ``top_k`` items remain, items whose
    name contains the query (case-insensitive) are appended with score 1.0.

    Args:
        query: Raw search text.
        top_k: Maximum number of results; values <= 0 give an empty list.
        keywords: Pre-extracted keywords, or ``None`` to extract them here.
        expanded_keywords: Pre-expanded keywords, or ``None`` to expand here.

    Returns:
        Up to ``top_k`` dicts with keys ``ITEMSEQ``, ``ITEMNAME`` and
        ``score``.
    """
    await check_faiss_index()
    if top_k <= 0:
        # FAISS rejects k <= 0; there is nothing to return anyway.
        return []

    start_time = time.time()

    if keywords is None:
        keywords = await extract_keywords(query)
    logger.info(f"๐Ÿ” KeyBERT ์ถ”์ถœ ํ‚ค์›Œ๋“œ: {keywords}")

    if expanded_keywords is None:
        expanded_keywords = await expand_keywords_with_word2vec(keywords)

    loop = asyncio.get_running_loop()

    # Snapshot the shared state once so the awaits below all work against
    # the same index / name list / DataFrame.
    index = faiss_index
    items = indexed_items
    sale_items = active_sale_items
    item_count = len(items)

    # Encode the query and all expanded keywords in a single batch.
    texts_to_encode = [query, *expanded_keywords]

    def _encode_batch():
        vectors = embedding_model.encode(texts_to_encode, convert_to_numpy=True)
        norms = np.linalg.norm(vectors, axis=1, keepdims=True)
        norms[norms == 0] = 1.0  # keep zero vectors finite instead of NaN
        return (vectors / norms).astype("float32")

    all_vectors = await loop.run_in_executor(thread_pool, _encode_batch)
    query_vector = np.ascontiguousarray(all_vectors[:1])
    keyword_vectors = np.ascontiguousarray(all_vectors[1:])

    def _search_index():
        q_scores, q_ids = index.search(query_vector, top_k * 2)
        if len(keyword_vectors) == 0:
            return q_scores[0], q_ids[0], (), ()
        k_scores, k_ids = index.search(keyword_vectors, top_k)
        return q_scores[0], q_ids[0], k_scores, k_ids

    query_scores, query_ids, keyword_scores, keyword_ids = await loop.run_in_executor(
        thread_pool, _search_index
    )

    # FAISS pads short result lists with label -1, so the lower bound matters:
    # without it, -1 would silently address the last item.
    scored_results = {}
    for idx, score in zip(query_ids, query_scores):
        if 0 <= idx < item_count:
            # Hits arrive best-first; keep the best score for duplicate names.
            scored_results.setdefault(items[idx], 2.0 * float(score))

    for row_scores, row_ids in zip(keyword_scores, keyword_ids):
        for idx, score in zip(row_ids, row_scores):
            if 0 <= idx < item_count:
                item_name = items[idx]
                scored_results[item_name] = scored_results.get(item_name, 0.0) + 0.5 * float(score)

    def _lookup_item_seq(item_name):
        """Return ITEMSEQ of the first row named ``item_name``, or None."""
        try:
            item_seq = sale_items.loc[sale_items["ITEMNAME"] == item_name, "ITEMSEQ"].values[0]
        except (IndexError, KeyError):
            return None
        # numpy scalars (e.g. int64) are converted to plain Python values so
        # the response can be JSON-encoded.
        return item_seq.item() if isinstance(item_seq, np.generic) else item_seq

    sorted_results = sorted(scored_results.items(), key=lambda pair: pair[1], reverse=True)
    min_score_threshold = 0.3
    recommendations = []
    for item_name, score in sorted_results:
        if len(recommendations) >= top_k:
            break  # only the first top_k entries are returned
        if score < min_score_threshold:
            continue
        item_seq = _lookup_item_seq(item_name)
        if item_seq is None:
            continue
        recommendations.append({"ITEMSEQ": item_seq, "ITEMNAME": item_name, "score": float(score)})

    # Top up with plain substring matches when vector search found too few.
    if len(recommendations) < top_k:
        needed = top_k - len(recommendations)

        def _find_direct_matches():
            needle = query.lower()
            seen = {rec["ITEMNAME"] for rec in recommendations}
            matches = []
            for item_name in items:
                if len(matches) >= needed:
                    break
                if item_name in seen or needle not in item_name.lower():
                    continue
                item_seq = _lookup_item_seq(item_name)
                if item_seq is None:
                    continue
                seen.add(item_name)  # also prevents duplicate names in matches
                matches.append({"ITEMSEQ": item_seq, "ITEMNAME": item_name, "score": 1.0})
            return matches

        direct_matches = await loop.run_in_executor(thread_pool, _find_direct_matches)
        recommendations.extend(direct_matches)

    logger.info(f"๐Ÿ” ๊ฒ€์ƒ‰ ์™„๋ฃŒ | ๊ฑธ๋ฆฐ ์‹œ๊ฐ„: {time.time() - start_time:.4f}์ดˆ | ๊ฒฐ๊ณผ ์ˆ˜: {len(recommendations)}")
    return recommendations[:top_k]
# API request model
class RecommendRequest(BaseModel):
    """Request body accepted by the ``/api/recommend`` endpoint."""

    # Free-text search query.
    search_query: str
    # Maximum number of recommendations to return.
    top_k: int = 5
    # Whether keyword expansion should be used.
    use_expansion: bool = True
# Recommendation API endpoint
@app.post("/api/recommend")
async def recommend(request: RecommendRequest, background_tasks: BackgroundTasks):
    """Search/recommend items for a query via keyword extraction and FAISS.

    Keywords are extracted from the query, optionally expanded with
    Word2Vec, and passed to ``search_faiss_with_keywords``. An index health
    check is queued as a background task, so it runs after the response.

    Returns:
        A dict with ``query``, ``recommendations``, ``keywords`` and
        ``expanded_keywords``.

    Raises:
        HTTPException: Status 500 wrapping any error raised while processing.
    """
    try:
        logger.info(f"๐Ÿ“ ๊ฒ€์ƒ‰ ์š”์ฒญ: '{request.search_query}' (top_k: {request.top_k}, ํ™•์žฅ: {request.use_expansion})")

        keywords = await extract_keywords(request.search_query)

        # Expansion needs both the request flag and a loaded Word2Vec model.
        if not request.use_expansion or word2vec_model is None:
            expanded_keywords = keywords
            logger.info(f"๐Ÿ” ํ‚ค์›Œ๋“œ ํ™•์žฅ ์—†์ด ์ง„ํ–‰: {keywords}")
        else:
            expanded_keywords = await expand_keywords_with_word2vec(keywords)

        recommendations = await search_faiss_with_keywords(
            request.search_query, request.top_k, keywords, expanded_keywords
        )

        if not recommendations:
            logger.warning(f"โš ๏ธ ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ ์—†์Œ: '{request.search_query}'")
        else:
            logger.info(f"๐Ÿ” ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ: {[r['ITEMNAME'] for r in recommendations]}")

        # Queued, not awaited: the client response is not delayed by it.
        background_tasks.add_task(check_index_health)

        payload = {
            "query": request.search_query,
            "recommendations": recommendations,
            "keywords": keywords,
            "expanded_keywords": expanded_keywords,
        }
        return payload
    except Exception as e:
        logger.error(f"โŒ ์ถ”์ฒœ ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜: {str(e)}")
        raise HTTPException(status_code=500, detail=f"์ถ”์ฒœ ์˜ค๋ฅ˜: {str(e)}")
# ์ธ๋ฑ์Šค ์ƒํƒœ ํ™•์ธ ํ•จ์ˆ˜ (๋ฐฑ๊ทธ๋ผ์šด๋“œ ํƒœ์Šคํฌ์šฉ)
async def check_index_health():
    """Background task: make sure the FAISS index is loaded.

    When ``faiss_index`` is missing, ``check_faiss_index`` is called to load
    or build it. Errors are logged and never propagated.
    """
    try:
        index_missing = faiss_index is None
        if index_missing:
            logger.warning("โš ๏ธ ๋ฐฑ๊ทธ๋ผ์šด๋“œ ์ฒดํฌ: FAISS ์ธ๋ฑ์Šค๊ฐ€ ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
            await check_faiss_index()

        # Further health checks can be added here.
        logger.debug("โœ… ์ธ๋ฑ์Šค ์ƒํƒœ ํ™•์ธ ์™„๋ฃŒ")
    except Exception as e:
        logger.error(f"โŒ ๋ฐฑ๊ทธ๋ผ์šด๋“œ ์ธ๋ฑ์Šค ์ฒดํฌ ์ค‘ ์˜ค๋ฅ˜: {str(e)}")
# Similar-word search API
@app.post("/api/similar_words")
async def similar_words(word: str, top_k: int = 10):
    """Return the ``top_k`` Word2Vec neighbours of ``word``.

    Returns:
        ``{"error": ...}`` when the model is not loaded; otherwise a dict
        with ``word`` and ``similar_words`` (``word`` / ``similarity``
        entries), plus a ``message`` when no neighbours were found.

    Raises:
        HTTPException: Status 500 wrapping any error raised by the lookup.
    """
    try:
        if word2vec_model is None:
            return {"error": "Word2Vec ๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."}

        def _neighbours():
            if word in word2vec_model:
                return [
                    {"word": w, "similarity": float(s)}
                    for w, s in word2vec_model.most_similar(word, topn=top_k)
                ]
            return []

        found = await asyncio.get_event_loop().run_in_executor(thread_pool, _neighbours)

        response = {"word": word, "similar_words": found}
        if not found:
            response["message"] = "๋‹จ์–ด๊ฐ€ ๋ชจ๋ธ์— ์—†์Šต๋‹ˆ๋‹ค."
        return response
    except Exception as e:
        logger.error(f"โŒ ์œ ์‚ฌ ๋‹จ์–ด ๊ฒ€์ƒ‰ ์ค‘ ์˜ค๋ฅ˜: {str(e)}")
        raise HTTPException(status_code=500, detail=f"์œ ์‚ฌ ๋‹จ์–ด ๊ฒ€์ƒ‰ ์˜ค๋ฅ˜: {str(e)}")
# FAISS index refresh API (runs only when explicitly requested)
@app.post("/api/update_index")
async def update_index(background_tasks: BackgroundTasks):
    """Schedule a full FAISS index rebuild as a background task.

    The response only confirms that the rebuild was queued; the rebuild
    itself is performed by ``rebuild_and_log_index`` after the response.

    Raises:
        HTTPException: Status 500 if the task could not be queued.
    """
    try:
        background_tasks.add_task(rebuild_and_log_index)
    except Exception as e:
        logger.exception("โŒ [API] ์ธ๋ฑ์Šค ์—…๋ฐ์ดํŠธ ์ฒ˜๋ฆฌ ์ค‘ ์˜ˆ์™ธ ๋ฐœ์ƒ")
        raise HTTPException(status_code=500, detail=f"์ธ๋ฑ์Šค ์—…๋ฐ์ดํŠธ ์‹คํŒจ: {str(e)}")
    else:
        return {"message": "โœ… FAISS ์ธ๋ฑ์Šค ์—…๋ฐ์ดํŠธ๊ฐ€ ๋ฐฑ๊ทธ๋ผ์šด๋“œ์—์„œ ์‹œ์ž‘๋˜์—ˆ์Šต๋‹ˆ๋‹ค."}
# Index rebuild function for background tasks
async def rebuild_and_log_index():
    """Rebuild the FAISS index in the background and log how long it took.

    Errors from ``rebuild_faiss_index`` are logged and swallowed.
    """
    try:
        logger.info("๐Ÿ”„ ๋ฐฑ๊ทธ๋ผ์šด๋“œ์—์„œ ์ธ๋ฑ์Šค ์žฌ๊ตฌ์ถ• ์‹œ์ž‘")
        started_at = time.time()
        await rebuild_faiss_index()
        elapsed = time.time() - started_at
        logger.info(f"โœ… ๋ฐฑ๊ทธ๋ผ์šด๋“œ ์ธ๋ฑ์Šค ์žฌ๊ตฌ์ถ• ์™„๋ฃŒ! ์†Œ์š” ์‹œ๊ฐ„: {elapsed:.2f}์ดˆ")
    except Exception as e:
        logger.error(f"โŒ ๋ฐฑ๊ทธ๋ผ์šด๋“œ ์ธ๋ฑ์Šค ์žฌ๊ตฌ์ถ• ์ค‘ ์˜ค๋ฅ˜: {str(e)}")
# โœ… ์ธ๋ฑ์Šค ๋””๋ฒ„๊น… API
@app.get("/api/debug_index")
async def debug_index(query: str, top_k: int = 20):
    """Return raw FAISS hits and simple string matches for ``query``.

    The query is embedded, normalised and searched for ``top_k`` neighbours.
    The response also lists up to five indexed names containing the query
    and all names equal to it (both case-insensitive).

    Returns:
        A dict with ``query``, ``vector_norm`` (norm of the raw embedding),
        ``contains_query``, ``exact_matches`` and ``results`` (``rank``,
        ``index``, ``item_name``, ``distance/score`` per hit).

    Raises:
        HTTPException: Status 500 wrapping any error raised while processing.
    """
    try:
        await check_faiss_index()
        loop = asyncio.get_running_loop()

        def _get_vector():
            vector = embedding_model.encode(query, convert_to_numpy=True).astype("float32")
            norm = np.linalg.norm(vector)
            # A zero vector cannot be normalised; keep it rather than emit NaN.
            normalized_vector = vector / norm if norm > 0 else vector
            return normalized_vector, norm

        normalized_vector, norm = await loop.run_in_executor(thread_pool, _get_vector)

        def _search():
            return faiss_index.search(np.array([normalized_vector]), top_k)

        distances, indices = await loop.run_in_executor(thread_pool, _search)

        # FAISS pads missing hits with label -1; the lower bound keeps those
        # from being reported as the last indexed item.
        item_count = len(indexed_items)
        results = []
        for rank, (idx, dist) in enumerate(zip(indices[0], distances[0]), start=1):
            if 0 <= idx < item_count:
                results.append({
                    "rank": rank,
                    "index": int(idx),
                    "item_name": indexed_items[idx],
                    "distance/score": float(dist),
                })

        def _find_matches():
            needle = query.lower()  # lower-case the query once, not per item
            contains, exact = [], []
            for item in indexed_items:
                lowered = item.lower()
                if needle in lowered:
                    contains.append(item)
                if needle == lowered:
                    exact.append(item)
            return contains[:5], exact

        contains_query, exact_matches = await loop.run_in_executor(thread_pool, _find_matches)

        return {
            "query": query,
            "vector_norm": float(norm),
            "contains_query": contains_query,
            "exact_matches": exact_matches,
            "results": results,
        }
    except Exception as e:
        logger.error(f"โŒ ์ธ๋ฑ์Šค ๋””๋ฒ„๊น… ์ค‘ ์˜ค๋ฅ˜: {str(e)}")
        raise HTTPException(status_code=500, detail=f"์ธ๋ฑ์Šค ๋””๋ฒ„๊น… ์˜ค๋ฅ˜: {str(e)}")
# Substring (text containment) search API
@app.get("/api/text_search")
async def text_search(query: str, top_k: int = 10):
    """Find indexed items whose name contains ``query`` (case-insensitive).

    Names equal to the query are tagged ``"exact"`` and listed first; all
    other hits are tagged ``"contains"``. Items whose ``ITEMSEQ`` cannot be
    looked up are skipped.

    Returns:
        A dict with ``query``, ``total_matches`` (count before truncation)
        and ``results`` (the first ``top_k`` matches).

    Raises:
        HTTPException: Status 500 wrapping any error raised by the search.
    """
    try:
        loop = asyncio.get_running_loop()

        def _text_search():
            needle = query.lower()  # lower-case the query once, not per item
            exact_matches = []
            partial_matches = []
            for item_name in indexed_items:
                lowered = item_name.lower()
                if needle not in lowered:
                    continue
                try:
                    item_seq = active_sale_items.loc[
                        active_sale_items["ITEMNAME"] == item_name, "ITEMSEQ"
                    ].values[0]
                except (IndexError, KeyError):
                    continue
                # numpy scalars (e.g. int64) become plain Python values so
                # the response can be JSON-encoded.
                if isinstance(item_seq, np.generic):
                    item_seq = item_seq.item()
                is_exact = lowered == needle
                entry = {
                    "ITEMSEQ": item_seq,
                    "ITEMNAME": item_name,
                    "match_type": "exact" if is_exact else "contains",
                }
                (exact_matches if is_exact else partial_matches).append(entry)
            # Exact matches come first.
            return exact_matches + partial_matches

        results = await loop.run_in_executor(thread_pool, _text_search)
        logger.info(f"๐Ÿ” ํ…์ŠคํŠธ ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ: {len(results)}๊ฐœ ์ฐพ์Œ, ์ฟผ๋ฆฌ: '{query}'")
        return {
            "query": query,
            "total_matches": len(results),
            "results": results[:top_k],
        }
    except Exception as e:
        logger.error(f"โŒ ํ…์ŠคํŠธ ๊ฒ€์ƒ‰ ์ค‘ ์˜ค๋ฅ˜: {str(e)}")
        raise HTTPException(status_code=500, detail=f"ํ…์ŠคํŠธ ๊ฒ€์ƒ‰ ์˜ค๋ฅ˜: {str(e)}")
# Run the FastAPI application
if __name__ == "__main__":
    # Before serving, try to load a stored index; build one right away when
    # none can be loaded. The async helpers are driven on a temporary event
    # loop that is closed even if loading or rebuilding raises.
    try:
        loop = asyncio.new_event_loop()
        try:
            if not loop.run_until_complete(load_faiss_index()):
                logger.warning("โš ๏ธ ๊ธฐ์กด ์ธ๋ฑ์Šค ๋กœ๋“œ์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค. ์ฆ‰์‹œ ์ƒˆ ์ธ๋ฑ์Šค๋ฅผ ๊ตฌ์ถ•ํ•ฉ๋‹ˆ๋‹ค.")
                loop.run_until_complete(rebuild_faiss_index())
                logger.info("โœ… FAISS ์ธ๋ฑ์Šค ์ƒ์„ฑ ์™„๋ฃŒ!")
            else:
                logger.info("โœ… ๊ธฐ์กด ์ธ๋ฑ์Šค๋ฅผ ์„ฑ๊ณต์ ์œผ๋กœ ๋กœ๋“œํ–ˆ์Šต๋‹ˆ๋‹ค.")
        finally:
            loop.close()
    except Exception as e:
        # Start anyway: check_faiss_index() retries on the first search.
        logger.error(f"โŒ ์ธ๋ฑ์Šค ์ดˆ๊ธฐ ๊ตฌ์ถ• ์‹คํŒจ: {e}")
        logger.warning("โš ๏ธ ์ธ๋ฑ์Šค ์—†์ด ์‹œ์ž‘ํ•ฉ๋‹ˆ๋‹ค. ๊ฒ€์ƒ‰ ๊ธฐ๋Šฅ์ด ์ œํ•œ๋  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.")

    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)