import os
import torch
import pandas as pd
import logging
import faiss
import numpy as np
import time
import gensim
from fastapi import FastAPI, HTTPException, BackgroundTasks
from pydantic import BaseModel
from datasets import load_dataset
from huggingface_hub import login, hf_hub_download, HfApi, create_repo
from keybert import KeyBERT
from sentence_transformers import SentenceTransformer
from joblib import Parallel, delayed
from tqdm import tqdm
import tempfile
import re
import sys
import asyncio
import gc
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Thread pool for running blocking work off the event loop
thread_pool = ThreadPoolExecutor(max_workers=min(32, os.cpu_count() * 2))

# Bookkeeping for periodic memory cleanup
last_gc_time = time.time()
request_count = 0
CLEANUP_INTERVAL = 100

app = FastAPI(title="KeyBERT + Word2Vec based FAISS Search API", version="1.2")

device = "cuda" if torch.cuda.is_available() else "cpu"
logger.info(f"Running on device: {device.upper()}")

HF_API_TOKEN = os.getenv("HF_API_TOKEN")
if HF_API_TOKEN:
    logger.info("Logging in to the Hugging Face API...")
    login(token=HF_API_TOKEN)
else:
    logger.error("HF_API_TOKEN is not set. Some features may be limited.")
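
# Usage note (assumption, not stated in the original source): the token is read from the
# environment, so a deployment would typically export it before starting the app, e.g.
#   export HF_API_TOKEN=hf_xxxxxxxxxxxx
# Without it, downloads from and uploads to the private Hub repositories used below may fail.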

# --- Word2Vec model ---
word2vec_model = None
try:
    logger.info("Loading Word2Vec model...")
    MODEL_REPO = "aikobay/item-model"
    model_path = hf_hub_download(repo_id=MODEL_REPO, filename="item_vectors.bin", repo_type="dataset")
    word2vec_model = gensim.models.KeyedVectors.load_word2vec_format(model_path, binary=True)
    logger.info(f"Word2Vec model loaded. Vocabulary size: {len(word2vec_model.key_to_index)}")
except Exception as e:
    logger.error(f"Failed to load Word2Vec model: {e}")

# --- KeyBERT model ---
logger.info("Loading KeyBERT model...")
kw_model = KeyBERT("paraphrase-multilingual-MiniLM-L12-v2")
original_embedding_model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")
logger.info("KeyBERT model loaded.")

# --- Embedding model (Korean-specialized, with fallback to the multilingual model) ---
embedding_model = None
try:
    logger.info("Trying to switch to a Korean-specialized embedding model...")
    embedding_model = SentenceTransformer("jhgan/ko-sroberta-multitask")
    logger.info("Korean-specialized embedding model loaded.")
except Exception as e:
    logger.warning(f"Failed to load the Korean-specialized model, keeping the original model: {e}")
    embedding_model = original_embedding_model

if device == "cuda":
    try:
        embedding_model.to(device)
        embedding_model.eval()
        logger.info("Embedding model moved to GPU.")
    except Exception as e:
        logger.error(f"GPU model initialization error: {e}")
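
# Optimization hint (assumption, not in the original code): KeyBERT can also be constructed
# around an existing SentenceTransformer instance, which would avoid loading the multilingual
# MiniLM weights twice, e.g.:
#   kw_model = KeyBERT(model=original_embedding_model)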


async def load_huggingface_jsonl(dataset_name, split="train"):
    """Asynchronously load a dataset from the Hugging Face Hub."""
    try:
        loop = asyncio.get_event_loop()

        def _load_dataset():
            repo_id = f"aikobay/{dataset_name}"
            dataset = load_dataset(repo_id, split=split)
            return dataset.to_pandas().dropna()

        df = await loop.run_in_executor(thread_pool, _load_dataset)
        return df
    except Exception as e:
        logger.error(f"Error while loading data: {e}")
        return pd.DataFrame()


# Load the active sale item data at startup
active_sale_items = None
try:
    loop = asyncio.new_event_loop()
    active_sale_items = loop.run_until_complete(load_huggingface_jsonl("initial_saleitem_dataset"))
    loop.close()

    if active_sale_items.empty:
        logger.error("The dataset is empty. Exiting the program.")
        exit(1)
    logger.info(f"Sale item data loaded. Total items: {len(active_sale_items)}")
except Exception as e:
    logger.error(f"Failed to load sale item data: {e}")
    exit(1)
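
# Assumed data layout (inferred from how the dataframe is used below, not from a schema in
# the original source): each row of `active_sale_items` carries at least an "ITEMSEQ"
# identifier and an "ITEMNAME" string; only those two columns are consumed by the search code.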


# FAISS index state
faiss_index = None
indexed_items = []


async def cleanup_memory():
    """Run periodic memory cleanup."""
    global last_gc_time

    current_time = time.time()

    # Only collect if at least 15 seconds have passed since the last cleanup
    if current_time - last_gc_time > 15:
        gc.collect()

        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        last_gc_time = current_time
        logger.debug("Memory cleanup complete")
        return True

    return False
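
# Cleanup cadence (as written above): cleanup_memory() is invoked every CLEANUP_INTERVAL
# requests from the search path, but the 15-second guard coalesces back-to-back triggers,
# so garbage collection and CUDA cache freeing never run more than about four times a minute.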


async def encode_texts_parallel(texts, batch_size=1024):
    """Vectorize texts on the GPU while avoiding memory build-up."""
    if not texts:
        return np.array([]).astype("float32")

    try:
        loop = asyncio.get_event_loop()

        def _encode_efficiently():
            with torch.no_grad():
                return embedding_model.encode(
                    texts,
                    batch_size=batch_size,
                    convert_to_numpy=True,
                    show_progress_bar=False,
                    device=device,
                    normalize_embeddings=True
                )

        embeddings = await loop.run_in_executor(thread_pool, _encode_efficiently)
        return embeddings.astype("float32")

    except Exception as e:
        logger.error(f"Vectorization error: {str(e)}")
        return np.array([]).astype("float32")

    finally:
        if device == "cuda":
            torch.cuda.empty_cache()
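
# Because the embeddings are L2-normalized here (normalize_embeddings=True) and the FAISS
# indexes built below use inner-product quantizers (IndexFlatIP), the "distances" returned
# by index.search() are cosine similarities: higher scores mean closer matches.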


async def save_faiss_index():
    """Save the FAISS index to the Hugging Face Hub (async)."""
    global faiss_index, indexed_items

    if faiss_index is None or not indexed_items:
        logger.error("There is no FAISS index to save.")
        return False

    try:
        repo_id = os.getenv("HF_INDEX_REPO", "aikobay/saleitem_faiss_index")

        loop = asyncio.get_event_loop()

        def _save_index():
            api = HfApi()

            # Reuse the repository if it exists, otherwise create it
            try:
                api.repo_info(repo_id=repo_id, repo_type="dataset")
                logger.info(f"Using existing repository: {repo_id}")
            except Exception:
                logger.info(f"Repository does not exist, creating a new one: {repo_id}")
                create_repo(
                    repo_id=repo_id,
                    repo_type="dataset",
                    private=True,
                    exist_ok=True
                )
                logger.info(f"Repository created: {repo_id}")

            with tempfile.TemporaryDirectory() as temp_dir:
                index_path = os.path.join(temp_dir, "faiss_index.bin")
                items_path = os.path.join(temp_dir, "indexed_items.txt")

                faiss.write_index(faiss_index, index_path)

                with open(items_path, "w", encoding="utf-8") as f:
                    f.write("\n".join(indexed_items))

                readme_path = os.path.join(temp_dir, "README.md")
                with open(readme_path, "w", encoding="utf-8") as f:
                    f.write(f"""# FAISS index repository

This repository contains the FAISS index and related data used for item search.

- Last updated: {pd.Timestamp.now()}
- Number of indexed items: {len(indexed_items)}
- Model: KeyBERT + Word2Vec

This repository was generated automatically to store the vector index built from the item
data in 'aikobay/initial_saleitem_dataset'.
""")

                for file_path, file_name in [
                    (index_path, "faiss_index.bin"),
                    (items_path, "indexed_items.txt"),
                    (readme_path, "README.md")
                ]:
                    api.upload_file(
                        path_or_fileobj=file_path,
                        path_in_repo=file_name,
                        repo_id=repo_id,
                        repo_type="dataset"
                    )

            logger.info(f"FAISS index saved to the Hugging Face Hub. Repo: {repo_id}")
            return True

        result = await loop.run_in_executor(thread_pool, _save_index)
        return result

    except Exception as e:
        logger.error(f"Error while saving the FAISS index to the Hub: {e}")

        # Fall back to a local backup if the Hub upload fails
        try:
            loop = asyncio.get_event_loop()

            def _local_backup():
                local_path = os.path.join(os.getcwd(), "faiss_index.bin")
                faiss.write_index(faiss_index, local_path)
                with open("indexed_items.txt", "w", encoding="utf-8") as f:
                    f.write("\n".join(indexed_items))
                logger.info(f"FAISS index backed up locally: {local_path}")
                return True

            result = await loop.run_in_executor(thread_pool, _local_backup)
            return result
        except Exception as local_err:
            logger.error(f"Local backup also failed: {local_err}")
            return False


async def load_faiss_index():
    """Load the FAISS index from the Hugging Face Hub (async)."""
    global faiss_index, indexed_items

    repo_id = os.getenv("HF_INDEX_REPO", "aikobay/saleitem_faiss_index")

    try:
        loop = asyncio.get_event_loop()

        def _load_index():
            api = HfApi()
            try:
                api.repo_info(repo_id=repo_id, repo_type="dataset")
                logger.info(f"FAISS index repository found: {repo_id}")
            except Exception as repo_err:
                logger.warning(f"Repository does not exist: {repo_err}")
                raise FileNotFoundError("The Hub repository does not exist")

            index_path = hf_hub_download(
                repo_id=repo_id,
                filename="faiss_index.bin",
                repo_type="dataset"
            )

            items_path = hf_hub_download(
                repo_id=repo_id,
                filename="indexed_items.txt",
                repo_type="dataset"
            )

            loaded_index = faiss.read_index(index_path)
            with open(items_path, "r", encoding="utf-8") as f:
                loaded_items = f.read().splitlines()

            return loaded_index, loaded_items

        loaded_index, loaded_items = await loop.run_in_executor(thread_pool, _load_index)

        faiss_index = loaded_index
        indexed_items = loaded_items

        logger.info(f"FAISS index loaded from the Hub. Total items: {len(indexed_items)}")
        return True

    except Exception as e:
        logger.warning(f"Error while loading the FAISS index from the Hub: {e}")

        # Fall back to a local copy of the index
        try:
            loop = asyncio.get_event_loop()

            def _load_local():
                local_index_path = "faiss_index.bin"
                local_items_path = "indexed_items.txt"

                if os.path.exists(local_index_path) and os.path.exists(local_items_path):
                    loaded_index = faiss.read_index(local_index_path)
                    with open(local_items_path, "r", encoding="utf-8") as f:
                        loaded_items = f.read().splitlines()
                    return loaded_index, loaded_items
                else:
                    logger.warning("Local FAISS index files do not exist.")
                    return None, None

            result = await loop.run_in_executor(thread_pool, _load_local)

            if result[0] is not None:
                faiss_index, indexed_items = result
                logger.info(f"Local FAISS index loaded. Total items: {len(indexed_items)}")
                return True
            else:
                return False

        except Exception as local_err:
            logger.error(f"Error while loading the local FAISS index: {local_err}")
            return False


async def rebuild_faiss_index():
    """Rebuild the FAISS index from scratch as an IVF index (speed-optimized)."""
    global faiss_index, indexed_items, active_sale_items

    logger.info("Rebuilding the FAISS index as a fast IVF index...")

    # Reload the latest item data
    active_sale_items = await load_huggingface_jsonl("initial_saleitem_dataset")
    if active_sale_items.empty:
        logger.error("Could not load the item data.")
        raise RuntimeError("Failed to load item data")

    item_names = active_sale_items["ITEMNAME"].tolist()
    indexed_items = item_names

    total_items = len(item_names)
    logger.info(f"Starting fast vectorization of {total_items} items...")

    item_vectors = await encode_texts_parallel(item_names, batch_size=2048)

    if device == "cuda":
        torch.cuda.empty_cache()

    loop = asyncio.get_event_loop()

    def _build_ivf_index():
        dimension = item_vectors.shape[1]

        # Number of IVF clusters, clamped to a reasonable range
        nlist = int(np.sqrt(total_items) * 4)
        nlist = max(32, min(nlist, 1024))

        # Product-quantization parameters (used only for larger corpora)
        M = min(64, dimension // 2)
        nbits = 8

        if total_items > 10000:
            # Large corpus: IVF + product quantization to keep memory low
            quantizer = faiss.IndexFlatIP(dimension)
            index = faiss.IndexIVFPQ(quantizer, dimension, nlist, M, nbits)
        else:
            # Smaller corpus: IVF over flat (uncompressed) vectors
            quantizer = faiss.IndexFlatIP(dimension)
            index = faiss.IndexIVFFlat(quantizer, dimension, nlist)

        index.train(item_vectors)
        index.add(item_vectors)

        # Number of clusters probed at query time (speed/recall trade-off)
        index.nprobe = min(32, nlist // 4)

        logger.info(f"IVF index built: clusters={nlist}, nprobe={index.nprobe}")
        return index

    faiss_index = await loop.run_in_executor(thread_pool, _build_ivf_index)

    logger.info(f"Fast FAISS index built. Total items: {len(indexed_items)}")

    if device == "cuda":
        torch.cuda.empty_cache()
        gc.collect()

    await save_faiss_index()
    return True
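
# Worked example of the IVF sizing above (illustrative numbers, not measurements from the
# original deployment): with ~10,000 items, nlist = int(sqrt(10000) * 4) = 400 clusters and
# nprobe = min(32, 400 // 4) = 32, so each query probes roughly 32/400 = 8% of the clusters.
# For typical sentence-transformer dimensions (384 or 768), M = min(64, dimension // 2) = 64
# PQ sub-quantizers would be used once the corpus exceeds 10,000 items.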


async def check_faiss_index():
    """Make sure a FAISS index exists, building one if necessary (async)."""
    global faiss_index

    if faiss_index is None:
        if not await load_faiss_index():
            logger.warning("No saved index found, building a new one.")
            await rebuild_faiss_index()

    if faiss_index is None:
        raise RuntimeError("Failed to initialize the FAISS index.")


async def extract_keywords(query: str, top_n: int = 2):
    """Optimized KeyBERT keyword extraction (tuned for speed)."""
    # Very short queries are used as-is
    if len(query) <= 3:
        return [query]

    loop = asyncio.get_event_loop()

    def _optimized_extract():
        return kw_model.extract_keywords(
            query,
            keyphrase_ngram_range=(1, 1),
            stop_words=["이", "그", "저", "을", "를", "은", "에서", "의", "는"],
            use_mmr=True,
            diversity=0.5,
            top_n=top_n
        )

    try:
        keywords = await loop.run_in_executor(thread_pool, _optimized_extract)

        # Keep only keywords with a meaningful relevance score
        filtered = [(k, s) for k, s in keywords if s > 0.2]
        return [k[0] for k in filtered]
    except Exception as e:
        logger.error(f"Keyword extraction error: {str(e)}")
        # Fall back to the first words of the query
        return query.split()[:2]
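
# Note on the settings above: use_mmr=True with diversity=0.5 balances keyword relevance
# against redundancy, so for a multi-word product query the extracted keywords tend to cover
# different aspects of the query rather than near-duplicates. The 0.2 score threshold and
# top_n=2 keep the downstream vectorization cheap.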


async def expand_keywords_with_word2vec(keywords: list, max_new=2):
    """Optimized Word2Vec keyword expansion."""
    global word2vec_model

    if word2vec_model is None or not keywords:
        return keywords

    expanded = set(keywords)

    loop = asyncio.get_event_loop()

    def _expand_keywords():
        for keyword in keywords:
            if keyword in word2vec_model:
                # Add only highly similar neighbors
                similar_words = word2vec_model.most_similar(keyword, topn=max_new)
                for word, score in similar_words:
                    if score > 0.7:
                        expanded.add(word)

        result = list(expanded)

        # Cap the expanded list at 5 keywords, keeping the original keywords first
        if len(result) > 5:
            new_words = [w for w in result if w not in keywords]
            return keywords + new_words[:max(0, 5 - len(keywords))]
        return result

    try:
        expanded_keywords = await loop.run_in_executor(thread_pool, _expand_keywords)
        return expanded_keywords
    except Exception as e:
        logger.error(f"Word2Vec expansion error: {str(e)}")
        return keywords


async def unified_search(vectors, top_k=5):
    """Search all query vectors in a single FAISS call for efficiency."""
    if vectors.size == 0:
        return []

    # Periodically lower nprobe to favor latency over recall under load
    global request_count
    if request_count % 100 == 0:
        if faiss_index.nprobe > 8:
            faiss_index.nprobe = 8

    loop = asyncio.get_event_loop()

    def _batch_search():
        distances, indices = faiss_index.search(vectors, top_k)
        return distances, indices

    try:
        distances, indices = await loop.run_in_executor(thread_pool, _batch_search)

        # Convert the raw FAISS output into per-query (index, score) lists
        results = []
        for i in range(len(indices)):
            items = []
            for idx, dist in zip(indices[i], distances[i]):
                if idx < len(indexed_items):
                    items.append((idx, float(dist)))
            results.append(items)

        return results
    except Exception as e:
        logger.error(f"Search error: {str(e)}")
        return []
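
# Observation about the code above (not a documented requirement): nprobe is only ever
# lowered here, and the check fires whenever request_count is a multiple of 100, so after
# the first such request the index stays at nprobe=8 until it is rebuilt; a rebuild resets
# it to min(32, nlist // 4) in _build_ivf_index().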


async def search_faiss_with_keywords(query: str, top_k: int = 5, keywords=None):
    """Fast keyword-based FAISS search (efficiency-optimized)."""
    global faiss_index, indexed_items, request_count

    if faiss_index is None:
        await check_faiss_index()

    start_time = time.time()

    request_count += 1

    # Extract keywords from the query unless they were supplied by the caller
    if keywords is None:
        keywords = await extract_keywords(query)

    # Vectorize the raw query together with its keywords in a single batch
    search_texts = [query] + keywords

    try:
        all_vectors = await encode_texts_parallel(search_texts)

        if all_vectors.size == 0:
            logger.warning(f"Vectorization failed for query: {query}")
            return []

        search_results = await unified_search(all_vectors, top_k=top_k)

        if not search_results:
            return []

        all_results = {}

        # Results for the full query get the highest weight
        for idx, score in search_results[0]:
            if idx < len(indexed_items):
                all_results[idx] = score * 3.0

        # Keyword results contribute with a lower weight
        for i in range(1, len(search_results)):
            keyword_results = search_results[i]
            weight = 0.5

            for idx, score in keyword_results:
                if idx in all_results:
                    all_results[idx] = max(all_results[idx], score * weight)
                else:
                    all_results[idx] = score * weight

        # Rank candidates by their combined score
        sorted_items = sorted(all_results.items(), key=lambda x: x[1], reverse=True)

        recommendations = []
        item_indices = [idx for idx, _ in sorted_items[:top_k]]

        if item_indices:
            item_names = [indexed_items[idx] for idx in item_indices]

            items_df = active_sale_items[active_sale_items["ITEMNAME"].isin(item_names)]
            items_map = dict(zip(items_df["ITEMNAME"], items_df["ITEMSEQ"]))

            for idx, score in sorted_items[:top_k]:
                item_name = indexed_items[idx]
                if item_name in items_map:
                    recommendations.append({
                        "ITEMSEQ": items_map[item_name],
                        "ITEMNAME": item_name,
                        "score": float(score)
                    })

        # Periodic memory cleanup
        if request_count % CLEANUP_INTERVAL == 0:
            await cleanup_memory()

        elapsed = time.time() - start_time
        if elapsed > 1.0:
            logger.info(f"Search complete | elapsed: {elapsed:.2f}s | results: {len(recommendations)}")

        return recommendations[:top_k]

    except Exception as e:
        logger.error(f"Search pipeline error: {str(e)}")
        return []


async def find_direct_matches(query, limit=5, existing_names=None):
    """Direct text-matching search against item names (kept separate for optimization)."""
    loop = asyncio.get_event_loop()

    def _find_matches():
        matches = []
        query_lower = query.lower()
        existing = set(existing_names or [])

        # Collect candidate item names that contain the query string
        item_dict = {}
        for idx, item_name in enumerate(indexed_items):
            if len(matches) >= limit:
                break

            if item_name in existing:
                continue

            if query_lower in item_name.lower():
                item_dict[item_name] = idx

        # Resolve the matched names back to item rows
        if item_dict:
            mask = active_sale_items["ITEMNAME"].isin(item_dict.keys())
            filtered_items = active_sale_items[mask]

            for _, row in filtered_items.iterrows():
                if len(matches) >= limit:
                    break

                matches.append({
                    "ITEMSEQ": row["ITEMSEQ"],
                    "ITEMNAME": row["ITEMNAME"],
                    "score": 1.0
                })

        return matches

    return await loop.run_in_executor(thread_pool, _find_matches)


class RecommendRequest(BaseModel):
    search_query: str
    top_k: int = 5
    use_expansion: bool = True
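
# Example request body for /api/recommend (illustrative values, not from the original docs).
# Note that use_expansion is accepted but is not currently consulted by the endpoint below:
#   {"search_query": "<item name or keywords>", "top_k": 5, "use_expansion": true}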


@app.post("/api/recommend")
async def recommend(request: RecommendRequest, background_tasks: BackgroundTasks):
    """Fast recommendation API (memory management optimized, improved performance)."""
    try:
        start_time = time.time()

        search_query = request.search_query.strip()
        if not search_query:
            raise HTTPException(status_code=400, detail="Please provide a search query")

        # Clamp top_k to a sane range
        top_k = min(max(1, request.top_k), 20)

        recommendations = await search_faiss_with_keywords(
            search_query,
            top_k
        )

        result = {
            "query": search_query,
            "recommendations": recommendations
        }

        elapsed = time.time() - start_time
        if elapsed > 1.0:
            logger.info(f"API response time: {elapsed:.2f}s | query: '{search_query}'")

        return result

    except HTTPException:
        # Re-raise intentional HTTP errors (e.g. the 400 above) without masking them as 500s
        raise
    except Exception as e:
        logger.error(f"Recommendation error: {str(e)}")
        raise HTTPException(status_code=500, detail="An error occurred while processing the recommendation")
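
# Example response shape (field names taken from the code above; values illustrative):
#   {
#     "query": "<search query>",
#     "recommendations": [
#       {"ITEMSEQ": "<item id>", "ITEMNAME": "<item name>", "score": 1.73}
#     ]
#   }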


async def check_index_health():
    """Background task that periodically checks the index state."""
    try:
        if faiss_index is None:
            logger.warning("Background check: the FAISS index is not loaded.")
            await check_faiss_index()

        logger.debug("Index health check complete")
    except Exception as e:
        logger.error(f"Error during background index check: {str(e)}")


@app.post("/api/similar_words")
async def similar_words(word: str, top_k: int = 10):
    """Similar-word lookup API backed by the Word2Vec model (async)."""
    try:
        if word2vec_model is None:
            return {"error": "The Word2Vec model is not loaded."}

        loop = asyncio.get_event_loop()

        def _get_similar():
            if word not in word2vec_model:
                return []

            similar = word2vec_model.most_similar(word, topn=top_k)
            return [{"word": w, "similarity": float(s)} for w, s in similar]

        result = await loop.run_in_executor(thread_pool, _get_similar)

        if not result:
            return {"word": word, "similar_words": [], "message": "The word is not in the model."}

        return {"word": word, "similar_words": result}
    except Exception as e:
        logger.error(f"Error during similar-word lookup: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Similar-word lookup error: {str(e)}")
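
# Because `word` and `top_k` are plain scalar parameters (no Pydantic body model), FastAPI
# exposes them as query parameters even though the route is a POST, e.g. (illustrative):
#   curl -X POST "http://localhost:7860/api/similar_words?word=<keyword>&top_k=5"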


@app.post("/api/update_index")
async def update_index(background_tasks: BackgroundTasks):
    """Rebuild the FAISS index (only on explicit request, processed asynchronously)."""
    try:
        background_tasks.add_task(rebuild_and_log_index)
        return {"message": "FAISS index update has been started in the background."}
    except Exception as e:
        logger.exception("[API] Exception while handling the index update request")
        raise HTTPException(status_code=500, detail=f"Index update failed: {str(e)}")


async def rebuild_and_log_index():
    """Rebuild the index in the background and log the result."""
    try:
        logger.info("Starting index rebuild in the background")
        start_time = time.time()
        await rebuild_faiss_index()
        elapsed = time.time() - start_time
        logger.info(f"Background index rebuild complete! Elapsed: {elapsed:.2f}s")
    except Exception as e:
        logger.error(f"Error during background index rebuild: {str(e)}")

    # Clean up memory regardless of whether the rebuild succeeded
    await cleanup_memory()


@app.get("/api/memory_status")
async def memory_status():
    """Check (and lightly clean up) memory usage."""
    try:
        if device == "cuda":
            gpu_stats = {}

            # Snapshot GPU memory before cleanup
            torch.cuda.empty_cache()
            gpu_stats["allocated"] = torch.cuda.memory_allocated() / (1024**3)
            gpu_stats["reserved"] = torch.cuda.memory_reserved() / (1024**3)

            # Run a cleanup pass and take a second snapshot
            gc.collect()
            torch.cuda.empty_cache()
            gpu_stats["after_cleanup_allocated"] = torch.cuda.memory_allocated() / (1024**3)
            gpu_stats["after_cleanup_reserved"] = torch.cuda.memory_reserved() / (1024**3)

            return {
                "device": "GPU",
                "memory_stats": {
                    "allocated_gb": round(gpu_stats["allocated"], 3),
                    "reserved_gb": round(gpu_stats["reserved"], 3),
                    "after_cleanup_allocated_gb": round(gpu_stats["after_cleanup_allocated"], 3),
                    "after_cleanup_reserved_gb": round(gpu_stats["after_cleanup_reserved"], 3)
                },
                "request_count": request_count
            }
        else:
            return {
                "device": "CPU",
                "message": "Running in CPU mode; memory information is limited.",
                "request_count": request_count
            }
    except Exception as e:
        logger.error(f"Error while checking memory status: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Memory status check error: {str(e)}")


if __name__ == "__main__":
    # Load (or build) the FAISS index before starting the server
    try:
        loop = asyncio.new_event_loop()
        if not loop.run_until_complete(load_faiss_index()):
            logger.warning("Failed to load an existing index. Building a new index now.")
            loop.run_until_complete(rebuild_faiss_index())
            logger.info("FAISS index build complete!")
        else:
            logger.info("Existing index loaded successfully.")
        loop.close()
    except Exception as e:
        logger.error(f"Initial index build failed: {e}")
        logger.warning("Starting without an index. Search functionality may be limited.")

    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)