"""FastAPI service that returns novels similar to a given title.

At import time the module loads a SentenceTransformer model, a FAISS index
and a CSV catalogue.  The ``/search/{novel_name}`` endpoint looks up the
keyword string of the requested novel, embeds it, and queries the FAISS
index for neighbouring catalogue rows.
"""
import logging
import os
import random

import faiss
import pandas as pd
from fastapi import FastAPI, HTTPException
from sentence_transformers import SentenceTransformer

logger = logging.getLogger(__name__)

app = FastAPI()

# Load SentenceTransformer model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Load FAISS index
faiss_index_path = "novel_index.index"
if os.path.exists(faiss_index_path):
    faiss_index = faiss.read_index(faiss_index_path)
else:
    # This runs at import time, outside any request, so an HTTPException
    # would never be turned into an HTTP response; fail startup explicitly.
    raise RuntimeError(
        f"Failed to load FAISS index: {faiss_index_path!r} does not exist."
    )

# Load DataFrame
csv_path = "novel_df4.csv"
if os.path.exists(csv_path):
    df = pd.read_csv(csv_path)
else:
    raise RuntimeError(f"Failed to load CSV file: {csv_path!r} does not exist.")


def find_novel_keywords(novel_name, df):
    """Return the ``keyword`` value of the first row whose name contains *novel_name*.

    Matching is a case-insensitive *literal* substring test on the ``name``
    column; rows with a missing name never match.

    Args:
        novel_name: Title (or title fragment) to look for.
        df: Catalogue with at least ``name`` and ``keyword`` columns.

    Returns:
        The ``keyword`` cell of the first matching row, or ``None`` when no
        row matches.
    """
    # regex=False: novel_name comes straight from the URL, so characters such
    # as "(" or "+" must be matched literally instead of compiled as a regex.
    name_matches = df['name'].str.contains(
        novel_name, case=False, na=False, regex=False
    )
    keywords = df.loc[name_matches, 'keyword'].values
    if len(keywords) > 0:
        return keywords[0]
    return None


def encode_and_search(keywords, index, model, k=10):
    """Embed *keywords* with *model* and return the *k* nearest index labels.

    Args:
        keywords: Text to embed.  ``None`` is replaced by the empty string and
            any other non-string value is converted with ``str()``.
        index: Object exposing ``search(query, k)`` (the FAISS index).
        model: Object exposing ``encode(list_of_texts)``.
        k: Number of neighbours to request.

    Returns:
        The label array returned by ``index.search`` (distances are dropped).
    """
    if keywords is None:
        keywords = ""
    elif not isinstance(keywords, str):
        keywords = str(keywords)

    # The index is queried with a single-row 2-D batch.
    query_embedding = model.encode([keywords])[0].reshape(1, -1)
    _, indices = index.search(query_embedding, k)
    return indices


def _split_comma_list(value):
    """Split a comma-separated string into a list; non-strings give ``[]``."""
    return value.split(',') if isinstance(value, str) else []


def process_search_results(indices, df, novel_name):
    """Turn FAISS labels into a shuffled, de-duplicated list of result dicts.

    Args:
        indices: Label array as returned by ``encode_and_search``.
        df: Catalogue with ``name``, ``genre``, ``tag``, ``image_url`` and
            ``source`` columns; labels are used as row positions.
        novel_name: The queried title; rows whose name equals it
            (case-insensitively) are excluded from the results.

    Returns:
        A list of dicts with keys ``name``, ``genre``, ``tag``, ``image_url``
        and ``source``, one per distinct name, in random order.  ``genre`` and
        ``tag`` are lists obtained by splitting the cell on commas.  Returns
        ``[]`` when nothing is left after filtering.
    """
    positions = indices.ravel()
    # FAISS pads missing neighbours with -1, which iloc would silently read as
    # "last row"; positions past the end of df would raise.  Drop both.
    positions = positions[(positions >= 0) & (positions < len(df))]

    result_df = df.iloc[positions]
    result_df = result_df[result_df['name'].str.lower() != novel_name.lower()]
    if result_df.empty:
        return []

    # Work on an explicit copy so the column rewrites below never touch (or
    # warn about touching) the shared module-level catalogue.
    result_df = result_df.copy()
    result_df['genre'] = result_df['genre'].apply(_split_comma_list)
    result_df['tag'] = result_df['tag'].apply(_split_comma_list)

    results = result_df[['name', 'genre', 'tag', 'image_url', 'source']].to_dict(
        orient='records'
    )

    # Remove duplicates, keeping the first occurrence of each name.
    unique_results = []
    seen_names = set()
    for result in results:
        if result['name'] not in seen_names:
            unique_results.append(result)
            seen_names.add(result['name'])

    # Presentation order is intentionally randomised (similarity rank is lost).
    random.shuffle(unique_results)
    return unique_results


def search_similar_by_novel_name(novel_name, index, df, model, k=10):
    """Find novels similar to *novel_name*.

    Looks up the novel's keywords, searches *index* for the *k* nearest
    entries and formats them with ``process_search_results``.

    NOTE(review): when no title matches, the keywords are ``None`` and the
    search falls back to embedding the empty string, so results may still be
    returned for an unknown title.  Confirm this fallback is intended.

    Returns:
        The list produced by ``process_search_results``.
    """
    keywords = find_novel_keywords(novel_name, df)
    indices = encode_and_search(keywords, index, model, k)
    return process_search_results(indices, df, novel_name)


@app.get("/search/{novel_name}")
async def search_similar_novels(novel_name: str):
    """Return up to 30 candidate novels similar to *novel_name*.

    Raises:
        HTTPException: 404 when the search yields no results, 500 when the
            search itself fails.
    """
    # NOTE(review): the search below is synchronous; FastAPI's documentation
    # suggests a plain ``def`` endpoint for blocking work so it runs in the
    # thread pool.  Left as ``async def`` to keep the handler's interface.
    try:
        similar_novels = search_similar_by_novel_name(
            novel_name, faiss_index, df, model, 30
        )
    except Exception as exc:
        # Keep the traceback in the server log; the client gets a generic 500.
        logger.exception("Similar-novel search failed for %r", novel_name)
        raise HTTPException(
            status_code=500, detail="Failed to search for similar novels."
        ) from exc

    if not similar_novels:
        raise HTTPException(
            status_code=404, detail="Novel not found or no similar novels found."
        )
    return {"similar_novels": similar_novels}