Spaces:
Sleeping
Sleeping
| from fastapi import FastAPI, HTTPException | |
| from sentence_transformers import SentenceTransformer | |
| import faiss | |
| import pandas as pd | |
| import os | |
| import random | |
app = FastAPI()

# Embedding model used to encode keyword queries into vectors.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Load the prebuilt FAISS index of keyword embeddings.
# Fail fast with RuntimeError: HTTPException is meaningless at import
# time (no request is in flight, so FastAPI's exception handling never
# sees it) — a plain exception crashes startup with a clear message.
faiss_index_path = "novel_index.index"
if not os.path.exists(faiss_index_path):
    raise RuntimeError(f"Failed to load FAISS index: '{faiss_index_path}' not found.")
faiss_index = faiss.read_index(faiss_index_path)

# Load the novel metadata backing the index.
# NOTE(review): assumes row order in the CSV matches the FAISS vector
# order — confirm against the index-building script.
csv_path = "novel_df4.csv"
if not os.path.exists(csv_path):
    raise RuntimeError(f"Failed to load CSV file: '{csv_path}' not found.")
df = pd.read_csv(csv_path)
def find_novel_keywords(novel_name, df):
    """Return the stored keyword string for the first novel whose name
    contains *novel_name* (case-insensitive substring match), or None
    if no name matches.
    """
    # regex=False: treat the user-supplied name as a literal substring.
    # Without it, titles containing regex metacharacters (e.g. "(", "+",
    # "?") would match incorrectly or raise re.error.
    matches = df.loc[
        df['name'].str.contains(novel_name, case=False, na=False, regex=False),
        'keyword',
    ].values
    return matches[0] if len(matches) > 0 else None
def encode_and_search(keywords, index, model, k=10):
    """Embed *keywords* and return the indices of the k nearest vectors.

    None is treated as an empty query; any non-string input is coerced
    to its string representation before encoding.
    """
    if keywords is None:
        query_text = ""
    elif isinstance(keywords, str):
        query_text = keywords
    else:
        query_text = str(keywords)
    # Encode a single-element batch and reshape to (1, dim) for FAISS.
    query_vector = model.encode([query_text])[0].reshape(1, -1)
    _, neighbor_ids = index.search(query_vector, k)
    return neighbor_ids
def process_search_results(indices, df, novel_name):
    """Map FAISS result indices back to novel records.

    Drops the queried novel itself, splits comma-separated genre/tag
    strings into lists, de-duplicates by name (first occurrence wins),
    and shuffles the survivors.

    Returns a (possibly empty) list of dicts with keys
    name, genre, tag, image_url, source.
    """
    # FAISS pads the result with -1 when fewer than k neighbours exist;
    # filter those out, otherwise iloc[-1] would silently select the
    # last row of the DataFrame.
    valid_ids = [i for i in indices.ravel() if i >= 0]
    # .copy(): we mutate columns below; avoid SettingWithCopyWarning on
    # a view of df.
    result_df = df.iloc[valid_ids].copy()
    result_df = result_df[result_df['name'].str.lower() != novel_name.lower()]
    if result_df.empty:
        return []
    # Non-string (e.g. NaN) genre/tag values become empty lists.
    result_df['genre'] = result_df['genre'].apply(
        lambda x: x.split(',') if isinstance(x, str) else [])
    result_df['tag'] = result_df['tag'].apply(
        lambda x: x.split(',') if isinstance(x, str) else [])
    results = result_df[['name', 'genre', 'tag', 'image_url', 'source']].to_dict(orient='records')
    # Remove duplicates by name, keeping the first occurrence.
    unique_results = []
    seen_names = set()
    for result in results:
        if result['name'] not in seen_names:
            unique_results.append(result)
            seen_names.add(result['name'])
    random.shuffle(unique_results)
    return unique_results
def search_similar_by_novel_name(novel_name, index, df, model, k=10):
    """Pipeline: look up the novel's keywords, embed them, search the
    index, and convert the hits into novel records (excluding the
    queried novel itself).
    """
    query_keywords = find_novel_keywords(novel_name, df)
    neighbor_ids = encode_and_search(query_keywords, index, model, k)
    return process_search_results(neighbor_ids, df, novel_name)
async def search_similar_novels(novel_name: str):
    """Request handler: return up to 30 novels similar to *novel_name*.

    Raises:
        HTTPException 404: the novel is unknown or has no similar novels.
        HTTPException 500: any unexpected failure during the search.

    NOTE(review): no @app.get(...) decorator is visible in this chunk —
    confirm the route is registered elsewhere.
    """
    try:
        similar_novels = search_similar_by_novel_name(novel_name, faiss_index, df, model, 30)
        if not similar_novels:
            raise HTTPException(status_code=404, detail="Novel not found or no similar novels found.")
        return {"similar_novels": similar_novels}
    except HTTPException:
        # Re-raise unchanged so the 404 above is not masked by the 500 below.
        raise
    except Exception as e:
        # Chain the original error so it surfaces in server logs/tracebacks
        # instead of being silently discarded.
        raise HTTPException(status_code=500, detail="Failed to search for similar novels.") from e