from fastapi import FastAPI, HTTPException
from sentence_transformers import SentenceTransformer
import faiss
import pandas as pd
import os
import random
app = FastAPI()
# Load SentenceTransformer model
model = SentenceTransformer('all-MiniLM-L6-v2')
# Load FAISS index
faiss_index_path = "novel_index.index"
if os.path.exists(faiss_index_path):
    faiss_index = faiss.read_index(faiss_index_path)
else:
    # HTTPException is meant for request handlers; at import time, fail fast with a plain error.
    raise RuntimeError(f"Failed to load FAISS index: {faiss_index_path} not found.")
# Load DataFrame
csv_path = "novel_df4.csv"
if os.path.exists(csv_path):
    df = pd.read_csv(csv_path)
else:
    raise RuntimeError(f"Failed to load CSV file: {csv_path} not found.")
def find_novel_keywords(novel_name, df):
    """Return the stored keyword string for the first novel whose name matches, or None."""
    keywords = df.loc[df['name'].str.contains(novel_name, case=False, na=False), 'keyword'].values
    if len(keywords) > 0:
        return keywords[0]
    return None
def encode_and_search(keywords, index, model, k=10):
    """Embed the keyword string and return the indices of the k nearest novels in the FAISS index."""
    # Fall back to an empty query when no keywords were found, and coerce non-string values.
    if keywords is None:
        keywords = ""
    elif not isinstance(keywords, str):
        keywords = str(keywords)
    query_embedding = model.encode([keywords])[0]
    query_embedding = query_embedding.reshape(1, -1)
    _, indices = index.search(query_embedding, k)
    return indices
def process_search_results(indices, df, novel_name):
    """Map FAISS indices back to rows, drop the queried novel, and return de-duplicated records."""
    # FAISS pads with -1 when fewer than k neighbours exist; keep only valid row positions.
    row_ids = indices.ravel()
    row_ids = row_ids[row_ids >= 0]
    result_df = df.iloc[row_ids].copy()
    result_df = result_df[result_df['name'].str.lower() != novel_name.lower()]
    if result_df.empty:
        return []
    result_df.loc[:, 'genre'] = result_df['genre'].apply(lambda x: x.split(',') if isinstance(x, str) else [])
    result_df.loc[:, 'tag'] = result_df['tag'].apply(lambda x: x.split(',') if isinstance(x, str) else [])
    results = result_df[['name', 'genre', 'tag', 'image_url', 'source']].to_dict(orient='records')
    # Remove duplicate novels while keeping the first occurrence of each name.
    unique_results = []
    seen_names = set()
    for result in results:
        if result['name'] not in seen_names:
            unique_results.append(result)
            seen_names.add(result['name'])
    random.shuffle(unique_results)
    return unique_results
def search_similar_by_novel_name(novel_name, index, df, model, k=10):
    """Look up a novel's keywords, search the index, and return formatted similar novels."""
    # If the novel is not in the CSV, encode_and_search falls back to an empty query.
    keywords = find_novel_keywords(novel_name, df)
    indices = encode_and_search(keywords, index, model, k)
    return process_search_results(indices, df, novel_name)
@app.get("/search/{novel_name}")
async def search_similar_novels(novel_name: str):
try:
similar_novels = search_similar_by_novel_name(novel_name, faiss_index, df, model, 30)
if not similar_novels:
raise HTTPException(status_code=404, detail="Novel not found or no similar novels found.")
return {"similar_novels": similar_novels}
except HTTPException as e:
raise e
except Exception as e:
raise HTTPException(status_code=500, detail="Failed to search for similar novels.")
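# Optional local entry point, a minimal sketch assuming uvicorn is installed (it is not
# imported elsewhere in this file); adjust host/port to match the actual deployment.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)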