# deepshelf-api / tests/test_api_endpoints.py
# nice-bill's picture
# initial commit
# cdb73a8
import os
import unittest
from unittest.mock import MagicMock
import numpy as np
import pandas as pd
# FastAPI related imports
from fastapi.testclient import TestClient
import src.book_recommender.core.config as config
from src.book_recommender.api.dependencies import ( # Import actual dependencies to override
get_clusters_data,
get_recommender,
get_sentence_transformer_model,
)
from src.book_recommender.api.main import app as fastapi_app # Import the FastAPI app instance
from src.book_recommender.ml.recommender import BookRecommender
class TestFastAPIEndpoints(unittest.TestCase):
    """Endpoint tests for the FastAPI app with its heavy dependencies mocked.

    The sentence-transformer model, the recommender and the cluster data are
    replaced through ``fastapi_app.dependency_overrides`` so every request is
    served from the small in-memory fixtures defined on this class.

    NOTE(review): the feedback test works on the real
    ``<BASE_DIR>/data/feedback/user_feedback.jsonl`` path (presumably the file
    the ``/feedback`` endpoint appends to); it is removed before and after each
    test, so do not run this suite against a directory holding real feedback.
    """

    # Five-book catalogue shared (read-only) by all tests.
    dummy_processed_data = pd.DataFrame(
        {
            "id": ["1", "2", "3", "4", "5"],
            "title": ["Test Book 1", "Test Book 2", "Related Test Book 1", "Unrelated Book", "Sci-Fi Classic"],
            "authors": ["Author A", "Author B", "Author A", "Author C", "Author D"],
            "genres": ["Fiction", "Science Fiction", "Fiction", "Fantasy", "Science Fiction"],
            "description": [
                "Description for book 1",
                "Description for book 2 with sci-fi themes",
                "Another book by Author A",
                "A magical adventure",
                "Deep space exploration",
            ],
            "title_lower": ["test book 1", "test book 2", "related test book 1", "unrelated book", "sci-fi classic"],
            "authors_lower": ["author a", "author b", "author a", "author c", "author d"],
            "combined_text": [
                "test book 1 by author a. genres: fiction. " "description: description for book 1. tags: ",
                "test book 2 by author b. genres: science fiction. "
                "description: description for book 2 with sci-fi themes. tags: ",
                "related test book 1 by author a. genres: fiction. " "description: another book by author a. tags: ",
                "unrelated book by author c. genres: fantasy. " "description: a magical adventure. tags: ",
                "sci-fi classic by author d. genres: science fiction. " "description: deep space exploration. tags: ",
            ],
            "cluster_id": [0, 1, 0, 2, 1],
        }
    )
    # Seeded generator: the values are arbitrary, but a fixed seed makes any
    # data-dependent failure reproducible from run to run.
    dummy_embeddings = (
        np.random.default_rng(0)
        .random((len(dummy_processed_data), config.EMBEDDING_DIMENSION))
        .astype("float32")
    )
    dummy_cluster_names = {0: "Fiction Collection", 1: "Sci-Fi Collection", 2: "Fantasy Collection"}

    @staticmethod
    def _remove_feedback_file():
        """Delete the feedback file if it exists; a missing file is not an error."""
        feedback_file_path = os.path.join(config.BASE_DIR, "data", "feedback", "user_feedback.jsonl")
        # EAFP instead of exists()+remove(): no window in which the file can
        # vanish between the check and the delete.
        try:
            os.remove(feedback_file_path)
        except FileNotFoundError:
            pass

    def setUp(self):
        """Install mocked dependencies, build the test client and reset feedback state."""
        super().setUp()

        # Clear lru_cache for dependencies
        get_recommender.cache_clear()
        get_sentence_transformer_model.cache_clear()
        get_clusters_data.cache_clear()

        # 1. Create mock objects for dependencies
        self.mock_model = MagicMock()
        # This is the instance returned by get_recommender
        self.mock_recommender_instance = MagicMock(spec=BookRecommender)
        # This is the actual value returned by get_clusters_data
        self.mock_clusters_data_value = (
            self.dummy_processed_data["cluster_id"].values,
            self.dummy_cluster_names,
            self.dummy_processed_data,
        )

        # 2. Configure mock behaviors
        rng = np.random.default_rng(0)  # per-test generator keeps encode() output reproducible

        def fake_encode(texts, **kwargs):
            # A single string yields one row; any other input yields one row per item.
            n_rows = 1 if isinstance(texts, str) else len(texts)
            return rng.random((n_rows, config.EMBEDDING_DIMENSION)).astype("float32")

        self.mock_model.encode.side_effect = fake_encode

        self.mock_recommender_instance.book_data = self.dummy_processed_data
        self.mock_recommender_instance.embeddings = self.dummy_embeddings
        self.mock_recommender_instance.get_recommendations.return_value = [
            {
                "id": "3",
                "title": "Related Test Book 1",
                "authors": "Author A",
                "description": "Another book by Author A",
                "genres": "Fiction",
                "similarity": 0.8,
            },
            {
                "id": "1",
                "title": "Test Book 1",
                "authors": "Author A",
                "description": "Description for book 1",
                "genres": "Fiction",
                "similarity": 0.7,
            },
        ]
        self.mock_recommender_instance.get_recommendations_from_vector.return_value = [
            {
                "id": "5",
                "title": "Sci-Fi Classic",
                "authors": "Author D",
                "description": "Deep space exploration",
                "genres": "Science Fiction",
                "similarity": 0.9,
            },
            {
                "id": "2",
                "title": "Test Book 2",
                "authors": "Author B",
                "description": "Description for book 2 with sci-fi themes",
                "genres": "Science Fiction",
                "similarity": 0.8,
            },
        ]

        # 3. Override dependencies for the FastAPI app
        fastapi_app.dependency_overrides[get_sentence_transformer_model] = lambda: self.mock_model
        fastapi_app.dependency_overrides[get_recommender] = lambda: self.mock_recommender_instance
        fastapi_app.dependency_overrides[get_clusters_data] = lambda: self.mock_clusters_data_value

        # self.client needs to be created *after* the dependencies are mocked
        self.client = TestClient(fastapi_app)

        # Ensure feedback file is clean before each test, and do not leave the
        # records written by a test behind once it has finished.
        self._remove_feedback_file()
        self.addCleanup(self._remove_feedback_file)

    def tearDown(self):
        """Drop the dependency overrides so they cannot leak into other tests."""
        # Clear dependency overrides after each test
        fastapi_app.dependency_overrides = {}
        super().tearDown()

    def test_health_check(self):
        """GET /health returns 200 and the fixed healthy payload."""
        response = self.client.get("/health")
        self.assertEqual(response.status_code, 200)
        self.assertEqual(
            response.json(), {"status": "OK", "message": "BookFinder API is healthy and core services are loaded."}
        )

    def test_recommend_by_query(self):
        """POST /recommend/query returns the mocked vector-based recommendations in order."""
        response = self.client.post("/recommend/query", json={"query": "sci-fi books", "top_k": 2})
        self.assertEqual(response.status_code, 200)
        recommendations = response.json()
        self.assertEqual(len(recommendations), 2)
        self.assertEqual(recommendations[0]["book"]["title"], "Sci-Fi Classic")
        self.assertEqual(recommendations[1]["book"]["title"], "Test Book 2")

    def test_recommend_by_title(self):
        """POST /recommend/title returns the mocked title-based recommendations in order."""
        response = self.client.post("/recommend/title", json={"title": "Test Book 1", "top_k": 2})
        self.assertEqual(response.status_code, 200)
        recommendations = response.json()
        self.assertEqual(len(recommendations), 2)
        self.assertEqual(recommendations[0]["book"]["title"], "Related Test Book 1")
        self.assertEqual(recommendations[1]["book"]["title"], "Test Book 1")

    def test_list_books(self):
        """GET /books paginates the catalogue: 5 books in total, 3 on the first page."""
        response = self.client.get("/books?page=1&page_size=3")
        self.assertEqual(response.status_code, 200)
        data = response.json()
        self.assertEqual(data["total"], 5)
        self.assertEqual(len(data["books"]), 3)
        self.assertEqual(data["books"][0]["title"], "Test Book 1")

    def test_search_books(self):
        """GET /books/search for "author A" returns both of that author's books."""
        # The query contains a space, so let the client encode it via params=
        # rather than embedding it raw in the URL string.
        response = self.client.get("/books/search", params={"query": "author A", "page": 1, "page_size": 5})
        self.assertEqual(response.status_code, 200)
        data = response.json()
        self.assertEqual(data["total"], 2)
        self.assertEqual(len(data["books"]), 2)
        self.assertEqual(data["books"][0]["title"], "Test Book 1")
        self.assertEqual(data["books"][1]["title"], "Related Test Book 1")

    def test_get_stats(self):
        """GET /stats reports the book total and lower-cased genre/author counts."""
        response = self.client.get("/stats")
        self.assertEqual(response.status_code, 200)
        data = response.json()
        self.assertEqual(data["total_books"], 5)
        self.assertIn("fiction", data["genres_count"])
        self.assertIn("author a", data["authors_count"])

    def test_list_clusters(self):
        """GET /clusters lists all three clusters, the first being the fiction one."""
        response = self.client.get("/clusters")
        self.assertEqual(response.status_code, 200)
        clusters = response.json()
        self.assertEqual(len(clusters), 3)
        self.assertEqual(clusters[0]["name"], "Fiction Collection")
        self.assertEqual(clusters[0]["size"], 2)  # Test Book 1, Related Test Book 1
        self.assertEqual(len(clusters[0]["top_books"]), 2)  # Should sample up to 3, but there are only 2

    def test_get_books_in_cluster(self):
        """GET /clusters/0 returns the two books assigned to cluster 0."""
        response = self.client.get("/clusters/0?page=1&page_size=10")
        self.assertEqual(response.status_code, 200)
        data = response.json()
        self.assertEqual(data["total"], 2)
        self.assertEqual(len(data["books"]), 2)
        self.assertEqual(data["books"][0]["title"], "Test Book 1")

    def test_get_cluster_sample(self):
        """GET /clusters/1/sample returns exactly the requested number of books."""
        response = self.client.get("/clusters/1/sample?sample_size=1")
        self.assertEqual(response.status_code, 200)
        books = response.json()
        self.assertEqual(len(books), 1)
        self.assertEqual(books[0]["genres"], ["Science Fiction"])  # Only Sci-Fi books in cluster 1

    def test_explain_recommendation_endpoint(self):
        """POST /explain returns an explanation carrying all four expected keys."""
        sample_book = {
            "id": "1",
            "title": "Test Book 1",
            "authors": ["Author A"],
            "description": "Description for book 1",
            "genres": ["Fiction"],
            "cover_image_url": None,
        }
        response = self.client.post(
            "/explain",
            json={"query_text": "A book about fiction", "recommended_book": sample_book, "similarity_score": 0.75},
        )
        self.assertEqual(response.status_code, 200)
        explanation = response.json()
        self.assertIn("match_score", explanation)
        self.assertIn("confidence", explanation)
        self.assertIn("summary", explanation)
        self.assertIn("details", explanation)

    def test_submit_feedback_and_get_stats(self):
        """Two feedback submissions are accepted and then reflected by /feedback/stats."""
        # Submit feedback
        feedback_payload = {
            "query": "fantasy adventure",
            "book_id": "4",  # Unrelated Book
            "feedback_type": "positive",
            "session_id": "test_session_1",
        }
        response = self.client.post("/feedback", json=feedback_payload)
        self.assertEqual(response.status_code, 204)  # No Content

        feedback_payload_2 = {
            "query": "sci-fi classic",
            "book_id": "5",  # Sci-Fi Classic
            "feedback_type": "negative",
            "session_id": "test_session_1",
        }
        response = self.client.post("/feedback", json=feedback_payload_2)
        self.assertEqual(response.status_code, 204)  # No Content

        # Get stats
        response = self.client.get("/feedback/stats")
        self.assertEqual(response.status_code, 200)
        stats = response.json()
        self.assertEqual(stats["total_feedback"], 2)
        self.assertEqual(stats["positive_feedback"], 1)
        self.assertEqual(stats["negative_feedback"], 1)
        self.assertIn("Unrelated Book", stats["feedback_by_book_title"])
        self.assertIn("fantasy adventure", stats["feedback_by_query"])