Spaces:

nice-bill
/

deepshelf-api

Running

App Files Files Community

deepshelf-api / tests /test_api_endpoints.py

nice-bill

initial commit

cdb73a8 3 months ago

raw

history blame contribute delete

11.3 kB

	import os
	import unittest
	from unittest.mock import MagicMock

	import numpy as np
	import pandas as pd

	# FastAPI related imports
	from fastapi.testclient import TestClient

	import src.book_recommender.core.config as config
	from src.book_recommender.api.dependencies import ( # Import actual dependencies to override
	get_clusters_data,
	get_recommender,
	get_sentence_transformer_model,
	)
	from src.book_recommender.api.main import app as fastapi_app # Import the FastAPI app instance
	from src.book_recommender.ml.recommender import BookRecommender


	class TestFastAPIEndpoints(unittest.TestCase):
	    """Endpoint tests for the FastAPI app with its dependencies replaced by mocks.

	    ``setUp`` overrides ``get_sentence_transformer_model``, ``get_recommender``
	    and ``get_clusters_data`` on ``fastapi_app`` so that every request is
	    answered from the five-book fixture defined on this class.
	    """

	    # Five-book catalogue used as the fixture; setUp hands each test its own copy.
	    dummy_processed_data = pd.DataFrame(
	        {
	            "id": ["1", "2", "3", "4", "5"],
	            "title": ["Test Book 1", "Test Book 2", "Related Test Book 1", "Unrelated Book", "Sci-Fi Classic"],
	            "authors": ["Author A", "Author B", "Author A", "Author C", "Author D"],
	            "genres": ["Fiction", "Science Fiction", "Fiction", "Fantasy", "Science Fiction"],
	            "description": [
	                "Description for book 1",
	                "Description for book 2 with sci-fi themes",
	                "Another book by Author A",
	                "A magical adventure",
	                "Deep space exploration",
	            ],
	            "title_lower": ["test book 1", "test book 2", "related test book 1", "unrelated book", "sci-fi classic"],
	            "authors_lower": ["author a", "author b", "author a", "author c", "author d"],
	            "combined_text": [
	                "test book 1 by author a. genres: fiction. "
	                "description: description for book 1. tags: ",
	                "test book 2 by author b. genres: science fiction. "
	                "description: description for book 2 with sci-fi themes. tags: ",
	                "related test book 1 by author a. genres: fiction. "
	                "description: another book by author a. tags: ",
	                "unrelated book by author c. genres: fantasy. "
	                "description: a magical adventure. tags: ",
	                "sci-fi classic by author d. genres: science fiction. "
	                "description: deep space exploration. tags: ",
	            ],
	            "cluster_id": [0, 1, 0, 2, 1],
	        }
	    )
	    # Seeded generator so the fixture embeddings are identical on every run.
	    dummy_embeddings = (
	        np.random.default_rng(0)
	        .random((len(dummy_processed_data), config.EMBEDDING_DIMENSION))
	        .astype("float32")
	    )

	    dummy_cluster_names = {0: "Fiction Collection", 1: "Sci-Fi Collection", 2: "Fantasy Collection"}

	    @staticmethod
	    def _remove_feedback_file():
	        """Delete ``data/feedback/user_feedback.jsonl`` under ``config.BASE_DIR`` if present."""
	        # NOTE(review): presumably the file the /feedback endpoint appends to - confirm.
	        feedback_file_path = os.path.join(config.BASE_DIR, "data", "feedback", "user_feedback.jsonl")
	        try:
	            os.remove(feedback_file_path)
	        except FileNotFoundError:
	            # Nothing was written yet; the file is already "clean".
	            pass

	    @staticmethod
	    def _reset_dependency_overrides():
	        """Drop every dependency override installed on ``fastapi_app``."""
	        fastapi_app.dependency_overrides = {}

	    def setUp(self):
	        """Install the mocked dependencies, build the client and clear old feedback."""
	        super().setUp()

	        # Registered before anything is overridden or written: unittest skips
	        # tearDown when setUp raises, but cleanups still run, so neither the
	        # overrides nor feedback written by a test can leak into the next one.
	        self.addCleanup(self._remove_feedback_file)
	        self.addCleanup(self._reset_dependency_overrides)

	        # Clear lru_cache for dependencies
	        get_recommender.cache_clear()
	        get_sentence_transformer_model.cache_clear()
	        get_clusters_data.cache_clear()

	        # 1. Create mock objects for dependencies
	        # Per-test copies keep the class-level fixtures untouched if an
	        # endpoint mutates the data it is handed.
	        book_data = self.dummy_processed_data.copy()
	        self.mock_model = MagicMock()
	        # This is the instance returned by get_recommender.
	        self.mock_recommender_instance = MagicMock(spec=BookRecommender)
	        # This is the actual value returned by get_clusters_data.
	        self.mock_clusters_data_value = (
	            book_data["cluster_id"].values,
	            dict(self.dummy_cluster_names),
	            book_data,
	        )

	        # 2. Configure mock behaviors
	        rng = np.random.default_rng(0)

	        def fake_encode(x, **kwargs):
	            # A single string yields one row; any other input yields one row per item.
	            rows = 1 if isinstance(x, str) else len(x)
	            return rng.random((rows, config.EMBEDDING_DIMENSION)).astype("float32")

	        self.mock_model.encode.side_effect = fake_encode

	        self.mock_recommender_instance.book_data = book_data
	        self.mock_recommender_instance.embeddings = self.dummy_embeddings
	        self.mock_recommender_instance.get_recommendations.return_value = [
	            {
	                "id": "3",
	                "title": "Related Test Book 1",
	                "authors": "Author A",
	                "description": "Another book by Author A",
	                "genres": "Fiction",
	                "similarity": 0.8,
	            },
	            {
	                "id": "1",
	                "title": "Test Book 1",
	                "authors": "Author A",
	                "description": "Description for book 1",
	                "genres": "Fiction",
	                "similarity": 0.7,
	            },
	        ]
	        self.mock_recommender_instance.get_recommendations_from_vector.return_value = [
	            {
	                "id": "5",
	                "title": "Sci-Fi Classic",
	                "authors": "Author D",
	                "description": "Deep space exploration",
	                "genres": "Science Fiction",
	                "similarity": 0.9,
	            },
	            {
	                "id": "2",
	                "title": "Test Book 2",
	                "authors": "Author B",
	                "description": "Description for book 2 with sci-fi themes",
	                "genres": "Science Fiction",
	                "similarity": 0.8,
	            },
	        ]

	        # 3. Override dependencies for the FastAPI app
	        fastapi_app.dependency_overrides[get_sentence_transformer_model] = lambda: self.mock_model
	        fastapi_app.dependency_overrides[get_recommender] = lambda: self.mock_recommender_instance
	        fastapi_app.dependency_overrides[get_clusters_data] = lambda: self.mock_clusters_data_value

	        # self.client needs to be created after the dependencies are mocked
	        self.client = TestClient(fastapi_app)

	        # Ensure feedback file is clean before each test
	        self._remove_feedback_file()

	    def tearDown(self):
	        """Clear the dependency overrides after each test."""
	        self._reset_dependency_overrides()
	        super().tearDown()

	    def test_health_check(self):
	        """GET /health answers 200 with the fixed status payload."""
	        response = self.client.get("/health")
	        self.assertEqual(response.status_code, 200)
	        self.assertEqual(
	            response.json(),
	            {"status": "OK", "message": "BookFinder API is healthy and core services are loaded."},
	        )

	    def test_recommend_by_query(self):
	        """POST /recommend/query yields the two books configured for vector search, in order."""
	        response = self.client.post("/recommend/query", json={"query": "sci-fi books", "top_k": 2})
	        self.assertEqual(response.status_code, 200)
	        recommendations = response.json()
	        self.assertEqual(len(recommendations), 2)
	        self.assertEqual(recommendations[0]["book"]["title"], "Sci-Fi Classic")
	        self.assertEqual(recommendations[1]["book"]["title"], "Test Book 2")

	    def test_recommend_by_title(self):
	        """POST /recommend/title yields the two books configured on get_recommendations, in order."""
	        response = self.client.post("/recommend/title", json={"title": "Test Book 1", "top_k": 2})
	        self.assertEqual(response.status_code, 200)
	        recommendations = response.json()
	        self.assertEqual(len(recommendations), 2)
	        self.assertEqual(recommendations[0]["book"]["title"], "Related Test Book 1")
	        self.assertEqual(recommendations[1]["book"]["title"], "Test Book 1")

	    def test_list_books(self):
	        """GET /books reports all five books and returns the first page of three."""
	        response = self.client.get("/books?page=1&page_size=3")
	        self.assertEqual(response.status_code, 200)
	        data = response.json()
	        self.assertEqual(data["total"], 5)
	        self.assertEqual(len(data["books"]), 3)
	        self.assertEqual(data["books"][0]["title"], "Test Book 1")

	    def test_search_books(self):
	        """GET /books/search for "author A" finds both of that author's books."""
	        # The query contains a space, so let the client encode it via ``params``
	        # instead of embedding a raw space in the URL.
	        response = self.client.get(
	            "/books/search",
	            params={"query": "author A", "page": 1, "page_size": 5},
	        )
	        self.assertEqual(response.status_code, 200)
	        data = response.json()
	        self.assertEqual(data["total"], 2)
	        self.assertEqual(len(data["books"]), 2)
	        self.assertEqual(data["books"][0]["title"], "Test Book 1")
	        self.assertEqual(data["books"][1]["title"], "Related Test Book 1")

	    def test_get_stats(self):
	        """GET /stats reports five books and lower-cased genre/author keys."""
	        response = self.client.get("/stats")
	        self.assertEqual(response.status_code, 200)
	        data = response.json()
	        self.assertEqual(data["total_books"], 5)
	        self.assertIn("fiction", data["genres_count"])
	        self.assertIn("author a", data["authors_count"])

	    def test_list_clusters(self):
	        """GET /clusters lists the three fixture clusters, starting with the fiction one."""
	        response = self.client.get("/clusters")
	        self.assertEqual(response.status_code, 200)
	        clusters = response.json()
	        self.assertEqual(len(clusters), 3)
	        self.assertEqual(clusters[0]["name"], "Fiction Collection")
	        self.assertEqual(clusters[0]["size"], 2)  # Test Book 1, Related Test Book 1
	        self.assertEqual(len(clusters[0]["top_books"]), 2)  # Should sample up to 3, but there are only 2

	    def test_get_books_in_cluster(self):
	        """GET /clusters/0 returns both books whose cluster_id is 0."""
	        response = self.client.get("/clusters/0?page=1&page_size=10")
	        self.assertEqual(response.status_code, 200)
	        data = response.json()
	        self.assertEqual(data["total"], 2)
	        self.assertEqual(len(data["books"]), 2)
	        self.assertEqual(data["books"][0]["title"], "Test Book 1")

	    def test_get_cluster_sample(self):
	        """GET /clusters/1/sample with sample_size=1 returns one science-fiction book."""
	        response = self.client.get("/clusters/1/sample?sample_size=1")
	        self.assertEqual(response.status_code, 200)
	        books = response.json()
	        self.assertEqual(len(books), 1)
	        self.assertEqual(books[0]["genres"], ["Science Fiction"])  # Only Sci-Fi books in cluster 1

	    def test_explain_recommendation_endpoint(self):
	        """POST /explain responds with match_score, confidence, summary and details."""
	        sample_book = {
	            "id": "1",
	            "title": "Test Book 1",
	            "authors": ["Author A"],
	            "description": "Description for book 1",
	            "genres": ["Fiction"],
	            "cover_image_url": None,
	        }
	        response = self.client.post(
	            "/explain",
	            json={"query_text": "A book about fiction", "recommended_book": sample_book, "similarity_score": 0.75},
	        )
	        self.assertEqual(response.status_code, 200)
	        explanation = response.json()
	        self.assertIn("match_score", explanation)
	        self.assertIn("confidence", explanation)
	        self.assertIn("summary", explanation)
	        self.assertIn("details", explanation)

	    def test_submit_feedback_and_get_stats(self):
	        """Two POSTs to /feedback are reflected in the counters of GET /feedback/stats."""
	        # Submit feedback
	        feedback_payload = {
	            "query": "fantasy adventure",
	            "book_id": "4",  # Unrelated Book
	            "feedback_type": "positive",
	            "session_id": "test_session_1",
	        }
	        response = self.client.post("/feedback", json=feedback_payload)
	        self.assertEqual(response.status_code, 204)  # No Content

	        feedback_payload_2 = {
	            "query": "sci-fi classic",
	            "book_id": "5",  # Sci-Fi Classic
	            "feedback_type": "negative",
	            "session_id": "test_session_1",
	        }
	        response = self.client.post("/feedback", json=feedback_payload_2)
	        self.assertEqual(response.status_code, 204)  # No Content

	        # Get stats
	        response = self.client.get("/feedback/stats")
	        self.assertEqual(response.status_code, 200)
	        stats = response.json()
	        self.assertEqual(stats["total_feedback"], 2)
	        self.assertEqual(stats["positive_feedback"], 1)
	        self.assertEqual(stats["negative_feedback"], 1)
	        self.assertIn("Unrelated Book", stats["feedback_by_book_title"])
	        self.assertIn("fantasy adventure", stats["feedback_by_query"])