Spaces:
Running
Running
Jatin Mehra
Add pytest configuration and restructure test files; move tests to core directory and update imports
5b02b7b
import unittest | |
from unittest.mock import AsyncMock, MagicMock | |
from crawlgpt.core.LLMBasedCrawler import Model | |
from crawlgpt.core.DatabaseHandler import VectorDatabase | |
class TestIntegration(unittest.IsolatedAsyncioTestCase): # Use IsolatedAsyncioTestCase for async tests | |
def setUp(self): | |
""" | |
Set up the integration test environment. | |
""" | |
self.model = Model() | |
# Mock the chunking of text | |
self.model.chunk_text = MagicMock(return_value=["Chunk 1", "Chunk 2", "Chunk 3"]) | |
# Mock the summarizer | |
self.model.summarizer = MagicMock() | |
self.model.summarizer.generate_summary = MagicMock(side_effect=lambda chunk: f"Summary of {chunk}") | |
# Mock the database and its methods | |
self.model.database = MagicMock() | |
self.model.database.data = [] # Simulated in-memory database storage | |
def mock_add_data(chunk, summary): | |
# Append chunks and summaries to the simulated database | |
self.model.database.data.append({"chunk": chunk, "summary": summary}) | |
self.model.database.add_data = MagicMock(side_effect=mock_add_data) | |
# Mock URL content extraction | |
self.model.extract_content_from_url = AsyncMock() | |
async def test_end_to_end_flow(self): | |
""" | |
Test the full pipeline: URL extraction, summarization, and response generation. | |
""" | |
print("[DEBUG] Starting integration test.") | |
# Mock URL and simulate content extraction | |
url = "https://example.com" | |
print(f"[DEBUG] Mocking URL: {url}") | |
await self.model.extract_content_from_url(url) | |
# Simulate the summarization and database insertion pipeline | |
chunks = self.model.chunk_text("Example text for testing.") | |
for chunk in chunks: | |
summary = self.model.summarizer.generate_summary(chunk) | |
self.model.database.add_data(chunk, summary) | |
# Validate database contents | |
database_size = len(self.model.database.data) | |
print(f"[DEBUG] Database size after processing: {database_size}") | |
self.assertGreater(database_size, 0) | |
# Generate a query response | |
query = "What is the test about?" | |
print(f"[DEBUG] Running query: {query}") | |
self.model.generate_response = MagicMock(return_value="This is a test response.") | |
response = self.model.generate_response(query, temperature=0.5, max_tokens=100, model="llama-3.1-8b-instant") | |
print(f"[DEBUG] Query response: {response}") | |
self.assertIsInstance(response, str) | |
self.assertGreater(len(response), 0) | |
if __name__ == "__main__": | |
unittest.main() | |