Spaces:

jatinmehra
/

CRAWL-GPT-CHAT

Running

CRAWL-GPT-CHAT / tests / test_core / test_integration.py

Jatin Mehra

Add pytest configuration and restructure test files; move tests to core directory and update imports

5b02b7b 4 months ago

2.67 kB

	import unittest
	from unittest.mock import AsyncMock, MagicMock
	from crawlgpt.core.LLMBasedCrawler import Model
	from crawlgpt.core.DatabaseHandler import VectorDatabase


	class TestIntegration(unittest.IsolatedAsyncioTestCase):  # IsolatedAsyncioTestCase lets test methods be coroutines
	    """Walk a ``Model`` through extraction, summarization, storage and querying.

	    Every collaborator touched by the test (chunker, summarizer, database,
	    URL extraction and response generation) is replaced with a mock, so the
	    assertions check how the steps are wired together in this test rather
	    than the behaviour of the real components.
	    """

	    def setUp(self):
	        """Create a ``Model`` and swap its collaborators for mocks."""
	        model = self.model = Model()

	        # Chunking always yields the same three fixed chunks.
	        model.chunk_text = MagicMock(return_value=["Chunk 1", "Chunk 2", "Chunk 3"])

	        # The summarizer echoes the chunk it was given inside a fixed prefix.
	        def fake_summary(chunk):
	            return f"Summary of {chunk}"

	        model.summarizer = MagicMock()
	        model.summarizer.generate_summary = MagicMock(side_effect=fake_summary)

	        # The database is a mock whose ``data`` attribute is a plain list
	        # acting as in-memory storage.
	        model.database = MagicMock()
	        model.database.data = []

	        def mock_add_data(chunk, summary):
	            # Record each chunk/summary pair in the simulated storage.
	            storage = self.model.database.data
	            storage.append(dict(chunk=chunk, summary=summary))

	        model.database.add_data = MagicMock(side_effect=mock_add_data)

	        # URL extraction becomes an awaitable no-op.
	        model.extract_content_from_url = AsyncMock()

	    async def test_end_to_end_flow(self):
	        """Test the full pipeline: URL extraction, summarization, and response generation.

	        Awaits the mocked extraction, pushes every mocked chunk through the
	        mocked summarizer into the simulated database, checks that something
	        was stored, and finally checks that the (mocked) response is a
	        non-empty string.
	        """
	        model = self.model
	        print("[DEBUG] Starting integration test.")

	        # Content extraction (mocked; nothing is fetched).
	        url = "https://example.com"
	        print(f"[DEBUG] Mocking URL: {url}")
	        await model.extract_content_from_url(url)

	        # Summarize each chunk and store it alongside its summary.
	        for piece in model.chunk_text("Example text for testing."):
	            model.database.add_data(piece, model.summarizer.generate_summary(piece))

	        # The simulated storage must have received at least one entry.
	        database_size = len(model.database.data)
	        print(f"[DEBUG] Database size after processing: {database_size}")
	        self.assertGreater(database_size, 0)

	        # Ask a question; response generation is replaced by a canned answer.
	        query = "What is the test about?"
	        print(f"[DEBUG] Running query: {query}")
	        model.generate_response = MagicMock(return_value="This is a test response.")
	        response = model.generate_response(
	            query,
	            temperature=0.5,
	            max_tokens=100,
	            model="llama-3.1-8b-instant",
	        )
	        print(f"[DEBUG] Query response: {response}")

	        self.assertIsInstance(response, str)
	        self.assertGreater(len(response), 0)


	# Run the tests in this module when the file is executed directly.
	if __name__ == "__main__":
	    unittest.main()