Spaces:
Running
Running
File size: 2,667 Bytes
e0cea98 5b02b7b e0cea98 38b8118 e0cea98 38b8118 e0cea98 38b8118 e0cea98 38b8118 e0cea98 38b8118 e0cea98 38b8118 e0cea98 38b8118 e0cea98 38b8118 e0cea98 38b8118 e0cea98 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
import unittest
from unittest.mock import AsyncMock, MagicMock
from crawlgpt.core.LLMBasedCrawler import Model
from crawlgpt.core.DatabaseHandler import VectorDatabase
class TestIntegration(unittest.IsolatedAsyncioTestCase):  # Use IsolatedAsyncioTestCase for async tests
    """Integration-style test of the extract -> chunk -> summarize -> store -> query flow.

    Every collaborator used by the test (chunker, summarizer, database, URL
    extractor and response generator) is replaced with a mock on the
    ``Model`` instance, so the test checks how the pipeline steps are wired
    together rather than the behaviour of the real implementations.
    """

    def setUp(self):
        """
        Set up the integration test environment.

        Creates a ``Model`` and swaps its chunker, summarizer, database and
        URL extractor for mocks with deterministic return values.
        """
        # NOTE(review): Model() itself is constructed for real here; confirm
        # its constructor has no external side effects in the test environment.
        self.model = Model()

        # Mock the chunking of text
        self.model.chunk_text = MagicMock(return_value=["Chunk 1", "Chunk 2", "Chunk 3"])

        # Mock the summarizer
        self.model.summarizer = MagicMock()
        self.model.summarizer.generate_summary = MagicMock(
            side_effect=lambda chunk: f"Summary of {chunk}"
        )

        # Mock the database and its methods
        self.model.database = MagicMock()
        self.model.database.data = []  # Simulated in-memory database storage

        def mock_add_data(chunk, summary):
            # Append chunks and summaries to the simulated database
            self.model.database.data.append({"chunk": chunk, "summary": summary})

        self.model.database.add_data = MagicMock(side_effect=mock_add_data)

        # Mock URL content extraction
        self.model.extract_content_from_url = AsyncMock()

    async def test_end_to_end_flow(self):
        """
        Test the full pipeline: URL extraction, summarization, and response generation.
        """
        print("[DEBUG] Starting integration test.")

        # Mock URL and simulate content extraction
        url = "https://example.com"
        print(f"[DEBUG] Mocking URL: {url}")
        await self.model.extract_content_from_url(url)
        # Make sure the extraction coroutine was really awaited with the URL.
        self.model.extract_content_from_url.assert_awaited_once_with(url)

        # Simulate the summarization and database insertion pipeline
        chunks = self.model.chunk_text("Example text for testing.")
        for chunk in chunks:
            summary = self.model.summarizer.generate_summary(chunk)
            self.model.database.add_data(chunk, summary)

        # Validate database contents
        database_size = len(self.model.database.data)
        print(f"[DEBUG] Database size after processing: {database_size}")
        self.assertGreater(database_size, 0)
        # The mocked inputs are deterministic, so check the exact records
        # instead of only "something was stored": one entry per chunk, in
        # order, each paired with the summary generated for that chunk.
        self.assertEqual(database_size, len(chunks))
        self.assertEqual(
            self.model.database.data,
            [{"chunk": chunk, "summary": f"Summary of {chunk}"} for chunk in chunks],
        )

        # Generate a query response
        query = "What is the test about?"
        generation_options = {
            "temperature": 0.5,
            "max_tokens": 100,
            "model": "llama-3.1-8b-instant",
        }
        print(f"[DEBUG] Running query: {query}")
        self.model.generate_response = MagicMock(return_value="This is a test response.")
        response = self.model.generate_response(query, **generation_options)
        print(f"[DEBUG] Query response: {response}")
        # Verify the query and generation options reached the generator unchanged.
        self.model.generate_response.assert_called_once_with(query, **generation_options)
        self.assertIsInstance(response, str)
        self.assertGreater(len(response), 0)
if __name__ == "__main__":
    # Executed directly as a script: hand control to unittest's command-line
    # runner so the test cases defined in this module are discovered and run.
    unittest.main()
|