File size: 2,667 Bytes
e0cea98
 
5b02b7b
 
e0cea98
 
38b8118
e0cea98
 
 
 
 
38b8118
 
 
 
 
e0cea98
 
38b8118
 
 
 
 
 
 
 
 
 
 
 
 
e0cea98
 
 
 
 
38b8118
e0cea98
38b8118
 
 
e0cea98
 
38b8118
 
 
 
 
 
e0cea98
38b8118
 
 
e0cea98
 
 
38b8118
 
e0cea98
38b8118
 
e0cea98
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import unittest
from unittest.mock import AsyncMock, MagicMock
from crawlgpt.core.LLMBasedCrawler import Model
from crawlgpt.core.DatabaseHandler import VectorDatabase


class TestIntegration(unittest.IsolatedAsyncioTestCase):  # Use IsolatedAsyncioTestCase for async tests
    """Exercise the chunk -> summarize -> store -> query flow on a mocked ``Model``.

    The model's chunker, summarizer, database, URL extraction and response
    generation are all replaced with mocks, so this test verifies how the
    steps are wired together and what is passed between them rather than the
    behaviour of the real implementations.
    """

    def setUp(self):
        """
        Set up the integration test environment.

        Builds a ``Model`` and swaps its collaborators for mocks with
        deterministic outputs.
        """
        self.model = Model()

        # Mock the chunking of text: always yields the same three chunks.
        self.model.chunk_text = MagicMock(return_value=["Chunk 1", "Chunk 2", "Chunk 3"])

        # Mock the summarizer: the summary is derived from the chunk so each
        # stored pair can be checked exactly.
        self.model.summarizer = MagicMock()
        self.model.summarizer.generate_summary = MagicMock(side_effect=lambda chunk: f"Summary of {chunk}")

        # Mock the database and its methods
        self.model.database = MagicMock()
        self.model.database.data = []  # Simulated in-memory database storage

        def mock_add_data(chunk, summary):
            # Append chunks and summaries to the simulated database
            self.model.database.data.append({"chunk": chunk, "summary": summary})

        self.model.database.add_data = MagicMock(side_effect=mock_add_data)

        # Mock URL content extraction (awaitable, returns a mock value)
        self.model.extract_content_from_url = AsyncMock()

    async def test_end_to_end_flow(self):
        """
        Test the full pipeline: URL extraction, summarization, and response generation.
        """
        print("[DEBUG] Starting integration test.")

        # Mock URL and simulate content extraction
        url = "https://example.com"
        print(f"[DEBUG] Mocking URL: {url}")
        await self.model.extract_content_from_url(url)
        # The extraction coroutine must have been awaited exactly once with the URL.
        self.model.extract_content_from_url.assert_awaited_once_with(url)

        # Simulate the summarization and database insertion pipeline
        chunks = self.model.chunk_text("Example text for testing.")
        for chunk in chunks:
            summary = self.model.summarizer.generate_summary(chunk)
            self.model.database.add_data(chunk, summary)

        # Validate database contents: one record per chunk, in order, each
        # paired with the summary produced for that chunk. A bare "> 0" check
        # would not notice dropped or mismatched records.
        database_size = len(self.model.database.data)
        print(f"[DEBUG] Database size after processing: {database_size}")
        self.assertEqual(database_size, len(chunks))
        self.assertEqual(
            self.model.database.data,
            [{"chunk": chunk, "summary": f"Summary of {chunk}"} for chunk in chunks],
        )
        self.assertEqual(self.model.summarizer.generate_summary.call_count, len(chunks))

        # Generate a query response
        query = "What is the test about?"
        print(f"[DEBUG] Running query: {query}")
        self.model.generate_response = MagicMock(return_value="This is a test response.")
        response = self.model.generate_response(query, temperature=0.5, max_tokens=100, model="llama-3.1-8b-instant")
        print(f"[DEBUG] Query response: {response}")

        # Check both the arguments forwarded to the generator and the result.
        self.model.generate_response.assert_called_once_with(
            query, temperature=0.5, max_tokens=100, model="llama-3.1-8b-instant"
        )
        self.assertIsInstance(response, str)
        self.assertGreater(len(response), 0)


# Allow running this module directly as a script: unittest.main() loads and
# runs the test cases defined in this module.
if __name__ == "__main__":
    unittest.main()