Spaces:

NeerajCodz
/

scrapeRL

Running

NeerajCodz commited on Mar 28

Commit

e8d7c11

1 Parent(s): 864b733

test: add comprehensive API and core module tests

- Add 30+ new tests for memory, tasks, episode modules
- Episode tests cover lifecycle, steps, manager operations
- Memory API tests cover store, query, delete operations
- Tasks API tests cover list, filter, create operations
- All 101 tests passing with 44% coverage

Files changed (12) hide show

backend/.coverage +0 -0
backend/app/agents/__pycache__/coordinator.cpython-314.pyc +0 -0
backend/app/agents/__pycache__/memory_agent.cpython-314.pyc +0 -0
backend/app/api/routes/__pycache__/agents.cpython-314.pyc +0 -0
backend/app/memory/__pycache__/long_term.cpython-314.pyc +0 -0
backend/app/memory/__pycache__/short_term.cpython-314.pyc +0 -0
backend/app/memory/__pycache__/working.cpython-314.pyc +0 -0
backend/app/models/__pycache__/router.cpython-314.pyc +0 -0
backend/tests/test_api/test_memory.py +160 -0
backend/tests/test_api/test_tasks.py +131 -0
backend/tests/test_core/test_episode.py +330 -0
backend/tests/test_models/test_base_simple.py +68 -0

backend/.coverage ADDED Viewed

Binary file (53.2 kB). View file

backend/app/agents/__pycache__/coordinator.cpython-314.pyc CHANGED Viewed

Binary files a/backend/app/agents/__pycache__/coordinator.cpython-314.pyc and b/backend/app/agents/__pycache__/coordinator.cpython-314.pyc differ

backend/app/agents/__pycache__/memory_agent.cpython-314.pyc CHANGED Viewed

Binary files a/backend/app/agents/__pycache__/memory_agent.cpython-314.pyc and b/backend/app/agents/__pycache__/memory_agent.cpython-314.pyc differ

backend/app/api/routes/__pycache__/agents.cpython-314.pyc CHANGED Viewed

Binary files a/backend/app/api/routes/__pycache__/agents.cpython-314.pyc and b/backend/app/api/routes/__pycache__/agents.cpython-314.pyc differ

backend/app/memory/__pycache__/long_term.cpython-314.pyc CHANGED Viewed

Binary files a/backend/app/memory/__pycache__/long_term.cpython-314.pyc and b/backend/app/memory/__pycache__/long_term.cpython-314.pyc differ

backend/app/memory/__pycache__/short_term.cpython-314.pyc CHANGED Viewed

Binary files a/backend/app/memory/__pycache__/short_term.cpython-314.pyc and b/backend/app/memory/__pycache__/short_term.cpython-314.pyc differ

backend/app/memory/__pycache__/working.cpython-314.pyc CHANGED Viewed

Binary files a/backend/app/memory/__pycache__/working.cpython-314.pyc and b/backend/app/memory/__pycache__/working.cpython-314.pyc differ

backend/app/models/__pycache__/router.cpython-314.pyc CHANGED Viewed

Binary files a/backend/app/models/__pycache__/router.cpython-314.pyc and b/backend/app/models/__pycache__/router.cpython-314.pyc differ

backend/tests/test_api/test_memory.py ADDED Viewed

	@@ -0,0 +1,160 @@

+"""Tests for memory API routes."""
+import pytest
+from fastapi.testclient import TestClient
+class TestMemoryAPI:
+    """Test memory API endpoints."""
+    def test_store_memory_entry(self, client: TestClient) -> None:
+        """Test POST /api/memory/store creates new memory entry."""
+        payload = {
+            "memory_type": "short_term",
+            "content": {
+                "observation": "User clicked login button",
+                "action": "click",
+            },
+            "metadata": {"url": "https://example.com"},
+            "episode_id": "ep_001",
+            "agent_id": "agent_test",
+        }
+        response = client.post("/api/memory/store", json=payload)
+        assert response.status_code == 201
+        data = response.json()
+        assert "id" in data
+        assert data["memory_type"] == "short_term"
+        assert data["content"] == payload["content"]
+        assert data["episode_id"] == "ep_001"
+        assert "timestamp" in data
+    def test_store_memory_all_types(self, client: TestClient) -> None:
+        """Test storing memory with all valid types."""
+        valid_types = ["short_term", "working", "long_term", "shared"]
+        for memory_type in valid_types:
+            payload = {
+                "memory_type": memory_type,
+                "content": {"test": f"data for {memory_type}"},
+            }
+            response = client.post("/api/memory/store", json=payload)
+            assert response.status_code == 201
+            data = response.json()
+            assert data["memory_type"] == memory_type
+    def test_store_memory_invalid_type(self, client: TestClient) -> None:
+        """Test storing memory with invalid type."""
+        payload = {"memory_type": "invalid_type", "content": {"test": "data"}}
+        response = client.post("/api/memory/store", json=payload)
+        assert response.status_code == 422
+    def test_get_memory_entry(self, client: TestClient) -> None:
+        """Test GET /api/memory/{entry_id}."""
+        # Store first
+        payload = {
+            "memory_type": "long_term",
+            "content": {"knowledge": "test data"},
+        }
+        store_response = client.post("/api/memory/store", json=payload)
+        assert store_response.status_code == 201
+        entry_id = store_response.json()["id"]
+        # Retrieve
+        response = client.get(f"/api/memory/{entry_id}")
+        assert response.status_code == 200
+        data = response.json()
+        assert data["id"] == entry_id
+        assert data["memory_type"] == "long_term"
+    def test_get_nonexistent_memory(self, client: TestClient) -> None:
+        """Test GET /api/memory/{entry_id} for non-existent."""
+        response = client.get("/api/memory/nonexistent-id-12345")
+        assert response.status_code == 404
+    def test_delete_memory_entry(self, client: TestClient) -> None:
+        """Test DELETE /api/memory/{entry_id}."""
+        # Store first
+        payload = {
+            "memory_type": "short_term",
+            "content": {"temporary": "data"},
+        }
+        store_response = client.post("/api/memory/store", json=payload)
+        assert store_response.status_code == 201
+        entry_id = store_response.json()["id"]
+        # Delete
+        response = client.delete(f"/api/memory/{entry_id}")
+        assert response.status_code == 204
+        # Verify deleted
+        get_response = client.get(f"/api/memory/{entry_id}")
+        assert get_response.status_code == 404
+    def test_query_memory(self, client: TestClient) -> None:
+        """Test POST /api/memory/query."""
+        # Store some entries first
+        for i in range(3):
+            payload = {
+                "memory_type": "short_term",
+                "content": {"index": i, "data": f"test_{i}"},
+            }
+            client.post("/api/memory/store", json=payload)
+        # Query
+        query_payload = {"query": "test", "limit": 10}
+        response = client.post("/api/memory/query", json=query_payload)
+        assert response.status_code == 200
+        data = response.json()
+        assert "entries" in data
+        assert "total_found" in data
+    def test_get_memory_stats(self, client: TestClient) -> None:
+        """Test GET /api/memory/stats/overview."""
+        response = client.get("/api/memory/stats/overview")
+        assert response.status_code == 200
+        data = response.json()
+        assert "short_term_count" in data
+        assert "working_count" in data
+        assert "long_term_count" in data
+        assert "shared_count" in data
+        assert "total_count" in data
+    def test_clear_memory_layer(self, client: TestClient) -> None:
+        """Test DELETE /api/memory/clear/{memory_type}."""
+        # Store entries
+        payload = {
+            "memory_type": "short_term",
+            "content": {"test": "data"},
+        }
+        client.post("/api/memory/store", json=payload)
+        # Clear
+        response = client.delete("/api/memory/clear/short_term")
+        assert response.status_code == 204
+    def test_consolidate_memory(self, client: TestClient) -> None:
+        """Test POST /api/memory/consolidate."""
+        # Store short-term entries
+        for i in range(3):
+            payload = {
+                "memory_type": "short_term",
+                "content": {"index": i},
+            }
+            client.post("/api/memory/store", json=payload)
+        # Consolidate
+        response = client.post("/api/memory/consolidate")
+        assert response.status_code == 200
+        data = response.json()
+        assert "consolidated_count" in data

backend/tests/test_api/test_tasks.py ADDED Viewed

	@@ -0,0 +1,131 @@

+"""Tests for tasks API routes."""
+import pytest
+from fastapi.testclient import TestClient
+class TestTasksAPI:
+    """Test tasks API endpoints."""
+    def test_list_tasks(self, client: TestClient) -> None:
+        """Test GET /api/tasks/ returns task list."""
+        response = client.get("/api/tasks/")
+        assert response.status_code == 200
+        data = response.json()
+        assert "tasks" in data
+        assert "total" in data
+        assert "page" in data
+        assert "page_size" in data
+        assert isinstance(data["tasks"], list)
+    def test_list_tasks_pagination(self, client: TestClient) -> None:
+        """Test task list pagination."""
+        response = client.get("/api/tasks/?page=1&page_size=2")
+        assert response.status_code == 200
+        data = response.json()
+        assert data["page"] == 1
+        assert data["page_size"] == 2
+    def test_list_tasks_filter_by_difficulty(self, client: TestClient) -> None:
+        """Test filtering tasks by difficulty."""
+        response = client.get("/api/tasks/?difficulty=easy")
+        assert response.status_code == 200
+        data = response.json()
+        for task in data["tasks"]:
+            assert task["difficulty"] == "easy"
+    def test_list_tasks_filter_by_type(self, client: TestClient) -> None:
+        """Test filtering tasks by type."""
+        response = client.get("/api/tasks/?task_type=single_page")
+        assert response.status_code == 200
+        data = response.json()
+        for task in data["tasks"]:
+            assert task["task_type"] == "single_page"
+    def test_list_tasks_filter_by_tag(self, client: TestClient) -> None:
+        """Test filtering tasks by tag."""
+        response = client.get("/api/tasks/?tag=ecommerce")
+        assert response.status_code == 200
+        data = response.json()
+        for task in data["tasks"]:
+            assert "ecommerce" in task["tags"]
+    def test_get_task_by_id(self, client: TestClient) -> None:
+        """Test GET /api/tasks/{task_id}."""
+        response = client.get("/api/tasks/task_001")
+        assert response.status_code == 200
+        data = response.json()
+        assert data["id"] == "task_001"
+        assert "name" in data
+        assert "description" in data
+        assert "fields_to_extract" in data
+    def test_get_nonexistent_task(self, client: TestClient) -> None:
+        """Test GET /api/tasks/{task_id} for non-existent task."""
+        response = client.get("/api/tasks/nonexistent-task-id")
+        assert response.status_code == 404
+    def test_create_task(self, client: TestClient) -> None:
+        """Test POST /api/tasks/ creates a new task."""
+        import uuid
+        task_id = f"test_task_{uuid.uuid4().hex[:8]}"
+        payload = {
+            "id": task_id,
+            "name": "Test Task",
+            "description": "A test scraping task",
+            "task_type": "single_page",
+            "difficulty": "easy",
+            "target_url": "https://example.com/test",
+            "fields_to_extract": [
+                {
+                    "name": "title",
+                    "description": "Page title",
+                    "field_type": "string",
+                    "required": True,
+                }
+            ],
+            "success_criteria": {"min_accuracy": 0.8},
+        }
+        response = client.post("/api/tasks/", json=payload)
+        assert response.status_code == 201
+        data = response.json()
+        assert data["id"] == task_id
+        assert data["name"] == "Test Task"
+    def test_create_duplicate_task(self, client: TestClient) -> None:
+        """Test creating duplicate task returns conflict."""
+        payload = {
+            "id": "task_001",  # Existing task ID
+            "name": "Duplicate Task",
+            "description": "Should conflict",
+            "task_type": "single_page",
+            "difficulty": "easy",
+            "fields_to_extract": [
+                {"name": "test", "description": "test"}
+            ],
+            "success_criteria": {},
+        }
+        response = client.post("/api/tasks/", json=payload)
+        assert response.status_code == 409
+    def test_get_task_types(self, client: TestClient) -> None:
+        """Test GET /api/tasks/types/ returns task types."""
+        response = client.get("/api/tasks/types/")
+        assert response.status_code == 200
+        data = response.json()
+        assert "task_types" in data
+        assert "difficulties" in data
+        assert "single_page" in data["task_types"]
+        assert "easy" in data["difficulties"]

backend/tests/test_core/test_episode.py ADDED Viewed

	@@ -0,0 +1,330 @@

+"""Tests for episode management."""
+import pytest
+from app.core.episode import Episode, EpisodeStep, EpisodeStatus, EpisodeManager
+class TestEpisode:
+    """Test Episode class."""
+    def test_episode_creation(self) -> None:
+        """Test creating an episode."""
+        episode = Episode(
+            episode_id="ep_001",
+            task_id="task_001",
+            max_steps=10,
+        )
+        assert episode.episode_id == "ep_001"
+        assert episode.task_id == "task_001"
+        assert episode.max_steps == 10
+        assert episode.status == EpisodeStatus.PENDING
+        assert len(episode.steps) == 0
+    def test_episode_start(self) -> None:
+        """Test starting an episode."""
+        episode = Episode(episode_id="ep_002", task_id="task_002")
+        episode.start()
+        assert episode.status == EpisodeStatus.RUNNING
+        assert episode.started_at is not None
+    def test_episode_add_step(self) -> None:
+        """Test adding a step to episode."""
+        episode = Episode(episode_id="ep_003", task_id="task_003")
+        episode.start()
+        step = episode.add_step(
+            action_type="navigate",
+            action_params={"target": "/login"},
+            reward=0.5,
+            reward_breakdown={"progress": 0.5},
+            observation_summary={"url": "https://example.com"},
+        )
+        assert len(episode.steps) == 1
+        assert episode.steps[0].step_number == 1
+        assert episode.total_reward == 0.5
+    def test_episode_multiple_steps(self) -> None:
+        """Test adding multiple steps."""
+        episode = Episode(episode_id="ep_004", task_id="task_004")
+        episode.start()
+        rewards = [0.1, 0.2, 0.3, 0.4]
+        for i, reward in enumerate(rewards):
+            episode.add_step(
+                action_type="test",
+                action_params={"step": i},
+                reward=reward,
+                reward_breakdown={"base": reward},
+                observation_summary={"step": i},
+            )
+        assert len(episode.steps) == 4
+        assert episode.total_reward == pytest.approx(1.0)
+        assert episode.current_step == 4
+    def test_episode_completion(self) -> None:
+        """Test completing an episode."""
+        episode = Episode(episode_id="ep_005", task_id="task_005")
+        episode.start()
+        episode.complete(success=True)
+        assert episode.status == EpisodeStatus.COMPLETED
+        assert episode.ended_at is not None
+    def test_episode_failure(self) -> None:
+        """Test failing an episode."""
+        episode = Episode(episode_id="ep_006", task_id="task_006")
+        episode.start()
+        episode.fail(reason="Test failure")
+        assert episode.status == EpisodeStatus.FAILED
+        assert episode.failure_reason == "Test failure"
+    def test_episode_truncation(self) -> None:
+        """Test truncating an episode."""
+        episode = Episode(episode_id="ep_007", task_id="task_007", max_steps=5)
+        episode.start()
+        # Add steps up to max
+        for i in range(5):
+            episode.add_step(
+                action_type="test",
+                action_params={},
+                reward=0.1,
+                reward_breakdown={"base": 0.1},
+                observation_summary={},
+            )
+        episode.truncate()
+        assert episode.status == EpisodeStatus.TRUNCATED
+    def test_episode_is_terminal(self) -> None:
+        """Test terminal state check."""
+        episode = Episode(episode_id="ep_008", task_id="task_008")
+        assert not episode.is_terminal
+        episode.start()
+        assert not episode.is_terminal
+        episode.complete(success=True)
+        assert episode.is_terminal
+    def test_episode_duration(self) -> None:
+        """Test episode duration calculation."""
+        episode = Episode(episode_id="ep_009", task_id="task_009")
+        episode.start()
+        # Duration should be None before completion
+        import time
+        time.sleep(0.01)  # Small delay
+        episode.complete(success=True)
+        assert episode.duration_seconds is not None
+        assert episode.duration_seconds >= 0
+    def test_episode_average_reward(self) -> None:
+        """Test average reward calculation."""
+        episode = Episode(episode_id="ep_010", task_id="task_010")
+        episode.start()
+        rewards = [0.2, 0.4, 0.6]
+        for i, reward in enumerate(rewards):
+            episode.add_step(
+                action_type="test",
+                action_params={},
+                reward=reward,
+                reward_breakdown={"base": reward},
+                observation_summary={},
+            )
+        assert episode.average_reward == pytest.approx(0.4)
+    def test_episode_summary(self) -> None:
+        """Test episode summary."""
+        episode = Episode(episode_id="ep_011", task_id="task_011")
+        episode.start()
+        summary = episode.get_summary()
+        assert summary["episode_id"] == "ep_011"
+        assert summary["task_id"] == "task_011"
+        assert "status" in summary
+        assert "steps" in summary
+    def test_episode_cancel(self) -> None:
+        """Test episode cancellation."""
+        episode = Episode(episode_id="ep_012", task_id="task_012")
+        episode.start()
+        episode.cancel()
+        assert episode.status == EpisodeStatus.CANCELLED
+        assert episode.is_terminal
+    def test_episode_get_action_sequence(self) -> None:
+        """Test getting action sequence."""
+        episode = Episode(episode_id="ep_013", task_id="task_013")
+        episode.start()
+        episode.add_step("navigate", {}, 0.1, {}, {})
+        episode.add_step("click", {}, 0.2, {}, {})
+        episode.add_step("extract", {}, 0.3, {}, {})
+        actions = episode.get_action_sequence()
+        assert actions == ["navigate", "click", "extract"]
+    def test_episode_get_reward_history(self) -> None:
+        """Test getting reward history."""
+        episode = Episode(episode_id="ep_014", task_id="task_014")
+        episode.start()
+        episode.add_step("a", {}, 0.1, {}, {})
+        episode.add_step("b", {}, 0.2, {}, {})
+        episode.add_step("c", {}, 0.3, {}, {})
+        rewards = episode.get_reward_history()
+        assert rewards == [0.1, 0.2, 0.3]
+class TestEpisodeStep:
+    """Test EpisodeStep class."""
+    def test_step_creation(self) -> None:
+        """Test creating an episode step."""
+        from datetime import datetime, timezone
+        step = EpisodeStep(
+            step_number=1,
+            timestamp=datetime.now(timezone.utc).isoformat(),
+            action_type="click",
+            action_params={"selector": "#btn"},
+            reward=0.75,
+            reward_breakdown={"progress": 0.75},
+            observation_summary={"url": "https://example.com", "title": "Test"},
+        )
+        assert step.step_number == 1
+        assert step.action_type == "click"
+        assert step.action_params["selector"] == "#btn"
+        assert step.reward == 0.75
+    def test_step_with_error(self) -> None:
+        """Test step with error."""
+        from datetime import datetime, timezone
+        step = EpisodeStep(
+            step_number=1,
+            timestamp=datetime.now(timezone.utc).isoformat(),
+            action_type="click",
+            action_params={},
+            reward=-0.5,
+            reward_breakdown={"error": -0.5},
+            observation_summary={},
+            error="Element not found",
+            duration_ms=150.0,
+        )
+        assert step.error == "Element not found"
+        assert step.duration_ms == 150.0
+    def test_step_with_reasoning(self) -> None:
+        """Test step with action reasoning."""
+        from datetime import datetime, timezone
+        step = EpisodeStep(
+            step_number=1,
+            timestamp=datetime.now(timezone.utc).isoformat(),
+            action_type="extract",
+            action_params={"field": "price"},
+            action_reasoning="Extracting price from product page",
+            reward=0.5,
+            reward_breakdown={"extraction": 0.5},
+            observation_summary={},
+        )
+        assert step.action_reasoning == "Extracting price from product page"
+class TestEpisodeManager:
+    """Test EpisodeManager class."""
+    def test_manager_create_episode(self) -> None:
+        """Test creating episode via manager."""
+        manager = EpisodeManager()
+        episode = manager.create_episode("ep_100", "task_100")
+        assert episode.episode_id == "ep_100"
+        assert episode.task_id == "task_100"
+    def test_manager_get_episode(self) -> None:
+        """Test getting episode from manager."""
+        manager = EpisodeManager()
+        manager.create_episode("ep_101", "task_101")
+        episode = manager.get_episode("ep_101")
+        assert episode is not None
+        assert episode.episode_id == "ep_101"
+    def test_manager_get_nonexistent(self) -> None:
+        """Test getting non-existent episode."""
+        manager = EpisodeManager()
+        episode = manager.get_episode("nonexistent")
+        assert episode is None
+    def test_manager_remove_episode(self) -> None:
+        """Test removing episode from manager."""
+        manager = EpisodeManager()
+        manager.create_episode("ep_102", "task_102")
+        removed = manager.remove_episode("ep_102")
+        assert removed is True
+        episode = manager.get_episode("ep_102")
+        assert episode is None
+    def test_manager_list_episodes(self) -> None:
+        """Test listing episodes."""
+        manager = EpisodeManager()
+        manager.create_episode("ep_103", "task_103")
+        manager.create_episode("ep_104", "task_104")
+        manager.create_episode("ep_105", "task_105")
+        episodes = manager.list_episodes()
+        assert len(episodes) == 3
+    def test_manager_list_episodes_by_status(self) -> None:
+        """Test listing episodes by status."""
+        manager = EpisodeManager()
+        ep1 = manager.create_episode("ep_106", "task_106")
+        ep2 = manager.create_episode("ep_107", "task_107")
+        ep3 = manager.create_episode("ep_108", "task_108")
+        ep1.start()
+        ep2.start()
+        ep2.complete(success=True)
+        running = manager.list_episodes(status=EpisodeStatus.RUNNING)
+        assert len(running) == 1
+        assert running[0].episode_id == "ep_106"
+        completed = manager.list_episodes(status=EpisodeStatus.COMPLETED)
+        assert len(completed) == 1
+        assert completed[0].episode_id == "ep_107"
+    def test_manager_list_episodes_by_task(self) -> None:
+        """Test listing episodes by task ID."""
+        manager = EpisodeManager()
+        manager.create_episode("ep_109", "task_A")
+        manager.create_episode("ep_110", "task_A")
+        manager.create_episode("ep_111", "task_B")
+        task_a_episodes = manager.list_episodes(task_id="task_A")
+        assert len(task_a_episodes) == 2
+        task_b_episodes = manager.list_episodes(task_id="task_B")
+        assert len(task_b_episodes) == 1

backend/tests/test_models/test_base_simple.py ADDED Viewed

	@@ -0,0 +1,68 @@

+"""Simple tests to verify the base model structures."""
+import pytest
+from app.models.providers.base import (
+    TokenUsage,
+    CompletionResponse,
+    ModelInfo,
+    ProviderError
+)
+def test_token_usage_creation():
+    """Test TokenUsage creation and addition."""
+    usage1 = TokenUsage(prompt_tokens=10, completion_tokens=20, total_tokens=30)
+    usage2 = TokenUsage(prompt_tokens=5, completion_tokens=10, total_tokens=15)
+    combined = usage1 + usage2
+    assert combined.prompt_tokens == 15
+    assert combined.completion_tokens == 30
+    assert combined.total_tokens == 45
+def test_completion_response_creation():
+    """Test CompletionResponse creation."""
+    usage = TokenUsage(prompt_tokens=10, completion_tokens=20, total_tokens=30)
+    response = CompletionResponse(
+        content="Hello world",
+        model="test-model",
+        provider="test-provider",
+        usage=usage,
+        finish_reason="stop",
+        cost=0.001
+    )
+    assert response.content == "Hello world"
+    assert response.model == "test-model"
+    assert response.provider == "test-provider"
+    assert response.usage.total_tokens == 30
+    assert response.cost == 0.001
+def test_model_info_creation():
+    """Test ModelInfo creation."""
+    info = ModelInfo(
+        id="test-model",
+        name="Test Model",
+        provider="test",
+        context_window=4096,
+        max_output_tokens=1000,
+        cost_per_1k_input=0.001,
+        cost_per_1k_output=0.002
+    )
+    assert info.id == "test-model"
+    assert info.context_window == 4096
+    assert info.cost_per_million_input == 1.0
+    assert info.cost_per_million_output == 2.0
+def test_provider_error():
+    """Test ProviderError creation."""
+    error = ProviderError("Test error", "test-provider", 500)
+    assert error.message == "Test error"
+    assert error.provider == "test-provider"
+    assert error.status_code == 500
+    assert str(error) == "[test-provider] Test error"