import os
import sys
import pytest
import asyncio
import json

# Make the project root importable when this file is run directly
parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(parent_dir)

from crawl4ai.async_webcrawler import AsyncWebCrawler


@pytest.mark.asyncio
async def test_cache_url():
    async with AsyncWebCrawler(verbose=True) as crawler:
        url = "https://www.example.com"

        # First crawl: bypass the cache so the page is fetched fresh
        result1 = await crawler.arun(url=url, bypass_cache=True)
        assert result1.success

        # Second crawl: served from the cache, so the HTML should match
        result2 = await crawler.arun(url=url, bypass_cache=False)
        assert result2.success
        assert result2.html == result1.html


@pytest.mark.asyncio
async def test_bypass_cache():
    async with AsyncWebCrawler(verbose=True) as crawler:
        url = "https://www.python.org"

        # First crawl, fetched live
        result1 = await crawler.arun(url=url, bypass_cache=True)
        assert result1.success

        # Second crawl with the cache bypassed again: both results come from
        # live fetches, so the HTML is not expected to be identical
        # (note: this assertion can be flaky for fully static pages)
        result2 = await crawler.arun(url=url, bypass_cache=True)
        assert result2.success
        assert result2.html != result1.html


@pytest.mark.asyncio
async def test_cache_size():
    async with AsyncWebCrawler(verbose=True) as crawler:
        initial_size = await crawler.aget_cache_size()

        # Crawling a new URL should add exactly one entry to the cache
        url = "https://www.nbcnews.com/business"
        await crawler.arun(url=url, bypass_cache=True)

        new_size = await crawler.aget_cache_size()
        assert new_size == initial_size + 1


@pytest.mark.asyncio
async def test_clear_cache():
    async with AsyncWebCrawler(verbose=True) as crawler:
        url = "https://www.example.org"
        await crawler.arun(url=url, bypass_cache=True)

        initial_size = await crawler.aget_cache_size()
        assert initial_size > 0

        # Clearing the cache should remove all stored entries
        await crawler.aclear_cache()
        new_size = await crawler.aget_cache_size()
        assert new_size == 0


@pytest.mark.asyncio
async def test_flush_cache():
    async with AsyncWebCrawler(verbose=True) as crawler:
        url = "https://www.example.net"
        await crawler.arun(url=url, bypass_cache=True)

        initial_size = await crawler.aget_cache_size()
        assert initial_size > 0

        # Flushing drops the cache storage entirely
        await crawler.aflush_cache()
        new_size = await crawler.aget_cache_size()
        assert new_size == 0

        # Crawling should still succeed after a flush
        result = await crawler.arun(url=url, bypass_cache=False)
        assert result.success


if __name__ == "__main__":
    pytest.main([__file__, "-v"])