mirror of
https://github.com/unclecode/crawl4ai.git
synced 2025-09-27 11:48:51 +00:00
124 lines
4.3 KiB
Python
124 lines
4.3 KiB
Python
![]() |
import os
|
||
|
import sys
|
||
|
import pytest
|
||
|
import asyncio
|
||
|
import base64
|
||
|
from PIL import Image
|
||
|
import io
|
||
|
|
||
|
# Add the parent directory to the Python path
|
||
|
parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||
|
sys.path.append(parent_dir)
|
||
|
|
||
|
from crawl4ai.async_webcrawler import AsyncWebCrawler
|
||
|
|
||
|
@pytest.mark.asyncio
async def test_basic_screenshot():
    """Crawl a static page with ``screenshot=True`` and check the PNG payload.

    The crawl must report success, and ``result.screenshot`` must be
    base64-encoded data that decodes to an intact PNG image.
    """
    async with AsyncWebCrawler(verbose=True) as crawler:
        url = "https://example.com"  # A static website
        result = await crawler.arun(url=url, bypass_cache=True, screenshot=True)

        assert result.success
        assert result.screenshot is not None

        # Verify the screenshot is a valid image. Image.open() is lazy and
        # only identifies the file from its header, so verify() is required
        # to detect truncated or corrupted image data.
        image_data = base64.b64decode(result.screenshot)
        with Image.open(io.BytesIO(image_data)) as image:
            assert image.format == "PNG"
            image.verify()
|
||
|
|
||
|
@pytest.mark.asyncio
async def test_screenshot_with_wait_for():
    """Take a screenshot after waiting on a CSS selector and check the PNG.

    The crawl is given ``wait_for="css:#content"`` and must report success
    with a ``result.screenshot`` that decodes to an intact PNG image.
    """
    async with AsyncWebCrawler(verbose=True) as crawler:
        # Using a website with dynamic content
        url = "https://www.youtube.com"
        wait_for = "css:#content"  # Wait for the main content to load

        result = await crawler.arun(
            url=url,
            bypass_cache=True,
            screenshot=True,
            wait_for=wait_for,
        )

        assert result.success
        assert result.screenshot is not None

        # Verify the screenshot is a valid image. Image.open() is lazy and
        # only identifies the file from its header, so verify() is required
        # to detect truncated or corrupted image data.
        image_data = base64.b64decode(result.screenshot)
        with Image.open(io.BytesIO(image_data)) as image:
            assert image.format == "PNG"
            image.verify()

        # You might want to add more specific checks here, like image dimensions
        # or even use image recognition to verify certain elements are present
|
||
|
|
||
|
@pytest.mark.asyncio
async def test_screenshot_with_js_wait_for():
    """Take a screenshot after waiting on a JavaScript predicate and check it.

    The crawl is given a ``js:`` ``wait_for`` expression that tests for the
    ``#nav-logo-sprites`` element, and must report success with a
    ``result.screenshot`` that decodes to an intact PNG image.
    """
    async with AsyncWebCrawler(verbose=True) as crawler:
        url = "https://www.amazon.com"
        wait_for = "js:() => document.querySelector('#nav-logo-sprites') !== null"

        result = await crawler.arun(
            url=url,
            bypass_cache=True,
            screenshot=True,
            wait_for=wait_for,
        )

        assert result.success
        assert result.screenshot is not None

        # Image.open() is lazy and only identifies the file from its header,
        # so verify() is required to detect truncated or corrupted data.
        image_data = base64.b64decode(result.screenshot)
        with Image.open(io.BytesIO(image_data)) as image:
            assert image.format == "PNG"
            image.verify()
|
||
|
|
||
|
@pytest.mark.asyncio
async def test_screenshot_without_wait_for():
    """Take a screenshot of a dynamic page without any ``wait_for`` condition.

    The crawl must report success, and ``result.screenshot`` must decode to
    an intact PNG image.
    """
    async with AsyncWebCrawler(verbose=True) as crawler:
        url = "https://www.nytimes.com"  # A website with lots of dynamic content

        result = await crawler.arun(url=url, bypass_cache=True, screenshot=True)

        assert result.success
        assert result.screenshot is not None

        # Image.open() is lazy and only identifies the file from its header,
        # so verify() is required to detect truncated or corrupted data.
        image_data = base64.b64decode(result.screenshot)
        with Image.open(io.BytesIO(image_data)) as image:
            assert image.format == "PNG"
            image.verify()
|
||
|
|
||
|
@pytest.mark.asyncio
async def test_screenshot_comparison():
    """Capture one page with and without ``wait_for`` and compare image sizes.

    Both crawls must succeed and return a screenshot. The screenshot taken
    with ``wait_for`` is expected to be at least as wide and as tall as the
    one taken without it.
    """
    target = "https://www.reddit.com"
    selector = "css:#SHORTCUT_FOCUSABLE_DIV"
    # Arguments shared by both crawls; only ``wait_for`` differs.
    shared_args = {"url": target, "bypass_cache": True, "screenshot": True}

    async with AsyncWebCrawler(verbose=True) as crawler:
        # First capture: no wait condition.
        plain = await crawler.arun(**shared_args)

        # Second capture: block until the selector is present.
        waited = await crawler.arun(**shared_args, wait_for=selector)

        assert plain.success
        assert waited.success
        assert plain.screenshot is not None
        assert waited.screenshot is not None

        # Decode both payloads into PIL images.
        plain_bytes = base64.b64decode(plain.screenshot)
        waited_bytes = base64.b64decode(waited.screenshot)
        plain_image = Image.open(io.BytesIO(plain_bytes))
        waited_image = Image.open(io.BytesIO(waited_bytes))

        # Only the dimensions are compared here; a real-world check would
        # likely use a more sophisticated image comparison technique.
        plain_width, plain_height = plain_image.size
        waited_width, waited_height = waited_image.size
        assert waited_width >= plain_width
        assert waited_height >= plain_height
|
||
|
|
||
|
# Entry point for debugging: run this file's tests directly with pytest.
if __name__ == "__main__":
    # Propagate pytest's return code so a failing run exits non-zero
    # instead of always reporting success to the calling shell.
    sys.exit(pytest.main([__file__, "-v"]))
|