diff --git a/CHANGELOG.md b/CHANGELOG.md index 5ec7963..309218d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,64 @@ # Changelog +## [0.3.746] November 29, 2024 + +### Major Features +1. Enhanced Docker Support (Nov 29, 2024) + - Improved GPU support in Docker images. + - Dockerfile refactored for better platform-specific installations. + - Introduced new Docker commands for different platforms: + - `basic-amd64`, `all-amd64`, `gpu-amd64` for AMD64. + - `basic-arm64`, `all-arm64`, `gpu-arm64` for ARM64. + +### Infrastructure & Documentation +- Enhanced README.md to improve user guidance and installation instructions. +- Added installation instructions for Playwright setup in README. +- Created and updated examples in `docs/examples/quickstart_async.py` to be more useful and user-friendly. +- Updated `requirements.txt` with a new `pydantic` dependency. +- Bumped version number in `crawl4ai/__version__.py` to 0.3.746. + +### Breaking Changes +- Streamlined application structure: + - Removed static pages and related code from `main.py` which might affect existing deployments relying on static content. + +### Development Updates +- Developed `post_install` method in `crawl4ai/install.py` to streamline post-installation setup tasks. +- Refined migration processes in `crawl4ai/migrations.py` with enhanced logging for better error visibility. +- Updated `docker-compose.yml` to support local and hub services for different architectures, enhancing build and deploy capabilities. +- Refactored example test cases in `docs/examples/docker_example.py` to facilitate comprehensive testing. + +### README.md +Updated README with new docker commands and setup instructions. +Enhanced installation instructions and guidance. + +### crawl4ai/install.py +Added post-install script functionality. +Introduced `post_install` method for automation of post-installation tasks. + +### crawl4ai/migrations.py +Improved migration logging. 
+Refined migration processes and added better logging. + +### docker-compose.yml +Refactored docker-compose for better service management. +Updated to define services for different platforms and versions. + +### requirements.txt +Updated dependencies. +Added `pydantic` to requirements file. + +### crawl4ai/__version__.py +Updated version number. +Bumped version number to 0.3.746. + +### docs/examples/quickstart_async.py +Enhanced example scripts. +Uncommented example usage in async guide for user functionality. + +### main.py +Refactored code to improve maintainability. +Streamlined app structure by removing static pages code. + ## [0.3.743] November 27, 2024 Enhance features and documentation diff --git a/README.md b/README.md index bbfa585..3d89ee1 100644 --- a/README.md +++ b/README.md @@ -220,48 +220,173 @@ Crawl4AI is available as Docker images for easy deployment. You can either pull --- -### Option 1: Docker Hub (Recommended) +
+🐳 Option 1: Docker Hub (Recommended) +Choose the appropriate image based on your platform and needs: + +### For AMD64 (Regular Linux/Windows): ```bash -# Pull and run from Docker Hub (choose one): -docker pull unclecode/crawl4ai:basic # Basic crawling features -docker pull unclecode/crawl4ai:all # Full installation (ML, LLM support) -docker pull unclecode/crawl4ai:gpu # GPU-enabled version +# Basic version (recommended) +docker pull unclecode/crawl4ai:basic-amd64 +docker run -p 11235:11235 unclecode/crawl4ai:basic-amd64 -# Run the container -docker run -p 11235:11235 unclecode/crawl4ai:basic # Replace 'basic' with your chosen version +# Full ML/LLM support +docker pull unclecode/crawl4ai:all-amd64 +docker run -p 11235:11235 unclecode/crawl4ai:all-amd64 -# In case you want to set platform to arm64 -docker run --platform linux/arm64 -p 11235:11235 unclecode/crawl4ai:basic - -# In case to allocate more shared memory for the container -docker run --shm-size=2gb -p 11235:11235 unclecode/crawl4ai:basic +# With GPU support +docker pull unclecode/crawl4ai:gpu-amd64 +docker run -p 11235:11235 unclecode/crawl4ai:gpu-amd64 ``` ---- +### For ARM64 (M1/M2 Macs, ARM servers): +```bash +# Basic version (recommended) +docker pull unclecode/crawl4ai:basic-arm64 +docker run -p 11235:11235 unclecode/crawl4ai:basic-arm64 -### Option 2: Build from Repository +# Full ML/LLM support +docker pull unclecode/crawl4ai:all-arm64 +docker run -p 11235:11235 unclecode/crawl4ai:all-arm64 + +# With GPU support +docker pull unclecode/crawl4ai:gpu-arm64 +docker run -p 11235:11235 unclecode/crawl4ai:gpu-arm64 +``` + +Need more memory? 
Add `--shm-size`: +```bash +docker run --shm-size=2gb -p 11235:11235 unclecode/crawl4ai:basic-amd64 +``` + +Test the installation: +```bash +curl http://localhost:11235/health +``` + +### For Raspberry Pi (32-bit) (Experimental) +```bash +# Pull and run basic version (recommended for Raspberry Pi) +docker pull unclecode/crawl4ai:basic-armv7 +docker run -p 11235:11235 unclecode/crawl4ai:basic-armv7 + +# With increased shared memory if needed +docker run --shm-size=2gb -p 11235:11235 unclecode/crawl4ai:basic-armv7 +``` + +Note: Due to hardware constraints, only the basic version is recommended for Raspberry Pi. + +
+ +
+🐳 Option 2: Build from Repository + +Build the image locally based on your platform: ```bash # Clone the repository git clone https://github.com/unclecode/crawl4ai.git cd crawl4ai -# Build the image -docker build -t crawl4ai:local \ - --build-arg INSTALL_TYPE=basic \ # Options: basic, all +# For AMD64 (Regular Linux/Windows) +docker build --platform linux/amd64 \ + --tag crawl4ai:local \ + --build-arg INSTALL_TYPE=basic \ . -# In case you want to set platform to arm64 -docker build -t crawl4ai:local \ - --build-arg INSTALL_TYPE=basic \ # Options: basic, all - --platform linux/arm64 \ +# For ARM64 (M1/M2 Macs, ARM servers) +docker build --platform linux/arm64 \ + --tag crawl4ai:local \ + --build-arg INSTALL_TYPE=basic \ . - -# Run your local build -docker run -p 11235:11235 crawl4ai:local ``` +Build options: +- INSTALL_TYPE=basic (default): Basic crawling features +- INSTALL_TYPE=all: Full ML/LLM support +- ENABLE_GPU=true: Add GPU support + +Example with all options: +```bash +docker build --platform linux/amd64 \ + --tag crawl4ai:local \ + --build-arg INSTALL_TYPE=all \ + --build-arg ENABLE_GPU=true \ + . +``` + +Run your local build: +```bash +# Regular run +docker run -p 11235:11235 crawl4ai:local + +# With increased shared memory +docker run --shm-size=2gb -p 11235:11235 crawl4ai:local +``` + +Test the installation: +```bash +curl http://localhost:11235/health +``` + +
+ +
+🐳 Option 3: Using Docker Compose + +Docker Compose provides a more structured way to run Crawl4AI, especially when dealing with environment variables and multiple configurations. + +```bash +# Clone the repository +git clone https://github.com/unclecode/crawl4ai.git +cd crawl4ai +``` + +### For AMD64 (Regular Linux/Windows): +```bash +# Build and run locally +docker-compose --profile local-amd64 up + +# Run from Docker Hub +VERSION=basic docker-compose --profile hub-amd64 up # Basic version +VERSION=all docker-compose --profile hub-amd64 up # Full ML/LLM support +VERSION=gpu docker-compose --profile hub-amd64 up # GPU support +``` + +### For ARM64 (M1/M2 Macs, ARM servers): +```bash +# Build and run locally +docker-compose --profile local-arm64 up + +# Run from Docker Hub +VERSION=basic docker-compose --profile hub-arm64 up # Basic version +VERSION=all docker-compose --profile hub-arm64 up # Full ML/LLM support +VERSION=gpu docker-compose --profile hub-arm64 up # GPU support +``` + +Environment variables (optional): +```bash +# Create a .env file +CRAWL4AI_API_TOKEN=your_token +OPENAI_API_KEY=your_openai_key +CLAUDE_API_KEY=your_claude_key +``` + +The compose file includes: +- Memory management (4GB limit, 1GB reserved) +- Shared memory volume for browser support +- Health checks +- Auto-restart policy +- All necessary port mappings + +Test the installation: +```bash +curl http://localhost:11235/health +``` + +
+ --- ### Quick Test @@ -278,11 +403,11 @@ response = requests.post( ) task_id = response.json()["task_id"] -# Get results +# Continue polling until the task is complete (status="completed") result = requests.get(f"http://localhost:11235/task/{task_id}") ``` -For advanced configuration, environment variables, and usage examples, see our [Docker Deployment Guide](https://crawl4ai.com/mkdocs/basic/docker-deployment/). +For more examples, see our [Docker Examples](https://github.com/unclecode/crawl4ai/blob/main/docs/examples/docker_example.py). For advanced configuration, environment variables, and usage examples, see our [Docker Deployment Guide](https://crawl4ai.com/mkdocs/basic/docker-deployment/). diff --git a/docker-compose.yml b/docker-compose.yml index b93beda..4b22fd9 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,5 +1,6 @@ services: - crawl4ai: + # Local build services for different platforms + crawl4ai-amd64: build: context: . dockerfile: Dockerfile @@ -7,35 +8,39 @@ services: PYTHON_VERSION: "3.10" INSTALL_TYPE: ${INSTALL_TYPE:-basic} ENABLE_GPU: false - profiles: ["local"] - ports: - - "11235:11235" - - "8000:8000" - - "9222:9222" - - "8080:8080" - environment: - - CRAWL4AI_API_TOKEN=${CRAWL4AI_API_TOKEN:-} - - OPENAI_API_KEY=${OPENAI_API_KEY:-} - - CLAUDE_API_KEY=${CLAUDE_API_KEY:-} - volumes: - - /dev/shm:/dev/shm - deploy: - resources: - limits: - memory: 4G - reservations: - memory: 1G - restart: unless-stopped - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:11235/health"] - interval: 30s - timeout: 10s - retries: 3 - start_period: 40s + platforms: + - linux/amd64 + profiles: ["local-amd64"] + extends: &base-config + file: docker-compose.yml + service: base-config - crawl4ai-hub: - image: unclecode/crawl4ai:basic - profiles: ["hub"] + crawl4ai-arm64: + build: + context: . 
+ dockerfile: Dockerfile + args: + PYTHON_VERSION: "3.10" + INSTALL_TYPE: ${INSTALL_TYPE:-basic} + ENABLE_GPU: false + platforms: + - linux/arm64 + profiles: ["local-arm64"] + extends: *base-config + + # Hub services for different platforms and versions + crawl4ai-hub-amd64: + image: unclecode/crawl4ai:${VERSION:-basic}-amd64 + profiles: ["hub-amd64"] + extends: *base-config + + crawl4ai-hub-arm64: + image: unclecode/crawl4ai:${VERSION:-basic}-arm64 + profiles: ["hub-arm64"] + extends: *base-config + + # Base configuration to be extended + base-config: ports: - "11235:11235" - "8000:8000" @@ -59,4 +64,4 @@ services: interval: 30s timeout: 10s retries: 3 - start_period: 40s + start_period: 40s \ No newline at end of file diff --git a/docs/examples/docker_example.py b/docs/examples/docker_example.py index 17ef9f0..48acc80 100644 --- a/docs/examples/docker_example.py +++ b/docs/examples/docker_example.py @@ -78,20 +78,20 @@ def test_docker_deployment(version="basic"): time.sleep(5) # Test cases based on version - # test_basic_crawl(tester) - # test_basic_crawl(tester) - # test_basic_crawl_sync(tester) test_basic_crawl_direct(tester) + test_basic_crawl(tester) + test_basic_crawl(tester) + test_basic_crawl_sync(tester) - # if version in ["full", "transformer"]: - # test_cosine_extraction(tester) + if version in ["full", "transformer"]: + test_cosine_extraction(tester) - # test_js_execution(tester) - # test_css_selector(tester) - # test_structured_extraction(tester) - # test_llm_extraction(tester) - # test_llm_with_ollama(tester) - # test_screenshot(tester) + test_js_execution(tester) + test_css_selector(tester) + test_structured_extraction(tester) + test_llm_extraction(tester) + test_llm_with_ollama(tester) + test_screenshot(tester) def test_basic_crawl(tester: Crawl4AiTester):