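"""FastAPI server that exposes Crawl4AI's AsyncWebCrawler over HTTP.

Endpoints:
    POST /crawl   - crawl one or more URLs, either as a single batch response
                    or as an NDJSON stream when the crawler config enables streaming
    GET  /schema  - JSON schemas for the browser and crawler configs
    GET  /health  - liveness check
"""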
import os
import sys

sys.path.append(os.path.dirname(os.path.realpath(__file__)))

import asyncio
import json
from typing import AsyncGenerator, List, Optional

from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
from pydantic import BaseModel

from crawl4ai import (
    BrowserConfig,
    CrawlerRunConfig,
    AsyncWebCrawler,
    MemoryAdaptiveDispatcher,
    RateLimiter,
)

class CrawlRequest(BaseModel):
    urls: List[str]
    browser_config: Optional[dict] = None
    crawler_config: Optional[dict] = None


class CrawlResponse(BaseModel):
    success: bool
    results: List[dict]

    class Config:
        arbitrary_types_allowed = True
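
# Example request body (illustrative only; the keys accepted inside
# browser_config and crawler_config are whatever BrowserConfig.load and
# CrawlerRunConfig.load understand in the installed crawl4ai version):
#
#   {
#       "urls": ["https://example.com"],
#       "browser_config": {"headless": true},
#       "crawler_config": {"stream": false}
#   }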

app = FastAPI(title="Crawl4AI API")

async def stream_results(crawler: AsyncWebCrawler, results_gen: AsyncGenerator) -> AsyncGenerator[bytes, None]:
    """Stream results and manage crawler lifecycle"""

    def datetime_handler(obj):
        """Custom handler for datetime objects during JSON serialization"""
        if hasattr(obj, 'isoformat'):
            return obj.isoformat()
        raise TypeError(f"Object of type {type(obj)} is not JSON serializable")

    try:
        async for result in results_gen:
            try:
                # Use dump method for serialization
                result_dict = result.model_dump()
                print(f"Streaming result for URL: {result_dict['url']}, Success: {result_dict['success']}")
                # Use custom JSON encoder with datetime handler
                yield (json.dumps(result_dict, default=datetime_handler) + "\n").encode('utf-8')
            except Exception as e:
                print(f"Error serializing result: {e}")
                error_response = {
                    "error": str(e),
                    "url": getattr(result, 'url', 'unknown')
                }
                yield (json.dumps(error_response, default=datetime_handler) + "\n").encode('utf-8')
    except asyncio.CancelledError:
        print("Client disconnected, cleaning up...")
    finally:
        try:
            await crawler.close()
        except Exception as e:
            print(f"Error closing crawler: {e}")

@app.post("/crawl")
async def crawl(request: CrawlRequest):
    # Load configs using our new utilities
    browser_config = BrowserConfig.load(request.browser_config)
    crawler_config = CrawlerRunConfig.load(request.crawler_config)

    dispatcher = MemoryAdaptiveDispatcher(
        memory_threshold_percent=95.0,
        rate_limiter=RateLimiter(base_delay=(1.0, 2.0)),
    )

    try:
        if crawler_config.stream:
            # Streaming mode: the crawler must outlive this handler, so it is
            # started explicitly and closed in stream_results' finally block.
            crawler = AsyncWebCrawler(config=browser_config)
            await crawler.start()

            results_gen = await crawler.arun_many(
                urls=request.urls,
                config=crawler_config,
                dispatcher=dispatcher
            )

            return StreamingResponse(
                stream_results(crawler, results_gen),
                media_type='application/x-ndjson'
            )
        else:
            # Batch mode: the context manager handles crawler startup and cleanup.
            async with AsyncWebCrawler(config=browser_config) as crawler:
                results = await crawler.arun_many(
                    urls=request.urls,
                    config=crawler_config,
                    dispatcher=dispatcher
                )
                # Use dump method for each result
                results_dict = [result.model_dump() for result in results]
                return CrawlResponse(success=True, results=results_dict)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
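
# Example (illustrative): request a streaming crawl and read the NDJSON response.
# Whether {"stream": true} is the right way to enable streaming depends on how
# CrawlerRunConfig.load interprets the crawler_config dict in your crawl4ai version.
#
#   curl -N -X POST http://localhost:8000/crawl \
#     -H "Content-Type: application/json" \
#     -d '{"urls": ["https://example.com"], "crawler_config": {"stream": true}}'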

@app.get("/schema")
async def get_schema():
    """Return config schemas for client validation"""
    return {
        "browser": BrowserConfig.model_json_schema(),
        "crawler": CrawlerRunConfig.model_json_schema()
    }
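
# Example (illustrative): fetch the schemas for client-side validation of the
# browser_config / crawler_config dicts before POSTing to /crawl.
#   curl http://localhost:8000/schema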

@app.get("/health")
async def health():
    return {"status": "ok"}

if __name__ == "__main__":
    import uvicorn

    uvicorn.run("server:app", host="0.0.0.0", port=8000, reload=True)
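
# The app can also be launched from the command line (assuming this module is
# saved as server.py, matching the "server:app" import string above):
#   uvicorn server:app --host 0.0.0.0 --port 8000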