
- Update Docker base image to Python 3.12-slim-bookworm - Bump version from 0.6.0rc1 to 0.6.0 - Update documentation to reflect release version changes - Fix license specification in pyproject.toml and setup.py - Clean up code formatting in demo_docker_api.py BREAKING CHANGE: Base Python version upgraded from 3.10 to 3.12
200 lines
5.8 KiB
Docker
200 lines
5.8 KiB
Docker
# Base image: Python 3.12 on slim Debian bookworm. The stage is named "build".
FROM python:3.12-slim-bookworm AS build

# Crawl4AI version recorded in this image (override with --build-arg C4AI_VER=...).
ARG C4AI_VER=0.6.0
# Publish the version both as an environment variable and as an image label.
ENV C4AI_VERSION=${C4AI_VER}
LABEL c4ai.version=${C4AI_VER}
|
|
|
|
# Build arguments
#   APP_HOME       directory used as WORKDIR and as the target of the server files
#   GITHUB_REPO    repository cloned by /tmp/install.sh when USE_LOCAL is not "true"
#   GITHUB_BRANCH  branch passed to that clone
#   USE_LOCAL      "true" makes /tmp/install.sh install from /tmp/project instead
ARG APP_HOME="/app"
ARG GITHUB_REPO="https://github.com/unclecode/crawl4ai.git"
ARG GITHUB_BRANCH="main"
ARG USE_LOCAL="true"
|
|
|
|
# Environment persisted into the image: Python interpreter switches, pip
# defaults, and the Redis host/port settings.
ENV PYTHONFAULTHANDLER=1 \
    PYTHONHASHSEED=random \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PIP_DEFAULT_TIMEOUT=100 \
    REDIS_HOST=localhost \
    REDIS_PORT=6379

# Build-only setting: keeps apt/dpkg from prompting during the RUN steps of
# this stage. Declared with ARG instead of ENV so it is visible to every
# following RUN but is not baked into the runtime environment of the container.
ARG DEBIAN_FRONTEND=noninteractive
|
|
|
|
# Further build switches.
# PYTHON_VERSION is declared here but not referenced by any instruction visible in this file.
ARG PYTHON_VERSION=3.12
# INSTALL_TYPE selects the optional extras: "all", "torch", "transformer"; anything else means a plain install.
ARG INSTALL_TYPE=default
# ENABLE_GPU="true" requests the CUDA toolkit (only honoured when TARGETARCH is amd64).
ARG ENABLE_GPU=false
# Target architecture of the build (e.g. amd64, arm64); no default is set here.
ARG TARGETARCH

# Descriptive image metadata.
LABEL maintainer="unclecode" \
      description="🔥🕷️ Crawl4AI: Open-source LLM Friendly Web Crawler & scraper" \
      version="1.0"
|
|
|
|
# System packages: compiler toolchain and build helpers, download tools, git,
# plus the Redis server and supervisor. The package index is removed in the
# same layer so it does not end up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        curl \
        git \
        gnupg \
        libjpeg-dev \
        pkg-config \
        python3-dev \
        redis-server \
        supervisor \
        wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
|
|
|
|
# Shared libraries (GLib, NSS/NSPR, ATK, CUPS, DRM/GBM, D-Bus, X11/XCB, Pango,
# Cairo, ALSA, AT-SPI).
# NOTE(review): presumably the runtime dependencies of the headless browser
# installed later in this file - confirm before trimming the list.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        libasound2 \
        libatk-bridge2.0-0 \
        libatk1.0-0 \
        libatspi2.0-0 \
        libcairo2 \
        libcups2 \
        libdbus-1-3 \
        libdrm2 \
        libgbm1 \
        libglib2.0-0 \
        libnspr4 \
        libnss3 \
        libpango-1.0-0 \
        libx11-6 \
        libxcb1 \
        libxcomposite1 \
        libxdamage1 \
        libxext6 \
        libxfixes3 \
        libxkbcommon0 \
        libxrandr2 \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
|
|
|
|
# Upgrade all installed packages to the newest versions offered by the
# configured apt sources, then drop the package index again.
RUN apt-get update \
    && apt-get -y dist-upgrade \
    && rm -rf /var/lib/apt/lists/*
|
|
|
|
# Optional GPU support: the CUDA toolkit is installed only when ENABLE_GPU is
# "true" AND the target architecture is amd64; every other combination just
# logs that the step was skipped.
RUN if [ "$ENABLE_GPU" != "true" ] || [ "$TARGETARCH" != "amd64" ]; then \
        echo "Skipping NVIDIA CUDA Toolkit installation (unsupported platform or GPU disabled)"; \
    else \
        apt-get update && apt-get install -y --no-install-recommends \
            nvidia-cuda-toolkit \
        && apt-get clean \
        && rm -rf /var/lib/apt/lists/* ; \
    fi
|
|
|
|
# Architecture-specific packages: libopenblas-dev on arm64, libomp-dev on
# amd64; any other TARGETARCH value only prints a notice.
RUN case "$TARGETARCH" in \
        arm64) \
            echo "🦾 Installing ARM-specific optimizations"; \
            apt-get update && apt-get install -y --no-install-recommends \
                libopenblas-dev \
            && apt-get clean \
            && rm -rf /var/lib/apt/lists/* \
            ;; \
        amd64) \
            echo "🖥️ Installing AMD64-specific optimizations"; \
            apt-get update && apt-get install -y --no-install-recommends \
                libomp-dev \
            && apt-get clean \
            && rm -rf /var/lib/apt/lists/* \
            ;; \
        *) \
            echo "Skipping platform-specific optimizations (unsupported platform)" \
            ;; \
    esac
|
|
|
|
# Unprivileged system group and user; the image switches to this account
# (USER appuser) before the service is started.
RUN groupadd --system appuser \
    && useradd --system --no-log-init --gid appuser appuser

# Home directory for that account, owned by it.
RUN mkdir --parents /home/appuser \
    && chown --recursive appuser:appuser /home/appuser

# Relative paths in the following instructions resolve against APP_HOME.
WORKDIR $APP_HOME
|
|
|
|
# Generate /tmp/install.sh (executed by a later RUN step).
# The script text is single-quoted, so $USE_LOCAL, ${GITHUB_BRANCH} and
# ${GITHUB_REPO} are expanded when the script runs, not when it is written:
#   USE_LOCAL = "true"  -> pip-install the build context copied to /tmp/project/
#   otherwise           -> clone GITHUB_REPO at GITHUB_BRANCH (up to three
#                          attempts, 5 s pause after a failure) and pip-install it
RUN printf '%s\n' \
        '#!/bin/bash' \
        'if [ "$USE_LOCAL" = "true" ]; then' \
        '    echo "📦 Installing from local source..."' \
        '    pip install --no-cache-dir /tmp/project/' \
        'else' \
        '    echo "🌐 Installing from GitHub..."' \
        '    for i in {1..3}; do' \
        '        git clone --branch ${GITHUB_BRANCH} ${GITHUB_REPO} /tmp/crawl4ai && break ||' \
        '        { echo "Attempt $i/3 failed! Taking a short break... ☕"; sleep 5; };' \
        '    done' \
        '    pip install --no-cache-dir /tmp/crawl4ai' \
        'fi' \
        > /tmp/install.sh \
    && chmod +x /tmp/install.sh
|
|
|
|
# Install the server's Python requirements BEFORE the full build context is
# copied. The requirements layer then depends only on requirements.txt, so it
# is reused from the build cache when other source files change (previously
# `COPY .` came first and any source edit forced a full dependency reinstall).
# NOTE(review): assumes requirements.txt does not reference paths under
# /tmp/project - confirm.
COPY deploy/docker/requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Supervisor configuration, copied as root into the working directory
# (ownership of the app directory is handed to appuser later in the file).
COPY deploy/docker/supervisord.conf .

# Entire build context; the project install steps further down read it from /tmp/project.
COPY . /tmp/project/
|
|
|
|
# INSTALL_TYPE=all: pre-install the ML stack and download the NLTK "punkt" and
# "stopwords" data sets. Any other value makes this step a no-op.
RUN case "$INSTALL_TYPE" in \
        all) \
            pip install --no-cache-dir \
                torch \
                torchvision \
                torchaudio \
                scikit-learn \
                nltk \
                transformers \
                tokenizers \
            && python -m nltk.downloader punkt stopwords \
            ;; \
    esac
|
|
|
|
# Install the project from /tmp/project with the extras chosen by INSTALL_TYPE.
# "all" and "transformer" additionally run crawl4ai.model_loader afterwards;
# unrecognised values fall back to a plain install without extras.
RUN case "$INSTALL_TYPE" in \
        all) \
            pip install "/tmp/project/[all]" \
            && python -m crawl4ai.model_loader \
            ;; \
        torch) \
            pip install "/tmp/project/[torch]" \
            ;; \
        transformer) \
            pip install "/tmp/project/[transformer]" \
            && python -m crawl4ai.model_loader \
            ;; \
        *) \
            pip install "/tmp/project" \
            ;; \
    esac
|
|
|
|
# Upgrade pip, run the generated install script, then fail the build early if
# either crawl4ai or Playwright's sync API cannot be imported.
RUN pip install --no-cache-dir --upgrade pip \
    && /tmp/install.sh \
    && python -c "import crawl4ai; print('✅ crawl4ai is ready to rock!')" \
    && python -c "from playwright.sync_api import sync_playwright; print('✅ Playwright is feeling dramatic!')"
|
|
|
|
# Run crawl4ai's setup command (what it does is not visible from this file).
RUN crawl4ai-setup

# Let Playwright install its browsers along with their system dependencies.
RUN playwright install --with-deps

# These steps run as root, so the browsers are taken from root's Playwright
# cache; copy the Chromium build(s) into appuser's cache and hand them over.
# NOTE(review): only directories matching chromium-* are copied - confirm that
# no other browser directory is needed by the non-root user.
RUN mkdir --parents /home/appuser/.cache/ms-playwright \
    && cp --recursive /root/.cache/ms-playwright/chromium-* /home/appuser/.cache/ms-playwright/ \
    && chown --recursive appuser:appuser /home/appuser/.cache/ms-playwright

# Run crawl4ai's doctor command; the build fails if it exits non-zero.
RUN crawl4ai-doctor
|
|
|
|
# Application code: everything matched by deploy/docker/* goes into APP_HOME.
COPY deploy/docker/* $APP_HOME/

# Playground and any future static assets go into their own static/ directory.
COPY deploy/docker/static $APP_HOME/static

# Make the non-root user the owner of the whole application directory.
RUN chown --recursive appuser:appuser $APP_HOME

# Redis persistence and log directories, writable by appuser if they are used.
RUN mkdir --parents /var/lib/redis /var/log/redis \
    && chown --recursive appuser:appuser /var/lib/redis /var/log/redis
|
|
|
|
# Health probe. It reports unhealthy when
#   1. `free -m` shows less than 2048 MB of total memory, or
#   2. redis-cli cannot ping Redis, or
#   3. the HTTP endpoint http://localhost:11235/health does not answer successfully.
# The awk program is double-quoted inside the single-quoted bash script, hence
# the escaped \$2.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD bash -c '\
        total_mb=$(free -m | awk "/^Mem:/{print \$2}"); \
        if [ $total_mb -lt 2048 ]; then \
            echo "⚠️ Warning: Less than 2GB RAM available! Your container might need a memory boost! 🚀"; \
            exit 1; \
        fi && \
        redis-cli ping >/dev/null && \
        curl -f http://localhost:11235/health || exit 1'
|
|
|
|
# Documented container ports: Redis (6379) and the HTTP service that the
# HEALTHCHECK probes on port 11235. EXPOSE is metadata only; ports still have
# to be published with -p/-P at run time.
EXPOSE 6379
EXPOSE 11235

# Switch to the non-root user before starting the application.
USER appuser

# Set the environment to production.
ENV PYTHON_ENV=production

# Start the application using supervisord; the relative config path resolves
# against the WORKDIR set earlier in this file.
CMD ["supervisord", "-c", "supervisord.conf"]