# base stage
# Common foundation for the builder and production stages below.
FROM ubuntu:22.04 AS base
USER root

# Run every RUN instruction under bash. pipefail makes a pipeline such as
# `curl ... | bash -` fail when the left-hand command fails, instead of
# reporting only the exit status of the last command in the pipe.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# NEED_MIRROR=1 redirects apt, pip, uv and rustup to mirror servers.
ARG NEED_MIRROR=0
# LIGHTEN=1 produces the slim image: the embedding models are not bundled,
# optional Python extras are not installed and the version is tagged "slim".
ARG LIGHTEN=0
ENV LIGHTEN=${LIGHTEN}

WORKDIR /ragflow
# Copy models downloaded via download_deps.py
RUN mkdir -p /ragflow/rag/res/deepdoc /root/.ragflow
# The dependency image is bind-mounted for this RUN only, so nothing but the
# copied files ends up in the layer.
# tar drops the leading "/" from member names, so --strip-components=3 removes
# "huggingface.co/InfiniFlow/<repo>/" and the contents of both repositories
# land directly in /ragflow/rag/res/deepdoc. Dot-files are excluded.
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \
    cp /huggingface.co/InfiniFlow/huqie/huqie.txt.trie /ragflow/rag/res/ && \
    tar --exclude='.*' -cf - \
        /huggingface.co/InfiniFlow/text_concat_xgb_v1.0 \
        /huggingface.co/InfiniFlow/deepdoc \
        | tar -xf - --strip-components=3 -C /ragflow/rag/res/deepdoc
# Bundle the embedding models unless a slim image (LIGHTEN=1) was requested.
# --strip-components=2 removes "huggingface.co/<org>/", leaving one directory
# per model under /root/.ragflow.
# The subshell is terminated with an explicit ";" so that "fi" is parsed as a
# reserved word without relying on shell-specific leniency.
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \
    if [ "$LIGHTEN" != "1" ]; then \
        (tar -cf - \
            /huggingface.co/BAAI/bge-large-zh-v1.5 \
            /huggingface.co/maidalun1020/bce-embedding-base_v1 \
            | tar -xf - --strip-components=2 -C /root/.ragflow); \
    fi
# https://github.com/chrismattmann/tika-python
# This is the only way to run python-tika without internet access. Without this
# set, the default is to check the tika version and pull the latest from Apache
# every time.
# NOTE(review): the hashed target name of cl100k_base.tiktoken is presumably the
# cache key tiktoken looks up for this encoding - confirm against the runtime
# configuration that points tiktoken at /ragflow.
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \
    cp -r /deps/nltk_data /root/ && \
    cp /deps/tika-server-standard-3.0.0.jar /deps/tika-server-standard-3.0.0.jar.md5 /ragflow/ && \
    cp /deps/cl100k_base.tiktoken /ragflow/9b5ad71b2ce5302211f9c61530b329a4922fc6a4

ENV TIKA_SERVER_JAR="file:///ragflow/tika-server-standard-3.0.0.jar"
ENV DEBIAN_FRONTEND=noninteractive

# Setup apt
# Python package and implicit dependencies:
# opencv-python: libglib2.0-0 libglx-mesa0 libgl1
# aspose-slides: pkg-config libicu-dev libgdiplus         libssl1.1_1.1.1f-1ubuntu2_amd64.deb
# python-pptx:   default-jdk                              tika-server-standard-3.0.0.jar
# selenium:      libatk-bridge2.0-0                       chrome-linux64-121-0-6167-85
# Building C extensions: libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev
#
# /var/cache/apt is a BuildKit cache mount shared by the apt steps of this file.
# The docker-clean hook is removed and Keep-Downloaded-Packages is enabled so
# that downloaded .deb files stay in that cache between builds.
# ca-certificates is installed first, followed by a second `apt update`, before
# the remaining packages are fetched.
RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
    if [ "$NEED_MIRROR" == "1" ]; then \
        sed -i 's|http://ports.ubuntu.com|http://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list; \
        sed -i 's|http://archive.ubuntu.com|http://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list; \
    fi; \
    rm -f /etc/apt/apt.conf.d/docker-clean && \
    echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache && \
    chmod 1777 /tmp && \
    apt update && \
    apt --no-install-recommends install -y ca-certificates && \
    apt update && \
    apt install -y libglib2.0-0 libglx-mesa0 libgl1 && \
    apt install -y pkg-config libicu-dev libgdiplus && \
    apt install -y default-jdk && \
    apt install -y libatk-bridge2.0-0 && \
    apt install -y libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev && \
    apt install -y libjemalloc-dev && \
    apt install -y python3-pip pipx nginx unzip curl wget git vim less && \
    apt install -y ghostscript
# Install uv through pipx. When NEED_MIRROR=1, pip and uv are first pointed at
# the Aliyun PyPI mirror (uv reads the index from /etc/uv/uv.toml).
RUN if [ "$NEED_MIRROR" == "1" ]; then \
        pip3 config set global.index-url https://mirrors.aliyun.com/pypi/simple && \
        pip3 config set global.trusted-host mirrors.aliyun.com; \
        mkdir -p /etc/uv && \
        echo "[[index]]" > /etc/uv/uv.toml && \
        echo 'url = "https://mirrors.aliyun.com/pypi/simple"' >> /etc/uv/uv.toml && \
        echo "default = true" >> /etc/uv/uv.toml; \
    fi; \
    pipx install uv

ENV PYTHONDONTWRITEBYTECODE=1 DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1
# /root/.local/bin is put on PATH so that the pipx-installed uv can be called
# by name in later steps.
ENV PATH=/root/.local/bin:$PATH
# nodejs 12.22 on Ubuntu 22.04 is too old
# Register the NodeSource 20.x repository, purge any distro nodejs/npm/cargo
# packages, then install nodejs from the refreshed package index.
RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
    curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
    apt purge -y nodejs npm cargo && \
    apt autoremove -y && \
    apt update && \
    apt install -y nodejs
# A modern version of cargo is needed for the latest version of the Rust compiler.
# When NEED_MIRROR=1 the TUNA mirrors are used for the rustup/rust dist files.
# curl is forced to use HTTP/1.1 for the rustup download.
# Every step is joined with "&&" so that a failing apt step aborts the build
# instead of being hidden by the exit status of the commands that follow it.
RUN apt update && apt install -y curl build-essential \
    && if [ "$NEED_MIRROR" == "1" ]; then \
        export RUSTUP_DIST_SERVER="https://mirrors.tuna.tsinghua.edu.cn/rustup"; \
        export RUSTUP_UPDATE_ROOT="https://mirrors.tuna.tsinghua.edu.cn/rustup/rustup"; \
        echo "Using TUNA mirrors for Rustup."; \
    fi \
    && curl --proto '=https' --tlsv1.2 --http1.1 -sSf https://sh.rustup.rs | bash -s -- -y --profile minimal \
    && echo 'export PATH="/root/.cargo/bin:${PATH}"' >> /root/.bashrc

ENV PATH="/root/.cargo/bin:${PATH}"

# Fail the build early if the toolchain is not usable.
RUN cargo --version && rustc --version
# Add mssql ODBC driver
# ARM64 (macOS/Apple Silicon or Linux aarch64): install msodbcsql18.
# x86_64 or others: install msodbcsql17.
# curl uses -f so that an HTTP error aborts the step instead of writing an error
# page into the apt key ring or the sources list. All steps are chained with
# "&&", so a failure anywhere reaches the final "||" handler.
# NOTE(review): apt-key is deprecated upstream; it is kept here because the
# base image is pinned to ubuntu:22.04 - revisit when the base is upgraded.
RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
    curl -fsSL https://packages.microsoft.com/keys/microsoft.asc | apt-key add - && \
    curl -fsSL https://packages.microsoft.com/config/ubuntu/22.04/prod.list > /etc/apt/sources.list.d/mssql-release.list && \
    apt update && \
    arch="$(uname -m)" && \
    if [ "$arch" = "arm64" ] || [ "$arch" = "aarch64" ]; then \
        ACCEPT_EULA=Y apt install -y unixodbc-dev msodbcsql18; \
    else \
        ACCEPT_EULA=Y apt install -y unixodbc-dev msodbcsql17; \
    fi || \
    { echo "Failed to install ODBC driver"; exit 1; }
# Add dependencies of selenium
# Both archives are bind-mounted from the dependency image, so the zip files
# themselves are never stored in a layer.
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/chrome-linux64-121-0-6167-85,target=/chrome-linux64.zip \
    unzip /chrome-linux64.zip && \
    mv chrome-linux64 /opt/chrome && \
    ln -s /opt/chrome/chrome /usr/local/bin/
# unzip -j discards the archive's directory prefix, leaving a bare
# "chromedriver" binary in the working directory.
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/chromedriver-linux64-121-0-6167-85,target=/chromedriver-linux64.zip \
    unzip -j /chromedriver-linux64.zip chromedriver-linux64/chromedriver && \
    mv chromedriver /usr/local/bin/ && \
    rm -f /usr/bin/google-chrome
# https://forum.aspose.com/t/aspose-slides-for-net-no-usable-version-of-libssl-found-with-linux-server/271344/13
# aspose-slides on linux/arm64 is unavailable
# Install the libssl1.1 package matching the build architecture; on any other
# architecture nothing is installed.
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \
    if [ "$(uname -m)" = "x86_64" ]; then \
        dpkg -i /deps/libssl1.1_1.1.1f-1ubuntu2_amd64.deb; \
    elif [ "$(uname -m)" = "aarch64" ]; then \
        dpkg -i /deps/libssl1.1_1.1.1f-1ubuntu2_arm64.deb; \
    fi
# builder stage
# Creates the Python virtual environment, the compiled web front end and the
# VERSION file that the production stage copies out.
FROM base AS builder
USER root

WORKDIR /ragflow

# install dependencies from uv.lock file
# Only the manifest and the lock file are copied here, so the dependency layer
# stays cached until one of them changes.
COPY pyproject.toml uv.lock ./
# https://github.com/astral-sh/uv/issues/10462
# uv records index url into uv.lock but doesn't failover among multiple indexes
# The lock file is therefore rewritten to whichever index this build uses
# before syncing. LIGHTEN=1 installs the core dependencies only; otherwise all
# extras are installed as well.
RUN --mount=type=cache,id=ragflow_uv,target=/root/.cache/uv,sharing=locked \
    if [ "$NEED_MIRROR" == "1" ]; then \
        sed -i 's|pypi.org|mirrors.aliyun.com/pypi|g' uv.lock; \
    else \
        sed -i 's|mirrors.aliyun.com/pypi|pypi.org|g' uv.lock; \
    fi; \
    if [ "$LIGHTEN" == "1" ]; then \
        uv sync --python 3.10 --frozen; \
    else \
        uv sync --python 3.10 --frozen --all-extras; \
    fi
# Build the web front end; the npm download cache is kept in a BuildKit cache
# mount rather than in the image layer.
COPY web web
COPY docs docs
RUN --mount=type=cache,id=ragflow_npm,target=/root/.npm,sharing=locked \
    cd web && npm install && npm run build
# Derive the version string from git (nearest "v*" tag, falling back to the
# abbreviated commit hash via --always), append the image flavour and store it
# in /ragflow/VERSION.
# The tag pattern is quoted so the shell passes it to git unexpanded.
COPY .git /ragflow/.git
RUN version_info=$(git describe --tags --match='v*' --first-parent --always); \
    if [ "$LIGHTEN" == "1" ]; then \
        version_info="$version_info slim"; \
    else \
        version_info="$version_info full"; \
    fi; \
    echo "RAGFlow version: $version_info"; \
    echo $version_info > /ragflow/VERSION
# production stage
FROM base AS production
USER root

WORKDIR /ragflow

# Copy Python environment and packages
# The virtual environment is copied to the same path it was created at in the
# builder stage, and its bin directory is placed first on PATH.
ENV VIRTUAL_ENV=/ragflow/.venv
COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

ENV PYTHONPATH=/ragflow/
# Application sources, copied one directory at a time from the build context.
COPY web web
COPY api api
COPY conf conf
COPY deepdoc deepdoc
COPY rag rag
COPY agent agent
COPY graphrag graphrag
COPY agentic_reasoning agentic_reasoning
COPY pyproject.toml uv.lock ./
COPY mcp mcp
COPY plugin plugin

# Service configuration template and container entrypoint.
COPY docker/service_conf.yaml.template ./conf/service_conf.yaml.template
COPY docker/entrypoint.sh ./
RUN chmod +x ./entrypoint*.sh
# Copy compiled web pages
COPY --from=builder /ragflow/web/dist /ragflow/web/dist
# Version string generated in the builder stage.
COPY --from=builder /ragflow/VERSION /ragflow/VERSION

# Exec form: entrypoint.sh is started directly, without a wrapping shell.
ENTRYPOINT ["./entrypoint.sh"]