# base stage
# Common foundation that the builder and production stages both start from.
FROM ubuntu:22.04 AS base
USER root
# Run every shell-form RUN under bash instead of the default /bin/sh.
SHELL ["/bin/bash", "-c"]

# Build flavour switch read by later RUN steps: "1" produces the slim image,
# any other value produces the full one.
ENV LIGHTEN=0

WORKDIR /ragflow
# Remove the docker-clean apt hook and tell apt to keep downloaded packages,
# so the /var/cache/apt cache mounts used by later RUN steps stay populated.
RUN rm -f /etc/apt/apt.conf.d/docker-clean \
    && echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache

# Install CA certificates first: the apt mirror configured afterwards is
# served over HTTPS.
RUN --mount=type=cache,id=ragflow_base_apt,target=/var/cache/apt,sharing=locked \
    apt-get update && apt-get --no-install-recommends install -y ca-certificates
# Setup apt mirror site
RUN sed -i 's|http://archive.ubuntu.com|https://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list

# System libraries and tools shared by the later stages.
# DEBIAN_FRONTEND is set inline so it applies to this command only and is not
# persisted in the image environment.
RUN --mount=type=cache,id=ragflow_base_apt,target=/var/cache/apt,sharing=locked \
    apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        curl \
        default-jdk \
        git \
        libatk-bridge2.0-0 \
        libgbm-dev \
        libgdiplus \
        libglib2.0-0 \
        libglx-mesa0 \
        libgtk-4-1 \
        libicu-dev \
        libnss3 \
        libpython3-dev \
        nginx \
        pipx \
        pkg-config \
        python3-pip \
        unzip \
        wget \
        xdg-utils \
    && rm -rf /var/lib/apt/lists/*
# Point pip at the Tsinghua PyPI mirror (with the PKU mirror as an extra
# index), then install Poetry through pipx and add the
# poetry-plugin-pypi-mirror plugin to it.
RUN pip3 config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple \
    && pip3 config set global.trusted-host "pypi.tuna.tsinghua.edu.cn mirrors.pku.edu.cn" \
    && pip3 config set global.extra-index-url "https://mirrors.pku.edu.cn/pypi/web/simple" \
    && pipx install poetry \
    && /root/.local/bin/poetry self add poetry-plugin-pypi-mirror
# https://forum.aspose.com/t/aspose-slides-for-net-no-usable-version-of-libssl-found-with-linux-server/271344/13
# aspose-slides on linux/arm64 is unavailable
# Install the bundled libssl1.1 package matching the machine architecture
# reported by `uname -m`; any other architecture installs nothing.
# Both .deb files are bind-mounted from the build context, so they never
# become part of an image layer.
RUN --mount=type=bind,source=libssl1.1_1.1.1f-1ubuntu2_amd64.deb,target=/root/libssl1.1_1.1.1f-1ubuntu2_amd64.deb \
    --mount=type=bind,source=libssl1.1_1.1.1f-1ubuntu2_arm64.deb,target=/root/libssl1.1_1.1.1f-1ubuntu2_arm64.deb \
    case "$(uname -m)" in \
        x86_64)  dpkg -i /root/libssl1.1_1.1.1f-1ubuntu2_amd64.deb ;; \
        aarch64) dpkg -i /root/libssl1.1_1.1.1f-1ubuntu2_arm64.deb ;; \
    esac
# Stop Python from writing .pyc files and run .NET in
# globalization-invariant mode.
ENV PYTHONDONTWRITEBYTECODE=1 DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1

# Put /root/.local/bin (where the poetry executable is invoked from in this
# file) at the front of PATH.
ENV PATH=/root/.local/bin:$PATH

# Configure Poetry
ENV POETRY_NO_INTERACTION=1 \
    POETRY_VIRTUALENVS_IN_PROJECT=true \
    POETRY_VIRTUALENVS_CREATE=true \
    POETRY_REQUESTS_TIMEOUT=15 \
    POETRY_PYPI_MIRROR_URL=https://pypi.tuna.tsinghua.edu.cn/simple/
# nodejs 12.22 on Ubuntu 22.04 is too old
# Register the NodeSource 20.x repository, remove any previously installed
# nodejs/npm packages, then install nodejs and cargo.
# pipefail makes a failed download abort the build instead of piping empty
# input into bash and carrying on; `autoremove -y` avoids a confirmation
# prompt that cannot be answered during a build.
RUN --mount=type=cache,id=ragflow_base_apt,target=/var/cache/apt,sharing=locked \
    set -o pipefail && \
    curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
    apt-get purge -y nodejs npm && \
    apt-get autoremove -y && \
    apt-get update && \
    apt-get install -y nodejs cargo && \
    rm -rf /var/lib/apt/lists/*
# builder stage
FROM base AS builder
USER root

WORKDIR /ragflow

# install dependencies from poetry.lock file
# Only the dependency manifests are copied here, so this layer is rebuilt
# only when one of them changes.
COPY pyproject.toml poetry.toml poetry.lock ./

# LIGHTEN=1 installs the default dependency set only; any other value also
# installs the "full" dependency group. Poetry's cache directory is a cache
# mount, so downloads are reused across builds without entering the image.
RUN --mount=type=cache,id=ragflow_builder_poetry,target=/root/.cache/pypoetry,sharing=locked \
    if [ "$LIGHTEN" = "1" ]; then \
        poetry install --no-root; \
    else \
        poetry install --no-root --with=full; \
    fi
# Copy the web and docs sources, then install npm dependencies and build the
# web front end. The npm cache lives in a cache mount and is reused across
# builds.
COPY web web
COPY docs docs
RUN --mount=type=cache,id=ragflow_builder_npm,target=/root/.npm,sharing=locked \
    cd web && npm install --force && npm run build
# Derive a human-readable version string from git metadata and write it to
# /ragflow/VERSION:
#   - HEAD exactly on the latest tag: "<tag>"
#   - otherwise:                      "<short-commit>(<tag>~<commits-since-tag>)"
# followed by " slim" when LIGHTEN=1 and " full" otherwise.
COPY .git /ragflow/.git

RUN current_commit=$(git rev-parse --short HEAD); \
    last_tag=$(git describe --tags --abbrev=0); \
    commit_count=$(git rev-list --count "${last_tag}..HEAD"); \
    version_info=""; \
    if [ "$commit_count" -eq 0 ]; then \
        version_info=$last_tag; \
    else \
        version_info="${current_commit}(${last_tag}~${commit_count})"; \
    fi; \
    if [ "$LIGHTEN" = "1" ]; then \
        version_info="$version_info slim"; \
    else \
        version_info="$version_info full"; \
    fi; \
    echo "RAGFlow version: $version_info"; \
    echo "$version_info" > /ragflow/VERSION
# production stage
FROM base AS production
USER root

WORKDIR /ragflow

# Copy Python environment and packages
# The virtualenv created in the builder stage is copied to the same path and
# its bin/ directory is put first on PATH.
ENV VIRTUAL_ENV=/ragflow/.venv
COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
# Install python packages' dependencies
# cv2 requires libGL.so.1
RUN --mount=type=cache,id=ragflow_production_apt,target=/var/cache/apt,sharing=locked \
    apt-get update && apt-get install -y --no-install-recommends \
        less \
        libgl1 \
        nginx \
        vim \
    && rm -rf /var/lib/apt/lists/*
# Copy models downloaded via download_deps.py
RUN mkdir -p /ragflow/rag/res/deepdoc /root/.ragflow

# The huggingface.co directory is bind-mounted from the build context and
# streamed through a tar pipe, so only the selected models reach the image.
# Stripping three leading path components (huggingface.co/InfiniFlow/<model>)
# puts the files of both models directly into /ragflow/rag/res/deepdoc;
# entries matching '.*' are excluded.
RUN --mount=type=bind,source=huggingface.co,target=/huggingface.co \
    tar --exclude='.*' -cf - \
        /huggingface.co/InfiniFlow/text_concat_xgb_v1.0 \
        /huggingface.co/InfiniFlow/deepdoc \
        | tar -xf - --strip-components=3 -C /ragflow/rag/res/deepdoc

# Stripping two leading path components (huggingface.co/<owner>) leaves one
# directory per model under /root/.ragflow.
RUN --mount=type=bind,source=huggingface.co,target=/huggingface.co \
    tar -cf - \
        /huggingface.co/BAAI/bge-large-zh-v1.5 \
        /huggingface.co/BAAI/bge-reranker-v2-m3 \
        /huggingface.co/maidalun1020/bce-embedding-base_v1 \
        /huggingface.co/maidalun1020/bce-reranker-base_v1 \
        | tar -xf - --strip-components=2 -C /root/.ragflow
# Copy nltk data downloaded via download_deps.py
COPY nltk_data /root/nltk_data

# https://github.com/chrismattmann/tika-python
# This is the only way to run python-tika without internet access. Without this set, the default is to check the tika version and pull latest every time from Apache.
COPY tika-server-standard-3.0.0.jar /ragflow/tika-server-standard.jar
COPY tika-server-standard-3.0.0.jar.md5 /ragflow/tika-server-standard.jar.md5
ENV TIKA_SERVER_JAR="file:///ragflow/tika-server-standard.jar"

# Copy cl100k_base
# NOTE(review): the hash-like destination name is presumably the cache file
# name tiktoken looks up for cl100k_base - confirm against the consumer.
COPY cl100k_base.tiktoken /ragflow/9b5ad71b2ce5302211f9c61530b329a4922fc6a4
# Add dependencies of selenium
# Chrome: unpack the bind-mounted archive, move it to /opt/chrome and link
# the chrome binary into /usr/local/bin.
RUN --mount=type=bind,source=chrome-linux64-121-0-6167-85,target=/chrome-linux64.zip \
    unzip /chrome-linux64.zip && \
    mv chrome-linux64 /opt/chrome && \
    ln -s /opt/chrome/chrome /usr/local/bin/

# chromedriver: extract only the driver binary (-j drops the directory part)
# and install it into /usr/local/bin; also remove /usr/bin/google-chrome if
# it exists.
RUN --mount=type=bind,source=chromedriver-linux64-121-0-6167-85,target=/chromedriver-linux64.zip \
    unzip -j /chromedriver-linux64.zip chromedriver-linux64/chromedriver && \
    mv chromedriver /usr/local/bin/ && \
    rm -f /usr/bin/google-chrome
# Make the project root importable for Python.
ENV PYTHONPATH=/ragflow/

# Application sources and dependency manifests (destinations are relative to
# WORKDIR /ragflow).
COPY web web
COPY api api
COPY conf conf
COPY deepdoc deepdoc
COPY rag rag
COPY agent agent
COPY graphrag graphrag
COPY pyproject.toml poetry.toml poetry.lock ./

# Service configuration template and container entrypoint script.
COPY docker/service_conf.yaml.template ./conf/service_conf.yaml.template
COPY docker/entrypoint.sh ./entrypoint.sh
RUN chmod +x ./entrypoint.sh

# Copy compiled web pages
# Both the built front end and the generated VERSION file come from the
# builder stage.
COPY --from=builder /ragflow/web/dist /ragflow/web/dist
COPY --from=builder /ragflow/VERSION /ragflow/VERSION

ENTRYPOINT ["./entrypoint.sh"]