mirror of
https://github.com/allenai/olmocr.git
synced 2025-11-10 23:50:43 +00:00
Cleanup of vllm perf branch with @amanr
This commit is contained in:
parent
316d0af1cd
commit
a83a0da65f
13
Dockerfile
13
Dockerfile
@ -1,10 +1,13 @@
|
|||||||
ARG CUDA_VERSION=12.8.1
|
ARG CUDA_VERSION=12.8.1
|
||||||
FROM --platform=linux/amd64 nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04
|
FROM --platform=linux/amd64 nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04
|
||||||
|
|
||||||
# Needs to be repeated below the FROM, or else it's not picked up
|
# Needs to be repeated below the FROM, or else it's not picked up
|
||||||
ARG PYTHON_VERSION=3.12
|
ARG PYTHON_VERSION=3.12
|
||||||
ARG CUDA_VERSION=12.8.1
|
ARG CUDA_VERSION=12.8.1
|
||||||
|
|
||||||
# Set environment variable to prevent interactive prompts
|
# Set environment variable to prevent interactive prompts
|
||||||
ENV DEBIAN_FRONTEND=noninteractive
|
ENV DEBIAN_FRONTEND=noninteractive
|
||||||
|
|
||||||
# From original VLLM dockerfile https://github.com/vllm-project/vllm/blob/main/docker/Dockerfile
|
# From original VLLM dockerfile https://github.com/vllm-project/vllm/blob/main/docker/Dockerfile
|
||||||
# Install Python and other dependencies
|
# Install Python and other dependencies
|
||||||
RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
|
RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
|
||||||
@ -33,8 +36,7 @@ RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTH
|
|||||||
&& python3 --version && python3 -m pip --version
|
&& python3 --version && python3 -m pip --version
|
||||||
|
|
||||||
# Install uv for faster pip installs
|
# Install uv for faster pip installs
|
||||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
RUN --mount=type=cache,target=/root/.cache/uv python3 -m pip install uv
|
||||||
python3 -m pip install uv
|
|
||||||
|
|
||||||
# Install some helper utilities for things like the benchmark
|
# Install some helper utilities for things like the benchmark
|
||||||
RUN apt-get update -y && apt-get install -y --no-install-recommends \
|
RUN apt-get update -y && apt-get install -y --no-install-recommends \
|
||||||
@ -43,18 +45,21 @@ git-lfs \
|
|||||||
curl \
|
curl \
|
||||||
wget \
|
wget \
|
||||||
unzip
|
unzip
|
||||||
|
|
||||||
ENV PYTHONUNBUFFERED=1
|
ENV PYTHONUNBUFFERED=1
|
||||||
WORKDIR /root
|
WORKDIR /root
|
||||||
COPY pyproject.toml pyproject.toml
|
COPY pyproject.toml pyproject.toml
|
||||||
COPY olmocr/version.py olmocr/version.py
|
COPY olmocr/version.py olmocr/version.py
|
||||||
|
|
||||||
# Needed to resolve setuptools dependencies
|
# Needed to resolve setuptools dependencies
|
||||||
ENV UV_INDEX_STRATEGY="unsafe-best-match"
|
ENV UV_INDEX_STRATEGY="unsafe-best-match"
|
||||||
RUN uv pip install --system --no-cache -e . --extra-index-url https://download.pytorch.org/whl/cu128
|
RUN uv pip install --system --no-cache -e ".[gpu]" --extra-index-url https://download.pytorch.org/whl/cu128
|
||||||
RUN uv pip install --system --no-cache ".[gpu]" --extra-index-url https://download.pytorch.org/whl/cu128
|
|
||||||
RUN uv pip install --system https://download.pytorch.org/whl/cu128/flashinfer/flashinfer_python-0.2.5%2Bcu128torch2.7-cp38-abi3-linux_x86_64.whl
|
RUN uv pip install --system https://download.pytorch.org/whl/cu128/flashinfer/flashinfer_python-0.2.5%2Bcu128torch2.7-cp38-abi3-linux_x86_64.whl
|
||||||
RUN uv pip install --system --no-cache ".[bench]"
|
RUN uv pip install --system --no-cache ".[bench]"
|
||||||
RUN playwright install-deps
|
RUN playwright install-deps
|
||||||
RUN playwright install chromium
|
RUN playwright install chromium
|
||||||
|
|
||||||
COPY olmocr olmocr
|
COPY olmocr olmocr
|
||||||
COPY scripts scripts
|
COPY scripts scripts
|
||||||
|
|
||||||
RUN python3 -m olmocr.pipeline --help
|
RUN python3 -m olmocr.pipeline --help
|
||||||
@ -574,7 +574,6 @@ async def vllm_server_task(model_name_or_path, args, semaphore):
|
|||||||
"--disable-log-requests",
|
"--disable-log-requests",
|
||||||
"--uvicorn-log-level", "warning",
|
"--uvicorn-log-level", "warning",
|
||||||
"--served-model-name", "Qwen/Qwen2-VL-7B-Instruct",
|
"--served-model-name", "Qwen/Qwen2-VL-7B-Instruct",
|
||||||
"--dtype", args.dtype,
|
|
||||||
]
|
]
|
||||||
cmd.extend(mem_fraction_arg)
|
cmd.extend(mem_fraction_arg)
|
||||||
|
|
||||||
@ -987,7 +986,6 @@ async def main():
|
|||||||
help="List of paths where you can find the model to convert this pdf. You can specify several different paths here, and the script will try to use the one which is fastest to access",
|
help="List of paths where you can find the model to convert this pdf. You can specify several different paths here, and the script will try to use the one which is fastest to access",
|
||||||
default="allenai/olmOCR-7B-0225-preview",
|
default="allenai/olmOCR-7B-0225-preview",
|
||||||
)
|
)
|
||||||
parser.add_argument("--dtype", type=str, default="bfloat16", help="Data type for model weights")
|
|
||||||
parser.add_argument("--model_max_context", type=int, default="8192", help="Maximum context length that the model was fine tuned under")
|
parser.add_argument("--model_max_context", type=int, default="8192", help="Maximum context length that the model was fine tuned under")
|
||||||
parser.add_argument("--model_chat_template", type=str, default="qwen2-vl", help="Chat template to pass to vllm server")
|
parser.add_argument("--model_chat_template", type=str, default="qwen2-vl", help="Chat template to pass to vllm server")
|
||||||
parser.add_argument("--target_longest_image_dim", type=int, help="Dimension on longest side to use for rendering the pdf pages", default=1024)
|
parser.add_argument("--target_longest_image_dim", type=int, help="Dimension on longest side to use for rendering the pdf pages", default=1024)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user