mirror of
https://github.com/docling-project/docling.git
synced 2025-11-30 04:46:34 +00:00
* Experimental code for repetition detection, VLLM Streaming Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Update VLLM Streaming Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Update VLLM inference code, CLI and VLM specs Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Fix generation and decoder args for HF model Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Fix vllm device args Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Cleanup Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Bugfixes Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Remove streaming VLLM for the moment Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Add repetition StoppingCriteria for GraniteDocling/SmolDocling Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Make GenerationStopper base class and port for MLX Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Add streaming support and custom GenerationStopper support for ApiVlmModel Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Fixes for ApiVlmModel Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Fixes for ApiVlmModel Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Fix api_image_request_streaming when GenerationStopper triggers. Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Move DocTagsRepetitionStopper to utility unit, update examples Signed-off-by: Christoph Auer <cau@zurich.ibm.com> --------- Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
109 lines
3.5 KiB
Python
Vendored
109 lines
3.5 KiB
Python
Vendored
# %% [markdown]
# Experimental VLM pipeline with custom repetition stopping criteria.
#
# This script demonstrates the use of custom stopping criteria that detect
# repetitive location coordinate patterns in generated text and stop generation
# when such patterns are found.
#
# What this example does
# - Uses the GraniteDocling model with custom repetition stopping criteria injected
# - Processes a PDF document or image and monitors for repetitive coordinate patterns
# - Stops generation early when repetitive patterns are detected
|
|
|
|
|
|
# %%
|
|
|
|
import logging
|
|
|
|
from docling.datamodel import vlm_model_specs
|
|
from docling.datamodel.base_models import InputFormat
|
|
from docling.datamodel.pipeline_options import VlmPipelineOptions
|
|
from docling.document_converter import DocumentConverter, PdfFormatOption
|
|
from docling.models.utils.generation_utils import (
|
|
DocTagsRepetitionStopper,
|
|
)
|
|
from docling.pipeline.vlm_pipeline import VlmPipeline
|
|
|
|
# Set up logging to see when repetition stopping is triggered.
# basicConfig() is called exactly once: a second call without force=True is a
# no-op, because the root logger already has a handler after the first call.
logging.basicConfig(level=logging.INFO, format="%(levelname)s:%(name)s:%(message)s")

# Replace with a local path if preferred.
# source = "https://ibm.biz/docling-page-with-table"  # Example that shows no repetitions.
source = "tests/data_scanned/old_newspaper.png"  # Example that creates repetitions.
print(f"Processing document: {source}")
|
|
|
|
###### USING GRANITEDOCLING WITH CUSTOM REPETITION STOPPING

## Using standard Huggingface Transformers (most portable, slowest)
custom_vlm_options = vlm_model_specs.GRANITEDOCLING_TRANSFORMERS.model_copy()

# Uncomment this to use MLX-accelerated version on Apple Silicon
# custom_vlm_options = vlm_model_specs.GRANITEDOCLING_MLX.model_copy()  # use this for Apple Silicon

# Inject the custom repetition stopping criteria into the copied VLM options.
# N=32: check for repetitions for every 32 new tokens decoded.
repetition_stopper = DocTagsRepetitionStopper(N=32)
custom_vlm_options.custom_stopping_criteria = [repetition_stopper]

pipeline_options = VlmPipelineOptions(vlm_options=custom_vlm_options)

# Image inputs are routed through the VLM pipeline with the options above.
image_format_option = PdfFormatOption(
    pipeline_cls=VlmPipeline,
    pipeline_options=pipeline_options,
)
converter = DocumentConverter(
    format_options={InputFormat.IMAGE: image_format_option},
)

# Run the conversion and print the resulting document as Markdown.
conversion_result = converter.convert(source=source)
doc = conversion_result.document
print(doc.export_to_markdown())
|
|
|
|
## Using a remote VLM inference service (for example VLLM) - uncomment to use
#
# NOTE: this variant also needs ApiVlmOptions and ResponseFormat, which are not
# imported above. In docling they are expected to live in
# docling.datamodel.pipeline_options_vlm_model (verify against your version):
# from docling.datamodel.pipeline_options_vlm_model import ApiVlmOptions, ResponseFormat

# custom_vlm_options = ApiVlmOptions(
#     url="http://localhost:8000/v1/chat/completions",  # LM studio defaults to port 1234, VLLM to 8000
#     params=dict(
#         model=vlm_model_specs.GRANITEDOCLING_TRANSFORMERS.repo_id,
#         max_tokens=8192,
#         skip_special_tokens=True,  # needed for VLLM
#     ),
#     headers={
#         "Authorization": "Bearer YOUR_API_KEY",
#     },
#     prompt=vlm_model_specs.GRANITEDOCLING_TRANSFORMERS.prompt,
#     timeout=90,
#     scale=2.0,
#     temperature=0.0,
#     response_format=ResponseFormat.DOCTAGS,
#     custom_stopping_criteria=[
#         DocTagsRepetitionStopper(N=1)
#     ],  # check for repetitions for every new chunk of the response stream
# )

# pipeline_options = VlmPipelineOptions(
#     vlm_options=custom_vlm_options,
#     enable_remote_services=True,  # required when using a remote inference service.
# )

# converter = DocumentConverter(
#     format_options={
#         InputFormat.IMAGE: PdfFormatOption(
#             pipeline_cls=VlmPipeline,
#             pipeline_options=pipeline_options,
#         ),
#     }
# )

# doc = converter.convert(source=source).document

# print(doc.export_to_markdown())
|