docling/docs/examples/granitedocling_repetition_stopping.py

# %% [markdown]
# Experimental VLM pipeline with custom repetition stopping criteria.
#
# This script demonstrates the use of custom stopping criteria that detect
# repetitive location coordinate patterns in generated text and stop generation
# when such patterns are found.
#
# What this example does
# - Uses the GraniteDocling model with custom repetition stopping criteria injected
# - Processes a PDF document or image and monitors for repetitive coordinate patterns
# - Stops generation early when repetitive patterns are detected


# %%

import logging

from docling.datamodel import vlm_model_specs
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import VlmPipelineOptions
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.models.utils.generation_utils import (
    DocTagsRepetitionStopper,
)
from docling.pipeline.vlm_pipeline import VlmPipeline

logging.basicConfig(level=logging.INFO, format="%(levelname)s:%(name)s:%(message)s")


# Set up logging to see when repetition stopping is triggered
logging.basicConfig(level=logging.INFO)

# Replace with a local path if preferred.
# source = "https://ibm.biz/docling-page-with-table" # Example that shows no repetitions.
source = "tests/data_scanned/old_newspaper.png"  # Example that creates repetitions.
print(f"Processing document: {source}")

###### USING GRANITEDOCLING WITH CUSTOM REPETITION STOPPING

## Using standard Huggingface Transformers (most portable, slowest)
custom_vlm_options = vlm_model_specs.GRANITEDOCLING_TRANSFORMERS.model_copy()

# Uncomment this to use MLX-accelerated version on Apple Silicon
# custom_vlm_options = vlm_model_specs.GRANITEDOCLING_MLX.model_copy() # use this for Apple Silicon


# Create custom VLM options with repetition stopping criteria
custom_vlm_options.custom_stopping_criteria = [
    DocTagsRepetitionStopper(N=32)
]  # check for repetitions for every 32 new tokens decoded.

pipeline_options = VlmPipelineOptions(
    vlm_options=custom_vlm_options,
)

converter = DocumentConverter(
    format_options={
        InputFormat.IMAGE: PdfFormatOption(
            pipeline_cls=VlmPipeline,
            pipeline_options=pipeline_options,
        ),
    }
)

doc = converter.convert(source=source).document

print(doc.export_to_markdown())

## Using a remote VLM inference service (for example VLLM) - uncomment to use

# custom_vlm_options = ApiVlmOptions(
#     url="http://localhost:8000/v1/chat/completions",  # LM studio defaults to port 1234, VLLM to 8000
#     params=dict(
#         model=vlm_model_specs.GRANITEDOCLING_TRANSFORMERS.repo_id,
#         max_tokens=8192,
#         skip_special_tokens=True,  # needed for VLLM
#     ),
#     headers={
#         "Authorization": "Bearer YOUR_API_KEY",
#     },
#     prompt=vlm_model_specs.GRANITEDOCLING_TRANSFORMERS.prompt,
#     timeout=90,
#     scale=2.0,
#     temperature=0.0,
#     response_format=ResponseFormat.DOCTAGS,
#     custom_stopping_criteria=[
#         DocTagsRepetitionStopper(N=1)
#     ],  # check for repetitions for every new chunk of the response stream
# )


# pipeline_options = VlmPipelineOptions(
#     vlm_options=custom_vlm_options,
#     enable_remote_services=True, # required when using a remote inference service.
# )

# converter = DocumentConverter(
#     format_options={
#         InputFormat.IMAGE: PdfFormatOption(
#             pipeline_cls=VlmPipeline,
#             pipeline_options=pipeline_options,
#         ),
#     }
# )

# doc = converter.convert(source=source).document

# print(doc.export_to_markdown())
feat: Repetition-based StoppingCriteria for GraniteDocling (#2323) * Experimental code for repetition detection, VLLM Streaming Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Update VLLM Streaming Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Update VLLM inference code, CLI and VLM specs Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Fix generation and decoder args for HF model Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Fix vllm device args Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Cleanup Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Bugfixes Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Remove streaming VLLM for the moment Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Add repetition StoppingCriteria for GraniteDocling/SmolDocling Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Make GenerationStopper base class and port for MLX Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Add streaming support and custom GenerationStopper support for ApiVlmModel Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Fixes for ApiVlmModel Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Fixes for ApiVlmModel Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Fix api_image_request_streaming when GenerationStopper triggers. Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Move DocTagsRepetitionStopper to utility unit, update examples Signed-off-by: Christoph Auer <cau@zurich.ibm.com> --------- Signed-off-by: Christoph Auer <cau@zurich.ibm.com> 2025-09-30 15:26:09 +02:00			`# %% [markdown]`
			`# Experimental VLM pipeline with custom repetition stopping criteria.`
			`#`
			`# This script demonstrates the use of custom stopping criteria that detect`
			`# repetitive location coordinate patterns in generated text and stop generation`
			`# when such patterns are found.`
			`#`
			`# What this example does`
			`# - Uses the GraniteDocling model with custom repetition stopping criteria injected`
			`# - Processes a PDF document or image and monitors for repetitive coordinate patterns`
			`# - Stops generation early when repetitive patterns are detected`


			`# %%`

			`import logging`

			`from docling.datamodel import vlm_model_specs`
			`from docling.datamodel.base_models import InputFormat`
			`from docling.datamodel.pipeline_options import VlmPipelineOptions`
			`from docling.document_converter import DocumentConverter, PdfFormatOption`
			`from docling.models.utils.generation_utils import (`
			`DocTagsRepetitionStopper,`
			`)`
			`from docling.pipeline.vlm_pipeline import VlmPipeline`

			`logging.basicConfig(level=logging.INFO, format="%(levelname)s:%(name)s:%(message)s")`


			`# Set up logging to see when repetition stopping is triggered`
			`logging.basicConfig(level=logging.INFO)`

			`# Replace with a local path if preferred.`
			`# source = "https://ibm.biz/docling-page-with-table" # Example that shows no repetitions.`
			`source = "tests/data_scanned/old_newspaper.png" # Example that creates repetitions.`
			`print(f"Processing document: {source}")`

			`###### USING GRANITEDOCLING WITH CUSTOM REPETITION STOPPING`

			`## Using standard Huggingface Transformers (most portable, slowest)`
			`custom_vlm_options = vlm_model_specs.GRANITEDOCLING_TRANSFORMERS.model_copy()`

			`# Uncomment this to use MLX-accelerated version on Apple Silicon`
			`# custom_vlm_options = vlm_model_specs.GRANITEDOCLING_MLX.model_copy() # use this for Apple Silicon`


			`# Create custom VLM options with repetition stopping criteria`
			`custom_vlm_options.custom_stopping_criteria = [`
			`DocTagsRepetitionStopper(N=32)`
			`] # check for repetitions for every 32 new tokens decoded.`

			`pipeline_options = VlmPipelineOptions(`
			`vlm_options=custom_vlm_options,`
			`)`

			`converter = DocumentConverter(`
			`format_options={`
			`InputFormat.IMAGE: PdfFormatOption(`
			`pipeline_cls=VlmPipeline,`
			`pipeline_options=pipeline_options,`
			`),`
			`}`
			`)`

			`doc = converter.convert(source=source).document`

			`print(doc.export_to_markdown())`

			`## Using a remote VLM inference service (for example VLLM) - uncomment to use`

			`# custom_vlm_options = ApiVlmOptions(`
			`# url="http://localhost:8000/v1/chat/completions", # LM studio defaults to port 1234, VLLM to 8000`
			`# params=dict(`
			`# model=vlm_model_specs.GRANITEDOCLING_TRANSFORMERS.repo_id,`
			`# max_tokens=8192,`
			`# skip_special_tokens=True, # needed for VLLM`
			`# ),`
			`# headers={`
			`# "Authorization": "Bearer YOUR_API_KEY",`
			`# },`
			`# prompt=vlm_model_specs.GRANITEDOCLING_TRANSFORMERS.prompt,`
			`# timeout=90,`
			`# scale=2.0,`
			`# temperature=0.0,`
			`# response_format=ResponseFormat.DOCTAGS,`
			`# custom_stopping_criteria=[`
			`# DocTagsRepetitionStopper(N=1)`
			`# ], # check for repetitions for every new chunk of the response stream`
			`# )`


			`# pipeline_options = VlmPipelineOptions(`
			`# vlm_options=custom_vlm_options,`
			`# enable_remote_services=True, # required when using a remote inference service.`
			`# )`

			`# converter = DocumentConverter(`
			`# format_options={`
			`# InputFormat.IMAGE: PdfFormatOption(`
			`# pipeline_cls=VlmPipeline,`
			`# pipeline_options=pipeline_options,`
			`# ),`
			`# }`
			`# )`

			`# doc = converter.convert(source=source).document`

			`# print(doc.export_to_markdown())`