# docling/docs/examples/develop_picture_enrichment.py

# WARNING
# This example demonstrates only how to develop a new enrichment model.
# It does not run the actual picture classifier model.

import logging
from collections.abc import Iterable
from pathlib import Path
from typing import Any

from docling_core.types.doc import (
    DoclingDocument,
    NodeItem,
    PictureClassificationClass,
    PictureClassificationData,
    PictureItem,
)

from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import PdfPipelineOptions
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.models.base_model import BaseEnrichmentModel
from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline


# Pipeline options for this example: the standard PDF pipeline options plus
# one extra switch that turns the example picture classifier on or off.
class ExamplePictureClassifierPipelineOptions(PdfPipelineOptions):
    # Whether the example enrichment model should run; enabled by default.
    # NOTE: the field name is misspelled ("classifer") but is read under this
    # exact name by the example pipeline, so it must stay as-is.
    do_picture_classifer: bool = True


class ExamplePictureClassifierEnrichmentModel(BaseEnrichmentModel):
    """Placeholder enrichment model that labels every picture as ``"dummy"``.

    No real classifier is executed: each processed picture simply receives one
    hard-coded classification annotation. The class exists to show which
    methods an enrichment model has to provide.
    """

    def __init__(self, enabled: bool):
        """Remember whether this model is switched on.

        Args:
            enabled: When false, no element is processable and calling the
                model yields nothing.
        """
        self.enabled = enabled

    def is_processable(self, doc: DoclingDocument, element: NodeItem) -> bool:
        """Tell whether ``element`` should be handed to this model.

        Args:
            doc: The document the element belongs to (not inspected here).
            element: Candidate document node.

        Returns:
            A truthy value only when the model is enabled and ``element`` is
            a ``PictureItem``.
        """
        is_picture = isinstance(element, PictureItem)
        return self.enabled and is_picture

    def __call__(
        self, doc: DoclingDocument, element_batch: Iterable[NodeItem]
    ) -> Iterable[Any]:
        """Annotate each picture in the batch and yield it back.

        Args:
            doc: The document the elements belong to; only needed if the
                optional image preview below is uncommented.
            element_batch: Elements to enrich; every one must be a
                ``PictureItem``.

        Yields:
            Each input element, after a dummy classification has been
            appended to its ``annotations``. Nothing is yielded when the
            model is disabled.
        """
        if self.enabled:
            for picture in element_batch:
                assert isinstance(picture, PictureItem)

                # uncomment this to interactively visualize the image
                # picture.get_image(doc).show()

                dummy_class = PictureClassificationClass(
                    class_name="dummy", confidence=0.42
                )
                classification = PictureClassificationData(
                    provenance="example_classifier-0.0.1",
                    predicted_classes=[dummy_class],
                )
                picture.annotations.append(classification)

                yield picture


class ExamplePictureClassifierPipeline(StandardPdfPipeline):
    """Standard PDF pipeline whose enrichment stage is the example classifier."""

    def __init__(self, pipeline_options: ExamplePictureClassifierPipelineOptions):
        """Build the standard PDF pipeline, then install the example model.

        Args:
            pipeline_options: Options for this pipeline. Its
                ``do_picture_classifer`` flag is forwarded to the enrichment
                model as ``enabled``.
        """
        super().__init__(pipeline_options)
        # Annotation only (nothing is assigned here): declares the options
        # type for type checkers. It must name the *options* class, not the
        # pipeline class.
        self.pipeline_options: ExamplePictureClassifierPipelineOptions

        # The enrichment stage consists solely of the example model.
        # NOTE(review): this assignment replaces any enrichment models the
        # parent constructor may have set up -- confirm that is intended.
        self.enrichment_pipe = [
            ExamplePictureClassifierEnrichmentModel(
                enabled=pipeline_options.do_picture_classifer
            )
        ]

    @classmethod
    def get_default_options(cls) -> ExamplePictureClassifierPipelineOptions:
        """Return a fresh options object with all defaults."""
        return ExamplePictureClassifierPipelineOptions()


def main():
    """Convert a sample PDF with the example pipeline and print the results.

    Configures INFO-level logging, runs the converter on a fixed test PDF
    using ``ExamplePictureClassifierPipeline``, and prints the annotations of
    every picture found in the converted document.
    """
    logging.basicConfig(level=logging.INFO)

    # Relative path: resolved against the current working directory.
    input_doc_path = Path("./tests/data/pdf/2206.01062.pdf")

    pipeline_options = ExamplePictureClassifierPipelineOptions()
    pipeline_options.images_scale = 2.0
    # NOTE(review): presumably required so picture images are available to
    # the enrichment model (e.g. for element.get_image) -- confirm.
    pipeline_options.generate_picture_images = True

    # Route PDF inputs through the example pipeline with the options above.
    doc_converter = DocumentConverter(
        format_options={
            InputFormat.PDF: PdfFormatOption(
                pipeline_cls=ExamplePictureClassifierPipeline,
                pipeline_options=pipeline_options,
            )
        }
    )
    result = doc_converter.convert(input_doc_path)

    for element, _level in result.document.iterate_items():
        if isinstance(element, PictureItem):
            # The enrichment model appends to `annotations`, so the message
            # names that field.
            print(
                f"The model populated the `annotations` portion of picture {element.self_ref}:\n{element.annotations}"
            )


# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
docs: Enrichment models (#1097) * warning for develop examples Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * add docs for enrichment models Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * minor reorg of top-level docs (#1098) * minor reorg of top-level docs Signed-off-by: Panos Vagenas <pva@zurich.ibm.com> * fix typo [no ci] Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com> --------- Signed-off-by: Panos Vagenas <pva@zurich.ibm.com> Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com> * trigger ci Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> --------- Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> Signed-off-by: Panos Vagenas <pva@zurich.ibm.com> Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com> Co-authored-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com> 2025-03-04 14:24:38 +01:00			`# WARNING`
			`# This example demonstrates only how to develop a new enrichment model.`
			`# It does not run the actual picture classifier model.`

feat!: Docling v2 (#117) --------- Signed-off-by: Christoph Auer <cau@zurich.ibm.com> Signed-off-by: Maxim Lysak <mly@zurich.ibm.com> Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com> Co-authored-by: Maxim Lysak <mly@zurich.ibm.com> Co-authored-by: Michele Dolfi <dol@zurich.ibm.com> Co-authored-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com> 2024-10-16 21:02:03 +02:00			`import logging`
ci: add coverage and ruff (#1383) * add coverage calculation and push Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * new codecov version and usage of token Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * enable ruff formatter instead of black and isort Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * apply ruff lint fixes Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * apply ruff unsafe fixes Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * add removed imports Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * runs 1 on linter issues Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * finalize linter fixes Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * Update pyproject.toml Co-authored-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com> Signed-off-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com> --------- Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> Signed-off-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com> Co-authored-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com> 2025-04-14 18:01:26 +02:00			`from collections.abc import Iterable`
feat!: Docling v2 (#117) --------- Signed-off-by: Christoph Auer <cau@zurich.ibm.com> Signed-off-by: Maxim Lysak <mly@zurich.ibm.com> Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com> Co-authored-by: Maxim Lysak <mly@zurich.ibm.com> Co-authored-by: Michele Dolfi <dol@zurich.ibm.com> Co-authored-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com> 2024-10-16 21:02:03 +02:00			`from pathlib import Path`
ci: add coverage and ruff (#1383) * add coverage calculation and push Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * new codecov version and usage of token Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * enable ruff formatter instead of black and isort Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * apply ruff lint fixes Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * apply ruff unsafe fixes Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * add removed imports Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * runs 1 on linter issues Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * finalize linter fixes Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * Update pyproject.toml Co-authored-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com> Signed-off-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com> --------- Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> Signed-off-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com> Co-authored-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com> 2025-04-14 18:01:26 +02:00			`from typing import Any`
feat!: Docling v2 (#117) --------- Signed-off-by: Christoph Auer <cau@zurich.ibm.com> Signed-off-by: Maxim Lysak <mly@zurich.ibm.com> Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com> Co-authored-by: Maxim Lysak <mly@zurich.ibm.com> Co-authored-by: Michele Dolfi <dol@zurich.ibm.com> Co-authored-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com> 2024-10-16 21:02:03 +02:00
			`from docling_core.types.doc import (`
			`DoclingDocument,`
			`NodeItem,`
			`PictureClassificationClass,`
			`PictureClassificationData,`
			`PictureItem,`
			`)`

			`from docling.datamodel.base_models import InputFormat`
			`from docling.datamodel.pipeline_options import PdfPipelineOptions`
			`from docling.document_converter import DocumentConverter, PdfFormatOption`
			`from docling.models.base_model import BaseEnrichmentModel`
			`from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline`


			`class ExamplePictureClassifierPipelineOptions(PdfPipelineOptions):`
			`do_picture_classifer: bool = True`


			`class ExamplePictureClassifierEnrichmentModel(BaseEnrichmentModel):`
			`def __init__(self, enabled: bool):`
			`self.enabled = enabled`

			`def is_processable(self, doc: DoclingDocument, element: NodeItem) -> bool:`
			`return self.enabled and isinstance(element, PictureItem)`

			`def __call__(`
			`self, doc: DoclingDocument, element_batch: Iterable[NodeItem]`
			`) -> Iterable[Any]:`
			`if not self.enabled:`
			`return`

			`for element in element_batch:`
			`assert isinstance(element, PictureItem)`

			`# uncomment this to interactively visualize the image`
feat: added support for exporting DocItem to an image when page image is available (#379) * Updated minimum docling-core version to 2.4.0 Signed-off-by: Shubham Gupta <26436285+sh-gupta@users.noreply.github.com> * Deprecated the generate_table_images option Signed-off-by: Shubham Gupta <26436285+sh-gupta@users.noreply.github.com> * Updated examples to use get_image instead of element.image Signed-off-by: Shubham Gupta <26436285+sh-gupta@users.noreply.github.com> --------- Signed-off-by: Shubham Gupta <26436285+sh-gupta@users.noreply.github.com> 2024-11-19 16:28:52 +01:00			`# element.get_image(doc).show()`
feat!: Docling v2 (#117) --------- Signed-off-by: Christoph Auer <cau@zurich.ibm.com> Signed-off-by: Maxim Lysak <mly@zurich.ibm.com> Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com> Co-authored-by: Maxim Lysak <mly@zurich.ibm.com> Co-authored-by: Michele Dolfi <dol@zurich.ibm.com> Co-authored-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com> 2024-10-16 21:02:03 +02:00
			`element.annotations.append(`
			`PictureClassificationData(`
			`provenance="example_classifier-0.0.1",`
			`predicted_classes=[`
			`PictureClassificationClass(class_name="dummy", confidence=0.42)`
			`],`
			`)`
			`)`

			`yield element`


			`class ExamplePictureClassifierPipeline(StandardPdfPipeline):`
			`def __init__(self, pipeline_options: ExamplePictureClassifierPipelineOptions):`
			`super().__init__(pipeline_options)`
			`self.pipeline_options: ExamplePictureClassifierPipeline`

			`self.enrichment_pipe = [`
			`ExamplePictureClassifierEnrichmentModel(`
			`enabled=pipeline_options.do_picture_classifer`
			`)`
			`]`

			`@classmethod`
			`def get_default_options(cls) -> ExamplePictureClassifierPipelineOptions:`
			`return ExamplePictureClassifierPipelineOptions()`


			`def main():`
			`logging.basicConfig(level=logging.INFO)`

fix: Test cases for RTL programmatic PDFs and fixes for the formula model (#903) fix: Support for RTL programmatic documents fix(parser): detect and handle rotated pages fix(parser): fix bug causing duplicated text fix(formula): improve stopping criteria chore: update lock file fix: temporary constrain beautifulsoup * switch to code formula model v1.0.1 and new test pdf Signed-off-by: Matteo-Omenetti <Matteo.Omenetti1@ibm.com> * switch to code formula model v1.0.1 and new test pdf Signed-off-by: Matteo-Omenetti <Matteo.Omenetti1@ibm.com> * cleaned up the data folder in the tests Signed-off-by: Peter Staar <taa@zurich.ibm.com> * switch to code formula model v1.0.1 and new test pdf Signed-off-by: Matteo-Omenetti <Matteo.Omenetti1@ibm.com> * added three test-files for right-to-left Signed-off-by: Peter Staar <taa@zurich.ibm.com> * fix black Signed-off-by: Matteo-Omenetti <Matteo.Omenetti1@ibm.com> * added new gt for test_e2e_conversion Signed-off-by: Matteo-Omenetti <Matteo.Omenetti1@ibm.com> * added new gt for test_e2e_conversion Signed-off-by: Matteo-Omenetti <Matteo.Omenetti1@ibm.com> * Add code to expose text direction of cell Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * new test file Signed-off-by: Matteo-Omenetti <Matteo.Omenetti1@ibm.com> * update lock Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * fix mypy reports Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * fix example filepaths Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * add test data results Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * pin wheel of latest docling-parse release Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * use latest docling-core Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * remove debugging code Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * fix path to files in example Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * Revert unwanted RTL additions Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Fix test data paths in examples 
Signed-off-by: Christoph Auer <cau@zurich.ibm.com> --------- Signed-off-by: Matteo-Omenetti <Matteo.Omenetti1@ibm.com> Signed-off-by: Peter Staar <taa@zurich.ibm.com> Signed-off-by: Christoph Auer <cau@zurich.ibm.com> Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> Co-authored-by: Matteo-Omenetti <Matteo.Omenetti1@ibm.com> Co-authored-by: Peter Staar <taa@zurich.ibm.com> Co-authored-by: Christoph Auer <cau@zurich.ibm.com> 2025-02-07 08:43:31 +01:00			`input_doc_path = Path("./tests/data/pdf/2206.01062.pdf")`
feat!: Docling v2 (#117) --------- Signed-off-by: Christoph Auer <cau@zurich.ibm.com> Signed-off-by: Maxim Lysak <mly@zurich.ibm.com> Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com> Co-authored-by: Maxim Lysak <mly@zurich.ibm.com> Co-authored-by: Michele Dolfi <dol@zurich.ibm.com> Co-authored-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com> 2024-10-16 21:02:03 +02:00
			`pipeline_options = ExamplePictureClassifierPipelineOptions()`
			`pipeline_options.images_scale = 2.0`
			`pipeline_options.generate_picture_images = True`

			`doc_converter = DocumentConverter(`
			`format_options={`
			`InputFormat.PDF: PdfFormatOption(`
			`pipeline_cls=ExamplePictureClassifierPipeline,`
			`pipeline_options=pipeline_options,`
			`)`
			`}`
			`)`
			`result = doc_converter.convert(input_doc_path)`

			`for element, _level in result.document.iterate_items():`
			`if isinstance(element, PictureItem):`
			`print(`
			f"The model populated the `data` portion of picture {element.self_ref}:\n{element.annotations}"
			`)`


			`if __name__ == "__main__":`
			`main()`