docling/tests/test_backend_asciidoc.py
Cesar Berrospi Ramis b886e4df31
fix(asciidoc): set default size when missing in image directive (#1769)
The AsciiDoc backend should not create an ImageRef with Size equal to None, instead use default size values.
Refactor static methods as such and add the staticmethod decorator.
Extend the regression test for this fix.

Signed-off-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>
2025-06-16 10:38:46 +02:00

73 lines
1.9 KiB
Python

import glob
import os
from pathlib import Path
from docling.backend.asciidoc_backend import (
DEFAULT_IMAGE_HEIGHT,
DEFAULT_IMAGE_WIDTH,
AsciiDocBackend,
)
from docling.datamodel.base_models import InputFormat
from docling.datamodel.document import InputDocument
def _get_backend(fname):
in_doc = InputDocument(
path_or_stream=fname,
format=InputFormat.ASCIIDOC,
backend=AsciiDocBackend,
)
doc_backend = in_doc._backend
return doc_backend
def test_parse_picture():
line = (
"image::images/example1.png[Example Image, width=200, height=150, align=center]"
)
res = AsciiDocBackend._parse_picture(line)
assert res
assert res.get("width", 0) == "200"
assert res.get("height", 0) == "150"
assert res.get("uri", "") == "images/example1.png"
line = "image::renamed-bookmark.png[Renamed bookmark]"
res = AsciiDocBackend._parse_picture(line)
assert res
assert "width" not in res
assert "height" not in res
assert res.get("uri", "") == "renamed-bookmark.png"
def test_asciidocs_examples():
fnames = sorted(glob.glob("./tests/data/asciidoc/*.asciidoc"))
for fname in fnames:
print(f"reading {fname}")
bname = os.path.basename(fname)
gname = os.path.join("./tests/data/groundtruth/docling_v2/", bname + ".md")
doc_backend = _get_backend(Path(fname))
doc = doc_backend.convert()
pred_itdoc = doc._export_to_indented_text(max_text_len=16)
print("\n\n", pred_itdoc)
pred_mddoc = doc.export_to_markdown()
print("\n\n", pred_mddoc)
if os.path.exists(gname):
with open(gname) as fr:
fr.read()
# assert pred_mddoc == true_mddoc, "pred_mddoc!=true_mddoc for asciidoc"
else:
with open(gname, "w") as fw:
fw.write(pred_mddoc)
# print("\n\n", doc.export_to_markdown())
assert True