mirror of
https://github.com/docling-project/docling.git
synced 2025-06-27 05:20:05 +00:00

The AsciiDoc backend should not create an ImageRef with Size equal to None, instead use default size values. Refactor static methods as such and add the staticmethod decorator. Extend the regression test for this fix. Signed-off-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>
73 lines
1.9 KiB
Python
73 lines
1.9 KiB
Python
import glob
|
|
import os
|
|
from pathlib import Path
|
|
|
|
from docling.backend.asciidoc_backend import (
|
|
DEFAULT_IMAGE_HEIGHT,
|
|
DEFAULT_IMAGE_WIDTH,
|
|
AsciiDocBackend,
|
|
)
|
|
from docling.datamodel.base_models import InputFormat
|
|
from docling.datamodel.document import InputDocument
|
|
|
|
|
|
def _get_backend(fname):
    """Wrap *fname* in an AsciiDoc ``InputDocument`` and return its backend.

    Args:
        fname: Path (or stream) forwarded as ``path_or_stream``.

    Returns:
        The ``_backend`` attribute of the constructed input document.
    """
    return InputDocument(
        path_or_stream=fname,
        format=InputFormat.ASCIIDOC,
        backend=AsciiDocBackend,
    )._backend
|
|
|
|
|
|
def test_parse_picture():
    """Check ``AsciiDocBackend._parse_picture`` with and without size attributes."""
    # Image macro that carries explicit width/height attributes.
    sized = AsciiDocBackend._parse_picture(
        "image::images/example1.png[Example Image, width=200, height=150, align=center]"
    )
    assert sized
    assert sized.get("width", 0) == "200"
    assert sized.get("height", 0) == "150"
    assert sized.get("uri", "") == "images/example1.png"

    # Image macro with only a caption: no size keys may appear in the result.
    unsized = AsciiDocBackend._parse_picture(
        "image::renamed-bookmark.png[Renamed bookmark]"
    )
    assert unsized
    for size_key in ("width", "height"):
        assert size_key not in unsized
    assert unsized.get("uri", "") == "renamed-bookmark.png"
|
|
|
|
|
|
def test_asciidocs_examples():
    """Convert every AsciiDoc sample and exercise its text and Markdown exports.

    Each ``*.asciidoc`` file under ``./tests/data/asciidoc`` is converted through
    the backend returned by ``_get_backend`` and exported both as indented text
    and as Markdown. If the matching ground-truth file
    ``./tests/data/groundtruth/docling_v2/<name>.md`` does not exist, it is
    created from the Markdown prediction. If it exists, it is only read: the
    content comparison is currently disabled (see the TODO below).

    The paths are relative, so the test expects to run from the repository root.
    """
    fnames = sorted(glob.glob("./tests/data/asciidoc/*.asciidoc"))

    for fname in fnames:
        print(f"reading {fname}")

        bname = os.path.basename(fname)
        gname = os.path.join("./tests/data/groundtruth/docling_v2/", bname + ".md")

        doc_backend = _get_backend(Path(fname))
        doc = doc_backend.convert()

        pred_itdoc = doc._export_to_indented_text(max_text_len=16)
        print("\n\n", pred_itdoc)

        pred_mddoc = doc.export_to_markdown()
        print("\n\n", pred_mddoc)

        if os.path.exists(gname):
            # Explicit UTF-8 keeps the ground truth independent of the
            # platform's locale encoding.
            with open(gname, encoding="utf-8") as fr:
                fr.read()

            # TODO(review): the comparison against the ground truth is disabled;
            # the reason is not visible here. Re-enable once confirmed stable:
            # assert pred_mddoc == true_mddoc, "pred_mddoc!=true_mddoc for asciidoc"
        else:
            # First run for this sample: record the prediction as ground truth.
            with open(gname, "w", encoding="utf-8") as fw:
                fw.write(pred_mddoc)
|