diff --git a/pdelfin/beakerpipeline.py b/pdelfin/beakerpipeline.py index 071f413..d1a16d2 100644 --- a/pdelfin/beakerpipeline.py +++ b/pdelfin/beakerpipeline.py @@ -289,7 +289,8 @@ def build_dolma_document(pdf_s3_path, page_results): "Source-File": pdf_s3_path, "pdf-total-pages": len(page_results), "total-input-tokens": sum(page.input_tokens for page in page_results), - "total-output-tokens": sum(page.output_tokens for page in page_results) + "total-output-tokens": sum(page.output_tokens for page in page_results), + "total-fallback-pages": sum(page.is_fallback for page in page_results), } id_ = hashlib.sha1(document_text.encode()).hexdigest() diff --git a/pdelfin/version.py b/pdelfin/version.py index a60ca62..76a40ed 100644 --- a/pdelfin/version.py +++ b/pdelfin/version.py @@ -2,7 +2,7 @@ _MAJOR = "0" _MINOR = "1" # On main and in a nightly release the patch should be one ahead of the last # released build. -_PATCH = "35" +_PATCH = "36" # This is mainly for nightly builds which have the suffix ".dev$DATE". See # https://semver.org/#is-v123-a-semantic-version for the semantics. _SUFFIX = ""