diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 71ff0d3..8fb2e98 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -265,30 +265,31 @@ jobs: context: . file: ./Dockerfile push: true - load: true tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} platforms: linux/amd64 outputs: type=registry no-cache: true - - name: Setup Beaker CLI - uses: allenai/setup-beaker@v2 - with: - token: ${{ secrets.BEAKER_TOKEN }} - version: latest - - name: Debug Docker images - run: docker images + # jakep: push to beaker can't work because of limited disk space on these runners + # jakep: (you can try by setting load: true above, but you'll need a larger runner) + # - name: Setup Beaker CLI + # uses: allenai/setup-beaker@v2 + # with: + # token: ${{ secrets.BEAKER_TOKEN }} + # version: latest + # - name: Debug Docker images + # run: docker images - - name: Push to Beaker - env: - BEAKER_TOKEN: ${{ secrets.BEAKER_TOKEN }} - run: | - VERSION=${{ steps.meta.outputs.version }} - beaker image create \ - --name "olmocr-inference-$VERSION" \ - --workspace ai2/olmocr \ - alleninstituteforai/olmocr:$VERSION + # - name: Push to Beaker + # env: + # BEAKER_TOKEN: ${{ secrets.BEAKER_TOKEN }} + # run: | + # VERSION=${{ steps.meta.outputs.version }} + # beaker image create \ + # --name "olmocr-inference-$VERSION" \ + # --workspace ai2/olmocr \ + # alleninstituteforai/olmocr:$VERSION - name: Clean up after build if: always() diff --git a/olmocr/pipeline.py b/olmocr/pipeline.py index 770e086..f056cd6 100644 --- a/olmocr/pipeline.py +++ b/olmocr/pipeline.py @@ -270,6 +270,7 @@ async def process_page(args, worker_id: int, pdf_orig_path: str, pdf_local_path: local_image_rotation = page_response.rotation_correction raise ValueError(f"invalid_page rotation for {pdf_orig_path}-{page_num}") + metrics.add_metrics(completed_pages=1) await tracker.track_work(worker_id, f"{pdf_orig_path}-{page_num}", "finished") return PageResult( 
pdf_orig_path, @@ -308,6 +309,7 @@ async def process_page(args, worker_id: int, pdf_orig_path: str, pdf_local_path: attempt += 1 logger.error(f"Failed to process {pdf_orig_path}-{page_num} after {MAX_RETRIES} attempts.") + metrics.add_metrics(failed_pages=1) await tracker.track_work(worker_id, f"{pdf_orig_path}-{page_num}", "errored") return PageResult( diff --git a/olmocr/version.py b/olmocr/version.py index b6346ab..81decdd 100644 --- a/olmocr/version.py +++ b/olmocr/version.py @@ -2,7 +2,7 @@ _MAJOR = "0" _MINOR = "1" # On main and in a nightly release the patch should be one ahead of the last # released build. -_PATCH = "74" +_PATCH = "75" # This is mainly for nightly builds which have the suffix ".dev$DATE". See # https://semver.org/#is-v123-a-semantic-version for the semantics. _SUFFIX = ""