Final cleanup

This commit is contained in:
Jake Poznanski 2025-06-17 19:07:37 +00:00
parent e103634d30
commit b61ef52d36
3 changed files with 21 additions and 18 deletions

View File

@ -265,30 +265,31 @@ jobs:
context: . context: .
file: ./Dockerfile file: ./Dockerfile
push: true push: true
load: true
tags: ${{ steps.meta.outputs.tags }} tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }} labels: ${{ steps.meta.outputs.labels }}
platforms: linux/amd64 platforms: linux/amd64
outputs: type=registry outputs: type=registry
no-cache: true no-cache: true
- name: Setup Beaker CLI # jakep: push to beaker can't work because of limited disk space on these runners
uses: allenai/setup-beaker@v2 # jakep: (you can try by setting load: true above, but you'll need a larger runner)
with: # - name: Setup Beaker CLI
token: ${{ secrets.BEAKER_TOKEN }} # uses: allenai/setup-beaker@v2
version: latest # with:
- name: Debug Docker images # token: ${{ secrets.BEAKER_TOKEN }}
run: docker images # version: latest
# - name: Debug Docker images
# run: docker images
- name: Push to Beaker # - name: Push to Beaker
env: # env:
BEAKER_TOKEN: ${{ secrets.BEAKER_TOKEN }} # BEAKER_TOKEN: ${{ secrets.BEAKER_TOKEN }}
run: | # run: |
VERSION=${{ steps.meta.outputs.version }} # VERSION=${{ steps.meta.outputs.version }}
beaker image create \ # beaker image create \
--name "olmocr-inference-$VERSION" \ # --name "olmocr-inference-$VERSION" \
--workspace ai2/olmocr \ # --workspace ai2/olmocr \
alleninstituteforai/olmocr:$VERSION # alleninstituteforai/olmocr:$VERSION
- name: Clean up after build - name: Clean up after build
if: always() if: always()

View File

@ -270,6 +270,7 @@ async def process_page(args, worker_id: int, pdf_orig_path: str, pdf_local_path:
local_image_rotation = page_response.rotation_correction local_image_rotation = page_response.rotation_correction
raise ValueError(f"invalid_page rotation for {pdf_orig_path}-{page_num}") raise ValueError(f"invalid_page rotation for {pdf_orig_path}-{page_num}")
metrics.add_metrics(completed_pages=1)
await tracker.track_work(worker_id, f"{pdf_orig_path}-{page_num}", "finished") await tracker.track_work(worker_id, f"{pdf_orig_path}-{page_num}", "finished")
return PageResult( return PageResult(
pdf_orig_path, pdf_orig_path,
@ -308,6 +309,7 @@ async def process_page(args, worker_id: int, pdf_orig_path: str, pdf_local_path:
attempt += 1 attempt += 1
logger.error(f"Failed to process {pdf_orig_path}-{page_num} after {MAX_RETRIES} attempts.") logger.error(f"Failed to process {pdf_orig_path}-{page_num} after {MAX_RETRIES} attempts.")
metrics.add_metrics(failed_pages=1)
await tracker.track_work(worker_id, f"{pdf_orig_path}-{page_num}", "errored") await tracker.track_work(worker_id, f"{pdf_orig_path}-{page_num}", "errored")
return PageResult( return PageResult(

View File

@ -2,7 +2,7 @@ _MAJOR = "0"
_MINOR = "1" _MINOR = "1"
# On main and in a nightly release the patch should be one ahead of the last # On main and in a nightly release the patch should be one ahead of the last
# released build. # released build.
_PATCH = "74" _PATCH = "75"
# This is mainly for nightly builds which have the suffix ".dev$DATE". See # This is mainly for nightly builds which have the suffix ".dev$DATE". See
# https://semver.org/#is-v123-a-semantic-version for the semantics. # https://semver.org/#is-v123-a-semantic-version for the semantics.
_SUFFIX = "" _SUFFIX = ""