From c37e545d25adea78221d557cf05b0d41a64954be Mon Sep 17 00:00:00 2001 From: Jake Poznanski Date: Thu, 30 Jan 2025 10:53:35 -0800 Subject: [PATCH] running isort again --- olmocr/data/convertsilver_openai.py | 1 - olmocr/eval/runeval.py | 2 +- olmocr/prompts/__init__.py | 8 +++++++- olmocr/train/dataloader.py | 1 - olmocr/train/utils.py | 12 ++++++------ 5 files changed, 14 insertions(+), 10 deletions(-) diff --git a/olmocr/data/convertsilver_openai.py b/olmocr/data/convertsilver_openai.py index 87322b6..ab89d64 100644 --- a/olmocr/data/convertsilver_openai.py +++ b/olmocr/data/convertsilver_openai.py @@ -11,7 +11,6 @@ import smart_open from cached_path import cached_path - def setup_logging(): """Configure logging for the script.""" logging.basicConfig(level=logging.INFO, format="[%(asctime)s] %(levelname)s: %(message)s", handlers=[logging.StreamHandler(sys.stdout)]) diff --git a/olmocr/eval/runeval.py b/olmocr/eval/runeval.py index ecf6f44..994cf09 100644 --- a/olmocr/eval/runeval.py +++ b/olmocr/eval/runeval.py @@ -13,7 +13,7 @@ import sys from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed from dataclasses import dataclass from pathlib import Path -from typing import Dict, Optional, List +from typing import Dict, List, Optional import boto3 import zstandard diff --git a/olmocr/prompts/__init__.py b/olmocr/prompts/__init__.py index fbab393..6feaa65 100644 --- a/olmocr/prompts/__init__.py +++ b/olmocr/prompts/__init__.py @@ -1 +1,7 @@ -from .prompts import build_openai_silver_data_prompt, PageResponse, openai_response_format_schema, build_finetuning_prompt, extract_raw_text \ No newline at end of file +from .prompts import ( + PageResponse, + build_finetuning_prompt, + build_openai_silver_data_prompt, + extract_raw_text, + openai_response_format_schema, +) diff --git a/olmocr/train/dataloader.py b/olmocr/train/dataloader.py index 80d465d..3dde420 100644 --- a/olmocr/train/dataloader.py +++ b/olmocr/train/dataloader.py @@ -15,7 +15,6 @@ from olmocr.data.renderpdf import get_pdf_media_box_width_height from olmocr.prompts.anchor import get_anchor_text from olmocr.s3_utils import parse_custom_id, parse_s3_path - # Configure logging logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) diff --git a/olmocr/train/utils.py b/olmocr/train/utils.py index b2ef15d..ab083cd 100644 --- a/olmocr/train/utils.py +++ b/olmocr/train/utils.py @@ -17,18 +17,18 @@ from accelerate.utils import PrecisionType from datasets import Dataset, DatasetDict, concatenate_datasets from transformers import AutoProcessor -from .core.cli import to_native_types -from .core.config import AwsConfig, DataConfig, SourceConfig, TrainConfig, WandbConfig -from .core.loggers import get_logger -from .core.paths import copy_dir, is_local -from .core.state import BeakerState - from olmocr.train.dataloader import build_finetuning_dataset from olmocr.train.dataprep import ( batch_prepare_data_for_molmo_training, batch_prepare_data_for_qwen2_training, ) +from .core.cli import to_native_types +from .core.config import AwsConfig, DataConfig, SourceConfig, TrainConfig, WandbConfig +from .core.loggers import get_logger +from .core.paths import copy_dir, is_local +from .core.state import BeakerState + T = TypeVar("T") def accelerator_to_dtype(accelerator: Accelerator) -> torch.dtype: