running isort again

Jake Poznanski 2025-01-30 10:53:35 -08:00
parent 2c2953329e
commit c37e545d25
5 changed files with 14 additions and 10 deletions
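For context, the hunks below are the kind of reordering isort applies automatically. A minimal sketch of its programmatic API, assuming default settings rather than this repository's actual isort configuration:

import isort

# Names inside a single `from` import are alphabetized in place.
before = "from typing import Dict, Optional, List\n"
print(isort.code(before))
# -> from typing import Dict, List, Optional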


@@ -11,7 +11,6 @@ import smart_open
 from cached_path import cached_path
 def setup_logging():
     """Configure logging for the script."""
     logging.basicConfig(level=logging.INFO, format="[%(asctime)s] %(levelname)s: %(message)s", handlers=[logging.StreamHandler(sys.stdout)])


@@ -13,7 +13,7 @@ import sys
 from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Dict, Optional, List
+from typing import Dict, List, Optional
 import boto3
 import zstandard


@@ -1 +1,7 @@
-from .prompts import build_openai_silver_data_prompt, PageResponse, openai_response_format_schema, build_finetuning_prompt, extract_raw_text
+from .prompts import (
+    PageResponse,
+    build_finetuning_prompt,
+    build_openai_silver_data_prompt,
+    extract_raw_text,
+    openai_response_format_schema,
+)


@@ -15,7 +15,6 @@ from olmocr.data.renderpdf import get_pdf_media_box_width_height
 from olmocr.prompts.anchor import get_anchor_text
 from olmocr.s3_utils import parse_custom_id, parse_s3_path
 # Configure logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)


@@ -17,18 +17,18 @@ from accelerate.utils import PrecisionType
 from datasets import Dataset, DatasetDict, concatenate_datasets
 from transformers import AutoProcessor
-from .core.cli import to_native_types
-from .core.config import AwsConfig, DataConfig, SourceConfig, TrainConfig, WandbConfig
-from .core.loggers import get_logger
-from .core.paths import copy_dir, is_local
-from .core.state import BeakerState
 from olmocr.train.dataloader import build_finetuning_dataset
 from olmocr.train.dataprep import (
     batch_prepare_data_for_molmo_training,
     batch_prepare_data_for_qwen2_training,
 )
+from .core.cli import to_native_types
+from .core.config import AwsConfig, DataConfig, SourceConfig, TrainConfig, WandbConfig
+from .core.loggers import get_logger
+from .core.paths import copy_dir, is_local
+from .core.state import BeakerState
 T = TypeVar("T")
 def accelerator_to_dtype(accelerator: Accelerator) -> torch.dtype:
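The last hunk moves the relative .core.* imports below the absolute olmocr.train.* imports, consistent with isort's default section order (stdlib, third-party, first-party, then local-folder relative imports). A small sketch, again assuming default settings rather than the project's own configuration:

import isort

# Relative imports are sorted into the final "local folder" section,
# so they land after the absolute imports, separated by a blank line.
src = (
    "from .core.cli import to_native_types\n"
    "from olmocr.train.dataloader import build_finetuning_dataset\n"
)
print(isort.code(src))
# -> from olmocr.train.dataloader import build_finetuning_dataset
#
#    from .core.cli import to_native_types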