mirror of
https://github.com/allenai/olmocr.git
synced 2025-08-31 20:36:21 +00:00
running isort again
This commit is contained in:
parent
2c2953329e
commit
c37e545d25
@ -11,7 +11,6 @@ import smart_open
|
|||||||
from cached_path import cached_path
|
from cached_path import cached_path
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def setup_logging():
|
def setup_logging():
|
||||||
"""Configure logging for the script."""
|
"""Configure logging for the script."""
|
||||||
logging.basicConfig(level=logging.INFO, format="[%(asctime)s] %(levelname)s: %(message)s", handlers=[logging.StreamHandler(sys.stdout)])
|
logging.basicConfig(level=logging.INFO, format="[%(asctime)s] %(levelname)s: %(message)s", handlers=[logging.StreamHandler(sys.stdout)])
|
||||||
|
@ -13,7 +13,7 @@ import sys
|
|||||||
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
|
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Dict, Optional, List
|
from typing import Dict, List, Optional
|
||||||
|
|
||||||
import boto3
|
import boto3
|
||||||
import zstandard
|
import zstandard
|
||||||
|
@ -1 +1,7 @@
|
|||||||
from .prompts import build_openai_silver_data_prompt, PageResponse, openai_response_format_schema, build_finetuning_prompt, extract_raw_text
|
from .prompts import (
|
||||||
|
PageResponse,
|
||||||
|
build_finetuning_prompt,
|
||||||
|
build_openai_silver_data_prompt,
|
||||||
|
extract_raw_text,
|
||||||
|
openai_response_format_schema,
|
||||||
|
)
|
||||||
|
@ -15,7 +15,6 @@ from olmocr.data.renderpdf import get_pdf_media_box_width_height
|
|||||||
from olmocr.prompts.anchor import get_anchor_text
|
from olmocr.prompts.anchor import get_anchor_text
|
||||||
from olmocr.s3_utils import parse_custom_id, parse_s3_path
|
from olmocr.s3_utils import parse_custom_id, parse_s3_path
|
||||||
|
|
||||||
|
|
||||||
# Configure logging
|
# Configure logging
|
||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
@ -17,18 +17,18 @@ from accelerate.utils import PrecisionType
|
|||||||
from datasets import Dataset, DatasetDict, concatenate_datasets
|
from datasets import Dataset, DatasetDict, concatenate_datasets
|
||||||
from transformers import AutoProcessor
|
from transformers import AutoProcessor
|
||||||
|
|
||||||
from .core.cli import to_native_types
|
|
||||||
from .core.config import AwsConfig, DataConfig, SourceConfig, TrainConfig, WandbConfig
|
|
||||||
from .core.loggers import get_logger
|
|
||||||
from .core.paths import copy_dir, is_local
|
|
||||||
from .core.state import BeakerState
|
|
||||||
|
|
||||||
from olmocr.train.dataloader import build_finetuning_dataset
|
from olmocr.train.dataloader import build_finetuning_dataset
|
||||||
from olmocr.train.dataprep import (
|
from olmocr.train.dataprep import (
|
||||||
batch_prepare_data_for_molmo_training,
|
batch_prepare_data_for_molmo_training,
|
||||||
batch_prepare_data_for_qwen2_training,
|
batch_prepare_data_for_qwen2_training,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
from .core.cli import to_native_types
|
||||||
|
from .core.config import AwsConfig, DataConfig, SourceConfig, TrainConfig, WandbConfig
|
||||||
|
from .core.loggers import get_logger
|
||||||
|
from .core.paths import copy_dir, is_local
|
||||||
|
from .core.state import BeakerState
|
||||||
|
|
||||||
T = TypeVar("T")
|
T = TypeVar("T")
|
||||||
|
|
||||||
def accelerator_to_dtype(accelerator: Accelerator) -> torch.dtype:
|
def accelerator_to_dtype(accelerator: Accelerator) -> torch.dtype:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user