mirror of
https://github.com/microsoft/graphrag.git
synced 2025-12-04 02:49:57 +00:00
cleanup logging factory
This commit is contained in:
parent
bd97255838
commit
601bc4a3c9
@ -19,7 +19,7 @@ from graphrag.config.models.graph_rag_config import GraphRagConfig
|
||||
from graphrag.index.create_pipeline_config import create_pipeline_config
|
||||
from graphrag.index.run import run_pipeline_with_config
|
||||
from graphrag.index.typing import PipelineRunResult
|
||||
from graphrag.logging.base import ProgressReporter
|
||||
from graphrag.logger.base import ProgressLogger
|
||||
from graphrag.vector_stores.factory import VectorStoreType
|
||||
|
||||
|
||||
@ -29,7 +29,7 @@ async def build_index(
|
||||
is_resume_run: bool = False,
|
||||
memory_profile: bool = False,
|
||||
callbacks: list[WorkflowCallbacks] | None = None,
|
||||
progress_reporter: ProgressReporter | None = None,
|
||||
progress_reporter: ProgressLogger | None = None,
|
||||
) -> list[PipelineRunResult]:
|
||||
"""Run the pipeline with the given configuration.
|
||||
|
||||
@ -45,7 +45,7 @@ async def build_index(
|
||||
Whether to enable memory profiling.
|
||||
callbacks : list[WorkflowCallbacks] | None default=None
|
||||
A list of callbacks to register.
|
||||
progress_reporter : ProgressReporter | None default=None
|
||||
progress_reporter : ProgressLogger | None default=None
|
||||
The progress reporter.
|
||||
|
||||
Returns
|
||||
|
||||
@ -16,7 +16,7 @@ from pydantic import PositiveInt, validate_call
|
||||
|
||||
from graphrag.config.models.graph_rag_config import GraphRagConfig
|
||||
from graphrag.index.llm.load_llm import load_llm
|
||||
from graphrag.logging.print_progress import PrintProgressReporter
|
||||
from graphrag.logger.print_progress import PrintProgressLogger
|
||||
from graphrag.prompt_tune.defaults import MAX_TOKEN_COUNT
|
||||
from graphrag.prompt_tune.generator.community_report_rating import (
|
||||
generate_community_report_rating,
|
||||
@ -80,7 +80,7 @@ async def generate_indexing_prompts(
|
||||
-------
|
||||
tuple[str, str, str]: entity extraction prompt, entity summarization prompt, community summarization prompt
|
||||
"""
|
||||
reporter = PrintProgressReporter("")
|
||||
reporter = PrintProgressLogger("")
|
||||
|
||||
# Retrieve documents
|
||||
doc_list = await load_docs_in_chunks(
|
||||
|
||||
@ -29,7 +29,7 @@ from graphrag.index.config.embeddings import (
|
||||
community_full_content_embedding,
|
||||
entity_description_embedding,
|
||||
)
|
||||
from graphrag.logging.print_progress import PrintProgressReporter
|
||||
from graphrag.logger.print_progress import PrintProgressLogger
|
||||
from graphrag.query.factory import (
|
||||
get_drift_search_engine,
|
||||
get_global_search_engine,
|
||||
@ -52,7 +52,7 @@ from graphrag.vector_stores.factory import VectorStoreFactory, VectorStoreType
|
||||
if TYPE_CHECKING:
|
||||
from graphrag.query.structured_search.base import SearchResult
|
||||
|
||||
reporter = PrintProgressReporter("")
|
||||
reporter = PrintProgressLogger("")
|
||||
|
||||
|
||||
@validate_call(config={"arbitrary_types_allowed": True})
|
||||
|
||||
@ -7,16 +7,16 @@ from typing import Any
|
||||
|
||||
from datashaper import ExecutionNode, NoopWorkflowCallbacks, Progress, TableContainer
|
||||
|
||||
from graphrag.logging.base import ProgressReporter
|
||||
from graphrag.logger.base import ProgressLogger
|
||||
|
||||
|
||||
class ProgressWorkflowCallbacks(NoopWorkflowCallbacks):
|
||||
"""A callbackmanager that delegates to a ProgressReporter."""
|
||||
"""A callbackmanager that delegates to a ProgressLogger."""
|
||||
|
||||
_root_progress: ProgressReporter
|
||||
_progress_stack: list[ProgressReporter]
|
||||
_root_progress: ProgressLogger
|
||||
_progress_stack: list[ProgressLogger]
|
||||
|
||||
def __init__(self, progress: ProgressReporter) -> None:
|
||||
def __init__(self, progress: ProgressLogger) -> None:
|
||||
"""Create a new ProgressWorkflowCallbacks."""
|
||||
self._progress = progress
|
||||
self._progress_stack = [progress]
|
||||
@ -28,7 +28,7 @@ class ProgressWorkflowCallbacks(NoopWorkflowCallbacks):
|
||||
self._progress_stack.append(self._latest.child(name))
|
||||
|
||||
@property
|
||||
def _latest(self) -> ProgressReporter:
|
||||
def _latest(self) -> ProgressLogger:
|
||||
return self._progress_stack[-1]
|
||||
|
||||
def on_workflow_start(self, name: str, instance: object) -> None:
|
||||
|
||||
@ -16,9 +16,8 @@ from graphrag.config.load_config import load_config
|
||||
from graphrag.config.logging import enable_logging_with_config
|
||||
from graphrag.config.resolve_path import resolve_paths
|
||||
from graphrag.index.validate_config import validate_config_names
|
||||
from graphrag.logging.base import ProgressReporter
|
||||
from graphrag.logging.factory import create_progress_reporter
|
||||
from graphrag.logging.types import ReporterType
|
||||
from graphrag.logger.base import ProgressLogger
|
||||
from graphrag.logger.factory import LoggerFactory, LoggerType
|
||||
from graphrag.utils.cli import redact
|
||||
|
||||
# Ignore warnings from numba
|
||||
@ -27,7 +26,7 @@ warnings.filterwarnings("ignore", message=".*NumbaDeprecationWarning.*")
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _logger(reporter: ProgressReporter):
|
||||
def _logger(reporter: ProgressLogger):
|
||||
def info(msg: str, verbose: bool = False):
|
||||
log.info(msg)
|
||||
if verbose:
|
||||
@ -46,7 +45,7 @@ def _logger(reporter: ProgressReporter):
|
||||
return info, error, success
|
||||
|
||||
|
||||
def _register_signal_handlers(reporter: ProgressReporter):
|
||||
def _register_signal_handlers(reporter: ProgressLogger):
|
||||
import signal
|
||||
|
||||
def handle_signal(signum, _):
|
||||
@ -70,7 +69,7 @@ def index_cli(
|
||||
resume: str | None,
|
||||
memprofile: bool,
|
||||
cache: bool,
|
||||
reporter: ReporterType,
|
||||
reporter: LoggerType,
|
||||
config_filepath: Path | None,
|
||||
dry_run: bool,
|
||||
skip_validation: bool,
|
||||
@ -97,7 +96,7 @@ def update_cli(
|
||||
verbose: bool,
|
||||
memprofile: bool,
|
||||
cache: bool,
|
||||
reporter: ReporterType,
|
||||
reporter: LoggerType,
|
||||
config_filepath: Path | None,
|
||||
skip_validation: bool,
|
||||
output_dir: Path | None,
|
||||
@ -139,7 +138,7 @@ def _run_index(
|
||||
skip_validation,
|
||||
output_dir,
|
||||
):
|
||||
progress_reporter = create_progress_reporter(reporter)
|
||||
progress_reporter = LoggerFactory.create_logger(reporter)
|
||||
info, error, success = _logger(progress_reporter)
|
||||
run_id = resume or time.strftime("%Y%m%d-%H%M%S")
|
||||
|
||||
|
||||
@ -6,8 +6,7 @@
|
||||
from pathlib import Path
|
||||
|
||||
from graphrag.config.init_content import INIT_DOTENV, INIT_YAML
|
||||
from graphrag.logging.factory import create_progress_reporter
|
||||
from graphrag.logging.types import ReporterType
|
||||
from graphrag.logger.factory import LoggerFactory, LoggerType
|
||||
from graphrag.prompts.index.claim_extraction import CLAIM_EXTRACTION_PROMPT
|
||||
from graphrag.prompts.index.community_report import (
|
||||
COMMUNITY_REPORT_PROMPT,
|
||||
@ -28,7 +27,7 @@ from graphrag.prompts.query.question_gen_system_prompt import QUESTION_SYSTEM_PR
|
||||
|
||||
def initialize_project_at(path: Path) -> None:
|
||||
"""Initialize the project at the given path."""
|
||||
progress_reporter = create_progress_reporter(ReporterType.RICH)
|
||||
progress_reporter = LoggerFactory.create_logger(LoggerType.RICH)
|
||||
progress_reporter.info(f"Initializing project at {path}")
|
||||
root = Path(path)
|
||||
if not root.exists():
|
||||
|
||||
@ -12,7 +12,7 @@ from typing import Annotated
|
||||
|
||||
import typer
|
||||
|
||||
from graphrag.logging.types import ReporterType
|
||||
from graphrag.logger.types import LoggerType
|
||||
from graphrag.prompt_tune.defaults import (
|
||||
MAX_TOKEN_COUNT,
|
||||
MIN_CHUNK_SIZE,
|
||||
@ -146,8 +146,8 @@ def _index_cli(
|
||||
str | None, typer.Option(help="Resume a given indexing run")
|
||||
] = None,
|
||||
reporter: Annotated[
|
||||
ReporterType, typer.Option(help="The progress reporter to use.")
|
||||
] = ReporterType.RICH,
|
||||
LoggerType, typer.Option(help="The progress reporter to use.")
|
||||
] = LoggerType.RICH,
|
||||
dry_run: Annotated[
|
||||
bool,
|
||||
typer.Option(
|
||||
@ -180,7 +180,7 @@ def _index_cli(
|
||||
resume=resume,
|
||||
memprofile=memprofile,
|
||||
cache=cache,
|
||||
reporter=ReporterType(reporter),
|
||||
reporter=LoggerType(reporter),
|
||||
config_filepath=config,
|
||||
dry_run=dry_run,
|
||||
skip_validation=skip_validation,
|
||||
@ -213,8 +213,8 @@ def _update_cli(
|
||||
bool, typer.Option(help="Run the indexing pipeline with memory profiling")
|
||||
] = False,
|
||||
reporter: Annotated[
|
||||
ReporterType, typer.Option(help="The progress reporter to use.")
|
||||
] = ReporterType.RICH,
|
||||
LoggerType, typer.Option(help="The progress reporter to use.")
|
||||
] = LoggerType.RICH,
|
||||
cache: Annotated[bool, typer.Option(help="Use LLM cache.")] = True,
|
||||
skip_validation: Annotated[
|
||||
bool,
|
||||
@ -244,7 +244,7 @@ def _update_cli(
|
||||
verbose=verbose,
|
||||
memprofile=memprofile,
|
||||
cache=cache,
|
||||
reporter=ReporterType(reporter),
|
||||
reporter=LoggerType(reporter),
|
||||
config_filepath=config,
|
||||
skip_validation=skip_validation,
|
||||
output_dir=output,
|
||||
|
||||
@ -7,7 +7,7 @@ from pathlib import Path
|
||||
|
||||
import graphrag.api as api
|
||||
from graphrag.config.load_config import load_config
|
||||
from graphrag.logging.print_progress import PrintProgressReporter
|
||||
from graphrag.logger.print_progress import PrintProgressLogger
|
||||
from graphrag.prompt_tune.generator.community_report_summarization import (
|
||||
COMMUNITY_SUMMARIZATION_FILENAME,
|
||||
)
|
||||
@ -52,7 +52,7 @@ async def prompt_tune(
|
||||
- k: The number of documents to select when using auto selection method.
|
||||
- min_examples_required: The minimum number of examples required for entity extraction prompts.
|
||||
"""
|
||||
reporter = PrintProgressReporter("")
|
||||
reporter = PrintProgressLogger("")
|
||||
root_path = Path(root).resolve()
|
||||
graph_config = load_config(root_path, config)
|
||||
|
||||
|
||||
@ -14,11 +14,11 @@ from graphrag.config.load_config import load_config
|
||||
from graphrag.config.models.graph_rag_config import GraphRagConfig
|
||||
from graphrag.config.resolve_path import resolve_paths
|
||||
from graphrag.index.create_pipeline_config import create_pipeline_config
|
||||
from graphrag.logging.print_progress import PrintProgressReporter
|
||||
from graphrag.logger.print_progress import PrintProgressLogger
|
||||
from graphrag.storage.factory import StorageFactory
|
||||
from graphrag.utils.storage import _load_table_from_storage
|
||||
|
||||
reporter = PrintProgressReporter("")
|
||||
reporter = PrintProgressLogger("")
|
||||
|
||||
|
||||
def run_global_search(
|
||||
|
||||
@ -12,7 +12,7 @@ import pandas as pd
|
||||
|
||||
from graphrag.index.config.input import PipelineCSVInputConfig, PipelineInputConfig
|
||||
from graphrag.index.utils.hashing import gen_sha512_hash
|
||||
from graphrag.logging.base import ProgressReporter
|
||||
from graphrag.logger.base import ProgressLogger
|
||||
from graphrag.storage.pipeline_storage import PipelineStorage
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
@ -24,7 +24,7 @@ input_type = "csv"
|
||||
|
||||
async def load(
|
||||
config: PipelineInputConfig,
|
||||
progress: ProgressReporter | None,
|
||||
progress: ProgressLogger | None,
|
||||
storage: PipelineStorage,
|
||||
) -> pd.DataFrame:
|
||||
"""Load csv inputs from a directory."""
|
||||
|
||||
@ -17,8 +17,8 @@ from graphrag.index.input.csv import input_type as csv
|
||||
from graphrag.index.input.csv import load as load_csv
|
||||
from graphrag.index.input.text import input_type as text
|
||||
from graphrag.index.input.text import load as load_text
|
||||
from graphrag.logging.base import ProgressReporter
|
||||
from graphrag.logging.null_progress import NullProgressReporter
|
||||
from graphrag.logger.base import ProgressLogger
|
||||
from graphrag.logger.null_progress import NullProgressLogger
|
||||
from graphrag.storage.blob_pipeline_storage import BlobPipelineStorage
|
||||
from graphrag.storage.file_pipeline_storage import FilePipelineStorage
|
||||
|
||||
@ -31,13 +31,13 @@ loaders: dict[str, Callable[..., Awaitable[pd.DataFrame]]] = {
|
||||
|
||||
async def create_input(
|
||||
config: PipelineInputConfig | InputConfig,
|
||||
progress_reporter: ProgressReporter | None = None,
|
||||
progress_reporter: ProgressLogger | None = None,
|
||||
root_dir: str | None = None,
|
||||
) -> pd.DataFrame:
|
||||
"""Instantiate input data for a pipeline."""
|
||||
root_dir = root_dir or ""
|
||||
log.info("loading input from root_dir=%s", config.base_dir)
|
||||
progress_reporter = progress_reporter or NullProgressReporter()
|
||||
progress_reporter = progress_reporter or NullProgressLogger()
|
||||
|
||||
if config is None:
|
||||
msg = "No input specified!"
|
||||
|
||||
@ -12,7 +12,7 @@ import pandas as pd
|
||||
|
||||
from graphrag.index.config.input import PipelineInputConfig
|
||||
from graphrag.index.utils.hashing import gen_sha512_hash
|
||||
from graphrag.logging.base import ProgressReporter
|
||||
from graphrag.logger.base import ProgressLogger
|
||||
from graphrag.storage.pipeline_storage import PipelineStorage
|
||||
|
||||
DEFAULT_FILE_PATTERN = re.compile(
|
||||
@ -24,7 +24,7 @@ log = logging.getLogger(__name__)
|
||||
|
||||
async def load(
|
||||
config: PipelineInputConfig,
|
||||
progress: ProgressReporter | None,
|
||||
progress: ProgressLogger | None,
|
||||
storage: PipelineStorage,
|
||||
) -> pd.DataFrame:
|
||||
"""Load text inputs from a directory."""
|
||||
|
||||
@ -48,8 +48,8 @@ from graphrag.index.workflows import (
|
||||
WorkflowDefinitions,
|
||||
load_workflows,
|
||||
)
|
||||
from graphrag.logging.base import ProgressReporter
|
||||
from graphrag.logging.null_progress import NullProgressReporter
|
||||
from graphrag.logger.base import ProgressLogger
|
||||
from graphrag.logger.null_progress import NullProgressLogger
|
||||
from graphrag.storage.factory import StorageFactory
|
||||
from graphrag.storage.pipeline_storage import PipelineStorage
|
||||
|
||||
@ -64,7 +64,7 @@ async def run_pipeline_with_config(
|
||||
update_index_storage: PipelineStorage | None = None,
|
||||
cache: PipelineCache | None = None,
|
||||
callbacks: list[WorkflowCallbacks] | None = None,
|
||||
progress_reporter: ProgressReporter | None = None,
|
||||
progress_reporter: ProgressLogger | None = None,
|
||||
input_post_process_steps: list[PipelineWorkflowStep] | None = None,
|
||||
additional_verbs: VerbDefinitions | None = None,
|
||||
additional_workflows: WorkflowDefinitions | None = None,
|
||||
@ -99,7 +99,7 @@ async def run_pipeline_with_config(
|
||||
config = _apply_substitutions(config, run_id)
|
||||
root_dir = config.root_dir or ""
|
||||
|
||||
progress_reporter = progress_reporter or NullProgressReporter()
|
||||
progress_reporter = progress_reporter or NullProgressLogger()
|
||||
storage_config = config.storage.model_dump() # type: ignore
|
||||
storage = storage or StorageFactory.create_storage(
|
||||
storage_type=storage_config["type"], # type: ignore
|
||||
@ -197,7 +197,7 @@ async def run_pipeline(
|
||||
storage: PipelineStorage | None = None,
|
||||
cache: PipelineCache | None = None,
|
||||
callbacks: list[WorkflowCallbacks] | None = None,
|
||||
progress_reporter: ProgressReporter | None = None,
|
||||
progress_reporter: ProgressLogger | None = None,
|
||||
input_post_process_steps: list[PipelineWorkflowStep] | None = None,
|
||||
additional_verbs: VerbDefinitions | None = None,
|
||||
additional_workflows: WorkflowDefinitions | None = None,
|
||||
@ -226,7 +226,7 @@ async def run_pipeline(
|
||||
"""
|
||||
start_time = time.time()
|
||||
|
||||
progress_reporter = progress_reporter or NullProgressReporter()
|
||||
progress_reporter = progress_reporter or NullProgressLogger()
|
||||
callbacks = callbacks or [ConsoleWorkflowCallbacks()]
|
||||
callback_chain = _create_callback_chain(callbacks, progress_reporter)
|
||||
context = create_run_context(storage=storage, cache=cache, stats=None)
|
||||
|
||||
@ -21,7 +21,7 @@ from graphrag.index.context import PipelineRunContext
|
||||
from graphrag.index.exporter import ParquetExporter
|
||||
from graphrag.index.run.profiling import _write_workflow_stats
|
||||
from graphrag.index.typing import PipelineRunResult
|
||||
from graphrag.logging.base import ProgressReporter
|
||||
from graphrag.logger.base import ProgressLogger
|
||||
from graphrag.storage.pipeline_storage import PipelineStorage
|
||||
from graphrag.utils.storage import _load_table_from_storage
|
||||
|
||||
@ -68,7 +68,7 @@ async def _export_workflow_output(
|
||||
|
||||
|
||||
def _create_callback_chain(
|
||||
callbacks: list[WorkflowCallbacks] | None, progress: ProgressReporter | None
|
||||
callbacks: list[WorkflowCallbacks] | None, progress: ProgressLogger | None
|
||||
) -> WorkflowCallbacks:
|
||||
"""Create a callback manager that encompasses multiple callbacks."""
|
||||
manager = WorkflowCallbacksManager()
|
||||
|
||||
@ -23,7 +23,7 @@ from graphrag.index.update.entities import (
|
||||
_run_entity_summarization,
|
||||
)
|
||||
from graphrag.index.update.relationships import _update_and_merge_relationships
|
||||
from graphrag.logging.print_progress import ProgressReporter
|
||||
from graphrag.logger.print_progress import ProgressLogger
|
||||
from graphrag.storage.pipeline_storage import PipelineStorage
|
||||
from graphrag.utils.storage import _load_table_from_storage
|
||||
|
||||
@ -85,7 +85,7 @@ async def update_dataframe_outputs(
|
||||
config: PipelineConfig,
|
||||
cache: PipelineCache,
|
||||
callbacks: VerbCallbacks,
|
||||
progress_reporter: ProgressReporter,
|
||||
progress_reporter: ProgressLogger,
|
||||
) -> None:
|
||||
"""Update the mergeable outputs.
|
||||
|
||||
|
||||
@ -10,12 +10,10 @@ from datashaper import NoopVerbCallbacks
|
||||
|
||||
from graphrag.config.models.graph_rag_config import GraphRagConfig
|
||||
from graphrag.index.llm.load_llm import load_llm, load_llm_embeddings
|
||||
from graphrag.logging.print_progress import ProgressReporter
|
||||
from graphrag.logger.print_progress import ProgressLogger
|
||||
|
||||
|
||||
def validate_config_names(
|
||||
reporter: ProgressReporter, parameters: GraphRagConfig
|
||||
) -> None:
|
||||
def validate_config_names(reporter: ProgressLogger, parameters: GraphRagConfig) -> None:
|
||||
"""Validate config file for LLM deployment name typos."""
|
||||
# Validate Chat LLM configs
|
||||
llm = load_llm(
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
# Copyright (c) 2024 Microsoft Corporation.
|
||||
# Licensed under the MIT License
|
||||
|
||||
"""Logging utilities and implementations."""
|
||||
"""Logger utilities and implementations."""
|
||||
@ -10,24 +10,24 @@ from datashaper.progress.types import Progress
|
||||
|
||||
|
||||
class StatusLogger(ABC):
|
||||
"""Provides a way to report status updates from the pipeline."""
|
||||
"""Provides a way to log status updates from the pipeline."""
|
||||
|
||||
@abstractmethod
|
||||
def error(self, message: str, details: dict[str, Any] | None = None):
|
||||
"""Report an error."""
|
||||
"""Log an error."""
|
||||
|
||||
@abstractmethod
|
||||
def warning(self, message: str, details: dict[str, Any] | None = None):
|
||||
"""Report a warning."""
|
||||
"""Log a warning."""
|
||||
|
||||
@abstractmethod
|
||||
def log(self, message: str, details: dict[str, Any] | None = None):
|
||||
"""Report a log."""
|
||||
|
||||
|
||||
class ProgressReporter(ABC):
|
||||
class ProgressLogger(ABC):
|
||||
"""
|
||||
Abstract base class for progress reporters.
|
||||
Abstract base class for progress loggers.
|
||||
|
||||
This is used to report workflow processing progress via mechanisms like progress-bars.
|
||||
"""
|
||||
@ -38,10 +38,10 @@ class ProgressReporter(ABC):
|
||||
|
||||
@abstractmethod
|
||||
def dispose(self):
|
||||
"""Dispose of the progress reporter."""
|
||||
"""Dispose of the progress logger."""
|
||||
|
||||
@abstractmethod
|
||||
def child(self, prefix: str, transient=True) -> "ProgressReporter":
|
||||
def child(self, prefix: str, transient=True) -> "ProgressLogger":
|
||||
"""Create a child progress bar."""
|
||||
|
||||
@abstractmethod
|
||||
@ -50,20 +50,20 @@ class ProgressReporter(ABC):
|
||||
|
||||
@abstractmethod
|
||||
def stop(self) -> None:
|
||||
"""Stop the progress reporter."""
|
||||
"""Stop the progress logger."""
|
||||
|
||||
@abstractmethod
|
||||
def error(self, message: str) -> None:
|
||||
"""Report an error."""
|
||||
"""Log an error."""
|
||||
|
||||
@abstractmethod
|
||||
def warning(self, message: str) -> None:
|
||||
"""Report a warning."""
|
||||
"""Log a warning."""
|
||||
|
||||
@abstractmethod
|
||||
def info(self, message: str) -> None:
|
||||
"""Report information."""
|
||||
"""Log information."""
|
||||
|
||||
@abstractmethod
|
||||
def success(self, message: str) -> None:
|
||||
"""Report success."""
|
||||
"""Log success."""
|
||||
@ -5,7 +5,7 @@
|
||||
|
||||
from typing import Any
|
||||
|
||||
from graphrag.logging.base import StatusLogger
|
||||
from graphrag.logger.base import StatusLogger
|
||||
|
||||
|
||||
class ConsoleReporter(StatusLogger):
|
||||
43
graphrag/logger/factory.py
Normal file
43
graphrag/logger/factory.py
Normal file
@ -0,0 +1,43 @@
|
||||
# Copyright (c) 2024 Microsoft Corporation.
|
||||
# Licensed under the MIT License
|
||||
|
||||
"""Factory functions for creating loggers."""
|
||||
|
||||
from typing import ClassVar
|
||||
|
||||
from graphrag.logger.base import ProgressLogger
|
||||
from graphrag.logger.null_progress import NullProgressLogger
|
||||
from graphrag.logger.print_progress import PrintProgressLogger
|
||||
from graphrag.logger.rich_progress import RichProgressLogger
|
||||
from graphrag.logger.types import LoggerType
|
||||
|
||||
|
||||
class LoggerFactory:
|
||||
"""A factory class for loggers."""
|
||||
|
||||
logger_types: ClassVar[dict[str, type]] = {}
|
||||
|
||||
@classmethod
|
||||
def register(cls, logger_type: str, logger: type):
|
||||
"""Register a vector store type."""
|
||||
cls.logger_types[logger_type] = logger
|
||||
|
||||
@classmethod
|
||||
def create_logger(
|
||||
cls, logger_type: LoggerType | str, kwargs: dict | None = None
|
||||
) -> ProgressLogger:
|
||||
"""Create a logger based on the provided type."""
|
||||
if kwargs is None:
|
||||
kwargs = {}
|
||||
match logger_type:
|
||||
case LoggerType.RICH:
|
||||
return RichProgressLogger("GraphRAG Indexer ")
|
||||
case LoggerType.PRINT:
|
||||
return PrintProgressLogger("GraphRAG Indexer ")
|
||||
case LoggerType.NONE:
|
||||
return NullProgressLogger()
|
||||
case _:
|
||||
if logger_type in cls.logger_types:
|
||||
return cls.logger_types[logger_type](**kwargs)
|
||||
# default to null logger if no other logger is found
|
||||
return NullProgressLogger()
|
||||
@ -3,19 +3,19 @@
|
||||
|
||||
"""Null Progress Reporter."""
|
||||
|
||||
from graphrag.logging.base import Progress, ProgressReporter
|
||||
from graphrag.logger.base import Progress, ProgressLogger
|
||||
|
||||
|
||||
class NullProgressReporter(ProgressReporter):
|
||||
"""A progress reporter that does nothing."""
|
||||
class NullProgressLogger(ProgressLogger):
|
||||
"""A progress logger that does nothing."""
|
||||
|
||||
def __call__(self, update: Progress) -> None:
|
||||
"""Update progress."""
|
||||
|
||||
def dispose(self) -> None:
|
||||
"""Dispose of the progress reporter."""
|
||||
"""Dispose of the progress logger."""
|
||||
|
||||
def child(self, prefix: str, transient: bool = True) -> ProgressReporter:
|
||||
def child(self, prefix: str, transient: bool = True) -> ProgressLogger:
|
||||
"""Create a child progress bar."""
|
||||
return self
|
||||
|
||||
@ -23,16 +23,16 @@ class NullProgressReporter(ProgressReporter):
|
||||
"""Force a refresh."""
|
||||
|
||||
def stop(self) -> None:
|
||||
"""Stop the progress reporter."""
|
||||
"""Stop the progress logger."""
|
||||
|
||||
def error(self, message: str) -> None:
|
||||
"""Report an error."""
|
||||
"""Log an error."""
|
||||
|
||||
def warning(self, message: str) -> None:
|
||||
"""Report a warning."""
|
||||
"""Log a warning."""
|
||||
|
||||
def info(self, message: str) -> None:
|
||||
"""Report information."""
|
||||
"""Log information."""
|
||||
|
||||
def success(self, message: str) -> None:
|
||||
"""Report success."""
|
||||
"""Log success."""
|
||||
@ -1,18 +1,18 @@
|
||||
# Copyright (c) 2024 Microsoft Corporation.
|
||||
# Licensed under the MIT License
|
||||
|
||||
"""Print Progress Reporter."""
|
||||
"""Print Progress Logger."""
|
||||
|
||||
from graphrag.logging.base import Progress, ProgressReporter
|
||||
from graphrag.logger.base import Progress, ProgressLogger
|
||||
|
||||
|
||||
class PrintProgressReporter(ProgressReporter):
|
||||
"""A progress reporter that does nothing."""
|
||||
class PrintProgressLogger(ProgressLogger):
|
||||
"""A progress logger that prints progress to stdout."""
|
||||
|
||||
prefix: str
|
||||
|
||||
def __init__(self, prefix: str):
|
||||
"""Create a new progress reporter."""
|
||||
"""Create a new progress logger."""
|
||||
self.prefix = prefix
|
||||
print(f"\n{self.prefix}", end="") # noqa T201
|
||||
|
||||
@ -21,30 +21,30 @@ class PrintProgressReporter(ProgressReporter):
|
||||
print(".", end="") # noqa T201
|
||||
|
||||
def dispose(self) -> None:
|
||||
"""Dispose of the progress reporter."""
|
||||
"""Dispose of the progress logger."""
|
||||
|
||||
def child(self, prefix: str, transient: bool = True) -> "ProgressReporter":
|
||||
def child(self, prefix: str, transient: bool = True) -> ProgressLogger:
|
||||
"""Create a child progress bar."""
|
||||
return PrintProgressReporter(prefix)
|
||||
return PrintProgressLogger(prefix)
|
||||
|
||||
def stop(self) -> None:
|
||||
"""Stop the progress reporter."""
|
||||
"""Stop the progress logger."""
|
||||
|
||||
def force_refresh(self) -> None:
|
||||
"""Force a refresh."""
|
||||
|
||||
def error(self, message: str) -> None:
|
||||
"""Report an error."""
|
||||
"""Log an error."""
|
||||
print(f"\n{self.prefix}ERROR: {message}") # noqa T201
|
||||
|
||||
def warning(self, message: str) -> None:
|
||||
"""Report a warning."""
|
||||
"""Log a warning."""
|
||||
print(f"\n{self.prefix}WARNING: {message}") # noqa T201
|
||||
|
||||
def info(self, message: str) -> None:
|
||||
"""Report information."""
|
||||
"""Log information."""
|
||||
print(f"\n{self.prefix}INFO: {message}") # noqa T201
|
||||
|
||||
def success(self, message: str) -> None:
|
||||
"""Report success."""
|
||||
"""Log success."""
|
||||
print(f"\n{self.prefix}SUCCESS: {message}") # noqa T201
|
||||
@ -13,12 +13,12 @@ from rich.progress import Progress, TaskID, TimeElapsedColumn
|
||||
from rich.spinner import Spinner
|
||||
from rich.tree import Tree
|
||||
|
||||
from graphrag.logging.base import ProgressReporter
|
||||
from graphrag.logger.base import ProgressLogger
|
||||
|
||||
|
||||
# https://stackoverflow.com/a/34325723
|
||||
class RichProgressReporter(ProgressReporter):
|
||||
"""A rich-based progress reporter for CLI use."""
|
||||
class RichProgressLogger(ProgressLogger):
|
||||
"""A rich-based progress logger for CLI use."""
|
||||
|
||||
_console: Console
|
||||
_group: Group
|
||||
@ -32,7 +32,7 @@ class RichProgressReporter(ProgressReporter):
|
||||
_last_refresh: float = 0
|
||||
|
||||
def dispose(self) -> None:
|
||||
"""Dispose of the progress reporter."""
|
||||
"""Dispose of the progress logger."""
|
||||
self._disposing = True
|
||||
self._live.stop()
|
||||
|
||||
@ -59,10 +59,10 @@ class RichProgressReporter(ProgressReporter):
|
||||
def __init__(
|
||||
self,
|
||||
prefix: str,
|
||||
parent: "RichProgressReporter | None" = None,
|
||||
parent: "RichProgressLogger | None" = None,
|
||||
transient: bool = True,
|
||||
) -> None:
|
||||
"""Create a new rich-based progress reporter."""
|
||||
"""Create a new rich-based progress logger."""
|
||||
self._prefix = prefix
|
||||
|
||||
if parent is None:
|
||||
@ -115,27 +115,27 @@ class RichProgressReporter(ProgressReporter):
|
||||
self.live.refresh()
|
||||
|
||||
def stop(self) -> None:
|
||||
"""Stop the progress reporter."""
|
||||
"""Stop the progress logger."""
|
||||
self._live.stop()
|
||||
|
||||
def child(self, prefix: str, transient: bool = True) -> ProgressReporter:
|
||||
def child(self, prefix: str, transient: bool = True) -> ProgressLogger:
|
||||
"""Create a child progress bar."""
|
||||
return RichProgressReporter(parent=self, prefix=prefix, transient=transient)
|
||||
return RichProgressLogger(parent=self, prefix=prefix, transient=transient)
|
||||
|
||||
def error(self, message: str) -> None:
|
||||
"""Report an error."""
|
||||
"""Log an error."""
|
||||
self._console.print(f"❌ [red]{message}[/red]")
|
||||
|
||||
def warning(self, message: str) -> None:
|
||||
"""Report a warning."""
|
||||
"""Log a warning."""
|
||||
self._console.print(f"⚠️ [yellow]{message}[/yellow]")
|
||||
|
||||
def success(self, message: str) -> None:
|
||||
"""Report success."""
|
||||
"""Log success."""
|
||||
self._console.print(f"🚀 [green]{message}[/green]")
|
||||
|
||||
def info(self, message: str) -> None:
|
||||
"""Report information."""
|
||||
"""Log information."""
|
||||
self._console.print(message)
|
||||
|
||||
def __call__(self, progress_update: DSProgress) -> None:
|
||||
@ -1,18 +1,18 @@
|
||||
# Copyright (c) 2024 Microsoft Corporation.
|
||||
# Licensed under the MIT License
|
||||
|
||||
"""Types for status reporting."""
|
||||
"""Logging types."""
|
||||
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class ReporterType(str, Enum):
|
||||
"""The type of reporter to use."""
|
||||
class LoggerType(str, Enum):
|
||||
"""The type of logger to use."""
|
||||
|
||||
RICH = "rich"
|
||||
PRINT = "print"
|
||||
NONE = "none"
|
||||
|
||||
def __str__(self):
|
||||
"""Return the string representation of the enum value."""
|
||||
"""Return a string representation of the enum value."""
|
||||
return self.value
|
||||
@ -1,36 +0,0 @@
|
||||
# Copyright (c) 2024 Microsoft Corporation.
|
||||
# Licensed under the MIT License
|
||||
|
||||
"""Factory functions for creating loggers."""
|
||||
|
||||
from graphrag.logging.base import ProgressReporter
|
||||
from graphrag.logging.null_progress import NullProgressReporter
|
||||
from graphrag.logging.print_progress import PrintProgressReporter
|
||||
from graphrag.logging.rich_progress import RichProgressReporter
|
||||
from graphrag.logging.types import ReporterType
|
||||
|
||||
|
||||
def create_progress_reporter(
|
||||
reporter_type: ReporterType = ReporterType.NONE,
|
||||
) -> ProgressReporter:
|
||||
"""Load a progress reporter.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
reporter_type : {"rich", "print", "none"}, default=rich
|
||||
The type of progress reporter to load.
|
||||
|
||||
Returns
|
||||
-------
|
||||
ProgressReporter
|
||||
"""
|
||||
match reporter_type:
|
||||
case ReporterType.RICH:
|
||||
return RichProgressReporter("GraphRAG Indexer ")
|
||||
case ReporterType.PRINT:
|
||||
return PrintProgressReporter("GraphRAG Indexer ")
|
||||
case ReporterType.NONE:
|
||||
return NullProgressReporter()
|
||||
case _:
|
||||
msg = f"Invalid progress reporter type: {reporter_type}"
|
||||
raise ValueError(msg)
|
||||
@ -1,4 +1,4 @@
|
||||
# Copyright (c) 2024 Microsoft Corporation.
|
||||
# Licensed under the MIT License
|
||||
|
||||
"""GraphRAG knowledge model package root."""
|
||||
"""Knowledge model package."""
|
||||
|
||||
@ -15,7 +15,7 @@ from graphrag.config.models.llm_parameters import LLMParameters
|
||||
from graphrag.index.input.factory import create_input
|
||||
from graphrag.index.llm.load_llm import load_llm_embeddings
|
||||
from graphrag.index.operations.chunk_text import chunk_text
|
||||
from graphrag.logging.base import ProgressReporter
|
||||
from graphrag.logger.base import ProgressLogger
|
||||
from graphrag.prompt_tune.defaults import (
|
||||
MIN_CHUNK_OVERLAP,
|
||||
MIN_CHUNK_SIZE,
|
||||
@ -54,7 +54,7 @@ async def load_docs_in_chunks(
|
||||
config: GraphRagConfig,
|
||||
select_method: DocSelectionType,
|
||||
limit: int,
|
||||
reporter: ProgressReporter,
|
||||
reporter: ProgressLogger,
|
||||
chunk_size: int = MIN_CHUNK_SIZE,
|
||||
n_subset_max: int = N_SUBSET_MAX,
|
||||
k: int = K,
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
# Copyright (c) 2024 Microsoft Corporation.
|
||||
# Licensed under the MIT License
|
||||
|
||||
"""All prompts for indexing."""
|
||||
"""All prompts for the indexing engine."""
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
# Copyright (c) 2024 Microsoft Corporation.
|
||||
# Licensed under the MIT License
|
||||
|
||||
"""All prompts for query."""
|
||||
"""All prompts for the query engine."""
|
||||
|
||||
@ -8,8 +8,8 @@ from collections.abc import Callable
|
||||
|
||||
from openai import AsyncAzureOpenAI, AsyncOpenAI, AzureOpenAI, OpenAI
|
||||
|
||||
from graphrag.logging.base import StatusLogger
|
||||
from graphrag.logging.console import ConsoleReporter
|
||||
from graphrag.logger.base import StatusLogger
|
||||
from graphrag.logger.console import ConsoleReporter
|
||||
from graphrag.query.llm.base import BaseTextEmbedding
|
||||
from graphrag.query.llm.oai.typing import OpenaiApiType
|
||||
|
||||
|
||||
@ -15,7 +15,7 @@ from tenacity import (
|
||||
wait_exponential_jitter,
|
||||
)
|
||||
|
||||
from graphrag.logging.base import StatusLogger
|
||||
from graphrag.logger.base import StatusLogger
|
||||
from graphrag.query.llm.base import BaseLLM, BaseLLMCallback
|
||||
from graphrag.query.llm.oai.base import OpenAILLMImpl
|
||||
from graphrag.query.llm.oai.typing import (
|
||||
|
||||
@ -18,7 +18,7 @@ from tenacity import (
|
||||
wait_exponential_jitter,
|
||||
)
|
||||
|
||||
from graphrag.logging.base import StatusLogger
|
||||
from graphrag.logger.base import StatusLogger
|
||||
from graphrag.query.llm.base import BaseTextEmbedding
|
||||
from graphrag.query.llm.oai.base import OpenAILLMImpl
|
||||
from graphrag.query.llm.oai.typing import (
|
||||
|
||||
@ -13,7 +13,7 @@ from azure.identity import DefaultAzureCredential
|
||||
from azure.storage.blob import BlobServiceClient
|
||||
from datashaper import Progress
|
||||
|
||||
from graphrag.logging.base import ProgressReporter
|
||||
from graphrag.logger.base import ProgressLogger
|
||||
from graphrag.storage.pipeline_storage import PipelineStorage
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
@ -95,7 +95,7 @@ class BlobPipelineStorage(PipelineStorage):
|
||||
self,
|
||||
file_pattern: re.Pattern[str],
|
||||
base_dir: str | None = None,
|
||||
progress: ProgressReporter | None = None,
|
||||
progress: ProgressLogger | None = None,
|
||||
file_filter: dict[str, Any] | None = None,
|
||||
max_count=-1,
|
||||
) -> Iterator[tuple[str, dict[str, Any]]]:
|
||||
|
||||
@ -16,7 +16,7 @@ from aiofiles.os import remove
|
||||
from aiofiles.ospath import exists
|
||||
from datashaper import Progress
|
||||
|
||||
from graphrag.logging.base import ProgressReporter
|
||||
from graphrag.logger.base import ProgressLogger
|
||||
from graphrag.storage.pipeline_storage import PipelineStorage
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
@ -38,7 +38,7 @@ class FilePipelineStorage(PipelineStorage):
|
||||
self,
|
||||
file_pattern: re.Pattern[str],
|
||||
base_dir: str | None = None,
|
||||
progress: ProgressReporter | None = None,
|
||||
progress: ProgressLogger | None = None,
|
||||
file_filter: dict[str, Any] | None = None,
|
||||
max_count=-1,
|
||||
) -> Iterator[tuple[str, dict[str, Any]]]:
|
||||
|
||||
@ -8,7 +8,7 @@ from abc import ABCMeta, abstractmethod
|
||||
from collections.abc import Iterator
|
||||
from typing import Any
|
||||
|
||||
from graphrag.logging.base import ProgressReporter
|
||||
from graphrag.logger.base import ProgressLogger
|
||||
|
||||
|
||||
class PipelineStorage(metaclass=ABCMeta):
|
||||
@ -19,7 +19,7 @@ class PipelineStorage(metaclass=ABCMeta):
|
||||
self,
|
||||
file_pattern: re.Pattern[str],
|
||||
base_dir: str | None = None,
|
||||
progress: ProgressReporter | None = None,
|
||||
progress: ProgressLogger | None = None,
|
||||
file_filter: dict[str, Any] | None = None,
|
||||
max_count=-1,
|
||||
) -> Iterator[tuple[str, dict[str, Any]]]:
|
||||
|
||||
1576
poetry.lock
generated
1576
poetry.lock
generated
File diff suppressed because it is too large
Load Diff
@ -132,7 +132,6 @@ coverage_report = 'coverage report --omit "**/tests/**" --show-missing'
|
||||
check_format = 'ruff format . --check'
|
||||
fix = "ruff check --fix ."
|
||||
fix_unsafe = "ruff check --fix --unsafe-fixes ."
|
||||
|
||||
_test_all = "coverage run -m pytest ./tests"
|
||||
test_unit = "pytest ./tests/unit"
|
||||
test_integration = "pytest ./tests/integration"
|
||||
@ -146,7 +145,6 @@ query = "python -m graphrag query"
|
||||
prompt_tune = "python -m graphrag prompt-tune"
|
||||
# Pass in a test pattern
|
||||
test_only = "pytest -s -k"
|
||||
|
||||
serve_docs = "mkdocs serve"
|
||||
build_docs = "mkdocs build"
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user