81 lines
3.3 KiB
Python
Raw Normal View History

2024-06-26 15:45:06 -04:00
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import os
from pathlib import Path
2024-07-15 16:42:22 -07:00
from typing import List
2024-06-26 15:45:06 -04:00
from datashaper import WorkflowCallbacks, WorkflowCallbacksManager
from graphrag.index.reporting import FileWorkflowCallbacks
2024-12-30 01:59:08 -05:00
from src.api.azure_clients import AzureClientManager
from src.logger.application_insights_workflow_callbacks import (
2024-06-26 15:45:06 -04:00
ApplicationInsightsWorkflowCallbacks,
)
2024-12-30 01:59:08 -05:00
from src.logger.blob_workflow_callbacks import BlobWorkflowCallbacks
from src.logger.console_workflow_callbacks import ConsoleWorkflowCallbacks
from src.logger.typing import Reporters
2024-06-26 15:45:06 -04:00
2024-12-30 01:59:08 -05:00
def load_pipeline_logger(
2024-06-26 15:45:06 -04:00
reporting_dir: str | None,
reporters: List[Reporters] | None = [],
2024-07-15 16:42:22 -07:00
index_name: str = "",
num_workflow_steps: int = 0,
2024-06-26 15:45:06 -04:00
) -> WorkflowCallbacks:
2024-12-30 01:59:08 -05:00
"""Create a callback manager and register a list of loggers.
Loggers may be configured as generic loggers or associated with a specified indexing job.
2024-07-15 16:42:22 -07:00
"""
2024-12-30 01:59:08 -05:00
# always register the console logger if no loggers are specified
if Reporters.CONSOLE not in reporters:
reporters.append(Reporters.CONSOLE)
azure_client_manager = AzureClientManager()
2024-06-26 15:45:06 -04:00
callback_manager = WorkflowCallbacksManager()
for reporter in reporters:
2024-07-15 16:42:22 -07:00
match reporter:
case Reporters.BLOB:
# create a dedicated container for logs
container_name = "logs"
if reporting_dir is not None:
container_name = os.path.join(reporting_dir, container_name)
# ensure the root directory exists; if not, create it
2024-12-30 01:59:08 -05:00
blob_service_client = azure_client_manager.get_blob_service_client()
2024-07-15 16:42:22 -07:00
container_root = Path(container_name).parts[0]
if not blob_service_client.get_container_client(
container_root
).exists():
blob_service_client.create_container(container_root)
# register the blob reporter
callback_manager.register(
BlobWorkflowCallbacks(
2024-12-30 01:59:08 -05:00
blob_service_client=blob_service_client,
2024-07-15 16:42:22 -07:00
container_name=container_name,
index_name=index_name,
num_workflow_steps=num_workflow_steps,
)
2024-06-26 15:45:06 -04:00
)
2024-07-15 16:42:22 -07:00
case Reporters.FILE:
callback_manager.register(FileWorkflowCallbacks(dir=reporting_dir))
case Reporters.APP_INSIGHTS:
if os.getenv("APP_INSIGHTS_CONNECTION_STRING"):
callback_manager.register(
ApplicationInsightsWorkflowCallbacks(
connection_string=os.environ[
"APP_INSIGHTS_CONNECTION_STRING"
],
index_name=index_name,
num_workflow_steps=num_workflow_steps,
)
)
2024-08-09 22:22:49 -04:00
case Reporters.CONSOLE:
2024-12-30 01:59:08 -05:00
callback_manager.register(
ConsoleWorkflowCallbacks(
index_name=index_name, num_workflow_steps=num_workflow_steps
)
)
2024-07-15 16:42:22 -07:00
case _:
print(f"WARNING: unknown reporter type: {reporter}. Skipping.")
2024-06-26 15:45:06 -04:00
return callback_manager