78 lines
3.3 KiB
Python
Raw Normal View History

2024-06-26 15:45:06 -04:00
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import os
from datetime import datetime
from pathlib import Path
2024-07-15 16:42:22 -07:00
from typing import List
2024-06-26 15:45:06 -04:00
from datashaper import WorkflowCallbacks, WorkflowCallbacksManager
from graphrag.index.reporting import FileWorkflowCallbacks
from src.api.azure_clients import BlobServiceClientSingleton
from src.reporting.application_insights_workflow_callbacks import (
ApplicationInsightsWorkflowCallbacks,
)
from src.reporting.blob_workflow_callbacks import BlobWorkflowCallbacks
from src.reporting.console_workflow_callbacks import ConsoleWorkflowCallbacks
2024-07-15 16:42:22 -07:00
from src.reporting.typing import Reporters
2024-06-26 15:45:06 -04:00
2024-07-15 16:42:22 -07:00
# def load_pipeline_reporter_from_list(
def load_pipeline_reporter(
2024-06-26 15:45:06 -04:00
reporting_dir: str | None,
reporters: List[Reporters] | None = [],
2024-07-15 16:42:22 -07:00
index_name: str = "",
num_workflow_steps: int = 0,
2024-06-26 15:45:06 -04:00
) -> WorkflowCallbacks:
2024-07-15 16:42:22 -07:00
"""Create a callback manager and register a list of reporters.
Reporters may be configured as generic loggers or associated with a specified indexing job.
"""
2024-06-26 15:45:06 -04:00
callback_manager = WorkflowCallbacksManager()
for reporter in reporters:
2024-07-15 16:42:22 -07:00
match reporter:
case Reporters.BLOB:
# create a dedicated container for logs
container_name = "logs"
if reporting_dir is not None:
container_name = os.path.join(reporting_dir, container_name)
# ensure the root directory exists; if not, create it
blob_service_client = BlobServiceClientSingleton.get_instance()
container_root = Path(container_name).parts[0]
if not blob_service_client.get_container_client(
container_root
).exists():
blob_service_client.create_container(container_root)
# register the blob reporter
callback_manager.register(
BlobWorkflowCallbacks(
storage_account_blob_url=os.environ["STORAGE_ACCOUNT_BLOB_URL"],
container_name=container_name,
blob_name=f"{datetime.now().strftime('%Y-%m-%d-%H:%M:%S:%f')}.logs.txt",
index_name=index_name,
num_workflow_steps=num_workflow_steps,
)
2024-06-26 15:45:06 -04:00
)
2024-07-15 16:42:22 -07:00
case Reporters.FILE:
callback_manager.register(FileWorkflowCallbacks(dir=reporting_dir))
case Reporters.APP_INSIGHTS:
if os.getenv("APP_INSIGHTS_CONNECTION_STRING"):
callback_manager.register(
ApplicationInsightsWorkflowCallbacks(
connection_string=os.environ[
"APP_INSIGHTS_CONNECTION_STRING"
],
index_name=index_name,
num_workflow_steps=num_workflow_steps,
)
)
case _:
print(f"WARNING: unknown reporter type: {reporter}. Skipping.")
# always register the console reporter as a fallback
callback_manager.register(
ConsoleWorkflowCallbacks(
index_name=index_name, num_workflow_steps=num_workflow_steps
)
)
2024-06-26 15:45:06 -04:00
return callback_manager