updated files to comply with graphrag-v2++

This commit is contained in:
Gabriel Nieves 2025-03-21 12:36:09 +00:00
parent 942b2c63a1
commit aacfce0fa8
7 changed files with 1760 additions and 1133 deletions

View File

@ -40,13 +40,15 @@ data_route = APIRouter(
response_model=StorageNameList, response_model=StorageNameList,
responses={200: {"model": StorageNameList}}, responses={200: {"model": StorageNameList}},
) )
async def get_all_data_containers(): async def get_all_data_containers(
container_store_client=Depends(get_cosmos_container_store_client),
):
""" """
Retrieve a list of all data containers. Retrieve a list of all data containers.
""" """
items = [] items = []
try: try:
container_store_client = get_cosmos_container_store_client() # container_store_client = get_cosmos_container_store_client()
for item in container_store_client.read_all_items(): for item in container_store_client.read_all_items():
if item["type"] == "data": if item["type"] == "data":
items.append(item["human_readable_name"]) items.append(item["human_readable_name"])
@ -161,6 +163,8 @@ async def upload_files(
}) })
return BaseResponse(status="File upload successful.") return BaseResponse(status="File upload successful.")
except Exception as e: except Exception as e:
# import traceback
# traceback.print_exc()
logger = load_pipeline_logger() logger = load_pipeline_logger()
logger.error( logger.error(
message="Error uploading files.", message="Error uploading files.",

View File

@ -9,16 +9,16 @@ from graphrag_app.logger.load_logger import load_pipeline_logger
from graphrag_app.logger.pipeline_job_updater import PipelineJobUpdater from graphrag_app.logger.pipeline_job_updater import PipelineJobUpdater
from graphrag_app.logger.typing import ( from graphrag_app.logger.typing import (
Logger, Logger,
PipelineAppInsightsReportingConfig, PipelineAppInsightsLogger,
PipelineReportingConfigTypes, # PipelineReportingConfigTypes,
) )
__all__ = [ __all__ = [
"Logger", "Logger",
"ApplicationInsightsWorkflowCallbacks", "ApplicationInsightsWorkflowCallbacks",
"ConsoleWorkflowCallbacks", "ConsoleWorkflowCallbacks",
"PipelineAppInsightsReportingConfig", "PipelineAppInsightsLogger",
"PipelineJobUpdater", "PipelineJobUpdater",
"PipelineReportingConfigTypes", # "PipelineReportingConfigTypes",
"load_pipeline_logger", "load_pipeline_logger",
] ]

View File

@ -2,7 +2,7 @@
# Licensed under the MIT License. # Licensed under the MIT License.
import os import os
from pathlib import Path from pathlib import PurePosixPath
from typing import List from typing import List
from graphrag.callbacks.file_workflow_callbacks import FileWorkflowCallbacks from graphrag.callbacks.file_workflow_callbacks import FileWorkflowCallbacks
@ -44,7 +44,9 @@ def load_pipeline_logger(
log_blob_name = os.path.join(logging_dir, log_blob_name) log_blob_name = os.path.join(logging_dir, log_blob_name)
# ensure the root directory exists; if not, create it # ensure the root directory exists; if not, create it
blob_service_client = azure_client_manager.get_blob_service_client() blob_service_client = azure_client_manager.get_blob_service_client()
container_root = Path(log_blob_name).parts[0] container_root = PurePosixPath(log_blob_name).parts[0]
print(f"container_root={container_root}")
print(f"container_exist={blob_service_client.get_container_client(container_root).exists()}")
if not blob_service_client.get_container_client( if not blob_service_client.get_container_client(
container_root container_root
).exists(): ).exists():

View File

@ -5,10 +5,7 @@ import logging
from enum import Enum from enum import Enum
from typing import Literal from typing import Literal
from graphrag.index.config.reporting import ( from graphrag.logger.base import StatusLogger
PipelineReportingConfig,
PipelineReportingConfigTypes,
)
from pydantic import Field as pydantic_Field from pydantic import Field as pydantic_Field
@ -19,9 +16,7 @@ class Logger(Enum):
APP_INSIGHTS = (4, "app_insights") APP_INSIGHTS = (4, "app_insights")
class PipelineAppInsightsReportingConfig( class PipelineAppInsightsLogger(StatusLogger):
PipelineReportingConfig[Literal["app_insights"]]
):
"""Represents the ApplicationInsights reporting configuration for the pipeline.""" """Represents the ApplicationInsights reporting configuration for the pipeline."""
type: Literal["app_insights"] = Logger.APP_INSIGHTS.name.lower() type: Literal["app_insights"] = Logger.APP_INSIGHTS.name.lower()
@ -45,6 +40,6 @@ class PipelineAppInsightsReportingConfig(
# add the new type to the existing PipelineReportingConfigTypes # add the new type to the existing PipelineReportingConfigTypes
PipelineReportingConfigTypes = ( # StatusLogger = (
PipelineReportingConfigTypes | PipelineAppInsightsReportingConfig # StatusLogger | PipelineAppInsightsReportingConfig
) # )

View File

@ -11,6 +11,7 @@ from azure.cosmos import (
from azure.identity import DefaultAzureCredential from azure.identity import DefaultAzureCredential
from azure.storage.blob import BlobServiceClient from azure.storage.blob import BlobServiceClient
from azure.storage.blob.aio import BlobServiceClient as BlobServiceClientAsync from azure.storage.blob.aio import BlobServiceClient as BlobServiceClientAsync
from pathlib import PurePosixPath
ENDPOINT_ERROR_MSG = "Could not find connection string in environment variables" ENDPOINT_ERROR_MSG = "Could not find connection string in environment variables"
@ -115,6 +116,9 @@ class AzureClientManager:
_BlobServiceClientSingletonAsync.get_instance() _BlobServiceClientSingletonAsync.get_instance()
) )
# parse account hostname from the azure storage connection string or blob url
self.storage_account_hostname = PurePosixPath(self.storage_blob_url).parts[1]
# parse account name from the azure storage connection string or blob url # parse account name from the azure storage connection string or blob url
if self.storage_connection_string: if self.storage_connection_string:
meta_info = {} meta_info = {}
@ -127,12 +131,8 @@ class AzureClientManager:
meta_info[m[0]] = m[1] meta_info[m[0]] = m[1]
self.storage_account_name = meta_info["AccountName"] self.storage_account_name = meta_info["AccountName"]
else: else:
self.storage_account_name = self.storage_blob_url.split("//")[1].split(".")[ self.storage_account_name = self.storage_account_hostname.split(".")[0]
0
]
# parse account hostname from the azure storage connection string or blob url
self.storage_account_hostname = self._blob_service_client.url.split("//")[1]
def get_blob_service_client(self) -> BlobServiceClient: def get_blob_service_client(self) -> BlobServiceClient:
""" """

2843
backend/poetry.lock generated

File diff suppressed because it is too large. (Load Diff)

View File

@ -51,7 +51,8 @@ fastapi = ">=0.110.0"
fastapi-offline = ">=1.7.3" fastapi-offline = ">=1.7.3"
fastparquet = ">=2023.10.1" fastparquet = ">=2023.10.1"
fsspec = ">=2024.2.0" fsspec = ">=2024.2.0"
graphrag = "==1.2.0" # graphrag = "==1.2.0"
graphrag = { git = "https://github.com/microsoft/graphrag.git", rev = "3b1e70c06b5a22efe6ca38ac16e83d0428f660f0" }
httpx = ">=0.25.2" httpx = ">=0.25.2"
kubernetes = ">=29.0.0" kubernetes = ">=29.0.0"
networkx = ">=3.2.1" networkx = ">=3.2.1"