updated files to comply with graphrag-v2++

This commit is contained in:
Gabriel Nieves 2025-03-21 12:36:09 +00:00
parent 942b2c63a1
commit aacfce0fa8
7 changed files with 1760 additions and 1133 deletions

View File

@ -40,13 +40,15 @@ data_route = APIRouter(
response_model=StorageNameList,
responses={200: {"model": StorageNameList}},
)
async def get_all_data_containers():
async def get_all_data_containers(
container_store_client=Depends(get_cosmos_container_store_client),
):
"""
Retrieve a list of all data containers.
"""
items = []
try:
container_store_client = get_cosmos_container_store_client()
# container_store_client = get_cosmos_container_store_client()
for item in container_store_client.read_all_items():
if item["type"] == "data":
items.append(item["human_readable_name"])
@ -161,6 +163,8 @@ async def upload_files(
})
return BaseResponse(status="File upload successful.")
except Exception as e:
# import traceback
# traceback.print_exc()
logger = load_pipeline_logger()
logger.error(
message="Error uploading files.",

View File

@ -9,16 +9,16 @@ from graphrag_app.logger.load_logger import load_pipeline_logger
from graphrag_app.logger.pipeline_job_updater import PipelineJobUpdater
from graphrag_app.logger.typing import (
Logger,
PipelineAppInsightsReportingConfig,
PipelineReportingConfigTypes,
PipelineAppInsightsLogger,
# PipelineReportingConfigTypes,
)
__all__ = [
"Logger",
"ApplicationInsightsWorkflowCallbacks",
"ConsoleWorkflowCallbacks",
"PipelineAppInsightsReportingConfig",
"PipelineAppInsightsLogger",
"PipelineJobUpdater",
"PipelineReportingConfigTypes",
# "PipelineReportingConfigTypes",
"load_pipeline_logger",
]

View File

@ -2,7 +2,7 @@
# Licensed under the MIT License.
import os
from pathlib import Path
from pathlib import PurePosixPath
from typing import List
from graphrag.callbacks.file_workflow_callbacks import FileWorkflowCallbacks
@ -44,7 +44,9 @@ def load_pipeline_logger(
log_blob_name = os.path.join(logging_dir, log_blob_name)
# ensure the root directory exists; if not, create it
blob_service_client = azure_client_manager.get_blob_service_client()
container_root = Path(log_blob_name).parts[0]
container_root = PurePosixPath(log_blob_name).parts[0]
print(f"container_root={container_root}")
print(f"container_exist={blob_service_client.get_container_client(container_root).exists()}")
if not blob_service_client.get_container_client(
container_root
).exists():

View File

@ -5,10 +5,7 @@ import logging
from enum import Enum
from typing import Literal
from graphrag.index.config.reporting import (
PipelineReportingConfig,
PipelineReportingConfigTypes,
)
from graphrag.logger.base import StatusLogger
from pydantic import Field as pydantic_Field
@ -19,9 +16,7 @@ class Logger(Enum):
APP_INSIGHTS = (4, "app_insights")
class PipelineAppInsightsReportingConfig(
PipelineReportingConfig[Literal["app_insights"]]
):
class PipelineAppInsightsLogger(StatusLogger):
"""Represents the ApplicationInsights reporting configuration for the pipeline."""
type: Literal["app_insights"] = Logger.APP_INSIGHTS.name.lower()
@ -45,6 +40,6 @@ class PipelineAppInsightsReportingConfig(
# add the new type to the existing PipelineReportingConfigTypes
PipelineReportingConfigTypes = (
PipelineReportingConfigTypes | PipelineAppInsightsReportingConfig
)
# StatusLogger = (
# StatusLogger | PipelineAppInsightsReportingConfig
# )

View File

@ -11,6 +11,7 @@ from azure.cosmos import (
from azure.identity import DefaultAzureCredential
from azure.storage.blob import BlobServiceClient
from azure.storage.blob.aio import BlobServiceClient as BlobServiceClientAsync
from pathlib import PurePosixPath
ENDPOINT_ERROR_MSG = "Could not find connection string in environment variables"
@ -115,6 +116,9 @@ class AzureClientManager:
_BlobServiceClientSingletonAsync.get_instance()
)
# parse account hostname from the azure storage connection string or blob url
self.storage_account_hostname = PurePosixPath(self.storage_blob_url).parts[1]
# parse account name from the azure storage connection string or blob url
if self.storage_connection_string:
meta_info = {}
@ -127,12 +131,8 @@ class AzureClientManager:
meta_info[m[0]] = m[1]
self.storage_account_name = meta_info["AccountName"]
else:
self.storage_account_name = self.storage_blob_url.split("//")[1].split(".")[
0
]
self.storage_account_name = self.storage_account_hostname.split(".")[0]
# parse account hostname from the azure storage connection string or blob url
self.storage_account_hostname = self._blob_service_client.url.split("//")[1]
def get_blob_service_client(self) -> BlobServiceClient:
"""

2843
backend/poetry.lock generated

File diff suppressed because it is too large. Load Diff

View File

@ -51,7 +51,8 @@ fastapi = ">=0.110.0"
fastapi-offline = ">=1.7.3"
fastparquet = ">=2023.10.1"
fsspec = ">=2024.2.0"
graphrag = "==1.2.0"
# graphrag = "==1.2.0"
graphrag = { git = "https://github.com/microsoft/graphrag.git", rev = "3b1e70c06b5a22efe6ca38ac16e83d0428f660f0" }
httpx = ">=0.25.2"
kubernetes = ">=29.0.0"
networkx = ">=3.2.1"