Stabilize smoke tests for query community context building (#908)

* Stabilize smoke tests for query community context building

* Fix CODEOWNERS
This commit is contained in:
Alonso Guevara 2024-08-12 13:17:40 -06:00 committed by GitHub
parent 073f650ba9
commit 7fd23fa79c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 21 additions and 9 deletions

View File

@ -0,0 +1,4 @@
{
"type": "patch",
"description": "Stabilize smoke tests for query context building"
}

View File

@ -2,5 +2,4 @@
# the repo. Unless a later match takes precedence,
# @global-owner1 and @global-owner2 will be requested for
# review when someone opens a pull request.
* @microsoft/societal-resilience
* @microsoft/graphrag-core-team
* @microsoft/societal-resilience @microsoft/graphrag-core-team

View File

@ -15,6 +15,10 @@ from graphrag.query.llm.text_utils import num_tokens
log = logging.getLogger(__name__)
NO_COMMUNITY_RECORDS_WARNING: str = (
"Warning: No community records added when building community context."
)
def build_community_context(
community_reports: list[CommunityReport],
@ -128,9 +132,9 @@ def build_community_context(
record_df = _convert_report_context_to_df(
context_records=batch_records,
header=header,
weight_column=community_weight_name
if entities and include_community_weight
else None,
weight_column=(
community_weight_name if entities and include_community_weight else None
),
rank_column=community_rank_name if include_community_rank else None,
)
if len(record_df) == 0:
@ -163,9 +167,7 @@ def build_community_context(
_cut_batch()
if len(all_context_records) == 0:
log.warning(
"Warning: No community records added when building community context."
)
log.warning(NO_COMMUNITY_RECORDS_WARNING)
return ([], {})
return all_context_text, {

View File

@ -16,6 +16,9 @@ import pandas as pd
import pytest
from graphrag.index.storage.blob_pipeline_storage import BlobPipelineStorage
from graphrag.query.context_builder.community_context import (
NO_COMMUNITY_RECORDS_WARNING,
)
log = logging.getLogger(__name__)
@ -25,6 +28,8 @@ gh_pages = os.environ.get("GH_PAGES") is not None
# cspell:disable-next-line well-known-key
WELL_KNOWN_AZURITE_CONNECTION_STRING = "DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1"
KNOWN_WARNINGS = [NO_COMMUNITY_RECORDS_WARNING]
def _load_fixtures():
"""Load all fixtures from the tests/data folder."""
@ -294,6 +299,8 @@ class TestIndexer:
result.stderr if "No existing dataset at" not in result.stderr else ""
)
assert stderror == "", f"Query failed with error: {stderror}"
assert (
stderror == "" or stderror.replace("\n", "") in KNOWN_WARNINGS
), f"Query failed with error: {stderror}"
assert result.stdout is not None, "Query returned no output"
assert len(result.stdout) > 0, "Query returned empty output"