graphrag/graphrag/index/operations/finalize_community_reports.py
Nathan Evans 61a309b182
Incremental model alignment (#1766)
* Used shared schema lists for all final columns

* Semver
2025-02-25 13:14:42 -06:00

34 lines
993 B
Python

# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License
"""All the steps to transform final entities."""
from uuid import uuid4
import pandas as pd
from graphrag.data_model.schemas import COMMUNITY_REPORTS_FINAL_COLUMNS
def finalize_community_reports(
    reports: pd.DataFrame,
    communities: pd.DataFrame,
) -> pd.DataFrame:
    """Build the final community reports table.

    The reports are left-joined with selected community columns on
    ``community``, the ``community`` column is cast to ``int`` and mirrored
    into ``human_readable_id``, and every row receives a newly generated
    ``uuid4`` hex string as its ``id``.

    Args:
        reports: Community reports; must contain a ``community`` column.
        communities: Communities table; must contain the ``community``,
            ``parent``, ``children``, ``size`` and ``period`` columns.

    Returns:
        A DataFrame restricted to ``COMMUNITY_REPORTS_FINAL_COLUMNS``.
    """
    # Only these community fields are carried over onto the reports.
    shared_fields = ["community", "parent", "children", "size", "period"]
    community_slice = communities.loc[:, shared_fields]

    # Left join: every report row is kept, matched or not.
    merged = reports.merge(
        community_slice,
        on="community",
        how="left",
        copy=False,
    )

    merged["community"] = merged["community"].astype(int)
    # The human-readable id is simply the integer community number.
    merged["human_readable_id"] = merged["community"]

    # One fresh random identifier per output row.
    row_count = len(merged)
    merged["id"] = [uuid4().hex for _ in range(row_count)]

    final_view = merged.loc[:, COMMUNITY_REPORTS_FINAL_COLUMNS]
    return final_view