diff --git a/.semversioner/next-release/patch-20250224201440103182.json b/.semversioner/next-release/patch-20250224201440103182.json new file mode 100644 index 00000000..94254328 --- /dev/null +++ b/.semversioner/next-release/patch-20250224201440103182.json @@ -0,0 +1,4 @@ +{ + "type": "patch", + "description": "Properly increment text unit IDs during updates." +} diff --git a/graphrag/index/update/incremental_index.py b/graphrag/index/update/incremental_index.py index 9eba64ab..3671d97e 100644 --- a/graphrag/index/update/incremental_index.py +++ b/graphrag/index/update/incremental_index.py @@ -301,6 +301,10 @@ def _update_and_merge_text_units( lambda x: [entity_id_mapping.get(i, i) for i in x] if x is not None else x ) + initial_id = old_text_units["human_readable_id"].max() + 1 + delta_text_units["human_readable_id"] = np.arange( + initial_id, initial_id + len(delta_text_units) + ) # Merge the final text units return pd.concat([old_text_units, delta_text_units], ignore_index=True, copy=False)