fix: Address graph disconnect (#7)

* fix: Address graph disconnect * chore: Remove valid_to and valid_from setting in extract edges step (will be handled during invalidation step)
2025-06-27 02:00:02 +00:00 · 2024-08-19 09:37:56 -04:00 · 2024-08-19 09:37:56 -04:00 · 40e74a2e97
commit 40e74a2e97
parent 4db3906049
6 changed files with 72 additions and 25 deletions
--- a/core/graphiti.py
+++ b/core/graphiti.py
@ -113,12 +113,16 @@ class Graphiti:
            await asyncio.gather(
                *[node.generate_name_embedding(embedder) for node in extracted_nodes]
            )
-
            existing_nodes = await get_relevant_nodes(extracted_nodes, self.driver)
-
+            logger.info(
+                f"Extracted nodes: {[(n.name, n.uuid) for n in extracted_nodes]}"
+            )
            new_nodes = await dedupe_extracted_nodes(
                self.llm_client, extracted_nodes, existing_nodes
            )
+            logger.info(
+                f"Deduped touched nodes: {[(n.name, n.uuid) for n in new_nodes]}"
+            )
            nodes.extend(new_nodes)

            extracted_edges = await extract_edges(
@ -130,11 +134,17 @@ class Graphiti:
            )

            existing_edges = await get_relevant_edges(extracted_edges, self.driver)
+            logger.info(f"Existing edges: {[(e.name, e.uuid) for e in existing_edges]}")
+            logger.info(
+                f"Extracted edges: {[(e.name, e.uuid) for e in extracted_edges]}"
+            )

            new_edges = await dedupe_extracted_edges(
                self.llm_client, extracted_edges, existing_edges
            )

+            logger.info(f"Deduped edges: {[(e.name, e.uuid) for e in new_edges]}")
+
            entity_edges.extend(new_edges)
            episodic_edges.extend(
                build_episodic_edges(
--- a/core/prompts/extract_edges.py
+++ b/core/prompts/extract_edges.py
@ -135,17 +135,18 @@ def v2(context: dict[str, any]) -> list[Message]:
        Message(
            role="user",
            content=f"""
-        Given the following context, extract new edges (relationships) that need to be added to the knowledge graph:
+        Given the following context, extract edges (relationships) that need to be added to the knowledge graph:
        Nodes:
        {json.dumps(context['nodes'], indent=2)}

-        New Episode:
-        Content: {context['episode_content']}
+        

-        Previous Episodes:
+        Episodes:
        {json.dumps([ep['content'] for ep in context['previous_episodes']], indent=2)}
+        {context['episode_content']} <-- New Episode
+        

-        Extract new entity edges based on the content of the current episode, the given nodes, and context from previous episodes.
+        Extract entity edges based on the content of the current episode, the given nodes, and context from previous episodes.

        Guidelines:
        1. Create edges only between the provided nodes.
@ -168,7 +169,7 @@ def v2(context: dict[str, any]) -> list[Message]:
            ]
        }}

-        If no new edges need to be added, return an empty list for "new_edges".
+        If no edges need to be added, return an empty list for "edges".
        """,
        ),
    ]
--- a/core/prompts/extract_nodes.py
+++ b/core/prompts/extract_nodes.py
@ -7,11 +7,13 @@ from .models import Message, PromptVersion, PromptFunction
 class Prompt(Protocol):
    v1: PromptVersion
    v2: PromptVersion
+    v3: PromptVersion


 class Versions(TypedDict):
    v1: PromptFunction
    v2: PromptFunction
+    v3: PromptFunction


 def v1(context: dict[str, any]) -> list[Message]:
@ -103,4 +105,37 @@ def v2(context: dict[str, any]) -> list[Message]:
    ]


-versions: Versions = {"v1": v1, "v2": v2}
+def v3(context: dict[str, any]) -> list[Message]:
+    sys_prompt = """You are an AI assistant that extracts entity nodes from conversational text. Your primary task is to identify and extract the speaker and other significant entities mentioned in the conversation."""
+
+    user_prompt = f"""
+Given the following conversation, extract entity nodes that are explicitly or implicitly mentioned:
+
+Conversation:
+{json.dumps([ep['content'] for ep in context['previous_episodes']], indent=2)}
+{context["episode_content"]}
+
+Guidelines:
+1. ALWAYS extract the speaker/actor as the first node. The speaker is the part before the colon in each line of dialogue.
+2. Extract other significant entities, concepts, or actors mentioned in the conversation.
+3. Provide concise but informative summaries for each extracted node.
+4. Avoid creating nodes for relationships or actions.
+
+Respond with a JSON object in the following format:
+{{
+    "new_nodes": [
+        {{
+            "name": "Unique identifier for the node (use the speaker's name for speaker nodes)",
+            "labels": ["Entity", "Speaker" for speaker nodes, "OptionalAdditionalLabel"],
+            "summary": "Brief summary of the node's role or significance"
+        }}
+    ]
+}}
+"""
+    return [
+        Message(role="system", content=sys_prompt),
+        Message(role="user", content=user_prompt),
+    ]
+
+
+versions: Versions = {"v1": v1, "v2": v2, "v3": v3}
--- a/core/utils/maintenance/edge_operations.py
+++ b/core/utils/maintenance/edge_operations.py
@ -170,20 +170,21 @@ async def extract_edges(
    # Convert the extracted data into EntityEdge objects
    edges = []
    for edge_data in edges_data:
-        edge = EntityEdge(
-            source_node_uuid=edge_data["source_node_uuid"],
-            target_node_uuid=edge_data["target_node_uuid"],
-            name=edge_data["relation_type"],
-            fact=edge_data["fact"],
-            episodes=[episode.uuid],
-            created_at=datetime.now(),
-            valid_at=edge_data["valid_at"],
-            invalid_at=edge_data["invalid_at"],
-        )
-        edges.append(edge)
-        logger.info(
-            f"Created new edge: {edge.name} from (UUID: {edge.source_node_uuid}) to (UUID: {edge.target_node_uuid})"
-        )
+        if edge_data["target_node_uuid"] and edge_data["source_node_uuid"]:
+            edge = EntityEdge(
+                source_node_uuid=edge_data["source_node_uuid"],
+                target_node_uuid=edge_data["target_node_uuid"],
+                name=edge_data["relation_type"],
+                fact=edge_data["fact"],
+                episodes=[episode.uuid],
+                created_at=datetime.now(),
+                valid_at=None,
+                invalid_at=None,
+            )
+            edges.append(edge)
+            logger.info(
+                f"Created new edge: {edge.name} from (UUID: {edge.source_node_uuid}) to (UUID: {edge.target_node_uuid})"
+            )

    return edges

--- a/core/utils/maintenance/node_operations.py
+++ b/core/utils/maintenance/node_operations.py
@ -84,7 +84,7 @@ async def extract_nodes(
    }

    llm_response = await llm_client.generate_response(
-        prompt_library.extract_nodes.v2(context)
+        prompt_library.extract_nodes.v3(context)
    )
    new_nodes_data = llm_response.get("new_nodes", [])
    logger.info(f"Extracted new nodes: {new_nodes_data}")
--- a/runner.py
+++ b/runner.py
@ -49,7 +49,7 @@ async def main():
    )
    await client.add_episode(
        name="Message 2",
-        episode_body="Paul: I love bananas",
+        episode_body="Paul: I own many bananas",
        source_description="WhatsApp Message",
    )
    await client.add_episode(