Mirror of https://github.com/getzep/graphiti.git, synced 2025-06-27 02:00:02 +00:00.

* fix: Address graph disconnect * chore: Remove valid_to and valid_from setting in extract edges step (will be handled during invalidation step)
148 lines · 4.4 KiB · Python
import logging
from datetime import datetime
from typing import Any

from core.llm_client import LLMClient
from core.nodes import EntityNode, EpisodicNode
from core.prompts import prompt_library
|
|
|
|
# Module-level logger named after this module, per stdlib logging convention.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
async def extract_new_nodes(
    llm_client: LLMClient,
    episode: EpisodicNode,
    relevant_schema: dict[str, Any],
    previous_episodes: list[EpisodicNode],
) -> list[EntityNode]:
    """Extract entity nodes from an episode that are not already in the schema.

    Builds an LLM context from the episode content, its timestamp, the nodes
    already present in ``relevant_schema``, and prior episodes, then asks the
    LLM (prompt ``extract_nodes.v1``) for new nodes. Candidates whose names
    already appear in the schema are skipped.

    Args:
        llm_client: Client used to issue the LLM request.
        episode: Episode whose content is mined for new entities.
        relevant_schema: Mapping with a ``"nodes"`` key of
            ``{name: {"label": ..., "uuid": ...}}`` entries.
        previous_episodes: Earlier episodes supplied as extra context.

    Returns:
        Newly created ``EntityNode`` objects (not yet persisted).
    """
    # Prepare context for LLM
    existing_nodes = [
        {"name": node_name, "label": node_info["label"], "uuid": node_info["uuid"]}
        for node_name, node_info in relevant_schema["nodes"].items()
    ]

    context = {
        "episode_content": episode.content,
        # Episodes may lack a validity timestamp; serialize only when present.
        "episode_timestamp": (
            episode.valid_at.isoformat() if episode.valid_at else None
        ),
        "existing_nodes": existing_nodes,
        "previous_episodes": [
            {
                "content": ep.content,
                "timestamp": ep.valid_at.isoformat() if ep.valid_at else None,
            }
            for ep in previous_episodes
        ],
    }

    llm_response = await llm_client.generate_response(
        prompt_library.extract_nodes.v1(context)
    )
    new_nodes_data = llm_response.get("new_nodes", [])
    logger.info(f"Extracted new nodes: {new_nodes_data}")

    # Convert the extracted data into EntityNode objects. A precomputed name
    # set gives O(1) membership tests instead of re-scanning the existing-node
    # list for every candidate the LLM returned.
    existing_names = {existing_node["name"] for existing_node in existing_nodes}
    new_nodes = []
    for node_data in new_nodes_data:
        # Check if the node already exists
        if node_data["name"] in existing_names:
            logger.info(f"Node {node_data['name']} already exists, skipping creation.")
            continue
        new_node = EntityNode(
            name=node_data["name"],
            labels=node_data["labels"],
            summary=node_data["summary"],
            created_at=datetime.now(),
        )
        new_nodes.append(new_node)
        logger.info(f"Created new node: {new_node.name} (UUID: {new_node.uuid})")

    return new_nodes
|
|
|
|
|
|
async def extract_nodes(
    llm_client: LLMClient,
    episode: EpisodicNode,
    previous_episodes: list[EpisodicNode],
) -> list[EntityNode]:
    """Ask the LLM for entity nodes mentioned in ``episode``.

    Unlike ``extract_new_nodes`` there is no pre-existing schema to dedupe
    against: every node returned by the ``extract_nodes.v3`` prompt is
    materialized as an ``EntityNode``.

    Args:
        llm_client: Client used to issue the LLM request.
        episode: Episode whose content is mined for entities.
        previous_episodes: Earlier episodes passed along as context.

    Returns:
        The freshly constructed ``EntityNode`` objects (not yet persisted).
    """

    def _iso_or_none(ts):
        # Episodes may lack a validity timestamp; serialize only when present.
        return ts.isoformat() if ts else None

    # Prepare context for LLM
    history = [
        {"content": prior.content, "timestamp": _iso_or_none(prior.valid_at)}
        for prior in previous_episodes
    ]
    context = {
        "episode_content": episode.content,
        "episode_timestamp": _iso_or_none(episode.valid_at),
        "previous_episodes": history,
    }

    llm_response = await llm_client.generate_response(
        prompt_library.extract_nodes.v3(context)
    )
    new_nodes_data = llm_response.get("new_nodes", [])
    logger.info(f"Extracted new nodes: {new_nodes_data}")

    # Convert the extracted data into EntityNode objects
    new_nodes = []
    for payload in new_nodes_data:
        entity = EntityNode(
            name=payload["name"],
            labels=payload["labels"],
            summary=payload["summary"],
            created_at=datetime.now(),
        )
        new_nodes.append(entity)
        logger.info(f"Created new node: {entity.name} (UUID: {entity.uuid})")

    return new_nodes
|
|
|
|
|
|
async def dedupe_extracted_nodes(
    llm_client: LLMClient,
    extracted_nodes: list[EntityNode],
    existing_nodes: list[EntityNode],
) -> list[EntityNode]:
    """Resolve freshly extracted nodes against existing ones via an LLM pass.

    Sends name/summary pairs for both node lists to the ``dedupe_nodes.v1``
    prompt and maps the returned names back to full node objects.

    Args:
        llm_client: Client used to issue the LLM request.
        extracted_nodes: Candidate nodes from the current extraction step.
        existing_nodes: Nodes already known to the graph.

    Returns:
        The deduplicated nodes, preferring existing nodes over extracted
        duplicates with the same name.
    """
    # build node map — existing nodes are inserted first so an extracted
    # duplicate resolves to the already-persisted node of the same name
    node_map: dict[str, EntityNode] = {}
    for node in existing_nodes:
        node_map[node.name] = node
    for node in extracted_nodes:
        if node.name in node_map:
            continue
        node_map[node.name] = node

    # Prepare context for LLM
    existing_nodes_context = [
        {"name": node.name, "summary": node.summary} for node in existing_nodes
    ]

    extracted_nodes_context = [
        {"name": node.name, "summary": node.summary} for node in extracted_nodes
    ]

    context = {
        "existing_nodes": existing_nodes_context,
        "extracted_nodes": extracted_nodes_context,
    }

    llm_response = await llm_client.generate_response(
        prompt_library.dedupe_nodes.v1(context)
    )

    new_nodes_data = llm_response.get("new_nodes", [])
    logger.info(f"Deduplicated nodes: {new_nodes_data}")

    # Get full node data. The LLM response is untrusted: it may return a name
    # that was never in either input list, which previously raised KeyError —
    # skip such names with a warning instead of crashing the pipeline.
    nodes = []
    for node_data in new_nodes_data:
        node = node_map.get(node_data["name"])
        if node is None:
            logger.warning(f"Deduped node name not found in node map: {node_data['name']}")
            continue
        nodes.append(node)

    return nodes
|