# graphiti/core/utils/maintenance/node_operations.py
import logging
from datetime import datetime
from time import time
from typing import Any

from core.llm_client import LLMClient
from core.nodes import EntityNode, EpisodicNode
from core.prompts import prompt_library
logger = logging.getLogger(__name__)
async def extract_new_nodes(
    llm_client: LLMClient,
    episode: EpisodicNode,
    relevant_schema: dict[str, Any],
    previous_episodes: list[EpisodicNode],
) -> list[EntityNode]:
    """Extract entities from an episode that are not already known in the schema.

    Args:
        llm_client: Client used to run the extraction prompt.
        episode: Episode whose content is mined for new entities.
        relevant_schema: Schema dict; ``relevant_schema["nodes"]`` maps each
            known node name to an info dict containing ``"label"`` and ``"uuid"``.
        previous_episodes: Earlier episodes passed to the LLM as context.

    Returns:
        ``EntityNode`` objects for every extracted entity whose name does not
        match an existing node; name-matching entities are skipped.
    """
    # Summarize already-known nodes so the LLM can avoid re-extracting them.
    existing_nodes = [
        {"name": node_name, "label": node_info["label"], "uuid": node_info["uuid"]}
        for node_name, node_info in relevant_schema["nodes"].items()
    ]
    context = {
        "episode_content": episode.content,
        "episode_timestamp": (
            episode.valid_at.isoformat() if episode.valid_at else None
        ),
        "existing_nodes": existing_nodes,
        "previous_episodes": [
            {
                "content": ep.content,
                "timestamp": ep.valid_at.isoformat() if ep.valid_at else None,
            }
            for ep in previous_episodes
        ],
    }

    llm_response = await llm_client.generate_response(
        prompt_library.extract_nodes.v1(context)
    )
    new_nodes_data = llm_response.get("new_nodes", [])
    logger.info(f"Extracted new nodes: {new_nodes_data}")

    # Hoisted name set: O(1) membership test instead of re-scanning
    # existing_nodes for every extracted candidate.
    existing_names = {node["name"] for node in existing_nodes}

    # Convert the extracted data into EntityNode objects
    new_nodes = []
    for node_data in new_nodes_data:
        # Skip anything whose name collides with a node we already know about.
        if node_data["name"] in existing_names:
            logger.info(f"Node {node_data['name']} already exists, skipping creation.")
            continue
        new_node = EntityNode(
            name=node_data["name"],
            labels=node_data["labels"],
            summary=node_data["summary"],
            # NOTE(review): naive local timestamp, matching the rest of this
            # module — confirm whether UTC-aware datetimes are expected.
            created_at=datetime.now(),
        )
        new_nodes.append(new_node)
        logger.info(f"Created new node: {new_node.name} (UUID: {new_node.uuid})")
    return new_nodes
async def extract_nodes(
    llm_client: LLMClient,
    episode: EpisodicNode,
    previous_episodes: list[EpisodicNode],
) -> list[EntityNode]:
    """Ask the LLM to extract entity nodes from an episode.

    Builds a context from the episode content, its timestamp, and the prior
    episodes, runs the v3 extraction prompt, and returns one ``EntityNode``
    per entity reported under the response's ``"new_nodes"`` key.
    """
    started_at = time()

    # Context handed to the extraction prompt.
    context = {
        "episode_content": episode.content,
        "episode_timestamp": (
            episode.valid_at.isoformat() if episode.valid_at else None
        ),
        "previous_episodes": [
            {
                "content": prior.content,
                "timestamp": prior.valid_at.isoformat() if prior.valid_at else None,
            }
            for prior in previous_episodes
        ],
    }

    llm_response = await llm_client.generate_response(
        prompt_library.extract_nodes.v3(context)
    )
    new_nodes_data = llm_response.get("new_nodes", [])
    logger.info(
        f"Extracted new nodes: {new_nodes_data} in {(time() - started_at) * 1000} ms"
    )

    # Materialize each reported entity as an EntityNode.
    extracted: list[EntityNode] = []
    for entry in new_nodes_data:
        node = EntityNode(
            name=entry["name"],
            labels=entry["labels"],
            summary=entry["summary"],
            created_at=datetime.now(),
        )
        extracted.append(node)
        logger.info(f"Created new node: {node.name} (UUID: {node.uuid})")
    return extracted
async def dedupe_extracted_nodes(
    llm_client: LLMClient,
    extracted_nodes: list[EntityNode],
    existing_nodes: list[EntityNode],
) -> tuple[list[EntityNode], dict[str, str]]:
    """Resolve freshly-extracted nodes against already-persisted nodes.

    Asks the LLM which extracted nodes duplicate an existing node, then
    returns ``(nodes, uuid_map)`` where ``uuid_map`` maps each duplicate
    extracted node's uuid to the uuid of the existing node it duplicates,
    and ``nodes`` is ``extracted_nodes`` with each duplicate replaced by
    its canonical existing node.
    """
    start = time()

    # Name -> node lookups for both sides. NOTE(review): same-name nodes
    # overwrite each other here; assumes names are unique within each list.
    existing_node_map = {node.name: node for node in existing_nodes}
    extracted_node_map = {node.name: node for node in extracted_nodes}

    # Prepare context for LLM
    existing_nodes_context = [
        {"name": node.name, "summary": node.summary} for node in existing_nodes
    ]
    extracted_nodes_context = [
        {"name": node.name, "summary": node.summary} for node in extracted_nodes
    ]
    context = {
        "existing_nodes": existing_nodes_context,
        "extracted_nodes": extracted_nodes_context,
    }

    llm_response = await llm_client.generate_response(
        prompt_library.dedupe_nodes.v2(context)
    )
    duplicate_data = llm_response.get("duplicates", [])
    end = time()
    logger.info(f"Deduplicated nodes: {duplicate_data} in {(end - start) * 1000} ms")

    # Each duplicate entry names an extracted node ("name") and the existing
    # node it duplicates ("duplicate_of"). Map extracted uuid -> existing uuid.
    # Bug fix: the original looked BOTH names up in the existing-node map, so
    # any extracted name absent from existing_nodes raised KeyError.
    uuid_map: dict[str, str] = {}
    for duplicate in duplicate_data:
        extracted_node = extracted_node_map.get(duplicate["name"])
        existing_node = existing_node_map.get(duplicate["duplicate_of"])
        if extracted_node is None or existing_node is None:
            # Tolerate LLM responses that reference names we never sent.
            logger.warning(f"Dedupe response referenced unknown node: {duplicate}")
            continue
        uuid_map[extracted_node.uuid] = existing_node.uuid

    # Replace each duplicate with its canonical existing node. Bug fix: the
    # original indexed the uuid-keyed uuid_map by node.name, and then indexed
    # the name-keyed node_map by the resulting uuid — both KeyErrors.
    existing_by_uuid = {node.uuid: node for node in existing_nodes}
    nodes: list[EntityNode] = []
    for node in extracted_nodes:
        canonical_uuid = uuid_map.get(node.uuid)
        if canonical_uuid is not None:
            nodes.append(existing_by_uuid[canonical_uuid])
        else:
            nodes.append(node)

    return nodes, uuid_map
async def dedupe_node_list(
    llm_client: LLMClient,
    nodes: list[EntityNode],
) -> tuple[list[EntityNode], dict[str, str]]:
    """Collapse duplicates within a single list of nodes.

    The LLM groups names that refer to the same entity; the first name in
    each group is treated as canonical. Returns ``(unique_nodes, uuid_map)``
    where ``uuid_map`` sends every non-canonical node's uuid to the uuid of
    its group's canonical node.
    """
    start = time()

    # Name -> node lookup for translating the LLM's name groups back to nodes.
    name_to_node = {node.name: node for node in nodes}

    context = {
        "nodes": [{"name": node.name, "summary": node.summary} for node in nodes],
    }
    llm_response = await llm_client.generate_response(
        prompt_library.dedupe_nodes.node_list(context)
    )
    nodes_data = llm_response.get("nodes", [])
    logger.info(f"Deduplicated nodes: {nodes_data} in {(time() - start) * 1000} ms")

    # Keep the first name of each group; map every other member to it.
    unique_nodes: list[EntityNode] = []
    uuid_map: dict[str, str] = {}
    for grouping in nodes_data:
        names = grouping["names"]
        canonical = name_to_node[names[0]]
        unique_nodes.append(canonical)
        for alias in names[1:]:
            uuid_map[name_to_node[alias].uuid] = canonical.uuid
    return unique_nodes, uuid_map