mirror of
https://github.com/getzep/graphiti.git
synced 2025-11-12 08:17:25 +00:00
Changes the `to_prompt_json()` helper to default to minified JSON (no indentation) instead of 2-space indentation. This reduces token consumption in LLM prompts while maintaining all necessary information.
- Changed default `indent` parameter from `2` to `None` in `prompt_helpers.py`
- Updated all prompt modules to remove explicit `indent=2` arguments
- Minor code formatting fixes in LLM clients

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-authored-by: Claude <noreply@anthropic.com>
320 lines
11 KiB
Python
320 lines
11 KiB
Python
"""
|
|
Copyright 2024, Zep Software, Inc.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
"""
|
|
|
|
from typing import Any, Protocol, TypedDict
|
|
|
|
from pydantic import BaseModel, Field
|
|
|
|
from graphiti_core.utils.text_utils import MAX_SUMMARY_CHARS
|
|
|
|
from .models import Message, PromptFunction, PromptVersion
|
|
from .prompt_helpers import to_prompt_json
|
|
from .snippets import summary_instructions
|
|
|
|
|
|
class ExtractedEntity(BaseModel):
    # Pydantic model for a single extracted entity.
    # NOTE: documented with comments instead of a docstring so the schema
    # generated from this model is left untouched.

    # Required: the entity's name.
    name: str = Field(..., description='Name of the extracted entity')
    # Required (no default given): integer id of the entity type.
    entity_type_id: int = Field(
        description=(
            'ID of the classified entity type. '
            'Must be one of the provided entity_type_id integers.'
        ),
    )
|
|
|
|
|
|
class ExtractedEntities(BaseModel):
    # Pydantic wrapper model holding a list of ExtractedEntity items.
    # NOTE: comments are used instead of a docstring so the generated schema
    # is left untouched.

    # Required list of extracted entities.
    extracted_entities: list[ExtractedEntity] = Field(
        ...,
        description='List of extracted entities',
    )
|
|
|
|
|
|
class MissedEntities(BaseModel):
    # Pydantic model holding the names of entities that were not extracted.
    # NOTE: comments are used instead of a docstring so the generated schema
    # is left untouched.

    # Required list of entity names (plain strings).
    missed_entities: list[str] = Field(
        ...,
        description="Names of entities that weren't extracted",
    )
|
|
|
|
|
|
class EntityClassificationTriple(BaseModel):
    # Pydantic model pairing an entity (uuid + name) with an optional type.
    # NOTE: comments are used instead of a docstring so the generated schema
    # is left untouched.

    # Required identifier of the entity.
    uuid: str = Field(description='UUID of the entity')
    # Required name of the entity.
    name: str = Field(description='Name of the entity')
    # Optional type; defaults to None when no type is given.
    entity_type: str | None = Field(
        default=None,
        description='Type of the entity. Must be one of the provided types or None',
    )
|
|
|
|
|
|
class EntityClassification(BaseModel):
    # Pydantic wrapper model holding a list of EntityClassificationTriple items.
    # NOTE: comments are used instead of a docstring so the generated schema
    # is left untouched.

    # Required list of classification triples.
    entity_classifications: list[EntityClassificationTriple] = Field(
        ...,
        description='List of entities classification triples.',
    )
|
|
|
|
|
|
class EntitySummary(BaseModel):
    # Pydantic model holding an entity summary string.
    # NOTE: comments are used instead of a docstring so the generated schema
    # is left untouched.

    # Required summary text. The description embeds MAX_SUMMARY_CHARS; the
    # limit is only stated in the description, not enforced by this field.
    summary: str = Field(
        ...,
        description=(
            f'Summary containing the important information about the entity. Under {MAX_SUMMARY_CHARS} characters.'
        ),
    )
|
|
|
|
|
|
class Prompt(Protocol):
    """Structural type exposing one ``PromptVersion`` attribute per prompt in this module."""

    # Entity extraction prompts, one per episode source format.
    extract_message: PromptVersion
    extract_json: PromptVersion
    extract_text: PromptVersion

    # Follow-up prompts operating on already extracted entities.
    reflexion: PromptVersion
    classify_nodes: PromptVersion
    extract_attributes: PromptVersion
    extract_summary: PromptVersion
|
|
|
|
|
|
class Versions(TypedDict):
    """Typed dictionary mapping each prompt name to its ``PromptFunction``."""

    # Entity extraction prompts, one per episode source format.
    extract_message: PromptFunction
    extract_json: PromptFunction
    extract_text: PromptFunction

    # Follow-up prompts operating on already extracted entities.
    reflexion: PromptFunction
    classify_nodes: PromptFunction
    extract_attributes: PromptFunction
    extract_summary: PromptFunction
|
|
|
|
|
|
def extract_message(context: dict[str, Any]) -> list[Message]:
    """Build the prompt for extracting entity nodes from a conversational message.

    Reads ``entity_types``, ``previous_episodes``, ``episode_content`` and
    ``custom_prompt`` from ``context`` by subscript, so each key must be present
    (a missing key raises ``KeyError``). ``previous_episodes`` is copied into a
    list and serialized with ``to_prompt_json``; the other values are
    interpolated into the prompt as-is.

    Returns a two-element list: the system message followed by the user message.
    """
    system_text = (
        'You are an AI assistant that extracts entity nodes from conversational messages.\n'
        'Your primary task is to extract and classify the speaker and other significant entities '
        'mentioned in the conversation.'
    )

    # list(...) replaces the former identity comprehension; the serialized
    # output is the same.
    user_text = f"""
<ENTITY TYPES>
{context['entity_types']}
</ENTITY TYPES>

<PREVIOUS MESSAGES>
{to_prompt_json(list(context['previous_episodes']))}
</PREVIOUS MESSAGES>

<CURRENT MESSAGE>
{context['episode_content']}
</CURRENT MESSAGE>

Instructions:

You are given a conversation context and a CURRENT MESSAGE. Your task is to extract **entity nodes** mentioned **explicitly or implicitly** in the CURRENT MESSAGE.
Pronoun references such as he/she/they or this/that/those should be disambiguated to the names of the
reference entities. Only extract distinct entities from the CURRENT MESSAGE. Don't extract pronouns like you, me, he/she/they, we/us as entities.

1. **Speaker Extraction**: Always extract the speaker (the part before the colon `:` in each dialogue line) as the first entity node.
- If the speaker is mentioned again in the message, treat both mentions as a **single entity**.

2. **Entity Identification**:
- Extract all significant entities, concepts, or actors that are **explicitly or implicitly** mentioned in the CURRENT MESSAGE.
- **Exclude** entities mentioned only in the PREVIOUS MESSAGES (they are for context only).

3. **Entity Classification**:
- Use the descriptions in ENTITY TYPES to classify each extracted entity.
- Assign the appropriate `entity_type_id` for each one.

4. **Exclusions**:
- Do NOT extract entities representing relationships or actions.
- Do NOT extract dates, times, or other temporal information—these will be handled separately.

5. **Formatting**:
- Be **explicit and unambiguous** in naming entities (e.g., use full names when available).

{context['custom_prompt']}
"""
    return [
        Message(role='system', content=system_text),
        Message(role='user', content=user_text),
    ]
|
|
|
|
|
|
def extract_json(context: dict[str, Any]) -> list[Message]:
    """Build the prompt for extracting entity nodes from a JSON episode.

    Reads ``entity_types``, ``source_description``, ``episode_content`` and
    ``custom_prompt`` from ``context`` by subscript, so each key must be present
    (a missing key raises ``KeyError``). All values are interpolated as-is.

    Returns a two-element list: the system message followed by the user message.
    """
    system_text = (
        'You are an AI assistant that extracts entity nodes from JSON.\n'
        'Your primary task is to extract and classify relevant entities from JSON files'
    )

    # NOTE(review): the opening <SOURCE DESCRIPTION> tag is followed by a stray
    # ':' that no other tag in this module has; kept verbatim here because it
    # is runtime prompt text.
    user_text = f"""
<ENTITY TYPES>
{context['entity_types']}
</ENTITY TYPES>

<SOURCE DESCRIPTION>:
{context['source_description']}
</SOURCE DESCRIPTION>
<JSON>
{context['episode_content']}
</JSON>

{context['custom_prompt']}

Given the above source description and JSON, extract relevant entities from the provided JSON.
For each entity extracted, also determine its entity type based on the provided ENTITY TYPES and their descriptions.
Indicate the classified entity type by providing its entity_type_id.

Guidelines:
1. Extract all entities that the JSON represents. This will often be something like a "name" or "user" field
2. Extract all entities mentioned in all other properties throughout the JSON structure
3. Do NOT extract any properties that contain dates
"""
    system_message = Message(role='system', content=system_text)
    user_message = Message(role='user', content=user_text)
    return [system_message, user_message]
|
|
|
|
|
|
def extract_text(context: dict[str, Any]) -> list[Message]:
    """Build the prompt for extracting entity nodes from a plain-text episode.

    Reads ``entity_types``, ``episode_content`` and ``custom_prompt`` from
    ``context`` by subscript, so each key must be present (a missing key raises
    ``KeyError``). All values are interpolated as-is.

    Returns a two-element list: the system message followed by the user message.
    """
    system_text = (
        'You are an AI assistant that extracts entity nodes from text.\n'
        'Your primary task is to extract and classify the speaker and other significant entities '
        'mentioned in the provided text.'
    )

    user_text = f"""
<ENTITY TYPES>
{context['entity_types']}
</ENTITY TYPES>

<TEXT>
{context['episode_content']}
</TEXT>

Given the above text, extract entities from the TEXT that are explicitly or implicitly mentioned.
For each entity extracted, also determine its entity type based on the provided ENTITY TYPES and their descriptions.
Indicate the classified entity type by providing its entity_type_id.

{context['custom_prompt']}

Guidelines:
1. Extract significant entities, concepts, or actors mentioned in the conversation.
2. Avoid creating nodes for relationships or actions.
3. Avoid creating nodes for temporal information like dates, times or years (these will be added to edges later).
4. Be as explicit as possible in your node names, using full names and avoiding abbreviations.
"""
    system_message = Message(role='system', content=system_text)
    user_message = Message(role='user', content=user_text)
    return [system_message, user_message]
|
|
|
|
|
|
def reflexion(context: dict[str, Any]) -> list[Message]:
    """Build the prompt asking which entities have not been extracted yet.

    Reads ``previous_episodes``, ``episode_content`` and ``extracted_entities``
    from ``context`` by subscript, so each key must be present (a missing key
    raises ``KeyError``). ``previous_episodes`` is copied into a list and
    serialized with ``to_prompt_json``; the other values are interpolated as-is.

    Returns a two-element list: the system message followed by the user message.
    """
    system_text = (
        'You are an AI assistant that determines which entities have not been extracted '
        'from the given context'
    )

    # list(...) replaces the former identity comprehension; the serialized
    # output is the same.
    user_text = f"""
<PREVIOUS MESSAGES>
{to_prompt_json(list(context['previous_episodes']))}
</PREVIOUS MESSAGES>
<CURRENT MESSAGE>
{context['episode_content']}
</CURRENT MESSAGE>

<EXTRACTED ENTITIES>
{context['extracted_entities']}
</EXTRACTED ENTITIES>

Given the above previous messages, current message, and list of extracted entities; determine if any entities haven't been
extracted.
"""
    return [
        Message(role='system', content=system_text),
        Message(role='user', content=user_text),
    ]
|
|
|
|
|
|
def classify_nodes(context: dict[str, Any]) -> list[Message]:
    """Build the prompt for classifying already extracted entity nodes.

    Reads ``previous_episodes``, ``episode_content``, ``extracted_entities`` and
    ``entity_types`` from ``context`` by subscript, so each key must be present
    (a missing key raises ``KeyError``). ``previous_episodes`` is copied into a
    list and serialized with ``to_prompt_json``; the other values are
    interpolated as-is.

    Returns a two-element list: the system message followed by the user message.
    """
    system_text = (
        'You are an AI assistant that classifies entity nodes given the context '
        'from which they were extracted'
    )

    # list(...) replaces the former identity comprehension; the serialized
    # output is the same.
    user_text = f"""
<PREVIOUS MESSAGES>
{to_prompt_json(list(context['previous_episodes']))}
</PREVIOUS MESSAGES>
<CURRENT MESSAGE>
{context['episode_content']}
</CURRENT MESSAGE>

<EXTRACTED ENTITIES>
{context['extracted_entities']}
</EXTRACTED ENTITIES>

<ENTITY TYPES>
{context['entity_types']}
</ENTITY TYPES>

Given the above conversation, extracted entities, and provided entity types and their descriptions, classify the extracted entities.

Guidelines:
1. Each entity must have exactly one type
2. Only use the provided ENTITY TYPES as types, do not use additional types to classify entities.
3. If none of the provided entity types accurately classify an extracted node, the type should be set to None
"""
    return [
        Message(role='system', content=system_text),
        Message(role='user', content=user_text),
    ]
|
|
|
|
|
|
def extract_attributes(context: dict[str, Any]) -> list[Message]:
    """Build the prompt for updating an entity's attributes from the messages.

    Reads ``previous_episodes``, ``episode_content`` and ``node`` from
    ``context`` by subscript, so each key must be present (a missing key raises
    ``KeyError``). The two episode values are each serialized with
    ``to_prompt_json``; ``node`` is interpolated as-is.

    Returns a two-element list: the system message followed by the user message.
    """
    # The system message is built first, matching the original evaluation order.
    system_message = Message(
        role='system',
        content='You are a helpful assistant that extracts entity properties from the provided text.',
    )

    user_text = f"""
Given the MESSAGES and the following ENTITY, update any of its attributes based on the information provided
in MESSAGES. Use the provided attribute descriptions to better understand how each attribute should be determined.

Guidelines:
1. Do not hallucinate entity property values if they cannot be found in the current context.
2. Only use the provided MESSAGES and ENTITY to set attribute values.

<MESSAGES>
{to_prompt_json(context['previous_episodes'])}
{to_prompt_json(context['episode_content'])}
</MESSAGES>

<ENTITY>
{context['node']}
</ENTITY>
"""
    user_message = Message(role='user', content=user_text)
    return [system_message, user_message]
|
|
|
|
|
|
def extract_summary(context: dict[str, Any]) -> list[Message]:
    """Build the prompt for updating an entity's summary from the messages.

    Reads ``previous_episodes``, ``episode_content`` and ``node`` from
    ``context`` by subscript, so each key must be present (a missing key raises
    ``KeyError``). The two episode values are each serialized with
    ``to_prompt_json``; ``node`` is interpolated as-is. The module-level
    ``summary_instructions`` snippet is embedded ahead of the messages.

    Returns a two-element list: the system message followed by the user message.
    """
    # The system message is built first, matching the original evaluation order.
    system_message = Message(
        role='system',
        content='You are a helpful assistant that extracts entity summaries from the provided text.',
    )

    user_text = f"""
Given the MESSAGES and the ENTITY, update the summary that combines relevant information about the entity
from the messages and relevant information from the existing summary.

{summary_instructions}

<MESSAGES>
{to_prompt_json(context['previous_episodes'])}
{to_prompt_json(context['episode_content'])}
</MESSAGES>

<ENTITY>
{context['node']}
</ENTITY>
"""
    user_message = Message(role='user', content=user_text)
    return [system_message, user_message]
|
|
|
|
|
|
# Registry mapping each prompt name to the function that builds it.
# Entry order is kept as-is; it differs from the field order of ``Versions``
# (extract_summary comes before classify_nodes here).
versions: Versions = {
    'extract_message': extract_message,
    'extract_json': extract_json,
    'extract_text': extract_text,
    'reflexion': reflexion,
    'extract_summary': extract_summary,
    'classify_nodes': classify_nodes,
    'extract_attributes': extract_attributes,
}
|