2024-08-23 13:01:33 -07:00
|
|
|
"""
|
|
|
|
Copyright 2024, Zep Software, Inc.
|
|
|
|
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
you may not use this file except in compliance with the License.
|
|
|
|
You may obtain a copy of the License at
|
|
|
|
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
limitations under the License.
|
|
|
|
"""
|
|
|
|
|
2024-08-18 13:22:31 -04:00
|
|
|
import json
|
2024-08-23 08:15:44 -07:00
|
|
|
from typing import Any, Protocol, TypedDict
|
2024-08-18 13:22:31 -04:00
|
|
|
|
2024-08-22 12:26:13 -07:00
|
|
|
from .models import Message, PromptFunction, PromptVersion
|
2024-08-18 13:22:31 -04:00
|
|
|
|
|
|
|
|
|
|
|
class Prompt(Protocol):
    """Structural interface for the edge de-duplication prompt set.

    Declares one ``PromptVersion`` attribute per prompt variant. The
    attribute names are the same as the keys of the ``versions`` mapping
    defined at the bottom of this module.
    """

    # One attribute per prompt builder defined in this module.
    v1: PromptVersion
    v2: PromptVersion
    edge_list: PromptVersion
|
2024-08-18 13:22:31 -04:00
|
|
|
|
|
|
|
|
|
|
|
class Versions(TypedDict):
    """Typed mapping from prompt version name to its builder function.

    Every key is required and holds a ``PromptFunction``; the module-level
    ``versions`` dict is annotated with this type.
    """

    # Keys are the names of the prompt builder functions in this module.
    v1: PromptFunction
    v2: PromptFunction
    edge_list: PromptFunction
|
2024-08-18 13:22:31 -04:00
|
|
|
|
|
|
|
|
2024-08-23 08:15:44 -07:00
|
|
|
def v1(context: dict[str, Any]) -> list[Message]:
    """Build the v1 prompt: drop new facts that duplicate existing facts.

    Args:
        context: Must contain ``'existing_edges'`` and ``'extracted_edges'``.
            Both values are serialized with ``json.dumps(..., indent=2)``
            and embedded verbatim in the user message.

    Returns:
        A two-element list: a system message followed by a user message.
        The user message asks for a JSON object holding a ``unique_facts``
        list whose items carry a ``uuid``.

    Raises:
        KeyError: If either expected key is missing from ``context``.
    """
    system_message = Message(
        role='system',
        content='You are a helpful assistant that de-duplicates relationship from edge lists.',
    )

    # Serialize in the same order the prompt presents them.
    existing_facts_json = json.dumps(context['existing_edges'], indent=2)
    new_facts_json = json.dumps(context['extracted_edges'], indent=2)

    # Doubled braces below are literal braces in the rendered prompt.
    user_prompt = f"""
        Given the following context, deduplicate facts from a list of new facts given a list of existing facts:

        Existing Facts:
        {existing_facts_json}

        New Facts:
        {new_facts_json}

        Task:
        If any facts in New Facts is a duplicate of a fact in Existing Facts,
        do not return it in the list of unique facts.

        Guidelines:
        1. The facts do not have to be completely identical to be duplicates,
        they just need to have similar factual content

        Respond with a JSON object in the following format:
        {{
            "unique_facts": [
                {{
                    "uuid": "unique identifier of the fact"
                }}
            ]
        }}
        """

    return [system_message, Message(role='user', content=user_prompt)]
|
2024-08-18 13:22:31 -04:00
|
|
|
|
|
|
|
|
2024-08-23 08:15:44 -07:00
|
|
|
def v2(context: dict[str, Any]) -> list[Message]:
    """Build the v2 prompt: replace duplicated new edges with existing ones.

    Args:
        context: Must contain ``'existing_edges'`` and ``'extracted_edges'``.
            Both values are serialized with ``json.dumps(..., indent=2)``
            and embedded verbatim in the user message.

    Returns:
        A two-element list: a system message followed by a user message.
        The user message asks for a JSON object holding a ``new_edges``
        list whose items carry a ``triplet`` and a ``fact``.

    Raises:
        KeyError: If either expected key is missing from ``context``.
    """
    system_message = Message(
        role='system',
        content='You are a helpful assistant that de-duplicates relationship from edge lists.',
    )

    # Serialize in the same order the prompt presents them.
    existing_edges_json = json.dumps(context['existing_edges'], indent=2)
    new_edges_json = json.dumps(context['extracted_edges'], indent=2)

    # Doubled braces below are literal braces in the rendered prompt.
    user_prompt = f"""
        Given the following context, deduplicate edges from a list of new edges given a list of existing edges:

        Existing Edges:
        {existing_edges_json}

        New Edges:
        {new_edges_json}

        Task:
        1. start with the list of edges from New Edges
        2. If any edge in New Edges is a duplicate of an edge in Existing Edges, replace the new edge with the existing
        edge in the list
        3. Respond with the resulting list of edges

        Guidelines:
        1. Use both the triplet name and fact of edges to determine if they are duplicates,
        duplicate edges may have different names meaning the same thing and slight variations in the facts.
        2. If you encounter facts that are semantically equivalent or very similar, keep the original edge

        Respond with a JSON object in the following format:
        {{
            "new_edges": [
                {{
                    "triplet": "source_node_name-edge_name-target_node_name",
                    "fact": "one sentence description of the fact"
                }}
            ]
        }}
        """

    return [system_message, Message(role='user', content=user_prompt)]
|
2024-08-22 18:09:44 -04:00
|
|
|
|
|
|
|
|
2024-08-23 08:15:44 -07:00
|
|
|
def edge_list(context: dict[str, Any]) -> list[Message]:
    """Build the prompt that de-duplicates facts within a single list.

    Args:
        context: Must contain ``'edges'``. The value is serialized with
            ``json.dumps(..., indent=2)`` and embedded verbatim in the
            user message.

    Returns:
        A two-element list: a system message followed by a user message.
        The user message asks for a JSON object holding a ``unique_facts``
        list whose items carry a ``uuid`` and a ``fact``.

    Raises:
        KeyError: If ``'edges'`` is missing from ``context``.
    """
    system_message = Message(
        role='system',
        content='You are a helpful assistant that de-duplicates edges from edge lists.',
    )

    facts_json = json.dumps(context['edges'], indent=2)

    # Doubled braces below are literal braces in the rendered prompt.
    user_prompt = f"""
        Given the following context, find all of the duplicates in a list of facts:

        Facts:
        {facts_json}

        Task:
        If any facts in Facts is a duplicate of another fact, return a new fact with one of their uuid's.

        Guidelines:
        1. The facts do not have to be completely identical to be duplicates, they just need to have similar content
        2. The final list should have only unique facts. If 3 facts are all duplicates of each other, only one of their
        facts should be in the response

        Respond with a JSON object in the following format:
        {{
            "unique_facts": [
                {{
                    "uuid": "unique identifier of the fact",
                    "fact": "fact of a unique edge"
                }}
            ]
        }}
        """

    return [system_message, Message(role='user', content=user_prompt)]
|
2024-08-21 12:03:32 -04:00
|
|
|
|
|
|
|
|
2024-08-22 18:09:44 -04:00
|
|
|
# Registry of the prompt builders defined above, keyed by version name.
versions: Versions = {
    'v1': v1,
    'v2': v2,
    'edge_list': edge_list,
}
|