dedupe fixes (#35)

This commit is contained in:
Preston Rasmussen 2024-08-23 18:06:42 -04:00 committed by GitHub
parent 57aed456fa
commit 0d2942daea
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 11 additions and 7 deletions

View File

@@ -47,7 +47,7 @@ class OpenAIClient(LLMClient):
response = await self.client.chat.completions.create(
model=self.model,
messages=openai_messages,
-temperature=0.1,
+temperature=0,
max_tokens=3000,
response_format={'type': 'json_object'},
)

View File

@@ -54,8 +54,9 @@ def v1(context: dict[str, Any]) -> list[Message]:
do not return it in the list of unique facts.
Guidelines:
-1. The facts do not have to be completely identical to be duplicates,
-they just need to have similar factual content
+1. identical or near identical facts are duplicates
+2. Facts are also duplicates if they are represented by similar sentences
+3. Facts will often discuss the same or similar relation between identical entities
Respond with a JSON object in the following format:
{{
@@ -130,8 +131,10 @@ def edge_list(context: dict[str, Any]) -> list[Message]:
If any facts in Facts is a duplicate of another fact, return a new fact with one of their uuid's.
Guidelines:
-1. The facts do not have to be completely identical to be duplicates, they just need to have similar content
-2. The final list should have only unique facts. If 3 facts are all duplicates of each other, only one of their
+1. identical or near identical facts are duplicates
+2. Facts are also duplicates if they are represented by similar sentences
+3. Facts will often discuss the same or similar relation between identical entities
+4. The final list should have only unique facts. If 3 facts are all duplicates of each other, only one of their
facts should be in the response
Respond with a JSON object in the following format:

View File

@@ -122,7 +122,7 @@ def v2(context: dict[str, Any]) -> list[Message]:
"relation_type": "RELATION_TYPE_IN_CAPS",
"source_node_uuid": "uuid of the source entity node",
"target_node_uuid": "uuid of the target entity node",
-"fact": "Detailed description of the relationship",
+"fact": "brief description of the relationship",
"valid_at": "YYYY-MM-DDTHH:MM:SSZ or null if not explicitly mentioned",
"invalid_at": "YYYY-MM-DDTHH:MM:SSZ or null if ongoing or not explicitly mentioned"
}}

View File

@@ -125,10 +125,11 @@ def v3(context: dict[str, Any]) -> list[Message]:
sys_prompt = """You are an AI assistant that extracts entity nodes from conversational text. Your primary task is to identify and extract the speaker and other significant entities mentioned in the conversation."""
user_prompt = f"""
-Given the following conversation, extract entity nodes that are explicitly or implicitly mentioned:
+Given the following conversation, extract entity nodes from the CURRENT MESSAGE that are explicitly or implicitly mentioned:
Conversation:
{json.dumps([ep['content'] for ep in context['previous_episodes']], indent=2)}
+<CURRENT MESSAGE>
{context["episode_content"]}
Guidelines: