fix: change default edge weight from 0.0 to 1.0 in entity extraction and graph storage

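- Update edge merging and retrieval functions in operate.py (_merge_edges_then_upsert, _find_most_related_edges_from_entities, _get_edge_data) to use 1.0 as default weight
- Include the edge weight (default 1.0) in the relationship vector-store payload built in merge_nodes_and_edges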
- Fix Neo4j implementation to use 1.0 instead of 0.0 for missing edge weights
- Fix Memgraph implementation to use 1.0 instead of 0.0 for missing edge weights
- Ensure consistent non-zero default weights across all graph storage backends
This commit is contained in:
yangdx 2025-07-17 11:30:49 +08:00
parent 946139a97a
commit 7184c7b3ab
3 changed files with 12 additions and 11 deletions

View File

@ -414,7 +414,7 @@ class MemgraphStorage(BaseGraphStorage):
if records:
edge_result = dict(records[0]["edge_properties"])
for key, default_value in {
"weight": 0.0,
"weight": 1.0,
"source_id": None,
"description": None,
"keywords": None,

View File

@ -535,7 +535,7 @@ class Neo4JStorage(BaseGraphStorage):
# logger.debug(f"Result: {edge_result}")
# Ensure required keys exist with defaults
required_keys = {
"weight": 0.0,
"weight": 1.0,
"source_id": None,
"description": None,
"keywords": None,
@ -559,7 +559,7 @@ class Neo4JStorage(BaseGraphStorage):
)
# Return default edge properties on error
return {
"weight": 0.0,
"weight": 1.0,
"source_id": None,
"description": None,
"keywords": None,
@ -610,7 +610,7 @@ class Neo4JStorage(BaseGraphStorage):
edge_props = edges[0] # choose the first if multiple exist
# Ensure required keys exist with defaults
for key, default in {
"weight": 0.0,
"weight": 1.0,
"source_id": None,
"description": None,
"keywords": None,
@ -621,7 +621,7 @@ class Neo4JStorage(BaseGraphStorage):
else:
# No edge found set default edge properties
edges_dict[(src, tgt)] = {
"weight": 0.0,
"weight": 1.0,
"source_id": None,
"description": None,
"keywords": None,

View File

@ -1016,8 +1016,8 @@ async def _merge_edges_then_upsert(
already_edge = await knowledge_graph_inst.get_edge(src_id, tgt_id)
# Handle the case where get_edge returns None or missing fields
if already_edge:
# Get weight with default 0.0 if missing
already_weights.append(already_edge.get("weight", 0.0))
# Get weight with default 1.0 if missing
already_weights.append(already_edge.get("weight", 1.0))
# Get source_id with empty string default if missing or None
if already_edge.get("source_id") is not None:
@ -1284,6 +1284,7 @@ async def merge_nodes_and_edges(
"content": f"{edge_data['src_id']}\t{edge_data['tgt_id']}\n{edge_data['keywords']}\n{edge_data['description']}",
"source_id": edge_data["source_id"],
"file_path": edge_data.get("file_path", "unknown_source"),
"weight": edge_data.get("weight", 1.0),
}
}
await relationships_vdb.upsert(data_for_vdb)
@ -2493,9 +2494,9 @@ async def _find_most_related_edges_from_entities(
if edge_props is not None:
if "weight" not in edge_props:
logger.warning(
f"Edge {pair} missing 'weight' attribute, using default value 0.0"
f"Edge {pair} missing 'weight' attribute, using default value 1.0"
)
edge_props["weight"] = 0.0
edge_props["weight"] = 1.0
combined = {
"src_tgt": pair,
@ -2548,9 +2549,9 @@ async def _get_edge_data(
if edge_props is not None:
if "weight" not in edge_props:
logger.warning(
f"Edge {pair} missing 'weight' attribute, using default value 0.0"
f"Edge {pair} missing 'weight' attribute, using default value 1.0"
)
edge_props["weight"] = 0.0
edge_props["weight"] = 1.0
# Use edge degree from the batch as rank.
combined = {