Perf: skip unnecessary attribute check in tidy_graph (#8121)

### What problem does this PR solve?
Allow `tidy_graph` to skip the essential-attribute check (`description`, `source_id`)
when the upstream caller has already ensured those attributes are present.

### Type of change
- [X] Performance Improvement
This commit is contained in:
Stephen Hu 2025-06-09 11:44:13 +08:00 committed by GitHub
parent ad1f89fea0
commit 2337bbf6ca
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 15 additions and 13 deletions

View File

@ -166,7 +166,7 @@ async def generate_subgraph(
) )
if ignored_rels: if ignored_rels:
callback(msg=f"ignored {ignored_rels} relations due to missing entities.") callback(msg=f"ignored {ignored_rels} relations due to missing entities.")
tidy_graph(subgraph, callback) tidy_graph(subgraph, callback, check_attribute=False)
subgraph.graph["source_id"] = [doc_id] subgraph.graph["source_id"] = [doc_id]
chunk = { chunk = {

View File

@ -157,20 +157,21 @@ def set_tags_to_cache(kb_ids, tags):
k = hasher.hexdigest() k = hasher.hexdigest()
REDIS_CONN.set(k, json.dumps(tags).encode("utf-8"), 600) REDIS_CONN.set(k, json.dumps(tags).encode("utf-8"), 600)
def tidy_graph(graph: nx.Graph, callback): def tidy_graph(graph: nx.Graph, callback, check_attribute: bool = True):
""" """
Ensure all nodes and edges in the graph have some essential attribute. Ensure all nodes and edges in the graph have some essential attribute.
""" """
def is_valid_node(node_attrs: dict) -> bool: def is_valid_item(node_attrs: dict) -> bool:
valid_node = True valid_node = True
for attr in ["description", "source_id"]: for attr in ["description", "source_id"]:
if attr not in node_attrs: if attr not in node_attrs:
valid_node = False valid_node = False
break break
return valid_node return valid_node
if check_attribute:
purged_nodes = [] purged_nodes = []
for node, node_attrs in graph.nodes(data=True): for node, node_attrs in graph.nodes(data=True):
if not is_valid_node(node_attrs): if not is_valid_item(node_attrs):
purged_nodes.append(node) purged_nodes.append(node)
for node in purged_nodes: for node in purged_nodes:
graph.remove_node(node) graph.remove_node(node)
@ -179,7 +180,8 @@ def tidy_graph(graph: nx.Graph, callback):
purged_edges = [] purged_edges = []
for source, target, attr in graph.edges(data=True): for source, target, attr in graph.edges(data=True):
if not is_valid_node(attr): if check_attribute:
if not is_valid_item(attr):
purged_edges.append((source, target)) purged_edges.append((source, target))
if "keywords" not in attr: if "keywords" not in attr:
attr["keywords"] = [] attr["keywords"] = []