mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-11-02 10:49:41 +00:00
Perf: skip redundant attribute check in tidy_graph (#8121)
### What problem does this PR solve? Support skipping the attribute check in `tidy_graph` when the upstream caller has already ensured that the required attributes are present. ### Type of change - [X] Performance Improvement
This commit is contained in:
parent
ad1f89fea0
commit
2337bbf6ca
@ -166,7 +166,7 @@ async def generate_subgraph(
|
||||
)
|
||||
if ignored_rels:
|
||||
callback(msg=f"ignored {ignored_rels} relations due to missing entities.")
|
||||
tidy_graph(subgraph, callback)
|
||||
tidy_graph(subgraph, callback, check_attribute=False)
|
||||
|
||||
subgraph.graph["source_id"] = [doc_id]
|
||||
chunk = {
|
||||
|
||||
@ -157,30 +157,32 @@ def set_tags_to_cache(kb_ids, tags):
|
||||
k = hasher.hexdigest()
|
||||
REDIS_CONN.set(k, json.dumps(tags).encode("utf-8"), 600)
|
||||
|
||||
def tidy_graph(graph: nx.Graph, callback):
|
||||
def tidy_graph(graph: nx.Graph, callback, check_attribute: bool = True):
|
||||
"""
|
||||
Ensure all nodes and edges in the graph have some essential attribute.
|
||||
"""
|
||||
def is_valid_node(node_attrs: dict) -> bool:
|
||||
def is_valid_item(node_attrs: dict) -> bool:
|
||||
valid_node = True
|
||||
for attr in ["description", "source_id"]:
|
||||
if attr not in node_attrs:
|
||||
valid_node = False
|
||||
break
|
||||
return valid_node
|
||||
purged_nodes = []
|
||||
for node, node_attrs in graph.nodes(data=True):
|
||||
if not is_valid_node(node_attrs):
|
||||
purged_nodes.append(node)
|
||||
for node in purged_nodes:
|
||||
graph.remove_node(node)
|
||||
if purged_nodes and callback:
|
||||
callback(msg=f"Purged {len(purged_nodes)} nodes from graph due to missing essential attributes.")
|
||||
if check_attribute:
|
||||
purged_nodes = []
|
||||
for node, node_attrs in graph.nodes(data=True):
|
||||
if not is_valid_item(node_attrs):
|
||||
purged_nodes.append(node)
|
||||
for node in purged_nodes:
|
||||
graph.remove_node(node)
|
||||
if purged_nodes and callback:
|
||||
callback(msg=f"Purged {len(purged_nodes)} nodes from graph due to missing essential attributes.")
|
||||
|
||||
purged_edges = []
|
||||
for source, target, attr in graph.edges(data=True):
|
||||
if not is_valid_node(attr):
|
||||
purged_edges.append((source, target))
|
||||
if check_attribute:
|
||||
if not is_valid_item(attr):
|
||||
purged_edges.append((source, target))
|
||||
if "keywords" not in attr:
|
||||
attr["keywords"] = []
|
||||
for source, target in purged_edges:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user