2024-10-26 19:29:45 -04:00
import asyncio
2025-01-04 21:47:52 +08:00
import inspect
2024-10-26 19:29:45 -04:00
import os
2025-02-08 16:06:07 +08:00
import re
2024-10-26 19:29:45 -04:00
from dataclasses import dataclass
2024-11-06 11:18:14 -05:00
from typing import Any , Union , Tuple , List , Dict
2025-01-27 09:36:10 +01:00
import pipmaster as pm
2025-02-11 00:55:52 +08:00
import configparser
2025-01-27 23:21:34 +08:00
2025-01-27 09:36:10 +01:00
# Make sure the Neo4j driver is importable before the `from neo4j import ...`
# statement that follows; pipmaster installs it on demand when it is missing.
if not pm.is_installed("neo4j"):
    pm.install("neo4j")
2025-01-04 21:47:52 +08:00
2024-11-06 11:18:14 -05:00
from neo4j import (
AsyncGraphDatabase ,
exceptions as neo4jExceptions ,
AsyncDriver ,
AsyncManagedTransaction ,
2025-01-04 22:33:35 +08:00
GraphDatabase ,
2024-10-30 17:48:14 -04:00
)
from tenacity import (
retry ,
stop_after_attempt ,
wait_exponential ,
retry_if_exception_type ,
)
2025-02-08 16:06:07 +08:00
from . . utils import logger
2025-01-04 21:47:52 +08:00
from . . base import BaseGraphStorage
2025-02-13 17:32:51 +08:00
from . . types import KnowledgeGraph , KnowledgeGraphNode , KnowledgeGraphEdge
2025-01-04 21:47:52 +08:00
2024-10-30 17:48:14 -04:00
2025-02-11 00:55:52 +08:00
# Optional INI configuration. Neo4JStorage.__init__ reads its connection
# settings from environment variables first and falls back to the [neo4j]
# section of this file. ConfigParser.read() silently skips a missing file, so
# the lookups simply use their own fallbacks when config.ini is absent.
config = configparser.ConfigParser()
config.read("config.ini", "utf-8")
2025-02-11 03:29:40 +08:00
2024-10-26 19:29:45 -04:00
@dataclass
class Neo4JStorage(BaseGraphStorage):
    """Graph storage backend that keeps the knowledge graph in Neo4j.

    Each entity is stored as a node whose *label* is the node id with any
    surrounding double quotes stripped. Because Cypher cannot parameterise
    labels, ids are interpolated into queries as backtick-quoted labels.

    NOTE(review): labels are interpolated without escaping; an id containing
    a backtick yields invalid Cypher. Confirm that callers sanitise ids.
    """

    @staticmethod
    def load_nx_graph(file_name):
        """Placeholder kept for interface parity; nothing is preloaded."""
        print("no preloading of graph with neo4j in production")

    def __init__(self, namespace, global_config, embedding_func):
        """Create the async driver and select the database to work against.

        Connection settings come from the environment (``NEO4J_URI``,
        ``NEO4J_USERNAME``, ``NEO4J_PASSWORD``,
        ``NEO4J_MAX_CONNECTION_POOL_SIZE``, ``NEO4J_DATABASE``) and fall back
        to the ``[neo4j]`` section of the module-level ``config``. Without
        ``NEO4J_DATABASE`` the database name is derived from ``namespace``.

        A short-lived synchronous driver probes the preferred database, tries
        to create it when it does not exist, and falls back to the server's
        default database (``None``) when that is not possible.

        Raises:
            neo4j.exceptions.ServiceUnavailable: the server cannot be reached.
            neo4j.exceptions.AuthError: the credentials are rejected.
            neo4j.exceptions.ClientError / DatabaseError: creating the
                database failed while already probing the default database.
        """
        super().__init__(
            namespace=namespace,
            global_config=global_config,
            embedding_func=embedding_func,
        )
        self._driver = None
        self._driver_lock = asyncio.Lock()

        uri = os.environ.get("NEO4J_URI", config.get("neo4j", "uri", fallback=None))
        username = os.environ.get(
            "NEO4J_USERNAME", config.get("neo4j", "username", fallback=None)
        )
        password = os.environ.get(
            "NEO4J_PASSWORD", config.get("neo4j", "password", fallback=None)
        )
        max_pool_size = int(
            os.environ.get(
                "NEO4J_MAX_CONNECTION_POOL_SIZE",
                config.get("neo4j", "connection_pool_size", fallback=800),
            )
        )
        preferred_database = os.environ.get(
            "NEO4J_DATABASE", re.sub(r"[^a-zA-Z0-9-]", "-", namespace)
        )

        # The configured pool size has to reach the async driver: it is the
        # one every query in this class goes through.
        self._driver: AsyncDriver = AsyncGraphDatabase.driver(
            uri,
            auth=(username, password),
            max_connection_pool_size=max_pool_size,
        )

        # Try to connect to the database (preferred first, then the default).
        with GraphDatabase.driver(
            uri,
            auth=(username, password),
            max_connection_pool_size=max_pool_size,
        ) as sync_driver:
            for database in (preferred_database, None):
                self._DATABASE = database
                if self._probe_database(sync_driver, database, uri):
                    break

    @staticmethod
    def _probe_database(sync_driver, database, uri) -> bool:
        """Return True when ``database`` is reachable or was just created.

        Returns False when the caller should try the next candidate.
        Connectivity and authentication errors are logged and re-raised, as
        is a failed creation attempt while ``database`` is already ``None``.
        """
        connected = False
        try:
            with sync_driver.session(database=database) as session:
                try:
                    session.run("MATCH (n) RETURN n LIMIT 0")
                    logger.info(f"Connected to {database} at {uri}")
                    connected = True
                except neo4jExceptions.ServiceUnavailable:
                    logger.error(
                        f"{database} at {uri} is not available".capitalize()
                    )
                    raise
        except neo4jExceptions.AuthError:
            logger.error(f"Authentication failed for {database} at {uri}")
            raise
        except neo4jExceptions.ClientError as e:
            if e.code == "Neo.ClientError.Database.DatabaseNotFound":
                logger.info(
                    f"{database} at {uri} not found. Try to create specified database.".capitalize()
                )
                try:
                    with sync_driver.session() as session:
                        session.run(f"CREATE DATABASE `{database}` IF NOT EXISTS")
                        logger.info(f"{database} at {uri} created".capitalize())
                        connected = True
                except (
                    neo4jExceptions.ClientError,
                    neo4jExceptions.DatabaseError,
                ) as create_err:
                    unsupported = create_err.code in (
                        "Neo.ClientError.Statement.UnsupportedAdministrationCommand",
                        "Neo.DatabaseError.Statement.ExecutionFailed",
                    )
                    if unsupported and database is not None:
                        logger.warning(
                            "This Neo4j instance does not support creating databases. Try to use Neo4j Desktop/Enterprise version or DozerDB instead. Fallback to use the default database."
                        )
                    if database is None:
                        # Nothing left to fall back to.
                        logger.error(f"Failed to create {database} at {uri}")
                        raise
        return connected

    def __post_init__(self):
        """Register the node-embedding algorithms this backend exposes."""
        self._node_embed_algorithms = {
            "node2vec": self._node2vec_embed,
        }

    async def close(self):
        """Close the async driver, if any, and forget the handle."""
        if self._driver:
            await self._driver.close()
            self._driver = None

    async def __aexit__(self, exc_type, exc, tb):
        """Release the driver on context exit (same effect as ``close``)."""
        await self.close()

    async def index_done_callback(self):
        """Hook called after indexing; only reports completion."""
        print("KG successfully indexed.")

    async def _label_exists(self, label: str) -> bool:
        """Check if a label exists in the Neo4j database.

        Any error while querying is logged and reported as ``False``.
        """
        query = "CALL db.labels() YIELD label RETURN label"
        try:
            async with self._driver.session(database=self._DATABASE) as session:
                result = await session.run(query)
                labels = [record["label"] for record in await result.data()]
                return label in labels
        except Exception as e:
            logger.error(f"Error checking label existence: {e}")
            return False

    async def _ensure_label(self, label: str) -> str:
        """Strip surrounding double quotes and warn when the label is unknown.

        The cleaned label is returned whether or not it exists.
        """
        clean_label = label.strip('"')
        if not await self._label_exists(clean_label):
            logger.warning(f"Label '{clean_label}' does not exist in Neo4j")
        return clean_label

    async def has_node(self, node_id: str) -> bool:
        """Return True when at least one node carries the label ``node_id``."""
        entity_name_label = await self._ensure_label(node_id)
        async with self._driver.session(database=self._DATABASE) as session:
            query = (
                f"MATCH (n:`{entity_name_label}`) RETURN count(n) > 0 AS node_exists"
            )
            result = await session.run(query)
            single_result = await result.single()
            logger.debug(
                f"{inspect.currentframe().f_code.co_name}:query:{query}:result:{single_result['node_exists']}"
            )
            return single_result["node_exists"]

    async def has_edge(self, source_node_id: str, target_node_id: str) -> bool:
        """Return True when any relationship, in either direction, links the two labels."""
        entity_name_label_source = source_node_id.strip('"')
        entity_name_label_target = target_node_id.strip('"')
        async with self._driver.session(database=self._DATABASE) as session:
            query = (
                f"MATCH (a:`{entity_name_label_source}`)-[r]-(b:`{entity_name_label_target}`) "
                "RETURN COUNT(r) > 0 AS edgeExists"
            )
            result = await session.run(query)
            single_result = await result.single()
            logger.debug(
                f"{inspect.currentframe().f_code.co_name}:query:{query}:result:{single_result['edgeExists']}"
            )
            return single_result["edgeExists"]

    async def get_node(self, node_id: str) -> Union[dict, None]:
        """Get node by its label identifier.

        Args:
            node_id: The node label to look up

        Returns:
            dict: Node properties if found
            None: If node not found
        """
        async with self._driver.session(database=self._DATABASE) as session:
            entity_name_label = await self._ensure_label(node_id)
            query = f"MATCH (n:`{entity_name_label}`) RETURN n"
            result = await session.run(query)
            record = await result.single()
            if record:
                node_dict = dict(record["n"])
                logger.debug(
                    f"{inspect.currentframe().f_code.co_name}: query: {query}, result: {node_dict}"
                )
                return node_dict
            return None

    async def node_degree(self, node_id: str) -> Union[int, None]:
        """Count the relationships attached to the node labelled ``node_id``.

        Returns:
            The relationship count, or None when the query yields no record.
        """
        entity_name_label = node_id.strip('"')

        async with self._driver.session(database=self._DATABASE) as session:
            query = f"""
                MATCH (n:`{entity_name_label}`)
                RETURN COUNT{{ (n)--() }} AS totalEdgeCount
            """
            result = await session.run(query)
            record = await result.single()
            if record:
                edge_count = record["totalEdgeCount"]
                logger.debug(
                    f"{inspect.currentframe().f_code.co_name}:query:{query}:result:{edge_count}"
                )
                return edge_count
            return None

    async def edge_degree(self, src_id: str, tgt_id: str) -> int:
        """Return the sum of both endpoint degrees; a missing degree counts as 0."""
        entity_name_label_source = src_id.strip('"')
        entity_name_label_target = tgt_id.strip('"')
        src_degree = await self.node_degree(entity_name_label_source)
        trg_degree = await self.node_degree(entity_name_label_target)

        # Convert None to 0 for addition
        src_degree = 0 if src_degree is None else src_degree
        trg_degree = 0 if trg_degree is None else trg_degree

        degrees = int(src_degree) + int(trg_degree)
        logger.debug(
            f"{inspect.currentframe().f_code.co_name}:query:src_Degree+trg_degree:result:{degrees}"
        )
        return degrees

    @staticmethod
    def _default_edge() -> dict:
        """Build a fresh copy of the edge properties used when nothing better is known."""
        return {"weight": 0.0, "source_id": None, "target_id": None}

    async def get_edge(
        self, source_node_id: str, target_node_id: str
    ) -> Union[dict, None]:
        """Find the edge from the source label to the target label.

        Args:
            source_node_id (str): Label of the source node
            target_node_id (str): Label of the target node

        Returns:
            dict: Edge properties, always containing ``weight``, ``source_id``
                and ``target_id``. The default properties
                (``weight=0.0``, ids ``None``) are returned when no edge is
                found or when any error occurs; errors are logged, not raised.
        """
        try:
            entity_name_label_source = source_node_id.strip('"')
            entity_name_label_target = target_node_id.strip('"')

            async with self._driver.session(database=self._DATABASE) as session:
                # The f-string is already fully interpolated. It must not be
                # passed through str.format() again, or a label containing
                # '{' or '}' would raise and be masked as "default edge".
                query = f"""
                MATCH (start:`{entity_name_label_source}`)-[r]->(end:`{entity_name_label_target}`)
                RETURN properties(r) as edge_properties
                LIMIT 1
                """
                query_result = await session.run(query)
                record = await query_result.single()

                # The RETURN clause always yields the edge_properties column,
                # so a non-empty record is all that needs checking.
                if record:
                    try:
                        edge = dict(record["edge_properties"])
                        # Ensure required keys exist with defaults
                        for key, default_value in self._default_edge().items():
                            if key not in edge:
                                edge[key] = default_value
                                logger.warning(
                                    f"Edge between {entity_name_label_source} and {entity_name_label_target} "
                                    f"missing {key}, using default: {default_value}"
                                )
                        logger.debug(
                            f"{inspect.currentframe().f_code.co_name}:query:{query}:result:{edge}"
                        )
                        return edge
                    except (KeyError, TypeError, ValueError) as e:
                        logger.error(
                            f"Error processing edge properties between {entity_name_label_source} "
                            f"and {entity_name_label_target}: {str(e)}"
                        )
                        # Return default edge properties on error
                        return self._default_edge()

                logger.debug(
                    f"{inspect.currentframe().f_code.co_name}: No edge found between {entity_name_label_source} and {entity_name_label_target}"
                )
                # Return default edge properties when no edge found
                return self._default_edge()

        except Exception as e:
            logger.error(
                f"Error in get_edge between {source_node_id} and {target_node_id}: {str(e)}"
            )
            # Return default edge properties on error
            return self._default_edge()

    async def get_node_edges(self, source_node_id: str) -> List[Tuple[str, str]]:
        """Retrieve all edges (relationships) of the node identified by its label.

        Returns:
            A list of ``(source_label, connected_label)`` tuples, using the
            first label of each node. Rows where either side has no label
            (including a node with no relationships) are skipped.
        """
        node_label = source_node_id.strip('"')
        query = f"""MATCH (n:`{node_label}`)
                OPTIONAL MATCH (n)-[r]-(connected)
                RETURN n, r, connected"""
        async with self._driver.session(database=self._DATABASE) as session:
            results = await session.run(query)
            edges = []
            async for record in results:
                source_node = record["n"]
                connected_node = record["connected"]

                source_label = (
                    list(source_node.labels)[0] if source_node.labels else None
                )
                target_label = (
                    list(connected_node.labels)[0]
                    if connected_node and connected_node.labels
                    else None
                )

                if source_label and target_label:
                    edges.append((source_label, target_label))

            return edges

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=4, max=10),
        retry=retry_if_exception_type(
            (
                neo4jExceptions.ServiceUnavailable,
                neo4jExceptions.TransientError,
                neo4jExceptions.WriteServiceUnavailable,
                neo4jExceptions.ClientError,
            )
        ),
    )
    async def upsert_node(self, node_id: str, node_data: Dict[str, Any]):
        """Upsert a node in the Neo4j database.

        Retried up to three times on the driver errors listed in the
        decorator; any failure is logged and re-raised.

        Args:
            node_id: The unique identifier for the node (used as label)
            node_data: Dictionary of node properties
        """
        label = await self._ensure_label(node_id)
        properties = node_data

        async def _do_upsert(tx: AsyncManagedTransaction):
            query = f"""
            MERGE (n:`{label}`)
            SET n += $properties
            """
            await tx.run(query, properties=properties)
            logger.debug(
                f"Upserted node with label '{label}' and properties: {properties}"
            )

        try:
            async with self._driver.session(database=self._DATABASE) as session:
                await session.execute_write(_do_upsert)
        except Exception as e:
            logger.error(f"Error during upsert: {str(e)}")
            raise

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=4, max=10),
        retry=retry_if_exception_type(
            (
                neo4jExceptions.ServiceUnavailable,
                neo4jExceptions.TransientError,
                neo4jExceptions.WriteServiceUnavailable,
                neo4jExceptions.ClientError,
            )
        ),
    )
    async def upsert_edge(
        self, source_node_id: str, target_node_id: str, edge_data: Dict[str, Any]
    ):
        """Upsert an edge and its properties between two nodes identified by their labels.

        Both nodes must already exist: they are MATCHed, not created. Retried
        up to three times on the driver errors listed in the decorator; any
        failure is logged and re-raised.

        Args:
            source_node_id (str): Label of the source node (used as identifier)
            target_node_id (str): Label of the target node (used as identifier)
            edge_data (dict): Dictionary of properties to set on the edge
        """
        source_label = await self._ensure_label(source_node_id)
        target_label = await self._ensure_label(target_node_id)
        edge_properties = edge_data

        async def _do_upsert_edge(tx: AsyncManagedTransaction):
            query = f"""
            MATCH (source:`{source_label}`)
            WITH source
            MATCH (target:`{target_label}`)
            MERGE (source)-[r:DIRECTED]->(target)
            SET r += $properties
            RETURN r
            """
            result = await tx.run(query, properties=edge_properties)
            record = await result.single()
            logger.debug(
                f"Upserted edge from '{source_label}' to '{target_label}' with properties: {edge_properties}, result: {record['r'] if record else None}"
            )

        try:
            async with self._driver.session(database=self._DATABASE) as session:
                await session.execute_write(_do_upsert_edge)
        except Exception as e:
            logger.error(f"Error during edge upsert: {str(e)}")
            raise

    async def _node2vec_embed(self):
        """Placeholder registered as the ``node2vec`` embedding algorithm."""
        print("Implemented but never called.")

    async def get_knowledge_graph(
        self, node_label: str, max_depth: int = 5
    ) -> KnowledgeGraph:
        """Get the connected subgraph around a label, including the start node.

        Args:
            node_label: Label of the starting node; ``"*"`` selects every
                node and relationship in the database.
            max_depth: Maximum traversal depth passed to
                ``apoc.path.subgraphAll`` (ignored for ``"*"``).

        Returns:
            A KnowledgeGraph with de-duplicated nodes and edges. It is empty
            when the starting node does not exist.

        NOTE(review): when the query raises a ClientError the result of
        ``_robust_fallback`` is returned as-is, which is a plain dict rather
        than a KnowledgeGraph. Confirm what callers expect.
        """
        label = node_label.strip('"')
        result = KnowledgeGraph()
        seen_nodes = set()
        seen_edges = set()

        async with self._driver.session(database=self._DATABASE) as session:
            try:
                main_query = ""
                if label == "*":
                    main_query = """
                    MATCH (n)
                    WITH collect(DISTINCT n) AS nodes
                    MATCH ()-[r]-()
                    RETURN nodes, collect(DISTINCT r) AS relationships;
                    """
                else:
                    # Critical debug step: first verify if starting node exists
                    validate_query = f"MATCH (n:`{label}`) RETURN n LIMIT 1"
                    validate_result = await session.run(validate_query)
                    if not await validate_result.single():
                        logger.warning(f"Starting node {label} does not exist!")
                        return result

                    # Optimized query (including direction handling and self-loops)
                    main_query = f"""
                    MATCH (start:`{label}`)
                    WITH start
                    CALL apoc.path.subgraphAll(start, {{
                        relationshipFilter: '>',
                        minLevel: 0,
                        maxLevel: {max_depth},
                        bfs: true
                    }})
                    YIELD nodes, relationships
                    RETURN nodes, relationships
                    """
                result_set = await session.run(main_query)
                record = await result_set.single()

                if record:
                    # Handle nodes (compatible with multi-label cases)
                    for node in record["nodes"]:
                        # The internal node id is the de-duplication key
                        node_id = node.id
                        if node_id not in seen_nodes:
                            result.nodes.append(
                                KnowledgeGraphNode(
                                    id=f"{node_id}",
                                    labels=list(node.labels),
                                    properties=dict(node),
                                )
                            )
                            seen_nodes.add(node_id)

                    # Handle relationships (including direction information)
                    for rel in record["relationships"]:
                        edge_id = rel.id
                        if edge_id not in seen_edges:
                            start = rel.start_node
                            end = rel.end_node
                            result.edges.append(
                                KnowledgeGraphEdge(
                                    id=f"{edge_id}",
                                    type=rel.type,
                                    source=f"{start.id}",
                                    target=f"{end.id}",
                                    properties=dict(rel),
                                )
                            )
                            seen_edges.add(edge_id)

                    logger.info(
                        f"Subgraph query successful | Node count: {len(result.nodes)} | Edge count: {len(result.edges)}"
                    )

            except neo4jExceptions.ClientError as e:
                logger.error(f"APOC query failed: {str(e)}")
                return await self._robust_fallback(label, max_depth)

        return result

    async def _robust_fallback(
        self, label: str, max_depth: int
    ) -> Dict[str, List[Dict]]:
        """Enhanced fallback query solution.

        Walks the graph label by label with plain Cypher, recursing into the
        neighbour of every newly seen relationship until ``max_depth``.

        Returns:
            ``{"nodes": [...], "edges": [...]}`` where each node is its
            property dict plus a ``labels`` list, and each edge is its
            property dict plus ``source``, ``target``, ``type`` and
            ``direction``.
        """
        result = {"nodes": [], "edges": []}
        visited_nodes = set()
        visited_edges = set()

        async def traverse(current_label: str, current_depth: int):
            if current_depth > max_depth:
                return

            # Get current node details
            node = await self.get_node(current_label)
            if not node:
                return

            node_id = f"{current_label}"
            if node_id in visited_nodes:
                return
            visited_nodes.add(node_id)

            # Add node data; get_node returns properties only, so the label
            # used for the lookup is recorded as the node's label list.
            node_data = dict(node)
            node_data["labels"] = [current_label]
            result["nodes"].append(node_data)

            # Get all outgoing and incoming edges
            query = f"""
            MATCH (a)-[r]-(b)
            WHERE a:`{current_label}` OR b:`{current_label}`
            RETURN a, r, b,
                   CASE WHEN startNode(r) = a THEN 'OUTGOING' ELSE 'INCOMING' END AS direction
            """
            async with self._driver.session(database=self._DATABASE) as session:
                results = await session.run(query)
                async for record in results:
                    # Handle edges
                    rel = record["r"]
                    edge_id = f"{rel.id}_{rel.type}"
                    if edge_id not in visited_edges:
                        edge_data = dict(rel)
                        edge_data.update(
                            {
                                "source": list(record["a"].labels)[0],
                                "target": list(record["b"].labels)[0],
                                "type": rel.type,
                                "direction": record["direction"],
                            }
                        )
                        result["edges"].append(edge_data)
                        visited_edges.add(edge_id)

                        # Recursively traverse adjacent nodes
                        next_label = (
                            list(record["b"].labels)[0]
                            if record["direction"] == "OUTGOING"
                            else list(record["a"].labels)[0]
                        )
                        await traverse(next_label, current_depth + 1)

        await traverse(label, 0)
        return result

    async def get_all_labels(self) -> List[str]:
        """Get all existing node labels in the database.

        Returns:
            ["Person", "Company", ...]  # Alphabetically sorted label list
        """
        async with self._driver.session(database=self._DATABASE) as session:
            # Method 1: Direct metadata query (Available for Neo4j 4.3+)
            # query = "CALL db.labels() YIELD label RETURN label"

            # Method 2: Query compatible with older versions
            query = """
                MATCH (n)
                WITH DISTINCT labels(n) AS node_labels
                UNWIND node_labels AS label
                RETURN DISTINCT label
                ORDER BY label
            """

            result = await session.run(query)
            return [record["label"] async for record in result]