mirror of
				https://github.com/HKUDS/LightRAG.git
				synced 2025-11-03 19:29:38 +00:00 
			
		
		
		
	- Fix the bug from upstream main that accessed the document status as doc['status']
- Improve the performance of Apache AGE. - Revise the README.md for Apache AGE indexing.
This commit is contained in:
		
							parent
							
								
									75b5739f81
								
							
						
					
					
						commit
						02ac96ff8e
					
				
							
								
								
									
										33
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										33
									
								
								README.md
									
									
									
									
									
								
							@ -455,9 +455,38 @@ For production level scenarios you will most likely want to leverage an enterpri
 | 
			
		||||
* If you prefer Docker and are a beginner, please start with this image to avoid hiccups (DO read the overview): https://hub.docker.com/r/shangor/postgres-for-rag
 | 
			
		||||
* How to start? Refer to: [examples/lightrag_zhipu_postgres_demo.py](https://github.com/HKUDS/LightRAG/blob/main/examples/lightrag_zhipu_postgres_demo.py)
 | 
			
		||||
* Example of creating indexes for AGE (change `dickens` below to your graph name if necessary):
 | 
			
		||||
  ```
 | 
			
		||||
  ```sql
 | 
			
		||||
  load 'age';
 | 
			
		||||
  SET search_path = ag_catalog, "$user", public;
 | 
			
		||||
  CREATE INDEX idx_entity ON dickens."Entity" USING gin (agtype_access_operator(properties, '"node_id"'));
 | 
			
		||||
  CREATE INDEX CONCURRENTLY entity_p_idx ON dickens."Entity" (id);
 | 
			
		||||
  CREATE INDEX CONCURRENTLY vertex_p_idx ON dickens."_ag_label_vertex" (id);
 | 
			
		||||
  CREATE INDEX CONCURRENTLY directed_p_idx ON dickens."DIRECTED" (id);
 | 
			
		||||
  CREATE INDEX CONCURRENTLY directed_eid_idx ON dickens."DIRECTED" (end_id);
 | 
			
		||||
  CREATE INDEX CONCURRENTLY directed_sid_idx ON dickens."DIRECTED" (start_id);
 | 
			
		||||
  CREATE INDEX CONCURRENTLY directed_seid_idx ON dickens."DIRECTED" (start_id,end_id);
 | 
			
		||||
  CREATE INDEX CONCURRENTLY edge_p_idx ON dickens."_ag_label_edge" (id);
 | 
			
		||||
  CREATE INDEX CONCURRENTLY edge_sid_idx ON dickens."_ag_label_edge" (start_id);
 | 
			
		||||
  CREATE INDEX CONCURRENTLY edge_eid_idx ON dickens."_ag_label_edge" (end_id);
 | 
			
		||||
  CREATE INDEX CONCURRENTLY edge_seid_idx ON dickens."_ag_label_edge" (start_id,end_id);
 | 
			
		||||
  create INDEX CONCURRENTLY vertex_idx_node_id ON dickens."_ag_label_vertex" (ag_catalog.agtype_access_operator(properties, '"node_id"'::agtype));
 | 
			
		||||
  create INDEX CONCURRENTLY entity_idx_node_id ON dickens."Entity" (ag_catalog.agtype_access_operator(properties, '"node_id"'::agtype));
 | 
			
		||||
  CREATE INDEX CONCURRENTLY entity_node_id_gin_idx ON dickens."Entity" using gin(properties);
 | 
			
		||||
  ALTER TABLE dickens."DIRECTED" CLUSTER ON directed_sid_idx;
 | 
			
		||||
 | 
			
		||||
  -- drop if necessary
 | 
			
		||||
  drop INDEX entity_p_idx;
 | 
			
		||||
  drop INDEX vertex_p_idx;
 | 
			
		||||
  drop INDEX directed_p_idx;
 | 
			
		||||
  drop INDEX directed_eid_idx;
 | 
			
		||||
  drop INDEX directed_sid_idx;
 | 
			
		||||
  drop INDEX directed_seid_idx;
 | 
			
		||||
  drop INDEX edge_p_idx;
 | 
			
		||||
  drop INDEX edge_sid_idx;
 | 
			
		||||
  drop INDEX edge_eid_idx;
 | 
			
		||||
  drop INDEX edge_seid_idx;
 | 
			
		||||
  drop INDEX vertex_idx_node_id;
 | 
			
		||||
  drop INDEX entity_idx_node_id;
 | 
			
		||||
  drop INDEX entity_node_id_gin_idx;
 | 
			
		||||
  ```
 | 
			
		||||
* Known issue with Apache AGE: the released versions have the following issue:
 | 
			
		||||
  > You might find that the properties of the nodes/edges are empty.
 | 
			
		||||
 | 
			
		||||
@ -30,6 +30,7 @@ from ..base import (
 | 
			
		||||
    DocStatus,
 | 
			
		||||
    DocProcessingStatus,
 | 
			
		||||
    BaseGraphStorage,
 | 
			
		||||
    T,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
if sys.platform.startswith("win"):
 | 
			
		||||
@ -442,6 +443,22 @@ class PGDocStatusStorage(DocStatusStorage):
 | 
			
		||||
            existed = set([element["id"] for element in result])
 | 
			
		||||
            return set(data) - existed
 | 
			
		||||
 | 
			
		||||
    async def get_by_id(self, id: str) -> Union[T, None]:
 | 
			
		||||
        sql = "select * from LIGHTRAG_DOC_STATUS where workspace=$1 and id=$2"
 | 
			
		||||
        params = {"workspace": self.db.workspace, "id": id}
 | 
			
		||||
        result = await self.db.query(sql, params, True)
 | 
			
		||||
        if result is None:
 | 
			
		||||
            return None
 | 
			
		||||
        else:
 | 
			
		||||
            return DocProcessingStatus(
 | 
			
		||||
                content_length=result[0]["content_length"],
 | 
			
		||||
                content_summary=result[0]["content_summary"],
 | 
			
		||||
                status=result[0]["status"],
 | 
			
		||||
                chunks_count=result[0]["chunks_count"],
 | 
			
		||||
                created_at=result[0]["created_at"],
 | 
			
		||||
                updated_at=result[0]["updated_at"],
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
    async def get_status_counts(self) -> Dict[str, int]:
 | 
			
		||||
        """Get counts of documents in each status"""
 | 
			
		||||
        sql = """SELECT status as "status", COUNT(1) as "count"
 | 
			
		||||
@ -884,9 +901,9 @@ class PGGraphStorage(BaseGraphStorage):
 | 
			
		||||
 | 
			
		||||
        query = """SELECT * FROM cypher('%s', $$
 | 
			
		||||
                      MATCH (n:Entity {node_id: "%s"})
 | 
			
		||||
                      OPTIONAL MATCH (n)-[r]-(connected)
 | 
			
		||||
                      RETURN n, r, connected
 | 
			
		||||
                    $$) AS (n agtype, r agtype, connected agtype)""" % (
 | 
			
		||||
                      OPTIONAL MATCH (n)-[]-(connected)
 | 
			
		||||
                      RETURN n, connected
 | 
			
		||||
                    $$) AS (n agtype, connected agtype)""" % (
 | 
			
		||||
            self.graph_name,
 | 
			
		||||
            label,
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
@ -373,7 +373,7 @@ class LightRAG:
 | 
			
		||||
            doc_id
 | 
			
		||||
            for doc_id in new_docs.keys()
 | 
			
		||||
            if (current_doc := await self.doc_status.get_by_id(doc_id)) is None
 | 
			
		||||
            or current_doc["status"] == DocStatus.FAILED
 | 
			
		||||
            or current_doc.status == DocStatus.FAILED
 | 
			
		||||
        }
 | 
			
		||||
        new_docs = {k: v for k, v in new_docs.items() if k in _add_doc_keys}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user