Optimize node label retrieval with aggregation

- Enable allowDiskUse for large datasets
2025-12-30 00:13:15 +00:00 · 2025-06-26 14:14:52 +08:00 · 2025-06-26 14:14:52 +08:00 · c51079335e
commit c51079335e
parent 778ad4f23a
1 changed file with 6 additions and 1 deletion
--- a/lightrag/kg/mongo_impl.py
+++ b/lightrag/kg/mongo_impl.py
@@ -697,7 +697,12 @@ class MongoGraphStorage(BaseGraphStorage):
            [id1, id2, ...]  # Alphabetically sorted id list
        """

-        cursor = self.collection.find({}, projection={"_id": 1}, sort=[("_id", 1)])
+        # Use aggregation with allowDiskUse for large datasets
+        pipeline = [
+            {"$project": {"_id": 1}},
+            {"$sort": {"_id": 1}}
+        ]
+        cursor = await self.collection.aggregate(pipeline, allowDiskUse=True)
        labels = []
        async for doc in cursor:
            labels.append(doc["_id"])