From d4e6e2bd21e3aa29088206d06efef0d2ac66c61e Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Mon, 23 Jun 2025 14:54:01 +0800 Subject: [PATCH] Fix: doc_aggs issue. (#8418) ### What problem does this PR solve? #8406 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- rag/nlp/search.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/rag/nlp/search.py b/rag/nlp/search.py index 855468c9e..ad93c220e 100644 --- a/rag/nlp/search.py +++ b/rag/nlp/search.py @@ -391,14 +391,20 @@ class Dealer: for i in idx: if sim[i] < similarity_threshold: break - if len(ranks["chunks"]) >= page_size: - if aggs: - continue - break + id = sres.ids[i] chunk = sres.field[id] dnm = chunk.get("docnm_kwd", "") did = chunk.get("doc_id", "") + + if len(ranks["chunks"]) >= page_size: + if aggs: + if dnm not in ranks["doc_aggs"]: + ranks["doc_aggs"][dnm] = {"doc_id": did, "count": 0} + ranks["doc_aggs"][dnm]["count"] += 1 + continue + break + position_int = chunk.get("position_int", []) d = { "chunk_id": id,