diff --git a/rag/app/tag.py b/rag/app/tag.py index 7263bee5b..2c17fd7b9 100644 --- a/rag/app/tag.py +++ b/rag/app/tag.py @@ -27,7 +27,7 @@ def beAdoc(d, q, a, eng, row_num=-1): d["content_with_weight"] = q d["content_ltks"] = rag_tokenizer.tokenize(q) d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"]) - d["tag_kwd"] = [t.strip() for t in a.split(",") if t.strip()] + d["tag_kwd"] = [t.strip().replace(".", "_") for t in a.split(",") if t.strip()] if row_num >= 0: d["top_int"] = [row_num] return d diff --git a/rag/nlp/search.py b/rag/nlp/search.py index 7604bf2c1..d0d041452 100644 --- a/rag/nlp/search.py +++ b/rag/nlp/search.py @@ -465,7 +465,7 @@ class Dealer: cnt = np.sum([c for _, c in aggs]) tag_fea = sorted([(a, round(0.1*(c + 1) / (cnt + S) / max(1e-6, all_tags.get(a, 0.0001)))) for a, c in aggs], key=lambda x: x[1] * -1)[:topn_tags] - doc[TAG_FLD] = {a: c for a, c in tag_fea if c > 0} + doc[TAG_FLD] = {a.replace(".", "_"): c for a, c in tag_fea if c > 0} return True def tag_query(self, question: str, tenant_ids: str | list[str], kb_ids: list[str], all_tags, topn_tags=3, S=1000): @@ -481,4 +481,4 @@ class Dealer: cnt = np.sum([c for _, c in aggs]) tag_fea = sorted([(a, round(0.1*(c + 1) / (cnt + S) / max(1e-6, all_tags.get(a, 0.0001)))) for a, c in aggs], key=lambda x: x[1] * -1)[:topn_tags] - return {a: max(1, c) for a, c in tag_fea} + return {a.replace(".", "_"): max(1, c) for a, c in tag_fea}