Fix: Document parse via API will alot problen (#8407)

### What problem does this PR solve?
#8391
#8404

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

---------

Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
This commit is contained in:
Stephen Hu 2025-06-23 13:08:11 +08:00 committed by GitHub
parent 3a50908946
commit 794a4102c2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 1 additions and 2 deletions

View File

@ -371,7 +371,7 @@ class ParserConfig(Base):
raptor: RaptorConfig | None = None
tag_kb_ids: list[str] = Field(default_factory=list)
topn_tags: int = Field(default=1, ge=1, le=10)
filename_embd_weight: float | None = Field(default=None, ge=0.0, le=1.0)
filename_embd_weight: float | None = Field(default=0.1, ge=0.0, le=1.0)
task_page_size: int | None = Field(default=None, ge=1)
pages: list[list[int]] | None = None

View File

@ -434,7 +434,6 @@ async def embedding(docs, mdl, parser_config=None, callback=None):
tk_count += c
callback(prog=0.7 + 0.2 * (i + 1) / len(cnts), msg="")
cnts = cnts_
title_w = float(parser_config.get("filename_embd_weight", 0.1))
vects = (title_w * tts + (1 - title_w) *
cnts) if len(tts) == len(cnts) else cnts