diff --git a/api/db/services/document_service.py b/api/db/services/document_service.py
index b3bc55513..fa7a87ad3 100644
--- a/api/db/services/document_service.py
+++ b/api/db/services/document_service.py
@@ -142,7 +142,7 @@ class DocumentService(CommonService):
     @classmethod
     @DB.connection_context()
     def get_unfinished_docs(cls):
-        fields = [cls.model.id, cls.model.process_begin_at, cls.model.parser_config, cls.model.progress_msg]
+        fields = [cls.model.id, cls.model.process_begin_at, cls.model.parser_config, cls.model.progress_msg, cls.model.run]
         docs = cls.model.select(*fields) \
             .where(
                 cls.model.status == StatusEnum.VALID.value,
@@ -311,7 +311,7 @@ class DocumentService(CommonService):
                 prg = 0
                 finished = True
                 bad = 0
-                status = TaskStatus.RUNNING.value
+                status = d["run"]  # TaskStatus.RUNNING.value
                 for t in tsks:
                     if 0 <= t.progress < 1:
                         finished = False
diff --git a/conf/llm_factories.json b/conf/llm_factories.json
index 586b81f06..4b29802c7 100644
--- a/conf/llm_factories.json
+++ b/conf/llm_factories.json
@@ -92,12 +92,6 @@
                     "max_tokens": 32768,
                     "model_type": "chat"
                 },
-                {
-                    "llm_name": "qwen-max-1201",
-                    "tags": "LLM,CHAT,6K",
-                    "max_tokens": 5899,
-                    "model_type": "chat"
-                },
                 {
                     "llm_name": "text-embedding-v2",
                     "tags": "TEXT EMBEDDING,2K",
diff --git a/graphrag/mind_map_prompt.py b/graphrag/mind_map_prompt.py
index fac8f8bea..f6016b7eb 100644
--- a/graphrag/mind_map_prompt.py
+++ b/graphrag/mind_map_prompt.py
@@ -22,7 +22,6 @@ MIND_MAP_EXTRACTION_PROMPT = """
 3. If the subject matter is really complex, split them into sub-sections.
 
 - Output requirement:
-  - Always try to maximize the number of sub-sections.
   - In language of 
   - MUST IN FORMAT OF MARKDOWN
 
diff --git a/rag/app/knowledge_graph.py b/rag/app/knowledge_graph.py
index 9f47769b0..12b87fe09 100644
--- a/rag/app/knowledge_graph.py
+++ b/rag/app/knowledge_graph.py
@@ -13,7 +13,8 @@ def chunk(filename, binary, tenant_id, from_page=0, to_page=100000,
     eng = lang.lower() == "english"
 
     parser_config["layout_recognize"] = False
-    sections = naive.chunk(filename, binary, from_page=from_page, to_page=to_page, section_only=True, callback=callback ,parser_config=parser_config)
+    sections = naive.chunk(filename, binary, from_page=from_page, to_page=to_page, section_only=True,
+                           parser_config=parser_config, callback=callback)
     chunks = build_knowlege_graph_chunks(tenant_id, sections, callback,
                                          parser_config.get("entity_types", ["organization", "person", "location", "event", "time"])
                                          )
@@ -27,4 +28,4 @@ def chunk(filename, binary, tenant_id, from_page=0, to_page=100000,
     doc["title_sm_tks"] = rag_tokenizer.fine_grained_tokenize(doc["title_tks"])
     chunks.extend(tokenize_chunks(sections, doc, eng))
 
-    return chunks
+    return chunks
\ No newline at end of file
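
Note on the status change in DocumentService.update_progress (second hunk of the first file): the sweep used to reset every unfinished document to TaskStatus.RUNNING, which could overwrite a user-initiated cancel stored in the document's run column; that column is also why get_unfinished_docs now selects cls.model.run. Below is a minimal sketch of the resulting aggregation logic, assuming a string-valued TaskStatus enum and the repo's convention that progress == -1 marks a failed task; aggregate_status, doc_run, and task_progresses are illustrative stand-ins, not the repository's code.

    from enum import Enum

    class TaskStatus(Enum):
        # Simplified stand-in for the repo's enum; values assumed.
        RUNNING = "1"
        CANCEL = "2"
        DONE = "3"
        FAIL = "4"

    def aggregate_status(doc_run: str, task_progresses: list) -> str:
        # Seed with the document's stored state (d["run"]) instead of
        # RUNNING, so a cancelled document is not flipped back to
        # RUNNING by the progress sweep while its tasks wind down.
        status = doc_run
        if any(p == -1 for p in task_progresses):  # some task failed
            status = TaskStatus.FAIL.value
        elif task_progresses and all(p >= 1 for p in task_progresses):
            status = TaskStatus.DONE.value  # every task finished
        return status

    # A cancelled document with tasks still in flight stays cancelled:
    assert aggregate_status(TaskStatus.CANCEL.value, [0.4, 0.9]) == TaskStatus.CANCEL.value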