diff --git a/kag/builder/component/extractor/schema_constraint_extractor.py b/kag/builder/component/extractor/schema_constraint_extractor.py
index 4dfbb2ac..fd857e6f 100644
--- a/kag/builder/component/extractor/schema_constraint_extractor.py
+++ b/kag/builder/component/extractor/schema_constraint_extractor.py
@@ -90,7 +90,9 @@ class SchemaConstraintExtractor(ExtractorABC):
         Returns:
             The result of the named entity recognition operation.
         """
-        ner_result = self.llm.invoke({"input": passage}, self.ner_prompt)
+        ner_result = self.llm.invoke(
+            {"input": passage}, self.ner_prompt, with_except=False
+        )
         if self.external_graph:
             extra_ner_result = self.external_graph.ner(passage)
         else:
@@ -133,7 +135,9 @@ class SchemaConstraintExtractor(ExtractorABC):
             The result of the named entity standardization operation.
         """
         return self.llm.invoke(
-            {"input": passage, "named_entities": entities}, self.std_prompt
+            {"input": passage, "named_entities": entities},
+            self.std_prompt,
+            with_except=False,
         )
 
     @retry(stop=stop_after_attempt(3))
@@ -153,7 +157,9 @@ class SchemaConstraintExtractor(ExtractorABC):
             return []
 
         return self.llm.invoke(
-            {"input": passage, "entity_list": entities}, self.relation_prompt
+            {"input": passage, "entity_list": entities},
+            self.relation_prompt,
+            with_except=False,
         )
 
     @retry(stop=stop_after_attempt(3))
@@ -170,7 +176,7 @@ class SchemaConstraintExtractor(ExtractorABC):
         if self.event_prompt is None:
             logger.debug("Event extraction prompt not configured, skip.")
             return []
-        return self.llm.invoke({"input": passage}, self.event_prompt)
+        return self.llm.invoke({"input": passage}, self.event_prompt, with_except=False)
 
     def parse_nodes_and_edges(self, entities: List[Dict], category: str = None):
         """
diff --git a/kag/builder/component/extractor/schema_free_extractor.py b/kag/builder/component/extractor/schema_free_extractor.py
index ccf29128..da932265 100644
--- a/kag/builder/component/extractor/schema_free_extractor.py
+++ b/kag/builder/component/extractor/schema_free_extractor.py
@@ -98,7 +98,9 @@ class SchemaFreeExtractor(ExtractorABC):
         Returns:
             The result of the named entity recognition operation.
         """
-        ner_result = self.llm.invoke({"input": passage}, self.ner_prompt)
+        ner_result = self.llm.invoke(
+            {"input": passage}, self.ner_prompt, with_except=False
+        )
         if self.external_graph:
             extra_ner_result = self.external_graph.ner(passage)
         else:
@@ -140,7 +142,9 @@ class SchemaFreeExtractor(ExtractorABC):
             Standardized entity information.
         """
         return self.llm.invoke(
-            {"input": passage, "named_entities": entities}, self.std_prompt
+            {"input": passage, "named_entities": entities},
+            self.std_prompt,
+            with_except=False,
         )
 
     @retry(stop=stop_after_attempt(3))
@@ -154,7 +158,9 @@ class SchemaFreeExtractor(ExtractorABC):
             The result of the triples extraction operation.
         """
         return self.llm.invoke(
-            {"input": passage, "entity_list": entities}, self.triple_prompt
+            {"input": passage, "entity_list": entities},
+            self.triple_prompt,
+            with_except=False,
         )
 
     def assemble_sub_graph_with_spg_records(self, entities: List[Dict]):
diff --git a/kag/interface/common/llm_client.py b/kag/interface/common/llm_client.py
index f9571a71..aba82756 100644
--- a/kag/interface/common/llm_client.py
+++ b/kag/interface/common/llm_client.py
@@ -77,7 +77,7 @@ class LLMClient(Registrable):
         variables: Dict[str, Any],
         prompt_op: PromptABC,
         with_json_parse: bool = True,
-        with_except: bool = False,
+        with_except: bool = True,
    ):
         """
         Call the model and process the result.
@@ -108,12 +108,12 @@ class LLMClient(Registrable):
             logger.debug(f"Result: {result}")
         except Exception as e:
             import traceback
-
-            logger.debug(f"Error {e} during invocation: {traceback.format_exc()}")
+            logger.info(f"Error {e} during invocation: {traceback.format_exc()}")
             if with_except:
                 raise RuntimeError(
                     f"LLM invoke exception, info: {e}\nllm input: \n{prompt}\nllm output: \n{response}"
                 )
+        return result
 
     def batch(
diff --git a/kag/solver/retriever/impl/default_chunk_retrieval.py b/kag/solver/retriever/impl/default_chunk_retrieval.py
index 90037122..b8d33696 100644
--- a/kag/solver/retriever/impl/default_chunk_retrieval.py
+++ b/kag/solver/retriever/impl/default_chunk_retrieval.py
@@ -171,7 +171,6 @@ class KAGRetriever(ChunkRetriever):
         Returns:
             dict: A dictionary with keys as document chunk IDs and values as the vector similarity scores.
         """
-        scores = dict()
         try:
             scores = query_sim_doc_cache.get(query)
             if scores:
@@ -186,6 +185,7 @@ class KAGRetriever(ChunkRetriever):
             scores = {item["node"]["id"]: item["score"] for item in top_k}
             query_sim_doc_cache.put(query, scores)
         except Exception as e:
+            scores = dict()
             logger.error(f"run calculate_sim_scores failed, info: {e}", exc_info=True)
         return scores
 
@@ -386,14 +386,20 @@ class KAGRetriever(ChunkRetriever):
         return matched_entities
 
     def _parse_ner_list(self, query):
-        ner_list = ner_cache.get(query)
-        if ner_list:
-            return ner_list
-        ner_list = self.named_entity_recognition(query)
-        if self.with_semantic:
-            std_ner_list = self.named_entity_standardization(query, ner_list)
-            self.append_official_name(ner_list, std_ner_list)
-        ner_cache.put(query, ner_list)
+        ner_list = []
+        try:
+            ner_list = ner_cache.get(query)
+            if ner_list:
+                return ner_list
+            ner_list = self.named_entity_recognition(query)
+            if self.with_semantic:
+                std_ner_list = self.named_entity_standardization(query, ner_list)
+                self.append_official_name(ner_list, std_ner_list)
+            ner_cache.put(query, ner_list)
+        except Exception as e:
+            if not ner_list:
+                ner_list = []
+            logger.warning(f"_parse_ner_list {query} failed {e}", exc_info=True)
         return ner_list
 
     def recall_docs(
@@ -504,15 +510,20 @@ class KAGRetriever(ChunkRetriever):
             else:
                 doc_score = doc_ids[doc_id]
             counter += 1
-            node = self.graph_api.get_entity_prop_by_id(
-                label=self.schema.get_label_within_prefix(CHUNK_TYPE),
-                biz_id=doc_id,
-            )
-            node_dict = dict(node.items())
-            matched_docs.append(
-                f"#{node_dict['name']}#{node_dict['content']}#{doc_score}"
-            )
-            hits_docs.add(node_dict["name"])
+            try:
+                node = self.graph_api.get_entity_prop_by_id(
+                    label=self.schema.get_label_within_prefix(CHUNK_TYPE),
+                    biz_id=doc_id,
+                )
+                node_dict = dict(node.items())
+                matched_docs.append(
+                    f"#{node_dict['name']}#{node_dict['content']}#{doc_score}"
+                )
+                hits_docs.add(node_dict["name"])
+            except Exception as e:
+                logger.warning(
+                    f"{doc_id} get_entity_prop_by_id failed: {e}", exc_info=True
+                )
         query = "\n".join(queries)
         try:
             text_matched = self.search_api.search_text(