jyong 2025-06-10 18:16:30 +08:00
parent 80b219707e
commit e934503fa0
5 changed files with 39 additions and 3 deletions

View File

@@ -283,6 +283,15 @@ class DatasetApi(Resource):
location="json", location="json",
help="Invalid external knowledge api id.", help="Invalid external knowledge api id.",
) )
parser.add_argument(
"icon_info",
type=dict,
required=False,
nullable=True,
location="json",
help="Invalid icon info.",
)
args = parser.parse_args()
data = request.get_json()
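Note: the new `icon_info` argument is an optional JSON object (`type=dict`, `nullable=True`). A minimal sketch of how this argument parses in isolation, assuming a plain Flask test context (the route and the icon payload keys are illustrative, not taken from this commit):

```python
from flask import Flask
from flask_restful import reqparse

app = Flask(__name__)

parser = reqparse.RequestParser()
parser.add_argument(
    "icon_info",
    type=dict,
    required=False,
    nullable=True,
    location="json",
    help="Invalid icon info.",
)

# Simulate a PATCH carrying the new field; the icon keys are assumed.
with app.test_request_context(
    "/datasets/some-id", method="PATCH", json={"icon_info": {"icon": "📘"}}
):
    args = parser.parse_args()
    print(args["icon_info"])  # {'icon': '📘'}
```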

View File

@@ -1,18 +1,21 @@
import datetime
import logging
from collections.abc import Mapping
import time
from typing import Any, cast
from sqlalchemy import func
from core.app.entities.app_invoke_entities import InvokeFrom
from core.rag.index_processor.index_processor_factory import IndexProcessorFactory
from core.rag.retrieval.retrieval_methods import RetrievalMethod
from core.workflow.entities.node_entities import NodeRunResult
from core.workflow.entities.variable_pool import VariablePool
from core.workflow.entities.workflow_node_execution import WorkflowNodeExecutionStatus
from core.workflow.enums import SystemVariableKey
from core.workflow.nodes.enums import NodeType
from extensions.ext_database import db
from models.dataset import Dataset, Document, DocumentSegment
from core.workflow.entities.workflow_node_execution import WorkflowNodeExecutionStatus
from ..base import BaseNode
from .entities import KnowledgeIndexNodeData
@@ -111,13 +114,19 @@ class KnowledgeIndexNode(BaseNode[KnowledgeIndexNodeData]):
document = db.session.query(Document).filter_by(id=document_id.value).first()
if not document:
raise KnowledgeIndexNodeError(f"Document {document_id.value} not found.")
# chunk nodes by chunk size
indexing_start_at = time.perf_counter()
index_processor = IndexProcessorFactory(dataset.chunk_structure).init_index_processor()
index_processor.index(dataset, document, chunks)
indexing_end_at = time.perf_counter()
document.indexing_latency = indexing_end_at - indexing_start_at
# update document status
document.indexing_status = "completed"
document.completed_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None)
document.word_count = db.session.query(func.sum(DocumentSegment.word_count)).filter(
DocumentSegment.document_id == document.id,
DocumentSegment.dataset_id == dataset.id,
).scalar()
db.session.add(document)
# update document segment status
db.session.query(DocumentSegment).filter(
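Note: the new latency bookkeeping brackets the indexing call with `time.perf_counter()`, a monotonic clock suited to timing code sections (unlike `time.time()`, it never jumps backwards). A standalone sketch of the same pattern; `index_chunks` is a stand-in, not from this codebase:

```python
import time

def index_chunks(chunks):
    # Stand-in for index_processor.index(dataset, document, chunks).
    for _ in chunks:
        pass

indexing_start_at = time.perf_counter()
index_chunks(range(100_000))
indexing_latency = time.perf_counter() - indexing_start_at  # seconds, as a float
print(f"indexing took {indexing_latency:.4f}s")
```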

View File

@@ -88,6 +88,8 @@ dataset_detail_fields = {
"chunk_structure": fields.String, "chunk_structure": fields.String,
"icon_info": fields.Nested(icon_info_fields), "icon_info": fields.Nested(icon_info_fields),
"is_published": fields.Boolean, "is_published": fields.Boolean,
"total_documents": fields.Integer,
"total_available_documents": fields.Integer,
}
dataset_query_detail_fields = {
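Note: flask-restful's marshalling resolves field names against object attributes, so the new `total_documents` / `total_available_documents` model properties are picked up with no extra wiring. A minimal sketch of that mechanism (`FakeDataset` is illustrative, not from this codebase):

```python
from flask_restful import fields, marshal

class FakeDataset:
    # Properties are resolved exactly like plain attributes.
    @property
    def total_documents(self):
        return 12

    @property
    def total_available_documents(self):
        return 9

sketch_fields = {
    "total_documents": fields.Integer,
    "total_available_documents": fields.Integer,
}

print(dict(marshal(FakeDataset(), sketch_fields)))
# {'total_documents': 12, 'total_available_documents': 9}
```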

View File

@@ -311,6 +311,19 @@ class DatasetProcessRule(Base):
return json.loads(self.rules) if self.rules else None
except JSONDecodeError:
return None
@property
def total_documents(self):
return db.session.query(func.count(Document.id)).filter(Document.dataset_id == self.dataset_id).scalar()
@property
def total_available_documents(self):
return db.session.query(func.count(Document.id)).filter(
Document.dataset_id == self.dataset_id,
Document.indexing_status == "completed",
Document.enabled == True,
Document.archived == False,
).scalar()
class Document(Base):
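Note: each property issues a fresh `COUNT` aggregate, so every attribute access is one database round trip; serializing a list of datasets will run two such queries per row. A runnable sketch of the same SQLAlchemy pattern against an in-memory SQLite database (the `Doc` model is illustrative):

```python
from sqlalchemy import Boolean, Column, String, create_engine, func
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()

class Doc(Base):
    __tablename__ = "documents"
    id = Column(String, primary_key=True)
    dataset_id = Column(String)
    indexing_status = Column(String)
    enabled = Column(Boolean)
    archived = Column(Boolean)

engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add_all([
        Doc(id="1", dataset_id="d1", indexing_status="completed", enabled=True, archived=False),
        Doc(id="2", dataset_id="d1", indexing_status="indexing", enabled=True, archived=False),
    ])
    session.commit()

    total = session.query(func.count(Doc.id)).filter(Doc.dataset_id == "d1").scalar()
    available = session.query(func.count(Doc.id)).filter(
        Doc.dataset_id == "d1",
        Doc.indexing_status == "completed",
        Doc.enabled == True,  # noqa: E712 -- mirrors the style used in the commit
        Doc.archived == False,  # noqa: E712
    ).scalar()
    print(total, available)  # 2 1
```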

View File

@@ -484,6 +484,9 @@ class DatasetService:
# update Retrieval model
filtered_data["retrieval_model"] = data["retrieval_model"]
# update icon info
if data.get("icon_info"):
filtered_data["icon_info"] = data.get("icon_info")
db.session.query(Dataset).filter_by(id=dataset_id).update(filtered_data)
db.session.commit()
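Note: `Query.update()` with a dict issues a single `UPDATE ... WHERE` statement without loading ORM objects, which is why `icon_info` is merged into `filtered_data` before the call rather than set on an instance. A minimal sketch of the pattern on a throwaway model (`Ds` and the icon keys are assumptions):

```python
from sqlalchemy import JSON, Column, String, create_engine
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()

class Ds(Base):
    __tablename__ = "datasets"
    id = Column(String, primary_key=True)
    icon_info = Column(JSON, nullable=True)

engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add(Ds(id="d1"))
    session.commit()

    filtered_data = {}
    data = {"icon_info": {"icon": "📘"}}  # assumed payload shape
    if data.get("icon_info"):
        filtered_data["icon_info"] = data["icon_info"]

    # One UPDATE statement; no instances are loaded or refreshed.
    session.query(Ds).filter_by(id="d1").update(filtered_data)
    session.commit()
    print(session.get(Ds, "d1").icon_info)  # {'icon': '📘'}
```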