jyong 2025-06-10 18:16:30 +08:00
parent 80b219707e
commit e934503fa0
5 changed files with 39 additions and 3 deletions

View File

@@ -283,6 +283,15 @@ class DatasetApi(Resource):
            location="json",
            help="Invalid external knowledge api id.",
        )
        parser.add_argument(
            "icon_info",
            type=dict,
            required=False,
            nullable=True,
            location="json",
            help="Invalid icon info.",
        )
        args = parser.parse_args()
        data = request.get_json()
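For context, a hedged sketch of a PATCH body that would exercise the new optional, nullable dict argument. The endpoint path, auth header, and the keys inside icon_info are assumptions for illustration only; the diff itself only registers the field with reqparse.

import requests

payload = {
    "name": "my-dataset",
    "icon_info": {  # accepted by the new dict-typed, optional, nullable argument
        "icon": "📚",
        "icon_type": "emoji",
        "icon_background": "#FFF4ED",
    },
}
resp = requests.patch(
    "https://example.com/console/api/datasets/<dataset_id>",  # placeholder URL
    json=payload,
    headers={"Authorization": "Bearer <token>"},
)
resp.raise_for_status()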

View File

@@ -1,18 +1,21 @@
import datetime
import logging
from collections.abc import Mapping
import time
from typing import Any, cast
from sqlalchemy import func
from core.app.entities.app_invoke_entities import InvokeFrom
from core.rag.index_processor.index_processor_factory import IndexProcessorFactory
from core.rag.retrieval.retrieval_methods import RetrievalMethod
from core.workflow.entities.node_entities import NodeRunResult
from core.workflow.entities.variable_pool import VariablePool
from core.workflow.entities.workflow_node_execution import WorkflowNodeExecutionStatus
from core.workflow.enums import SystemVariableKey
from core.workflow.nodes.enums import NodeType
from extensions.ext_database import db
from models.dataset import Dataset, Document, DocumentSegment
from core.workflow.entities.workflow_node_execution import WorkflowNodeExecutionStatus
from ..base import BaseNode
from .entities import KnowledgeIndexNodeData
@@ -111,13 +114,19 @@ class KnowledgeIndexNode(BaseNode[KnowledgeIndexNodeData]):
        document = db.session.query(Document).filter_by(id=document_id.value).first()
        if not document:
            raise KnowledgeIndexNodeError(f"Document {document_id.value} not found.")
        # chunk nodes by chunk size
        indexing_start_at = time.perf_counter()
        index_processor = IndexProcessorFactory(dataset.chunk_structure).init_index_processor()
        index_processor.index(dataset, document, chunks)
        indexing_end_at = time.perf_counter()
        document.indexing_latency = indexing_end_at - indexing_start_at
        # update document status
        document.indexing_status = "completed"
        document.completed_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None)
        document.word_count = db.session.query(func.sum(DocumentSegment.word_count)).filter(
            DocumentSegment.document_id == document.id,
            DocumentSegment.dataset_id == dataset.id,
        ).scalar()
        db.session.add(document)
        # update document segment status
        db.session.query(DocumentSegment).filter(
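The hunk above times the indexing call with time.perf_counter() and stores an aggregated word count before marking the document completed. A minimal sketch of that pattern, using the same model names as the diff; the helper name, session handling, and the trailing "or 0" guard (func.sum returns NULL when there are no segments) are illustrative additions, not part of the diff.

import datetime
import time

from sqlalchemy import func

from models.dataset import DocumentSegment


def finalize_document(session, index_processor, dataset, document, chunks):
    # Measure wall-clock indexing latency around the same call the node makes.
    start = time.perf_counter()
    index_processor.index(dataset, document, chunks)
    document.indexing_latency = time.perf_counter() - start

    # Mark the document completed with a naive UTC timestamp, as in the diff.
    document.indexing_status = "completed"
    document.completed_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None)

    # Aggregate the word count over this document's segments.
    document.word_count = session.query(func.sum(DocumentSegment.word_count)).filter(
        DocumentSegment.document_id == document.id,
        DocumentSegment.dataset_id == dataset.id,
    ).scalar() or 0

    session.add(document)
    session.commit()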

View File

@@ -88,6 +88,8 @@ dataset_detail_fields = {
    "chunk_structure": fields.String,
    "icon_info": fields.Nested(icon_info_fields),
    "is_published": fields.Boolean,
    "total_documents": fields.Integer,
    "total_available_documents": fields.Integer,
}

dataset_query_detail_fields = {
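A hedged sketch of how the two new fields serialize: flask_restful's marshal() resolves attributes by name, so Integer fields pick up model properties named total_documents / total_available_documents automatically. The stand-in object below is illustrative only.

from flask_restful import fields, marshal

dataset_detail_fields = {
    "id": fields.String,
    "total_documents": fields.Integer,
    "total_available_documents": fields.Integer,
}


class DatasetStub:
    # Stand-in for a Dataset model exposing the counting properties.
    id = "d1"
    total_documents = 12
    total_available_documents = 9


print(marshal(DatasetStub(), dataset_detail_fields))
# yields an OrderedDict: id='d1', total_documents=12, total_available_documents=9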

View File

@@ -311,6 +311,19 @@ class DatasetProcessRule(Base):
            return json.loads(self.rules) if self.rules else None
        except JSONDecodeError:
            return None

    @property
    def total_documents(self):
        return db.session.query(func.count(Document.id)).filter(Document.dataset_id == self.dataset_id).scalar()

    @property
    def total_available_documents(self):
        return db.session.query(func.count(Document.id)).filter(
            Document.dataset_id == self.dataset_id,
            Document.indexing_status == "completed",
            Document.enabled == True,
            Document.archived == False,
        ).scalar()


class Document(Base):
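Each property above issues its own COUNT query on every access. For illustration, a hedged sketch that fetches both figures in a single round trip with conditional aggregation; this assumes SQLAlchemy 2.x-style case() and is an alternative pattern, not what the diff implements.

from sqlalchemy import case, func

from models.dataset import Document  # same model the diff queries


def document_counts(session, dataset_id):
    # One query: total documents, plus those completed, enabled, and not archived.
    total, available = session.query(
        func.count(Document.id),
        func.sum(
            case(
                (
                    (Document.indexing_status == "completed")
                    & (Document.enabled == True)
                    & (Document.archived == False),
                    1,
                ),
                else_=0,
            )
        ),
    ).filter(Document.dataset_id == dataset_id).one()
    return total, int(available or 0)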

View File

@@ -484,6 +484,9 @@ class DatasetService:
            # update Retrieval model
            filtered_data["retrieval_model"] = data["retrieval_model"]
        # update icon info
        if data.get("icon_info"):
            filtered_data["icon_info"] = data.get("icon_info")

        db.session.query(Dataset).filter_by(id=dataset_id).update(filtered_data)
        db.session.commit()
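A hedged sketch of the write path above: the icon_info guard only copies the value when it is truthy, and the query-level update writes filtered_data directly without refreshing ORM instances in the session. The helper name and the assumption that icon_info lives in a JSON-typed column are illustrative.

from extensions.ext_database import db
from models.dataset import Dataset


def apply_dataset_updates(dataset_id, data):
    filtered_data = {}
    # Same guard as the diff: None or an empty dict is skipped entirely.
    if data.get("icon_info"):
        filtered_data["icon_info"] = data.get("icon_info")
    if filtered_data:
        # Query-level UPDATE; in-session Dataset objects are not refreshed.
        db.session.query(Dataset).filter_by(id=dataset_id).update(filtered_data)
        db.session.commit()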