Mirror of https://github.com/langgenius/dify.git (synced 2025-11-07 23:13:24 +00:00)

Commit e934503fa0 ("r2"), parent 80b219707e
@@ -283,6 +283,15 @@ class DatasetApi(Resource):
             location="json",
             help="Invalid external knowledge api id.",
         )
+
+        parser.add_argument(
+            "icon_info",
+            type=dict,
+            required=False,
+            nullable=True,
+            location="json",
+            help="Invalid icon info.",
+        )
         args = parser.parse_args()
         data = request.get_json()

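The hunk above registers a new optional "icon_info" argument on the dataset update endpoint. Below is a minimal sketch of how that parser behaves, using a throwaway Flask resource; the route, resource name, and payload keys are illustrative, not dify's actual ones. With type=dict the argument accepts a JSON object, and required=False plus nullable=True let clients omit it or send null.

from flask import Flask
from flask_restful import Api, Resource, reqparse

app = Flask(__name__)
api = Api(app)

class DatasetStub(Resource):
    def patch(self):
        # same argument definition as in the diff above
        parser = reqparse.RequestParser()
        parser.add_argument(
            "icon_info",
            type=dict,
            required=False,
            nullable=True,
            location="json",
            help="Invalid icon info.",
        )
        args = parser.parse_args()
        return {"icon_info": args["icon_info"]}

api.add_resource(DatasetStub, "/datasets/stub")

with app.test_client() as client:
    # the icon_info keys here are made up for the example
    resp = client.patch(
        "/datasets/stub",
        json={"icon_info": {"icon_type": "emoji", "icon": "📚"}},
    )
    print(resp.get_json())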
@@ -1,18 +1,21 @@
 import datetime
 import logging
 from collections.abc import Mapping
+import time
 from typing import Any, cast

+from sqlalchemy import func
+
 from core.app.entities.app_invoke_entities import InvokeFrom
 from core.rag.index_processor.index_processor_factory import IndexProcessorFactory
 from core.rag.retrieval.retrieval_methods import RetrievalMethod
 from core.workflow.entities.node_entities import NodeRunResult
 from core.workflow.entities.variable_pool import VariablePool
+from core.workflow.entities.workflow_node_execution import WorkflowNodeExecutionStatus
 from core.workflow.enums import SystemVariableKey
 from core.workflow.nodes.enums import NodeType
 from extensions.ext_database import db
 from models.dataset import Dataset, Document, DocumentSegment
-from core.workflow.entities.workflow_node_execution import WorkflowNodeExecutionStatus

 from ..base import BaseNode
 from .entities import KnowledgeIndexNodeData
@@ -111,13 +114,19 @@ class KnowledgeIndexNode(BaseNode[KnowledgeIndexNodeData]):
         document = db.session.query(Document).filter_by(id=document_id.value).first()
         if not document:
             raise KnowledgeIndexNodeError(f"Document {document_id.value} not found.")
+        # chunk nodes by chunk size
+        indexing_start_at = time.perf_counter()
         index_processor = IndexProcessorFactory(dataset.chunk_structure).init_index_processor()
         index_processor.index(dataset, document, chunks)
+        indexing_end_at = time.perf_counter()
+        document.indexing_latency = indexing_end_at - indexing_start_at
         # update document status
         document.indexing_status = "completed"
         document.completed_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None)
+        document.word_count = db.session.query(func.sum(DocumentSegment.word_count)).filter(
+            DocumentSegment.document_id == document.id,
+            DocumentSegment.dataset_id == dataset.id,
+        ).scalar()
         db.session.add(document)
         # update document segment status
         db.session.query(DocumentSegment).filter(
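The node now records how long indexing takes and backfills the document's word count from its segments via a func.sum aggregate rather than counting in Python. A minimal standalone sketch of the timing pattern follows; fake_index is a stand-in for index_processor.index(dataset, document, chunks), not dify's implementation. time.perf_counter() is a monotonic, high-resolution clock, so the difference of two readings gives elapsed seconds regardless of wall-clock adjustments.

import time

def fake_index(chunks: list[str]) -> None:
    # stand-in for index_processor.index(dataset, document, chunks)
    time.sleep(0.05)

indexing_start_at = time.perf_counter()
fake_index(["chunk one", "chunk two"])
indexing_end_at = time.perf_counter()

indexing_latency = indexing_end_at - indexing_start_at
print(f"indexing_latency={indexing_latency:.3f}s")  # roughly 0.050s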
@@ -88,6 +88,8 @@ dataset_detail_fields = {
     "chunk_structure": fields.String,
     "icon_info": fields.Nested(icon_info_fields),
     "is_published": fields.Boolean,
+    "total_documents": fields.Integer,
+    "total_available_documents": fields.Integer,
 }

 dataset_query_detail_fields = {
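dataset_detail_fields is a flask_restful marshalling dict, so the two new entries are resolved by attribute name when a dataset is serialized. A small sketch with a stand-in object (FakeDataset is hypothetical; the real model exposes these names as properties, as the next hunk shows):

from flask_restful import fields, marshal

class FakeDataset:
    # stand-in attributes; on the real model these are @property values
    total_documents = 12
    total_available_documents = 9

detail_fields = {
    "total_documents": fields.Integer,
    "total_available_documents": fields.Integer,
}

print(marshal(FakeDataset(), detail_fields))
# {'total_documents': 12, 'total_available_documents': 9}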
@@ -311,6 +311,19 @@ class DatasetProcessRule(Base):
             return json.loads(self.rules) if self.rules else None
         except JSONDecodeError:
             return None

+    @property
+    def total_documents(self):
+        return db.session.query(func.count(Document.id)).filter(Document.dataset_id == self.dataset_id).scalar()
+
+    @property
+    def total_available_documents(self):
+        return db.session.query(func.count(Document.id)).filter(
+            Document.dataset_id == self.dataset_id,
+            Document.indexing_status == "completed",
+            Document.enabled == True,
+            Document.archived == False,
+        ).scalar()
+

 class Document(Base):
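Both new properties issue a COUNT scalar query per access; total_available_documents additionally filters to completed, enabled, non-archived documents. A self-contained sketch of the same query shape against an in-memory SQLite database (the Document model below is a minimal stand-in, not dify's):

from sqlalchemy import Boolean, Column, String, create_engine, func
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()

class Document(Base):
    __tablename__ = "documents"
    id = Column(String, primary_key=True)
    dataset_id = Column(String)
    indexing_status = Column(String)
    enabled = Column(Boolean, default=True)
    archived = Column(Boolean, default=False)

engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add_all([
        Document(id="a", dataset_id="d1", indexing_status="completed"),
        Document(id="b", dataset_id="d1", indexing_status="indexing"),
        Document(id="c", dataset_id="d1", indexing_status="completed", archived=True),
    ])
    session.commit()

    # same shape as the added properties: COUNT with filters, read as a scalar
    total = session.query(func.count(Document.id)).filter(Document.dataset_id == "d1").scalar()
    available = session.query(func.count(Document.id)).filter(
        Document.dataset_id == "d1",
        Document.indexing_status == "completed",
        Document.enabled == True,
        Document.archived == False,
    ).scalar()
    print(total, available)  # 3 1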
@@ -484,6 +484,9 @@ class DatasetService:
             # update Retrieval model
             filtered_data["retrieval_model"] = data["retrieval_model"]

+        # update icon info
+        if data.get("icon_info"):
+            filtered_data["icon_info"] = data.get("icon_info")
         db.session.query(Dataset).filter_by(id=dataset_id).update(filtered_data)

        db.session.commit()
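The service-layer change mirrors the API change: icon_info is only merged into the update payload when the client sent a truthy value, and the whole payload is then applied as one bulk UPDATE via query(...).update(filtered_data) without loading the Dataset row. A tiny sketch of the merge logic (the payload values are illustrative):

data = {"retrieval_model": {"search_method": "semantic_search"}, "icon_info": None}

filtered_data = {"retrieval_model": data["retrieval_model"]}

# update icon info: only include the key when a value was provided
if data.get("icon_info"):
    filtered_data["icon_info"] = data.get("icon_info")

print(filtered_data)  # icon_info stays out because it was None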