Merge branch 'feat/rag-pipeline' into deploy/rag-dev

This commit is contained in:
twwu 2025-06-12 15:18:49 +08:00
commit d3dbfbe8b3
24 changed files with 438 additions and 119 deletions

View File

@ -1,3 +1,4 @@
import logging
import time
from collections.abc import Generator, Mapping, Sequence
from typing import TYPE_CHECKING, Any, Optional, Union
@ -33,6 +34,8 @@ from models.model import App, AppMode, Message, MessageAnnotation
if TYPE_CHECKING:
from core.file.models import File
_logger = logging.getLogger(__name__)
class AppRunner:
def get_pre_calculate_rest_tokens(
@ -298,7 +301,7 @@ class AppRunner:
)
def _handle_invoke_result_stream(
self, invoke_result: Generator, queue_manager: AppQueueManager, agent: bool
self, invoke_result: Generator[LLMResultChunk, None, None], queue_manager: AppQueueManager, agent: bool
) -> None:
"""
Handle invoke result
@ -317,18 +320,28 @@ class AppRunner:
else:
queue_manager.publish(QueueAgentMessageEvent(chunk=result), PublishFrom.APPLICATION_MANAGER)
text += result.delta.message.content
message = result.delta.message
if isinstance(message.content, str):
text += message.content
elif isinstance(message.content, list):
for content in message.content:
if not isinstance(content, str):
# TODO(QuantumGhost): Add multimodal output support for easy ui.
_logger.warning("received multimodal output, type=%s", type(content))
text += content.data
else:
text += content # failback to str
if not model:
model = result.model
if not prompt_messages:
prompt_messages = result.prompt_messages
prompt_messages = list(result.prompt_messages)
if result.delta.usage:
usage = result.delta.usage
if not usage:
if usage is None:
usage = LLMUsage.empty_usage()
llm_result = LLMResult(

View File

@ -48,6 +48,7 @@ from core.model_manager import ModelInstance
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
from core.model_runtime.entities.message_entities import (
AssistantPromptMessage,
TextPromptMessageContent,
)
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
from core.ops.entities.trace_entity import TraceTaskName
@ -309,6 +310,23 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline):
delta_text = chunk.delta.message.content
if delta_text is None:
continue
if isinstance(chunk.delta.message.content, list):
delta_text = ""
for content in chunk.delta.message.content:
logger.debug(
"The content type %s in LLM chunk delta message content.: %r", type(content), content
)
if isinstance(content, TextPromptMessageContent):
delta_text += content.data
elif isinstance(content, str):
delta_text += content # failback to str
else:
logger.warning(
"Unsupported content type %s in LLM chunk delta message content.: %r",
type(content),
content,
)
continue
if not self._task_state.llm_result.prompt_messages:
self._task_state.llm_result.prompt_messages = chunk.prompt_messages

View File

@ -525,6 +525,8 @@ class LLMNode(BaseNode[LLMNodeData]):
# Set appropriate response format based on model capabilities
self._set_response_format(completion_params, model_schema.parameter_rules)
model_config_with_cred.parameters = completion_params
# NOTE(-LAN-): This line modifies `self.node_data.model`, which is used in `_invoke_llm()`.
node_data_model.completion_params = completion_params
return model, model_config_with_cred
def _fetch_prompt_messages(

View File

@ -14,7 +14,7 @@ const MarkdownButton = ({ node }: any) => {
size={size}
className={cn('!h-auto min-h-8 select-none whitespace-normal !px-3')}
onClick={() => {
if (isValidUrl(link)) {
if (link && isValidUrl(link)) {
window.open(link, '_blank')
return
}

View File

@ -32,10 +32,7 @@ const CreateFormPipeline = () => {
const { t } = useTranslation()
const plan = useProviderContextSelector(state => state.plan)
const enableBilling = useProviderContextSelector(state => state.enableBilling)
const datasetId = useDatasetDetailContextWithSelector(s => s.dataset?.id)
const pipelineId = useDatasetDetailContextWithSelector(s => s.dataset?.pipeline_id)
const indexingType = useDatasetDetailContextWithSelector(s => s.dataset?.indexing_technique)
const retrievalMethod = useDatasetDetailContextWithSelector(s => s.dataset?.retrieval_model_dict.search_method)
const [datasource, setDatasource] = useState<Datasource>()
const [estimateData, setEstimateData] = useState<FileIndexingEstimateResponse | undefined>(undefined)
const [batchId, setBatchId] = useState('')
@ -302,11 +299,8 @@ const CreateFormPipeline = () => {
{
currentStep === 3 && (
<Processing
datasetId={datasetId!}
batchId={batchId}
documents={documents}
indexingType={indexingType!}
retrievalMethod={retrievalMethod!}
/>
)
}
@ -326,12 +320,12 @@ const CreateFormPipeline = () => {
currentStep === 2 && (
<div className='flex h-full w-[752px] shrink-0 pl-2 pt-2'>
<ChunkPreview
datasource={datasource!}
dataSourceType={datasource!.type}
files={fileList.map(file => file.file)}
onlineDocuments={onlineDocuments}
websitePages={websitePages}
isIdle={isIdle}
isPending={isPending}
isPending={isPending && isPreview.current}
estimateData={estimateData}
onPreview={onClickPreview}
handlePreviewFileChange={handlePreviewFileChange}

View File

@ -2,7 +2,6 @@ import React, { useState } from 'react'
import { useTranslation } from 'react-i18next'
import { PreviewContainer } from '../../../preview/container'
import { PreviewHeader } from '../../../preview/header'
import type { Datasource } from '@/app/components/rag-pipeline/components/panel/test-run/types'
import type { CrawlResultItem, CustomFile, DocumentItem, FileIndexingEstimateResponse } from '@/models/datasets'
import { ChunkingMode } from '@/models/datasets'
import type { NotionPage } from '@/models/common'
@ -18,7 +17,7 @@ import Button from '@/app/components/base/button'
import { DatasourceType } from '@/models/pipeline'
type ChunkPreviewProps = {
datasource: Datasource
dataSourceType: DatasourceType
files: CustomFile[]
onlineDocuments: NotionPage[]
websitePages: CrawlResultItem[]
@ -32,7 +31,7 @@ type ChunkPreviewProps = {
}
const ChunkPreview = ({
datasource,
dataSourceType,
files,
onlineDocuments,
websitePages,
@ -51,8 +50,6 @@ const ChunkPreview = ({
const [previewOnlineDocument, setPreviewOnlineDocument] = useState<NotionPage>(onlineDocuments[0])
const [previewWebsitePage, setPreviewWebsitePage] = useState<CrawlResultItem>(websitePages[0])
const dataSourceType = datasource?.type
return (
<PreviewContainer
header={<PreviewHeader
@ -185,9 +182,7 @@ const ChunkPreview = ({
<p className='text-sm text-text-tertiary'>
{t('datasetCreation.stepTwo.previewChunkTip')}
</p>
<Button
onClick={onPreview}
>
<Button onClick={onPreview}>
{t('datasetPipeline.addDocuments.stepTwo.previewChunks')}
</Button>
</div>

View File

@ -4,34 +4,30 @@ import { useTranslation } from 'react-i18next'
import { RiBookOpenLine } from '@remixicon/react'
import { useGetDocLanguage } from '@/context/i18n'
import EmbeddingProcess from './embedding-process'
import type { IndexingType } from '../../../create/step-two'
import type { RETRIEVE_METHOD } from '@/types/app'
import type { InitialDocumentDetail } from '@/models/pipeline'
import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
type ProcessingProps = {
datasetId: string
indexingType: IndexingType
retrievalMethod: RETRIEVE_METHOD
batchId: string
documents: InitialDocumentDetail[]
}
const Processing = ({
datasetId,
batchId,
documents,
indexingType,
retrievalMethod,
}: ProcessingProps) => {
const { t } = useTranslation()
const docLanguage = useGetDocLanguage()
const datasetId = useDatasetDetailContextWithSelector(s => s.dataset?.id)
const indexingType = useDatasetDetailContextWithSelector(s => s.dataset?.indexing_technique)
const retrievalMethod = useDatasetDetailContextWithSelector(s => s.dataset?.retrieval_model_dict.search_method)
return (
<div className='flex h-full w-full justify-center overflow-hidden'>
<div className='h-full w-3/5 overflow-y-auto pb-8 pt-10'>
<div className='max-w-[640px]'>
<EmbeddingProcess
datasetId={datasetId}
datasetId={datasetId!}
batchId={batchId}
documents={documents}
indexingType={indexingType}

View File

@ -18,6 +18,8 @@ import { useEventEmitterContextContext } from '@/context/event-emitter'
import { formatNumber } from '@/utils/format'
import classNames from '@/utils/classnames'
import Divider from '@/app/components/base/divider'
import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
import { IndexingType } from '../../../create/step-two'
type ISegmentDetailProps = {
segInfo?: Partial<SegmentDetailModel> & { id: string }
@ -48,6 +50,7 @@ const SegmentDetail: FC<ISegmentDetailProps> = ({
const toggleFullScreen = useSegmentListContext(s => s.toggleFullScreen)
const mode = useDocumentContext(s => s.mode)
const parentMode = useDocumentContext(s => s.parentMode)
const indexingTechnique = useDatasetDetailContextWithSelector(s => s.dataset?.indexing_technique)
eventEmitter?.useSubscription((v) => {
if (v === 'update-segment')
@ -103,6 +106,8 @@ const SegmentDetail: FC<ISegmentDetailProps> = ({
return isParentChildMode ? t('datasetDocuments.segment.parentChunk') : t('datasetDocuments.segment.chunk')
}, [isParentChildMode, t])
const isECOIndexing = indexingTechnique === IndexingType.ECONOMICAL
return (
<div className={'flex h-full flex-col'}>
<div className={classNames('flex items-center justify-between', fullScreen ? 'py-3 pr-4 pl-6 border border-divider-subtle' : 'pt-3 pr-3 pl-4')}>
@ -149,7 +154,7 @@ const SegmentDetail: FC<ISegmentDetailProps> = ({
isEditMode={isEditMode}
/>
</div>
{mode === 'custom' && <Keywords
{isECOIndexing && <Keywords
className={fullScreen ? 'w-1/5' : ''}
actionType={isEditMode ? 'edit' : 'view'}
segInfo={segInfo}

View File

@ -12,7 +12,6 @@ import Keywords from './completed/common/keywords'
import ChunkContent from './completed/common/chunk-content'
import AddAnother from './completed/common/add-another'
import Dot from './completed/common/dot'
import { useDocumentContext } from './index'
import { useStore as useAppStore } from '@/app/components/app/store'
import { ToastContext } from '@/app/components/base/toast'
import { ChunkingMode, type SegmentUpdater } from '@/models/datasets'
@ -20,6 +19,8 @@ import classNames from '@/utils/classnames'
import { formatNumber } from '@/utils/format'
import Divider from '@/app/components/base/divider'
import { useAddSegment } from '@/service/knowledge/use-segment'
import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
import { IndexingType } from '../../create/step-two'
type NewSegmentModalProps = {
onCancel: () => void
@ -44,7 +45,7 @@ const NewSegmentModal: FC<NewSegmentModalProps> = ({
const [addAnother, setAddAnother] = useState(true)
const fullScreen = useSegmentListContext(s => s.fullScreen)
const toggleFullScreen = useSegmentListContext(s => s.toggleFullScreen)
const mode = useDocumentContext(s => s.mode)
const indexingTechnique = useDatasetDetailContextWithSelector(s => s.dataset?.indexing_technique)
const { appSidebarExpand } = useAppStore(useShallow(state => ({
appSidebarExpand: state.appSidebarExpand,
})))
@ -137,6 +138,8 @@ const NewSegmentModal: FC<NewSegmentModalProps> = ({
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [question.length, answer.length, isQAModel])
const isECOIndexing = indexingTechnique === IndexingType.ECONOMICAL
return (
<div className={'flex h-full flex-col'}>
<div className={classNames('flex items-center justify-between', fullScreen ? 'py-3 pr-4 pl-6 border border-divider-subtle' : 'pt-3 pr-3 pl-4')}>
@ -182,7 +185,7 @@ const NewSegmentModal: FC<NewSegmentModalProps> = ({
isEditMode={true}
/>
</div>
{mode === 'custom' && <Keywords
{isECOIndexing && <Keywords
className={fullScreen ? 'w-1/5' : ''}
actionType='add'
keywords={keywords}

View File

@ -0,0 +1,94 @@
import React, { useMemo } from 'react'
import { useTranslation } from 'react-i18next'
import { useBoolean } from 'ahooks'
import { useContext } from 'use-context-selector'
import { useRouter } from 'next/navigation'
import DatasetDetailContext from '@/context/dataset-detail'
import type { CrawlOptions, CustomFile, DataSourceType } from '@/models/datasets'
import Loading from '@/app/components/base/loading'
import StepTwo from '@/app/components/datasets/create/step-two'
import AccountSetting from '@/app/components/header/account-setting'
import AppUnavailable from '@/app/components/base/app-unavailable'
import { useDefaultModel } from '@/app/components/header/account-setting/model-provider-page/hooks'
import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations'
import type { NotionPage } from '@/models/common'
import { useDocumentDetail, useInvalidDocumentDetailKey } from '@/service/knowledge/use-document'
/** Identifiers of the dataset and of the document whose settings are edited. */
type DocumentSettingsProps = {
  datasetId: string
  documentId: string
}

/**
 * Settings page for a single document.
 *
 * Requests the document detail and, once both the dataset (from
 * `DatasetDetailContext`) and the detail are available, renders `StepTwo`
 * with `isSetting`. Renders `AppUnavailable` when the detail request reports
 * an error and `Loading` while no detail is present.
 */
const DocumentSettings = (props: DocumentSettingsProps) => {
  const { datasetId, documentId } = props
  const { t } = useTranslation()
  const router = useRouter()
  const [isShowSetAPIKey, { setTrue: showSetAPIKey, setFalse: hideSetAPIkey }] = useBoolean()
  const { indexingTechnique, dataset } = useContext(DatasetDetailContext)
  const { data: embeddingsDefaultModel } = useDefaultModel(ModelTypeEnum.textEmbedding)
  const invalidDocumentDetail = useInvalidDocumentDetailKey()
  const { data: documentDetail, error } = useDocumentDetail({
    datasetId,
    documentId,
    params: { metadata: 'without' },
  })

  // Notion-page shaped view of the document, passed to StepTwo as its only notion page.
  const currentPage = useMemo(() => ({
    workspace_id: documentDetail?.data_source_info.notion_workspace_id,
    page_id: documentDetail?.data_source_info.notion_page_id,
    page_name: documentDetail?.name,
    page_icon: documentDetail?.data_source_info.notion_page_icon,
    type: documentDetail?.data_source_type,
  }), [documentDetail])

  // After saving: invalidate the document detail, then go to the document page.
  function saveHandler() {
    invalidDocumentDetail()
    router.push(`/datasets/${datasetId}/documents/${documentId}`)
  }

  function cancelHandler() {
    return router.back()
  }

  if (error)
    return <AppUnavailable code={500} unknownReason={t('datasetCreation.error.unavailable') as string} />

  // The form is rendered only when both the dataset and the document detail exist.
  const renderForm = () => {
    if (!dataset || !documentDetail)
      return null

    const websitePage = {
      title: documentDetail.name,
      source_url: documentDetail.data_source_info?.url,
      markdown: '',
      description: '',
    }

    return (
      <StepTwo
        isAPIKeySet={!!embeddingsDefaultModel}
        onSetting={showSetAPIKey}
        datasetId={datasetId}
        dataSourceType={documentDetail.data_source_type as DataSourceType}
        notionPages={[currentPage as unknown as NotionPage]}
        websitePages={[websitePage]}
        websiteCrawlProvider={documentDetail.data_source_info?.provider}
        websiteCrawlJobId={documentDetail.data_source_info?.job_id}
        crawlOptions={documentDetail.data_source_info as unknown as CrawlOptions}
        indexingType={indexingTechnique}
        isSetting
        documentDetail={documentDetail}
        files={[documentDetail.data_source_info.upload_file as CustomFile]}
        onSave={saveHandler}
        onCancel={cancelHandler}
      />
    )
  }

  return (
    <div className='flex' style={{ height: 'calc(100vh - 56px)' }}>
      <div className='grow'>
        {!documentDetail && <Loading type='app' />}
        {renderForm()}
      </div>
      {isShowSetAPIKey && (
        <AccountSetting
          activeTab='provider'
          onCancel={async () => {
            hideSetAPIkey()
          }}
        />
      )}
    </div>
  )
}
export default DocumentSettings

View File

@ -1,96 +1,35 @@
'use client'
import React, { useMemo } from 'react'
import { useTranslation } from 'react-i18next'
import { useBoolean } from 'ahooks'
import { useContext } from 'use-context-selector'
import { useRouter } from 'next/navigation'
import DatasetDetailContext from '@/context/dataset-detail'
import type { CrawlOptions, CustomFile } from '@/models/datasets'
import React from 'react'
import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
import DocumentSettings from './document-settings'
import PipelineSettings from './pipeline-settings'
import Loading from '@/app/components/base/loading'
import StepTwo from '@/app/components/datasets/create/step-two'
import AccountSetting from '@/app/components/header/account-setting'
import AppUnavailable from '@/app/components/base/app-unavailable'
import { useDefaultModel } from '@/app/components/header/account-setting/model-provider-page/hooks'
import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations'
import type { NotionPage } from '@/models/common'
import { useDocumentDetail, useInvalidDocumentDetailKey } from '@/service/knowledge/use-document'
type DocumentSettingsProps = {
type SettingsProps = {
datasetId: string
documentId: string
}
const DocumentSettings = ({ datasetId, documentId }: DocumentSettingsProps) => {
const { t } = useTranslation()
const router = useRouter()
const [isShowSetAPIKey, { setTrue: showSetAPIKey, setFalse: hideSetAPIkey }] = useBoolean()
const { indexingTechnique, dataset } = useContext(DatasetDetailContext)
const { data: embeddingsDefaultModel } = useDefaultModel(ModelTypeEnum.textEmbedding)
const Settings = ({
datasetId,
documentId,
}: SettingsProps) => {
const pipelineId = useDatasetDetailContextWithSelector(s => s.dataset?.pipeline_id)
const invalidDocumentDetail = useInvalidDocumentDetailKey()
const saveHandler = () => {
invalidDocumentDetail()
router.push(`/datasets/${datasetId}/documents/${documentId}`)
if (!pipelineId) {
return (
<DocumentSettings
datasetId={datasetId}
documentId={documentId}
/>
)
}
const cancelHandler = () => router.back()
const { data: documentDetail, error } = useDocumentDetail({
datasetId,
documentId,
params: { metadata: 'without' },
})
const currentPage = useMemo(() => {
return {
workspace_id: documentDetail?.data_source_info.notion_workspace_id,
page_id: documentDetail?.data_source_info.notion_page_id,
page_name: documentDetail?.name,
page_icon: documentDetail?.data_source_info.notion_page_icon,
type: documentDetail?.data_source_type,
}
}, [documentDetail])
if (error)
return <AppUnavailable code={500} unknownReason={t('datasetCreation.error.unavailable') as string} />
return (
<div className='flex' style={{ height: 'calc(100vh - 56px)' }}>
<div className="grow">
{!documentDetail && <Loading type='app' />}
{dataset && documentDetail && (
<StepTwo
isAPIKeySet={!!embeddingsDefaultModel}
onSetting={showSetAPIKey}
datasetId={datasetId}
dataSourceType={documentDetail.data_source_type}
notionPages={[currentPage as unknown as NotionPage]}
websitePages={[
{
title: documentDetail.name,
source_url: documentDetail.data_source_info?.url,
markdown: '',
description: '',
},
]}
websiteCrawlProvider={documentDetail.data_source_info?.provider}
websiteCrawlJobId={documentDetail.data_source_info?.job_id}
crawlOptions={documentDetail.data_source_info as unknown as CrawlOptions}
indexingType={indexingTechnique}
isSetting
documentDetail={documentDetail}
files={[documentDetail.data_source_info.upload_file as CustomFile]}
onSave={saveHandler}
onCancel={cancelHandler}
/>
)}
</div>
{isShowSetAPIKey && <AccountSetting activeTab="provider" onCancel={async () => {
hideSetAPIkey()
}} />}
</div>
<PipelineSettings
datasetId={datasetId}
documentId={documentId}
/>
)
}
export default DocumentSettings
export default Settings

View File

@ -0,0 +1,120 @@
import { useCallback, useRef, useState } from 'react'
import type { CrawlResultItem, DocumentItem, FileIndexingEstimateResponse } from '@/models/datasets'
import type { NotionPage } from '@/models/common'
import { useTranslation } from 'react-i18next'
import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
import { useDocumentDetail } from '@/service/knowledge/use-document'
import AppUnavailable from '@/app/components/base/app-unavailable'
import ChunkPreview from '../../../create-from-pipeline/preview/chunk-preview'
import Loading from '@/app/components/base/loading'
import type { DatasourceType } from '@/models/pipeline'
import ProcessDocuments from './process-documents'
import LeftHeader from './left-header'
/** Identifiers of the dataset and of the document whose pipeline settings are shown. */
type PipelineSettingsProps = {
  datasetId: string
  documentId: string
}

/**
 * Document settings page with a process-documents form on the left and a
 * chunk preview on the right.
 *
 * Work in progress: the preview/process handlers are empty stubs and the
 * preview receives mock file data (see the `todo` markers below).
 */
const PipelineSettings = ({
  datasetId,
  documentId,
}: PipelineSettingsProps) => {
  const { t } = useTranslation()
  const pipelineId = useDatasetDetailContextWithSelector(s => s.dataset?.pipeline_id)
  const [estimateData, setEstimateData] = useState<FileIndexingEstimateResponse | undefined>(undefined)
  // Records whether the pending form submission was triggered by "preview" or by "process".
  const isPreview = useRef(false)
  const formRef = useRef<any>(null)

  const { data: documentDetail, error, isFetching: isFetchingDocumentDetail } = useDocumentDetail({
    datasetId,
    documentId,
    params: { metadata: 'without' },
  })

  const handlePreviewChunks = useCallback(async (data: Record<string, any>) => {
    // todo: Preview
  }, [])

  const handleProcess = useCallback(async (data: Record<string, any>) => {
    // todo: Process
  }, [])

  const onClickProcess = useCallback(() => {
    isPreview.current = false
    formRef.current?.submit()
  }, [])

  const onClickPreview = useCallback(() => {
    isPreview.current = true
    formRef.current?.submit()
  }, [])

  // Route the submitted form data according to the flag set by the click handlers.
  const handleSubmit = useCallback((data: Record<string, any>) => {
    if (isPreview.current)
      handlePreviewChunks(data)
    else
      handleProcess(data)
  }, [handlePreviewChunks, handleProcess])

  // Changing the previewed item currently just re-submits the form in preview mode.
  const handlePreviewFileChange = useCallback((file: DocumentItem) => {
    onClickPreview()
  }, [onClickPreview])

  const handlePreviewOnlineDocumentChange = useCallback((page: NotionPage) => {
    onClickPreview()
  }, [onClickPreview])

  const handlePreviewWebsiteChange = useCallback((website: CrawlResultItem) => {
    onClickPreview()
  }, [onClickPreview])

  if (isFetchingDocumentDetail) {
    return (
      <Loading type='app' />
    )
  }

  if (error)
    return <AppUnavailable code={500} unknownReason={t('datasetCreation.error.unavailable') as string} />

  // Not fetching and no error, but still no data: keep showing the loading
  // state instead of dereferencing an undefined document detail below.
  if (!documentDetail) {
    return (
      <Loading type='app' />
    )
  }

  return (
    <div
      className='relative flex h-[calc(100vh-56px)] overflow-x-auto rounded-t-2xl border-t border-effects-highlight bg-background-default-subtle'
    >
      <div className='flex h-full flex-1 flex-col px-14'>
        <LeftHeader title={t('datasetPipeline.documentSettings.title')} />
        <div className='grow overflow-y-auto'>
          <ProcessDocuments
            ref={formRef}
            documentId={documentId}
            onProcess={onClickProcess}
            onPreview={onClickPreview}
            onSubmit={handleSubmit}
          />
        </div>
      </div>
      {/* Preview */}
      <div className='flex h-full flex-1 shrink-0 pl-2 pt-2'>
        <ChunkPreview
          dataSourceType={documentDetail.data_source_type as DatasourceType}
          // @ts-expect-error mock data // todo: remove mock data
          files={[{
            id: '12345678',
            name: 'test-file',
            extension: 'txt',
          }]}
          onlineDocuments={[]}
          websitePages={[]}
          isIdle={true}
          isPending={true}
          estimateData={estimateData}
          onPreview={onClickPreview}
          handlePreviewFileChange={handlePreviewFileChange}
          handlePreviewOnlineDocumentChange={handlePreviewOnlineDocumentChange}
          handlePreviewWebsitePageChange={handlePreviewWebsiteChange}
        />
      </div>
    </div>
  )
}
export default PipelineSettings

View File

@ -0,0 +1,42 @@
import React, { useCallback } from 'react'
import { RiArrowLeftLine } from '@remixicon/react'
import Button from '@/app/components/base/button'
import { useRouter } from 'next/navigation'
import Effect from '@/app/components/base/effect'
import { useTranslation } from 'react-i18next'
/** Props for the header rendered above the process-documents form. */
type LeftHeaderProps = {
  title: string
}

/**
 * Header showing the given `title`, the translated "process documents" step
 * label, and a round button that calls the router's `back()`.
 */
const LeftHeader = (props: LeftHeaderProps) => {
  const { title } = props
  const { t } = useTranslation()
  const { back } = useRouter()

  const navigateBack = useCallback(() => {
    back()
  }, [back])

  const stepLabel = t('datasetPipeline.addDocuments.steps.processDocuments')

  return (
    <div className='relative flex flex-col gap-y-0.5 pb-2 pt-4'>
      <div className='system-2xs-semibold-uppercase bg-pipeline-add-documents-title-bg bg-clip-text text-transparent'>
        {title}
      </div>
      <div className='system-md-semibold text-text-primary'>
        {stepLabel}
      </div>
      <Button
        variant='secondary-accent'
        className='absolute -left-11 top-3.5 size-9 rounded-full p-0'
        onClick={navigateBack}
      >
        <RiArrowLeftLine className='size-5 ' />
      </Button>
      <Effect className='left-8 top-[-34px] opacity-20' />
    </div>
  )
}
export default React.memo(LeftHeader)

View File

@ -0,0 +1,26 @@
import React from 'react'
import Button from '@/app/components/base/button'
import { useTranslation } from 'react-i18next'
/** Props for the action row under the process-documents form. */
type ActionsProps = {
  /** Called when the "save and process" button is clicked. */
  onProcess: () => void
}

/**
 * Action row holding a single primary button, aligned to the end of the row,
 * that triggers `onProcess`.
 */
const Actions = (props: ActionsProps) => {
  const { onProcess } = props
  const { t } = useTranslation()
  const processLabel = t('datasetPipeline.operations.saveAndProcess')

  return (
    <div className='flex items-center justify-end'>
      <Button variant='primary' onClick={onProcess}>
        {processLabel}
      </Button>
    </div>
  )
}
export default React.memo(Actions)

View File

@ -0,0 +1,11 @@
import type { BaseConfiguration } from '@/app/components/base/form/form-scenarios/base/types'
/**
 * Provides the initial values and field configurations for the
 * process-documents form of a document.
 *
 * Currently a placeholder: `documentId` is not used yet and both results are
 * always empty.
 *
 * @param documentId - Id of the document being configured.
 * @returns `initialData` (empty object) and `configurations` (empty array).
 */
export const useConfigurations = (documentId: string) => {
  const initialData: Record<string, any> = {}
  const configurations: BaseConfiguration[] = []

  return {
    initialData,
    configurations,
  }
}

View File

@ -0,0 +1,39 @@
import { generateZodSchema } from '@/app/components/base/form/form-scenarios/base/utils'
import { useConfigurations } from './hooks'
import Actions from './actions'
import Form from '../../../../create-from-pipeline/process-documents/form'
/** Props for the process-documents form section. */
type ProcessDocumentsProps = {
  documentId: string
  /** Forwarded to the underlying `Form`. */
  ref: React.RefObject<any>
  onProcess: () => void
  onPreview: () => void
  onSubmit: (data: Record<string, any>) => void
}

/**
 * Renders the process-documents `Form`, built from the configurations
 * returned by `useConfigurations`, followed by the `Actions` row.
 */
const ProcessDocuments = (props: ProcessDocumentsProps) => {
  const { documentId, ref, onProcess, onPreview, onSubmit } = props
  const { initialData, configurations } = useConfigurations(documentId)
  // The validation schema is derived from the same configurations that drive the form.
  const formSchema = generateZodSchema(configurations)

  return (
    <div className='flex flex-col gap-y-4 pt-4'>
      <Form
        ref={ref}
        initialData={initialData}
        configurations={configurations}
        schema={formSchema}
        onSubmit={onSubmit}
        onPreview={onPreview}
      />
      <Actions onProcess={onProcess} />
    </div>
  )
}
export default ProcessDocuments

View File

@ -21,8 +21,8 @@ export const tagKeys = [
export const categoryKeys = [
'model',
'tool',
'datasource',
'agent-strategy',
'extension',
'bundle',
'datasource',
]

View File

@ -44,6 +44,10 @@ const Description = async ({
<span className='relative z-[2] lowercase'>{t('category.tools')}</span>
</span>
,
<span className="body-md-medium relative z-[1] ml-1 text-text-secondary after:absolute after:bottom-[1.5px] after:left-0 after:h-2 after:w-full after:bg-text-text-selected after:content-['']">
<span className='relative z-[2] lowercase'>{t('category.datasources')}</span>
</span>
,
<span className="body-md-medium relative z-[1] ml-1 text-text-secondary after:absolute after:bottom-[1.5px] after:left-0 after:h-2 after:w-full after:bg-text-text-selected after:content-['']">
<span className='relative z-[2] lowercase'>{t('category.agents')}</span>
</span>

View File

@ -21,6 +21,7 @@ export const PLUGIN_TYPE_SEARCH_MAP = {
tool: PluginType.tool,
agent: PluginType.agent,
extension: PluginType.extension,
datasource: PluginType.datasource,
bundle: 'bundle',
}
type PluginTypeSwitchProps = {
@ -56,6 +57,11 @@ const PluginTypeSwitch = ({
text: t('plugin.category.tools'),
icon: <RiHammerLine className='mr-1.5 h-4 w-4' />,
},
{
value: PLUGIN_TYPE_SEARCH_MAP.datasource,
text: t('plugin.category.datasources'),
icon: <RiHammerLine className='mr-1.5 h-4 w-4' />,
},
{
value: PLUGIN_TYPE_SEARCH_MAP.agent,
text: t('plugin.category.agents'),

View File

@ -7,6 +7,7 @@ export enum PluginType {
model = 'model',
extension = 'extension',
agent = 'agent-strategy',
datasource = 'datasource',
}
export enum PluginSource {

View File

@ -71,6 +71,10 @@ const Marketplace = ({
{t('plugin.category.tools')}
</span>
,
<span className="body-md-medium relative ml-1 text-text-secondary after:absolute after:bottom-[1.5px] after:left-0 after:h-2 after:w-full after:bg-text-text-selected after:content-['']">
{t('plugin.category.datasources')}
</span>
,
<span className="body-md-medium relative ml-1 text-text-secondary after:absolute after:bottom-[1.5px] after:left-0 after:h-2 after:w-full after:bg-text-text-selected after:content-['']">
{t('plugin.category.agents')}
</span>

View File

@ -103,6 +103,9 @@ const translation = {
},
characters: 'characters',
},
documentSettings: {
title: 'Document Settings',
},
}
export default translation

View File

@ -103,6 +103,9 @@ const translation = {
},
characters: '字符',
},
documentSettings: {
title: '文档设置',
},
}
export default translation

View File

@ -6,6 +6,7 @@ import type { MetadataFilteringVariableType } from '@/app/components/workflow/no
import type { MetadataItemWithValue } from '@/app/components/datasets/metadata/types'
import { ExternalKnowledgeBase, General, ParentChild, Qa } from '@/app/components/base/icons/src/public/knowledge/dataset-card'
import { GeneralChunk, ParentChildChunk, QuestionAndAnswer } from '@/app/components/base/icons/src/vender/knowledge'
import type { DatasourceType } from './pipeline'
export enum DataSourceType {
FILE = 'upload_file',
@ -318,7 +319,7 @@ export type InitialDocumentDetail = {
batch: string
position: number
dataset_id: string
data_source_type: DataSourceType
data_source_type: DataSourceType | DatasourceType
data_source_info: DataSourceInfo
dataset_process_rule_id: string
name: string