From 9b9640b3db00b4fe1688af5bbb38da6fb0ce5a7b Mon Sep 17 00:00:00 2001 From: twwu Date: Fri, 6 Jun 2025 18:52:32 +0800 Subject: [PATCH] refactor: remove job ID handling from website crawl components and update related hooks --- .../website-crawl/base/crawler.tsx | 61 ++++++++++++++----- .../data-source/website-crawl/index.tsx | 3 - .../components/panel/test-run/hooks.ts | 3 - .../components/panel/test-run/index.tsx | 3 - web/models/pipeline.ts | 21 ++++++- web/service/use-pipeline.ts | 32 ++++++++++ 6 files changed, 97 insertions(+), 26 deletions(-) diff --git a/web/app/components/rag-pipeline/components/panel/test-run/data-source/website-crawl/base/crawler.tsx b/web/app/components/rag-pipeline/components/panel/test-run/data-source/website-crawl/base/crawler.tsx index f91fd08630..622b16bae7 100644 --- a/web/app/components/rag-pipeline/components/panel/test-run/data-source/website-crawl/base/crawler.tsx +++ b/web/app/components/rag-pipeline/components/panel/test-run/data-source/website-crawl/base/crawler.tsx @@ -9,12 +9,15 @@ import ErrorMessage from './error-message' import CrawledResult from './crawled-result' import { useDraftDatasourceNodeRun, + useDraftDatasourceNodeRunStatus, useDraftPipelinePreProcessingParams, usePublishedDatasourceNodeRun, + usePublishedDatasourceNodeRunStatus, usePublishedPipelinePreProcessingParams, } from '@/service/use-pipeline' import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail' import { DatasourceType } from '@/models/pipeline' +import { sleep } from '@/utils' const I18N_PREFIX = 'datasetCreation.stepOne.website' @@ -22,7 +25,6 @@ type CrawlerProps = { nodeId: string checkedCrawlResult: CrawlResultItem[] onCheckedCrawlResultChange: (payload: CrawlResultItem[]) => void - onJobIdChange: (jobId: string) => void headerInfo: { title: string docTitle: string @@ -43,7 +45,6 @@ const Crawler = ({ checkedCrawlResult, headerInfo, onCheckedCrawlResultChange, - onJobIdChange, onPreview, isInPipeline = false, }: CrawlerProps) => { @@ -74,31 +75,59 @@ const Crawler = ({ const showError = isCrawlFinished && crawlErrorMessage const useDatasourceNodeRun = useRef(!isInPipeline ? usePublishedDatasourceNodeRun : useDraftDatasourceNodeRun) + const useDatasourceNodeRunStatus = useRef(!isInPipeline ? usePublishedDatasourceNodeRunStatus : useDraftDatasourceNodeRunStatus) const { mutateAsync: runDatasourceNode } = useDatasourceNodeRun.current() + const { mutateAsync: getDatasourceNodeRunStatus } = useDatasourceNodeRunStatus.current() + + const checkCrawlStatus = useCallback(async (jobId: string) => { + const res = await getDatasourceNodeRunStatus({ + node_id: nodeId, + pipeline_id: pipelineId!, + job_id: jobId, + datasource_type: DatasourceType.websiteCrawl, + }, { + onError: async (error: any) => { + const message = await error.json() + setCrawlErrorMessage(message || t(`${I18N_PREFIX}.unknownError`)) + }, + }) as any + if (res.status === 'completed') { + setCrawlResult(res) + onCheckedCrawlResultChange(res.result || []) // default select the crawl result + setCrawlErrorMessage('') + setStep(Step.finished) + } + else if (res.status === 'processing') { + await sleep(2500) + await checkCrawlStatus(jobId) + } + }, [getDatasourceNodeRunStatus, nodeId, pipelineId, t, onCheckedCrawlResultChange]) const handleRun = useCallback(async (value: Record) => { setStep(Step.running) - await runDatasourceNode({ + const res = await runDatasourceNode({ node_id: nodeId, pipeline_id: pipelineId!, inputs: value, datasource_type: DatasourceType.websiteCrawl, }, { - onSuccess: (res: any) => { - const jobId = res.job_id - onJobIdChange(jobId) - setCrawlResult(res) - onCheckedCrawlResultChange(res.result || []) // default select the crawl result - setCrawlErrorMessage('') - }, - onError: (error) => { - setCrawlErrorMessage(error.message || t(`${I18N_PREFIX}.unknownError`)) - }, - onSettled: () => { + onError: async (error: any) => { + const message = await error.json() + setCrawlErrorMessage(message || t(`${I18N_PREFIX}.unknownError`)) setStep(Step.finished) }, - }) - }, [runDatasourceNode, nodeId, pipelineId, onJobIdChange, onCheckedCrawlResultChange, t]) + }) as any + const jobId = res.job_id + if (!jobId && res.status === 'completed') { + setCrawlResult(res) + onCheckedCrawlResultChange(res.result || []) // default select the crawl result + setStep(Step.finished) + } + else if (jobId) { + await checkCrawlStatus(jobId) + } + setCrawlErrorMessage('') + }, [runDatasourceNode, nodeId, pipelineId, onCheckedCrawlResultChange, checkCrawlStatus, t]) return (
diff --git a/web/app/components/rag-pipeline/components/panel/test-run/data-source/website-crawl/index.tsx b/web/app/components/rag-pipeline/components/panel/test-run/data-source/website-crawl/index.tsx index 90548c60b3..2ee9b4db6c 100644 --- a/web/app/components/rag-pipeline/components/panel/test-run/data-source/website-crawl/index.tsx +++ b/web/app/components/rag-pipeline/components/panel/test-run/data-source/website-crawl/index.tsx @@ -7,7 +7,6 @@ type WebsiteCrawlProps = { nodeId: string checkedCrawlResult: CrawlResultItem[] onCheckedCrawlResultChange: (payload: CrawlResultItem[]) => void - onJobIdChange: (jobId: string) => void headerInfo: { title: string docTitle: string @@ -22,7 +21,6 @@ const WebsiteCrawl = ({ checkedCrawlResult, headerInfo, onCheckedCrawlResultChange, - onJobIdChange, onPreview, isInPipeline, }: WebsiteCrawlProps) => { @@ -32,7 +30,6 @@ const WebsiteCrawl = ({ checkedCrawlResult={checkedCrawlResult} headerInfo={headerInfo} onCheckedCrawlResultChange={onCheckedCrawlResultChange} - onJobIdChange={onJobIdChange} onPreview={onPreview} isInPipeline={isInPipeline} /> diff --git a/web/app/components/rag-pipeline/components/panel/test-run/hooks.ts b/web/app/components/rag-pipeline/components/panel/test-run/hooks.ts index 20dca3b4bc..e3f32e07ac 100644 --- a/web/app/components/rag-pipeline/components/panel/test-run/hooks.ts +++ b/web/app/components/rag-pipeline/components/panel/test-run/hooks.ts @@ -116,12 +116,9 @@ export const useOnlineDocuments = () => { export const useWebsiteCrawl = () => { const [websitePages, setWebsitePages] = useState([]) - const [websiteCrawlJobId, setWebsiteCrawlJobId] = useState('') return { websitePages, - websiteCrawlJobId, setWebsitePages, - setWebsiteCrawlJobId, } } diff --git a/web/app/components/rag-pipeline/components/panel/test-run/index.tsx b/web/app/components/rag-pipeline/components/panel/test-run/index.tsx index 7b59575210..391cd36e1c 100644 --- a/web/app/components/rag-pipeline/components/panel/test-run/index.tsx +++ b/web/app/components/rag-pipeline/components/panel/test-run/index.tsx @@ -40,9 +40,7 @@ const TestRunPanel = () => { } = useOnlineDocuments() const { websitePages, - // websiteCrawlJobId, // todo: Add status query setWebsitePages, - setWebsiteCrawlJobId, } = useWebsiteCrawl() const { handleRun } = useWorkflowRun() @@ -147,7 +145,6 @@ const TestRunPanel = () => { docLink: datasource.docLink || '', }} onCheckedCrawlResultChange={setWebsitePages} - onJobIdChange={setWebsiteCrawlJobId} isInPipeline /> )} diff --git a/web/models/pipeline.ts b/web/models/pipeline.ts index 18df4333da..ed07b1eac6 100644 --- a/web/models/pipeline.ts +++ b/web/models/pipeline.ts @@ -158,7 +158,26 @@ export type PipelineDatasourceNodeRunRequest = { datasource_type: DatasourceType } -export type PipelineDatasourceNodeRunResponse = Record +export type PipelineDatasourceNodeRunResponse = { + job_id?: string + status: 'processing' | 'completed' + result: Record + provider_type: DatasourceType +} + +export type PipelineDatasourceNodeRunStatusRequest = { + pipeline_id: string + node_id: string + job_id: string + datasource_type: DatasourceType +} + +export type PipelineDatasourceNodeRunStatusResponse = { + provider_type: DatasourceType + result: Record + status: 'processing' | 'completed' + job_id: string +} export type PublishedPipelineInfoResponse = { id: string diff --git a/web/service/use-pipeline.ts b/web/service/use-pipeline.ts index 22d0c076ca..4736b28948 100644 --- a/web/service/use-pipeline.ts +++ b/web/service/use-pipeline.ts @@ -10,6 +10,8 @@ import type { PipelineCheckDependenciesResponse, PipelineDatasourceNodeRunRequest, PipelineDatasourceNodeRunResponse, + PipelineDatasourceNodeRunStatusRequest, + PipelineDatasourceNodeRunStatusResponse, PipelinePreProcessingParamsRequest, PipelinePreProcessingParamsResponse, PipelineProcessingParamsRequest, @@ -153,6 +155,36 @@ export const usePublishedDatasourceNodeRun = ( }) } +export const useDraftDatasourceNodeRunStatus = ( + mutationOptions: MutationOptions = {}, +) => { + return useMutation({ + mutationKey: [NAME_SPACE, 'draft-datasource-node-run-status'], + mutationFn: (request: PipelineDatasourceNodeRunStatusRequest) => { + const { pipeline_id, node_id, ...rest } = request + return post(`/rag/pipelines/${pipeline_id}/workflows/draft/datasource/nodes/${node_id}/run`, { + body: rest, + }) + }, + ...mutationOptions, + }) +} + +export const usePublishedDatasourceNodeRunStatus = ( + mutationOptions: MutationOptions = {}, +) => { + return useMutation({ + mutationKey: [NAME_SPACE, 'published-datasource-node-run-status'], + mutationFn: (request: PipelineDatasourceNodeRunStatusRequest) => { + const { pipeline_id, node_id, ...rest } = request + return post(`/rag/pipelines/${pipeline_id}/workflows/published/datasource/nodes/${node_id}/run`, { + body: rest, + }) + }, + ...mutationOptions, + }) +} + export const useDraftPipelineProcessingParams = (params: PipelineProcessingParamsRequest, enabled = true) => { const { pipeline_id, node_id } = params return useQuery({