refactor: remove job ID handling from website crawl components and update related hooks

This commit is contained in:
twwu 2025-06-06 18:52:32 +08:00
parent 3e2f12b065
commit 9b9640b3db
6 changed files with 97 additions and 26 deletions

View File

@ -9,12 +9,15 @@ import ErrorMessage from './error-message'
import CrawledResult from './crawled-result'
import {
useDraftDatasourceNodeRun,
useDraftDatasourceNodeRunStatus,
useDraftPipelinePreProcessingParams,
usePublishedDatasourceNodeRun,
usePublishedDatasourceNodeRunStatus,
usePublishedPipelinePreProcessingParams,
} from '@/service/use-pipeline'
import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
import { DatasourceType } from '@/models/pipeline'
import { sleep } from '@/utils'
const I18N_PREFIX = 'datasetCreation.stepOne.website'
@ -22,7 +25,6 @@ type CrawlerProps = {
nodeId: string
checkedCrawlResult: CrawlResultItem[]
onCheckedCrawlResultChange: (payload: CrawlResultItem[]) => void
onJobIdChange: (jobId: string) => void
headerInfo: {
title: string
docTitle: string
@ -43,7 +45,6 @@ const Crawler = ({
checkedCrawlResult,
headerInfo,
onCheckedCrawlResultChange,
onJobIdChange,
onPreview,
isInPipeline = false,
}: CrawlerProps) => {
@ -74,31 +75,59 @@ const Crawler = ({
const showError = isCrawlFinished && crawlErrorMessage
const useDatasourceNodeRun = useRef(!isInPipeline ? usePublishedDatasourceNodeRun : useDraftDatasourceNodeRun)
const useDatasourceNodeRunStatus = useRef(!isInPipeline ? usePublishedDatasourceNodeRunStatus : useDraftDatasourceNodeRunStatus)
const { mutateAsync: runDatasourceNode } = useDatasourceNodeRun.current()
const { mutateAsync: getDatasourceNodeRunStatus } = useDatasourceNodeRunStatus.current()
// Polls the datasource-node run-status endpoint for an asynchronous crawl job.
// On 'completed' it stores the crawl result, auto-selects every crawled page and
// advances to the finished step; on 'processing' it waits 2.5s and polls again.
// NOTE(review): the recursion has no upper bound or abort signal — a job that
// never leaves 'processing' polls forever; confirm the backend guarantees a
// terminal status. If the request rejects, the rejection propagates to the
// caller after onError runs — verify callers handle it.
const checkCrawlStatus = useCallback(async (jobId: string) => {
const res = await getDatasourceNodeRunStatus({
node_id: nodeId,
pipeline_id: pipelineId!,
job_id: jobId,
datasource_type: DatasourceType.websiteCrawl,
}, {
// Surface the server-provided message, falling back to a generic i18n error.
onError: async (error: any) => {
const message = await error.json()
setCrawlErrorMessage(message || t(`${I18N_PREFIX}.unknownError`))
},
}) as any // NOTE(review): `as any` discards the typed response — consider using PipelineDatasourceNodeRunStatusResponse
if (res.status === 'completed') {
setCrawlResult(res)
onCheckedCrawlResultChange(res.result || []) // default select the crawl result
setCrawlErrorMessage('')
setStep(Step.finished)
}
else if (res.status === 'processing') {
await sleep(2500)
// Self-reference resolves to the current render's binding via closure.
await checkCrawlStatus(jobId)
}
}, [getDatasourceNodeRunStatus, nodeId, pipelineId, t, onCheckedCrawlResultChange])
const handleRun = useCallback(async (value: Record<string, any>) => {
setStep(Step.running)
await runDatasourceNode({
const res = await runDatasourceNode({
node_id: nodeId,
pipeline_id: pipelineId!,
inputs: value,
datasource_type: DatasourceType.websiteCrawl,
}, {
onSuccess: (res: any) => {
const jobId = res.job_id
onJobIdChange(jobId)
setCrawlResult(res)
onCheckedCrawlResultChange(res.result || []) // default select the crawl result
setCrawlErrorMessage('')
},
onError: (error) => {
setCrawlErrorMessage(error.message || t(`${I18N_PREFIX}.unknownError`))
},
onSettled: () => {
onError: async (error: any) => {
const message = await error.json()
setCrawlErrorMessage(message || t(`${I18N_PREFIX}.unknownError`))
setStep(Step.finished)
},
})
}, [runDatasourceNode, nodeId, pipelineId, onJobIdChange, onCheckedCrawlResultChange, t])
}) as any
const jobId = res.job_id
if (!jobId && res.status === 'completed') {
setCrawlResult(res)
onCheckedCrawlResultChange(res.result || []) // default select the crawl result
setStep(Step.finished)
}
else if (jobId) {
await checkCrawlStatus(jobId)
}
setCrawlErrorMessage('')
}, [runDatasourceNode, nodeId, pipelineId, onCheckedCrawlResultChange, checkCrawlStatus, t])
return (
<div className='flex flex-col'>

View File

@ -7,7 +7,6 @@ type WebsiteCrawlProps = {
nodeId: string
checkedCrawlResult: CrawlResultItem[]
onCheckedCrawlResultChange: (payload: CrawlResultItem[]) => void
onJobIdChange: (jobId: string) => void
headerInfo: {
title: string
docTitle: string
@ -22,7 +21,6 @@ const WebsiteCrawl = ({
checkedCrawlResult,
headerInfo,
onCheckedCrawlResultChange,
onJobIdChange,
onPreview,
isInPipeline,
}: WebsiteCrawlProps) => {
@ -32,7 +30,6 @@ const WebsiteCrawl = ({
checkedCrawlResult={checkedCrawlResult}
headerInfo={headerInfo}
onCheckedCrawlResultChange={onCheckedCrawlResultChange}
onJobIdChange={onJobIdChange}
onPreview={onPreview}
isInPipeline={isInPipeline}
/>

View File

@ -116,12 +116,9 @@ export const useOnlineDocuments = () => {
// Local state for the website-crawl step: the pages selected from the crawl result.
// NOTE(review): this listing still shows the websiteCrawlJobId state even though the
// commit message says job-ID handling was removed — these lines look like removed-diff
// residue from the renderer; verify against the applied file before relying on them.
export const useWebsiteCrawl = () => {
const [websitePages, setWebsitePages] = useState<CrawlResultItem[]>([])
const [websiteCrawlJobId, setWebsiteCrawlJobId] = useState('')
return {
websitePages,
websiteCrawlJobId,
setWebsitePages,
setWebsiteCrawlJobId,
}
}

View File

@ -40,9 +40,7 @@ const TestRunPanel = () => {
} = useOnlineDocuments()
const {
websitePages,
// websiteCrawlJobId, // todo: Add status query
setWebsitePages,
setWebsiteCrawlJobId,
} = useWebsiteCrawl()
const { handleRun } = useWorkflowRun()
@ -147,7 +145,6 @@ const TestRunPanel = () => {
docLink: datasource.docLink || '',
}}
onCheckedCrawlResultChange={setWebsitePages}
onJobIdChange={setWebsiteCrawlJobId}
isInPipeline
/>
)}

View File

@ -158,7 +158,26 @@ export type PipelineDatasourceNodeRunRequest = {
datasource_type: DatasourceType
}
/**
 * Result of triggering a datasource node run.
 *
 * An asynchronous crawl returns `job_id` with `status: 'processing'` and the
 * caller polls the run-status endpoint until completion; a synchronous run
 * returns `status: 'completed'` with the crawl payload already in `result`.
 */
export type PipelineDatasourceNodeRunResponse = {
  job_id?: string
  status: 'processing' | 'completed'
  result: Record<string, any>
  provider_type: DatasourceType
}
// Parameters for polling the status of an asynchronous datasource node run.
// `pipeline_id` and `node_id` address the node; `job_id` identifies the
// in-flight crawl job returned by the initial run call.
export type PipelineDatasourceNodeRunStatusRequest = {
pipeline_id: string
node_id: string
job_id: string
datasource_type: DatasourceType
}
// Payload returned by the run-status endpoint for a datasource node.
export type PipelineDatasourceNodeRunStatusResponse = {
provider_type: DatasourceType
// Crawl payload; only meaningful once `status` is 'completed'.
result: Record<string, any>
// 'processing' while the job is still running; 'completed' when finished.
status: 'processing' | 'completed'
job_id: string
}
export type PublishedPipelineInfoResponse = {
id: string

View File

@ -10,6 +10,8 @@ import type {
PipelineCheckDependenciesResponse,
PipelineDatasourceNodeRunRequest,
PipelineDatasourceNodeRunResponse,
PipelineDatasourceNodeRunStatusRequest,
PipelineDatasourceNodeRunStatusResponse,
PipelinePreProcessingParamsRequest,
PipelinePreProcessingParamsResponse,
PipelineProcessingParamsRequest,
@ -153,6 +155,36 @@ export const usePublishedDatasourceNodeRun = (
})
}
/**
 * Mutation hook that polls the run status of a datasource node on the DRAFT
 * pipeline workflow. `pipeline_id` and `node_id` are spliced into the URL;
 * the remaining request fields are sent as the POST body.
 * NOTE(review): this posts to the same `/run` path as the run mutation —
 * confirm the backend distinguishes a status poll by the `job_id` in the body.
 */
export const useDraftDatasourceNodeRunStatus = (
  mutationOptions: MutationOptions<PipelineDatasourceNodeRunStatusResponse, Error, PipelineDatasourceNodeRunStatusRequest> = {},
) => {
  return useMutation({
    mutationKey: [NAME_SPACE, 'draft-datasource-node-run-status'],
    mutationFn: ({ pipeline_id, node_id, ...body }: PipelineDatasourceNodeRunStatusRequest) =>
      post<PipelineDatasourceNodeRunStatusResponse>(
        `/rag/pipelines/${pipeline_id}/workflows/draft/datasource/nodes/${node_id}/run`,
        { body },
      ),
    ...mutationOptions,
  })
}
/**
 * Mutation hook that polls the run status of a datasource node on the
 * PUBLISHED pipeline workflow. Mirrors the draft variant, differing only in
 * the mutation key and the `published` URL segment.
 */
export const usePublishedDatasourceNodeRunStatus = (
  mutationOptions: MutationOptions<PipelineDatasourceNodeRunStatusResponse, Error, PipelineDatasourceNodeRunStatusRequest> = {},
) => {
  return useMutation({
    mutationKey: [NAME_SPACE, 'published-datasource-node-run-status'],
    mutationFn: ({ pipeline_id, node_id, ...body }: PipelineDatasourceNodeRunStatusRequest) =>
      post<PipelineDatasourceNodeRunStatusResponse>(
        `/rag/pipelines/${pipeline_id}/workflows/published/datasource/nodes/${node_id}/run`,
        { body },
      ),
    ...mutationOptions,
  })
}
export const useDraftPipelineProcessingParams = (params: PipelineProcessingParamsRequest, enabled = true) => {
const { pipeline_id, node_id } = params
return useQuery<PipelineProcessingParamsResponse>({