refactor: remove job ID handling from website crawl components and update related hooks

twwu 2025-06-06 18:52:32 +08:00
parent 3e2f12b065
commit 9b9640b3db
6 changed files with 97 additions and 26 deletions


@@ -9,12 +9,15 @@ import ErrorMessage from './error-message'
 import CrawledResult from './crawled-result'
 import {
   useDraftDatasourceNodeRun,
+  useDraftDatasourceNodeRunStatus,
   useDraftPipelinePreProcessingParams,
   usePublishedDatasourceNodeRun,
+  usePublishedDatasourceNodeRunStatus,
   usePublishedPipelinePreProcessingParams,
 } from '@/service/use-pipeline'
 import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
 import { DatasourceType } from '@/models/pipeline'
+import { sleep } from '@/utils'
 
 const I18N_PREFIX = 'datasetCreation.stepOne.website'
@@ -22,7 +25,6 @@ type CrawlerProps = {
   nodeId: string
   checkedCrawlResult: CrawlResultItem[]
   onCheckedCrawlResultChange: (payload: CrawlResultItem[]) => void
-  onJobIdChange: (jobId: string) => void
   headerInfo: {
     title: string
     docTitle: string
@@ -43,7 +45,6 @@ const Crawler = ({
   checkedCrawlResult,
   headerInfo,
   onCheckedCrawlResultChange,
-  onJobIdChange,
   onPreview,
   isInPipeline = false,
 }: CrawlerProps) => {
@@ -74,31 +75,59 @@ const Crawler = ({
   const showError = isCrawlFinished && crawlErrorMessage
 
   const useDatasourceNodeRun = useRef(!isInPipeline ? usePublishedDatasourceNodeRun : useDraftDatasourceNodeRun)
+  const useDatasourceNodeRunStatus = useRef(!isInPipeline ? usePublishedDatasourceNodeRunStatus : useDraftDatasourceNodeRunStatus)
   const { mutateAsync: runDatasourceNode } = useDatasourceNodeRun.current()
+  const { mutateAsync: getDatasourceNodeRunStatus } = useDatasourceNodeRunStatus.current()
+
+  const checkCrawlStatus = useCallback(async (jobId: string) => {
+    const res = await getDatasourceNodeRunStatus({
+      node_id: nodeId,
+      pipeline_id: pipelineId!,
+      job_id: jobId,
+      datasource_type: DatasourceType.websiteCrawl,
+    }, {
+      onError: async (error: any) => {
+        const message = await error.json()
+        setCrawlErrorMessage(message || t(`${I18N_PREFIX}.unknownError`))
+      },
+    }) as any
+    if (res.status === 'completed') {
+      setCrawlResult(res)
+      onCheckedCrawlResultChange(res.result || []) // default select the crawl result
+      setCrawlErrorMessage('')
+      setStep(Step.finished)
+    }
+    else if (res.status === 'processing') {
+      await sleep(2500)
+      await checkCrawlStatus(jobId)
+    }
+  }, [getDatasourceNodeRunStatus, nodeId, pipelineId, t, onCheckedCrawlResultChange])
 
   const handleRun = useCallback(async (value: Record<string, any>) => {
     setStep(Step.running)
-    await runDatasourceNode({
+    const res = await runDatasourceNode({
       node_id: nodeId,
       pipeline_id: pipelineId!,
       inputs: value,
       datasource_type: DatasourceType.websiteCrawl,
     }, {
-      onSuccess: (res: any) => {
-        const jobId = res.job_id
-        onJobIdChange(jobId)
-        setCrawlResult(res)
-        onCheckedCrawlResultChange(res.result || []) // default select the crawl result
-        setCrawlErrorMessage('')
-      },
-      onError: (error) => {
-        setCrawlErrorMessage(error.message || t(`${I18N_PREFIX}.unknownError`))
-      },
-      onSettled: () => {
+      onError: async (error: any) => {
+        const message = await error.json()
+        setCrawlErrorMessage(message || t(`${I18N_PREFIX}.unknownError`))
         setStep(Step.finished)
       },
-    })
-  }, [runDatasourceNode, nodeId, pipelineId, onJobIdChange, onCheckedCrawlResultChange, t])
+    }) as any
+    const jobId = res.job_id
+    if (!jobId && res.status === 'completed') {
+      setCrawlResult(res)
+      onCheckedCrawlResultChange(res.result || []) // default select the crawl result
+      setStep(Step.finished)
+    }
+    else if (jobId) {
+      await checkCrawlStatus(jobId)
+    }
+    setCrawlErrorMessage('')
+  }, [runDatasourceNode, nodeId, pipelineId, onCheckedCrawlResultChange, checkCrawlStatus, t])
 
   return (
     <div className='flex flex-col'>
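For context: this file swaps a single fire-and-forget mutation for a run-then-poll loop. Stripped of React state, that contract reduces to roughly the sketch below; runAndAwaitCrawl, runNode, getRunStatus, and the local sleep are hypothetical stand-ins for handleRun, runDatasourceNode, getDatasourceNodeRunStatus, and the @/utils helper, while the 2.5 s interval and the job_id/status checks come from the diff.

// Sketch only: the run-then-poll contract without React state or error handling.
type NodeRunResult = {
  job_id?: string
  status: 'processing' | 'completed'
  result: Record<string, any>
}

const sleep = (ms: number) => new Promise<void>(resolve => setTimeout(resolve, ms))

async function runAndAwaitCrawl(
  runNode: () => Promise<NodeRunResult>,
  getRunStatus: (jobId: string) => Promise<NodeRunResult>,
): Promise<NodeRunResult> {
  let res = await runNode()
  // No job_id with a completed status means the node ran synchronously and
  // `result` is already final. Otherwise a job_id was issued, and we poll the
  // status endpoint every 2.5 s, mirroring checkCrawlStatus above.
  while (res.job_id && res.status === 'processing') {
    await sleep(2500)
    res = await getRunStatus(res.job_id)
  }
  return res
}

A caller would then branch on res.status === 'completed' to commit the crawl result, as handleRun does after checkCrawlStatus returns.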


@@ -7,7 +7,6 @@ type WebsiteCrawlProps = {
   nodeId: string
   checkedCrawlResult: CrawlResultItem[]
   onCheckedCrawlResultChange: (payload: CrawlResultItem[]) => void
-  onJobIdChange: (jobId: string) => void
   headerInfo: {
     title: string
     docTitle: string
@@ -22,7 +21,6 @@ const WebsiteCrawl = ({
   checkedCrawlResult,
   headerInfo,
   onCheckedCrawlResultChange,
-  onJobIdChange,
   onPreview,
   isInPipeline,
 }: WebsiteCrawlProps) => {
@@ -32,7 +30,6 @@ const WebsiteCrawl = ({
       checkedCrawlResult={checkedCrawlResult}
       headerInfo={headerInfo}
       onCheckedCrawlResultChange={onCheckedCrawlResultChange}
-      onJobIdChange={onJobIdChange}
       onPreview={onPreview}
       isInPipeline={isInPipeline}
     />


@@ -116,12 +116,9 @@ export const useOnlineDocuments = () => {
 
 export const useWebsiteCrawl = () => {
   const [websitePages, setWebsitePages] = useState<CrawlResultItem[]>([])
-  const [websiteCrawlJobId, setWebsiteCrawlJobId] = useState('')
 
   return {
     websitePages,
-    websiteCrawlJobId,
     setWebsitePages,
-    setWebsiteCrawlJobId,
   }
 }


@@ -40,9 +40,7 @@ const TestRunPanel = () => {
   } = useOnlineDocuments()
   const {
     websitePages,
-    // websiteCrawlJobId, // todo: Add status query
    setWebsitePages,
-    setWebsiteCrawlJobId,
   } = useWebsiteCrawl()
 
   const { handleRun } = useWorkflowRun()
@@ -147,7 +145,6 @@ const TestRunPanel = () => {
             docLink: datasource.docLink || '',
           }}
           onCheckedCrawlResultChange={setWebsitePages}
-          onJobIdChange={setWebsiteCrawlJobId}
           isInPipeline
         />
       )}


@@ -158,7 +158,26 @@ export type PipelineDatasourceNodeRunRequest = {
   datasource_type: DatasourceType
 }
 
-export type PipelineDatasourceNodeRunResponse = Record<string, any>
+export type PipelineDatasourceNodeRunResponse = {
+  job_id?: string
+  status: 'processing' | 'completed'
+  result: Record<string, any>
+  provider_type: DatasourceType
+}
+
+export type PipelineDatasourceNodeRunStatusRequest = {
+  pipeline_id: string
+  node_id: string
+  job_id: string
+  datasource_type: DatasourceType
+}
+
+export type PipelineDatasourceNodeRunStatusResponse = {
+  provider_type: DatasourceType
+  result: Record<string, any>
+  status: 'processing' | 'completed'
+  job_id: string
+}
 
 export type PublishedPipelineInfoResponse = {
   id: string
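To make the new union concrete, here are two hypothetical values of the run response; the field values are placeholders, only the field names and unions come from the types above. A synchronous run returns its result inline with no job_id; an asynchronous run hands back a job_id to poll.

// Placeholder values; shapes follow PipelineDatasourceNodeRunResponse above.
const syncRun: PipelineDatasourceNodeRunResponse = {
  status: 'completed', // no job_id: the crawl result arrives inline in `result`
  result: {},
  provider_type: DatasourceType.websiteCrawl,
}

const asyncRun: PipelineDatasourceNodeRunResponse = {
  job_id: 'job-123', // placeholder ID; its presence triggers the polling path
  status: 'processing',
  result: {},
  provider_type: DatasourceType.websiteCrawl,
}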


@@ -10,6 +10,8 @@ import type {
   PipelineCheckDependenciesResponse,
   PipelineDatasourceNodeRunRequest,
   PipelineDatasourceNodeRunResponse,
+  PipelineDatasourceNodeRunStatusRequest,
+  PipelineDatasourceNodeRunStatusResponse,
   PipelinePreProcessingParamsRequest,
   PipelinePreProcessingParamsResponse,
   PipelineProcessingParamsRequest,
@@ -153,6 +155,36 @@ export const usePublishedDatasourceNodeRun = (
   })
 }
 
+export const useDraftDatasourceNodeRunStatus = (
+  mutationOptions: MutationOptions<PipelineDatasourceNodeRunStatusResponse, Error, PipelineDatasourceNodeRunStatusRequest> = {},
+) => {
+  return useMutation({
+    mutationKey: [NAME_SPACE, 'draft-datasource-node-run-status'],
+    mutationFn: (request: PipelineDatasourceNodeRunStatusRequest) => {
+      const { pipeline_id, node_id, ...rest } = request
+      return post<PipelineDatasourceNodeRunStatusResponse>(`/rag/pipelines/${pipeline_id}/workflows/draft/datasource/nodes/${node_id}/run`, {
+        body: rest,
+      })
+    },
+    ...mutationOptions,
+  })
+}
+
+export const usePublishedDatasourceNodeRunStatus = (
+  mutationOptions: MutationOptions<PipelineDatasourceNodeRunStatusResponse, Error, PipelineDatasourceNodeRunStatusRequest> = {},
+) => {
+  return useMutation({
+    mutationKey: [NAME_SPACE, 'published-datasource-node-run-status'],
+    mutationFn: (request: PipelineDatasourceNodeRunStatusRequest) => {
+      const { pipeline_id, node_id, ...rest } = request
+      return post<PipelineDatasourceNodeRunStatusResponse>(`/rag/pipelines/${pipeline_id}/workflows/published/datasource/nodes/${node_id}/run`, {
+        body: rest,
+      })
+    },
+    ...mutationOptions,
+  })
+}
+
 export const useDraftPipelineProcessingParams = (params: PipelineProcessingParamsRequest, enabled = true) => {
   const { pipeline_id, node_id } = params
   return useQuery<PipelineProcessingParamsResponse>({
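A hypothetical call site for one of the new mutations, mirroring how Crawler drives it in the first diff; the IDs are placeholders, and the await would sit inside an async handler in a component.

// Placeholder IDs; mirrors the polling call in Crawler's checkCrawlStatus.
const { mutateAsync: getDatasourceNodeRunStatus } = useDraftDatasourceNodeRunStatus()

const status = await getDatasourceNodeRunStatus({
  pipeline_id: 'pipeline-id',
  node_id: 'node-id',
  job_id: 'job-123', // the job_id returned by the initial run call
  datasource_type: DatasourceType.websiteCrawl,
})
// status.status is 'processing' | 'completed'; keep polling while 'processing'.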