Mirror of https://github.com/langgenius/dify.git
refactor: website data source components and hooks
This commit is contained in:
parent: cf73faf174
commit: 20343facad
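
Consolidates the FireCrawl, JinaReader, and WaterCrawl test-run components, which each duplicated step state, status polling, form configuration, and Zod schemas, into a shared Crawler component. Crawler runs the published datasource node through useDatasourceNodeRun and derives its form from the node's RAGPipelineVariables via the new useInitialData and useConfigurations hooks; per-provider titles and doc links move into useWebCrawlerHeaderInfo. PipelineInputVarType values are renamed to line up with the base form field types, the per-provider hooks files are removed, and the Datasource type now carries variables instead of config.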
@@ -23,16 +23,14 @@ const Actions = ({
    return CustomActions(form)

  return (
    <div className='flex items-center justify-end p-4 pt-2'>
      <Button
        variant='primary'
        disabled = {isSubmitting || !canSubmit}
        loading={isSubmitting}
        onClick={() => form.handleSubmit()}
      >
        {t('common.operation.submit')}
      </Button>
    </div>
    <Button
      variant='primary'
      disabled={isSubmitting || !canSubmit}
      loading={isSubmitting}
      onClick={() => form.handleSubmit()}
    >
      {t('common.operation.submit')}
    </Button>
  )
}

@@ -156,6 +156,7 @@ const BaseField = ({
          allowed_file_extensions: allowedFileExtensions,
          allowed_file_types: allowedFileTypes,
          allowed_file_upload_methods: allowedFileUploadMethods,
          number_limits: 1,
        }}
      />
    )}

@@ -45,7 +45,7 @@ export default function WorkspaceSelector({
        <MenuItems
          className='absolute left-0 top-8 z-10 w-80 origin-top-right rounded-lg border-[0.5px] border-components-panel-border bg-components-panel-bg-blur shadow-lg shadow-shadow-shadow-5 backdrop-blur-[5px]'
          className='absolute left-0 top-8 z-10 w-80 origin-top-right rounded-lg border-[0.5px] border-components-panel-border bg-components-panel-bg-blur shadow-lg shadow-shadow-shadow-5'
        >
          <div className="max-h-50 overflow-auto p-1">
            {

@@ -6,9 +6,9 @@ import { RiBookOpenLine, RiEqualizer2Line } from '@remixicon/react'

type HeaderProps = {
  isInPipeline?: boolean
  onClickConfiguration: () => void
  onClickConfiguration?: () => void
  title: string
  buttonText: string
  buttonText?: string
  docTitle: string
  docLink: string
}
@@ -31,21 +31,21 @@ const Header = ({
          {title}
        </div>
        <Divider type='vertical' className='mx-1 h-3.5' />
        <Button
          variant='secondary'
          size='small'
          className={cn(isInPipeline ? 'px-1' : 'px-1.5')}
        >
          <RiEqualizer2Line
            className='h-4 w-4'
            onClick={onClickConfiguration}
          />
          {!isInPipeline && (
        {!isInPipeline && (
          <Button
            variant='secondary'
            size='small'
            className='px-1.5'
          >
            <RiEqualizer2Line
              className='h-4 w-4'
              onClick={onClickConfiguration}
            />
            <span className='system-xs-medium'>
              {buttonText}
            </span>
          )}
          </Button>
        </Button>
        )}
      </div>
      <a
        className='system-xs-medium flex items-center gap-x-1 overflow-hidden text-text-accent'

@@ -0,0 +1,133 @@
'use client'
import React, { useCallback, useEffect, useState } from 'react'
import { useTranslation } from 'react-i18next'
import type { CrawlResultItem } from '@/models/datasets'
import Header from '@/app/components/datasets/create/website/base/header'
import Options from '../base/options'
import Crawling from '../base/crawling'
import ErrorMessage from '../base/error-message'
import CrawledResult from '../base/crawled-result'
import type { RAGPipelineVariables } from '@/models/pipeline'
import { useDatasourceNodeRun } from '@/service/use-pipeline'
import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
import { useWebCrawlerHeaderInfo } from '../../../hooks'
import type { DataSourceProvider } from '@/models/common'

const I18N_PREFIX = 'datasetCreation.stepOne.website'

type CrawlerProps = {
  nodeId: string
  variables: RAGPipelineVariables
  checkedCrawlResult: CrawlResultItem[]
  datasourceProvider: DataSourceProvider
  onCheckedCrawlResultChange: (payload: CrawlResultItem[]) => void
  onJobIdChange: (jobId: string) => void
}

enum Step {
  init = 'init',
  running = 'running',
  finished = 'finished',
}

const Crawler = ({
  nodeId,
  variables,
  checkedCrawlResult,
  datasourceProvider,
  onCheckedCrawlResultChange,
  onJobIdChange,
}: CrawlerProps) => {
  const { t } = useTranslation()
  const [step, setStep] = useState<Step>(Step.init)
  const [controlFoldOptions, setControlFoldOptions] = useState<number>(0)
  const pipelineId = useDatasetDetailContextWithSelector(s => s.dataset?.pipeline_id)

  const headerInfoMap = useWebCrawlerHeaderInfo()

  useEffect(() => {
    if (step !== Step.init)
      setControlFoldOptions(Date.now())
  }, [step])

  const isInit = step === Step.init
  const isCrawlFinished = step === Step.finished
  const isRunning = step === Step.running
  const [crawlResult, setCrawlResult] = useState<{
    result: CrawlResultItem[]
    time_consuming: number | string
  } | undefined>(undefined)
  const [crawlErrorMessage, setCrawlErrorMessage] = useState('')
  const showError = isCrawlFinished && crawlErrorMessage

  const { mutateAsync: runDatasourceNode } = useDatasourceNodeRun()

  const handleRun = useCallback(async (value: Record<string, any>) => {
    setStep(Step.running)
    await runDatasourceNode({
      node_id: nodeId,
      pipeline_id: pipelineId!,
      inputs: value,
    }, {
      onSuccess: (res: any) => {
        const jobId = res.job_id
        onJobIdChange(jobId)
        setCrawlResult(res)
        onCheckedCrawlResultChange(res.result || []) // default select the crawl result
        setCrawlErrorMessage('')
      },
      onError: (error) => {
        setCrawlErrorMessage(error.message || t(`${I18N_PREFIX}.unknownError`))
      },
      onSettled: () => {
        setStep(Step.finished)
      },
    })
  }, [runDatasourceNode, nodeId, pipelineId, onJobIdChange, onCheckedCrawlResultChange, t])

  return (
    <div>
      <Header
        isInPipeline
        {...headerInfoMap[datasourceProvider]}
      />
      <div className='mt-2 rounded-xl border border-components-panel-border bg-background-default-subtle'>
        <Options
          variables={variables}
          isRunning={isRunning}
          controlFoldOptions={controlFoldOptions}
          onSubmit={(value) => {
            handleRun(value)
          }}
        />
      </div>
      {!isInit && (
        <div className='relative'>
          {isRunning && (
            <Crawling
              crawledNum={0}
              totalNum={0}
            />
          )}
          {showError && (
            <ErrorMessage
              className='mt-2'
              title={t(`${I18N_PREFIX}.exceptionErrorTitle`)}
              errorMsg={crawlErrorMessage}
            />
          )}
          {isCrawlFinished && !showError && (
            <CrawledResult
              className='mt-2'
              list={crawlResult?.result || []}
              checkedList={checkedCrawlResult}
              onSelectedChange={onCheckedCrawlResultChange}
              usedTime={Number.parseFloat(crawlResult?.time_consuming as string) || 0}
            />
          )}
        </div>
      )}
    </div>
  )
}
export default React.memo(Crawler)
@@ -0,0 +1,50 @@
import type { BaseConfiguration, BaseFieldType } from '@/app/components/base/form/form-scenarios/base/types'
import { PipelineInputVarType, type RAGPipelineVariables } from '@/models/pipeline'
import { useMemo } from 'react'

export const useInitialData = (variables: RAGPipelineVariables) => {
  const initialData = useMemo(() => {
    const initialData: Record<string, any> = {}
    variables.forEach((item) => {
      if ([PipelineInputVarType.textInput, PipelineInputVarType.paragraph, PipelineInputVarType.select].includes(item.type))
        initialData[item.variable] = item.default_value || ''
      if (item.type === PipelineInputVarType.number)
        initialData[item.variable] = item.default_value || 0
      if ([PipelineInputVarType.singleFile, PipelineInputVarType.multiFiles].includes(item.type))
        initialData[item.variable] = item.default_value || []
      if (item.type === PipelineInputVarType.checkbox)
        initialData[item.variable] = item.default_value ?? true // ?? (not ||) so an explicit false default is kept
    })
    return initialData
  }, [variables])

  return initialData
}

export const useConfigurations = (variables: RAGPipelineVariables) => {
  const configurations = useMemo(() => {
    const configurations: BaseConfiguration[] = []
    variables.forEach((item) => {
      configurations.push({
        type: item.type as unknown as BaseFieldType,
        variable: item.variable,
        label: item.label,
        required: item.required,
        placeholder: item.placeholder,
        tooltip: item.tooltips,
        options: item.options?.map(option => ({
          label: option,
          value: option,
        })),
        maxLength: item.max_length,
        showConditions: [],
        allowedFileUploadMethods: item.allowed_file_upload_methods,
        allowedFileTypes: item.allowed_file_types,
        allowedFileExtensions: item.allowed_file_extensions,
      })
    })
    return configurations
  }, [variables])

  return configurations
}
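
For reference, a minimal sketch of how these hooks are consumed (this mirrors the Options component in the next hunk; generateZodSchema is the existing base-form utility it imports):

  const initialData = useInitialData(variables)       // per-type defaults keyed by variable name
  const configurations = useConfigurations(variables) // BaseConfiguration[] driving BaseField rendering
  const schema = useMemo(() => generateZodSchema(configurations), [configurations])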
@@ -1,35 +1,39 @@
import Button from '@/app/components/base/button'
import { useAppForm } from '@/app/components/base/form'
import BaseField from '@/app/components/base/form/form-scenarios/base/field'
import type { BaseConfiguration } from '@/app/components/base/form/form-scenarios/base/types'
import { ArrowDownRoundFill } from '@/app/components/base/icons/src/vender/solid/general'
import cn from '@/utils/classnames'
import { RiPlayLargeLine } from '@remixicon/react'
import { useBoolean } from 'ahooks'
import { useEffect } from 'react'
import { useEffect, useMemo } from 'react'
import { useTranslation } from 'react-i18next'
import Toast from '@/app/components/base/toast'
import type { ZodSchema } from 'zod'
import type { RAGPipelineVariables } from '@/models/pipeline'
import { useConfigurations, useInitialData } from './hooks'
import { generateZodSchema } from '@/app/components/base/form/form-scenarios/base/utils'

const I18N_PREFIX = 'datasetCreation.stepOne.website'

type OptionsProps = {
  initialData: Record<string, any>
  configurations: BaseConfiguration[]
  variables: RAGPipelineVariables
  isRunning: boolean
  controlFoldOptions?: number
  schema: ZodSchema
  onSubmit: (data: Record<string, any>) => void
}

const Options = ({
  initialData,
  configurations,
  variables,
  isRunning,
  controlFoldOptions,
  schema,
  onSubmit,
}: OptionsProps) => {
  const { t } = useTranslation()
  const initialData = useInitialData(variables)
  const configurations = useConfigurations(variables)
  const schema = useMemo(() => {
    return generateZodSchema(configurations)
  }, [configurations])

  const form = useAppForm({
    defaultValues: initialData,
    validators: {
@@ -53,8 +57,6 @@ const Options = ({
    },
  })

  const { t } = useTranslation()

  const [fold, {
    toggle: foldToggle,
    setTrue: foldHide,

@@ -1,89 +0,0 @@
import type { BaseConfiguration } from '@/app/components/base/form/form-scenarios/base/types'
import { BaseFieldType } from '@/app/components/base/form/form-scenarios/base/types'
import { useTranslation } from 'react-i18next'
import { z } from 'zod'

const ERROR_I18N_PREFIX = 'common.errorMsg'
const I18N_PREFIX = 'datasetCreation.stepOne.website'

export const useConfigurations = () => {
  const { t } = useTranslation()
  const configurations: BaseConfiguration[] = [
    {
      type: BaseFieldType.textInput,
      variable: 'url',
      label: 'URL',
      required: true,
      showConditions: [],
      placeholder: 'https://docs.dify.ai',
    },
    {
      type: BaseFieldType.numberInput,
      variable: 'limit',
      label: t(`${I18N_PREFIX}.limit`),
      required: true,
      showConditions: [],
    },
    {
      type: BaseFieldType.numberInput,
      variable: 'max_depth',
      label: t(`${I18N_PREFIX}.maxDepth`),
      required: false,
      showConditions: [],
      tooltip: t(`${I18N_PREFIX}.maxDepthTooltip`),
    },
    {
      type: BaseFieldType.textInput,
      variable: 'excludes',
      label: t(`${I18N_PREFIX}.excludePaths`),
      required: false,
      showConditions: [],
      placeholder: 'blog/*, /about/*',
    },
    {
      type: BaseFieldType.textInput,
      variable: 'includes',
      label: t(`${I18N_PREFIX}.includeOnlyPaths`),
      required: false,
      showConditions: [],
      placeholder: 'articles/*',
    },
    {
      type: BaseFieldType.checkbox,
      variable: 'crawl_sub_pages',
      label: t(`${I18N_PREFIX}.crawlSubPage`),
      required: false,
      showConditions: [],
    },
    {
      type: BaseFieldType.checkbox,
      variable: 'only_main_content',
      label: t(`${I18N_PREFIX}.extractOnlyMainContent`),
      required: false,
      showConditions: [],
    },
  ]

  return configurations
}

export const useSchema = () => {
  const { t } = useTranslation()

  const Schema = z.object({
    url: z.string().nonempty({
      message: t(`${ERROR_I18N_PREFIX}.fieldRequired`, {
        field: 'url',
      }),
    }).regex(/^https?:\/\//, {
      message: t(`${ERROR_I18N_PREFIX}.urlError`),
    }),
    limit: z.number().positive({
      message: t(`${ERROR_I18N_PREFIX}.fieldRequired`, {
        field: t(`${I18N_PREFIX}.limit`),
      }),
    }).int(),
  }).passthrough()

  return Schema
}
@@ -1,202 +1,34 @@
'use client'
import React, { useCallback, useEffect, useState } from 'react'
import { useTranslation } from 'react-i18next'
import { useModalContextSelector } from '@/context/modal-context'
import type { CrawlOptions, CrawlResultItem } from '@/models/datasets'
import { checkFirecrawlTaskStatus, createFirecrawlTask } from '@/service/datasets'
import { sleep } from '@/utils'
import Header from '@/app/components/datasets/create/website/base/header'
import Options from '../base/options'
import { useConfigurations, useSchema } from './hooks'
import Crawling from '../base/crawling'
import ErrorMessage from '../base/error-message'
import CrawledResult from '../base/crawled-result'

const I18N_PREFIX = 'datasetCreation.stepOne.website'
import React from 'react'
import type { CrawlResultItem } from '@/models/datasets'
import type { RAGPipelineVariables } from '@/models/pipeline'
import Crawler from '../base/crawler'
import { DataSourceProvider } from '@/models/common'

type FireCrawlProps = {
  nodeId: string
  variables: RAGPipelineVariables
  checkedCrawlResult: CrawlResultItem[]
  onCheckedCrawlResultChange: (payload: CrawlResultItem[]) => void
  onJobIdChange: (jobId: string) => void
  crawlOptions: CrawlOptions
  onCrawlOptionsChange: (payload: CrawlOptions) => void
}

enum Step {
  init = 'init',
  running = 'running',
  finished = 'finished',
}

const FireCrawl = ({
  nodeId,
  variables,
  checkedCrawlResult,
  onCheckedCrawlResultChange,
  onJobIdChange,
  crawlOptions,
  onCrawlOptionsChange,
}: FireCrawlProps) => {
  const { t } = useTranslation()
  const [step, setStep] = useState<Step>(Step.init)
  const [controlFoldOptions, setControlFoldOptions] = useState<number>(0)
  const configurations = useConfigurations()
  const schema = useSchema()

  useEffect(() => {
    if (step !== Step.init)
      setControlFoldOptions(Date.now())
  }, [step])

  const setShowAccountSettingModal = useModalContextSelector(s => s.setShowAccountSettingModal)
  const handleSetting = useCallback(() => {
    setShowAccountSettingModal({
      payload: 'data-source',
    })
  }, [setShowAccountSettingModal])

  const isInit = step === Step.init
  const isCrawlFinished = step === Step.finished
  const isRunning = step === Step.running
  const [crawlResult, setCrawlResult] = useState<{
    current: number
    total: number
    data: CrawlResultItem[]
    time_consuming: number | string
  } | undefined>(undefined)
  const [crawlErrorMessage, setCrawlErrorMessage] = useState('')
  const showError = isCrawlFinished && crawlErrorMessage

  const waitForCrawlFinished = useCallback(async (jobId: string) => {
    try {
      const res = await checkFirecrawlTaskStatus(jobId) as any
      if (res.status === 'completed') {
        return {
          isError: false,
          data: {
            ...res,
            total: Math.min(res.total, Number.parseFloat(crawlOptions.limit as string)),
          },
        }
      }
      if (res.status === 'error' || !res.status) {
        // can't get the error message from the firecrawl api
        return {
          isError: true,
          errorMessage: res.message,
          data: {
            data: [],
          },
        }
      }
      // update the progress
      setCrawlResult({
        ...res,
        total: Math.min(res.total, Number.parseFloat(crawlOptions.limit as string)),
      })
      onCheckedCrawlResultChange(res.data || []) // default select the crawl result
      await sleep(2500)
      return await waitForCrawlFinished(jobId)
    }
    catch (e: any) {
      const errorBody = await e.json()
      return {
        isError: true,
        errorMessage: errorBody.message,
        data: {
          data: [],
        },
      }
    }
  }, [crawlOptions.limit, onCheckedCrawlResultChange])

  const handleRun = useCallback(async (value: Record<string, any>) => {
    const { url, ...crawlOptions } = value
    onCrawlOptionsChange(crawlOptions as CrawlOptions)
    setStep(Step.running)
    try {
      const passToServerCrawlOptions: any = {
        ...crawlOptions,
      }
      if (crawlOptions.max_depth === '')
        delete passToServerCrawlOptions.max_depth

      const res = await createFirecrawlTask({
        url,
        options: passToServerCrawlOptions,
      }) as any
      const jobId = res.job_id
      onJobIdChange(jobId)
      const { isError, data, errorMessage } = await waitForCrawlFinished(jobId)
      if (isError) {
        setCrawlErrorMessage(errorMessage || t(`${I18N_PREFIX}.unknownError`))
      }
      else {
        setCrawlResult(data)
        onCheckedCrawlResultChange(data.data || []) // default select the crawl result
        setCrawlErrorMessage('')
      }
    }
    catch (e) {
      setCrawlErrorMessage(t(`${I18N_PREFIX}.unknownError`)!)
      console.log(e)
    }
    finally {
      setStep(Step.finished)
    }
  }, [onCrawlOptionsChange, onJobIdChange, t, waitForCrawlFinished, onCheckedCrawlResultChange])

  return (
    <div>
      <Header
        isInPipeline
        onClickConfiguration={handleSetting}
        title={t(`${I18N_PREFIX}.firecrawlTitle`)}
        buttonText={t(`${I18N_PREFIX}.configureFirecrawl`)}
        docTitle={t(`${I18N_PREFIX}.firecrawlDoc`)}
        docLink={'https://docs.firecrawl.dev/introduction'}
      />
      <div className='mt-2 rounded-xl border border-components-panel-border bg-background-default-subtle'>
        <Options
          initialData={{
            ...crawlOptions,
            url: '',
          }}
          configurations={configurations}
          isRunning={isRunning}
          controlFoldOptions={controlFoldOptions}
          schema={schema}
          onSubmit={(value) => {
            handleRun(value)
            console.log('submit')
          }}
        />
      </div>
      {!isInit && (
        <div className='relative'>
          {isRunning && (
            <Crawling
              crawledNum={crawlResult?.current || 0}
              totalNum={crawlResult?.total || Number.parseFloat(crawlOptions.limit as string) || 0}
            />
          )}
          {showError && (
            <ErrorMessage
              className='mt-2'
              title={t(`${I18N_PREFIX}.exceptionErrorTitle`)}
              errorMsg={crawlErrorMessage}
            />
          )}
          {isCrawlFinished && !showError && (
            <CrawledResult
              className='mt-2'
              list={crawlResult?.data || []}
              checkedList={checkedCrawlResult}
              onSelectedChange={onCheckedCrawlResultChange}
              usedTime={Number.parseFloat(crawlResult?.time_consuming as string) || 0}
            />
          )}
        </div>
      )}
    </div>
    <Crawler
      nodeId={nodeId}
      variables={variables}
      checkedCrawlResult={checkedCrawlResult}
      datasourceProvider={DataSourceProvider.fireCrawl}
      onCheckedCrawlResultChange={onCheckedCrawlResultChange}
      onJobIdChange={onJobIdChange}
    />
  )
}
export default React.memo(FireCrawl)
export default FireCrawl

@@ -1,66 +0,0 @@
import type { BaseConfiguration } from '@/app/components/base/form/form-scenarios/base/types'
import { BaseFieldType } from '@/app/components/base/form/form-scenarios/base/types'
import { useTranslation } from 'react-i18next'
import { z } from 'zod'

const ERROR_I18N_PREFIX = 'common.errorMsg'
const I18N_PREFIX = 'datasetCreation.stepOne.website'

export const useConfigurations = () => {
  const { t } = useTranslation()
  const configurations: BaseConfiguration[] = [
    {
      type: BaseFieldType.textInput,
      variable: 'url',
      label: 'URL',
      required: true,
      showConditions: [],
      placeholder: 'https://docs.dify.ai',
    },
    {
      type: BaseFieldType.numberInput,
      variable: 'limit',
      label: t(`${I18N_PREFIX}.limit`),
      required: true,
      showConditions: [],
    },
    {
      type: BaseFieldType.checkbox,
      variable: 'crawl_sub_pages',
      label: t(`${I18N_PREFIX}.crawlSubPage`),
      required: false,
      showConditions: [],
    },
    {
      type: BaseFieldType.checkbox,
      variable: 'use_sitemap',
      label: t(`${I18N_PREFIX}.useSitemap`),
      tooltip: t(`${I18N_PREFIX}.useSitemapTooltip`),
      required: false,
      showConditions: [],
    },
  ]

  return configurations
}

export const useSchema = () => {
  const { t } = useTranslation()

  const Schema = z.object({
    url: z.string().nonempty({
      message: t(`${ERROR_I18N_PREFIX}.fieldRequired`, {
        field: 'url',
      }),
    }).regex(/^https?:\/\//, {
      message: t(`${ERROR_I18N_PREFIX}.urlError`),
    }),
    limit: z.number().positive({
      message: t(`${ERROR_I18N_PREFIX}.fieldRequired`, {
        field: t(`${I18N_PREFIX}.limit`),
      }),
    }).int(),
  }).passthrough()

  return Schema
}
@@ -1,215 +1,34 @@
'use client'
import React, { useCallback, useEffect, useState } from 'react'
import { useTranslation } from 'react-i18next'
import CrawledResult from '../base/crawled-result'
import Crawling from '../base/crawling'
import ErrorMessage from '../base/error-message'
import { useModalContextSelector } from '@/context/modal-context'
import { checkJinaReaderTaskStatus, createJinaReaderTask } from '@/service/datasets'
import { sleep } from '@/utils'
import type { CrawlOptions, CrawlResultItem } from '@/models/datasets'
import Header from '@/app/components/datasets/create/website/base/header'
import Options from '../base/options'
import { useConfigurations, useSchema } from './hooks'

const I18N_PREFIX = 'datasetCreation.stepOne.website'
import React from 'react'
import type { CrawlResultItem } from '@/models/datasets'
import type { RAGPipelineVariables } from '@/models/pipeline'
import Crawler from '../base/crawler'
import { DataSourceProvider } from '@/models/common'

type JinaReaderProps = {
  nodeId: string
  variables: RAGPipelineVariables
  checkedCrawlResult: CrawlResultItem[]
  onCheckedCrawlResultChange: (payload: CrawlResultItem[]) => void
  onJobIdChange: (jobId: string) => void
  crawlOptions: CrawlOptions
  onCrawlOptionsChange: (payload: CrawlOptions) => void
}

enum Step {
  init = 'init',
  running = 'running',
  finished = 'finished',
}

const JinaReader = ({
  nodeId,
  variables,
  checkedCrawlResult,
  onCheckedCrawlResultChange,
  onJobIdChange,
  crawlOptions,
  onCrawlOptionsChange,
}: JinaReaderProps) => {
  const { t } = useTranslation()
  const [step, setStep] = useState<Step>(Step.init)
  const [controlFoldOptions, setControlFoldOptions] = useState<number>(0)
  const configurations = useConfigurations()
  const schema = useSchema()

  useEffect(() => {
    if (step !== Step.init)
      setControlFoldOptions(Date.now())
  }, [step])

  const setShowAccountSettingModal = useModalContextSelector(state => state.setShowAccountSettingModal)
  const handleSetting = useCallback(() => {
    setShowAccountSettingModal({
      payload: 'data-source',
    })
  }, [setShowAccountSettingModal])

  const isInit = step === Step.init
  const isCrawlFinished = step === Step.finished
  const isRunning = step === Step.running
  const [crawlResult, setCrawlResult] = useState<{
    current: number
    total: number
    data: CrawlResultItem[]
    time_consuming: number | string
  } | undefined>(undefined)
  const [crawlErrorMessage, setCrawlErrorMessage] = useState('')
  const showError = isCrawlFinished && crawlErrorMessage

  const waitForCrawlFinished = useCallback(async (jobId: string) => {
    try {
      const res = await checkJinaReaderTaskStatus(jobId) as any
      if (res.status === 'completed') {
        return {
          isError: false,
          data: {
            ...res,
            total: Math.min(res.total, Number.parseFloat(crawlOptions.limit as string)),
          },
        }
      }
      if (res.status === 'failed' || !res.status) {
        return {
          isError: true,
          errorMessage: res.message,
          data: {
            data: [],
          },
        }
      }
      // update the progress
      setCrawlResult({
        ...res,
        total: Math.min(res.total, Number.parseFloat(crawlOptions.limit as string)),
      })
      onCheckedCrawlResultChange(res.data || []) // default select the crawl result
      await sleep(2500)
      return await waitForCrawlFinished(jobId)
    }
    catch (e: any) {
      const errorBody = await e.json()
      return {
        isError: true,
        errorMessage: errorBody.message,
        data: {
          data: [],
        },
      }
    }
  }, [crawlOptions.limit, onCheckedCrawlResultChange])

  const handleRun = useCallback(async (value: Record<string, any>) => {
    const { url, ...crawlOptions } = value
    onCrawlOptionsChange(crawlOptions as CrawlOptions)
    setStep(Step.running)
    try {
      const startTime = Date.now()
      const res = await createJinaReaderTask({
        url,
        options: crawlOptions,
      }) as any

      if (res.data) {
        const data = {
          current: 1,
          total: 1,
          data: [{
            title: res.data.title,
            markdown: res.data.content,
            description: res.data.description,
            source_url: res.data.url,
          }],
          time_consuming: (Date.now() - startTime) / 1000,
        }
        setCrawlResult(data)
        onCheckedCrawlResultChange(data.data || [])
        setCrawlErrorMessage('')
      }
      else if (res.job_id) {
        const jobId = res.job_id
        onJobIdChange(jobId)
        const { isError, data, errorMessage } = await waitForCrawlFinished(jobId)
        if (isError) {
          setCrawlErrorMessage(errorMessage || t(`${I18N_PREFIX}.unknownError`))
        }
        else {
          setCrawlResult(data)
          onCheckedCrawlResultChange(data.data || []) // default select the crawl result
          setCrawlErrorMessage('')
        }
      }
    }
    catch (e) {
      setCrawlErrorMessage(t(`${I18N_PREFIX}.unknownError`)!)
      console.log(e)
    }
    finally {
      setStep(Step.finished)
    }
  }, [onCrawlOptionsChange, onCheckedCrawlResultChange, onJobIdChange, t, waitForCrawlFinished])

  return (
    <div>
      <Header
        isInPipeline
        onClickConfiguration={handleSetting}
        title={t(`${I18N_PREFIX}.jinaReaderTitle`)}
        buttonText={t(`${I18N_PREFIX}.configureJinaReader`)}
        docTitle={t(`${I18N_PREFIX}.jinaReaderDoc`)}
        docLink={'https://jina.ai/reader'}
      />
      <div className='mt-2 rounded-xl border border-components-panel-border bg-background-default-subtle'>
        <Options
          initialData={{
            ...crawlOptions,
            url: '',
          }}
          configurations={configurations}
          isRunning={isRunning}
          controlFoldOptions={controlFoldOptions}
          schema={schema}
          onSubmit={(value) => {
            handleRun(value)
            console.log('submit')
          }}
        />
      </div>
      {!isInit && (
        <div className='relative'>
          {isRunning && (
            <Crawling
              crawledNum={crawlResult?.current || 0}
              totalNum={crawlResult?.total || Number.parseFloat(crawlOptions.limit as string) || 0}
            />
          )}
          {showError && (
            <ErrorMessage
              className='mt-2'
              title={t(`${I18N_PREFIX}.exceptionErrorTitle`)}
              errorMsg={crawlErrorMessage}
            />
          )}
          {isCrawlFinished && !showError && (
            <CrawledResult
              className='mt-2'
              list={crawlResult?.data || []}
              checkedList={checkedCrawlResult}
              onSelectedChange={onCheckedCrawlResultChange}
              usedTime={Number.parseFloat(crawlResult?.time_consuming as string) || 0}
            />
          )}
        </div>
      )}
    </div>
    <Crawler
      nodeId={nodeId}
      variables={variables}
      checkedCrawlResult={checkedCrawlResult}
      datasourceProvider={DataSourceProvider.jinaReader}
      onCheckedCrawlResultChange={onCheckedCrawlResultChange}
      onJobIdChange={onJobIdChange}
    />
  )
}
export default React.memo(JinaReader)

@@ -1,89 +0,0 @@
import type { BaseConfiguration } from '@/app/components/base/form/form-scenarios/base/types'
import { BaseFieldType } from '@/app/components/base/form/form-scenarios/base/types'
import { useTranslation } from 'react-i18next'
import { z } from 'zod'

const ERROR_I18N_PREFIX = 'common.errorMsg'
const I18N_PREFIX = 'datasetCreation.stepOne.website'

export const useConfigurations = () => {
  const { t } = useTranslation()
  const configurations: BaseConfiguration[] = [
    {
      type: BaseFieldType.textInput,
      variable: 'url',
      label: 'URL',
      required: true,
      showConditions: [],
      placeholder: 'https://docs.dify.ai',
    },
    {
      type: BaseFieldType.numberInput,
      variable: 'limit',
      label: t(`${I18N_PREFIX}.limit`),
      required: true,
      showConditions: [],
    },
    {
      type: BaseFieldType.numberInput,
      variable: 'max_depth',
      label: t(`${I18N_PREFIX}.maxDepth`),
      required: false,
      showConditions: [],
      tooltip: t(`${I18N_PREFIX}.maxDepthTooltip`),
    },
    {
      type: BaseFieldType.textInput,
      variable: 'excludes',
      label: t(`${I18N_PREFIX}.excludePaths`),
      required: false,
      showConditions: [],
      placeholder: 'blog/*, /about/*',
    },
    {
      type: BaseFieldType.textInput,
      variable: 'includes',
      label: t(`${I18N_PREFIX}.includeOnlyPaths`),
      required: false,
      showConditions: [],
      placeholder: 'articles/*',
    },
    {
      type: BaseFieldType.checkbox,
      variable: 'crawl_sub_pages',
      label: t(`${I18N_PREFIX}.crawlSubPage`),
      required: false,
      showConditions: [],
    },
    {
      type: BaseFieldType.checkbox,
      variable: 'only_main_content',
      label: t(`${I18N_PREFIX}.extractOnlyMainContent`),
      required: false,
      showConditions: [],
    },
  ]

  return configurations
}

export const useSchema = () => {
  const { t } = useTranslation()

  const Schema = z.object({
    url: z.string().nonempty({
      message: t(`${ERROR_I18N_PREFIX}.fieldRequired`, {
        field: 'url',
      }),
    }).regex(/^https?:\/\//, {
      message: t(`${ERROR_I18N_PREFIX}.urlError`),
    }),
    limit: z.number().positive({
      message: t(`${ERROR_I18N_PREFIX}.fieldRequired`, {
        field: t(`${I18N_PREFIX}.limit`),
      }),
    }).int(),
  }).passthrough()

  return Schema
}
@@ -1,202 +1,34 @@
'use client'
import React, { useCallback, useEffect, useState } from 'react'
import { useTranslation } from 'react-i18next'
import { useModalContextSelector } from '@/context/modal-context'
import type { CrawlOptions, CrawlResultItem } from '@/models/datasets'
import { checkWatercrawlTaskStatus, createWatercrawlTask } from '@/service/datasets'
import { sleep } from '@/utils'
import Header from '@/app/components/datasets/create/website/base/header'
import Options from '../base/options'
import { useConfigurations, useSchema } from './hooks'
import Crawling from '../base/crawling'
import ErrorMessage from '../base/error-message'
import CrawledResult from '../base/crawled-result'

const I18N_PREFIX = 'datasetCreation.stepOne.website'
import React from 'react'
import type { CrawlResultItem } from '@/models/datasets'
import type { RAGPipelineVariables } from '@/models/pipeline'
import Crawler from '../base/crawler'
import { DataSourceProvider } from '@/models/common'

type WaterCrawlProps = {
  nodeId: string
  variables: RAGPipelineVariables
  checkedCrawlResult: CrawlResultItem[]
  onCheckedCrawlResultChange: (payload: CrawlResultItem[]) => void
  onJobIdChange: (jobId: string) => void
  crawlOptions: CrawlOptions
  onCrawlOptionsChange: (payload: CrawlOptions) => void
}

enum Step {
  init = 'init',
  running = 'running',
  finished = 'finished',
}

const WaterCrawl = ({
  nodeId,
  variables,
  checkedCrawlResult,
  onCheckedCrawlResultChange,
  onJobIdChange,
  crawlOptions,
  onCrawlOptionsChange,
}: WaterCrawlProps) => {
  const { t } = useTranslation()
  const [step, setStep] = useState<Step>(Step.init)
  const [controlFoldOptions, setControlFoldOptions] = useState<number>(0)
  const configurations = useConfigurations()
  const schema = useSchema()

  useEffect(() => {
    if (step !== Step.init)
      setControlFoldOptions(Date.now())
  }, [step])

  const setShowAccountSettingModal = useModalContextSelector(state => state.setShowAccountSettingModal)
  const handleSetting = useCallback(() => {
    setShowAccountSettingModal({
      payload: 'data-source',
    })
  }, [setShowAccountSettingModal])

  const isInit = step === Step.init
  const isCrawlFinished = step === Step.finished
  const isRunning = step === Step.running
  const [crawlResult, setCrawlResult] = useState<{
    current: number
    total: number
    data: CrawlResultItem[]
    time_consuming: number | string
  } | undefined>(undefined)
  const [crawlErrorMessage, setCrawlErrorMessage] = useState('')
  const showError = isCrawlFinished && crawlErrorMessage

  const waitForCrawlFinished = useCallback(async (jobId: string): Promise<any> => {
    try {
      const res = await checkWatercrawlTaskStatus(jobId) as any
      if (res.status === 'completed') {
        return {
          isError: false,
          data: {
            ...res,
            total: Math.min(res.total, Number.parseFloat(crawlOptions.limit as string)),
          },
        }
      }
      if (res.status === 'error' || !res.status) {
        // can't get the error message from the watercrawl api
        return {
          isError: true,
          errorMessage: res.message,
          data: {
            data: [],
          },
        }
      }
      // update the progress
      setCrawlResult({
        ...res,
        total: Math.min(res.total, Number.parseFloat(crawlOptions.limit as string)),
      })
      onCheckedCrawlResultChange(res.data || []) // default select the crawl result
      await sleep(2500)
      return await waitForCrawlFinished(jobId)
    }
    catch (e: any) {
      const errorBody = await e.json()
      return {
        isError: true,
        errorMessage: errorBody.message,
        data: {
          data: [],
        },
      }
    }
  }, [crawlOptions.limit, onCheckedCrawlResultChange])

  const handleRun = useCallback(async (value: Record<string, any>) => {
    const { url, ...crawlOptions } = value
    onCrawlOptionsChange(crawlOptions as CrawlOptions)
    setStep(Step.running)
    try {
      const passToServerCrawlOptions: any = {
        ...crawlOptions,
      }
      if (crawlOptions.max_depth === '')
        delete passToServerCrawlOptions.max_depth

      const res = await createWatercrawlTask({
        url,
        options: passToServerCrawlOptions,
      }) as any
      const jobId = res.job_id
      onJobIdChange(jobId)
      const { isError, data, errorMessage } = await waitForCrawlFinished(jobId)
      if (isError) {
        setCrawlErrorMessage(errorMessage || t(`${I18N_PREFIX}.unknownError`))
      }
      else {
        setCrawlResult(data)
        onCheckedCrawlResultChange(data.data || []) // default select the crawl result
        setCrawlErrorMessage('')
      }
    }
    catch (e) {
      setCrawlErrorMessage(t(`${I18N_PREFIX}.unknownError`)!)
      console.log(e)
    }
    finally {
      setStep(Step.finished)
    }
  }, [onCrawlOptionsChange, onCheckedCrawlResultChange, onJobIdChange, t, waitForCrawlFinished])

  return (
    <div>
      <Header
        isInPipeline
        onClickConfiguration={handleSetting}
        title={t(`${I18N_PREFIX}.watercrawlTitle`)}
        buttonText={t(`${I18N_PREFIX}.configureWatercrawl`)}
        docTitle={t(`${I18N_PREFIX}.watercrawlDoc`)}
        docLink={'https://docs.watercrawl.dev/'}
      />
      <div className='mt-2 rounded-xl border border-components-panel-border bg-background-default-subtle'>
        <Options
          initialData={{
            ...crawlOptions,
            url: '',
          }}
          configurations={configurations}
          isRunning={isRunning}
          controlFoldOptions={controlFoldOptions}
          schema={schema}
          onSubmit={(value) => {
            handleRun(value)
            console.log('submit')
          }}
        />
      </div>
      {!isInit && (
        <div className='relative'>
          {isRunning && (
            <Crawling
              crawledNum={crawlResult?.current || 0}
              totalNum={crawlResult?.total || Number.parseFloat(crawlOptions.limit as string) || 0}
            />
          )}
          {showError && (
            <ErrorMessage
              className='mt-2'
              title={t(`${I18N_PREFIX}.exceptionErrorTitle`)}
              errorMsg={crawlErrorMessage}
            />
          )}
          {isCrawlFinished && !showError && (
            <CrawledResult
              className='mt-2'
              list={crawlResult?.data || []}
              checkedList={checkedCrawlResult}
              onSelectedChange={onCheckedCrawlResultChange}
              usedTime={Number.parseFloat(crawlResult?.time_consuming as string) || 0}
            />
          )}
        </div>
      )}
    </div>
    <Crawler
      nodeId={nodeId}
      variables={variables}
      checkedCrawlResult={checkedCrawlResult}
      datasourceProvider={DataSourceProvider.waterCrawl} // corrected: the diff passed DataSourceProvider.jinaReader here, an apparent copy-paste slip
      onCheckedCrawlResultChange={onCheckedCrawlResultChange}
      onJobIdChange={onJobIdChange}
    />
  )
}
export default React.memo(WaterCrawl)

@@ -49,7 +49,7 @@ export const useConfigurations = (datasourceNodeId: string) => {
        value: option,
      })),
      showConditions: [],
      default: item.default,
      default: item.default_value,
    }))
    return configs
  }, [paramsConfig])

@@ -50,7 +50,7 @@ export const useDatasourceOptions = () => {
      return {
        nodeId: node.id,
        type,
        config: {},
        variables: node.data.variables,
      }
    })
  }, [nodes])
@@ -98,3 +98,31 @@ export const useDatasourceOptions = () => {
  }, [datasources, t])
  return { datasources, options }
}

export const useWebCrawlerHeaderInfo = () => {
  const { t } = useTranslation()
  const I18N_PREFIX = 'datasetCreation.stepOne.website'

  const headerInfoMap: Record<DataSourceProvider, {
    title: string
    docTitle: string
    docLink: string
  }> = {
    [DataSourceProvider.fireCrawl]: {
      title: t(`${I18N_PREFIX}.firecrawlTitle`),
      docTitle: t(`${I18N_PREFIX}.firecrawlDoc`),
      docLink: 'https://docs.firecrawl.dev/introduction',
    },
    [DataSourceProvider.jinaReader]: {
      title: t(`${I18N_PREFIX}.jinaReaderTitle`),
      docTitle: t(`${I18N_PREFIX}.jinaReaderDoc`),
      docLink: 'https://jina.ai/reader',
    },
    [DataSourceProvider.waterCrawl]: {
      title: t(`${I18N_PREFIX}.watercrawlTitle`),
      docTitle: t(`${I18N_PREFIX}.watercrawlDoc`),
      docLink: 'https://docs.watercrawl.dev/',
    },
  }
  return headerInfoMap
}
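
The shared Crawler component (added above) consumes this map by spreading the entry for its provider, `<Header isInPipeline {...headerInfoMap[datasourceProvider]} />`, so per-provider titles and doc links no longer live in each wrapper.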
@@ -4,7 +4,7 @@ import { useCallback, useMemo, useState } from 'react'
import StepIndicator from './step-indicator'
import { useTestRunSteps } from './hooks'
import DataSourceOptions from './data-source-options'
import type { CrawlOptions, CrawlResultItem, FileItem } from '@/models/datasets'
import type { CrawlResultItem, FileItem } from '@/models/datasets'
import { DataSourceType } from '@/models/datasets'
import LocalFile from './data-source/local-file'
import produce from 'immer'
@@ -12,7 +12,6 @@ import { useProviderContextSelector } from '@/context/provider-context'
import { DataSourceProvider, type NotionPage } from '@/models/common'
import Notion from './data-source/notion'
import VectorSpaceFull from '@/app/components/billing/vector-space-full'
import { DEFAULT_CRAWL_OPTIONS } from './consts'
import Firecrawl from './data-source/website/firecrawl'
import JinaReader from './data-source/website/jina-reader'
import WaterCrawl from './data-source/website/water-crawl'
@@ -31,7 +30,6 @@ const TestRunPanel = () => {
  const [notionPages, setNotionPages] = useState<NotionPage[]>([])
  const [websitePages, setWebsitePages] = useState<CrawlResultItem[]>([])
  const [websiteCrawlJobId, setWebsiteCrawlJobId] = useState('')
  const [crawlOptions, setCrawlOptions] = useState<CrawlOptions>(DEFAULT_CRAWL_OPTIONS)

  const plan = useProviderContextSelector(state => state.plan)
  const enableBilling = useProviderContextSelector(state => state.enableBilling)
@@ -159,35 +157,36 @@ const TestRunPanel = () => {
      )}
      {datasource?.type === DataSourceType.NOTION && (
        <Notion
          nodeId={datasource?.nodeId || ''}
          notionPages={notionPages}
          updateNotionPages={updateNotionPages}
        />
      )}
      {datasource?.type === DataSourceProvider.fireCrawl && (
        <Firecrawl
          nodeId={datasource?.nodeId || ''}
          variables={datasource?.variables}
          checkedCrawlResult={websitePages}
          onCheckedCrawlResultChange={setWebsitePages}
          onJobIdChange={setWebsiteCrawlJobId}
          crawlOptions={crawlOptions}
          onCrawlOptionsChange={setCrawlOptions}
        />
      )}
      {datasource?.type === DataSourceProvider.jinaReader && (
        <JinaReader
          nodeId={datasource?.nodeId || ''}
          variables={datasource?.variables}
          checkedCrawlResult={websitePages}
          onCheckedCrawlResultChange={setWebsitePages}
          onJobIdChange={setWebsiteCrawlJobId}
          crawlOptions={crawlOptions}
          onCrawlOptionsChange={setCrawlOptions}
        />
      )}
      {datasource?.type === DataSourceProvider.waterCrawl && (
        <WaterCrawl
          nodeId={datasource?.nodeId || ''}
          variables={datasource?.variables}
          checkedCrawlResult={websitePages}
          onCheckedCrawlResultChange={setWebsitePages}
          onJobIdChange={setWebsiteCrawlJobId}
          crawlOptions={crawlOptions}
          onCrawlOptionsChange={setCrawlOptions}
        />
      )}
      {isShowVectorSpaceFull && (

@@ -1,5 +1,6 @@
import type { DataSourceProvider } from '@/models/common'
import type { DataSourceType } from '@/models/datasets'
import type { RAGPipelineVariables } from '@/models/pipeline'

export enum TestRunStep {
  dataSource = 'dataSource',
@@ -15,5 +16,5 @@ export type DataSourceOption = {
export type Datasource = {
  nodeId: string
  type: DataSourceType | DataSourceProvider
  config: any
  variables: RAGPipelineVariables
}

@@ -98,12 +98,12 @@ export type PipelineCheckDependenciesResponse = {
}

export enum PipelineInputVarType {
  textInput = 'text-input',
  textInput = 'textInput',
  paragraph = 'paragraph',
  select = 'select',
  number = 'number',
  number = 'numberInput',
  singleFile = 'file',
  multiFiles = 'file-list',
  multiFiles = 'fileList',
  checkbox = 'checkbox',
}

@@ -142,23 +142,4 @@ export type PipelineDatasourceNodeRunRequest = {
  inputs: Record<string, any>
}

export type PipelineDatasourceNodeRunResponse = {
  id: string
  inputs: Record<string, any>
  process_data: Record<string, any>
  outputs: Record<string, any>
  status: string
  error?: string
  elapsed_time: number
  execution_metadata: {
    total_tokens: number
    total_price: number
    currency?: string
  }
  extras: {
    icon: string | object
  }
  created_at: string
  created_by: string
  finished_at: string
}
export type PipelineDatasourceNodeRunResponse = Record<string, any>

@@ -9,6 +9,7 @@ import type {
  ImportPipelineDSLResponse,
  PipelineCheckDependenciesResponse,
  PipelineDatasourceNodeRunRequest,
  PipelineDatasourceNodeRunResponse,
  PipelineProcessingParamsRequest,
  PipelineProcessingParamsResponse,
  PipelineTemplateByIdResponse,
@@ -115,15 +116,18 @@ export const useCheckPipelineDependencies = (
  })
}

export const useDatasourceNodeRun = () => {
export const useDatasourceNodeRun = (
  mutationOptions: MutationOptions<PipelineDatasourceNodeRunResponse, Error, PipelineDatasourceNodeRunRequest> = {},
) => {
  return useMutation({
    mutationKey: [NAME_SPACE, 'datasource-node-run'],
    mutationFn: (request: PipelineDatasourceNodeRunRequest) => {
      const { pipeline_id, node_id, ...rest } = request
      return post(`/rag/pipelines/${pipeline_id}/workflows/published/nodes/${node_id}/run`, {
      return post<PipelineDatasourceNodeRunResponse>(`/rag/pipelines/${pipeline_id}/workflows/published/nodes/${node_id}/run`, {
        body: rest,
      })
    },
    ...mutationOptions,
  })
}
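
A minimal sketch of the call pattern (as used by the new Crawler component above; per-call callbacks go to mutateAsync, while the new mutationOptions parameter sets hook-level defaults):

  const { mutateAsync: runDatasourceNode } = useDatasourceNodeRun()
  await runDatasourceNode(
    { pipeline_id: pipelineId, node_id: nodeId, inputs: value },
    {
      onSuccess: (res) => onJobIdChange(res.job_id), // res is now Record<string, any>
      onError: (error) => setCrawlErrorMessage(error.message),
      onSettled: () => setStep(Step.finished),
    },
  )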