2025-05-21 16:37:02 +08:00
|
|
|
import { useTranslation } from 'react-i18next'
|
|
|
|
import { AddDocumentsStep } from './types'
|
2025-06-26 13:46:12 +08:00
|
|
|
import type { DataSourceOption } from '@/app/components/rag-pipeline/components/panel/test-run/types'
|
2025-05-28 18:34:26 +08:00
|
|
|
import { useCallback, useMemo, useRef, useState } from 'react'
|
2025-05-22 14:49:40 +08:00
|
|
|
import { BlockEnum, type Node } from '@/app/components/workflow/types'
|
|
|
|
import type { DataSourceNodeType } from '@/app/components/workflow/nodes/data-source/types'
|
2025-06-23 15:38:24 +08:00
|
|
|
import type { CrawlResult, CrawlResultItem, DocumentItem, FileItem } from '@/models/datasets'
|
|
|
|
import { CrawlStep } from '@/models/datasets'
|
2025-05-28 18:34:26 +08:00
|
|
|
import produce from 'immer'
|
2025-07-01 16:32:21 +08:00
|
|
|
import type { DataSourceNotionPageMap, DataSourceNotionWorkspace, NotionPage } from '@/models/common'
|
2025-07-01 16:54:44 +08:00
|
|
|
import { type OnlineDriveFile, OnlineDriveFileType } from '@/models/pipeline'
|
2025-05-21 16:37:02 +08:00
|
|
|
|
|
|
|
/**
 * State for the step indicator of the "add documents" flow.
 *
 * @returns
 * - `steps`: translated label / `AddDocumentsStep` value pairs, in display order.
 * - `currentStep`: the active step counter, starting at `1`.
 * - `handleNextStep` / `handleBackStep`: move the counter forward / backward by one.
 */
export const useAddDocumentsSteps = () => {
  const { t } = useTranslation()
  const [currentStep, setCurrentStep] = useState(1)

  const handleNextStep = useCallback(() => {
    setCurrentStep(preStep => preStep + 1)
  }, [])

  const handleBackStep = useCallback(() => {
    setCurrentStep(preStep => preStep - 1)
  }, [])

  // Memoized so consumers get the same array between renders; it is only
  // rebuilt when the translation function itself changes.
  const steps = useMemo(() => [
    {
      label: t('datasetPipeline.addDocuments.steps.chooseDatasource'),
      value: AddDocumentsStep.dataSource,
    },
    {
      label: t('datasetPipeline.addDocuments.steps.processDocuments'),
      value: AddDocumentsStep.processDocuments,
    },
    {
      label: t('datasetPipeline.addDocuments.steps.processingDocuments'),
      value: AddDocumentsStep.processingDocuments,
    },
  ], [t])

  return {
    steps,
    currentStep,
    handleNextStep,
    handleBackStep,
  }
}
|
2025-05-22 14:49:40 +08:00
|
|
|
|
|
|
|
/**
 * Builds the selectable data source options from the pipeline's nodes.
 *
 * Only nodes whose `data.type` is `BlockEnum.DataSource` are kept; each one
 * becomes an option labelled with the node title, valued with the node id and
 * carrying the node data.
 *
 * @param pipelineNodes - nodes of the pipeline to derive options from.
 * @returns the option list, recomputed only when `pipelineNodes` changes.
 */
export const useDatasourceOptions = (pipelineNodes: Node<DataSourceNodeType>[]) => {
  // The filtering lives inside the memo: filtering outside produces a new array
  // on every render, which would invalidate the memo each time.
  const options = useMemo(() => {
    const options: DataSourceOption[] = pipelineNodes
      .filter(node => node.data.type === BlockEnum.DataSource)
      .map(node => ({
        label: node.data.title,
        value: node.id,
        data: node.data,
      }))

    if (process.env.NODE_ENV === 'development') {
      // todo: delete mock data
      options.push({
        label: 'Google Drive',
        value: '123456',
        // @ts-expect-error mock data
        data: {
          datasource_parameters: {},
          datasource_configurations: {},
          type: BlockEnum.DataSource,
          title: 'Google Drive',
          plugin_id: 'langgenius/google-drive',
          provider_type: 'online_drive',
          provider_name: 'google_drive',
          datasource_name: 'google-drive',
          datasource_label: 'Google Drive',
          selected: false,
        },
      })
    }

    return options
  }, [pipelineNodes])

  return options
}
|
2025-05-28 18:34:26 +08:00
|
|
|
|
|
|
|
/**
 * State for the local-file data source: the list of files being uploaded,
 * the file currently shown in the preview panel, and a ref to the file used
 * for previewing.
 *
 * @returns the state values plus the handlers that update them.
 */
export const useLocalFile = () => {
  const [fileList, setFileList] = useState<FileItem[]>([])
  const [currentFile, setCurrentFile] = useState<File | undefined>()

  const previewFile = useRef<DocumentItem>()

  // True once there is at least one file and every file has an `id`.
  const allFileLoaded = useMemo(() => (fileList.length > 0 && fileList.every(file => file.file.id)), [fileList])

  /**
   * Stores `progress` on the entry of `list` whose `fileID` matches `fileItem`
   * and publishes the resulting list as the new file list.
   *
   * @param fileItem - the file whose progress changed.
   * @param progress - the new progress value.
   * @param list - the list to derive the next state from.
   */
  const updateFile = useCallback((fileItem: FileItem, progress: number, list: FileItem[]) => {
    const newList = produce(list, (draft) => {
      const targetIndex = draft.findIndex(file => file.fileID === fileItem.fileID)
      // Nothing to update when the file is not part of the list; writing to
      // index -1 would corrupt the array instead.
      if (targetIndex === -1)
        return
      draft[targetIndex] = {
        ...draft[targetIndex],
        progress,
      }
    })
    setFileList(newList)
    // The first file is the preview target; an empty list leaves no target.
    previewFile.current = newList[0]?.file as DocumentItem | undefined
  }, [])

  const updateFileList = useCallback((preparedFiles: FileItem[]) => {
    setFileList(preparedFiles)
  }, [])

  const updateCurrentFile = useCallback((file: File) => {
    setCurrentFile(file)
  }, [])

  const hideFilePreview = useCallback(() => {
    setCurrentFile(undefined)
  }, [])

  return {
    fileList,
    previewFile,
    allFileLoaded,
    updateFile,
    updateFileList,
    currentFile,
    updateCurrentFile,
    hideFilePreview,
  }
}
|
|
|
|
|
2025-06-06 10:08:19 +08:00
|
|
|
/**
 * State for the online-documents data source: the fetched workspaces, the
 * search and workspace filters, the selected pages, the page shown in the
 * preview panel, and a ref to the page used for previewing.
 *
 * @returns the state values, their setters and the derived page lookup map.
 */
export const useOnlineDocuments = () => {
  const [documentsData, setDocumentsData] = useState<DataSourceNotionWorkspace[]>([])
  const [searchValue, setSearchValue] = useState('')
  const [currentWorkspaceId, setCurrentWorkspaceId] = useState('')
  const [onlineDocuments, setOnlineDocuments] = useState<NotionPage[]>([])
  const [currentDocument, setCurrentDocument] = useState<NotionPage | undefined>()

  // Lookup of every page by `page_id`, each entry tagged with the id of the
  // workspace it was listed under.
  const PagesMapAndSelectedPagesId: DataSourceNotionPageMap = useMemo(() => {
    const pagesMap: DataSourceNotionPageMap = {}
    for (const workspace of (documentsData || [])) {
      for (const page of workspace.pages) {
        pagesMap[page.page_id] = {
          ...page,
          workspace_id: workspace.workspace_id,
        }
      }
    }
    return pagesMap
  }, [documentsData])

  // Lazy initializer: the default selection is only needed on the first
  // render, so the Set is not rebuilt (and thrown away) on every render.
  const [selectedPagesId, setSelectedPagesId] = useState<Set<string>>(
    () => new Set(onlineDocuments.map(doc => doc.page_id)),
  )

  const previewOnlineDocument = useRef<NotionPage>(onlineDocuments[0])

  const updateOnlineDocuments = useCallback((value: NotionPage[]) => {
    setOnlineDocuments(value)
    // Keep the preview target on the first selected page, the same way the
    // website-crawl hook in this file tracks its first checked page.
    // NOTE(review): callers also receive this ref; confirm none of them
    // relies on it being left untouched here.
    previewOnlineDocument.current = value[0]
  }, [])

  const updateCurrentPage = useCallback((page: NotionPage) => {
    setCurrentDocument(page)
  }, [])

  const hideOnlineDocumentPreview = useCallback(() => {
    setCurrentDocument(undefined)
  }, [])

  return {
    documentsData,
    setDocumentsData,
    searchValue,
    setSearchValue,
    currentWorkspaceId,
    setCurrentWorkspaceId,
    PagesMapAndSelectedPagesId,
    selectedPagesId,
    setSelectedPagesId,
    onlineDocuments,
    previewOnlineDocument,
    updateOnlineDocuments,
    currentDocument,
    updateCurrentPage,
    hideOnlineDocumentPreview,
  }
}
|
|
|
|
|
|
|
|
/**
 * State for the website-crawl data source: the checked crawl results, the
 * raw crawl result, the crawl step, the page shown in the preview panel
 * (with its index) and a ref to the page used for previewing.
 *
 * @returns the state values plus the handlers that update them.
 */
export const useWebsiteCrawl = () => {
  const [checkedPages, setCheckedPages] = useState<CrawlResultItem[]>([])
  const [activeWebsite, setActiveWebsite] = useState<CrawlResultItem | undefined>()
  const [crawlResult, setCrawlResult] = useState<CrawlResult | undefined>()
  const [step, setStep] = useState<CrawlStep>(CrawlStep.init)
  // -1 means no page is being previewed.
  const [previewIndex, setPreviewIndex] = useState<number>(-1)

  const previewWebsitePage = useRef<CrawlResultItem>(checkedPages[0])

  // Opens the preview for `website`, remembering its position in the list.
  const updateCurrentWebsite = useCallback(
    (website: CrawlResultItem, index: number) => {
      setActiveWebsite(website)
      setPreviewIndex(index)
    },
    [],
  )

  // Closes the preview and resets the remembered position.
  const hideWebsitePreview = useCallback(() => {
    setActiveWebsite(undefined)
    setPreviewIndex(-1)
  }, [])

  // Replaces the checked pages; the first one becomes the preview target.
  const updataCheckedCrawlResultChange = useCallback(
    (checkedCrawlResult: CrawlResultItem[]) => {
      setCheckedPages(checkedCrawlResult)
      previewWebsitePage.current = checkedCrawlResult[0]
    },
    [],
  )

  return {
    websitePages: checkedPages,
    crawlResult,
    setCrawlResult,
    step,
    setStep,
    previewWebsitePage,
    updataCheckedCrawlResultChange,
    currentWebsite: activeWebsite,
    updateCurrentWebsite,
    previewIndex,
    hideWebsitePreview,
  }
}
|
2025-07-01 16:54:44 +08:00
|
|
|
|
|
|
|
/**
 * State for the online-drive data source: the current path prefix, the
 * search keywords, the pagination cursor, the selected file keys and the
 * listed files.
 *
 * @returns each state value together with its setter.
 */
export const useOnlineDrive = () => {
  const [prefix, setPrefix] = useState<string[]>([])
  const [keywords, setKeywords] = useState('')
  const [startAfter, setStartAfter] = useState('')
  const [selectedFileList, setSelectedFileList] = useState<string[]>([])
  const [fileList, setFileList] = useState<OnlineDriveFile[]>(() => {
    // The placeholder bucket is only seeded in development builds, matching
    // how the mock data source option in this file is gated; other builds
    // start with an empty list.
    if (process.env.NODE_ENV !== 'development')
      return []
    // todo: delete mock data
    return [
      {
        key: 'Bucket_1',
        size: 1024, // unit bytes
        type: OnlineDriveFileType.bucket,
      },
    ]
  })

  return {
    prefix,
    setPrefix,
    keywords,
    setKeywords,
    startAfter,
    setStartAfter,
    selectedFileList,
    setSelectedFileList,
    fileList,
    setFileList,
  }
}
|