| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  | 'use client' | 
					
						
							| 
									
										
										
										
											2023-06-16 21:47:51 +08:00
										 |  |  | import React, { useCallback, useEffect, useState } from 'react' | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  | import { useTranslation } from 'react-i18next' | 
					
						
							| 
									
										
										
										
											2023-06-16 21:47:51 +08:00
										 |  |  | import AppUnavailable from '../../base/app-unavailable' | 
					
						
							| 
									
										
										
										
											2024-04-04 15:54:59 +08:00
										 |  |  | import { ModelTypeEnum } from '../../header/account-setting/model-provider-page/declarations' | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  | import StepOne from './step-one' | 
					
						
							|  |  |  | import StepTwo from './step-two' | 
					
						
							|  |  |  | import StepThree from './step-three' | 
					
						
							| 
									
										
										
										
											2025-02-17 17:05:13 +08:00
										 |  |  | import { TopBar } from './top-bar' | 
					
						
							| 
									
										
										
										
											2023-06-16 21:47:51 +08:00
										 |  |  | import { DataSourceType } from '@/models/datasets' | 
					
						
							| 
									
										
										
										
											2024-06-14 22:02:41 +08:00
										 |  |  | import type { CrawlOptions, CrawlResultItem, DataSet, FileItem, createDocumentResponse } from '@/models/datasets' | 
					
						
							| 
									
										
										
										
											2023-08-22 14:55:20 +08:00
										 |  |  | import { fetchDataSource } from '@/service/common' | 
					
						
							| 
									
										
										
										
											2023-09-27 10:31:46 +08:00
										 |  |  | import { fetchDatasetDetail } from '@/service/datasets' | 
					
						
							| 
									
										
										
										
											2024-09-30 09:57:19 +08:00
										 |  |  | import { DataSourceProvider, type NotionPage } from '@/models/common' | 
					
						
							| 
									
										
										
										
											2023-11-06 19:36:32 +08:00
										 |  |  | import { useModalContext } from '@/context/modal-context' | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  | import { useDefaultModel } from '@/app/components/header/account-setting/model-provider-page/hooks' | 
					
						
							| 
									
										
										
										
											2023-06-16 21:47:51 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  | type DatasetUpdateFormProps = { | 
					
						
							| 
									
										
										
										
											2023-06-16 21:47:51 +08:00
										 |  |  |   datasetId?: string | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-06-14 22:02:41 +08:00
										 |  |  | const DEFAULT_CRAWL_OPTIONS: CrawlOptions = { | 
					
						
							|  |  |  |   crawl_sub_pages: true, | 
					
						
							|  |  |  |   only_main_content: true, | 
					
						
							|  |  |  |   includes: '', | 
					
						
							|  |  |  |   excludes: '', | 
					
						
							|  |  |  |   limit: 10, | 
					
						
							|  |  |  |   max_depth: '', | 
					
						
							| 
									
										
										
										
											2024-09-30 09:57:19 +08:00
										 |  |  |   use_sitemap: true, | 
					
						
							| 
									
										
										
										
											2024-06-14 22:02:41 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  | const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => { | 
					
						
							|  |  |  |   const { t } = useTranslation() | 
					
						
							| 
									
										
										
										
											2023-11-06 19:36:32 +08:00
										 |  |  |   const { setShowAccountSettingModal } = useModalContext() | 
					
						
							| 
									
										
										
										
											2023-06-16 21:47:51 +08:00
										 |  |  |   const [hasConnection, setHasConnection] = useState(true) | 
					
						
							|  |  |  |   const [dataSourceType, setDataSourceType] = useState<DataSourceType>(DataSourceType.FILE) | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |   const [step, setStep] = useState(1) | 
					
						
							|  |  |  |   const [indexingTypeCache, setIndexTypeCache] = useState('') | 
					
						
							| 
									
										
										
										
											2024-12-26 12:01:51 +08:00
										 |  |  |   const [retrievalMethodCache, setRetrievalMethodCache] = useState('') | 
					
						
							| 
									
										
										
										
											2023-08-16 23:14:27 +08:00
										 |  |  |   const [fileList, setFiles] = useState<FileItem[]>([]) | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |   const [result, setResult] = useState<createDocumentResponse | undefined>() | 
					
						
							|  |  |  |   const [hasError, setHasError] = useState(false) | 
					
						
							| 
									
										
										
										
											2024-04-04 15:54:59 +08:00
										 |  |  |   const { data: embeddingsDefaultModel } = useDefaultModel(ModelTypeEnum.textEmbedding) | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-08-28 19:48:53 +08:00
										 |  |  |   const [notionPages, setNotionPages] = useState<NotionPage[]>([]) | 
					
						
							|  |  |  |   const updateNotionPages = (value: NotionPage[]) => { | 
					
						
							| 
									
										
										
										
											2023-06-16 21:47:51 +08:00
										 |  |  |     setNotionPages(value) | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-06-14 22:02:41 +08:00
										 |  |  |   const [websitePages, setWebsitePages] = useState<CrawlResultItem[]>([]) | 
					
						
							|  |  |  |   const [crawlOptions, setCrawlOptions] = useState<CrawlOptions>(DEFAULT_CRAWL_OPTIONS) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-08-16 23:14:27 +08:00
										 |  |  |   const updateFileList = (preparedFiles: FileItem[]) => { | 
					
						
							| 
									
										
										
										
											2023-06-21 09:44:01 +08:00
										 |  |  |     setFiles(preparedFiles) | 
					
						
							|  |  |  |   } | 
					
						
							| 
									
										
										
										
											2024-09-30 09:57:19 +08:00
										 |  |  |   const [websiteCrawlProvider, setWebsiteCrawlProvider] = useState<DataSourceProvider>(DataSourceProvider.fireCrawl) | 
					
						
							|  |  |  |   const [websiteCrawlJobId, setWebsiteCrawlJobId] = useState('') | 
					
						
							| 
									
										
										
										
											2023-06-21 09:44:01 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-08-16 23:14:27 +08:00
										 |  |  |   const updateFile = (fileItem: FileItem, progress: number, list: FileItem[]) => { | 
					
						
							|  |  |  |     const targetIndex = list.findIndex(file => file.fileID === fileItem.fileID) | 
					
						
							| 
									
										
										
										
											2023-06-21 09:44:01 +08:00
										 |  |  |     list[targetIndex] = { | 
					
						
							|  |  |  |       ...list[targetIndex], | 
					
						
							|  |  |  |       progress, | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     setFiles([...list]) | 
					
						
							|  |  |  |     // use follow code would cause dirty list update problem
 | 
					
						
							|  |  |  |     // const newList = list.map((file) => {
 | 
					
						
							|  |  |  |     //   if (file.fileID === fileItem.fileID) {
 | 
					
						
							|  |  |  |     //     return {
 | 
					
						
							|  |  |  |     //       ...fileItem,
 | 
					
						
							|  |  |  |     //       progress,
 | 
					
						
							|  |  |  |     //     }
 | 
					
						
							|  |  |  |     //   }
 | 
					
						
							|  |  |  |     //   return file
 | 
					
						
							|  |  |  |     // })
 | 
					
						
							|  |  |  |     // setFiles(newList)
 | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |   } | 
					
						
							|  |  |  |   const updateIndexingTypeCache = (type: string) => { | 
					
						
							|  |  |  |     setIndexTypeCache(type) | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  |   const updateResultCache = (res?: createDocumentResponse) => { | 
					
						
							|  |  |  |     setResult(res) | 
					
						
							|  |  |  |   } | 
					
						
							| 
									
										
										
										
											2024-12-26 12:01:51 +08:00
										 |  |  |   const updateRetrievalMethodCache = (method: string) => { | 
					
						
							|  |  |  |     setRetrievalMethodCache(method) | 
					
						
							|  |  |  |   } | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |   const nextStep = useCallback(() => { | 
					
						
							|  |  |  |     setStep(step + 1) | 
					
						
							|  |  |  |   }, [step, setStep]) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   const changeStep = useCallback((delta: number) => { | 
					
						
							|  |  |  |     setStep(step + delta) | 
					
						
							|  |  |  |   }, [step, setStep]) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-06-16 21:47:51 +08:00
										 |  |  |   const checkNotionConnection = async () => { | 
					
						
							|  |  |  |     const { data } = await fetchDataSource({ url: '/data-source/integrates' }) | 
					
						
							|  |  |  |     const hasConnection = data.filter(item => item.provider === 'notion') || [] | 
					
						
							|  |  |  |     setHasConnection(hasConnection.length > 0) | 
					
						
							|  |  |  |   } | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |   useEffect(() => { | 
					
						
							| 
									
										
										
										
											2023-06-16 21:47:51 +08:00
										 |  |  |     checkNotionConnection() | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |   }, []) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   const [detail, setDetail] = useState<DataSet | null>(null) | 
					
						
							|  |  |  |   useEffect(() => { | 
					
						
							|  |  |  |     (async () => { | 
					
						
							|  |  |  |       if (datasetId) { | 
					
						
							|  |  |  |         try { | 
					
						
							| 
									
										
										
										
											2023-09-27 10:31:46 +08:00
										 |  |  |           const detail = await fetchDatasetDetail(datasetId) | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |           setDetail(detail) | 
					
						
							| 
									
										
										
										
											2023-06-16 21:47:51 +08:00
										 |  |  |         } | 
					
						
							| 
									
										
										
										
											2025-02-17 17:05:13 +08:00
										 |  |  |         catch { | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |           setHasError(true) | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |     })() | 
					
						
							|  |  |  |   }, [datasetId]) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-06-16 21:47:51 +08:00
										 |  |  |   if (hasError) | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |     return <AppUnavailable code={500} unknownReason={t('datasetCreation.error.unavailable') as string} /> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return ( | 
					
						
							| 
									
										
										
										
											2024-12-26 12:01:51 +08:00
										 |  |  |     <div className='flex flex-col bg-components-panel-bg' style={{ height: 'calc(100vh - 56px)' }}> | 
					
						
							| 
									
										
										
										
											2025-02-17 17:05:13 +08:00
										 |  |  |       <TopBar activeIndex={step - 1} datasetId={datasetId} /> | 
					
						
							| 
									
										
										
										
											2024-12-26 12:01:51 +08:00
										 |  |  |       <div style={{ height: 'calc(100% - 52px)' }}> | 
					
						
							|  |  |  |         {step === 1 && <StepOne | 
					
						
							|  |  |  |           hasConnection={hasConnection} | 
					
						
							|  |  |  |           onSetting={() => setShowAccountSettingModal({ payload: 'data-source' })} | 
					
						
							|  |  |  |           datasetId={datasetId} | 
					
						
							|  |  |  |           dataSourceType={dataSourceType} | 
					
						
							|  |  |  |           dataSourceTypeDisable={!!detail?.data_source_type} | 
					
						
							|  |  |  |           changeType={setDataSourceType} | 
					
						
							|  |  |  |           files={fileList} | 
					
						
							|  |  |  |           updateFile={updateFile} | 
					
						
							|  |  |  |           updateFileList={updateFileList} | 
					
						
							|  |  |  |           notionPages={notionPages} | 
					
						
							|  |  |  |           updateNotionPages={updateNotionPages} | 
					
						
							|  |  |  |           onStepChange={nextStep} | 
					
						
							|  |  |  |           websitePages={websitePages} | 
					
						
							|  |  |  |           updateWebsitePages={setWebsitePages} | 
					
						
							|  |  |  |           onWebsiteCrawlProviderChange={setWebsiteCrawlProvider} | 
					
						
							|  |  |  |           onWebsiteCrawlJobIdChange={setWebsiteCrawlJobId} | 
					
						
							|  |  |  |           crawlOptions={crawlOptions} | 
					
						
							|  |  |  |           onCrawlOptionsChange={setCrawlOptions} | 
					
						
							|  |  |  |         />} | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |         {(step === 2 && (!datasetId || (datasetId && !!detail))) && <StepTwo | 
					
						
							| 
									
										
										
										
											2024-06-05 00:13:29 +08:00
										 |  |  |           isAPIKeySet={!!embeddingsDefaultModel} | 
					
						
							| 
									
										
										
										
											2023-11-06 19:36:32 +08:00
										 |  |  |           onSetting={() => setShowAccountSettingModal({ payload: 'provider' })} | 
					
						
							| 
									
										
										
										
											2023-10-07 17:42:16 +08:00
										 |  |  |           indexingType={detail?.indexing_technique} | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |           datasetId={datasetId} | 
					
						
							| 
									
										
										
										
											2023-06-16 21:47:51 +08:00
										 |  |  |           dataSourceType={dataSourceType} | 
					
						
							| 
									
										
										
										
											2023-06-21 09:44:01 +08:00
										 |  |  |           files={fileList.map(file => file.file)} | 
					
						
							| 
									
										
										
										
											2023-06-16 21:47:51 +08:00
										 |  |  |           notionPages={notionPages} | 
					
						
							| 
									
										
										
										
											2024-06-14 22:02:41 +08:00
										 |  |  |           websitePages={websitePages} | 
					
						
							| 
									
										
										
										
											2024-09-30 09:57:19 +08:00
										 |  |  |           websiteCrawlProvider={websiteCrawlProvider} | 
					
						
							|  |  |  |           websiteCrawlJobId={websiteCrawlJobId} | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |           onStepChange={changeStep} | 
					
						
							|  |  |  |           updateIndexingTypeCache={updateIndexingTypeCache} | 
					
						
							| 
									
										
										
										
											2024-12-26 12:01:51 +08:00
										 |  |  |           updateRetrievalMethodCache={updateRetrievalMethodCache} | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |           updateResultCache={updateResultCache} | 
					
						
							| 
									
										
										
										
											2024-06-14 22:02:41 +08:00
										 |  |  |           crawlOptions={crawlOptions} | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |         />} | 
					
						
							|  |  |  |         {step === 3 && <StepThree | 
					
						
							|  |  |  |           datasetId={datasetId} | 
					
						
							|  |  |  |           datasetName={detail?.name} | 
					
						
							|  |  |  |           indexingType={detail?.indexing_technique || indexingTypeCache} | 
					
						
							| 
									
										
										
										
											2024-12-26 12:01:51 +08:00
										 |  |  |           retrievalMethod={detail?.retrieval_model_dict?.search_method || retrievalMethodCache} | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |           creationCache={result} | 
					
						
							|  |  |  |         />} | 
					
						
							|  |  |  |       </div> | 
					
						
							|  |  |  |     </div> | 
					
						
							|  |  |  |   ) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-09-27 10:31:46 +08:00
										 |  |  | export default DatasetUpdateForm |