| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  | 'use client' | 
					
						
							| 
									
										
										
										
											2024-09-19 17:40:20 +08:00
										 |  |  | import React, { useCallback, useEffect, useLayoutEffect, useRef, useState } from 'react' | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  | import { useTranslation } from 'react-i18next' | 
					
						
							| 
									
										
										
										
											2023-08-18 17:37:31 +08:00
										 |  |  | import { useContext } from 'use-context-selector' | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  | import { useBoolean } from 'ahooks' | 
					
						
							| 
									
										
										
										
											2023-06-06 10:52:02 +08:00
										 |  |  | import { XMarkIcon } from '@heroicons/react/20/solid' | 
					
						
							| 
									
										
										
										
											2023-11-27 11:47:48 +08:00
										 |  |  | import { RocketLaunchIcon } from '@heroicons/react/24/outline' | 
					
						
							| 
									
										
										
										
											2024-06-20 11:05:08 +08:00
										 |  |  | import { | 
					
						
							|  |  |  |   RiCloseLine, | 
					
						
							|  |  |  | } from '@remixicon/react' | 
					
						
							| 
									
										
										
										
											2023-06-06 10:52:02 +08:00
										 |  |  | import Link from 'next/link' | 
					
						
							| 
									
										
										
										
											2023-06-16 21:47:51 +08:00
										 |  |  | import { groupBy } from 'lodash-es' | 
					
						
							| 
									
										
										
										
											2023-07-28 20:47:15 +08:00
										 |  |  | import PreviewItem, { PreviewType } from './preview-item' | 
					
						
							| 
									
										
										
										
											2023-08-18 17:37:31 +08:00
										 |  |  | import LanguageSelect from './language-select' | 
					
						
							| 
									
										
										
										
											2023-06-06 10:52:02 +08:00
										 |  |  | import s from './index.module.css' | 
					
						
							| 
									
										
										
										
											2024-09-19 17:40:20 +08:00
										 |  |  | import unescape from './unescape' | 
					
						
							|  |  |  | import escape from './escape' | 
					
						
							| 
									
										
										
										
											2024-07-09 15:05:40 +08:00
										 |  |  | import cn from '@/utils/classnames' | 
					
						
							| 
									
										
										
										
											2024-09-04 14:41:47 +08:00
										 |  |  | import type { CrawlOptions, CrawlResultItem, CreateDocumentReq, CustomFile, FileIndexingEstimateResponse, FullDocumentDetail, IndexingEstimateParams, NotionInfo, PreProcessingRule, ProcessRule, Rules, createDocumentResponse } from '@/models/datasets' | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  | import { | 
					
						
							|  |  |  |   createDocument, | 
					
						
							| 
									
										
										
										
											2023-06-06 10:52:02 +08:00
										 |  |  |   createFirstDocument, | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |   fetchFileIndexingEstimate as didFetchFileIndexingEstimate, | 
					
						
							| 
									
										
										
										
											2023-06-06 10:52:02 +08:00
										 |  |  |   fetchDefaultProcessRule, | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  | } from '@/service/datasets' | 
					
						
							|  |  |  | import Button from '@/app/components/base/button' | 
					
						
							| 
									
										
										
										
											2024-10-21 10:32:37 +08:00
										 |  |  | import Input from '@/app/components/base/input' | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  | import Loading from '@/app/components/base/loading' | 
					
						
							| 
									
										
										
										
											2023-11-27 11:47:48 +08:00
										 |  |  | import FloatRightContainer from '@/app/components/base/float-right-container' | 
					
						
							| 
									
										
										
										
											2023-11-18 11:53:35 +08:00
										 |  |  | import RetrievalMethodConfig from '@/app/components/datasets/common/retrieval-method-config' | 
					
						
							|  |  |  | import EconomicalRetrievalMethodConfig from '@/app/components/datasets/common/economical-retrieval-method-config' | 
					
						
							|  |  |  | import { type RetrievalConfig } from '@/types/app' | 
					
						
							|  |  |  | import { ensureRerankModelSelected, isReRankModelSelected } from '@/app/components/datasets/common/check-rerank-model' | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  | import Toast from '@/app/components/base/toast' | 
					
						
							|  |  |  | import { formatNumber } from '@/utils/format' | 
					
						
							| 
									
										
										
										
											2023-08-28 19:48:53 +08:00
										 |  |  | import type { NotionPage } from '@/models/common' | 
					
						
							| 
									
										
										
										
											2024-09-30 09:57:19 +08:00
										 |  |  | import { DataSourceProvider } from '@/models/common' | 
					
						
							| 
									
										
										
										
											2023-08-18 17:37:31 +08:00
										 |  |  | import { DataSourceType, DocForm } from '@/models/datasets' | 
					
						
							| 
									
										
										
										
											2023-06-16 21:47:51 +08:00
										 |  |  | import NotionIcon from '@/app/components/base/notion-icon' | 
					
						
							| 
									
										
										
										
											2023-07-28 20:47:15 +08:00
										 |  |  | import Switch from '@/app/components/base/switch' | 
					
						
							|  |  |  | import { MessageChatSquare } from '@/app/components/base/icons/src/public/common' | 
					
						
							| 
									
										
										
										
											2023-06-19 16:32:25 +08:00
										 |  |  | import { useDatasetDetailContext } from '@/context/dataset-detail' | 
					
						
							| 
									
										
										
										
											2023-08-18 17:37:31 +08:00
										 |  |  | import I18n from '@/context/i18n' | 
					
						
							| 
									
										
										
										
											2023-08-03 11:28:42 +08:00
										 |  |  | import { IS_CE_EDITION } from '@/config' | 
					
						
							| 
									
										
										
										
											2023-11-18 11:53:35 +08:00
										 |  |  | import { RETRIEVE_METHOD } from '@/types/app' | 
					
						
							| 
									
										
										
										
											2023-11-27 11:47:48 +08:00
										 |  |  | import useBreakpoints, { MediaType } from '@/hooks/use-breakpoints' | 
					
						
							|  |  |  | import Tooltip from '@/app/components/base/tooltip' | 
					
						
							| 
									
										
										
										
											2024-09-04 14:41:47 +08:00
										 |  |  | import { useDefaultModel, useModelList, useModelListAndDefaultModelAndCurrentProviderAndModel } from '@/app/components/header/account-setting/model-provider-page/hooks' | 
					
						
							| 
									
										
										
										
											2024-02-23 14:31:06 +08:00
										 |  |  | import { LanguagesSupported } from '@/i18n/language' | 
					
						
							| 
									
										
										
										
											2024-09-04 14:41:47 +08:00
										 |  |  | import ModelSelector from '@/app/components/header/account-setting/model-provider-page/model-selector' | 
					
						
							|  |  |  | import type { DefaultModel } from '@/app/components/header/account-setting/model-provider-page/declarations' | 
					
						
							| 
									
										
										
										
											2024-04-04 15:54:59 +08:00
										 |  |  | import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations' | 
					
						
							| 
									
										
										
										
											2024-06-14 22:02:41 +08:00
										 |  |  | import { Globe01 } from '@/app/components/base/icons/src/vender/line/mapsAndTravel' | 
					
						
							| 
									
										
										
										
											2023-06-16 21:47:51 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-07 17:42:16 +08:00
// Utility type: the union of all property value types of T.
type ValueOf<T> = T[keyof T]
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
// Props accepted by the StepTwo component.
type StepTwoProps = {
  isSetting?: boolean
  // When provided together with datasetId, supplies the initial doc_form and doc_language.
  documentDetail?: FullDocumentDetail
  // Drives the default index type: QUALIFIED when true (or when indexingType is given), ECONOMICAL otherwise.
  isAPIKeySet: boolean
  onSetting: () => void
  // Absent on the create page; see the isInCreatePage computation in the component.
  datasetId?: string
  // When set, takes precedence over the locally selected index type.
  indexingType?: ValueOf<IndexingType>
  // Data source type used while on the create page; otherwise the current dataset's type is used.
  dataSourceType: DataSourceType
  // Files whose ids are sent in the indexing-estimate request for file data sources.
  files: CustomFile[]
  // Notion pages; grouped by workspace_id when building the Notion info. Defaults to [].
  notionPages?: NotionPage[]
  // Crawled pages; their source_url values are sent as the website URLs. Defaults to [].
  websitePages?: CrawlResultItem[]
  // Only only_main_content is read from this in the visible code.
  crawlOptions?: CrawlOptions
  // Defaults to DataSourceProvider.fireCrawl.
  websiteCrawlProvider?: DataSourceProvider
  // Defaults to ''.
  websiteCrawlJobId?: string
  onStepChange?: (delta: number) => void
  updateIndexingTypeCache?: (type: string) => void
  updateResultCache?: (res: createDocumentResponse) => void
  onSave?: () => void
  onCancel?: () => void
}
					
						
							|  |  |  | 
 | 
					
						
// Segmentation mode; the selected value is sent as `mode` of the process rule.
enum SegmentType {
  AUTO = 'automatic',
  CUSTOM = 'custom',
}
					
						
// Indexing technique options; the chosen value is sent as `indexing_technique`.
enum IndexingType {
  QUALIFIED = 'high_quality',
  ECONOMICAL = 'economy',
}
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-09-19 17:40:20 +08:00
// Default segment separator in its escaped form (literal backslash-n, twice).
// Used as the initial state and as the fallback when an empty value is set;
// the stored value is passed through `unescape` before being sent as the separator.
const DEFAULT_SEGMENT_IDENTIFIER = '\\n\\n'
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  | const StepTwo = ({ | 
					
						
							| 
									
										
										
										
											2023-06-01 23:19:36 +08:00
										 |  |  |   isSetting, | 
					
						
							|  |  |  |   documentDetail, | 
					
						
							| 
									
										
										
										
											2024-06-05 00:13:29 +08:00
										 |  |  |   isAPIKeySet, | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |   onSetting, | 
					
						
							|  |  |  |   datasetId, | 
					
						
							|  |  |  |   indexingType, | 
					
						
							| 
									
										
										
										
											2024-06-14 22:02:41 +08:00
										 |  |  |   dataSourceType: inCreatePageDataSourceType, | 
					
						
							| 
									
										
										
										
											2023-06-21 09:44:01 +08:00
										 |  |  |   files, | 
					
						
							| 
									
										
										
										
											2023-06-16 21:47:51 +08:00
										 |  |  |   notionPages = [], | 
					
						
							| 
									
										
										
										
											2024-06-14 22:02:41 +08:00
										 |  |  |   websitePages = [], | 
					
						
							|  |  |  |   crawlOptions, | 
					
						
							| 
									
										
										
										
											2024-09-30 09:57:19 +08:00
										 |  |  |   websiteCrawlProvider = DataSourceProvider.fireCrawl, | 
					
						
							|  |  |  |   websiteCrawlJobId = '', | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |   onStepChange, | 
					
						
							|  |  |  |   updateIndexingTypeCache, | 
					
						
							|  |  |  |   updateResultCache, | 
					
						
							| 
									
										
										
										
											2023-06-01 23:19:36 +08:00
										 |  |  |   onSave, | 
					
						
							|  |  |  |   onCancel, | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  | }: StepTwoProps) => { | 
					
						
							|  |  |  |   const { t } = useTranslation() | 
					
						
							| 
									
										
										
										
											2023-08-18 17:37:31 +08:00
										 |  |  |   const { locale } = useContext(I18n) | 
					
						
							| 
									
										
										
										
											2023-11-27 11:47:48 +08:00
										 |  |  |   const media = useBreakpoints() | 
					
						
							|  |  |  |   const isMobile = media === MediaType.mobile | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-11-18 11:53:35 +08:00
										 |  |  |   const { dataset: currentDataset, mutateDatasetRes } = useDatasetDetailContext() | 
					
						
							| 
									
										
										
										
											2024-06-14 22:02:41 +08:00
										 |  |  |   const isInCreatePage = !datasetId || (datasetId && !currentDataset?.data_source_type) | 
					
						
							|  |  |  |   const dataSourceType = isInCreatePage ? inCreatePageDataSourceType : currentDataset?.data_source_type | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |   const scrollRef = useRef<HTMLDivElement>(null) | 
					
						
							|  |  |  |   const [scrolled, setScrolled] = useState(false) | 
					
						
							|  |  |  |   const previewScrollRef = useRef<HTMLDivElement>(null) | 
					
						
							|  |  |  |   const [previewScrolled, setPreviewScrolled] = useState(false) | 
					
						
							|  |  |  |   const [segmentationType, setSegmentationType] = useState<SegmentType>(SegmentType.AUTO) | 
					
						
							| 
									
										
										
										
											2024-09-19 17:40:20 +08:00
										 |  |  |   const [segmentIdentifier, doSetSegmentIdentifier] = useState(DEFAULT_SEGMENT_IDENTIFIER) | 
					
						
							|  |  |  |   const setSegmentIdentifier = useCallback((value: string) => { | 
					
						
							|  |  |  |     doSetSegmentIdentifier(value ? escape(value) : DEFAULT_SEGMENT_IDENTIFIER) | 
					
						
							|  |  |  |   }, []) | 
					
						
							|  |  |  |   const [max, setMax] = useState(4000) // default chunk length
 | 
					
						
							| 
									
										
										
										
											2024-01-26 13:24:40 +08:00
										 |  |  |   const [overlap, setOverlap] = useState(50) | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |   const [rules, setRules] = useState<PreProcessingRule[]>([]) | 
					
						
							|  |  |  |   const [defaultConfig, setDefaultConfig] = useState<Rules>() | 
					
						
							|  |  |  |   const hasSetIndexType = !!indexingType | 
					
						
							| 
									
										
										
										
											2023-10-07 17:42:16 +08:00
										 |  |  |   const [indexType, setIndexType] = useState<ValueOf<IndexingType>>( | 
					
						
							|  |  |  |     (indexingType | 
					
						
							| 
									
										
										
										
											2024-06-05 00:13:29 +08:00
										 |  |  |       || isAPIKeySet) | 
					
						
							| 
									
										
										
										
											2023-06-06 10:52:02 +08:00
										 |  |  |       ? IndexingType.QUALIFIED | 
					
						
							|  |  |  |       : IndexingType.ECONOMICAL, | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |   ) | 
					
						
							| 
									
										
										
										
											2024-10-14 13:32:13 +08:00
										 |  |  |   const [isLanguageSelectDisabled, setIsLanguageSelectDisabled] = useState(false) | 
					
						
							| 
									
										
										
										
											2023-07-28 20:47:15 +08:00
										 |  |  |   const [docForm, setDocForm] = useState<DocForm | string>( | 
					
						
							| 
									
										
										
										
											2023-10-07 17:42:16 +08:00
										 |  |  |     (datasetId && documentDetail) ? documentDetail.doc_form : DocForm.TEXT, | 
					
						
							| 
									
										
										
										
											2023-07-28 20:47:15 +08:00
										 |  |  |   ) | 
					
						
							| 
									
										
										
										
											2024-08-28 08:45:51 +08:00
										 |  |  |   const [docLanguage, setDocLanguage] = useState<string>( | 
					
						
							|  |  |  |     (datasetId && documentDetail) ? documentDetail.doc_language : (locale !== LanguagesSupported[1] ? 'English' : 'Chinese'), | 
					
						
							|  |  |  |   ) | 
					
						
							| 
									
										
										
										
											2023-08-18 17:37:31 +08:00
										 |  |  |   const [QATipHide, setQATipHide] = useState(false) | 
					
						
							| 
									
										
										
										
											2023-07-28 20:47:15 +08:00
										 |  |  |   const [previewSwitched, setPreviewSwitched] = useState(false) | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |   const [showPreview, { setTrue: setShowPreview, setFalse: hidePreview }] = useBoolean() | 
					
						
							| 
									
										
										
										
											2023-10-07 17:42:16 +08:00
										 |  |  |   const [customFileIndexingEstimate, setCustomFileIndexingEstimate] = useState<FileIndexingEstimateResponse | null>(null) | 
					
						
							|  |  |  |   const [automaticFileIndexingEstimate, setAutomaticFileIndexingEstimate] = useState<FileIndexingEstimateResponse | null>(null) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |   const fileIndexingEstimate = (() => { | 
					
						
							|  |  |  |     return segmentationType === SegmentType.AUTO ? automaticFileIndexingEstimate : customFileIndexingEstimate | 
					
						
							|  |  |  |   })() | 
					
						
							| 
									
										
										
										
											2023-09-24 14:35:20 +08:00
										 |  |  |   const [isCreating, setIsCreating] = useState(false) | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-08-28 19:48:53 +08:00
										 |  |  |   const scrollHandle = (e: Event) => { | 
					
						
							|  |  |  |     if ((e.target as HTMLDivElement).scrollTop > 0) | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |       setScrolled(true) | 
					
						
							| 
									
										
										
										
											2023-06-06 10:52:02 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     else | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |       setScrolled(false) | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-08-28 19:48:53 +08:00
										 |  |  |   const previewScrollHandle = (e: Event) => { | 
					
						
							|  |  |  |     if ((e.target as HTMLDivElement).scrollTop > 0) | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |       setPreviewScrolled(true) | 
					
						
							| 
									
										
										
										
											2023-06-06 10:52:02 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     else | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |       setPreviewScrolled(false) | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  |   const getFileName = (name: string) => { | 
					
						
							|  |  |  |     const arr = name.split('.') | 
					
						
							|  |  |  |     return arr.slice(0, -1).join('.') | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   const getRuleName = (key: string) => { | 
					
						
							| 
									
										
										
										
											2023-06-06 10:52:02 +08:00
										 |  |  |     if (key === 'remove_extra_spaces') | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |       return t('datasetCreation.stepTwo.removeExtraSpaces') | 
					
						
							| 
									
										
										
										
											2023-06-06 10:52:02 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     if (key === 'remove_urls_emails') | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |       return t('datasetCreation.stepTwo.removeUrlEmails') | 
					
						
							| 
									
										
										
										
											2023-06-06 10:52:02 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     if (key === 'remove_stopwords') | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |       return t('datasetCreation.stepTwo.removeStopwords') | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  |   const ruleChangeHandle = (id: string) => { | 
					
						
							| 
									
										
										
										
											2023-06-06 10:52:02 +08:00
										 |  |  |     const newRules = rules.map((rule) => { | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |       if (rule.id === id) { | 
					
						
							|  |  |  |         return { | 
					
						
							|  |  |  |           id: rule.id, | 
					
						
							|  |  |  |           enabled: !rule.enabled, | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |       return rule | 
					
						
							|  |  |  |     }) | 
					
						
							|  |  |  |     setRules(newRules) | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  |   const resetRules = () => { | 
					
						
							|  |  |  |     if (defaultConfig) { | 
					
						
							| 
									
										
										
										
											2024-09-19 17:40:20 +08:00
										 |  |  |       setSegmentIdentifier(defaultConfig.segmentation.separator) | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |       setMax(defaultConfig.segmentation.max_tokens) | 
					
						
							| 
									
										
										
										
											2024-01-26 13:24:40 +08:00
										 |  |  |       setOverlap(defaultConfig.segmentation.chunk_overlap) | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |       setRules(defaultConfig.pre_processing_rules) | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-10-14 13:32:13 +08:00
										 |  |  |   const fetchFileIndexingEstimate = async (docForm = DocForm.TEXT, language?: string) => { | 
					
						
							| 
									
										
										
										
											2023-06-06 10:52:02 +08:00
										 |  |  |     // eslint-disable-next-line @typescript-eslint/no-use-before-define
 | 
					
						
							| 
									
										
										
										
											2024-10-14 13:32:13 +08:00
										 |  |  |     const res = await didFetchFileIndexingEstimate(getFileIndexingEstimateParams(docForm, language)!) | 
					
						
							| 
									
										
										
										
											2024-09-04 14:41:47 +08:00
										 |  |  |     if (segmentationType === SegmentType.CUSTOM) | 
					
						
							| 
									
										
										
										
											2023-06-06 10:52:02 +08:00
										 |  |  |       setCustomFileIndexingEstimate(res) | 
					
						
							| 
									
										
										
										
											2024-09-04 14:41:47 +08:00
										 |  |  |     else | 
					
						
							| 
									
										
										
										
											2023-06-06 10:52:02 +08:00
										 |  |  |       setAutomaticFileIndexingEstimate(res) | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-07-28 20:47:15 +08:00
										 |  |  |   const confirmChangeCustomConfig = () => { | 
					
						
							| 
									
										
										
										
											2024-10-25 15:02:36 +08:00
										 |  |  |     if (segmentationType === SegmentType.CUSTOM && max > 4000) { | 
					
						
							|  |  |  |       Toast.notify({ type: 'error', message: t('datasetCreation.stepTwo.maxLengthCheck') }) | 
					
						
							|  |  |  |       return | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |     setCustomFileIndexingEstimate(null) | 
					
						
							|  |  |  |     setShowPreview() | 
					
						
							| 
									
										
										
										
											2023-07-28 20:47:15 +08:00
										 |  |  |     fetchFileIndexingEstimate() | 
					
						
							|  |  |  |     setPreviewSwitched(false) | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-06-06 10:52:02 +08:00
										 |  |  |   const getIndexing_technique = () => indexingType || indexType | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |   const getProcessRule = () => { | 
					
						
							| 
									
										
										
										
											2023-10-07 17:42:16 +08:00
										 |  |  |     const processRule: ProcessRule = { | 
					
						
							|  |  |  |       rules: {} as any, // api will check this. It will be removed after api refactored.
 | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |       mode: segmentationType, | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     if (segmentationType === SegmentType.CUSTOM) { | 
					
						
							|  |  |  |       const ruleObj = { | 
					
						
							|  |  |  |         pre_processing_rules: rules, | 
					
						
							|  |  |  |         segmentation: { | 
					
						
							| 
									
										
										
										
											2024-09-19 17:40:20 +08:00
										 |  |  |           separator: unescape(segmentIdentifier), | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |           max_tokens: max, | 
					
						
							| 
									
										
										
										
											2024-01-26 13:24:40 +08:00
										 |  |  |           chunk_overlap: overlap, | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |         }, | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |       processRule.rules = ruleObj | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     return processRule | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-06-16 21:47:51 +08:00
										 |  |  |   const getNotionInfo = () => { | 
					
						
							|  |  |  |     const workspacesMap = groupBy(notionPages, 'workspace_id') | 
					
						
							|  |  |  |     const workspaces = Object.keys(workspacesMap).map((workspaceId) => { | 
					
						
							|  |  |  |       return { | 
					
						
							|  |  |  |         workspaceId, | 
					
						
							|  |  |  |         pages: workspacesMap[workspaceId], | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |     }) | 
					
						
							|  |  |  |     return workspaces.map((workspace) => { | 
					
						
							|  |  |  |       return { | 
					
						
							|  |  |  |         workspace_id: workspace.workspaceId, | 
					
						
							|  |  |  |         pages: workspace.pages.map((page) => { | 
					
						
							|  |  |  |           const { page_id, page_name, page_icon, type } = page | 
					
						
							|  |  |  |           return { | 
					
						
							|  |  |  |             page_id, | 
					
						
							|  |  |  |             page_name, | 
					
						
							|  |  |  |             page_icon, | 
					
						
							|  |  |  |             type, | 
					
						
							|  |  |  |           } | 
					
						
							|  |  |  |         }), | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |     }) as NotionInfo[] | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-06-14 22:02:41 +08:00
										 |  |  |   const getWebsiteInfo = () => { | 
					
						
							|  |  |  |     return { | 
					
						
							| 
									
										
										
										
											2024-09-30 09:57:19 +08:00
										 |  |  |       provider: websiteCrawlProvider, | 
					
						
							|  |  |  |       job_id: websiteCrawlJobId, | 
					
						
							| 
									
										
										
										
											2024-06-14 22:02:41 +08:00
										 |  |  |       urls: websitePages.map(page => page.source_url), | 
					
						
							|  |  |  |       only_main_content: crawlOptions?.only_main_content, | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-10-14 13:32:13 +08:00
										 |  |  |   const getFileIndexingEstimateParams = (docForm: DocForm, language?: string): IndexingEstimateParams | undefined => { | 
					
						
							| 
									
										
										
										
											2023-06-16 21:47:51 +08:00
										 |  |  |     if (dataSourceType === DataSourceType.FILE) { | 
					
						
							| 
									
										
										
										
											2023-10-07 17:42:16 +08:00
										 |  |  |       return { | 
					
						
							| 
									
										
										
										
											2023-06-16 21:47:51 +08:00
										 |  |  |         info_list: { | 
					
						
							|  |  |  |           data_source_type: dataSourceType, | 
					
						
							|  |  |  |           file_info_list: { | 
					
						
							| 
									
										
										
										
											2023-10-07 17:42:16 +08:00
										 |  |  |             file_ids: files.map(file => file.id) as string[], | 
					
						
							| 
									
										
										
										
											2023-06-16 21:47:51 +08:00
										 |  |  |           }, | 
					
						
							|  |  |  |         }, | 
					
						
							| 
									
										
										
										
											2023-10-07 17:42:16 +08:00
										 |  |  |         indexing_technique: getIndexing_technique() as string, | 
					
						
							| 
									
										
										
										
											2023-06-16 21:47:51 +08:00
										 |  |  |         process_rule: getProcessRule(), | 
					
						
							| 
									
										
										
										
											2023-07-28 20:47:15 +08:00
										 |  |  |         doc_form: docForm, | 
					
						
							| 
									
										
										
										
											2024-10-14 13:32:13 +08:00
										 |  |  |         doc_language: language || docLanguage, | 
					
						
							| 
									
										
										
										
											2023-10-07 17:42:16 +08:00
										 |  |  |         dataset_id: datasetId as string, | 
					
						
							| 
									
										
										
										
											2023-06-16 21:47:51 +08:00
										 |  |  |       } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     if (dataSourceType === DataSourceType.NOTION) { | 
					
						
							| 
									
										
										
										
											2023-10-07 17:42:16 +08:00
										 |  |  |       return { | 
					
						
							| 
									
										
										
										
											2023-06-16 21:47:51 +08:00
										 |  |  |         info_list: { | 
					
						
							|  |  |  |           data_source_type: dataSourceType, | 
					
						
							|  |  |  |           notion_info_list: getNotionInfo(), | 
					
						
							|  |  |  |         }, | 
					
						
							| 
									
										
										
										
											2023-10-07 17:42:16 +08:00
										 |  |  |         indexing_technique: getIndexing_technique() as string, | 
					
						
							| 
									
										
										
										
											2023-06-16 21:47:51 +08:00
										 |  |  |         process_rule: getProcessRule(), | 
					
						
							| 
									
										
										
										
											2023-07-28 20:47:15 +08:00
										 |  |  |         doc_form: docForm, | 
					
						
							| 
									
										
										
										
											2024-10-14 13:32:13 +08:00
										 |  |  |         doc_language: language || docLanguage, | 
					
						
							| 
									
										
										
										
											2023-10-07 17:42:16 +08:00
										 |  |  |         dataset_id: datasetId as string, | 
					
						
							| 
									
										
										
										
											2023-06-16 21:47:51 +08:00
										 |  |  |       } | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |     } | 
					
						
							| 
									
										
										
										
											2024-06-14 22:02:41 +08:00
										 |  |  |     if (dataSourceType === DataSourceType.WEB) { | 
					
						
							|  |  |  |       return { | 
					
						
							|  |  |  |         info_list: { | 
					
						
							|  |  |  |           data_source_type: dataSourceType, | 
					
						
							|  |  |  |           website_info_list: getWebsiteInfo(), | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |         indexing_technique: getIndexing_technique() as string, | 
					
						
							|  |  |  |         process_rule: getProcessRule(), | 
					
						
							|  |  |  |         doc_form: docForm, | 
					
						
							| 
									
										
										
										
											2024-10-14 13:32:13 +08:00
										 |  |  |         doc_language: language || docLanguage, | 
					
						
							| 
									
										
										
										
											2024-06-14 22:02:41 +08:00
										 |  |  |         dataset_id: datasetId as string, | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |   } | 
					
						
							| 
									
										
										
										
											2023-11-18 11:53:35 +08:00
										 |  |  |   const { | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |     modelList: rerankModelList, | 
					
						
							|  |  |  |     defaultModel: rerankDefaultModel, | 
					
						
							| 
									
										
										
										
											2024-09-08 12:14:11 +07:00
										 |  |  |     currentModel: isRerankDefaultModelValid, | 
					
						
							| 
									
										
										
										
											2024-04-04 15:54:59 +08:00
										 |  |  |   } = useModelListAndDefaultModelAndCurrentProviderAndModel(ModelTypeEnum.rerank) | 
					
						
							| 
									
										
										
										
											2024-09-04 14:41:47 +08:00
										 |  |  |   const { data: embeddingModelList } = useModelList(ModelTypeEnum.textEmbedding) | 
					
						
							|  |  |  |   const { data: defaultEmbeddingModel } = useDefaultModel(ModelTypeEnum.textEmbedding) | 
					
						
							|  |  |  |   const [embeddingModel, setEmbeddingModel] = useState<DefaultModel>( | 
					
						
							|  |  |  |     currentDataset?.embedding_model | 
					
						
							|  |  |  |       ? { | 
					
						
							|  |  |  |         provider: currentDataset.embedding_model_provider, | 
					
						
							|  |  |  |         model: currentDataset.embedding_model, | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |       : { | 
					
						
							|  |  |  |         provider: defaultEmbeddingModel?.provider.provider || '', | 
					
						
							|  |  |  |         model: defaultEmbeddingModel?.model || '', | 
					
						
							|  |  |  |       }, | 
					
						
							|  |  |  |   ) | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |   const getCreationParams = () => { | 
					
						
							| 
									
										
										
										
											2023-06-01 23:19:36 +08:00
										 |  |  |     let params | 
					
						
							| 
									
										
										
										
											2024-01-26 13:24:40 +08:00
										 |  |  |     if (segmentationType === SegmentType.CUSTOM && overlap > max) { | 
					
						
							|  |  |  |       Toast.notify({ type: 'error', message: t('datasetCreation.stepTwo.overlapCheck') }) | 
					
						
							|  |  |  |       return | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2024-10-25 15:02:36 +08:00
										 |  |  |     if (segmentationType === SegmentType.CUSTOM && max > 4000) { | 
					
						
							|  |  |  |       Toast.notify({ type: 'error', message: t('datasetCreation.stepTwo.maxLengthCheck') }) | 
					
						
							|  |  |  |       return | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2023-06-01 23:19:36 +08:00
										 |  |  |     if (isSetting) { | 
					
						
							|  |  |  |       params = { | 
					
						
							|  |  |  |         original_document_id: documentDetail?.id, | 
					
						
							| 
									
										
										
										
											2023-07-28 20:47:15 +08:00
										 |  |  |         doc_form: docForm, | 
					
						
							| 
									
										
										
										
											2023-08-18 17:37:31 +08:00
										 |  |  |         doc_language: docLanguage, | 
					
						
							| 
									
										
										
										
											2023-06-01 23:19:36 +08:00
										 |  |  |         process_rule: getProcessRule(), | 
					
						
							| 
									
										
										
										
											2023-11-18 11:53:35 +08:00
										 |  |  |         // eslint-disable-next-line @typescript-eslint/no-use-before-define
 | 
					
						
							|  |  |  |         retrieval_model: retrievalConfig, // Readonly. If want to changed, just go to settings page.
 | 
					
						
							| 
									
										
										
										
											2024-09-04 14:41:47 +08:00
										 |  |  |         embedding_model: embeddingModel.model, // Readonly
 | 
					
						
							|  |  |  |         embedding_model_provider: embeddingModel.provider, // Readonly
 | 
					
						
							| 
									
										
										
										
											2023-06-01 23:19:36 +08:00
										 |  |  |       } as CreateDocumentReq | 
					
						
							| 
									
										
										
										
											2023-06-06 10:52:02 +08:00
										 |  |  |     } | 
					
						
							| 
									
										
										
										
											2023-11-18 11:53:35 +08:00
										 |  |  |     else { // create
 | 
					
						
							|  |  |  |       const indexMethod = getIndexing_technique() | 
					
						
							|  |  |  |       if ( | 
					
						
							|  |  |  |         !isReRankModelSelected({ | 
					
						
							|  |  |  |           rerankDefaultModel, | 
					
						
							| 
									
										
										
										
											2024-09-08 12:14:11 +07:00
										 |  |  |           isRerankDefaultModelValid: !!isRerankDefaultModelValid, | 
					
						
							| 
									
										
										
										
											2023-11-21 13:46:07 +08:00
										 |  |  |           rerankModelList, | 
					
						
							| 
									
										
										
										
											2023-11-18 11:53:35 +08:00
										 |  |  |           // eslint-disable-next-line @typescript-eslint/no-use-before-define
 | 
					
						
							|  |  |  |           retrievalConfig, | 
					
						
							|  |  |  |           indexMethod: indexMethod as string, | 
					
						
							|  |  |  |         }) | 
					
						
							|  |  |  |       ) { | 
					
						
							|  |  |  |         Toast.notify({ type: 'error', message: t('appDebug.datasetConfig.rerankModelRequired') }) | 
					
						
							|  |  |  |         return | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |       const postRetrievalConfig = ensureRerankModelSelected({ | 
					
						
							|  |  |  |         rerankDefaultModel: rerankDefaultModel!, | 
					
						
							|  |  |  |         // eslint-disable-next-line @typescript-eslint/no-use-before-define
 | 
					
						
							|  |  |  |         retrievalConfig, | 
					
						
							|  |  |  |         indexMethod: indexMethod as string, | 
					
						
							|  |  |  |       }) | 
					
						
							| 
									
										
										
										
											2023-06-01 23:19:36 +08:00
										 |  |  |       params = { | 
					
						
							|  |  |  |         data_source: { | 
					
						
							| 
									
										
										
										
											2023-06-16 21:47:51 +08:00
										 |  |  |           type: dataSourceType, | 
					
						
							|  |  |  |           info_list: { | 
					
						
							|  |  |  |             data_source_type: dataSourceType, | 
					
						
							|  |  |  |           }, | 
					
						
							| 
									
										
										
										
											2023-06-01 23:19:36 +08:00
										 |  |  |         }, | 
					
						
							|  |  |  |         indexing_technique: getIndexing_technique(), | 
					
						
							|  |  |  |         process_rule: getProcessRule(), | 
					
						
							| 
									
										
										
										
											2023-07-28 20:47:15 +08:00
										 |  |  |         doc_form: docForm, | 
					
						
							| 
									
										
										
										
											2023-08-18 17:37:31 +08:00
										 |  |  |         doc_language: docLanguage, | 
					
						
							| 
									
										
										
										
											2023-11-18 11:53:35 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |         retrieval_model: postRetrievalConfig, | 
					
						
							| 
									
										
										
										
											2024-09-04 14:41:47 +08:00
										 |  |  |         embedding_model: embeddingModel.model, | 
					
						
							|  |  |  |         embedding_model_provider: embeddingModel.provider, | 
					
						
							| 
									
										
										
										
											2023-06-01 23:19:36 +08:00
										 |  |  |       } as CreateDocumentReq | 
					
						
							| 
									
										
										
										
											2023-06-16 21:47:51 +08:00
										 |  |  |       if (dataSourceType === DataSourceType.FILE) { | 
					
						
							|  |  |  |         params.data_source.info_list.file_info_list = { | 
					
						
							| 
									
										
										
										
											2023-09-24 14:35:20 +08:00
										 |  |  |           file_ids: files.map(file => file.id || '').filter(Boolean), | 
					
						
							| 
									
										
										
										
											2023-06-16 21:47:51 +08:00
										 |  |  |         } | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |       if (dataSourceType === DataSourceType.NOTION) | 
					
						
							|  |  |  |         params.data_source.info_list.notion_info_list = getNotionInfo() | 
					
						
							| 
									
										
										
										
											2024-06-14 22:02:41 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |       if (dataSourceType === DataSourceType.WEB) | 
					
						
							|  |  |  |         params.data_source.info_list.website_info_list = getWebsiteInfo() | 
					
						
							| 
									
										
										
										
											2023-06-01 23:19:36 +08:00
										 |  |  |     } | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |     return params | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   const getRules = async () => { | 
					
						
							|  |  |  |     try { | 
					
						
							|  |  |  |       const res = await fetchDefaultProcessRule({ url: '/datasets/process-rule' }) | 
					
						
							|  |  |  |       const separator = res.rules.segmentation.separator | 
					
						
							| 
									
										
										
										
											2024-09-19 17:40:20 +08:00
										 |  |  |       setSegmentIdentifier(separator) | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |       setMax(res.rules.segmentation.max_tokens) | 
					
						
							| 
									
										
										
										
											2024-01-26 13:24:40 +08:00
										 |  |  |       setOverlap(res.rules.segmentation.chunk_overlap) | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |       setRules(res.rules.pre_processing_rules) | 
					
						
							|  |  |  |       setDefaultConfig(res.rules) | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     catch (err) { | 
					
						
							|  |  |  |       console.log(err) | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							| 
									
										
										
										
											2023-06-01 23:19:36 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |   const getRulesFromDetail = () => { | 
					
						
							|  |  |  |     if (documentDetail) { | 
					
						
							|  |  |  |       const rules = documentDetail.dataset_process_rule.rules | 
					
						
							|  |  |  |       const separator = rules.segmentation.separator | 
					
						
							|  |  |  |       const max = rules.segmentation.max_tokens | 
					
						
							| 
									
										
										
										
											2024-01-26 13:24:40 +08:00
										 |  |  |       const overlap = rules.segmentation.chunk_overlap | 
					
						
							| 
									
										
										
										
											2024-09-19 17:40:20 +08:00
										 |  |  |       setSegmentIdentifier(separator) | 
					
						
							| 
									
										
										
										
											2023-06-01 23:19:36 +08:00
										 |  |  |       setMax(max) | 
					
						
							| 
									
										
										
										
											2024-01-26 13:24:40 +08:00
										 |  |  |       setOverlap(overlap) | 
					
						
							| 
									
										
										
										
											2023-06-01 23:19:36 +08:00
										 |  |  |       setRules(rules.pre_processing_rules) | 
					
						
							|  |  |  |       setDefaultConfig(rules) | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   const getDefaultMode = () => { | 
					
						
							| 
									
										
										
										
											2023-06-06 10:52:02 +08:00
										 |  |  |     if (documentDetail) | 
					
						
							| 
									
										
										
										
											2023-06-01 23:19:36 +08:00
										 |  |  |       setSegmentationType(documentDetail.dataset_process_rule.mode) | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |   const createHandle = async () => { | 
					
						
							| 
									
										
										
										
											2023-09-27 10:31:27 +08:00
										 |  |  |     if (isCreating) | 
					
						
							|  |  |  |       return | 
					
						
							|  |  |  |     setIsCreating(true) | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |     try { | 
					
						
							| 
									
										
										
										
											2023-06-06 10:52:02 +08:00
										 |  |  |       let res | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |       const params = getCreationParams() | 
					
						
							| 
									
										
										
										
											2023-11-21 13:46:07 +08:00
										 |  |  |       if (!params) | 
					
						
							|  |  |  |         return false | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-09-24 14:35:20 +08:00
										 |  |  |       setIsCreating(true) | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |       if (!datasetId) { | 
					
						
							|  |  |  |         res = await createFirstDocument({ | 
					
						
							| 
									
										
										
										
											2023-11-18 11:53:35 +08:00
										 |  |  |           body: params as CreateDocumentReq, | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |         }) | 
					
						
							| 
									
										
										
										
											2023-10-07 17:42:16 +08:00
										 |  |  |         updateIndexingTypeCache && updateIndexingTypeCache(indexType as string) | 
					
						
							| 
									
										
										
										
											2023-06-01 23:19:36 +08:00
										 |  |  |         updateResultCache && updateResultCache(res) | 
					
						
							| 
									
										
										
										
											2023-06-06 10:52:02 +08:00
										 |  |  |       } | 
					
						
							|  |  |  |       else { | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |         res = await createDocument({ | 
					
						
							|  |  |  |           datasetId, | 
					
						
							| 
									
										
										
										
											2023-11-18 11:53:35 +08:00
										 |  |  |           body: params as CreateDocumentReq, | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |         }) | 
					
						
							| 
									
										
										
										
											2023-10-07 17:42:16 +08:00
										 |  |  |         updateIndexingTypeCache && updateIndexingTypeCache(indexType as string) | 
					
						
							| 
									
										
										
										
											2023-06-16 21:47:51 +08:00
										 |  |  |         updateResultCache && updateResultCache(res) | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |       } | 
					
						
							| 
									
										
										
										
											2023-06-19 16:32:25 +08:00
										 |  |  |       if (mutateDatasetRes) | 
					
						
							|  |  |  |         mutateDatasetRes() | 
					
						
							| 
									
										
										
										
											2023-06-01 23:19:36 +08:00
										 |  |  |       onStepChange && onStepChange(+1) | 
					
						
							|  |  |  |       isSetting && onSave && onSave() | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |     } | 
					
						
							|  |  |  |     catch (err) { | 
					
						
							|  |  |  |       Toast.notify({ | 
					
						
							|  |  |  |         type: 'error', | 
					
						
							| 
									
										
										
										
											2023-06-06 10:52:02 +08:00
										 |  |  |         message: `${err}`, | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |       }) | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2023-09-24 14:35:20 +08:00
										 |  |  |     finally { | 
					
						
							|  |  |  |       setIsCreating(false) | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-07-28 20:47:15 +08:00
										 |  |  |   const handleSwitch = (state: boolean) => { | 
					
						
							|  |  |  |     if (state) | 
					
						
							|  |  |  |       setDocForm(DocForm.QA) | 
					
						
							|  |  |  |     else | 
					
						
							|  |  |  |       setDocForm(DocForm.TEXT) | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-10-14 13:32:13 +08:00
										 |  |  |   const previewSwitch = async (language?: string) => { | 
					
						
							|  |  |  |     setPreviewSwitched(true) | 
					
						
							|  |  |  |     setIsLanguageSelectDisabled(true) | 
					
						
							|  |  |  |     if (segmentationType === SegmentType.AUTO) | 
					
						
							|  |  |  |       setAutomaticFileIndexingEstimate(null) | 
					
						
							|  |  |  |     else | 
					
						
							|  |  |  |       setCustomFileIndexingEstimate(null) | 
					
						
							|  |  |  |     try { | 
					
						
							|  |  |  |       await fetchFileIndexingEstimate(DocForm.QA, language) | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     finally { | 
					
						
							|  |  |  |       setIsLanguageSelectDisabled(false) | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-08-18 17:37:31 +08:00
										 |  |  |   const handleSelect = (language: string) => { | 
					
						
							|  |  |  |     setDocLanguage(language) | 
					
						
							| 
									
										
										
										
											2024-10-14 13:32:13 +08:00
										 |  |  |     // Switch language, re-cutter
 | 
					
						
							|  |  |  |     if (docForm === DocForm.QA && previewSwitched) | 
					
						
							|  |  |  |       previewSwitch(language) | 
					
						
							| 
									
										
										
										
											2023-08-18 17:37:31 +08:00
										 |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-07-28 20:47:15 +08:00
										 |  |  |   const changeToEconomicalType = () => { | 
					
						
							|  |  |  |     if (!hasSetIndexType) { | 
					
						
							|  |  |  |       setIndexType(IndexingType.ECONOMICAL) | 
					
						
							|  |  |  |       setDocForm(DocForm.TEXT) | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |   useEffect(() => { | 
					
						
							|  |  |  |     // fetch rules
 | 
					
						
							| 
									
										
										
										
											2023-06-01 23:19:36 +08:00
										 |  |  |     if (!isSetting) { | 
					
						
							|  |  |  |       getRules() | 
					
						
							| 
									
										
										
										
											2023-06-06 10:52:02 +08:00
										 |  |  |     } | 
					
						
							|  |  |  |     else { | 
					
						
							| 
									
										
										
										
											2023-06-01 23:19:36 +08:00
										 |  |  |       getRulesFromDetail() | 
					
						
							|  |  |  |       getDefaultMode() | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |   }, []) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   useEffect(() => { | 
					
						
							| 
									
										
										
										
											2023-06-06 10:52:02 +08:00
										 |  |  |     scrollRef.current?.addEventListener('scroll', scrollHandle) | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |     return () => { | 
					
						
							| 
									
										
										
										
											2023-06-06 10:52:02 +08:00
										 |  |  |       scrollRef.current?.removeEventListener('scroll', scrollHandle) | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |     } | 
					
						
							|  |  |  |   }, []) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   useLayoutEffect(() => { | 
					
						
							|  |  |  |     if (showPreview) { | 
					
						
							| 
									
										
										
										
											2023-06-06 10:52:02 +08:00
										 |  |  |       previewScrollRef.current?.addEventListener('scroll', previewScrollHandle) | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |       return () => { | 
					
						
							| 
									
										
										
										
											2023-06-06 10:52:02 +08:00
										 |  |  |         previewScrollRef.current?.removeEventListener('scroll', previewScrollHandle) | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |       } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   }, [showPreview]) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-07-28 20:47:15 +08:00
										 |  |  |   useEffect(() => { | 
					
						
							|  |  |  |     if (indexingType === IndexingType.ECONOMICAL && docForm === DocForm.QA) | 
					
						
							|  |  |  |       setDocForm(DocForm.TEXT) | 
					
						
							|  |  |  |   }, [indexingType, docForm]) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |   useEffect(() => { | 
					
						
							|  |  |  |     // get indexing type by props
 | 
					
						
							| 
									
										
										
										
											2023-06-06 10:52:02 +08:00
										 |  |  |     if (indexingType) | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |       setIndexType(indexingType as IndexingType) | 
					
						
							| 
									
										
										
										
											2023-06-06 10:52:02 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     else | 
					
						
							| 
									
										
										
										
											2024-06-05 00:13:29 +08:00
										 |  |  |       setIndexType(isAPIKeySet ? IndexingType.QUALIFIED : IndexingType.ECONOMICAL) | 
					
						
							|  |  |  |   }, [isAPIKeySet, indexingType, datasetId]) | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |   useEffect(() => { | 
					
						
							|  |  |  |     if (segmentationType === SegmentType.AUTO) { | 
					
						
							|  |  |  |       setAutomaticFileIndexingEstimate(null) | 
					
						
							| 
									
										
										
										
											2023-11-27 11:47:48 +08:00
										 |  |  |       !isMobile && setShowPreview() | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |       fetchFileIndexingEstimate() | 
					
						
							| 
									
										
										
										
											2023-07-28 20:47:15 +08:00
										 |  |  |       setPreviewSwitched(false) | 
					
						
							| 
									
										
										
										
											2023-06-06 10:52:02 +08:00
										 |  |  |     } | 
					
						
							|  |  |  |     else { | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |       hidePreview() | 
					
						
							|  |  |  |       setCustomFileIndexingEstimate(null) | 
					
						
							| 
									
										
										
										
											2023-07-28 20:47:15 +08:00
										 |  |  |       setPreviewSwitched(false) | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |     } | 
					
						
							|  |  |  |   }, [segmentationType, indexType]) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-11-18 11:53:35 +08:00
										 |  |  |   const [retrievalConfig, setRetrievalConfig] = useState(currentDataset?.retrieval_model_dict || { | 
					
						
							|  |  |  |     search_method: RETRIEVE_METHOD.semantic, | 
					
						
							|  |  |  |     reranking_enable: false, | 
					
						
							|  |  |  |     reranking_model: { | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |       reranking_provider_name: rerankDefaultModel?.provider.provider, | 
					
						
							|  |  |  |       reranking_model_name: rerankDefaultModel?.model, | 
					
						
							| 
									
										
										
										
											2023-11-18 11:53:35 +08:00
										 |  |  |     }, | 
					
						
							|  |  |  |     top_k: 3, | 
					
						
							| 
									
										
										
										
											2023-11-27 15:38:05 +08:00
										 |  |  |     score_threshold_enabled: false, | 
					
						
							| 
									
										
										
										
											2023-11-18 11:53:35 +08:00
										 |  |  |     score_threshold: 0.5, | 
					
						
							|  |  |  |   } as RetrievalConfig) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |   return ( | 
					
						
							|  |  |  |     <div className='flex w-full h-full'> | 
					
						
							|  |  |  |       <div ref={scrollRef} className='relative h-full w-full overflow-y-scroll'> | 
					
						
							| 
									
										
										
										
											2023-11-27 11:47:48 +08:00
										 |  |  |         <div className={cn(s.pageHeader, scrolled && s.fixed, isMobile && '!px-6')}> | 
					
						
							|  |  |  |           <span>{t('datasetCreation.steps.two')}</span> | 
					
						
							| 
									
										
										
										
											2024-10-14 13:32:52 +08:00
										 |  |  |           {(isMobile || !showPreview) && ( | 
					
						
							| 
									
										
										
										
											2023-11-27 11:47:48 +08:00
										 |  |  |             <Button | 
					
						
							|  |  |  |               className='border-[0.5px] !h-8 hover:outline hover:outline-[0.5px] hover:outline-gray-300 text-gray-700 font-medium bg-white shadow-[0px_1px_2px_0px_rgba(16,24,40,0.05)]' | 
					
						
							|  |  |  |               onClick={setShowPreview} | 
					
						
							|  |  |  |             > | 
					
						
							| 
									
										
										
										
											2024-08-26 13:00:02 +08:00
										 |  |  |               <Tooltip> | 
					
						
							| 
									
										
										
										
											2023-11-27 11:47:48 +08:00
										 |  |  |                 <div className="flex flex-row items-center"> | 
					
						
							|  |  |  |                   <RocketLaunchIcon className="h-4 w-4 mr-1.5 stroke-[1.8px]" /> | 
					
						
							|  |  |  |                   <span className="text-[13px]">{t('datasetCreation.stepTwo.previewTitleButton')}</span> | 
					
						
							|  |  |  |                 </div> | 
					
						
							|  |  |  |               </Tooltip> | 
					
						
							|  |  |  |             </Button> | 
					
						
							|  |  |  |           )} | 
					
						
							|  |  |  |         </div> | 
					
						
							|  |  |  |         <div className={cn(s.form, isMobile && '!px-4')}> | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |           <div className={s.label}>{t('datasetCreation.stepTwo.segmentation')}</div> | 
					
						
							|  |  |  |           <div className='max-w-[640px]'> | 
					
						
							|  |  |  |             <div | 
					
						
							|  |  |  |               className={cn( | 
					
						
							|  |  |  |                 s.radioItem, | 
					
						
							|  |  |  |                 s.segmentationItem, | 
					
						
							| 
									
										
										
										
											2023-06-06 10:52:02 +08:00
										 |  |  |                 segmentationType === SegmentType.AUTO && s.active, | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |               )} | 
					
						
							|  |  |  |               onClick={() => setSegmentationType(SegmentType.AUTO)} | 
					
						
							|  |  |  |             > | 
					
						
							|  |  |  |               <span className={cn(s.typeIcon, s.auto)} /> | 
					
						
							|  |  |  |               <span className={cn(s.radio)} /> | 
					
						
							|  |  |  |               <div className={s.typeHeader}> | 
					
						
							|  |  |  |                 <div className={s.title}>{t('datasetCreation.stepTwo.auto')}</div> | 
					
						
							|  |  |  |                 <div className={s.tip}>{t('datasetCreation.stepTwo.autoDescription')}</div> | 
					
						
							|  |  |  |               </div> | 
					
						
							|  |  |  |             </div> | 
					
						
							|  |  |  |             <div | 
					
						
							|  |  |  |               className={cn( | 
					
						
							|  |  |  |                 s.radioItem, | 
					
						
							|  |  |  |                 s.segmentationItem, | 
					
						
							|  |  |  |                 segmentationType === SegmentType.CUSTOM && s.active, | 
					
						
							|  |  |  |                 segmentationType === SegmentType.CUSTOM && s.custom, | 
					
						
							|  |  |  |               )} | 
					
						
							|  |  |  |               onClick={() => setSegmentationType(SegmentType.CUSTOM)} | 
					
						
							|  |  |  |             > | 
					
						
							|  |  |  |               <span className={cn(s.typeIcon, s.customize)} /> | 
					
						
							|  |  |  |               <span className={cn(s.radio)} /> | 
					
						
							|  |  |  |               <div className={s.typeHeader}> | 
					
						
							|  |  |  |                 <div className={s.title}>{t('datasetCreation.stepTwo.custom')}</div> | 
					
						
							|  |  |  |                 <div className={s.tip}>{t('datasetCreation.stepTwo.customDescription')}</div> | 
					
						
							|  |  |  |               </div> | 
					
						
							|  |  |  |               {segmentationType === SegmentType.CUSTOM && ( | 
					
						
							|  |  |  |                 <div className={s.typeFormBody}> | 
					
						
							|  |  |  |                   <div className={s.formRow}> | 
					
						
							|  |  |  |                     <div className='w-full'> | 
					
						
							| 
									
										
										
										
											2024-09-19 17:40:20 +08:00
										 |  |  |                       <div className={s.label}> | 
					
						
							|  |  |  |                         {t('datasetCreation.stepTwo.separator')} | 
					
						
							|  |  |  |                         <Tooltip | 
					
						
							|  |  |  |                           popupContent={ | 
					
						
							|  |  |  |                             <div className='max-w-[200px]'> | 
					
						
							|  |  |  |                               {t('datasetCreation.stepTwo.separatorTip')} | 
					
						
							|  |  |  |                             </div> | 
					
						
							|  |  |  |                           } | 
					
						
							|  |  |  |                         /> | 
					
						
							|  |  |  |                       </div> | 
					
						
							| 
									
										
										
										
											2024-10-21 10:32:37 +08:00
										 |  |  |                       <Input | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |                         type="text" | 
					
						
							| 
									
										
										
										
											2024-10-21 10:32:37 +08:00
										 |  |  |                         className='h-9' | 
					
						
							|  |  |  |                         placeholder={t('datasetCreation.stepTwo.separatorPlaceholder') || ''} value={segmentIdentifier} | 
					
						
							|  |  |  |                         onChange={e => setSegmentIdentifier(e.target.value)} | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |                       /> | 
					
						
							|  |  |  |                     </div> | 
					
						
							|  |  |  |                   </div> | 
					
						
							|  |  |  |                   <div className={s.formRow}> | 
					
						
							|  |  |  |                     <div className='w-full'> | 
					
						
							|  |  |  |                       <div className={s.label}>{t('datasetCreation.stepTwo.maxLength')}</div> | 
					
						
							| 
									
										
										
										
											2024-10-21 10:32:37 +08:00
										 |  |  |                       <Input | 
					
						
							|  |  |  |                         type="number" | 
					
						
							|  |  |  |                         className='h-9' | 
					
						
							|  |  |  |                         placeholder={t('datasetCreation.stepTwo.maxLength') || ''} | 
					
						
							|  |  |  |                         value={max} | 
					
						
							| 
									
										
										
										
											2024-10-25 15:02:36 +08:00
										 |  |  |                         max={4000} | 
					
						
							| 
									
										
										
										
											2024-10-21 10:32:37 +08:00
										 |  |  |                         min={1} | 
					
						
							|  |  |  |                         onChange={e => setMax(parseInt(e.target.value.replace(/^0+/, ''), 10))} | 
					
						
							|  |  |  |                       /> | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |                     </div> | 
					
						
							|  |  |  |                   </div> | 
					
						
							| 
									
										
										
										
											2024-01-26 13:24:40 +08:00
										 |  |  |                   <div className={s.formRow}> | 
					
						
							|  |  |  |                     <div className='w-full'> | 
					
						
							|  |  |  |                       <div className={s.label}> | 
					
						
							|  |  |  |                         {t('datasetCreation.stepTwo.overlap')} | 
					
						
							| 
									
										
										
										
											2024-08-26 13:00:02 +08:00
										 |  |  |                         <Tooltip | 
					
						
							|  |  |  |                           popupContent={ | 
					
						
							|  |  |  |                             <div className='max-w-[200px]'> | 
					
						
							|  |  |  |                               {t('datasetCreation.stepTwo.overlapTip')} | 
					
						
							|  |  |  |                             </div> | 
					
						
							|  |  |  |                           } | 
					
						
							|  |  |  |                         /> | 
					
						
							| 
									
										
										
										
											2024-01-26 13:24:40 +08:00
										 |  |  |                       </div> | 
					
						
							| 
									
										
										
										
											2024-10-21 10:32:37 +08:00
										 |  |  |                       <Input | 
					
						
							|  |  |  |                         type="number" | 
					
						
							|  |  |  |                         className='h-9' | 
					
						
							|  |  |  |                         placeholder={t('datasetCreation.stepTwo.overlap') || ''} | 
					
						
							|  |  |  |                         value={overlap} | 
					
						
							|  |  |  |                         min={1} | 
					
						
							|  |  |  |                         onChange={e => setOverlap(parseInt(e.target.value.replace(/^0+/, ''), 10))} | 
					
						
							|  |  |  |                       /> | 
					
						
							| 
									
										
										
										
											2024-01-26 13:24:40 +08:00
										 |  |  |                     </div> | 
					
						
							|  |  |  |                   </div> | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |                   <div className={s.formRow}> | 
					
						
							| 
									
										
										
										
											2023-11-27 11:47:48 +08:00
										 |  |  |                     <div className='w-full flex flex-col gap-1'> | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |                       <div className={s.label}>{t('datasetCreation.stepTwo.rules')}</div> | 
					
						
							|  |  |  |                       {rules.map(rule => ( | 
					
						
							|  |  |  |                         <div key={rule.id} className={s.ruleItem}> | 
					
						
							| 
									
										
										
										
											2023-09-15 20:54:20 +08:00
										 |  |  |                           <input id={rule.id} type="checkbox" checked={rule.enabled} onChange={() => ruleChangeHandle(rule.id)} className="w-4 h-4 rounded border-gray-300 text-blue-700 focus:ring-blue-700" /> | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |                           <label htmlFor={rule.id} className="ml-2 text-sm font-normal cursor-pointer text-gray-800">{getRuleName(rule.id)}</label> | 
					
						
							|  |  |  |                         </div> | 
					
						
							|  |  |  |                       ))} | 
					
						
							|  |  |  |                     </div> | 
					
						
							|  |  |  |                   </div> | 
					
						
							|  |  |  |                   <div className={s.formFooter}> | 
					
						
							| 
									
										
										
										
											2024-06-21 14:17:45 +08:00
										 |  |  |                     <Button variant="primary" className={cn(s.button)} onClick={confirmChangeCustomConfig}>{t('datasetCreation.stepTwo.preview')}</Button> | 
					
						
							|  |  |  |                     <Button className={cn(s.button, 'ml-2')} onClick={resetRules}>{t('datasetCreation.stepTwo.reset')}</Button> | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |                   </div> | 
					
						
							|  |  |  |                 </div> | 
					
						
							|  |  |  |               )} | 
					
						
							|  |  |  |             </div> | 
					
						
							|  |  |  |           </div> | 
					
						
							|  |  |  |           <div className={s.label}>{t('datasetCreation.stepTwo.indexMode')}</div> | 
					
						
							|  |  |  |           <div className='max-w-[640px]'> | 
					
						
							| 
									
										
										
										
											2023-11-27 11:47:48 +08:00
										 |  |  |             <div className='flex items-center gap-3 flex-wrap sm:flex-nowrap'> | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |               {(!hasSetIndexType || (hasSetIndexType && indexingType === IndexingType.QUALIFIED)) && ( | 
					
						
							|  |  |  |                 <div | 
					
						
							|  |  |  |                   className={cn( | 
					
						
							|  |  |  |                     s.radioItem, | 
					
						
							|  |  |  |                     s.indexItem, | 
					
						
							| 
									
										
										
										
											2024-06-05 00:13:29 +08:00
										 |  |  |                     !isAPIKeySet && s.disabled, | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |                     !hasSetIndexType && indexType === IndexingType.QUALIFIED && s.active, | 
					
						
							|  |  |  |                     hasSetIndexType && s.disabled, | 
					
						
							| 
									
										
										
										
											2024-09-04 14:41:47 +08:00
										 |  |  |                     hasSetIndexType && '!w-full !min-h-[96px]', | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |                   )} | 
					
						
							|  |  |  |                   onClick={() => { | 
					
						
							| 
									
										
										
										
											2024-06-05 00:13:29 +08:00
										 |  |  |                     if (isAPIKeySet) | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |                       setIndexType(IndexingType.QUALIFIED) | 
					
						
							|  |  |  |                   }} | 
					
						
							|  |  |  |                 > | 
					
						
							|  |  |  |                   <span className={cn(s.typeIcon, s.qualified)} /> | 
					
						
							|  |  |  |                   {!hasSetIndexType && <span className={cn(s.radio)} />} | 
					
						
							|  |  |  |                   <div className={s.typeHeader}> | 
					
						
							|  |  |  |                     <div className={s.title}> | 
					
						
							|  |  |  |                       {t('datasetCreation.stepTwo.qualified')} | 
					
						
							|  |  |  |                       {!hasSetIndexType && <span className={s.recommendTag}>{t('datasetCreation.stepTwo.recommend')}</span>} | 
					
						
							|  |  |  |                     </div> | 
					
						
							|  |  |  |                     <div className={s.tip}>{t('datasetCreation.stepTwo.qualifiedTip')}</div> | 
					
						
							|  |  |  |                   </div> | 
					
						
							| 
									
										
										
										
											2024-06-05 00:13:29 +08:00
										 |  |  |                   {!isAPIKeySet && ( | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |                     <div className={s.warningTip}> | 
					
						
							|  |  |  |                       <span>{t('datasetCreation.stepTwo.warning')} </span> | 
					
						
							|  |  |  |                       <span className={s.click} onClick={onSetting}>{t('datasetCreation.stepTwo.click')}</span> | 
					
						
							|  |  |  |                     </div> | 
					
						
							|  |  |  |                   )} | 
					
						
							|  |  |  |                 </div> | 
					
						
							|  |  |  |               )} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |               {(!hasSetIndexType || (hasSetIndexType && indexingType === IndexingType.ECONOMICAL)) && ( | 
					
						
							|  |  |  |                 <div | 
					
						
							|  |  |  |                   className={cn( | 
					
						
							|  |  |  |                     s.radioItem, | 
					
						
							|  |  |  |                     s.indexItem, | 
					
						
							|  |  |  |                     !hasSetIndexType && indexType === IndexingType.ECONOMICAL && s.active, | 
					
						
							|  |  |  |                     hasSetIndexType && s.disabled, | 
					
						
							| 
									
										
										
										
											2024-09-04 14:41:47 +08:00
										 |  |  |                     hasSetIndexType && '!w-full !min-h-[96px]', | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |                   )} | 
					
						
							| 
									
										
										
										
											2023-07-28 20:47:15 +08:00
										 |  |  |                   onClick={changeToEconomicalType} | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |                 > | 
					
						
							|  |  |  |                   <span className={cn(s.typeIcon, s.economical)} /> | 
					
						
							|  |  |  |                   {!hasSetIndexType && <span className={cn(s.radio)} />} | 
					
						
							|  |  |  |                   <div className={s.typeHeader}> | 
					
						
							|  |  |  |                     <div className={s.title}>{t('datasetCreation.stepTwo.economical')}</div> | 
					
						
							|  |  |  |                     <div className={s.tip}>{t('datasetCreation.stepTwo.economicalTip')}</div> | 
					
						
							|  |  |  |                   </div> | 
					
						
							|  |  |  |                 </div> | 
					
						
							|  |  |  |               )} | 
					
						
							|  |  |  |             </div> | 
					
						
							| 
									
										
										
										
											2024-09-04 14:41:47 +08:00
										 |  |  |             {hasSetIndexType && indexType === IndexingType.ECONOMICAL && ( | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |               <div className='mt-2 text-xs text-gray-500 font-medium'> | 
					
						
							| 
									
										
										
										
											2024-09-07 15:59:38 +07:00
										 |  |  |                 {t('datasetCreation.stepTwo.indexSettingTip')} | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |                 <Link className='text-[#155EEF]' href={`/datasets/${datasetId}/settings`}>{t('datasetCreation.stepTwo.datasetSettingLink')}</Link> | 
					
						
							|  |  |  |               </div> | 
					
						
							|  |  |  |             )} | 
					
						
							| 
									
										
										
										
											2023-08-03 11:28:42 +08:00
										 |  |  |             {IS_CE_EDITION && indexType === IndexingType.QUALIFIED && ( | 
					
						
							| 
									
										
										
										
											2023-08-18 17:37:31 +08:00
										 |  |  |               <div className='mt-3 rounded-xl bg-gray-50 border border-gray-100'> | 
					
						
							|  |  |  |                 <div className='flex justify-between items-center px-5 py-4'> | 
					
						
							|  |  |  |                   <div className='flex justify-center items-center w-8 h-8 rounded-lg bg-indigo-50'> | 
					
						
							|  |  |  |                     <MessageChatSquare className='w-4 h-4' /> | 
					
						
							|  |  |  |                   </div> | 
					
						
							|  |  |  |                   <div className='grow mx-3'> | 
					
						
							|  |  |  |                     <div className='mb-[2px] text-md font-medium text-gray-900'>{t('datasetCreation.stepTwo.QATitle')}</div> | 
					
						
							|  |  |  |                     <div className='inline-flex items-center text-[13px] leading-[18px] text-gray-500'> | 
					
						
							|  |  |  |                       <span className='pr-1'>{t('datasetCreation.stepTwo.QALanguage')}</span> | 
					
						
							| 
									
										
										
										
											2024-10-14 13:32:13 +08:00
										 |  |  |                       <LanguageSelect currentLanguage={docLanguage} onSelect={handleSelect} disabled={isLanguageSelectDisabled} /> | 
					
						
							| 
									
										
										
										
											2023-08-18 17:37:31 +08:00
										 |  |  |                     </div> | 
					
						
							|  |  |  |                   </div> | 
					
						
							|  |  |  |                   <div className='shrink-0'> | 
					
						
							|  |  |  |                     <Switch | 
					
						
							|  |  |  |                       defaultValue={docForm === DocForm.QA} | 
					
						
							|  |  |  |                       onChange={handleSwitch} | 
					
						
							|  |  |  |                       size='md' | 
					
						
							|  |  |  |                     /> | 
					
						
							|  |  |  |                   </div> | 
					
						
							| 
									
										
										
										
											2023-07-28 20:47:15 +08:00
										 |  |  |                 </div> | 
					
						
							| 
									
										
										
										
											2023-08-18 17:37:31 +08:00
										 |  |  |                 {docForm === DocForm.QA && !QATipHide && ( | 
					
						
							|  |  |  |                   <div className='flex justify-between items-center px-5 py-2 bg-orange-50 border-t border-amber-100 rounded-b-xl text-[13px] leading-[18px] text-medium text-amber-500'> | 
					
						
							|  |  |  |                     {t('datasetCreation.stepTwo.QATip')} | 
					
						
							| 
									
										
										
										
											2024-06-20 11:05:08 +08:00
										 |  |  |                     <RiCloseLine className='w-4 h-4 text-gray-500 cursor-pointer' onClick={() => setQATipHide(true)} /> | 
					
						
							| 
									
										
										
										
											2023-08-18 17:37:31 +08:00
										 |  |  |                   </div> | 
					
						
							|  |  |  |                 )} | 
					
						
							| 
									
										
										
										
											2023-07-28 20:47:15 +08:00
										 |  |  |               </div> | 
					
						
							|  |  |  |             )} | 
					
						
							| 
									
										
										
										
											2024-09-04 14:41:47 +08:00
										 |  |  |             {/* Embedding model */} | 
					
						
							|  |  |  |             {indexType === IndexingType.QUALIFIED && ( | 
					
						
							|  |  |  |               <div className='mb-2'> | 
					
						
							|  |  |  |                 <div className={cn(s.label, datasetId && 'flex justify-between items-center')}>{t('datasetSettings.form.embeddingModel')}</div> | 
					
						
							|  |  |  |                 <ModelSelector | 
					
						
							|  |  |  |                   readonly={!!datasetId} | 
					
						
							|  |  |  |                   defaultModel={embeddingModel} | 
					
						
							|  |  |  |                   modelList={embeddingModelList} | 
					
						
							|  |  |  |                   onSelect={(model: DefaultModel) => { | 
					
						
							|  |  |  |                     setEmbeddingModel(model) | 
					
						
							|  |  |  |                   }} | 
					
						
							|  |  |  |                 /> | 
					
						
							|  |  |  |                 {!!datasetId && ( | 
					
						
							|  |  |  |                   <div className='mt-2 text-xs text-gray-500 font-medium'> | 
					
						
							| 
									
										
										
										
											2024-09-07 15:59:38 +07:00
										 |  |  |                     {t('datasetCreation.stepTwo.indexSettingTip')} | 
					
						
							| 
									
										
										
										
											2024-09-04 14:41:47 +08:00
										 |  |  |                     <Link className='text-[#155EEF]' href={`/datasets/${datasetId}/settings`}>{t('datasetCreation.stepTwo.datasetSettingLink')}</Link> | 
					
						
							|  |  |  |                   </div> | 
					
						
							|  |  |  |                 )} | 
					
						
							|  |  |  |               </div> | 
					
						
							|  |  |  |             )} | 
					
						
							| 
									
										
										
										
											2023-11-18 11:53:35 +08:00
										 |  |  |             {/* Retrieval Method Config */} | 
					
						
							|  |  |  |             <div> | 
					
						
							|  |  |  |               {!datasetId | 
					
						
							|  |  |  |                 ? ( | 
					
						
							|  |  |  |                   <div className={s.label}> | 
					
						
							| 
									
										
										
										
											2024-09-04 14:41:47 +08:00
										 |  |  |                     <div className='shrink-0 mr-4'>{t('datasetSettings.form.retrievalSetting.title')}</div> | 
					
						
							| 
									
										
										
										
											2023-11-18 11:53:35 +08:00
										 |  |  |                     <div className='leading-[18px] text-xs font-normal text-gray-500'> | 
					
						
							| 
									
										
										
										
											2024-09-21 17:30:30 +08:00
										 |  |  |                       <a target='_blank' rel='noopener noreferrer' href='https://docs.dify.ai/guides/knowledge-base/create-knowledge-and-upload-documents#id-4-retrieval-settings' className='text-[#155eef]'>{t('datasetSettings.form.retrievalSetting.learnMore')}</a> | 
					
						
							| 
									
										
										
										
											2023-11-18 11:53:35 +08:00
										 |  |  |                       {t('datasetSettings.form.retrievalSetting.longDescription')} | 
					
						
							|  |  |  |                     </div> | 
					
						
							|  |  |  |                   </div> | 
					
						
							|  |  |  |                 ) | 
					
						
							|  |  |  |                 : ( | 
					
						
							|  |  |  |                   <div className={cn(s.label, 'flex justify-between items-center')}> | 
					
						
							|  |  |  |                     <div>{t('datasetSettings.form.retrievalSetting.title')}</div> | 
					
						
							|  |  |  |                   </div> | 
					
						
							|  |  |  |                 )} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |               <div className='max-w-[640px]'> | 
					
						
							| 
									
										
										
										
											2024-08-27 11:25:27 +08:00
										 |  |  |                 { | 
					
						
							|  |  |  |                   getIndexing_technique() === IndexingType.QUALIFIED | 
					
						
							|  |  |  |                     ? ( | 
					
						
							|  |  |  |                       <RetrievalMethodConfig | 
					
						
							| 
									
										
										
										
											2023-11-18 11:53:35 +08:00
										 |  |  |                         value={retrievalConfig} | 
					
						
							| 
									
										
										
										
											2024-08-27 11:25:27 +08:00
										 |  |  |                         onChange={setRetrievalConfig} | 
					
						
							| 
									
										
										
										
											2023-11-18 11:53:35 +08:00
										 |  |  |                       /> | 
					
						
							| 
									
										
										
										
											2024-08-27 11:25:27 +08:00
										 |  |  |                     ) | 
					
						
							|  |  |  |                     : ( | 
					
						
							|  |  |  |                       <EconomicalRetrievalMethodConfig | 
					
						
							|  |  |  |                         value={retrievalConfig} | 
					
						
							|  |  |  |                         onChange={setRetrievalConfig} | 
					
						
							|  |  |  |                       /> | 
					
						
							|  |  |  |                     ) | 
					
						
							|  |  |  |                 } | 
					
						
							| 
									
										
										
										
											2023-11-18 11:53:35 +08:00
										 |  |  |               </div> | 
					
						
							|  |  |  |             </div> | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-06-16 21:47:51 +08:00
										 |  |  |             <div className={s.source}> | 
					
						
							|  |  |  |               <div className={s.sourceContent}> | 
					
						
							|  |  |  |                 {dataSourceType === DataSourceType.FILE && ( | 
					
						
							|  |  |  |                   <> | 
					
						
							|  |  |  |                     <div className='mb-2 text-xs font-medium text-gray-500'>{t('datasetCreation.stepTwo.fileSource')}</div> | 
					
						
							|  |  |  |                     <div className='flex items-center text-sm leading-6 font-medium text-gray-800'> | 
					
						
							| 
									
										
										
										
											2023-09-24 14:35:20 +08:00
										 |  |  |                       <span className={cn(s.fileIcon, files.length && s[files[0].extension || ''])} /> | 
					
						
							| 
									
										
										
										
											2023-06-21 09:44:01 +08:00
										 |  |  |                       {getFileName(files[0].name || '')} | 
					
						
							|  |  |  |                       {files.length > 1 && ( | 
					
						
							|  |  |  |                         <span className={s.sourceCount}> | 
					
						
							|  |  |  |                           <span>{t('datasetCreation.stepTwo.other')}</span> | 
					
						
							|  |  |  |                           <span>{files.length - 1}</span> | 
					
						
							|  |  |  |                           <span>{t('datasetCreation.stepTwo.fileUnit')}</span> | 
					
						
							|  |  |  |                         </span> | 
					
						
							|  |  |  |                       )} | 
					
						
							| 
									
										
										
										
											2023-06-16 21:47:51 +08:00
										 |  |  |                     </div> | 
					
						
							|  |  |  |                   </> | 
					
						
							|  |  |  |                 )} | 
					
						
							|  |  |  |                 {dataSourceType === DataSourceType.NOTION && ( | 
					
						
							|  |  |  |                   <> | 
					
						
							|  |  |  |                     <div className='mb-2 text-xs font-medium text-gray-500'>{t('datasetCreation.stepTwo.notionSource')}</div> | 
					
						
							|  |  |  |                     <div className='flex items-center text-sm leading-6 font-medium text-gray-800'> | 
					
						
							|  |  |  |                       <NotionIcon | 
					
						
							|  |  |  |                         className='shrink-0 mr-1' | 
					
						
							|  |  |  |                         type='page' | 
					
						
							|  |  |  |                         src={notionPages[0]?.page_icon} | 
					
						
							|  |  |  |                       /> | 
					
						
							|  |  |  |                       {notionPages[0]?.page_name} | 
					
						
							|  |  |  |                       {notionPages.length > 1 && ( | 
					
						
							|  |  |  |                         <span className={s.sourceCount}> | 
					
						
							|  |  |  |                           <span>{t('datasetCreation.stepTwo.other')}</span> | 
					
						
							|  |  |  |                           <span>{notionPages.length - 1}</span> | 
					
						
							|  |  |  |                           <span>{t('datasetCreation.stepTwo.notionUnit')}</span> | 
					
						
							|  |  |  |                         </span> | 
					
						
							|  |  |  |                       )} | 
					
						
							|  |  |  |                     </div> | 
					
						
							| 
									
										
										
										
											2024-06-14 22:02:41 +08:00
										 |  |  |                   </> | 
					
						
							|  |  |  |                 )} | 
					
						
							|  |  |  |                 {dataSourceType === DataSourceType.WEB && ( | 
					
						
							|  |  |  |                   <> | 
					
						
							|  |  |  |                     <div className='mb-2 text-xs font-medium text-gray-500'>{t('datasetCreation.stepTwo.websiteSource')}</div> | 
					
						
							|  |  |  |                     <div className='flex items-center text-sm leading-6 font-medium text-gray-800'> | 
					
						
							|  |  |  |                       <Globe01 className='shrink-0 mr-1' /> | 
					
						
							|  |  |  |                       <span className='grow w-0 truncate'>{websitePages[0].source_url}</span> | 
					
						
							|  |  |  |                       {websitePages.length > 1 && ( | 
					
						
							|  |  |  |                         <span className={s.sourceCount}> | 
					
						
							|  |  |  |                           <span>{t('datasetCreation.stepTwo.other')}</span> | 
					
						
							|  |  |  |                           <span>{websitePages.length - 1}</span> | 
					
						
							|  |  |  |                           <span>{t('datasetCreation.stepTwo.webpageUnit')}</span> | 
					
						
							|  |  |  |                         </span> | 
					
						
							|  |  |  |                       )} | 
					
						
							|  |  |  |                     </div> | 
					
						
							| 
									
										
										
										
											2023-06-16 21:47:51 +08:00
										 |  |  |                   </> | 
					
						
							|  |  |  |                 )} | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |               </div> | 
					
						
							|  |  |  |               <div className={s.divider} /> | 
					
						
							| 
									
										
										
										
											2023-06-16 21:47:51 +08:00
										 |  |  |               <div className={s.segmentCount}> | 
					
						
							| 
									
										
										
										
											2024-09-07 15:59:38 +07:00
										 |  |  |                 <div className='mb-2 text-xs font-medium text-gray-500'>{t('datasetCreation.stepTwo.estimateSegment')}</div> | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |                 <div className='flex items-center text-sm leading-6 font-medium text-gray-800'> | 
					
						
							|  |  |  |                   { | 
					
						
							| 
									
										
										
										
											2023-06-06 10:52:02 +08:00
										 |  |  |                     fileIndexingEstimate | 
					
						
							|  |  |  |                       ? ( | 
					
						
							|  |  |  |                         <div className='text-xs font-medium text-gray-800'>{formatNumber(fileIndexingEstimate.total_segments)} </div> | 
					
						
							|  |  |  |                       ) | 
					
						
							|  |  |  |                       : ( | 
					
						
							|  |  |  |                         <div className={s.calculating}>{t('datasetCreation.stepTwo.calculating')}</div> | 
					
						
							|  |  |  |                       ) | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |                   } | 
					
						
							|  |  |  |                 </div> | 
					
						
							|  |  |  |               </div> | 
					
						
							|  |  |  |             </div> | 
					
						
							| 
									
										
										
										
											2023-06-06 10:52:02 +08:00
										 |  |  |             {!isSetting | 
					
						
							|  |  |  |               ? ( | 
					
						
							|  |  |  |                 <div className='flex items-center mt-8 py-2'> | 
					
						
							| 
									
										
										
										
											2024-03-08 19:27:02 +08:00
										 |  |  |                   <Button onClick={() => onStepChange && onStepChange(-1)}>{t('datasetCreation.stepTwo.previousStep')}</Button> | 
					
						
							| 
									
										
										
										
											2023-06-06 10:52:02 +08:00
										 |  |  |                   <div className={s.divider} /> | 
					
						
							| 
									
										
										
										
											2024-06-19 14:13:16 +08:00
										 |  |  |                   <Button loading={isCreating} variant='primary' onClick={createHandle}>{t('datasetCreation.stepTwo.nextStep')}</Button> | 
					
						
							| 
									
										
										
										
											2023-06-06 10:52:02 +08:00
										 |  |  |                 </div> | 
					
						
							|  |  |  |               ) | 
					
						
							|  |  |  |               : ( | 
					
						
							|  |  |  |                 <div className='flex items-center mt-8 py-2'> | 
					
						
							| 
									
										
										
										
											2024-06-19 14:13:16 +08:00
										 |  |  |                   <Button loading={isCreating} variant='primary' onClick={createHandle}>{t('datasetCreation.stepTwo.save')}</Button> | 
					
						
							| 
									
										
										
										
											2023-06-06 10:52:02 +08:00
										 |  |  |                   <Button className='ml-2' onClick={onCancel}>{t('datasetCreation.stepTwo.cancel')}</Button> | 
					
						
							|  |  |  |                 </div> | 
					
						
							|  |  |  |               )} | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |           </div> | 
					
						
							|  |  |  |         </div> | 
					
						
							|  |  |  |       </div> | 
					
						
							| 
									
										
										
										
											2023-11-27 11:47:48 +08:00
										 |  |  |       <FloatRightContainer isMobile={isMobile} isOpen={showPreview} onClose={hidePreview} footer={null}> | 
					
						
							| 
									
										
										
										
											2023-12-27 17:59:50 +08:00
										 |  |  |         {showPreview && <div ref={previewScrollRef} className={cn(s.previewWrap, isMobile && s.isMobile, 'relative h-full overflow-y-scroll border-l border-[#F2F4F7]')}> | 
					
						
							| 
									
										
										
										
											2023-11-27 11:47:48 +08:00
										 |  |  |           <div className={cn(s.previewHeader, previewScrolled && `${s.fixed} pb-3`)}> | 
					
						
							|  |  |  |             <div className='flex items-center justify-between px-8'> | 
					
						
							|  |  |  |               <div className='grow flex items-center'> | 
					
						
							|  |  |  |                 <div>{t('datasetCreation.stepTwo.previewTitle')}</div> | 
					
						
							|  |  |  |                 {docForm === DocForm.QA && !previewSwitched && ( | 
					
						
							| 
									
										
										
										
											2024-10-14 13:32:13 +08:00
										 |  |  |                   <Button className='ml-2' variant='secondary-accent' onClick={() => previewSwitch()}>{t('datasetCreation.stepTwo.previewButton')}</Button> | 
					
						
							| 
									
										
										
										
											2023-11-27 11:47:48 +08:00
										 |  |  |                 )} | 
					
						
							|  |  |  |               </div> | 
					
						
							|  |  |  |               <div className='flex items-center justify-center w-6 h-6 cursor-pointer' onClick={hidePreview}> | 
					
						
							|  |  |  |                 <XMarkIcon className='h-4 w-4'></XMarkIcon> | 
					
						
							| 
									
										
										
										
											2023-06-06 10:52:02 +08:00
										 |  |  |               </div> | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |             </div> | 
					
						
							| 
									
										
										
										
											2023-11-27 11:47:48 +08:00
										 |  |  |             {docForm === DocForm.QA && !previewSwitched && ( | 
					
						
							|  |  |  |               <div className='px-8 pr-12 text-xs text-gray-500'> | 
					
						
							|  |  |  |                 <span>{t('datasetCreation.stepTwo.previewSwitchTipStart')}</span> | 
					
						
							|  |  |  |                 <span className='text-amber-600'>{t('datasetCreation.stepTwo.previewSwitchTipEnd')}</span> | 
					
						
							|  |  |  |               </div> | 
					
						
							|  |  |  |             )} | 
					
						
							|  |  |  |           </div> | 
					
						
							|  |  |  |           <div className='my-4 px-8 space-y-4'> | 
					
						
							|  |  |  |             {previewSwitched && docForm === DocForm.QA && fileIndexingEstimate?.qa_preview && ( | 
					
						
							|  |  |  |               <> | 
					
						
							|  |  |  |                 {fileIndexingEstimate?.qa_preview.map((item, index) => ( | 
					
						
							|  |  |  |                   <PreviewItem type={PreviewType.QA} key={item.question} qa={item} index={index + 1} /> | 
					
						
							|  |  |  |                 ))} | 
					
						
							|  |  |  |               </> | 
					
						
							|  |  |  |             )} | 
					
						
							|  |  |  |             {(docForm === DocForm.TEXT || !previewSwitched) && fileIndexingEstimate?.preview && ( | 
					
						
							|  |  |  |               <> | 
					
						
							|  |  |  |                 {fileIndexingEstimate?.preview.map((item, index) => ( | 
					
						
							|  |  |  |                   <PreviewItem type={PreviewType.TEXT} key={item} content={item} index={index + 1} /> | 
					
						
							|  |  |  |                 ))} | 
					
						
							|  |  |  |               </> | 
					
						
							|  |  |  |             )} | 
					
						
							|  |  |  |             {previewSwitched && docForm === DocForm.QA && !fileIndexingEstimate?.qa_preview && ( | 
					
						
							|  |  |  |               <div className='flex items-center justify-center h-[200px]'> | 
					
						
							|  |  |  |                 <Loading type='area' /> | 
					
						
							|  |  |  |               </div> | 
					
						
							|  |  |  |             )} | 
					
						
							|  |  |  |             {!previewSwitched && !fileIndexingEstimate?.preview && ( | 
					
						
							|  |  |  |               <div className='flex items-center justify-center h-[200px]'> | 
					
						
							|  |  |  |                 <Loading type='area' /> | 
					
						
							|  |  |  |               </div> | 
					
						
							|  |  |  |             )} | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |           </div> | 
					
						
							| 
									
										
										
										
											2023-11-27 11:47:48 +08:00
										 |  |  |         </div>} | 
					
						
							|  |  |  |         {!showPreview && ( | 
					
						
							|  |  |  |           <div className={cn(s.sideTip)}> | 
					
						
							|  |  |  |             <div className={s.tipCard}> | 
					
						
							|  |  |  |               <span className={s.icon} /> | 
					
						
							|  |  |  |               <div className={s.title}>{t('datasetCreation.stepTwo.sideTipTitle')}</div> | 
					
						
							|  |  |  |               <div className={s.content}> | 
					
						
							|  |  |  |                 <p className='mb-3'>{t('datasetCreation.stepTwo.sideTipP1')}</p> | 
					
						
							|  |  |  |                 <p className='mb-3'>{t('datasetCreation.stepTwo.sideTipP2')}</p> | 
					
						
							|  |  |  |                 <p className='mb-3'>{t('datasetCreation.stepTwo.sideTipP3')}</p> | 
					
						
							|  |  |  |                 <p>{t('datasetCreation.stepTwo.sideTipP4')}</p> | 
					
						
							|  |  |  |               </div> | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |             </div> | 
					
						
							|  |  |  |           </div> | 
					
						
							| 
									
										
										
										
											2023-11-27 11:47:48 +08:00
										 |  |  |         )} | 
					
						
							|  |  |  |       </FloatRightContainer> | 
					
						
							| 
									
										
										
										
											2023-05-15 08:51:32 +08:00
										 |  |  |     </div> | 
					
						
							|  |  |  |   ) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | export default StepTwo |