2025-05-28 10:07:12 +08:00
|
|
|
import React, { useState } from 'react'
|
2025-05-22 23:05:58 +08:00
|
|
|
import { useTranslation } from 'react-i18next'
|
2025-06-13 10:54:31 +08:00
|
|
|
import PreviewContainer from '../../../preview/container'
|
2025-05-22 23:05:58 +08:00
|
|
|
import { PreviewHeader } from '../../../preview/header'
|
|
|
|
import type { CrawlResultItem, CustomFile, DocumentItem, FileIndexingEstimateResponse } from '@/models/datasets'
|
2025-05-28 10:07:12 +08:00
|
|
|
import { ChunkingMode } from '@/models/datasets'
|
2025-05-22 23:05:58 +08:00
|
|
|
import type { NotionPage } from '@/models/common'
|
|
|
|
import PreviewDocumentPicker from '../../../common/document-picker/preview-document-picker'
|
|
|
|
import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
|
|
|
|
import { ChunkContainer, QAPreview } from '../../../chunk'
|
|
|
|
import { FormattedText } from '../../../formatted-text/formatted'
|
|
|
|
import { PreviewSlice } from '../../../formatted-text/flavours/preview-slice'
|
|
|
|
import { SkeletonContainer, SkeletonPoint, SkeletonRectangle, SkeletonRow } from '@/app/components/base/skeleton'
|
|
|
|
import { RiSearchEyeLine } from '@remixicon/react'
|
|
|
|
import Badge from '@/app/components/base/badge'
|
2025-05-23 10:29:59 +08:00
|
|
|
import Button from '@/app/components/base/button'
|
2025-05-28 10:07:12 +08:00
|
|
|
import { DatasourceType } from '@/models/pipeline'
|
2025-05-22 23:05:58 +08:00
|
|
|
|
|
|
|
/**
 * Props accepted by the `ChunkPreview` component.
 */
type ChunkPreviewProps = {
  /** Decides which document picker (local file, online document or website page) is shown in the header. */
  dataSourceType: DatasourceType
  /** Files offered by the local-file picker; the first entry is the initial selection. */
  files: CustomFile[]
  /** Pages offered by the online-document picker; the first entry is the initial selection. */
  onlineDocuments: NotionPage[]
  /** Crawl results offered by the website picker; the first entry is the initial selection. */
  websitePages: CrawlResultItem[]
  /** When true, the empty state with the preview button is rendered. */
  isIdle: boolean
  /** When true, skeleton placeholders are rendered and the chunk lists are hidden. */
  isPending: boolean
  /** Estimate result that supplies `total_segments`, `preview` and `qa_preview`; may be undefined. */
  estimateData: FileIndexingEstimateResponse | undefined
  /** Click handler of the preview button shown in the idle state. */
  onPreview: () => void
  /** Called with the newly selected file when the local-file picker changes. */
  handlePreviewFileChange: (file: DocumentItem) => void
  /** Called with the newly selected page when the online-document picker changes. */
  handlePreviewOnlineDocumentChange: (page: NotionPage) => void
  /** Called with the newly selected page when the website picker changes. */
  handlePreviewWebsitePageChange: (page: CrawlResultItem) => void
}
|
|
|
|
|
|
|
|
/**
 * Chunk preview panel.
 *
 * Header: a document picker matching `dataSourceType` and, unless the dataset's
 * `doc_form` is QA, a badge with the estimated chunk count.
 *
 * Body (depending on state):
 * - not pending + QA mode: one container per question/answer pair;
 * - not pending + text mode: one container per chunk;
 * - not pending + parent-child mode: one container per parent chunk with its child slices;
 * - idle: an empty state with a button that triggers `onPreview`;
 * - pending: ten skeleton placeholders.
 *
 * The currently selected file/page for each picker is kept in local state and
 * also reported to the parent through the matching `handlePreview*Change` callback.
 */
const ChunkPreview = ({
  dataSourceType,
  files,
  onlineDocuments,
  websitePages,
  isIdle,
  isPending,
  estimateData,
  onPreview,
  handlePreviewFileChange,
  handlePreviewOnlineDocumentChange,
  handlePreviewWebsitePageChange,
}: ChunkPreviewProps) => {
  const { t } = useTranslation()
  const currentDocForm = useDatasetDetailContextWithSelector(s => s.dataset?.doc_form)

  // Each picker starts on the first entry of its list.
  const [previewFile, setPreviewFile] = useState<DocumentItem>(files[0] as DocumentItem)
  const [previewOnlineDocument, setPreviewOnlineDocument] = useState<NotionPage>(onlineDocuments[0])
  const [previewWebsitePage, setPreviewWebsitePage] = useState<CrawlResultItem>(websitePages[0])

  return (
    <PreviewContainer
      header={<PreviewHeader
        title={t('datasetCreation.stepTwo.preview')}
      >
        <div className='flex items-center gap-1'>
          {dataSourceType === DatasourceType.localFile
            && <PreviewDocumentPicker
              files={files as Array<Required<CustomFile>>}
              onChange={(selected) => {
                setPreviewFile(selected)
                handlePreviewFileChange(selected)
              }}
              value={previewFile}
            />
          }
          {dataSourceType === DatasourceType.onlineDocument
            && <PreviewDocumentPicker
              files={
                onlineDocuments.map(page => ({
                  id: page.page_id,
                  name: page.page_name,
                  extension: 'md',
                }))
              }
              onChange={(selected) => {
                const selectedPage = onlineDocuments.find(page => page.page_id === selected.id)
                // Never push `undefined` into state or the parent callback if the id has no match.
                if (!selectedPage)
                  return
                setPreviewOnlineDocument(selectedPage)
                handlePreviewOnlineDocumentChange(selectedPage)
              }}
              value={{
                id: previewOnlineDocument?.page_id || '',
                name: previewOnlineDocument?.page_name || '',
                extension: 'md',
              }}
            />
          }
          {dataSourceType === DatasourceType.websiteCrawl
            && <PreviewDocumentPicker
              files={
                websitePages.map(page => ({
                  id: page.source_url,
                  name: page.title,
                  extension: 'md',
                }))
              }
              onChange={(selected) => {
                const selectedPage = websitePages.find(page => page.source_url === selected.id)
                // Never push `undefined` into state or the parent callback if the id has no match.
                if (!selectedPage)
                  return
                setPreviewWebsitePage(selectedPage)
                handlePreviewWebsitePageChange(selectedPage)
              }}
              value={
                {
                  id: previewWebsitePage?.source_url || '',
                  name: previewWebsitePage?.title || '',
                  extension: 'md',
                }
              }
            />
          }
          {
            currentDocForm !== ChunkingMode.qa
            && <Badge text={t('datasetCreation.stepTwo.previewChunkCount', {
              count: estimateData?.total_segments || 0,
            }) as string}
            />
          }
        </div>
      </PreviewHeader>}
      className='relative flex h-full w-full shrink-0'
      mainClassName='space-y-6'
    >
      {/* Keys combine the index with the content so identical chunks never share a key. */}
      {!isPending && currentDocForm === ChunkingMode.qa && estimateData?.qa_preview && (
        estimateData?.qa_preview.map((item, index) => (
          <ChunkContainer
            key={`${index}-${item.question}`}
            label={`Chunk-${index + 1}`}
            characterCount={item.question.length + item.answer.length}
          >
            <QAPreview qa={item} />
          </ChunkContainer>
        ))
      )}
      {!isPending && currentDocForm === ChunkingMode.text && estimateData?.preview && (
        estimateData?.preview.map((item, index) => (
          <ChunkContainer
            key={`${index}-${item.content}`}
            label={`Chunk-${index + 1}`}
            characterCount={item.content.length}
          >
            {item.content}
          </ChunkContainer>
        ))
      )}
      {!isPending && currentDocForm === ChunkingMode.parentChild && estimateData?.preview && (
        estimateData?.preview?.map((item, index) => {
          const indexForLabel = index + 1
          return (
            <ChunkContainer
              key={`${index}-${item.content}`}
              label={`Chunk-${indexForLabel}`}
              characterCount={item.content.length}
            >
              <FormattedText>
                {item.child_chunks.map((child, childIndex) => {
                  const childIndexForLabel = childIndex + 1
                  return (
                    <PreviewSlice
                      key={`${childIndex}-${child}`}
                      label={`C-${childIndexForLabel}`}
                      text={child}
                      tooltip={`Child-chunk-${childIndexForLabel} · ${child.length} Characters`}
                      labelInnerClassName='text-[10px] font-semibold align-bottom leading-7'
                      dividerClassName='leading-7'
                    />
                  )
                })}
              </FormattedText>
            </ChunkContainer>
          )
        })
      )}
      {isIdle && (
        <div className='flex h-full w-full items-center justify-center'>
          <div className='flex flex-col items-center justify-center gap-3 pb-4'>
            <RiSearchEyeLine className='size-10 text-text-empty-state-icon' />
            <p className='text-sm text-text-tertiary'>
              {t('datasetCreation.stepTwo.previewChunkTip')}
            </p>
            <Button onClick={onPreview}>
              {t('datasetPipeline.addDocuments.stepTwo.previewChunks')}
            </Button>
          </div>
        </div>
      )}
      {isPending && (
        <div className='h-full w-full space-y-6 overflow-hidden'>
          {/* Fixed-length placeholder list, so the index is a stable key here. */}
          {Array.from({ length: 10 }, (_, i) => (
            <SkeletonContainer key={i}>
              <SkeletonRow>
                <SkeletonRectangle className='w-20' />
                <SkeletonPoint />
                <SkeletonRectangle className='w-24' />
              </SkeletonRow>
              <SkeletonRectangle className='w-full' />
              <SkeletonRectangle className='w-full' />
              <SkeletonRectangle className='w-[422px]' />
            </SkeletonContainer>
          ))}
        </div>
      )}
    </PreviewContainer>
  )
}
|
|
|
|
|
|
|
|
// Default export: the ChunkPreview component wrapped in React.memo.
export default React.memo(ChunkPreview)
|