2024-10-25 16:58:50 +08:00
|
|
|
import type { DataSourceNotionPage, DataSourceProvider } from './common'
|
2024-08-19 09:16:33 +08:00
|
|
|
import type { AppIconType, AppMode, RetrievalConfig } from '@/types/app'
|
2024-04-24 15:02:29 +08:00
|
|
|
import type { Tag } from '@/app/components/base/tag-management/constant'
|
2024-12-26 12:01:51 +08:00
|
|
|
import type { IndexingType } from '@/app/components/datasets/create/step-two'
|
2025-03-18 11:01:06 +08:00
|
|
|
import type { MetadataFilteringVariableType } from '@/app/components/workflow/nodes/knowledge-retrieval/types'
|
|
|
|
import type { MetadataItemWithValue } from '@/app/components/datasets/metadata/types'
|
2025-06-11 16:38:42 +08:00
|
|
|
import { ExternalKnowledgeBase, General, ParentChild, Qa } from '@/app/components/base/icons/src/public/knowledge/dataset-card'
|
2025-05-08 13:48:14 +08:00
|
|
|
import { GeneralChunk, ParentChildChunk, QuestionAndAnswer } from '@/app/components/base/icons/src/vender/knowledge'
|
2025-06-12 15:13:15 +08:00
|
|
|
import type { DatasourceType } from './pipeline'
|
2023-06-16 21:47:51 +08:00
|
|
|
|
|
|
|
/** String-valued enum of the data source types declared in this module. */
export enum DataSourceType {
  FILE = 'upload_file',
  NOTION = 'notion_import',
  WEB = 'website_crawl',
}
|
2023-05-15 08:51:32 +08:00
|
|
|
|
2025-02-06 14:26:16 +08:00
|
|
|
/** String-valued enum of dataset permission levels. */
export enum DatasetPermission {
  onlyMe = 'only_me',
  allTeamMembers = 'all_team_members',
  partialMembers = 'partial_members',
}
|
2024-07-09 17:47:54 +08:00
|
|
|
|
2024-12-26 12:01:51 +08:00
|
|
|
/**
 * String-valued enum of chunking modes.
 * Used in this module as the type of `doc_form` fields and as the key type of
 * the `DOC_FORM_*` lookup tables.
 */
export enum ChunkingMode {
  text = 'text_model', // General text
  qa = 'qa_model', // General QA
  parentChild = 'hierarchical_model', // Parent-Child
  // graph = 'graph', // todo: Graph RAG
}
|
|
|
|
|
2025-03-18 11:01:06 +08:00
|
|
|
/**
 * A metadata entry with an id, a name, a filtering-variable type and a string value.
 * Referenced by `DataSet.doc_metadata` in this module.
 */
export type MetadataInDoc = {
  value: string
  id: string
  type: MetadataFilteringVariableType
  name: string
}
|
|
|
|
|
2025-05-07 11:30:13 +08:00
|
|
|
/**
 * Icon description: `icon` and `icon_type` are required,
 * `icon_background` and `icon_url` are optional.
 */
export type IconInfo = {
  icon: string
  icon_background?: string
  icon_type: AppIconType
  icon_url?: string
}
|
|
|
|
|
2023-05-15 08:51:32 +08:00
|
|
|
/**
 * Shape of a dataset object.
 *
 * NOTE(review): field meanings are taken from their names only; units of the
 * numeric timestamp fields and nullability are not visible here — confirm
 * against the backend API.
 */
export type DataSet = {
  id: string
  name: string
  indexing_status: DocumentIndexingStatus
  icon_info: IconInfo
  description: string
  permission: DatasetPermission
  data_source_type: DataSourceType
  indexing_technique: IndexingType
  created_by: string
  updated_by: string
  updated_at: number
  app_count: number
  doc_form: ChunkingMode
  document_count: number
  total_document_count: number
  total_available_documents?: number
  word_count: number
  provider: string
  embedding_model: string
  embedding_model_provider: string
  embedding_available: boolean
  // Both `retrieval_model_dict` and `retrieval_model` are declared with the same type.
  retrieval_model_dict: RetrievalConfig
  retrieval_model: RetrievalConfig
  tags: Tag[]
  partial_member_list?: string[]
  external_knowledge_info: {
    external_knowledge_id: string
    external_knowledge_api_id: string
    external_knowledge_api_name: string
    external_knowledge_api_endpoint: string
  }
  external_retrieval_model: {
    top_k: number
    score_threshold: number
    score_threshold_enabled: boolean
  }
  built_in_field_enabled: boolean
  doc_metadata?: MetadataInDoc[]
  keyword_number?: number
  pipeline_id?: string
  is_published?: boolean // Indicates if the pipeline is published
  runtime_mode: 'rag_pipeline' | 'general'
}
|
|
|
|
|
|
|
|
/**
 * An external API entry: identity, endpoint settings and the datasets bound to it.
 * Note that `created_at` is typed as a string here.
 */
export type ExternalAPIItem = {
  id: string
  tenant_id: string
  name: string
  description: string
  settings: {
    endpoint: string
    api_key: string
  }
  dataset_bindings: { id: string; name: string }[]
  created_by: string
  created_at: string
}
|
|
|
|
|
|
|
|
/**
 * An external knowledge entry.
 * `provider` is fixed to the literal `'external'`, and `data_source_type` and
 * `indexing_technique` are typed as `null`. Timestamps are strings here.
 */
export type ExternalKnowledgeItem = {
  id: string
  name: string
  description: string | null
  provider: 'external'
  permission: DatasetPermission
  data_source_type: null
  indexing_technique: null
  app_count: number
  document_count: number
  word_count: number
  created_by: string
  created_at: string
  updated_by: string
  updated_at: string
  tags: Tag[]
}
|
|
|
|
|
|
|
|
/** Response carrying a `result` of either `'success'` or `'error'`. */
export type ExternalAPIDeleteResponse = {
  result: 'success' | 'error'
}
|
|
|
|
|
|
|
|
/** Usage information: an in-use flag plus a count. */
export type ExternalAPIUsage = {
  is_using: boolean
  count: number
}
|
|
|
|
|
2023-08-16 23:14:27 +08:00
|
|
|
/**
 * The built-in `File` type intersected with optional extra fields.
 * All added fields are optional.
 */
export type CustomFile = File & {
  id?: string
  extension?: string
  mime_type?: string
  created_by?: string
  created_at?: number
}
|
|
|
|
|
2024-12-26 12:01:51 +08:00
|
|
|
/** Minimal document descriptor: id, name and extension. */
export type DocumentItem = {
  id: string
  name: string
  extension: string
}
|
|
|
|
|
2024-06-14 22:02:41 +08:00
|
|
|
/**
 * Options for a crawl.
 * `limit` and `max_depth` accept either a number or a string.
 * NOTE(review): the format of `includes` / `excludes` is not visible here.
 */
export type CrawlOptions = {
  crawl_sub_pages: boolean
  only_main_content: boolean
  includes: string
  excludes: string
  limit: number | string
  max_depth: number | string
  use_sitemap: boolean
}
|
|
|
|
|
|
|
|
/** One crawl result: title, markdown body, description and source URL. */
export type CrawlResultItem = {
  title: string
  markdown: string
  description: string
  source_url: string
}
|
|
|
|
|
2025-06-23 15:38:24 +08:00
|
|
|
/**
 * A list of crawl result items together with a `time_consuming` value,
 * which may be a number or a string.
 */
export type CrawlResult = {
  data: CrawlResultItem[]
  time_consuming: number | string
}
|
|
|
|
|
|
|
|
/** String-valued enum of crawl steps. */
export enum CrawlStep {
  init = 'init',
  running = 'running',
  finished = 'finished',
}
|
|
|
|
|
2023-08-16 23:14:27 +08:00
|
|
|
/**
 * A file paired with an id and a numeric progress value.
 * NOTE(review): the range of `progress` is not visible here.
 */
export type FileItem = {
  fileID: string
  file: CustomFile
  progress: number
}
|
|
|
|
|
2025-01-09 17:44:11 +08:00
|
|
|
/**
 * A URL plus query parameters for fetching datasets.
 * Only `params.page` is required; every other parameter is optional.
 */
export type FetchDatasetsParams = {
  url: string
  params: {
    page: number
    ids?: string[]
    tag_ids?: string[]
    limit?: number
    include_all?: boolean
    keyword?: string
  }
}
|
|
|
|
|
2025-05-16 10:50:31 +08:00
|
|
|
/**
 * Parameters for a dataset list request.
 * `initialPage` and `limit` are required; the filters are optional.
 */
export type DatasetListRequest = {
  initialPage: number
  tag_ids?: string[]
  limit: number
  include_all?: boolean
  keyword?: string
}
|
|
|
|
|
2023-05-15 08:51:32 +08:00
|
|
|
/** A page of `DataSet` items with pagination fields. */
export type DataSetListResponse = {
  data: DataSet[]
  has_more: boolean
  limit: number
  page: number
  total: number
}
|
|
|
|
|
2024-09-30 15:38:43 +08:00
|
|
|
/** A page of `ExternalAPIItem` items with pagination fields. */
export type ExternalAPIListResponse = {
  data: ExternalAPIItem[]
  has_more: boolean
  limit: number
  page: number
  total: number
}
|
|
|
|
|
2023-07-28 20:47:15 +08:00
|
|
|
/** A question/answer pair. */
export type QA = {
  question: string
  answer: string
}
|
|
|
|
|
2023-05-15 08:51:32 +08:00
|
|
|
/**
 * Indexing estimate: token count, price, currency, segment count and previews.
 * `preview` entries carry a `content` string and a list of `child_chunks`;
 * `qa_preview` is optional.
 */
export type IndexingEstimateResponse = {
  tokens: number
  total_price: number
  currency: string
  total_segments: number
  preview: Array<{ content: string; child_chunks: string[] }>
  qa_preview?: QA[]
}
|
|
|
|
|
2023-06-16 21:47:51 +08:00
|
|
|
/** `IndexingEstimateResponse` extended with a `total_nodes` count. */
export type FileIndexingEstimateResponse = {
  total_nodes: number
} & IndexingEstimateResponse
|
2023-05-15 08:51:32 +08:00
|
|
|
|
|
|
|
/**
 * Indexing status of one item: current status, per-stage timestamps and
 * segment counters.
 *
 * NOTE(review): `completed_at`, `paused_at`, `error` and `stopped_at` are typed
 * `any`; their real shapes are not visible here. They are left as `any` so
 * existing callers keep compiling — tighten once the backend contract is confirmed.
 */
export type IndexingStatusResponse = {
  id: string
  indexing_status: DocumentIndexingStatus
  processing_started_at: number
  parsing_completed_at: number
  cleaning_completed_at: number
  splitting_completed_at: number
  completed_at: any
  paused_at: any
  error: any
  stopped_at: any
  completed_segments: number
  total_segments: number
}
|
2023-06-16 21:47:51 +08:00
|
|
|
/** Wrapper holding a list of `IndexingStatusResponse` items under `data`. */
export type IndexingStatusBatchResponse = {
  data: IndexingStatusResponse[]
}
|
2023-05-15 08:51:32 +08:00
|
|
|
|
2024-12-26 12:01:51 +08:00
|
|
|
/**
 * String-valued enum of process modes.
 * Note the member names differ from their values: `general` maps to
 * `'custom'` and `parentChild` maps to `'hierarchical'`.
 */
export enum ProcessMode {
  general = 'custom',
  parentChild = 'hierarchical',
}
|
|
|
|
|
|
|
|
/** Parent mode literal union; used as the type of `Rules.parent_mode`. */
export type ParentMode = 'full-doc' | 'paragraph'
|
2023-05-15 08:51:32 +08:00
|
|
|
|
|
|
|
/** A process rule (`mode` + `rules`) together with its `limits`. */
export type ProcessRuleResponse = {
  mode: ProcessMode
  rules: Rules
  limits: Limits
}
|
|
|
|
|
|
|
|
/**
 * Processing rules: pre-processing rules, a segmentation config, the parent
 * mode and a second segmentation config for sub-chunks.
 */
export type Rules = {
  pre_processing_rules: PreProcessingRule[]
  segmentation: Segmentation
  parent_mode: ParentMode
  subchunk_segmentation: Segmentation
}
|
|
|
|
|
2024-12-02 16:29:25 +09:00
|
|
|
/** Limits object carrying `indexing_max_segmentation_tokens_length`. */
export type Limits = {
  indexing_max_segmentation_tokens_length: number
}
|
|
|
|
|
2023-05-15 08:51:32 +08:00
|
|
|
/** A pre-processing rule identified by `id` with an `enabled` flag. */
export type PreProcessingRule = {
  id: string
  enabled: boolean
}
|
|
|
|
|
|
|
|
/**
 * Segmentation config: separator string, max token count and an optional
 * chunk overlap.
 */
export type Segmentation = {
  separator: string
  max_tokens: number
  chunk_overlap?: number
}
|
|
|
|
|
|
|
|
/**
 * Readonly tuple of document indexing status strings.
 * `as const` keeps the literal types so `DocumentIndexingStatus` can be
 * derived from it as a union.
 */
export const DocumentIndexingStatusList = [
  'waiting',
  'parsing',
  'cleaning',
  'splitting',
  'indexing',
  'paused',
  'error',
  'completed',
] as const
|
|
|
|
|
|
|
|
/** Union of the element types of `DocumentIndexingStatusList`. */
export type DocumentIndexingStatus = typeof DocumentIndexingStatusList[number]
|
|
|
|
|
|
|
|
/**
 * Readonly tuple of document display status strings.
 * `as const` keeps the literal types so `DocumentDisplayStatus` can be
 * derived from it as a union.
 */
export const DisplayStatusList = [
  'queuing',
  'indexing',
  'paused',
  'error',
  'available',
  'enabled',
  'disabled',
  'archived',
] as const
|
|
|
|
|
|
|
|
/** Union of the element types of `DisplayStatusList`. */
export type DocumentDisplayStatus = typeof DisplayStatusList[number]
|
2023-05-15 08:51:32 +08:00
|
|
|
|
|
|
|
/**
 * Data source details of a document.
 *
 * `upload_file`, `job_id` and `url` are declared required, while the
 * `notion_*` fields and `provider` are optional.
 * NOTE(review): presumably only the fields matching the document's source type
 * are populated at runtime — verify against the backend before relying on the
 * required fields.
 */
export type DataSourceInfo = {
  upload_file: {
    id: string
    name: string
    size: number
    mime_type: string
    created_at: number
    created_by: string
    extension: string
  }
  notion_page_icon?: string
  notion_workspace_id?: string
  notion_page_id?: string
  provider?: DataSourceProvider
  job_id: string
  url: string
}
|
|
|
|
|
|
|
|
/**
 * Base document detail; `SimpleDocumentDetail` extends it via intersection.
 *
 * `data_source_type` accepts either this module's `DataSourceType` or the
 * `DatasourceType` imported from `./pipeline`.
 */
export type InitialDocumentDetail = {
  id: string
  batch: string
  position: number
  dataset_id: string
  data_source_type: DataSourceType | DatasourceType
  data_source_info: DataSourceInfo
  dataset_process_rule_id: string
  name: string
  created_from: 'api' | 'web'
  created_by: string
  created_at: number
  indexing_status: DocumentIndexingStatus
  display_status: DocumentDisplayStatus
  completed_segments?: number
  total_segments?: number
  doc_form: ChunkingMode
  doc_language: string
}
|
|
|
|
|
|
|
|
/**
 * `InitialDocumentDetail` extended with state, counters and optional metadata.
 *
 * `dataset_process_rule_id` is redeclared here as optional, but
 * `InitialDocumentDetail` declares it required, so the intersection still
 * requires it.
 */
export type SimpleDocumentDetail = InitialDocumentDetail & {
  enabled: boolean
  word_count: number
  is_qa: boolean // TODO waiting for backend to add this field
  error?: string | null
  archived: boolean
  updated_at: number
  hit_count: number
  dataset_process_rule_id?: string
  data_source_detail_dict?: {
    upload_file: {
      name: string
      extension: string
    }
  }
  doc_metadata?: MetadataItemWithValue[]
}
|
|
|
|
|
|
|
|
/** A page of `SimpleDocumentDetail` items with pagination fields. */
export type DocumentListResponse = {
  data: SimpleDocumentDetail[]
  has_more: boolean
  total: number
  page: number
  limit: number
}
|
|
|
|
|
2023-10-07 17:42:16 +08:00
|
|
|
/**
 * Fields shared by document requests; `CreateDocumentReq` and
 * `IndexingEstimateParams` extend it via intersection.
 */
export type DocumentReq = {
  original_document_id?: string
  indexing_technique?: IndexingType
  doc_form: ChunkingMode
  doc_language: string
  process_rule: ProcessRule
}
|
|
|
|
|
2023-10-07 17:42:16 +08:00
|
|
|
/**
 * `DocumentReq` extended with the data source, retrieval config and
 * embedding model fields.
 */
export type CreateDocumentReq = DocumentReq & {
  data_source: DataSource
  retrieval_model: RetrievalConfig
  embedding_model: string
  embedding_model_provider: string
}
|
|
|
|
|
|
|
|
/**
 * `DocumentReq` combined with an all-optional `DataSource` and a required
 * `dataset_id`.
 */
export type IndexingEstimateParams = DocumentReq & Partial<DataSource> & {
  dataset_id: string
}
|
|
|
|
|
2023-05-15 08:51:32 +08:00
|
|
|
/**
 * A data source: its type plus an `info_list` with one optional sub-list per
 * source kind (Notion, file, website).
 *
 * The source type appears twice: as `type` and as `info_list.data_source_type`.
 */
export type DataSource = {
  type: DataSourceType
  info_list: {
    data_source_type: DataSourceType
    notion_info_list?: NotionInfo[]
    file_info_list?: {
      file_ids: string[]
    }
    website_info_list?: {
      provider: string
      job_id: string
      urls: string[]
    }
  }
}
|
|
|
|
|
|
|
|
/** A Notion workspace id with its list of pages. */
export type NotionInfo = {
  workspace_id: string
  pages: DataSourceNotionPage[]
}
|
|
|
|
/**
 * A page reference: page id plus a type string.
 * NOTE(review): the allowed values of `type` are not visible here.
 */
export type NotionPage = {
  page_id: string
  type: string
}
|
|
|
|
|
|
|
|
/** A process rule: a `ProcessMode` plus its `Rules`. */
export type ProcessRule = {
  mode: ProcessMode
  rules: Rules
}
|
|
|
|
|
|
|
|
/**
 * Response carrying an optional dataset, a batch string and the documents
 * as `InitialDocumentDetail` items.
 *
 * NOTE(review): the type name starts with a lowercase letter, unlike the other
 * exported types in this module; it is kept as-is because renaming would break
 * importers.
 */
export type createDocumentResponse = {
  dataset?: DataSet
  batch: string
  documents: InitialDocumentDetail[]
}
|
|
|
|
|
2024-12-26 12:01:51 +08:00
|
|
|
/**
 * Same shape as `ProcessRule` declared in this module (`mode` + `rules`).
 *
 * NOTE(review): the name looks like a misspelling of `ProcessRule`. It is kept
 * for backward compatibility; consider migrating usages to `ProcessRule`.
 */
export type PrecessRule = {
  mode: ProcessMode
  rules: Rules
}
|
|
|
|
|
2023-05-15 08:51:32 +08:00
|
|
|
/**
 * `SimpleDocumentDetail` extended with processing timestamps, pause/disable/
 * archive details, document type/metadata and process rules.
 *
 * Review notes, all left unchanged for compatibility:
 * - `batch` and `indexing_status` are already declared on the base type;
 *   redeclaring `indexing_status` as `string` does not widen it, because the
 *   intersection with `DocumentIndexingStatus` stays the narrower union.
 * - `doc_metadata` is declared here as `DocMetadata | null`, but the base type
 *   declares it as `MetadataItemWithValue[]`; the resulting property type is
 *   the intersection of both. NOTE(review): confirm which shape the backend
 *   actually returns.
 * - The `[key: string]: any` index signature allows access to any other
 *   property name with type `any`.
 */
export type FullDocumentDetail = SimpleDocumentDetail & {
  batch: string
  created_api_request_id: string
  processing_started_at: number
  parsing_completed_at: number
  cleaning_completed_at: number
  splitting_completed_at: number
  tokens: number
  indexing_latency: number
  completed_at: number
  paused_by: string
  paused_at: number
  stopped_at: number
  indexing_status: string
  disabled_at: number
  disabled_by: string
  archived_reason: 'rule_modified' | 're_upload'
  archived_by: string
  archived_at: number
  doc_type?: DocType | null | 'others'
  doc_metadata?: DocMetadata | null
  segment_count: number
  dataset_process_rule: PrecessRule
  document_process_rule: ProcessRule
  [key: string]: any
}
|
|
|
|
|
|
|
|
/**
 * Document metadata with a fixed set of named string fields plus a string
 * index signature, so any additional string-valued key is accepted.
 */
export type DocMetadata = {
  title: string
  language: string
  author: string
  publisher: string
  publicationDate: string
  ISBN: string
  category: string
  [key: string]: string
}
|
|
|
|
|
|
|
|
/**
 * Readonly tuple of the customizable document type strings.
 * `as const` keeps the literal types so `CustomizableDocType` can be derived
 * from it as a union.
 */
export const CUSTOMIZABLE_DOC_TYPES = [
  'book',
  'web_page',
  'paper',
  'social_media_post',
  'personal_document',
  'business_document',
  'im_chat_log',
] as const
|
|
|
|
|
|
|
|
/**
 * Readonly tuple of the fixed document type strings.
 * `as const` keeps the literal types so `FixedDocType` can be derived from it.
 */
export const FIXED_DOC_TYPES = [
  'synced_from_github',
  'synced_from_notion',
  'wikipedia_entry',
] as const
|
|
|
|
|
|
|
|
/** Union of the element types of `CUSTOMIZABLE_DOC_TYPES`. */
export type CustomizableDocType = typeof CUSTOMIZABLE_DOC_TYPES[number]
|
|
|
|
/** Union of the element types of `FIXED_DOC_TYPES`. */
export type FixedDocType = typeof FIXED_DOC_TYPES[number]
|
|
|
|
/** Any document type: either a customizable or a fixed one. */
export type DocType = CustomizableDocType | FixedDocType
|
2023-05-15 08:51:32 +08:00
|
|
|
|
|
|
|
/** Alias of `FullDocumentDetail`. */
export type DocumentDetailResponse = FullDocumentDetail
|
|
|
|
|
|
|
|
/**
 * List of segment status strings.
 *
 * NOTE(review): unlike the other status lists in this module, this one is not
 * declared `as const`, so it is a mutable `string[]` and any type derived from
 * its elements is plain `string`. The annotation below only makes that
 * existing type explicit.
 */
export const SEGMENT_STATUS_LIST: string[] = [
  'waiting',
  'completed',
  'error',
  'indexing',
]
|
|
|
|
/**
 * Element type of `SEGMENT_STATUS_LIST`.
 * Because that list is declared without `as const`, this resolves to `string`
 * rather than a union of the listed literals.
 */
export type SegmentStatus = typeof SEGMENT_STATUS_LIST[number]
|
|
|
|
|
|
|
|
/**
 * Query parameters for listing segments.
 * `page` is typed as a string while `limit` is a number; `enabled` accepts a
 * boolean or the literal `'all'`.
 */
export type SegmentsQuery = {
  page?: string
  limit: number
  // status?: SegmentStatus
  hit_count_gte?: number
  keyword?: string
  enabled?: boolean | 'all'
}
|
|
|
|
|
|
|
|
/**
 * Full detail of a segment: content, counters, index node fields, status and
 * timestamps, plus an optional answer and optional child chunks.
 *
 * NOTE(review): units of the numeric timestamp fields are not visible here.
 */
export type SegmentDetailModel = {
  id: string
  position: number
  document_id: string
  content: string
  sign_content: string
  word_count: number
  tokens: number
  keywords: string[]
  index_node_id: string
  index_node_hash: string
  hit_count: number
  enabled: boolean
  disabled_at: number
  disabled_by: string
  status: SegmentStatus
  created_by: string
  created_at: number
  indexing_at: number
  completed_at: number
  error: string | null
  stopped_at: number
  answer?: string
  child_chunks?: ChildChunkDetail[]
  updated_at: number
}
|
|
|
|
|
|
|
|
/** A page of `SegmentDetailModel` items with pagination fields. */
export type SegmentsResponse = {
  data: SegmentDetailModel[]
  has_more: boolean
  limit: number
  total: number
  total_pages: number
  page: number
}
|
|
|
|
|
|
|
|
/**
 * A hit-testing record.
 * `source` is one of `'app'`, `'hit_testing'` or `'plugin'`;
 * `created_by_role` is `'account'` or `'end_user'`.
 */
export type HitTestingRecord = {
  id: string
  content: string
  source: 'app' | 'hit_testing' | 'plugin'
  source_app_id: string
  created_by_role: 'account' | 'end_user'
  created_by: string
  created_at: number
}
|
|
|
|
|
2024-12-26 12:01:51 +08:00
|
|
|
/** A child chunk in a hit-testing result: id, content, position and score. */
export type HitTestingChildChunk = {
  id: string
  content: string
  position: number
  score: number
}
|
2023-05-15 08:51:32 +08:00
|
|
|
/**
 * One hit-testing result with its score, t-SNE position and optional child
 * chunks.
 *
 * Both `segment` and `content` are typed as `Segment`.
 * NOTE(review): whether both are populated by the backend is not visible here.
 */
export type HitTesting = {
  segment: Segment
  content: Segment
  score: number
  tsne_position: TsnePosition
  child_chunks?: HitTestingChildChunk[] | null
}
|
|
|
|
|
2024-09-30 15:38:43 +08:00
|
|
|
/**
 * One hit-testing result from an external knowledge base: content, title,
 * score and two `x-amz-bedrock-kb-*` metadata keys.
 */
export type ExternalKnowledgeBaseHitTesting = {
  content: string
  title: string
  score: number
  metadata: {
    'x-amz-bedrock-kb-source-uri': string
    'x-amz-bedrock-kb-data-source-id': string
  }
}
|
|
|
|
|
2023-05-15 08:51:32 +08:00
|
|
|
/**
 * A segment as used in hit-testing results, including the `Document` it
 * belongs to.
 *
 * `Document` here refers to the type exported by this module, which shadows
 * the DOM `Document` inside this file.
 */
export type Segment = {
  id: string
  document: Document
  content: string
  sign_content: string
  position: number
  word_count: number
  tokens: number
  keywords: string[]
  hit_count: number
  index_node_hash: string
}
|
|
|
|
|
|
|
|
/**
 * Short document descriptor: id, data source type, name and document type.
 *
 * `data_source_type` is a plain string here, not the `DataSourceType` enum.
 * The exported name shadows the DOM `Document` type within this module.
 */
export type Document = {
  id: string
  data_source_type: string
  name: string
  doc_type: DocType
}
|
|
|
|
|
|
|
|
/** A page of `HitTestingRecord` items with pagination fields. */
export type HitTestingRecordsResponse = {
  data: HitTestingRecord[]
  has_more: boolean
  limit: number
  total: number
  page: number
}
|
|
|
|
|
|
|
|
/** A 2D position with numeric `x` and `y`. */
export type TsnePosition = {
  x: number
  y: number
}
|
|
|
|
|
|
|
|
/** Hit-testing response: the query (content + t-SNE position) and its records. */
export type HitTestingResponse = {
  query: {
    content: string
    tsne_position: TsnePosition
  }
  records: Array<HitTesting>
}
|
|
|
|
|
2024-09-30 15:38:43 +08:00
|
|
|
/** External knowledge base hit-testing response: the query content and its records. */
export type ExternalKnowledgeBaseHitTestingResponse = {
  query: {
    content: string
  }
  records: Array<ExternalKnowledgeBaseHitTesting>
}
|
|
|
|
|
2023-05-15 08:51:32 +08:00
|
|
|
/**
 * An app related to a dataset: id, name, mode and icon fields.
 * `icon_type` may be `null`.
 */
export type RelatedApp = {
  id: string
  name: string
  mode: AppMode
  icon_type: AppIconType | null
  icon: string
  icon_background: string
  icon_url: string
}
|
|
|
|
|
|
|
|
/** A list of `RelatedApp` items with a total count. */
export type RelatedAppResponse = {
  data: Array<RelatedApp>
  total: number
}
|
2023-07-28 20:47:15 +08:00
|
|
|
|
2024-09-08 12:14:11 +07:00
|
|
|
/**
 * Payload for updating a segment.
 * Only `content` is required; `answer`, `keywords` and
 * `regenerate_child_chunks` are optional.
 */
export type SegmentUpdater = {
  content: string
  answer?: string
  keywords?: string[]
  regenerate_child_chunks?: boolean
}
|
2024-04-24 15:02:29 +08:00
|
|
|
|
|
|
|
/** A list of `IndexingStatusResponse` items with a total count. */
export type ErrorDocsResponse = {
  data: IndexingStatusResponse[]
  total: number
}
|
2024-07-24 12:50:48 +08:00
|
|
|
|
|
|
|
/**
 * Boolean flags describing a selection of datasets.
 * NOTE(review): flag meanings are taken from their names only; how they are
 * computed is not visible here.
 */
export type SelectedDatasetsMode = {
  allHighQuality: boolean
  allHighQualityVectorSearch: boolean
  allHighQualityFullTextSearch: boolean
  allEconomic: boolean
  mixtureHighQualityAndEconomic: boolean
  allInternal: boolean
  allExternal: boolean
  mixtureInternalAndExternal: boolean
  inconsistentEmbeddingModel: boolean
}
|
|
|
|
|
|
|
|
/** String-valued enum of weighted-score presets. */
export enum WeightedScoreEnum {
  SemanticFirst = 'semantic_first',
  KeywordFirst = 'keyword_first',
  Customized = 'customized',
}
|
|
|
|
|
|
|
|
/** String-valued enum of reranking modes. */
export enum RerankingModeEnum {
  RerankingModel = 'reranking_model',
  WeightedScore = 'weighted_score',
}
|
|
|
|
|
|
|
|
/**
 * Default semantic/keyword weights for three cases: vector-search only,
 * full-text-search only, and `other`.
 * The two weights in each entry sum to 1.
 */
export const DEFAULT_WEIGHTED_SCORE = {
  allHighQualityVectorSearch: {
    semantic: 1.0,
    keyword: 0,
  },
  allHighQualityFullTextSearch: {
    semantic: 0,
    keyword: 1.0,
  },
  other: {
    semantic: 0.7,
    keyword: 0.3,
  },
}
|
2024-12-26 12:01:51 +08:00
|
|
|
|
|
|
|
/** Child chunk type literal union; used as the type of `ChildChunkDetail.type`. */
export type ChildChunkType = 'automatic' | 'customized'
|
|
|
|
|
|
|
|
/**
 * Detail of a child chunk: identity, parent segment id, content, word count,
 * timestamps and its `ChildChunkType`.
 */
export type ChildChunkDetail = {
  id: string
  position: number
  segment_id: string
  content: string
  word_count: number
  created_at: number
  updated_at: number
  type: ChildChunkType
}
|
|
|
|
|
|
|
|
/** A page of `ChildChunkDetail` items with pagination fields. */
export type ChildSegmentsResponse = {
  data: ChildChunkDetail[]
  total: number
  total_pages: number
  page: number
  limit: number
}
|
|
|
|
|
|
|
|
/** Identifies one document by dataset id and document id. */
export type UpdateDocumentParams = {
  datasetId: string
  documentId: string
}
|
|
|
|
|
|
|
|
// Used in api url
/**
 * String-valued enum of document actions.
 * Note `unArchive` maps to `'un_archive'`.
 */
export enum DocumentActionType {
  enable = 'enable',
  disable = 'disable',
  archive = 'archive',
  unArchive = 'un_archive',
  delete = 'delete',
}
|
|
|
|
|
|
|
|
/**
 * Identifies one or more documents in a dataset.
 * `documentId` and `documentIds` are both optional; `documentIds` accepts
 * either an array of ids or a single string.
 */
export type UpdateDocumentBatchParams = {
  datasetId: string
  documentId?: string
  documentIds?: string[] | string
}
|
|
|
|
|
|
|
|
/**
 * Batch import response: a job id and a job status string.
 * NOTE(review): the set of possible `job_status` values is not visible here.
 */
export type BatchImportResponse = {
  job_id: string
  job_status: string
}
|
2025-05-07 11:30:13 +08:00
|
|
|
|
2025-05-08 13:48:14 +08:00
|
|
|
/**
 * Maps every `ChunkingMode`, plus the extra key `'external'`, to an icon
 * component from the `dataset-card` icon set.
 * Each component accepts a `className` prop.
 */
export const DOC_FORM_ICON_WITH_BG: Record<ChunkingMode | 'external', React.ComponentType<{ className: string }>> = {
  [ChunkingMode.text]: General,
  [ChunkingMode.qa]: Qa,
  [ChunkingMode.parentChild]: ParentChild,
  // [ChunkingMode.graph]: Graph, // todo: Graph RAG
  external: ExternalKnowledgeBase,
}
|
|
|
|
|
2025-05-08 13:48:14 +08:00
|
|
|
/**
 * Maps the `text`, `qa` and `parentChild` chunking modes to an icon component
 * from the `vender/knowledge` icon set.
 * Each component accepts a `className` prop.
 */
export const DOC_FORM_ICON: Record<ChunkingMode.text | ChunkingMode.qa | ChunkingMode.parentChild, React.ComponentType<{ className: string }>> = {
  [ChunkingMode.text]: GeneralChunk,
  [ChunkingMode.qa]: QuestionAndAnswer,
  [ChunkingMode.parentChild]: ParentChildChunk,
}
|
|
|
|
|
2025-05-07 11:30:13 +08:00
|
|
|
/**
 * Maps every `ChunkingMode` to a short text key.
 * NOTE(review): presumably these are i18n key fragments — verify at the call sites.
 */
export const DOC_FORM_TEXT: Record<ChunkingMode, string> = {
  [ChunkingMode.text]: 'general',
  [ChunkingMode.qa]: 'qa',
  [ChunkingMode.parentChild]: 'parentChild',
  // [ChunkingMode.graph]: 'graph', // todo: Graph RAG
}
|
|
|
|
|
|
|
|
/**
 * Request payload for creating a dataset.
 *
 * `partial_member_list` here is a list of `{ user_id, role? }` objects, unlike
 * `DataSet.partial_member_list`, which is a list of strings.
 */
export type CreateDatasetReq = {
  name: string
  description: string
  icon_info: IconInfo
  doc_form?: ChunkingMode
  permission: DatasetPermission
  partial_member_list?: {
    user_id: string
    role?: 'owner' | 'admin' | 'editor' | 'normal' | 'dataset_operator'
  }[]
  yaml_content?: string
}
|
|
|
|
|
|
|
|
/**
 * Response payload for creating a dataset.
 * It carries both an `id` and a `dataset_id`, plus a `pipeline_id`.
 * NOTE(review): how `id` and `dataset_id` relate is not visible here.
 */
export type CreateDatasetResponse = {
  id: string
  name: string
  description: string
  permission: DatasetPermission
  indexing_technique: IndexingType
  created_by: string
  created_at: number
  updated_by: string
  updated_at: number
  pipeline_id: string
  dataset_id: string
}
|
2025-06-17 11:29:56 +08:00
|
|
|
|
|
|
|
/** Identifies an indexing batch by dataset id and batch id. */
export type IndexingStatusBatchRequest = {
  datasetId: string
  batchId: string
}
|