mirror of
https://github.com/langgenius/dify.git
synced 2025-10-24 15:38:59 +00:00

Support filter knowledge by metadata. Co-authored-by: Joel <iamjoel007@gmail.com> Co-authored-by: NFish <douxc512@gmail.com>
396 lines
15 KiB
TypeScript
396 lines
15 KiB
TypeScript
'use client'
|
|
import { useTranslation } from 'react-i18next'
|
|
import { formatFileSize, formatNumber, formatTime } from '@/utils/format'
|
|
import { type DocType, ProcessMode } from '@/models/datasets'
|
|
import useTimestamp from '@/hooks/use-timestamp'
|
|
|
|
/** Input control kinds that a metadata sub-field can declare through its `inputType` option. */
export type inputType = 'input' | 'select' | 'textarea'
|
|
/** Keys of the metadata map: every `DocType` plus the `originInfo` and `technicalParameters` sections. */
export type metadataType = DocType | 'originInfo' | 'technicalParameters'
|
|
|
|
/** Display configuration for a single field inside a metadata section. */
type MetadataSubField = {
  /** Label text for the field. */
  label: string
  /** Kind of input control the field declares, when one is specified. */
  inputType?: inputType
  field?: string
  /** Optional formatter; receives the raw value and, optionally, a total. */
  render?: (value: any, total?: number) => React.ReactNode | string
}

/** Display configuration for one metadata section. */
type MetadataSection = {
  /** Title text of the section. */
  text: string
  allowEdit?: boolean
  icon?: React.ReactNode
  iconName?: string
  /** Fields of the section, keyed by field name. */
  subFieldsMap: Record<string, MetadataSubField>
}

/** Maps every metadata type to the configuration of its section. */
type MetadataMap = Record<metadataType, MetadataSection>
|
|
|
|
// i18n key prefix shared by every metadata field label (no trailing dot).
const fieldPrefix = 'datasetDocuments.metadata.field'
|
|
|
|
/**
 * Builds the display configuration for document metadata.
 *
 * Every entry describes one metadata section: its translated title (`text`),
 * an optional `iconName`, whether it is marked `allowEdit: false`, and the
 * sub-fields it contains. Labels are resolved through `t` from
 * `useTranslation`; the `originInfo` and `technicalParameters` sections also
 * carry `render` functions that format raw values for display.
 *
 * @returns The metadata map, keyed by metadata type.
 */
export const useMetadataMap = (): MetadataMap => {
  const { t } = useTranslation()
  const { formatTime: formatTimestamp } = useTimestamp()

  return {
    book: {
      text: t('datasetDocuments.metadata.type.book'),
      iconName: 'bookOpen',
      subFieldsMap: {
        title: { label: t(`${fieldPrefix}.book.title`) },
        language: {
          label: t(`${fieldPrefix}.book.language`),
          inputType: 'select',
        },
        author: { label: t(`${fieldPrefix}.book.author`) },
        publisher: { label: t(`${fieldPrefix}.book.publisher`) },
        publication_date: { label: t(`${fieldPrefix}.book.publicationDate`) },
        isbn: { label: t(`${fieldPrefix}.book.ISBN`) },
        category: {
          label: t(`${fieldPrefix}.book.category`),
          inputType: 'select',
        },
      },
    },
    web_page: {
      text: t('datasetDocuments.metadata.type.webPage'),
      iconName: 'globe',
      subFieldsMap: {
        'title': { label: t(`${fieldPrefix}.webPage.title`) },
        'url': { label: t(`${fieldPrefix}.webPage.url`) },
        'language': {
          label: t(`${fieldPrefix}.webPage.language`),
          inputType: 'select',
        },
        'author/publisher': { label: t(`${fieldPrefix}.webPage.authorPublisher`) },
        'publish_date': { label: t(`${fieldPrefix}.webPage.publishDate`) },
        'topic/keywords': { label: t(`${fieldPrefix}.webPage.topicKeywords`) },
        'description': { label: t(`${fieldPrefix}.webPage.description`) },
      },
    },
    paper: {
      text: t('datasetDocuments.metadata.type.paper'),
      iconName: 'graduationHat',
      subFieldsMap: {
        'title': { label: t(`${fieldPrefix}.paper.title`) },
        'language': {
          label: t(`${fieldPrefix}.paper.language`),
          inputType: 'select',
        },
        'author': { label: t(`${fieldPrefix}.paper.author`) },
        'publish_date': { label: t(`${fieldPrefix}.paper.publishDate`) },
        'journal/conference_name': {
          label: t(`${fieldPrefix}.paper.journalConferenceName`),
        },
        'volume/issue/page_numbers': { label: t(`${fieldPrefix}.paper.volumeIssuePage`) },
        'doi': { label: t(`${fieldPrefix}.paper.DOI`) },
        'topic/keywords': { label: t(`${fieldPrefix}.paper.topicKeywords`) },
        'abstract': {
          label: t(`${fieldPrefix}.paper.abstract`),
          inputType: 'textarea',
        },
      },
    },
    social_media_post: {
      text: t('datasetDocuments.metadata.type.socialMediaPost'),
      iconName: 'atSign',
      subFieldsMap: {
        'platform': { label: t(`${fieldPrefix}.socialMediaPost.platform`) },
        'author/username': {
          label: t(`${fieldPrefix}.socialMediaPost.authorUsername`),
        },
        'publish_date': { label: t(`${fieldPrefix}.socialMediaPost.publishDate`) },
        'post_url': { label: t(`${fieldPrefix}.socialMediaPost.postURL`) },
        'topics/tags': { label: t(`${fieldPrefix}.socialMediaPost.topicsTags`) },
      },
    },
    personal_document: {
      text: t('datasetDocuments.metadata.type.personalDocument'),
      iconName: 'file',
      subFieldsMap: {
        'title': { label: t(`${fieldPrefix}.personalDocument.title`) },
        'author': { label: t(`${fieldPrefix}.personalDocument.author`) },
        'creation_date': {
          label: t(`${fieldPrefix}.personalDocument.creationDate`),
        },
        'last_modified_date': {
          label: t(`${fieldPrefix}.personalDocument.lastModifiedDate`),
        },
        'document_type': {
          label: t(`${fieldPrefix}.personalDocument.documentType`),
          inputType: 'select',
        },
        'tags/category': {
          label: t(`${fieldPrefix}.personalDocument.tagsCategory`),
        },
      },
    },
    business_document: {
      text: t('datasetDocuments.metadata.type.businessDocument'),
      iconName: 'briefcase',
      subFieldsMap: {
        'title': { label: t(`${fieldPrefix}.businessDocument.title`) },
        'author': { label: t(`${fieldPrefix}.businessDocument.author`) },
        'creation_date': {
          label: t(`${fieldPrefix}.businessDocument.creationDate`),
        },
        'last_modified_date': {
          label: t(`${fieldPrefix}.businessDocument.lastModifiedDate`),
        },
        'document_type': {
          label: t(`${fieldPrefix}.businessDocument.documentType`),
          inputType: 'select',
        },
        'department/team': {
          label: t(`${fieldPrefix}.businessDocument.departmentTeam`),
        },
      },
    },
    im_chat_log: {
      text: t('datasetDocuments.metadata.type.IMChat'),
      iconName: 'messageTextCircle',
      subFieldsMap: {
        'chat_platform': { label: t(`${fieldPrefix}.IMChat.chatPlatform`) },
        'chat_participants/group_name': {
          label: t(`${fieldPrefix}.IMChat.chatPartiesGroupName`),
        },
        'start_date': { label: t(`${fieldPrefix}.IMChat.startDate`) },
        'end_date': { label: t(`${fieldPrefix}.IMChat.endDate`) },
        'participants': { label: t(`${fieldPrefix}.IMChat.participants`) },
        // NOTE(review): 'topicKeywords' and 'fileType' are camelCase while the
        // sibling keys are snake_case — confirm they match the stored metadata keys.
        'topicKeywords': {
          label: t(`${fieldPrefix}.IMChat.topicKeywords`),
          inputType: 'textarea',
        },
        'fileType': { label: t(`${fieldPrefix}.IMChat.fileType`) },
      },
    },
    wikipedia_entry: {
      text: t('datasetDocuments.metadata.type.wikipediaEntry'),
      allowEdit: false,
      subFieldsMap: {
        'title': { label: t(`${fieldPrefix}.wikipediaEntry.title`) },
        'language': {
          label: t(`${fieldPrefix}.wikipediaEntry.language`),
          inputType: 'select',
        },
        'web_page_url': { label: t(`${fieldPrefix}.wikipediaEntry.webpageURL`) },
        'editor/contributor': {
          label: t(`${fieldPrefix}.wikipediaEntry.editorContributor`),
        },
        'last_edit_date': {
          label: t(`${fieldPrefix}.wikipediaEntry.lastEditDate`),
        },
        'summary/introduction': {
          label: t(`${fieldPrefix}.wikipediaEntry.summaryIntroduction`),
          inputType: 'textarea',
        },
      },
    },
    synced_from_notion: {
      text: t('datasetDocuments.metadata.type.notion'),
      allowEdit: false,
      subFieldsMap: {
        'title': { label: t(`${fieldPrefix}.notion.title`) },
        'language': { label: t(`${fieldPrefix}.notion.lang`), inputType: 'select' },
        'author/creator': { label: t(`${fieldPrefix}.notion.author`) },
        'creation_date': { label: t(`${fieldPrefix}.notion.createdTime`) },
        'last_modified_date': {
          label: t(`${fieldPrefix}.notion.lastModifiedTime`),
        },
        'notion_page_link': { label: t(`${fieldPrefix}.notion.url`) },
        'category/tags': { label: t(`${fieldPrefix}.notion.tag`) },
        'description': { label: t(`${fieldPrefix}.notion.desc`) },
      },
    },
    synced_from_github: {
      text: t('datasetDocuments.metadata.type.github'),
      allowEdit: false,
      subFieldsMap: {
        'repository_name': { label: t(`${fieldPrefix}.github.repoName`) },
        'repository_description': { label: t(`${fieldPrefix}.github.repoDesc`) },
        'repository_owner/organization': { label: t(`${fieldPrefix}.github.repoOwner`) },
        'code_filename': { label: t(`${fieldPrefix}.github.fileName`) },
        'code_file_path': { label: t(`${fieldPrefix}.github.filePath`) },
        'programming_language': { label: t(`${fieldPrefix}.github.programmingLang`) },
        'github_link': { label: t(`${fieldPrefix}.github.url`) },
        'open_source_license': { label: t(`${fieldPrefix}.github.license`) },
        'commit_date': { label: t(`${fieldPrefix}.github.lastCommitTime`) },
        'commit_author': {
          label: t(`${fieldPrefix}.github.lastCommitAuthor`),
        },
      },
    },
    originInfo: {
      text: '',
      allowEdit: false,
      // Keys in this section are dotted paths (e.g. 'data_source_info.upload_file.size').
      subFieldsMap: {
        'name': { label: t(`${fieldPrefix}.originInfo.originalFilename`) },
        'data_source_info.upload_file.size': {
          label: t(`${fieldPrefix}.originInfo.originalFileSize`),
          render: value => formatFileSize(value),
        },
        'created_at': {
          label: t(`${fieldPrefix}.originInfo.uploadDate`),
          render: value => formatTimestamp(value, t('datasetDocuments.metadata.dateTimeFormat') as string),
        },
        'completed_at': {
          label: t(`${fieldPrefix}.originInfo.lastUpdateDate`),
          render: value => formatTimestamp(value, t('datasetDocuments.metadata.dateTimeFormat') as string),
        },
        'data_source_type': {
          label: t(`${fieldPrefix}.originInfo.source`),
          // 'notion_import' is looked up under the 'notion' translation key.
          render: value => t(`datasetDocuments.metadata.source.${value === 'notion_import' ? 'notion' : value}`),
        },
      },
    },
    technicalParameters: {
      text: t('datasetDocuments.metadata.type.technicalParameters'),
      allowEdit: false,
      subFieldsMap: {
        'dataset_process_rule.mode': {
          label: t(`${fieldPrefix}.technicalParameters.segmentSpecification`),
          render: value => value === ProcessMode.general ? (t('datasetDocuments.embedding.custom') as string) : (t('datasetDocuments.embedding.hierarchical') as string),
        },
        'dataset_process_rule.rules.segmentation.max_tokens': {
          label: t(`${fieldPrefix}.technicalParameters.segmentLength`),
          render: value => formatNumber(value),
        },
        // NOTE(review): the unit suffixes below ("characters", "paragraphs",
        // "tokens") are hard-coded English strings and are not passed through `t`.
        'average_segment_length': {
          label: t(`${fieldPrefix}.technicalParameters.avgParagraphLength`),
          render: value => `${formatNumber(value)} characters`,
        },
        'segment_count': {
          label: t(`${fieldPrefix}.technicalParameters.paragraphs`),
          render: value => `${formatNumber(value)} paragraphs`,
        },
        'hit_count': {
          label: t(`${fieldPrefix}.technicalParameters.hitCount`),
          render: (value, total) => {
            const hits = value || 0
            // `total` is optional: default it to 0 so the text never contains "undefined".
            const totalCount = total ?? 0
            // A zero (or otherwise falsy) total yields 0% instead of dividing by it.
            const percentage = totalCount ? ((hits / totalCount) * 100).toFixed(2) : 0
            return `${percentage}% (${hits}/${totalCount})`
          },
        },
        'indexing_latency': {
          label: t(`${fieldPrefix}.technicalParameters.embeddingTime`),
          render: value => formatTime(value),
        },
        'tokens': {
          label: t(`${fieldPrefix}.technicalParameters.embeddedSpend`),
          render: value => `${formatNumber(value)} tokens`,
        },
      },
    },
  }
}
|
|
|
|
// i18n key prefix for language display names; the trailing dot is included.
const langPrefix = 'datasetDocuments.metadata.languageMap.'
|
|
|
|
/**
 * Returns the translated display name for each language listed below.
 *
 * Keys are the short language identifiers; each value is looked up under the
 * `datasetDocuments.metadata.languageMap.` i18n prefix.
 */
export const useLanguages = () => {
  const { t } = useTranslation()
  // Resolve one language identifier against the shared key prefix.
  const nameOf = (code: string) => t(`${langPrefix}${code}`)

  return {
    zh: nameOf('zh'),
    en: nameOf('en'),
    es: nameOf('es'),
    fr: nameOf('fr'),
    de: nameOf('de'),
    ja: nameOf('ja'),
    ko: nameOf('ko'),
    ru: nameOf('ru'),
    ar: nameOf('ar'),
    pt: nameOf('pt'),
    it: nameOf('it'),
    nl: nameOf('nl'),
    pl: nameOf('pl'),
    sv: nameOf('sv'),
    tr: nameOf('tr'),
    he: nameOf('he'),
    hi: nameOf('hi'),
    da: nameOf('da'),
    fi: nameOf('fi'),
    no: nameOf('no'),
    hu: nameOf('hu'),
    el: nameOf('el'),
    cs: nameOf('cs'),
    th: nameOf('th'),
    id: nameOf('id'),
    ro: nameOf('ro'),
  }
}
|
|
|
|
// i18n key prefix for book category names; the trailing dot is included.
const bookCategoryPrefix = 'datasetDocuments.metadata.categoryMap.book.'
|
|
|
|
/**
 * Returns the translated name of every book category listed below.
 *
 * Each value is looked up under the
 * `datasetDocuments.metadata.categoryMap.book.` i18n prefix.
 */
export const useBookCategories = () => {
  const { t } = useTranslation()
  // Resolve one category identifier against the shared key prefix.
  const categoryName = (category: string) => t(`${bookCategoryPrefix}${category}`)

  return {
    fiction: categoryName('fiction'),
    biography: categoryName('biography'),
    history: categoryName('history'),
    science: categoryName('science'),
    technology: categoryName('technology'),
    education: categoryName('education'),
    philosophy: categoryName('philosophy'),
    religion: categoryName('religion'),
    socialSciences: categoryName('socialSciences'),
    art: categoryName('art'),
    travel: categoryName('travel'),
    health: categoryName('health'),
    selfHelp: categoryName('selfHelp'),
    businessEconomics: categoryName('businessEconomics'),
    cooking: categoryName('cooking'),
    childrenYoungAdults: categoryName('childrenYoungAdults'),
    comicsGraphicNovels: categoryName('comicsGraphicNovels'),
    poetry: categoryName('poetry'),
    drama: categoryName('drama'),
    other: categoryName('other'),
  }
}
|
|
|
|
// i18n key prefix for personal-document category names; the trailing dot is included.
const personalDocCategoryPrefix = 'datasetDocuments.metadata.categoryMap.personalDoc.'
|
|
|
|
/**
 * Returns the translated name of every personal-document category listed below.
 *
 * Each value is looked up under the
 * `datasetDocuments.metadata.categoryMap.personalDoc.` i18n prefix.
 */
export const usePersonalDocCategories = () => {
  const { t } = useTranslation()
  // Resolve one category identifier against the shared key prefix.
  const categoryName = (category: string) => t(`${personalDocCategoryPrefix}${category}`)

  return {
    notes: categoryName('notes'),
    blogDraft: categoryName('blogDraft'),
    diary: categoryName('diary'),
    researchReport: categoryName('researchReport'),
    bookExcerpt: categoryName('bookExcerpt'),
    schedule: categoryName('schedule'),
    list: categoryName('list'),
    projectOverview: categoryName('projectOverview'),
    photoCollection: categoryName('photoCollection'),
    creativeWriting: categoryName('creativeWriting'),
    codeSnippet: categoryName('codeSnippet'),
    designDraft: categoryName('designDraft'),
    personalResume: categoryName('personalResume'),
    other: categoryName('other'),
  }
}
|
|
|
|
// i18n key prefix for business-document category names; the trailing dot is included.
const businessDocCategoryPrefix = 'datasetDocuments.metadata.categoryMap.businessDoc.'
|
|
|
|
/**
 * Returns the translated name of every business-document category listed below.
 *
 * Each value is looked up under the
 * `datasetDocuments.metadata.categoryMap.businessDoc.` i18n prefix.
 */
export const useBusinessDocCategories = () => {
  const { t } = useTranslation()
  // Resolve one category identifier against the shared key prefix.
  const categoryName = (category: string) => t(`${businessDocCategoryPrefix}${category}`)

  return {
    meetingMinutes: categoryName('meetingMinutes'),
    researchReport: categoryName('researchReport'),
    proposal: categoryName('proposal'),
    employeeHandbook: categoryName('employeeHandbook'),
    trainingMaterials: categoryName('trainingMaterials'),
    requirementsDocument: categoryName('requirementsDocument'),
    designDocument: categoryName('designDocument'),
    productSpecification: categoryName('productSpecification'),
    financialReport: categoryName('financialReport'),
    marketAnalysis: categoryName('marketAnalysis'),
    projectPlan: categoryName('projectPlan'),
    teamStructure: categoryName('teamStructure'),
    policiesProcedures: categoryName('policiesProcedures'),
    contractsAgreements: categoryName('contractsAgreements'),
    emailCorrespondence: categoryName('emailCorrespondence'),
    other: categoryName('other'),
  }
}
|