dify/web/app/components/base/markdown/markdown-utils.ts

88 lines
3.2 KiB
TypeScript
Raw Permalink Normal View History

/**
* @fileoverview Utility functions for preprocessing Markdown content.
* These functions were extracted from the main markdown renderer for better separation of concerns.
* Includes preprocessing for LaTeX and custom "think" tags.
*/
import { flow } from 'lodash-es'
export const preprocessLaTeX = (content: string) => {
if (typeof content !== 'string')
return content
const codeBlockRegex = /```[\s\S]*?```/g
const codeBlocks = content.match(codeBlockRegex) || []
let processedContent = content.replace(codeBlockRegex, 'CODE_BLOCK_PLACEHOLDER')
processedContent = flow([
(str: string) => str.replace(/\\\[(.*?)\\\]/g, (_, equation) => `$$${equation}$$`),
(str: string) => str.replace(/\\\[([\s\S]*?)\\\]/g, (_, equation) => `$$${equation}$$`),
(str: string) => str.replace(/\\\((.*?)\\\)/g, (_, equation) => `$$${equation}$$`),
(str: string) => str.replace(/(^|[^\\])\$(.+?)\$/g, (_, prefix, equation) => `${prefix}$${equation}$`),
])(processedContent)
codeBlocks.forEach((block) => {
processedContent = processedContent.replace('CODE_BLOCK_PLACEHOLDER', block)
})
return processedContent
}
export const preprocessThinkTag = (content: string) => {
const thinkOpenTagRegex = /(<think>\n)+/g
const thinkCloseTagRegex = /\n<\/think>/g
return flow([
(str: string) => str.replace(thinkOpenTagRegex, '<details data-think=true>\n'),
(str: string) => str.replace(thinkCloseTagRegex, '\n[ENDTHINKFLAG]</details>'),
(str: string) => str.replace(/(<\/details>)(?![^\S\r\n]*[\r\n])(?![^\S\r\n]*$)/g, '$1\n'),
])(content)
}
/**
* Transforms a URI for use in react-markdown, ensuring security and compatibility.
* This function is designed to work with react-markdown v9+ which has stricter
* default URL handling.
*
* Behavior:
* 1. Always allows the custom 'abbr:' protocol.
* 2. Always allows page-local fragments (e.g., "#some-id").
* 3. Always allows protocol-relative URLs (e.g., "//example.com/path").
* 4. Always allows purely relative paths (e.g., "path/to/file", "/abs/path").
* 5. Allows absolute URLs if their scheme is in a permitted list (case-insensitive):
* 'http:', 'https:', 'mailto:', 'xmpp:', 'irc:', 'ircs:'.
* 6. Intelligently distinguishes colons used for schemes from colons within
* paths, query parameters, or fragments of relative-like URLs.
* 7. Returns the original URI if allowed, otherwise returns `undefined` to
* signal that the URI should be removed/disallowed by react-markdown.
*/
export const customUrlTransform = (uri: string): string | undefined => {
const PERMITTED_SCHEME_REGEX = /^(https?|ircs?|mailto|xmpp|abbr):$/i
if (uri.startsWith('#'))
return uri
if (uri.startsWith('//'))
return uri
const colonIndex = uri.indexOf(':')
if (colonIndex === -1)
return uri
const slashIndex = uri.indexOf('/')
const questionMarkIndex = uri.indexOf('?')
const hashIndex = uri.indexOf('#')
if (
(slashIndex !== -1 && colonIndex > slashIndex)
|| (questionMarkIndex !== -1 && colonIndex > questionMarkIndex)
|| (hashIndex !== -1 && colonIndex > hashIndex)
)
return uri
const scheme = uri.substring(0, colonIndex + 1).toLowerCase()
if (PERMITTED_SCHEME_REGEX.test(scheme))
return uri
return undefined
}