/** * @fileoverview Utility functions for preprocessing Markdown content. * These functions were extracted from the main markdown renderer for better separation of concerns. * Includes preprocessing for LaTeX and custom "think" tags. */ import { flow } from 'lodash-es' export const preprocessLaTeX = (content: string) => { if (typeof content !== 'string') return content const codeBlockRegex = /```[\s\S]*?```/g const codeBlocks = content.match(codeBlockRegex) || [] let processedContent = content.replace(codeBlockRegex, 'CODE_BLOCK_PLACEHOLDER') processedContent = flow([ (str: string) => str.replace(/\\\[(.*?)\\\]/g, (_, equation) => `$$${equation}$$`), (str: string) => str.replace(/\\\[([\s\S]*?)\\\]/g, (_, equation) => `$$${equation}$$`), (str: string) => str.replace(/\\\((.*?)\\\)/g, (_, equation) => `$$${equation}$$`), (str: string) => str.replace(/(^|[^\\])\$(.+?)\$/g, (_, prefix, equation) => `${prefix}$${equation}$`), ])(processedContent) codeBlocks.forEach((block) => { processedContent = processedContent.replace('CODE_BLOCK_PLACEHOLDER', block) }) return processedContent } export const preprocessThinkTag = (content: string) => { const thinkOpenTagRegex = /(\n)+/g const thinkCloseTagRegex = /\n<\/think>/g return flow([ (str: string) => str.replace(thinkOpenTagRegex, '
\n'), (str: string) => str.replace(thinkCloseTagRegex, '\n[ENDTHINKFLAG]
'), (str: string) => str.replace(/(<\/details>)(?![^\S\r\n]*[\r\n])(?![^\S\r\n]*$)/g, '$1\n'), ])(content) } /** * Transforms a URI for use in react-markdown, ensuring security and compatibility. * This function is designed to work with react-markdown v9+ which has stricter * default URL handling. * * Behavior: * 1. Always allows the custom 'abbr:' protocol. * 2. Always allows page-local fragments (e.g., "#some-id"). * 3. Always allows protocol-relative URLs (e.g., "//example.com/path"). * 4. Always allows purely relative paths (e.g., "path/to/file", "/abs/path"). * 5. Allows absolute URLs if their scheme is in a permitted list (case-insensitive): * 'http:', 'https:', 'mailto:', 'xmpp:', 'irc:', 'ircs:'. * 6. Intelligently distinguishes colons used for schemes from colons within * paths, query parameters, or fragments of relative-like URLs. * 7. Returns the original URI if allowed, otherwise returns `undefined` to * signal that the URI should be removed/disallowed by react-markdown. */ export const customUrlTransform = (uri: string): string | undefined => { const PERMITTED_SCHEME_REGEX = /^(https?|ircs?|mailto|xmpp|abbr):$/i if (uri.startsWith('#')) return uri if (uri.startsWith('//')) return uri const colonIndex = uri.indexOf(':') if (colonIndex === -1) return uri const slashIndex = uri.indexOf('/') const questionMarkIndex = uri.indexOf('?') const hashIndex = uri.indexOf('#') if ( (slashIndex !== -1 && colonIndex > slashIndex) || (questionMarkIndex !== -1 && colonIndex > questionMarkIndex) || (hashIndex !== -1 && colonIndex > hashIndex) ) return uri const scheme = uri.substring(0, colonIndex + 1).toLowerCase() if (PERMITTED_SCHEME_REGEX.test(scheme)) return uri return undefined }