| #!/usr/bin/env node |
|
|
| import { readFileSync, writeFileSync, existsSync, readdirSync } from 'fs'; |
| import { join, dirname } from 'path'; |
| import { fileURLToPath } from 'url'; |
|
|
// ES modules do not provide __filename / __dirname, so both are derived
// from this module's own URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
|
|
| |
| |
| |
| |
|
|
| |
| |
| |
| |
| |
/**
 * Delete TeX grouping primitives from the text.
 *
 * @param {string} content - Markdown/LaTeX text to clean.
 * @returns {string} The text with every grouping sequence removed.
 */
function removeTexGroupingCommands(content) {
  console.log(' π§Ή Removing TeX grouping commands...');

  // The compound sequences must be removed before the bare \bgroup / \egroup
  // tokens, otherwise their leftovers (\mathopen{}\mathclose, \aftergroup)
  // would stay behind.
  const groupingPatterns = [
    /\\mathopen\{\}\\mathclose\\bgroup/g,
    /\\aftergroup\\egroup/g,
    /\\bgroup/g,
    /\\egroup/g,
  ];

  let stripped = content;
  for (const pattern of groupingPatterns) {
    stripped = stripped.replace(pattern, '');
  }
  return stripped;
}
|
|
| |
| |
| |
| |
| |
/**
 * Replace auto-sized square brackets (\left[ ... \right]) with plain ones.
 *
 * Whitespace directly after \left[ and directly before \right] is dropped
 * together with the command.
 *
 * @param {string} content - Markdown/LaTeX text to clean.
 * @returns {string} The text with plain `[` and `]` delimiters.
 */
function simplifyLatexDelimiters(content) {
  console.log(' π§ Simplifying LaTeX delimiters...');

  const withPlainOpeners = content.replace(/\\left\[\s*/g, '[');
  const withPlainClosers = withPlainOpeners.replace(/\s*\\right\]/g, ']');
  return withPlainClosers;
}
|
|
| |
| |
| |
| |
| |
/**
 * Remove every \label{...} command from the text.
 *
 * Runs in two passes: the first deletes labels that are alone on a line
 * (along with the whitespace the pattern captures around them), the second
 * deletes any label still embedded in other text.
 *
 * @param {string} content - Markdown/LaTeX text to clean.
 * @returns {string} The text without \label commands.
 */
function removeOrphanedLabels(content) {
  console.log(' π·οΈ Removing orphaned labels...');

  const labelOnlyLine = /^\s*\\label\{[^}]+\}\s*$/gm;
  const anyLabel = /\\label\{[^}]+\}/g;

  const withoutLabelLines = content.replace(labelOnlyLine, '');
  return withoutLabelLines.replace(anyLabel, '');
}
|
|
| |
| |
| |
| |
| |
/**
 * Rewrite math commands for KaTeX.
 *
 * Currently only \hdots is rewritten (to \ldots). The negative lookahead
 * keeps the rewrite from touching longer command names that merely start
 * with "\hdots" (e.g. \hdotsfor), which the previous unanchored pattern
 * turned into the non-existent \ldotsfor. The former \vdots -> \vdots
 * replacement was a no-op and has been dropped.
 *
 * @param {string} content - Markdown/LaTeX text to clean.
 * @returns {string} The text with \hdots replaced by \ldots.
 */
function fixMathCommands(content) {
  console.log(' π Fixing KaTeX-incompatible math commands...');

  return content.replace(/\\hdots(?![A-Za-z])/g, '\\ldots');
}
|
|
| |
| |
| |
| |
| |
/**
 * Convert brace-style matrix commands (\pmatrix{...}, \bmatrix{...},
 * \vmatrix{...}) into \begin{...}/\end{...} environments.
 *
 * The matrix body may contain one level of nested braces. It is split into
 * rows on "\\", each row is trimmed, empty rows are discarded, and the rows
 * are re-joined one per line.
 *
 * @param {string} content - Markdown/LaTeX text to convert.
 * @returns {string} The text with matrix commands in environment form.
 */
function fixMatrixCommands(content) {
  console.log(' π’ Converting matrix commands to KaTeX format...');

  // Processed in this order, one full pass over the text per environment.
  const environments = ['pmatrix', 'bmatrix', 'vmatrix'];
  let conversions = 0;
  let converted = content;

  for (const env of environments) {
    const braceForm = new RegExp(
      `\\\\${env}\\{([^{}]*(?:\\{[^{}]*\\}[^{}]*)*)\\}`,
      'g'
    );

    converted = converted.replace(braceForm, (_whole, body) => {
      conversions += 1;

      const rows = [];
      for (const piece of body.split('\\\\')) {
        const row = piece.trim();
        if (row) {
          rows.push(row);
        }
      }

      return `\\begin{${env}}\n${rows.join(' \\\\\n')}\n\\end{${env}}`;
    });
  }

  if (conversions > 0) {
    console.log(` β
Fixed ${conversions} matrix command(s)`);
  }

  return converted;
}
|
|
| |
| |
| |
| |
| |
/**
 * Replace typographic Unicode characters with ASCII / LaTeX equivalents.
 *
 * - Inside a `$...$` span (which also covers the inside of `$$...$$`), every
 *   middle dot (U+00B7) becomes \cdot. A space is appended when a letter
 *   follows, so the command name does not fuse with it (`a·b` must not
 *   become `a\cdotb`).
 * - Curly double/single quotes become straight quotes, the ellipsis
 *   character becomes "...", the en dash becomes "-" and the em dash "--".
 *
 * The characters are written as \u escapes on purpose: the previous literal
 * characters had been corrupted by a bad re-encoding, which made the quote
 * rules no-ops and made both dash rules match the Greek letter beta.
 *
 * @param {string} content - Markdown text to normalize.
 * @returns {string} The normalized text.
 */
function fixUnicodeIssues(content) {
  console.log(' π Fixing Unicode characters for MDX compatibility...');

  return content
    // Any run without "$" between two "$" that contains a middle dot.
    .replace(/\$([^$]*\u00B7[^$]*)\$/g, (_span, math) => {
      const converted = math
        .replace(/\u00B7(?=[A-Za-z])/g, '\\cdot ')
        .replace(/\u00B7/g, '\\cdot');
      return `$${converted}$`;
    })
    .replace(/[\u201C\u201D]/g, '"')
    .replace(/[\u2018\u2019]/g, "'")
    .replace(/\u2026/g, '...')
    .replace(/\u2013/g, '-')
    .replace(/\u2014/g, '--');
}
|
|
| |
| |
| |
| |
| |
/**
 * Turn inline math that contains row breaks into display math, and
 * normalize the spacing of display-math blocks.
 *
 * Pass 1: a single-line `$...$` span containing "\\" and at least one of the
 * characters `= + - * / ^ _ { }` is rewritten as a `$$` block with one row
 * per line. The lookarounds require single-dollar delimiters, so the inside
 * of a one-line `$$...$$` block is no longer mistaken for inline math (which
 * used to produce a corrupted `$$$ ... $$$`).
 *
 * Pass 2: every `$$` block whose delimiters sit on their own lines is
 * re-emitted with trimmed content and a blank-line-safe newline on each side.
 *
 * @param {string} content - Markdown text to fix.
 * @returns {string} The text with multiline math in display form.
 */
function fixMultilineMath(content) {
  console.log(' π Fixing multiline math expressions for MDX...');

  const inlineWithRowBreak = /(?<!\$)\$(?!\$)([^$\n]*\\\\[^$\n]*)\$(?!\$)/g;
  const displayBlock = /\$\$\s*\n\s*([^$]+?)\s*\n\s*\$\$/g;

  return content
    .replace(inlineWithRowBreak, (match, mathContent) => {
      // Without any operator/structure character, leave the span inline.
      if (!/[=+\-*/^_{}]/.test(mathContent)) {
        return match;
      }

      const cleanedMath = mathContent
        .trim()
        .replace(/\s*\\\\\s*/g, '\\\\\n ');
      return `$$\n${cleanedMath}\n$$`;
    })
    .replace(displayBlock, (_match, mathContent) => {
      return `\n$$\n${mathContent.trim()}\n$$\n`;
    });
}
|
|
| |
| |
| |
| |
| |
| |
/**
 * Fill empty fenced code blocks with the contents of snippet files.
 *
 * Snippets are looked up in `<inputDir>/snippets`. For an empty block tagged
 * with a language, the first directory entry whose name ends with the
 * matching extension is read, trimmed and placed inside the fence. Blocks
 * with no matching file, or whose file cannot be read, are left untouched.
 *
 * @param {string} content - Markdown text containing empty code fences.
 * @param {string|null} [inputDir=null] - Directory that holds `snippets/`.
 * @returns {string} The text with snippets injected where possible; the
 *   original text when no directory is given, the snippets directory is
 *   missing, or it cannot be listed.
 */
function injectCodeSnippets(content, inputDir = null) {
  console.log(' π» Injecting code snippets...');

  if (!inputDir) {
    console.log(' β οΈ No input directory provided, skipping code injection');
    return content;
  }

  const snippetsDir = join(inputDir, 'snippets');
  if (!existsSync(snippetsDir)) {
    console.log(' β οΈ Snippets directory not found, skipping code injection');
    return content;
  }

  let snippetFiles;
  try {
    snippetFiles = readdirSync(snippetsDir);
    console.log(` π Found ${snippetFiles.length} snippet file(s): ${snippetFiles.join(', ')}`);
  } catch (error) {
    console.log(` β Error reading snippets directory: ${error.message}`);
    return content;
  }

  // Fence language tag -> file extension; unknown tags are used as-is.
  const extensionByLanguage = {
    'python': 'py',
    'javascript': 'js',
    'typescript': 'ts',
    'bash': 'sh',
    'shell': 'sh'
  };

  let injected = 0;

  const filled = content.replace(/```\s*(\w+)\s*\n\s*```/g, (emptyBlock, language) => {
    const extension = extensionByLanguage[language] || language;
    const snippetFile = snippetFiles.find((name) => name.endsWith(`.${extension}`));

    if (snippetFile === undefined) {
      console.log(` β οΈ No ${language} snippet found (looking for .${extension})`);
      return emptyBlock;
    }

    try {
      const source = readFileSync(join(snippetsDir, snippetFile), 'utf8');
      injected += 1;
      console.log(` β
Injected: ${snippetFile}`);
      return ['```' + language, source.trim(), '```'].join('\n');
    } catch (error) {
      console.log(` β Error reading ${snippetFile}: ${error.message}`);
      return emptyBlock;
    }
  });

  if (injected > 0) {
    console.log(` π Injected ${injected} code snippet(s)`);
  }

  return filled;
}
|
|
| |
| |
| |
| |
| |
/**
 * Replace colons with dashes inside `href`, `data-reference` and `id`
 * attribute values.
 *
 * Every colon in a value is replaced (previously only the last one was, so
 * `fig:a:b` became `fig:a-b`). `href` values that are absolute URLs
 * (`scheme://...`, `mailto:`, `tel:`) are left untouched: rewriting their
 * colons would turn `https://host` into the broken `https-//host`.
 *
 * @param {string} content - Markdown/HTML text to fix.
 * @returns {string} The text with colon-free attribute values.
 */
function fixAllAttributes(content) {
  console.log(' π Fixing all attributes with colons...');

  let fixedCount = 0;

  // No "g" flag, so .test() keeps no state between calls.
  const absoluteUrl = /^(?:[a-z][a-z0-9+.-]*:\/\/|mailto:|tel:)/i;

  // Rebuild one attribute with all colons in its value turned into dashes.
  const withDashes = (name, value) => {
    fixedCount++;
    return `${name}="${value.replace(/:/g, '-')}"`;
  };

  let fixed = content.replace(/href="([^"]*:[^"]*)"/g, (match, value) => {
    return absoluteUrl.test(value) ? match : withDashes('href', value);
  });

  fixed = fixed.replace(/data-reference="([^"]*:[^"]*)"/g, (_match, value) => {
    return withDashes('data-reference', value);
  });

  fixed = fixed.replace(/id="([^"]*:[^"]*)"/g, (_match, value) => {
    return withDashes('id', value);
  });

  if (fixedCount > 0) {
    console.log(` β
Fixed ${fixedCount} attribute(s) with colons`);
  }

  return fixed;
}
|
|
| |
| |
| |
| |
| |
/**
 * Replace colons with dashes in bracketed link text of the form
 * `<a ...>[text]</a>`.
 *
 * Every colon inside the brackets is replaced; previously only the first one
 * was, leaving `[fig:a:b]` as `[fig-a:b]`. Link attributes are copied through
 * unchanged.
 *
 * @param {string} content - Markdown/HTML text to fix.
 * @returns {string} The text with colon-free bracketed link text.
 */
function fixLinkTextContent(content) {
  console.log(' π Fixing link text content with colons...');

  let fixedCount = 0;

  // Only links whose bracketed text contains at least one colon match.
  const cleanedContent = content.replace(
    /<a([^>]*)>\[([^\]]*:[^\]]*)\]<\/a>/g,
    (_match, attributes, text) => {
      fixedCount++;
      return `<a${attributes}>[${text.replace(/:/g, '-')}]</a>`;
    }
  );

  if (fixedCount > 0) {
    console.log(` β
Fixed ${fixedCount} link text(s) with colons`);
  }

  return cleanedContent;
}
|
|
| |
| |
| |
| |
| |
| |
/**
 * Convert angle-bracket autolinks (`<http://...>` / `<https://...>`) into
 * explicit Markdown links whose text and target are both the URL.
 *
 * @param {string} content - Markdown text to fix.
 * @returns {string} The text with autolinks rewritten as `[url](url)`.
 */
function fixAutolinkUrls(content) {
  console.log(' π Fixing autolink URLs with angle brackets...');

  const autolink = /<(https?:\/\/[^>]+)>/g;

  // Count first, then rewrite using the captured URL for text and target.
  const hits = content.match(autolink);
  const rewritten = hits === null ? 0 : hits.length;
  const cleanedContent = content.replace(autolink, '[$1]($1)');

  if (rewritten > 0) {
    console.log(` β
Fixed ${rewritten} autolink URL(s)`);
  }

  return cleanedContent;
}
|
|
| |
| |
| |
| |
| |
/**
 * Replace `%%ALIGN_ANCHOR_ID{...}%%` marker lines at the top of math code
 * fences with an empty `<span id="...">` placed before the fence.
 *
 * The marker must be the first line inside a "``` math" fence; the rest of
 * the fence content is kept as-is.
 *
 * @param {string} content - Markdown text containing anchor markers.
 * @returns {string} The text with markers converted to anchor spans.
 */
function convertAlignAnchors(content) {
  console.log(' π·οΈ Converting align anchor markers to HTML spans...');

  const markedMathFence = /``` math\n%%ALIGN_ANCHOR_ID\{([^}]+)\}%%\n([\s\S]*?)\n```/g;
  let converted = 0;

  const result = content.replace(markedMathFence, (_fence, anchorId, mathBody) => {
    converted += 1;
    const anchor = '<span id="' + anchorId + '" style="position: absolute;"></span>';
    return [anchor, '', '``` math', mathBody, '```'].join('\n');
  });

  if (converted > 0) {
    console.log(` β
Converted ${converted} align anchor marker(s) to spans`);
  }

  return result;
}
|
|
| |
| |
| |
| |
| |
| |
/**
 * Run the full post-processing pipeline over a Markdown document.
 *
 * The clean-up steps are applied in a fixed order, each one receiving the
 * output of the previous step. Code snippet injection runs last and only
 * when an input directory is supplied.
 *
 * @param {string} content - Raw Markdown text.
 * @param {string|null} [inputDir=null] - Directory passed on to
 *   `injectCodeSnippets`; when falsy, snippet injection is skipped.
 * @returns {string} The processed Markdown text.
 */
export function postProcessMarkdown(content, inputDir = null) {
  console.log('π§ Post-processing for KaTeX compatibility...');

  const pipeline = [
    removeTexGroupingCommands,
    simplifyLatexDelimiters,
    removeOrphanedLabels,
    convertAlignAnchors,
    fixMathCommands,
    fixMatrixCommands,
    fixUnicodeIssues,
    fixMultilineMath,
    fixAllAttributes,
    fixLinkTextContent,
    fixAutolinkUrls,
  ];

  const cleaned = pipeline.reduce((text, step) => step(text), content);

  return inputDir ? injectCodeSnippets(cleaned, inputDir) : cleaned;
}
|
|
| |
| |
| |
/**
 * Build the CLI configuration from `process.argv`.
 *
 * Recognized arguments: `--input=PATH`, `--output=PATH`, `--verbose`, and
 * `--help` / `-h` (prints usage and exits with status 0). Anything else is
 * ignored. When no output path is given, the input path is used, so the
 * file is processed in place.
 *
 * @returns {{input: string, output: string, verbose: boolean}} The
 *   resolved configuration.
 */
function parseArgs() {
  const helpText = `
π§ Markdown Post-Processor

Usage:
node post-processor.mjs [options]

Options:
--input=PATH Input Markdown file (default: output/main.md)
--output=PATH Output file (default: overwrites input)
--verbose Verbose output
--help, -h Show this help

Examples:
# Process main.md in-place
node post-processor.mjs

# Process with custom paths
node post-processor.mjs --input=raw.md --output=clean.md
`;

  const config = {
    input: join(__dirname, 'output', 'main.md'),
    output: null,
    verbose: false,
  };

  const inputFlag = '--input=';
  const outputFlag = '--output=';

  process.argv.slice(2).forEach((arg) => {
    if (arg.startsWith(inputFlag)) {
      config.input = arg.substring(inputFlag.length);
      return;
    }
    if (arg.startsWith(outputFlag)) {
      config.output = arg.substring(outputFlag.length);
      return;
    }
    if (arg === '--verbose') {
      config.verbose = true;
      return;
    }
    if (arg === '--help' || arg === '-h') {
      console.log(helpText);
      process.exit(0);
    }
  });

  // No (or empty) output path: overwrite the input file.
  config.output = config.output || config.input;

  return config;
}
|
|
/**
 * CLI entry point: read the input file, post-process it and write the
 * result to the output path.
 *
 * Any error raised while reading, processing or writing is reported on
 * stderr and the process exits with status 1. With `--verbose`, the line
 * counts before and after processing are printed.
 */
function main() {
  const { input, output, verbose } = parseArgs();

  console.log('π§ Markdown Post-Processor');
  console.log(`π Input: ${input}`);
  console.log(`π Output: ${output}`);

  try {
    const original = readFileSync(input, 'utf8');
    const processed = postProcessMarkdown(original);

    writeFileSync(output, processed);

    console.log(`β
Post-processing completed: ${output}`);

    if (verbose) {
      const countLines = (text) => text.split('\n').length;
      console.log(`π Lines: ${countLines(original)} β ${countLines(processed)}`);
    }
  } catch (error) {
    console.error('β Post-processing failed:');
    console.error(error.message);
    process.exit(1);
  }
}
|
|
| |
// Run the CLI only when this file is the script Node was asked to execute,
// not when it is imported as a module. The module URL is converted to a
// filesystem path before comparing: prefixing argv[1] with "file://" does
// not produce a valid file URL when the path contains characters that get
// percent-encoded (e.g. spaces) or on Windows, so the old string comparison
// silently skipped main() in those cases.
if (process.argv[1] && fileURLToPath(import.meta.url) === process.argv[1]) {
  main();
}
|
|