import { readFileSync, writeFileSync, existsSync, mkdirSync, readdirSync, statSync } from 'fs';
import { join, dirname, basename, extname } from 'path';
import { fileURLToPath } from 'url';
import matter from 'gray-matter';
import fetch from 'node-fetch';

const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

// Configuration
const DEFAULT_INPUT = join(__dirname, 'output');
const DEFAULT_OUTPUT = join(__dirname, 'output');
const STATIC_FRONTMATTER_PATH = join(__dirname, 'static', 'frontmatter.mdx');
function parseArgs() {
  const args = process.argv.slice(2);
  // Start with nulls so the positional-argument branches below can fire;
  // defaults are applied after parsing
  const config = {
    input: null,
    output: null,
  };
  for (const arg of args) {
    if (arg.startsWith('--input=')) {
      config.input = arg.substring('--input='.length);
    } else if (arg.startsWith('--output=')) {
      config.output = arg.substring('--output='.length);
    } else if (arg === '--help' || arg === '-h') {
      console.log(`
🔄 Notion Markdown to MDX Converter

Usage:
  node mdx-converter.mjs [options]

Options:
  --input=PATH   Input directory or file (default: ${DEFAULT_INPUT})
  --output=PATH  Output directory (default: ${DEFAULT_OUTPUT})
  --help, -h     Show this help

Examples:
  # Convert all markdown files in output directory
  node mdx-converter.mjs

  # Convert specific file
  node mdx-converter.mjs --input=article.md --output=converted/

  # Convert directory
  node mdx-converter.mjs --input=markdown-files/ --output=mdx-files/
`);
      process.exit(0);
    } else if (!config.input) {
      // First bare argument is treated as the input path
      config.input = arg;
    } else if (!config.output) {
      // Second bare argument is treated as the output path
      config.output = arg;
    }
  }
  config.input = config.input || DEFAULT_INPUT;
  config.output = config.output || DEFAULT_OUTPUT;
  return config;
}
/**
 * Track which Astro components are used during transformations
 */
const usedComponents = new Set();

/**
 * Track individual image imports needed
 */
const imageImports = new Map(); // src -> varName

/**
 * Track external images that need to be downloaded
 */
const externalImagesToDownload = new Map(); // url -> localPath

/**
 * Generate a variable name from image path
 * @param {string} src - Image source path
 * @returns {string} - Valid variable name
 */
function generateImageVarName(src) {
  // Extract filename without extension and make it a valid JS variable
  const filename = src.split('/').pop().replace(/\.[^.]+$/, '');
  return filename.replace(/[^a-zA-Z0-9]/g, '_').replace(/^[0-9]/, 'img_$&');
}
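
// Worked examples (derived from the rules above):
//   generateImageVarName('./assets/image/cover-photo.png') -> 'cover_photo'
//   generateImageVarName('./assets/image/2024-cover.png')  -> 'img_2024_cover'
// A leading digit is prefixed with 'img_' so the name stays a valid identifier.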
/**
 * Check if a URL is an external URL (HTTP/HTTPS)
 * @param {string} url - URL to check
 * @returns {boolean} - True if it's an external URL
 */
function isExternalImageUrl(url) {
  try {
    const urlObj = new URL(url);
    // Just check if it's HTTP/HTTPS - we'll try to download everything
    return urlObj.protocol === 'http:' || urlObj.protocol === 'https:';
  } catch {
    return false;
  }
}
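
// Examples:
//   isExternalImageUrl('https://pbs.twimg.com/media/abc.jpg') -> true
//   isExternalImageUrl('./assets/image/local.png')            -> false (relative path; new URL() throws)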
/**
 * Extract image URL from Twitter/X page
 * @param {string} tweetUrl - URL of the tweet
 * @returns {Promise<string|null>} - URL of the image or null if not found
 */
async function extractTwitterImageUrl(tweetUrl) {
  try {
    const response = await fetch(tweetUrl, {
      headers: {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
      }
    });
    if (!response.ok) {
      return null;
    }
    const html = await response.text();
    // Try to find image URLs in meta tags (Twitter Card)
    const metaImageMatch = html.match(/<meta property="og:image" content="([^"]+)"/);
    if (metaImageMatch) {
      let imageUrl = metaImageMatch[1];
      // Try to get the large version
      if (imageUrl.includes('?')) {
        imageUrl = imageUrl.split('?')[0] + '?format=jpg&name=large';
      }
      return imageUrl;
    }
    // Fallback: try to find pbs.twimg.com URLs in the HTML
    const pbsMatch = html.match(/https:\/\/pbs\.twimg\.com\/media\/([^"?]+)/);
    if (pbsMatch) {
      return `https://pbs.twimg.com/media/${pbsMatch[1]}?format=jpg&name=large`;
    }
    return null;
  } catch (error) {
    console.log(`    ⚠️ Failed to extract Twitter image: ${error.message}`);
    return null;
  }
}
/**
 * Download an external URL and save it locally
 * @param {string} imageUrl - External URL
 * @param {string} outputDir - Directory to save the file
 * @returns {Promise<string>} - Local path to the downloaded file
 */
async function downloadExternalImage(imageUrl, outputDir) {
  try {
    console.log(`    📥 Downloading external URL: ${imageUrl}`);
    // Create output directory if it doesn't exist
    if (!existsSync(outputDir)) {
      mkdirSync(outputDir, { recursive: true });
    }
    let actualImageUrl = imageUrl;
    // Check if it's a Twitter/X URL
    if (imageUrl.includes('twitter.com/') || imageUrl.includes('x.com/')) {
      console.log(`    🐦 Detected Twitter/X URL, attempting to extract image...`);
      const extractedUrl = await extractTwitterImageUrl(imageUrl);
      if (extractedUrl) {
        actualImageUrl = extractedUrl;
        console.log(`    ✅ Extracted image URL: ${extractedUrl}`);
      } else {
        console.log(`    ⚠️ Could not automatically extract image from Twitter/X`);
        console.log(`    💡 Manual download required:`);
        console.log(`       1. Open ${imageUrl} in your browser`);
        console.log(`       2. Right-click on the image and "Save image as..."`);
        console.log(`       3. Save it to: app/src/content/assets/image/`);
        throw new Error('Twitter/X images require manual download');
      }
    }
    // Generate filename from URL
    const urlObj = new URL(actualImageUrl);
    const pathname = urlObj.pathname;
    // Determine file extension - try to get it from URL, default to jpg
    let extension = 'jpg';
    if (pathname.includes('.')) {
      const urlExtension = pathname.split('.').pop().toLowerCase();
      if (['jpg', 'jpeg', 'png', 'gif', 'svg', 'webp', 'bmp', 'tiff'].includes(urlExtension)) {
        extension = urlExtension;
      }
    }
    // Generate unique filename (timestamp + random suffix avoids collisions)
    const filename = `external_${Date.now()}_${Math.random().toString(36).slice(2, 11)}.${extension}`;
    const localPath = join(outputDir, filename);
    // Try to download the URL
    const response = await fetch(actualImageUrl, {
      headers: {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
      }
    });
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}: ${response.statusText}`);
    }
    // response.buffer() is the node-fetch v2 API; on v3 use Buffer.from(await response.arrayBuffer())
    const buffer = await response.buffer();
    // Validate that we actually got data
    if (buffer.length === 0) {
      throw new Error('Empty response');
    }
    // Validate that it's actually an image, not HTML
    const contentType = response.headers.get('content-type');
    if (contentType && contentType.includes('text/html')) {
      throw new Error('Downloaded content is HTML, not an image');
    }
    // Save to local file
    writeFileSync(localPath, buffer);
    console.log(`    ✅ Downloaded: ${filename} (${buffer.length} bytes)`);
    return localPath;
  } catch (error) {
    console.log(`    ❌ Failed to download ${imageUrl}: ${error.message}`);
    throw error;
  }
}
/**
 * Process external images in content and download them
 * @param {string} content - Markdown content
 * @param {string} outputDir - Directory to save downloaded images
 * @returns {Promise<string>} - Content with external images replaced by local paths
 */
async function processExternalImages(content, outputDir) {
  console.log('  🔍 Processing external images...');
  let processedCount = 0;
  let downloadedCount = 0;
  // Find all external image URLs in markdown format: ![alt](url)
  const externalImageRegex = /!\[([^\]]*)\]\(([^)]+)\)/g;
  let match;
  const externalImages = new Map(); // url -> alt text
  // First pass: collect all external image URLs
  while ((match = externalImageRegex.exec(content)) !== null) {
    const alt = match[1];
    const url = match[2];
    if (isExternalImageUrl(url)) {
      externalImages.set(url, alt);
      console.log(`    🌐 Found external image: ${url}`);
    }
  }
  if (externalImages.size === 0) {
    console.log('    ℹ️ No external images found');
    return content;
  }
  // Second pass: download images and replace URLs
  let processedContent = content;
  for (const [url, alt] of externalImages) {
    try {
      // Download the image. Note: files land in <outputDir>, but the rewritten
      // reference points at ./assets/image/, where the site expects them to live
      const localPath = await downloadExternalImage(url, outputDir);
      const relativePath = `./assets/image/${basename(localPath)}`;
      // Replace the URL in content (alt and url are regex-escaped first)
      processedContent = processedContent.replace(
        new RegExp(`!\\[${alt.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\]\\(${url.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\)`, 'g'),
        `![${alt}](${relativePath})`
      );
      downloadedCount++;
      processedCount++;
    } catch (error) {
      console.log(`    ⚠️ Skipping external image due to download failure: ${url}`);
    }
  }
  if (downloadedCount > 0) {
    console.log(`    ✅ Downloaded ${downloadedCount} external image(s)`);
  }
  return processedContent;
}
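
// Example of the rewrite performed above (filename is illustrative):
//   ![diagram](https://example.com/media/diagram.png)
// becomes, after the file is downloaded:
//   ![diagram](./assets/image/external_1700000000000_ab12cd34e.png)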
/**
 * Detect and track Astro components used in the content
 * @param {string} content - MDX content
 */
function detectAstroComponents(content) {
  console.log('  🔍 Detecting Astro components in content...');
  let detectedCount = 0;
  // Known Astro components that should be auto-imported
  const knownComponents = [
    'HtmlEmbed', 'Image', 'Note', 'Sidenote', 'Wide', 'FullWidth',
    'Accordion', 'Quote', 'Reference', 'Glossary', 'Stack', 'ThemeToggle',
    'RawHtml', 'HfUser'
  ];
  // Find all JSX elements that look like Astro components
  // Pattern: <ComponentName ... />
  const componentMatches = content.match(/<([A-Z][a-zA-Z0-9]*)\s*[^>]*\/?>/g);
  if (componentMatches) {
    for (const match of componentMatches) {
      // Extract component name from the JSX element
      const componentMatch = match.match(/<([A-Z][a-zA-Z0-9]*)/);
      if (componentMatch) {
        const componentName = componentMatch[1];
        // Only track known Astro components (skip HTML elements)
        if (knownComponents.includes(componentName) && !usedComponents.has(componentName)) {
          usedComponents.add(componentName);
          detectedCount++;
          console.log(`    📦 Found component: ${componentName}`);
        }
      }
    }
  }
  if (detectedCount > 0) {
    console.log(`    ✅ Detected ${detectedCount} new Astro component(s)`);
  } else {
    console.log(`    ℹ️ No new Astro components detected`);
  }
}
/**
 * Add required component imports to the frontmatter
 * @param {string} content - MDX content
 * @returns {string} - Content with component imports
 */
function addComponentImports(content) {
  console.log('  📦 Adding component and image imports...');
  let imports = [];
  // Add component imports
  if (usedComponents.size > 0) {
    const componentImports = Array.from(usedComponents)
      .map(component => `import ${component} from '../components/${component}.astro';`);
    imports.push(...componentImports);
    console.log(`    ✅ Importing components: ${Array.from(usedComponents).join(', ')}`);
  }
  // Add image imports
  if (imageImports.size > 0) {
    const imageImportStatements = Array.from(imageImports.entries())
      .map(([src, varName]) => `import ${varName} from '${src}';`);
    imports.push(...imageImportStatements);
    console.log(`    ✅ Importing ${imageImports.size} image(s)`);
  }
  if (imports.length === 0) {
    console.log('    ℹ️ No imports needed');
    return content;
  }
  const importBlock = imports.join('\n');
  // Insert imports after the closing --- of the frontmatter
  const frontmatterEnd = content.indexOf('---', 3) + 3;
  if (frontmatterEnd > 2) {
    return content.slice(0, frontmatterEnd) + '\n\n' + importBlock + '\n\n' + content.slice(frontmatterEnd);
  } else {
    // No frontmatter, add at beginning
    return importBlock + '\n\n' + content;
  }
}
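
// Example import block emitted for a document that uses <Note> plus one local image:
//   import Note from '../components/Note.astro';
//   import cover_photo from './assets/image/cover-photo.png';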
/**
 * Load static frontmatter from file
 * @returns {object} - Static frontmatter data
 */
function loadStaticFrontmatter() {
  try {
    if (existsSync(STATIC_FRONTMATTER_PATH)) {
      const staticContent = readFileSync(STATIC_FRONTMATTER_PATH, 'utf8');
      const { data } = matter(staticContent);
      console.log('    ✅ Loaded static frontmatter from file');
      return data;
    }
    console.log('    ℹ️ No static frontmatter file found');
    return {};
  } catch (error) {
    console.log(`    ⚠️ Failed to load static frontmatter: ${error.message}`);
    return {};
  }
}
/**
 * Ensure proper frontmatter for MDX using static file first, then existing data
 * @param {string} content - MDX content
 * @param {string} pageId - Notion page ID (optional, kept for compatibility but ignored)
 * @param {string} notionToken - Notion API token (optional, kept for compatibility but ignored)
 * @returns {Promise<string>} - Content with proper frontmatter
 */
async function ensureFrontmatter(content, pageId = null, notionToken = null) {
  console.log('  📋 Ensuring proper frontmatter...');
  // Load static frontmatter first (highest priority)
  const staticData = loadStaticFrontmatter();
  if (!content.startsWith('---')) {
    // No frontmatter in content, use static + basic defaults
    let baseData = { ...staticData };
    // Add basic defaults for required fields if not in static
    if (!baseData.title) baseData.title = 'Article';
    if (!baseData.published) {
      baseData.published = new Date().toLocaleDateString('en-US', {
        year: 'numeric',
        month: 'short',
        day: '2-digit'
      });
    }
    if (baseData.tableOfContentsAutoCollapse === undefined) {
      baseData.tableOfContentsAutoCollapse = true;
    }
    const frontmatter = matter.stringify('', baseData);
    console.log('    ✅ Applied static frontmatter to content without frontmatter');
    return frontmatter + content;
  }
  // Parse existing frontmatter and merge with static (static takes priority)
  try {
    const { data: existingData, content: body } = matter(content);
    // Merge: existing data first, then static data overrides
    const mergedData = { ...existingData, ...staticData };
    // Ensure required fields if still missing after merge
    if (!mergedData.title) mergedData.title = 'Article';
    if (!mergedData.published) {
      mergedData.published = new Date().toLocaleDateString('en-US', {
        year: 'numeric',
        month: 'short',
        day: '2-digit'
      });
    }
    if (mergedData.tableOfContentsAutoCollapse === undefined) {
      mergedData.tableOfContentsAutoCollapse = true;
    }
    const enhancedContent = matter.stringify(body, mergedData);
    console.log('    ✅ Merged static and existing frontmatter');
    return enhancedContent;
  } catch (error) {
    console.log('    ⚠️ Could not parse frontmatter, keeping as is');
    return content;
  }
}
/**
 * Generate basic frontmatter
 * @returns {string} - Basic frontmatter
 */
function generateBasicFrontmatter() {
  const currentDate = new Date().toLocaleDateString('en-US', {
    year: 'numeric',
    month: 'short',
    day: '2-digit'
  });
  return `---
title: "Notion Article"
published: "${currentDate}"
tableOfContentsAutoCollapse: true
---
`;
}
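
// Note: this helper mirrors the defaults applied inline by ensureFrontmatter();
// it is not called from the conversion pipeline itself.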
/**
 * Check if a line is a table line
 * @param {string} line - Line to check
 * @returns {boolean} - True if it's a table line
 */
function isTableLine(line) {
  const trimmed = line.trim();
  return trimmed.startsWith('|') && trimmed.endsWith('|');
}

/**
 * Check if a line is a list item
 * @param {string} line - Line to check
 * @returns {boolean} - True if it's a list item
 */
function isListItem(line) {
  const trimmed = line.trim();
  // Match *, -, + bullet points or 1. 2. 3. numbered lists
  return /^\s*[\*\-\+]\s/.test(trimmed) || /^\s*\d+\.\s/.test(trimmed);
}
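
// Examples:
//   isTableLine('| Name | Value |') -> true
//   isListItem('- bullet item')     -> true
//   isListItem('3. numbered item')  -> true
//   isListItem('plain paragraph')   -> false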
/**
 * Add a blank line after each markdown table and list
 * @param {string} content - MDX content
 * @returns {string} - Content with blank lines after tables and lists
 */
function addBlankLineAfterTablesAndLists(content) {
  console.log('  📋 Adding blank lines after tables and lists...');
  let addedTableCount = 0;
  let addedListCount = 0;
  const lines = content.split('\n');
  const result = [];
  for (let i = 0; i < lines.length; i++) {
    result.push(lines[i]);
    // Check if current line is the end of a table
    if (isTableLine(lines[i])) {
      // Look ahead to see if this is the last line of a table
      let isLastTableLine = false;
      // Check if next line is empty or doesn't start with |
      if (i + 1 >= lines.length ||
          lines[i + 1].trim() === '' ||
          !isTableLine(lines[i + 1])) {
        // Look back to find if we're actually inside a table
        let tableLineCount = 0;
        for (let j = i; j >= 0 && isTableLine(lines[j]); j--) {
          tableLineCount++;
        }
        // Only add blank line if we found at least 2 table lines (making it a real table)
        if (tableLineCount >= 2) {
          isLastTableLine = true;
        }
      }
      if (isLastTableLine) {
        addedTableCount++;
        result.push(''); // Add blank line
      }
    }
    // Check if current line is the end of a list
    else if (isListItem(lines[i])) {
      // Look ahead to see if this is the last line of a list
      let isLastListItem = false;
      // Check if next line is empty or doesn't start with a list marker
      if (i + 1 >= lines.length ||
          lines[i + 1].trim() === '' ||
          !isListItem(lines[i + 1])) {
        isLastListItem = true;
      }
      if (isLastListItem) {
        addedListCount++;
        result.push(''); // Add blank line
      }
    }
  }
  if (addedTableCount > 0 || addedListCount > 0) {
    console.log(`    ✅ Added blank line after ${addedTableCount} table(s) and ${addedListCount} list(s)`);
  } else {
    console.log('    ℹ️ No tables or lists found to process');
  }
  return result.join('\n');
}
/**
 * Transform markdown images to Image components
 * @param {string} content - Markdown content
 * @returns {string} - Content with Image components
 */
function transformMarkdownImages(content) {
  console.log('  🖼️ Transforming markdown images to Image components...');
  let transformedCount = 0;
  // Transform markdown images: ![alt](src) -> <Image src={varName} alt="alt" />
  content = content.replace(/!\[([^\]]*)\]\(([^)]+)\)/g, (match, alt, src) => {
    transformedCount++;
    // Clean up the src path - remove /media/ prefix and use relative path
    let cleanSrc = src;
    if (src.startsWith('/media/')) {
      cleanSrc = src.replace('/media/', './assets/image/');
    }
    // Generate variable name for the image import
    const varName = generateImageVarName(cleanSrc);
    // Add to imageImports if not already present
    if (!imageImports.has(cleanSrc)) {
      imageImports.set(cleanSrc, varName);
    }
    // Extract filename for alt text if none provided
    const finalAlt = alt || src.split('/').pop().split('.')[0];
    return `<Image src={${varName}} alt="${finalAlt}" />`;
  });
  if (transformedCount > 0) {
    console.log(`    ✅ Transformed ${transformedCount} markdown image(s) to Image components with imports`);
  } else {
    console.log('    ℹ️ No markdown images found to transform');
  }
  return content;
}
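
// Example of the transformation above:
//   ![Cover](/media/cover-photo.png)
// becomes:
//   <Image src={cover_photo} alt="Cover" />
// and the tracked import later emitted by addComponentImports() is:
//   import cover_photo from './assets/image/cover-photo.png';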
/**
 * Add proper spacing around Astro components
 * @param {string} content - MDX content
 * @returns {string} - Content with proper spacing around components
 */
function addSpacingAroundComponents(content) {
  console.log('  📏 Adding spacing around Astro components...');
  let processedContent = content;
  let spacingCount = 0;
  // Known Astro components that should have spacing
  const knownComponents = [
    'HtmlEmbed', 'Image', 'Note', 'Sidenote', 'Wide', 'FullWidth',
    'Accordion', 'Quote', 'Reference', 'Glossary', 'Stack', 'ThemeToggle',
    'RawHtml', 'HfUser', 'Figure'
  ];
  // Process each component type
  for (const component of knownComponents) {
    // Pattern for components with content: <Component>...</Component>
    // Process this first to handle the complete component structure
    const withContentPattern = new RegExp(`(<${component}[^>]*>)([\\s\\S]*?)(<\\/${component}>)`, 'g');
    processedContent = processedContent.replace(withContentPattern, (match, openTag, content, closeTag) => {
      spacingCount++;
      // Ensure blank line before opening tag and after closing tag
      // Also ensure closing tag is on its own line
      const trimmedContent = content.trim();
      return `\n\n${openTag}\n${trimmedContent}\n${closeTag}\n\n`;
    });
    // Pattern for self-closing components: <Component ... />
    // Requiring the trailing "/>" avoids re-matching the opening tags handled above
    const selfClosingPattern = new RegExp(`(<${component}[^>]*\\/>)`, 'g');
    processedContent = processedContent.replace(selfClosingPattern, (match) => {
      spacingCount++;
      return `\n\n${match}\n\n`;
    });
  }
  // Clean up excessive newlines (more than 2 consecutive)
  processedContent = processedContent.replace(/\n{3,}/g, '\n\n');
  if (spacingCount > 0) {
    console.log(`    ✅ Added spacing around ${spacingCount} component(s)`);
  } else {
    console.log('    ℹ️ No components found to add spacing around');
  }
  return processedContent;
}
/**
 * Fix smart quotes (curly quotes) and replace them with straight quotes
 * @param {string} content - Markdown content
 * @returns {string} - Content with fixed quotes
 */
function fixSmartQuotes(content) {
  console.log('  ✏️ Fixing smart quotes (curly quotes)...');
  // Count the smart quotes before replacing them
  const fixedCount = (content.match(/[\u201C\u201D\u2018\u2019]/g) || []).length;
  // Replace smart double quotes (\u201C, \u201D) with straight quotes (")
  content = content.replace(/[\u201C\u201D]/g, '"');
  // Replace smart single quotes (\u2018, \u2019) with straight quotes (')
  content = content.replace(/[\u2018\u2019]/g, "'");
  if (fixedCount > 0) {
    console.log(`    ✅ Fixed ${fixedCount} smart quote(s)`);
  } else {
    console.log('    ℹ️ No smart quotes found');
  }
  return content;
}
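
// Example:
//   fixSmartQuotes('“quoted” and ‘single’') -> '"quoted" and \'single\''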
/**
 * Main MDX processing function that applies all transformations
 * @param {string} content - Raw Markdown content
 * @param {string} pageId - Notion page ID (optional)
 * @param {string} notionToken - Notion API token (optional)
 * @param {string} outputDir - Output directory for downloaded images (optional)
 * @returns {Promise<string>} - Processed MDX content compatible with Astro
 */
async function processMdxContent(content, pageId = null, notionToken = null, outputDir = null) {
  console.log('🔧 Processing for Astro MDX compatibility...');
  // Clear previous tracking
  usedComponents.clear();
  imageImports.clear();
  externalImagesToDownload.clear();
  let processedContent = content;
  // Fix smart quotes first
  processedContent = fixSmartQuotes(processedContent);
  // Process external images next (before the other transformations)
  if (outputDir) {
    // Create a temporary external images directory in the output folder
    const externalImagesDir = join(outputDir, 'external-images');
    processedContent = await processExternalImages(processedContent, externalImagesDir);
  }
  // Apply essential steps only
  processedContent = await ensureFrontmatter(processedContent, pageId, notionToken);
  // Add blank lines after tables and lists
  processedContent = addBlankLineAfterTablesAndLists(processedContent);
  // Transform markdown images to Image components
  processedContent = transformMarkdownImages(processedContent);
  // Add spacing around Astro components
  processedContent = addSpacingAroundComponents(processedContent);
  // Detect Astro components used in the content before adding imports
  detectAstroComponents(processedContent);
  // Add component imports at the end
  processedContent = addComponentImports(processedContent);
  return processedContent;
}
/**
 * Convert a single markdown file to MDX
 * @param {string} inputFile - Input markdown file
 * @param {string} outputDir - Output directory
 * @param {string} pageId - Notion page ID (optional)
 * @param {string} notionToken - Notion API token (optional)
 */
async function convertFileToMdx(inputFile, outputDir, pageId = null, notionToken = null) {
  const filename = basename(inputFile, '.md');
  const outputFile = join(outputDir, `${filename}.mdx`);
  console.log(`🔄 Converting: ${basename(inputFile)} → ${basename(outputFile)}`);
  try {
    const markdownContent = readFileSync(inputFile, 'utf8');
    const mdxContent = await processMdxContent(markdownContent, pageId, notionToken, outputDir);
    writeFileSync(outputFile, mdxContent);
    console.log(`  ✅ Converted: ${outputFile}`);
    // Show file size
    const inputSize = Math.round(markdownContent.length / 1024);
    const outputSize = Math.round(mdxContent.length / 1024);
    console.log(`  📊 Input: ${inputSize}KB → Output: ${outputSize}KB`);
  } catch (error) {
    console.error(`  ❌ Failed to convert ${inputFile}: ${error.message}`);
  }
}
/**
 * Convert all markdown files in a directory to MDX
 * @param {string} inputPath - Input path (file or directory)
 * @param {string} outputDir - Output directory
 * @param {string} pageId - Notion page ID (optional)
 * @param {string} notionToken - Notion API token (optional)
 */
async function convertToMdx(inputPath, outputDir, pageId = null, notionToken = null) {
  console.log('🚀 Notion Markdown to Astro MDX Converter');
  console.log(`📁 Input: ${inputPath}`);
  console.log(`📁 Output: ${outputDir}`);
  // Check if input exists
  if (!existsSync(inputPath)) {
    console.error(`❌ Input not found: ${inputPath}`);
    process.exit(1);
  }
  try {
    // Ensure output directory exists
    if (!existsSync(outputDir)) {
      mkdirSync(outputDir, { recursive: true });
    }
    let filesToConvert = [];
    if (statSync(inputPath).isDirectory()) {
      // Convert all .md files in directory
      const files = readdirSync(inputPath);
      filesToConvert = files
        .filter(file => file.endsWith('.md') && !file.includes('.raw.md'))
        .map(file => join(inputPath, file));
    } else if (inputPath.endsWith('.md')) {
      // Convert single file
      filesToConvert = [inputPath];
    } else {
      console.error('❌ Input must be a .md file or directory containing .md files');
      process.exit(1);
    }
    if (filesToConvert.length === 0) {
      console.log('ℹ️ No .md files found to convert');
      return;
    }
    console.log(`📋 Found ${filesToConvert.length} file(s) to convert`);
    // Convert each file
    for (const file of filesToConvert) {
      await convertFileToMdx(file, outputDir, pageId, notionToken);
    }
    console.log(`✅ Conversion completed! ${filesToConvert.length} file(s) processed`);
  } catch (error) {
    console.error('❌ Conversion failed:', error.message);
    process.exit(1);
  }
}
export { convertToMdx };

async function main() {
  const config = parseArgs();
  // Await the conversion so the completion message prints after the work is done
  await convertToMdx(config.input, config.output);
  console.log('🎉 MDX conversion completed!');
}

if (import.meta.url === `file://${process.argv[1]}`) {
  main();
}
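
// Programmatic use of the exported API (a minimal sketch; paths are illustrative):
//   import { convertToMdx } from './mdx-converter.mjs';
//   await convertToMdx('notion-export/', 'app/src/content/');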