|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import { browser } from '$app/environment'; |
|
|
import { MimeTypeApplication, MimeTypeImage } from '$lib/enums/files'; |
|
|
import * as pdfjs from 'pdfjs-dist'; |
|
|
|
|
|
/**
 * Minimal shape of a page's extracted text: a list of items, each exposing
 * its text through `str`. Only the fields this module reads are described.
 */
type TextContent = {
  items: { str: string }[];
};
|
|
|
|
|
// In the browser, load the PDF.js worker source as raw text and hand it to
// PDF.js through a Blob URL. A failure here is non-fatal: only a warning is
// logged and the module keeps loading.
if (browser) {
  void (async () => {
    try {
      const { default: workerSource } = await import('pdfjs-dist/build/pdf.worker.min.mjs?raw');
      const workerUrl = URL.createObjectURL(
        new Blob([workerSource], { type: 'application/javascript' })
      );
      pdfjs.GlobalWorkerOptions.workerSrc = workerUrl;
    } catch {
      console.warn('Failed to load PDF.js worker, PDF processing may not work');
    }
  })();
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Reads a file into memory through a `FileReader`.
 *
 * @param file - The file to read.
 * @returns A promise that resolves with the file contents as an `ArrayBuffer`.
 * @throws Rejects with `Error('Failed to read file.')` when the reader reports
 *   an error or finishes without a result.
 */
async function getFileAsBuffer(file: File): Promise<ArrayBuffer> {
  const readFailure = (): Error => new Error('Failed to read file.');

  return new Promise<ArrayBuffer>((resolve, reject) => {
    const fileReader = new FileReader();

    fileReader.onerror = () => reject(readFailure());

    fileReader.onload = (loadEvent) => {
      const contents = loadEvent.target?.result;
      if (!contents) {
        reject(readFailure());
        return;
      }
      resolve(contents as ArrayBuffer);
    };

    fileReader.readAsArrayBuffer(file);
  });
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Extracts the text of every page of a PDF file.
 *
 * Text is requested for all pages concurrently; every text item becomes one
 * line of the result, in page order.
 *
 * @param file - The PDF file to read.
 * @returns The `str` of every text item, joined with newlines.
 * @throws Error when called outside the browser, or an Error prefixed with
 *   "Failed to convert PDF to text" when reading or parsing fails.
 */
export async function convertPDFToText(file: File): Promise<string> {
  if (!browser) {
    throw new Error('PDF processing is only available in the browser');
  }

  try {
    const buffer = await getFileAsBuffer(file);
    const loadingTask = pdfjs.getDocument(buffer);

    try {
      const pdf = await loadingTask.promise;

      // PDF.js page numbers are 1-based.
      const textContentPromises: Promise<TextContent>[] = Array.from(
        { length: pdf.numPages },
        (_, index) =>
          // eslint-disable-next-line @typescript-eslint/no-explicit-any -- the page is only used to fetch its text content
          pdf.getPage(index + 1).then((page: any) => page.getTextContent())
      );

      const textContents = await Promise.all(textContentPromises);
      const textItems = textContents.flatMap((textContent: TextContent) =>
        // Items without a `str` contribute an empty line.
        textContent.items.map((item) => item.str ?? '')
      );

      return textItems.join('\n');
    } finally {
      // Always release the document and its worker-side resources. A cleanup
      // failure must not replace the result or the original error.
      await loadingTask.destroy().catch(() => undefined);
    }
  } catch (error) {
    console.error('Error converting PDF to text:', error);
    throw new Error(
      `Failed to convert PDF to text: ${error instanceof Error ? error.message : 'Unknown error'}`
    );
  }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Renders every page of a PDF file to a canvas and returns one data URL per
 * page, in page order.
 *
 * Pages are fetched one after another, but each render is started without
 * waiting for the previous one to finish.
 *
 * @param file - The PDF file to render.
 * @param scale - Viewport scale passed to PDF.js for every page (default 1.5).
 * @returns One data URL per page, produced by
 *   `canvas.toDataURL(MimeTypeImage.PNG)`.
 * @throws Error when called outside the browser, or an Error prefixed with
 *   "Failed to convert PDF to images" when reading, parsing or rendering fails.
 */
export async function convertPDFToImage(file: File, scale: number = 1.5): Promise<string[]> {
  if (!browser) {
    throw new Error('PDF processing is only available in the browser');
  }

  try {
    const buffer = await getFileAsBuffer(file);
    const loadingTask = pdfjs.getDocument(buffer);
    const pages: Promise<string>[] = [];

    try {
      const doc = await loadingTask.promise;

      // PDF.js page numbers are 1-based.
      for (let i = 1; i <= doc.numPages; i++) {
        const page = await doc.getPage(i);
        const viewport = page.getViewport({ scale });
        const canvas = document.createElement('canvas');
        const ctx = canvas.getContext('2d');

        canvas.width = viewport.width;
        canvas.height = viewport.height;

        if (!ctx) {
          throw new Error('Failed to get 2D context from canvas');
        }

        const task = page.render({
          canvasContext: ctx,
          viewport: viewport,
          canvas: canvas
        });

        pages.push(task.promise.then(() => canvas.toDataURL(MimeTypeImage.PNG)));
      }

      return await Promise.all(pages);
    } finally {
      // Let in-flight renders settle first so none is left as an orphaned
      // rejection, then release the document and its worker-side resources.
      // A cleanup failure must not replace the result or the original error.
      await Promise.allSettled(pages);
      await loadingTask.destroy().catch(() => undefined);
    }
  } catch (error) {
    console.error('Error converting PDF to images:', error);
    throw new Error(
      `Failed to convert PDF to images: ${error instanceof Error ? error.message : 'Unknown error'}`
    );
  }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Tells whether a file declares the PDF MIME type.
 *
 * @param file - The file whose `type` is inspected.
 * @returns `true` when `file.type` equals `MimeTypeApplication.PDF`.
 */
export function isPdfFile(file: File): boolean {
  const { type: declaredType } = file;
  return declaredType === MimeTypeApplication.PDF;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Tells whether a MIME type string is the PDF application type.
 * Only `MimeTypeApplication.PDF` is recognised.
 *
 * @param mimeType - The MIME type string to test.
 * @returns `true` when `mimeType` equals `MimeTypeApplication.PDF`.
 */
export function isApplicationMimeType(mimeType: string): boolean {
  const pdfMimeType: string = MimeTypeApplication.PDF;
  return pdfMimeType === mimeType;
}
|
|
|