web-llm-embed / src /components /FileLoader.jsx
matt HOFFNER
handle file embedding
e82c85b
raw
history blame
2.3 kB
import { useState } from 'react';
import { db } from '@/utils/db-client';
import { DashButton } from './DashButton'
import { FileEmbedder } from './FileEmbedder';
import * as PDFJS from 'pdfjs-dist/build/pdf';
// Point pdf.js at a worker script hosted on cdnjs. The path embeds
// PDFJS.version so the worker build matches the imported library, and the
// URL is protocol-relative (starts with `//`), so it uses the scheme of the
// page that loads it.
PDFJS.GlobalWorkerOptions.workerSrc = `//cdnjs.cloudflare.com/ajax/libs/pdf.js/${PDFJS.version}/pdf.worker.min.js`;
/**
 * Static helpers that pull plain text out of a PDF via pdf.js.
 */
export default class Pdf {
  /**
   * Extracts the text of one page.
   *
   * @param {object} pdf - Loaded document exposing `getPage(pageNo)`.
   * @param {number} pageNo - Page number handed to `pdf.getPage`.
   * @returns {Promise<string>} The `str` of every text item on the page,
   *   concatenated with no separator.
   */
  static async getPageText(pdf, pageNo) {
    const { items } = await (await pdf.getPage(pageNo)).getTextContent();
    return items.map(({ str }) => str).join('');
  }

  /**
   * Extracts the text of a whole document.
   *
   * @param {*} source - Passed unchanged to `PDFJS.getDocument`.
   * @returns {Promise<string>} The text of pages 1..numPages, in page order,
   *   joined with a single space.
   */
  static async getPDFText(source) {
    const loadingTask = PDFJS.getDocument(source);
    const pdf = await loadingTask.promise;
    // Start every page extraction up front so the pages resolve concurrently;
    // Promise.all keeps the results in page order.
    const pending = Array.from({ length: pdf.numPages }, (_, index) =>
      Pdf.getPageText(pdf, index + 1),
    );
    const texts = await Promise.all(pending);
    return texts.join(' ');
  }
}
export const FileLoader = () => {
const [files, setFiles] = useState();
const [uploadStatus, setUploadStatus] = useState("Embed");
const handleEmbed = (files) => {
setFiles(files)
};
return (
<>
<FileEmbedder handleEmbed={handleEmbed} />
<DashButton
handleClick={async () => {
if (files && files.length) {
const file = files[0];
let text;
const blob = new Blob([file], { type: 'text/plain' });
if (file.type === "application/pdf") {
text = await Pdf.getPDFText(URL.createObjectURL(blob));
} else {
text = URL.createObjectURL(blob);
}
const response = await fetch('/api/docHandle', {
method: 'POST',
body: JSON.stringify({ text }),
});
const docChat = await response.json();
console.log('handleDocs-getModels: ', docChat);
const id = await db.docs.add({
fileName: file.name || '',
fileSourceData: pdfText,
model: docChat,
});
setUploadStatus("Embedding Completed");
console.log('handleDocs-setToDb', id);
}
}}
>
<div>{uploadStatus}</div>
</DashButton>
</>
);
};