File size: 2,077 Bytes
e82c85b
 
 
 
 
 
 
1300e36
 
 
 
 
 
 
e82c85b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5d239ba
e82c85b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1300e36
 
e82c85b
1300e36
 
5d239ba
e82c85b
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import { useState } from 'react';
import { DashButton } from './DashButton'
import { FileEmbedder } from './FileEmbedder';
import * as PDFJS from 'pdfjs-dist/build/pdf';

// Point pdf.js at a worker script whose version is taken from PDFJS.version,
// so the worker file requested always corresponds to the imported library.
// The scheme is spelled out (https://) instead of being protocol-relative so
// the URL still resolves when the page itself is not served over http(s).
PDFJS.GlobalWorkerOptions.workerSrc = `https://cdnjs.cloudflare.com/ajax/libs/pdf.js/${PDFJS.version}/pdf.worker.min.js`;

/**
 * Promise wrapper around `FileReader.readAsText`.
 *
 * @param {Blob} blob - Blob (or File) handed to the reader.
 * @returns {Promise<string>} Resolves with the `result` of the load event's
 *   target; rejects with whatever the reader passes to its `onerror` handler.
 */
const readFile = (blob) => {
  const executor = (resolve, reject) => {
    const fileReader = new FileReader();
    fileReader.onload = (loadEvent) => {
      resolve(loadEvent.target.result);
    };
    fileReader.onerror = reject;
    fileReader.readAsText(blob);
  };
  return new Promise(executor);
};

/**
 * Static helpers that pull plain text out of a PDF using pdf.js.
 */
export default class Pdf {
  /**
   * Extracts the text of one page.
   *
   * @param {object} pdf - Loaded pdf.js document; only `getPage` is used.
   * @param {number} pageNo - Page number forwarded to `pdf.getPage`
   *   (`getPDFText` counts pages starting at 1).
   * @returns {Promise<string>} The `str` of every text item on the page,
   *   concatenated with no separator.
   */
  static async getPageText(pdf, pageNo) {
    const page = await pdf.getPage(pageNo);
    const { items } = await page.getTextContent();
    return items.map((item) => item.str).join('');
  }

  /**
   * Extracts the text of every page of a PDF.
   *
   * @param {*} source - Passed unchanged to `PDFJS.getDocument`.
   * @returns {Promise<string>} Page texts joined with a single space, in page
   *   order.
   * @throws Rejects with the underlying error if the document or any page
   *   fails to load.
   */
  static async getPDFText(source) {
    const loadingTask = PDFJS.getDocument(source);
    try {
      const pdf = await loadingTask.promise;
      // Start all pages at once; Promise.all keeps results in page order.
      const pageTexts = await Promise.all(
        Array.from({ length: pdf.numPages }, (_, index) => Pdf.getPageText(pdf, index + 1)),
      );
      return pageTexts.join(' ');
    } finally {
      // Release the document on success and on failure so repeated calls do
      // not accumulate loaded documents.
      await loadingTask.destroy();
    }
  }
}

export const FileLoader = ({ setFileText }) => {
  const [files, setFiles] = useState();
  const [uploadStatus, setUploadStatus] = useState("Embed");
  const handleEmbed = (files) => {
    setFiles(files)
  };

  return (
    <>
      <FileEmbedder handleEmbed={handleEmbed} />
      <DashButton
        handleClick={async () => {
          if (files && files.length) {
            const file = files[0];
            let text;
            const blob = new Blob([file], { type: 'text/plain' });
            if (file.type === "application/pdf") {
              text = await Pdf.getPDFText(URL.createObjectURL(blob));
            } else { 
              text = await readFile(file)
            }
            console.log(`file text: ${text}`);
            setFileText(text);
            setUploadStatus("Embed Complete");
          }
        }}
      >
        <div>{uploadStatus}</div>
      </DashButton>
    </>
  );
};