Spaces:
Runtime error
Runtime error
matt HOFFNER
committed on
Commit
·
1300e36
1
Parent(s):
5d239ba
cleanup
Browse files
- package-lock.json +13 -2
- package.json +1 -0
- src/components/ChatWindow.jsx +22 -11
- src/components/FileLoader.jsx +11 -9
package-lock.json
CHANGED
|
@@ -16,11 +16,11 @@
|
|
| 16 |
"@types/react-dom": "18.2.4",
|
| 17 |
"@xenova/transformers": "^2.1.1",
|
| 18 |
"chromadb": "^1.5.2",
|
|
|
|
| 19 |
"dexie": "^3.2.4",
|
| 20 |
"eslint": "8.40.0",
|
| 21 |
"eslint-config-next": "13.4.2",
|
| 22 |
"fs-extra": "^11.1.1",
|
| 23 |
-
"hnswlib-node": "^1.4.2",
|
| 24 |
"langchain": "^0.0.90",
|
| 25 |
"next": "13.4.2",
|
| 26 |
"pdfjs-dist": "^3.7.107",
|
|
@@ -1968,6 +1968,8 @@
|
|
| 1968 |
"version": "1.5.0",
|
| 1969 |
"resolved": "https://registry.npmjs.org/bindings/-/bindings-1.5.0.tgz",
|
| 1970 |
"integrity": "sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ==",
|
|
|
|
|
|
|
| 1971 |
"dependencies": {
|
| 1972 |
"file-uri-to-path": "1.0.0"
|
| 1973 |
}
|
|
@@ -2220,6 +2222,11 @@
|
|
| 2220 |
"resolved": "https://registry.npmjs.org/client-only/-/client-only-0.0.1.tgz",
|
| 2221 |
"integrity": "sha512-IV3Ou0jSMzZrd3pZ48nLkT9DA7Ag1pnPzaiQhpW7c3RbcqqzvzzVu+L8gfqMp/8IM2MQtSiqaCxrrcfu8I8rMA=="
|
| 2222 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2223 |
"node_modules/color": {
|
| 2224 |
"version": "4.2.3",
|
| 2225 |
"resolved": "https://registry.npmjs.org/color/-/color-4.2.3.tgz",
|
|
@@ -3285,7 +3292,9 @@
|
|
| 3285 |
"node_modules/file-uri-to-path": {
|
| 3286 |
"version": "1.0.0",
|
| 3287 |
"resolved": "https://registry.npmjs.org/file-uri-to-path/-/file-uri-to-path-1.0.0.tgz",
|
| 3288 |
-
"integrity": "sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw=="
|
|
|
|
|
|
|
| 3289 |
},
|
| 3290 |
"node_modules/filelist": {
|
| 3291 |
"version": "1.0.4",
|
|
@@ -3802,6 +3811,8 @@
|
|
| 3802 |
"resolved": "https://registry.npmjs.org/hnswlib-node/-/hnswlib-node-1.4.2.tgz",
|
| 3803 |
"integrity": "sha512-76PIzOaNcX8kOpKwlFPl07uelpctqDMzbiC+Qsk2JWNVkzeU/6iXRk4tfE9z3DoK1RCBrOaFXmQ6RFb1BVF9LA==",
|
| 3804 |
"hasInstallScript": true,
|
|
|
|
|
|
|
| 3805 |
"dependencies": {
|
| 3806 |
"bindings": "^1.5.0",
|
| 3807 |
"node-addon-api": "^6.0.0"
|
|
|
|
| 16 |
"@types/react-dom": "18.2.4",
|
| 17 |
"@xenova/transformers": "^2.1.1",
|
| 18 |
"chromadb": "^1.5.2",
|
| 19 |
+
"cohere-ai": "^5.1.0",
|
| 20 |
"dexie": "^3.2.4",
|
| 21 |
"eslint": "8.40.0",
|
| 22 |
"eslint-config-next": "13.4.2",
|
| 23 |
"fs-extra": "^11.1.1",
|
|
|
|
| 24 |
"langchain": "^0.0.90",
|
| 25 |
"next": "13.4.2",
|
| 26 |
"pdfjs-dist": "^3.7.107",
|
|
|
|
| 1968 |
"version": "1.5.0",
|
| 1969 |
"resolved": "https://registry.npmjs.org/bindings/-/bindings-1.5.0.tgz",
|
| 1970 |
"integrity": "sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ==",
|
| 1971 |
+
"optional": true,
|
| 1972 |
+
"peer": true,
|
| 1973 |
"dependencies": {
|
| 1974 |
"file-uri-to-path": "1.0.0"
|
| 1975 |
}
|
|
|
|
| 2222 |
"resolved": "https://registry.npmjs.org/client-only/-/client-only-0.0.1.tgz",
|
| 2223 |
"integrity": "sha512-IV3Ou0jSMzZrd3pZ48nLkT9DA7Ag1pnPzaiQhpW7c3RbcqqzvzzVu+L8gfqMp/8IM2MQtSiqaCxrrcfu8I8rMA=="
|
| 2224 |
},
|
| 2225 |
+
"node_modules/cohere-ai": {
|
| 2226 |
+
"version": "5.1.0",
|
| 2227 |
+
"resolved": "https://registry.npmjs.org/cohere-ai/-/cohere-ai-5.1.0.tgz",
|
| 2228 |
+
"integrity": "sha512-7q3z3w6GSoPxQqRL9G6QTaQ0e513auVE1JlNDnqnoFEXGtDbkVfaTOliR5qrMoK//74Csb0NW669evqngwPx3g=="
|
| 2229 |
+
},
|
| 2230 |
"node_modules/color": {
|
| 2231 |
"version": "4.2.3",
|
| 2232 |
"resolved": "https://registry.npmjs.org/color/-/color-4.2.3.tgz",
|
|
|
|
| 3292 |
"node_modules/file-uri-to-path": {
|
| 3293 |
"version": "1.0.0",
|
| 3294 |
"resolved": "https://registry.npmjs.org/file-uri-to-path/-/file-uri-to-path-1.0.0.tgz",
|
| 3295 |
+
"integrity": "sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw==",
|
| 3296 |
+
"optional": true,
|
| 3297 |
+
"peer": true
|
| 3298 |
},
|
| 3299 |
"node_modules/filelist": {
|
| 3300 |
"version": "1.0.4",
|
|
|
|
| 3811 |
"resolved": "https://registry.npmjs.org/hnswlib-node/-/hnswlib-node-1.4.2.tgz",
|
| 3812 |
"integrity": "sha512-76PIzOaNcX8kOpKwlFPl07uelpctqDMzbiC+Qsk2JWNVkzeU/6iXRk4tfE9z3DoK1RCBrOaFXmQ6RFb1BVF9LA==",
|
| 3813 |
"hasInstallScript": true,
|
| 3814 |
+
"optional": true,
|
| 3815 |
+
"peer": true,
|
| 3816 |
"dependencies": {
|
| 3817 |
"bindings": "^1.5.0",
|
| 3818 |
"node-addon-api": "^6.0.0"
|
package.json
CHANGED
|
@@ -16,6 +16,7 @@
|
|
| 16 |
"@types/react-dom": "18.2.4",
|
| 17 |
"@xenova/transformers": "^2.1.1",
|
| 18 |
"chromadb": "^1.5.2",
|
|
|
|
| 19 |
"dexie": "^3.2.4",
|
| 20 |
"eslint": "8.40.0",
|
| 21 |
"eslint-config-next": "13.4.2",
|
|
|
|
| 16 |
"@types/react-dom": "18.2.4",
|
| 17 |
"@xenova/transformers": "^2.1.1",
|
| 18 |
"chromadb": "^1.5.2",
|
| 19 |
+
"cohere-ai": "^5.1.0",
|
| 20 |
"dexie": "^3.2.4",
|
| 21 |
"eslint": "8.40.0",
|
| 22 |
"eslint-config-next": "13.4.2",
|
src/components/ChatWindow.jsx
CHANGED
|
@@ -5,9 +5,11 @@ import MessageList from './MessageList';
|
|
| 5 |
import {FileLoader} from './FileLoader';
|
| 6 |
import Loader from "./Loader";
|
| 7 |
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
|
|
|
|
| 8 |
import { ChromaClient } from "chromadb";
|
| 9 |
|
| 10 |
const client = new ChromaClient();
|
|
|
|
| 11 |
|
| 12 |
function ChatWindow({
|
| 13 |
stopStrings,
|
|
@@ -29,19 +31,28 @@ function ChatWindow({
|
|
| 29 |
}
|
| 30 |
|
| 31 |
if (fileText) {
|
|
|
|
| 32 |
const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 1000 });
|
| 33 |
-
const docs = await textSplitter.createDocuments([
|
| 34 |
-
|
| 35 |
-
await
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
ids: [...docs.map((v, k) => k)],
|
| 37 |
metadatas: [...docs.map(doc => doc.metadata)],
|
| 38 |
documents: [...docs.map(doc => doc.pageContent)],
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
|
| 46 |
const qaPrompt =
|
| 47 |
`You are an AI assistant providing helpful advice. You are given the following extracted parts of a long document and a question. Provide a conversational answer based on the context provided.
|
|
@@ -50,7 +61,7 @@ function ChatWindow({
|
|
| 50 |
If the question is not related to the context, politely respond that you are tuned to only answer questions that are related to the context.
|
| 51 |
Question: ${userInput}
|
| 52 |
=========
|
| 53 |
-
${
|
| 54 |
=========
|
| 55 |
Answer:
|
| 56 |
`
|
|
@@ -85,7 +96,7 @@ function ChatWindow({
|
|
| 85 |
}, [handleSubmit]);
|
| 86 |
|
| 87 |
const loadFile = async () => {
|
| 88 |
-
console.log('
|
| 89 |
}
|
| 90 |
|
| 91 |
useEffect(() => {
|
|
|
|
| 5 |
import {FileLoader} from './FileLoader';
|
| 6 |
import Loader from "./Loader";
|
| 7 |
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
|
| 8 |
+
import { TransformersEmbeddingFunction } from '../embed/hf';
|
| 9 |
import { ChromaClient } from "chromadb";
|
| 10 |
|
| 11 |
const client = new ChromaClient();
|
| 12 |
+
const embedder = new TransformersEmbeddingFunction({});
|
| 13 |
|
| 14 |
function ChatWindow({
|
| 15 |
stopStrings,
|
|
|
|
| 31 |
}
|
| 32 |
|
| 33 |
if (fileText) {
|
| 34 |
+
console.log('found file text splitting into chunks')
|
| 35 |
const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 1000 });
|
| 36 |
+
const docs = await textSplitter.createDocuments([fileText]);
|
| 37 |
+
console.log(`split docs: ${docs}`);
|
| 38 |
+
const collection = await client.createCollection({name: "docs", embeddingFunction: embedder })
|
| 39 |
+
console.log(`collection: ${collection}`);
|
| 40 |
+
let queryResult;
|
| 41 |
+
try {
|
| 42 |
+
await collection.add({
|
| 43 |
ids: [...docs.map((v, k) => k)],
|
| 44 |
metadatas: [...docs.map(doc => doc.metadata)],
|
| 45 |
documents: [...docs.map(doc => doc.pageContent)],
|
| 46 |
+
});
|
| 47 |
+
const queryResult = await collection.query({
|
| 48 |
+
nResults: 2,
|
| 49 |
+
queryTexts: [userPrompt]
|
| 50 |
+
});
|
| 51 |
+
console.log(queryResult);
|
| 52 |
+
} catch (err) {
|
| 53 |
+
console.log(err);
|
| 54 |
+
}
|
| 55 |
+
|
| 56 |
|
| 57 |
const qaPrompt =
|
| 58 |
`You are an AI assistant providing helpful advice. You are given the following extracted parts of a long document and a question. Provide a conversational answer based on the context provided.
|
|
|
|
| 61 |
If the question is not related to the context, politely respond that you are tuned to only answer questions that are related to the context.
|
| 62 |
Question: ${userInput}
|
| 63 |
=========
|
| 64 |
+
${queryResult}
|
| 65 |
=========
|
| 66 |
Answer:
|
| 67 |
`
|
|
|
|
| 96 |
}, [handleSubmit]);
|
| 97 |
|
| 98 |
const loadFile = async () => {
|
| 99 |
+
console.log('file loaded');
|
| 100 |
}
|
| 101 |
|
| 102 |
useEffect(() => {
|
src/components/FileLoader.jsx
CHANGED
|
@@ -5,6 +5,13 @@ import * as PDFJS from 'pdfjs-dist/build/pdf';
|
|
| 5 |
|
| 6 |
PDFJS.GlobalWorkerOptions.workerSrc = `//cdnjs.cloudflare.com/ajax/libs/pdf.js/${PDFJS.version}/pdf.worker.min.js`;
|
| 7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
export default class Pdf {
|
| 9 |
static async getPageText(pdf, pageNo) {
|
| 10 |
const page = await pdf.getPage(pageNo);
|
|
@@ -43,16 +50,11 @@ export const FileLoader = ({ setFileText }) => {
|
|
| 43 |
const blob = new Blob([file], { type: 'text/plain' });
|
| 44 |
if (file.type === "application/pdf") {
|
| 45 |
text = await Pdf.getPDFText(URL.createObjectURL(blob));
|
| 46 |
-
} else {
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
reader.addEventListener('load', function (e) {
|
| 50 |
-
text = e.target.result;
|
| 51 |
-
});
|
| 52 |
-
|
| 53 |
-
reader.readAsBinaryString(file);
|
| 54 |
}
|
| 55 |
-
|
|
|
|
| 56 |
setUploadStatus("Embed Complete");
|
| 57 |
}
|
| 58 |
}}
|
|
|
|
| 5 |
|
| 6 |
PDFJS.GlobalWorkerOptions.workerSrc = `//cdnjs.cloudflare.com/ajax/libs/pdf.js/${PDFJS.version}/pdf.worker.min.js`;
|
| 7 |
|
| 8 |
+
const readFile = (blob) => new Promise((resolve, reject) => {
|
| 9 |
+
const reader = new FileReader();
|
| 10 |
+
reader.onload = (event) => resolve(event.target.result);
|
| 11 |
+
reader.onerror = reject;
|
| 12 |
+
reader.readAsText(blob);
|
| 13 |
+
});
|
| 14 |
+
|
| 15 |
export default class Pdf {
|
| 16 |
static async getPageText(pdf, pageNo) {
|
| 17 |
const page = await pdf.getPage(pageNo);
|
|
|
|
| 50 |
const blob = new Blob([file], { type: 'text/plain' });
|
| 51 |
if (file.type === "application/pdf") {
|
| 52 |
text = await Pdf.getPDFText(URL.createObjectURL(blob));
|
| 53 |
+
} else {
|
| 54 |
+
text = await readFile(file)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
}
|
| 56 |
+
console.log(`file text: ${text}`);
|
| 57 |
+
setFileText(text);
|
| 58 |
setUploadStatus("Embed Complete");
|
| 59 |
}
|
| 60 |
}}
|