Spaces:
Runtime error
Runtime error
matt HOFFNER
committed on
Commit
·
5e14bd6
1
Parent(s):
81c1854
use chromadb to run in browser
Browse files
- package-lock.json +2 -3
- package.json +1 -1
- src/pages/api/docHandle.ts +5 -7
- src/utils/file-handler.ts +0 -76
package-lock.json
CHANGED
@@ -15,6 +15,7 @@
|
|
15 |
"@types/react": "18.2.6",
|
16 |
"@types/react-dom": "18.2.4",
|
17 |
"@xenova/transformers": "^2.1.1",
|
|
|
18 |
"dexie": "^3.2.4",
|
19 |
"eslint": "8.40.0",
|
20 |
"eslint-config-next": "13.4.2",
|
@@ -2212,9 +2213,7 @@
|
|
2212 |
"node_modules/chromadb": {
|
2213 |
"version": "1.5.2",
|
2214 |
"resolved": "https://registry.npmjs.org/chromadb/-/chromadb-1.5.2.tgz",
|
2215 |
-
"integrity": "sha512-x/rOD7Oo1RiYA+vPK+Ma7CliCHlx26OjUt5J7Z9HZ5Ud1qDrPlvctBycK9Il3zqza96yeUoPQ7gCXHVKNoyvRQ=="
|
2216 |
-
"optional": true,
|
2217 |
-
"peer": true
|
2218 |
},
|
2219 |
"node_modules/client-only": {
|
2220 |
"version": "0.0.1",
|
|
|
15 |
"@types/react": "18.2.6",
|
16 |
"@types/react-dom": "18.2.4",
|
17 |
"@xenova/transformers": "^2.1.1",
|
18 |
+
"chromadb": "^1.5.2",
|
19 |
"dexie": "^3.2.4",
|
20 |
"eslint": "8.40.0",
|
21 |
"eslint-config-next": "13.4.2",
|
|
|
2213 |
"node_modules/chromadb": {
|
2214 |
"version": "1.5.2",
|
2215 |
"resolved": "https://registry.npmjs.org/chromadb/-/chromadb-1.5.2.tgz",
|
2216 |
+
"integrity": "sha512-x/rOD7Oo1RiYA+vPK+Ma7CliCHlx26OjUt5J7Z9HZ5Ud1qDrPlvctBycK9Il3zqza96yeUoPQ7gCXHVKNoyvRQ=="
|
|
|
|
|
2217 |
},
|
2218 |
"node_modules/client-only": {
|
2219 |
"version": "0.0.1",
|
package.json
CHANGED
@@ -15,11 +15,11 @@
|
|
15 |
"@types/react": "18.2.6",
|
16 |
"@types/react-dom": "18.2.4",
|
17 |
"@xenova/transformers": "^2.1.1",
|
|
|
18 |
"dexie": "^3.2.4",
|
19 |
"eslint": "8.40.0",
|
20 |
"eslint-config-next": "13.4.2",
|
21 |
"fs-extra": "^11.1.1",
|
22 |
-
"hnswlib-node": "^1.4.2",
|
23 |
"langchain": "^0.0.90",
|
24 |
"next": "13.4.2",
|
25 |
"pdfjs-dist": "^3.7.107",
|
|
|
15 |
"@types/react": "18.2.6",
|
16 |
"@types/react-dom": "18.2.4",
|
17 |
"@xenova/transformers": "^2.1.1",
|
18 |
+
"chromadb": "^1.5.2",
|
19 |
"dexie": "^3.2.4",
|
20 |
"eslint": "8.40.0",
|
21 |
"eslint-config-next": "13.4.2",
|
22 |
"fs-extra": "^11.1.1",
|
|
|
23 |
"langchain": "^0.0.90",
|
24 |
"next": "13.4.2",
|
25 |
"pdfjs-dist": "^3.7.107",
|
src/pages/api/docHandle.ts
CHANGED
@@ -1,15 +1,14 @@
|
|
1 |
import type { NextApiRequest, NextApiResponse } from 'next';
|
2 |
-
import {
|
3 |
-
vectorStoreToHNSWLibModel,
|
4 |
-
} from '@/utils/file-handler';
|
5 |
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
|
6 |
-
import {
|
7 |
import XenovaTransformersEmbeddings from '../../embed/hf'
|
8 |
|
9 |
async function handleDocs(text: string) {
|
10 |
const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 1000 });
|
11 |
const docs = await textSplitter.createDocuments([text]);
|
12 |
-
const vectorStore = await
|
|
|
|
|
13 |
return vectorStore;
|
14 |
}
|
15 |
|
@@ -25,9 +24,8 @@ export default async function handler(
|
|
25 |
}
|
26 |
|
27 |
const vectorStore = await handleDocs(text);
|
28 |
-
const model = await vectorStoreToHNSWLibModel(vectorStore);
|
29 |
res.status(200).send({
|
30 |
-
|
31 |
});
|
32 |
}
|
33 |
|
|
|
1 |
import type { NextApiRequest, NextApiResponse } from 'next';
|
|
|
|
|
|
|
2 |
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
|
3 |
+
import { Chroma } from "langchain/vectorstores/chroma";
|
4 |
import XenovaTransformersEmbeddings from '../../embed/hf'
|
5 |
|
6 |
async function handleDocs(text: string) {
|
7 |
const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 1000 });
|
8 |
const docs = await textSplitter.createDocuments([text]);
|
9 |
+
const vectorStore = await Chroma.fromDocuments(docs, new XenovaTransformersEmbeddings(), {
|
10 |
+
collectionName: 'docs'
|
11 |
+
});
|
12 |
return vectorStore;
|
13 |
}
|
14 |
|
|
|
24 |
}
|
25 |
|
26 |
const vectorStore = await handleDocs(text);
|
|
|
27 |
res.status(200).send({
|
28 |
+
model: vectorStore,
|
29 |
});
|
30 |
}
|
31 |
|
src/utils/file-handler.ts
DELETED
@@ -1,76 +0,0 @@
|
|
1 |
-
import type XenovaTransformersEmbeddings from '@/embed/hf';
|
2 |
-
// import { HuggingFaceInferenceEmbeddings } from 'langchain/embeddings/hf';
|
3 |
-
import fs from 'fs-extra';
|
4 |
-
import {
|
5 |
-
HNSWLib,
|
6 |
-
type HNSWLib as StoreTypeHNSWLib,
|
7 |
-
} from 'langchain/vectorstores/hnswlib';
|
8 |
-
import path from 'path';
|
9 |
-
|
10 |
-
const ifDev = process.env.NODE_ENV === 'development';
|
11 |
-
// in prod mode, only allowed to write to /tmp/
|
12 |
-
// https://vercel.com/guides/how-can-i-use-files-in-serverless-functions
|
13 |
-
export const storesDir = ifDev ? 'tmp/hnswlib-stores' : '/tmp/hnswlib-stores';
|
14 |
-
|
15 |
-
type HNSWLibModel = {
|
16 |
-
args: string;
|
17 |
-
docstore: string;
|
18 |
-
hnswlibIndex: string;
|
19 |
-
};
|
20 |
-
|
21 |
-
const HNSWLibModelFilesName = {
|
22 |
-
args: 'args.json',
|
23 |
-
docstore: 'docstore.json',
|
24 |
-
hnswlibIndex: 'hnswlib.index',
|
25 |
-
};
|
26 |
-
|
27 |
-
// looking forward to a better way to transfrom hnswlibStore <=> indexes
|
28 |
-
export async function HNSWLibModelToVectorStore(
|
29 |
-
model: HNSWLibModel,
|
30 |
-
embeddings: XenovaTransformersEmbeddings,
|
31 |
-
) {
|
32 |
-
await saveHNSWLibModelToLocal(model);
|
33 |
-
// load from dir
|
34 |
-
const vectorStore = await HNSWLib.load(storesDir, embeddings);
|
35 |
-
return vectorStore;
|
36 |
-
}
|
37 |
-
|
38 |
-
export async function saveHNSWLibModelToLocal(model: HNSWLibModel) {
|
39 |
-
// save model to /tmp/
|
40 |
-
await Promise.all(
|
41 |
-
Object.keys(HNSWLibModelFilesName).map((key) => {
|
42 |
-
const fullPath = path.join(
|
43 |
-
storesDir,
|
44 |
-
(HNSWLibModelFilesName as Record<string, string>)[key],
|
45 |
-
);
|
46 |
-
console.log(fullPath);
|
47 |
-
const data = (model as Record<string, string>)[key];
|
48 |
-
console.log(data);
|
49 |
-
|
50 |
-
return fs.writeFile(fullPath, data);
|
51 |
-
}),
|
52 |
-
);
|
53 |
-
}
|
54 |
-
|
55 |
-
export async function vectorStoreToHNSWLibModel(
|
56 |
-
store: StoreTypeHNSWLib,
|
57 |
-
): Promise<HNSWLibModel> {
|
58 |
-
await store.save(storesDir);
|
59 |
-
return await readHNSWLibModelFromLocal();
|
60 |
-
}
|
61 |
-
|
62 |
-
export async function readHNSWLibModelFromLocal(): Promise<HNSWLibModel> {
|
63 |
-
const [args, docstore, hnswlibIndex] = await Promise.all([
|
64 |
-
fs.readFile(path.join(storesDir, HNSWLibModelFilesName.args), 'utf-8'),
|
65 |
-
fs.readFile(path.join(storesDir, HNSWLibModelFilesName.docstore), 'utf-8'),
|
66 |
-
fs.readFile(
|
67 |
-
path.join(storesDir, HNSWLibModelFilesName.hnswlibIndex),
|
68 |
-
'hex',
|
69 |
-
),
|
70 |
-
]);
|
71 |
-
return {
|
72 |
-
args,
|
73 |
-
docstore,
|
74 |
-
hnswlibIndex,
|
75 |
-
};
|
76 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|