matt HOFFNER committed on
Commit
5e14bd6
·
1 Parent(s): 81c1854

use chromadb to run in browser

Browse files
package-lock.json CHANGED
@@ -15,6 +15,7 @@
15
  "@types/react": "18.2.6",
16
  "@types/react-dom": "18.2.4",
17
  "@xenova/transformers": "^2.1.1",
 
18
  "dexie": "^3.2.4",
19
  "eslint": "8.40.0",
20
  "eslint-config-next": "13.4.2",
@@ -2212,9 +2213,7 @@
2212
  "node_modules/chromadb": {
2213
  "version": "1.5.2",
2214
  "resolved": "https://registry.npmjs.org/chromadb/-/chromadb-1.5.2.tgz",
2215
- "integrity": "sha512-x/rOD7Oo1RiYA+vPK+Ma7CliCHlx26OjUt5J7Z9HZ5Ud1qDrPlvctBycK9Il3zqza96yeUoPQ7gCXHVKNoyvRQ==",
2216
- "optional": true,
2217
- "peer": true
2218
  },
2219
  "node_modules/client-only": {
2220
  "version": "0.0.1",
 
15
  "@types/react": "18.2.6",
16
  "@types/react-dom": "18.2.4",
17
  "@xenova/transformers": "^2.1.1",
18
+ "chromadb": "^1.5.2",
19
  "dexie": "^3.2.4",
20
  "eslint": "8.40.0",
21
  "eslint-config-next": "13.4.2",
 
2213
  "node_modules/chromadb": {
2214
  "version": "1.5.2",
2215
  "resolved": "https://registry.npmjs.org/chromadb/-/chromadb-1.5.2.tgz",
2216
+ "integrity": "sha512-x/rOD7Oo1RiYA+vPK+Ma7CliCHlx26OjUt5J7Z9HZ5Ud1qDrPlvctBycK9Il3zqza96yeUoPQ7gCXHVKNoyvRQ=="
 
 
2217
  },
2218
  "node_modules/client-only": {
2219
  "version": "0.0.1",
package.json CHANGED
@@ -15,11 +15,11 @@
15
  "@types/react": "18.2.6",
16
  "@types/react-dom": "18.2.4",
17
  "@xenova/transformers": "^2.1.1",
 
18
  "dexie": "^3.2.4",
19
  "eslint": "8.40.0",
20
  "eslint-config-next": "13.4.2",
21
  "fs-extra": "^11.1.1",
22
- "hnswlib-node": "^1.4.2",
23
  "langchain": "^0.0.90",
24
  "next": "13.4.2",
25
  "pdfjs-dist": "^3.7.107",
 
15
  "@types/react": "18.2.6",
16
  "@types/react-dom": "18.2.4",
17
  "@xenova/transformers": "^2.1.1",
18
+ "chromadb": "^1.5.2",
19
  "dexie": "^3.2.4",
20
  "eslint": "8.40.0",
21
  "eslint-config-next": "13.4.2",
22
  "fs-extra": "^11.1.1",
 
23
  "langchain": "^0.0.90",
24
  "next": "13.4.2",
25
  "pdfjs-dist": "^3.7.107",
src/pages/api/docHandle.ts CHANGED
@@ -1,15 +1,14 @@
1
  import type { NextApiRequest, NextApiResponse } from 'next';
2
- import {
3
- vectorStoreToHNSWLibModel,
4
- } from '@/utils/file-handler';
5
  import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
6
- import { HNSWLib } from 'langchain/vectorstores/hnswlib';
7
  import XenovaTransformersEmbeddings from '../../embed/hf'
8
 
9
  async function handleDocs(text: string) {
10
  const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 1000 });
11
  const docs = await textSplitter.createDocuments([text]);
12
- const vectorStore = await HNSWLib.fromDocuments(docs, new XenovaTransformersEmbeddings());
 
 
13
  return vectorStore;
14
  }
15
 
@@ -25,9 +24,8 @@ export default async function handler(
25
  }
26
 
27
  const vectorStore = await handleDocs(text);
28
- const model = await vectorStoreToHNSWLibModel(vectorStore);
29
  res.status(200).send({
30
- ...model,
31
  });
32
  }
33
 
 
1
  import type { NextApiRequest, NextApiResponse } from 'next';
 
 
 
2
  import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
3
+ import { Chroma } from "langchain/vectorstores/chroma";
4
  import XenovaTransformersEmbeddings from '../../embed/hf'
5
 
6
  async function handleDocs(text: string) {
7
  const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 1000 });
8
  const docs = await textSplitter.createDocuments([text]);
9
+ const vectorStore = await Chroma.fromDocuments(docs, new XenovaTransformersEmbeddings(), {
10
+ collectionName: 'docs'
11
+ });
12
  return vectorStore;
13
  }
14
 
 
24
  }
25
 
26
  const vectorStore = await handleDocs(text);
 
27
  res.status(200).send({
28
+ model: vectorStore,
29
  });
30
  }
31
 
src/utils/file-handler.ts DELETED
@@ -1,76 +0,0 @@
1
- import type XenovaTransformersEmbeddings from '@/embed/hf';
2
- // import { HuggingFaceInferenceEmbeddings } from 'langchain/embeddings/hf';
3
- import fs from 'fs-extra';
4
- import {
5
- HNSWLib,
6
- type HNSWLib as StoreTypeHNSWLib,
7
- } from 'langchain/vectorstores/hnswlib';
8
- import path from 'path';
9
-
10
- const ifDev = process.env.NODE_ENV === 'development';
11
- // in prod mode, only allowed to write to /tmp/
12
- // https://vercel.com/guides/how-can-i-use-files-in-serverless-functions
13
- export const storesDir = ifDev ? 'tmp/hnswlib-stores' : '/tmp/hnswlib-stores';
14
-
15
- type HNSWLibModel = {
16
- args: string;
17
- docstore: string;
18
- hnswlibIndex: string;
19
- };
20
-
21
- const HNSWLibModelFilesName = {
22
- args: 'args.json',
23
- docstore: 'docstore.json',
24
- hnswlibIndex: 'hnswlib.index',
25
- };
26
-
27
- // looking forward to a better way to transform hnswlibStore <=> indexes
28
- export async function HNSWLibModelToVectorStore(
29
- model: HNSWLibModel,
30
- embeddings: XenovaTransformersEmbeddings,
31
- ) {
32
- await saveHNSWLibModelToLocal(model);
33
- // load from dir
34
- const vectorStore = await HNSWLib.load(storesDir, embeddings);
35
- return vectorStore;
36
- }
37
-
38
- export async function saveHNSWLibModelToLocal(model: HNSWLibModel) {
39
- // save model to /tmp/
40
- await Promise.all(
41
- Object.keys(HNSWLibModelFilesName).map((key) => {
42
- const fullPath = path.join(
43
- storesDir,
44
- (HNSWLibModelFilesName as Record<string, string>)[key],
45
- );
46
- console.log(fullPath);
47
- const data = (model as Record<string, string>)[key];
48
- console.log(data);
49
-
50
- return fs.writeFile(fullPath, data);
51
- }),
52
- );
53
- }
54
-
55
- export async function vectorStoreToHNSWLibModel(
56
- store: StoreTypeHNSWLib,
57
- ): Promise<HNSWLibModel> {
58
- await store.save(storesDir);
59
- return await readHNSWLibModelFromLocal();
60
- }
61
-
62
- export async function readHNSWLibModelFromLocal(): Promise<HNSWLibModel> {
63
- const [args, docstore, hnswlibIndex] = await Promise.all([
64
- fs.readFile(path.join(storesDir, HNSWLibModelFilesName.args), 'utf-8'),
65
- fs.readFile(path.join(storesDir, HNSWLibModelFilesName.docstore), 'utf-8'),
66
- fs.readFile(
67
- path.join(storesDir, HNSWLibModelFilesName.hnswlibIndex),
68
- 'hex',
69
- ),
70
- ]);
71
- return {
72
- args,
73
- docstore,
74
- hnswlibIndex,
75
- };
76
- }