Spaces:
Running
Running
matt HOFFNER
committed on
Commit
•
73a1dae
1
Parent(s):
d072c37
add docHandle
Browse files
- package-lock.json +38 -4
- package.json +2 -0
- src/pages/api/docHandle.ts +57 -0
- src/utils/file-handler.ts +75 -0
- src/utils/index.ts +1 -0
package-lock.json
CHANGED
@@ -17,6 +17,7 @@
|
|
17 |
"@xenova/transformers": "^2.1.1",
|
18 |
"eslint": "8.40.0",
|
19 |
"eslint-config-next": "13.4.2",
|
|
|
20 |
"langchain": "^0.0.90",
|
21 |
"next": "13.4.2",
|
22 |
"react": "18.2.0",
|
@@ -25,6 +26,7 @@
|
|
25 |
"uuid": "^9.0.0"
|
26 |
},
|
27 |
"devDependencies": {
|
|
|
28 |
"@types/uuid": "^9.0.1"
|
29 |
}
|
30 |
},
|
@@ -1354,11 +1356,30 @@
|
|
1354 |
"resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.1.tgz",
|
1355 |
"integrity": "sha512-LG4opVs2ANWZ1TJoKc937iMmNstM/d0ae1vNbnBvBhqCSezgVUOzcLCqbI5elV8Vy6WKwKjaqR+zO9VKirBBCA=="
|
1356 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1357 |
"node_modules/@types/json5": {
|
1358 |
"version": "0.0.29",
|
1359 |
"resolved": "https://registry.npmjs.org/@types/json5/-/json5-0.0.29.tgz",
|
1360 |
"integrity": "sha512-dRLjCWHYg4oaA77cxO64oO+7JwCwnIzkZPdrrC71jQmQtlhM556pwKo5bUzqvZndkVbeFLIIi+9TC40JNF5hNQ=="
|
1361 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1362 |
"node_modules/@types/long": {
|
1363 |
"version": "4.0.2",
|
1364 |
"resolved": "https://registry.npmjs.org/@types/long/-/long-4.0.2.tgz",
|
@@ -3144,16 +3165,16 @@
|
|
3144 |
"integrity": "sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow=="
|
3145 |
},
|
3146 |
"node_modules/fs-extra": {
|
3147 |
-
"version": "
|
3148 |
-
"resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-
|
3149 |
-
"integrity": "sha512-
|
3150 |
"dependencies": {
|
3151 |
"graceful-fs": "^4.2.0",
|
3152 |
"jsonfile": "^6.0.1",
|
3153 |
"universalify": "^2.0.0"
|
3154 |
},
|
3155 |
"engines": {
|
3156 |
-
"node": ">=
|
3157 |
}
|
3158 |
},
|
3159 |
"node_modules/fs.realpath": {
|
@@ -5422,6 +5443,19 @@
|
|
5422 |
"node": ">= 8.0.0"
|
5423 |
}
|
5424 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5425 |
"node_modules/run-applescript": {
|
5426 |
"version": "5.0.0",
|
5427 |
"resolved": "https://registry.npmjs.org/run-applescript/-/run-applescript-5.0.0.tgz",
|
|
|
17 |
"@xenova/transformers": "^2.1.1",
|
18 |
"eslint": "8.40.0",
|
19 |
"eslint-config-next": "13.4.2",
|
20 |
+
"fs-extra": "^11.1.1",
|
21 |
"langchain": "^0.0.90",
|
22 |
"next": "13.4.2",
|
23 |
"react": "18.2.0",
|
|
|
26 |
"uuid": "^9.0.0"
|
27 |
},
|
28 |
"devDependencies": {
|
29 |
+
"@types/fs-extra": "^11.0.1",
|
30 |
"@types/uuid": "^9.0.1"
|
31 |
}
|
32 |
},
|
|
|
1356 |
"resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.1.tgz",
|
1357 |
"integrity": "sha512-LG4opVs2ANWZ1TJoKc937iMmNstM/d0ae1vNbnBvBhqCSezgVUOzcLCqbI5elV8Vy6WKwKjaqR+zO9VKirBBCA=="
|
1358 |
},
|
1359 |
+
"node_modules/@types/fs-extra": {
|
1360 |
+
"version": "11.0.1",
|
1361 |
+
"resolved": "https://registry.npmjs.org/@types/fs-extra/-/fs-extra-11.0.1.tgz",
|
1362 |
+
"integrity": "sha512-MxObHvNl4A69ofaTRU8DFqvgzzv8s9yRtaPPm5gud9HDNvpB3GPQFvNuTWAI59B9huVGV5jXYJwbCsmBsOGYWA==",
|
1363 |
+
"dev": true,
|
1364 |
+
"dependencies": {
|
1365 |
+
"@types/jsonfile": "*",
|
1366 |
+
"@types/node": "*"
|
1367 |
+
}
|
1368 |
+
},
|
1369 |
"node_modules/@types/json5": {
|
1370 |
"version": "0.0.29",
|
1371 |
"resolved": "https://registry.npmjs.org/@types/json5/-/json5-0.0.29.tgz",
|
1372 |
"integrity": "sha512-dRLjCWHYg4oaA77cxO64oO+7JwCwnIzkZPdrrC71jQmQtlhM556pwKo5bUzqvZndkVbeFLIIi+9TC40JNF5hNQ=="
|
1373 |
},
|
1374 |
+
"node_modules/@types/jsonfile": {
|
1375 |
+
"version": "6.1.1",
|
1376 |
+
"resolved": "https://registry.npmjs.org/@types/jsonfile/-/jsonfile-6.1.1.tgz",
|
1377 |
+
"integrity": "sha512-GSgiRCVeapDN+3pqA35IkQwasaCh/0YFH5dEF6S88iDvEn901DjOeH3/QPY+XYP1DFzDZPvIvfeEgk+7br5png==",
|
1378 |
+
"dev": true,
|
1379 |
+
"dependencies": {
|
1380 |
+
"@types/node": "*"
|
1381 |
+
}
|
1382 |
+
},
|
1383 |
"node_modules/@types/long": {
|
1384 |
"version": "4.0.2",
|
1385 |
"resolved": "https://registry.npmjs.org/@types/long/-/long-4.0.2.tgz",
|
|
|
3165 |
"integrity": "sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow=="
|
3166 |
},
|
3167 |
"node_modules/fs-extra": {
|
3168 |
+
"version": "11.1.1",
|
3169 |
+
"resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-11.1.1.tgz",
|
3170 |
+
"integrity": "sha512-MGIE4HOvQCeUCzmlHs0vXpih4ysz4wg9qiSAu6cd42lVwPbTM1TjV7RusoyQqMmk/95gdQZX72u+YW+c3eEpFQ==",
|
3171 |
"dependencies": {
|
3172 |
"graceful-fs": "^4.2.0",
|
3173 |
"jsonfile": "^6.0.1",
|
3174 |
"universalify": "^2.0.0"
|
3175 |
},
|
3176 |
"engines": {
|
3177 |
+
"node": ">=14.14"
|
3178 |
}
|
3179 |
},
|
3180 |
"node_modules/fs.realpath": {
|
|
|
5443 |
"node": ">= 8.0.0"
|
5444 |
}
|
5445 |
},
|
5446 |
+
"node_modules/rollup-plugin-typescript2/node_modules/fs-extra": {
|
5447 |
+
"version": "10.1.0",
|
5448 |
+
"resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-10.1.0.tgz",
|
5449 |
+
"integrity": "sha512-oRXApq54ETRj4eMiFzGnHWGy+zo5raudjuxN0b8H7s/RU2oW0Wvsx9O0ACRN/kRq9E8Vu/ReskGB5o3ji+FzHQ==",
|
5450 |
+
"dependencies": {
|
5451 |
+
"graceful-fs": "^4.2.0",
|
5452 |
+
"jsonfile": "^6.0.1",
|
5453 |
+
"universalify": "^2.0.0"
|
5454 |
+
},
|
5455 |
+
"engines": {
|
5456 |
+
"node": ">=12"
|
5457 |
+
}
|
5458 |
+
},
|
5459 |
"node_modules/run-applescript": {
|
5460 |
"version": "5.0.0",
|
5461 |
"resolved": "https://registry.npmjs.org/run-applescript/-/run-applescript-5.0.0.tgz",
|
package.json
CHANGED
@@ -17,6 +17,7 @@
|
|
17 |
"@xenova/transformers": "^2.1.1",
|
18 |
"eslint": "8.40.0",
|
19 |
"eslint-config-next": "13.4.2",
|
|
|
20 |
"langchain": "^0.0.90",
|
21 |
"next": "13.4.2",
|
22 |
"react": "18.2.0",
|
@@ -25,6 +26,7 @@
|
|
25 |
"uuid": "^9.0.0"
|
26 |
},
|
27 |
"devDependencies": {
|
|
|
28 |
"@types/uuid": "^9.0.1"
|
29 |
}
|
30 |
}
|
|
|
17 |
"@xenova/transformers": "^2.1.1",
|
18 |
"eslint": "8.40.0",
|
19 |
"eslint-config-next": "13.4.2",
|
20 |
+
"fs-extra": "^11.1.1",
|
21 |
"langchain": "^0.0.90",
|
22 |
"next": "13.4.2",
|
23 |
"react": "18.2.0",
|
|
|
26 |
"uuid": "^9.0.0"
|
27 |
},
|
28 |
"devDependencies": {
|
29 |
+
"@types/fs-extra": "^11.0.1",
|
30 |
"@types/uuid": "^9.0.1"
|
31 |
}
|
32 |
}
|
src/pages/api/docHandle.ts
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import type { NextApiRequest, NextApiResponse } from 'next';
|
2 |
+
|
3 |
+
import {
|
4 |
+
readHNSWLibModelFromLocal,
|
5 |
+
storesDir,
|
6 |
+
vectorStoreToHNSWLibModel,
|
7 |
+
} from '@/utils/file-handler';
|
8 |
+
import fs from 'fs-extra';
|
9 |
+
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
|
10 |
+
import { HNSWLib } from 'langchain/vectorstores/hnswlib';
|
11 |
+
import { XenovaTransformersEmbeddings } from '../../embed/hf'
|
12 |
+
|
13 |
+
/**
 * Builds an in-memory HNSWLib vector store from a single piece of text.
 *
 * The text is split with a `RecursiveCharacterTextSplitter` (chunkSize 1000),
 * and the resulting documents are embedded with `XenovaTransformersEmbeddings`.
 * Both the chunk list and the finished store are logged to the console.
 *
 * @param text - Raw document text to index.
 * @returns The populated HNSWLib vector store.
 */
async function handleDocs(text: string) {
  const splitter = new RecursiveCharacterTextSplitter({ chunkSize: 1000 });
  const chunks = await splitter.createDocuments([text]);
  console.log(chunks);

  const embeddings = new XenovaTransformersEmbeddings();
  const store = await HNSWLib.fromDocuments(chunks, embeddings);
  console.log(store);

  return store;
}
|
23 |
+
|
24 |
+
/**
 * Pulls the `text` field out of a request body.
 *
 * Next's body parser hands over an already-parsed object for
 * `application/json` requests and a raw string for other content types, so
 * both shapes are accepted here.
 *
 * @param body - `req.body` as delivered by Next.
 * @returns The non-empty `text` string, or `undefined` when the body is
 *   missing, malformed JSON, or carries no usable `text`.
 */
function readTextField(body: unknown): string | undefined {
  let payload: unknown = body;

  if (typeof payload === 'string') {
    try {
      payload = JSON.parse(payload);
    } catch {
      return undefined;
    }
  }

  if (typeof payload !== 'object' || payload === null) {
    return undefined;
  }

  const text = (payload as { text?: unknown }).text;
  return typeof text === 'string' && text.length > 0 ? text : undefined;
}

/**
 * API route: turns the posted `text` into a serialized HNSWLib model.
 *
 * Responds with:
 * - 400 when the body has no usable `text` (including malformed JSON),
 * - 200 and the model (`args`, `docstore`, `hnswlibIndex`) on success,
 * - 500 when reading the cached model or building a new one fails.
 *
 * @param req - Incoming request; the body must contain a `text` string.
 * @param res - Response used to send the model or an error message.
 */
export default async function handler(
  req: NextApiRequest,
  res: NextApiResponse,
) {
  const text = readTextField(req.body);

  if (!text) {
    return res.status(400).json({ message: 'No text in the request' });
  }

  try {
    // NOTE(review): once the store directory exists, the saved model is
    // returned for every request regardless of `text`. Confirm this cache
    // is intended to be shared across different documents.
    if (await fs.pathExists(storesDir)) {
      console.log('read from ' + storesDir);
      const cached = await readHNSWLibModelFromLocal();
      return res.status(200).send({
        ...cached,
      });
    }

    const vectorStore = await handleDocs(text);
    const model = await vectorStoreToHNSWLibModel(vectorStore);
    return res.status(200).send({
      ...model,
    });
  } catch (error: unknown) {
    // Surface the failure in the server log, but keep the client message generic.
    console.error(error);
    return res
      .status(500)
      .json({ message: 'Failed to build the document index' });
  }
}
|
52 |
+
|
53 |
+
/**
 * Next.js API route configuration for this endpoint.
 */
export const config = {
  api: {
    // Keep Next's built-in body parser enabled; the handler reads `req.body`.
    bodyParser: true,
  },
};
|
src/utils/file-handler.ts
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import fs from 'fs-extra';
|
2 |
+
import type { OpenAIEmbeddings } from 'langchain/embeddings/openai';
|
3 |
+
import {
|
4 |
+
HNSWLib,
|
5 |
+
type HNSWLib as StoreTypeHNSWLib,
|
6 |
+
} from 'langchain/vectorstores/hnswlib';
|
7 |
+
import path from 'path';
|
8 |
+
|
9 |
+
const ifDev = process.env.NODE_ENV === 'development';

/**
 * Directory that holds the serialized HNSWLib store.
 *
 * In prod mode, only /tmp/ is writable, so the absolute path is used there:
 * https://vercel.com/guides/how-can-i-use-files-in-serverless-functions
 * In development a relative `tmp/` directory is used instead.
 */
export const storesDir = ifDev ? 'tmp/hnswlib-stores' : '/tmp/hnswlib-stores';

/**
 * Serialized form of an HNSWLib store: the contents of each file that makes
 * up the store, as strings.
 */
type HNSWLibModel = {
  args: string;
  docstore: string;
  hnswlibIndex: string;
};

/**
 * File name inside `storesDir` for each field of `HNSWLibModel`.
 *
 * The annotation ties the keys to the model type, so adding or renaming a
 * model field without updating this table is a compile error.
 */
const HNSWLibModelFilesName: Record<keyof HNSWLibModel, string> = {
  args: 'args.json',
  docstore: 'docstore.json',
  hnswlibIndex: 'hnswlib.index',
};
|
25 |
+
|
26 |
+
/**
 * Rebuilds an HNSWLib vector store from its serialized model.
 *
 * The model is first written to `storesDir` and then loaded back from that
 * directory with `HNSWLib.load`.
 * (Looking forward to a better way to transform hnswlibStore <=> indexes.)
 *
 * @param model - Serialized store contents.
 * @param embeddings - Embeddings instance handed to `HNSWLib.load`.
 * @returns The loaded vector store.
 */
export async function HNSWLibModelToVectorStore(
  model: HNSWLibModel,
  embeddings: OpenAIEmbeddings,
) {
  await saveHNSWLibModelToLocal(model);
  return HNSWLib.load(storesDir, embeddings);
}
|
36 |
+
|
37 |
+
/**
 * Writes a serialized HNSWLib model into `storesDir`, one file per field.
 *
 * `args` and `docstore` are written as UTF-8 text. `hnswlibIndex` is a
 * hex-encoded string (the reader in this file uses the 'hex' encoding), so it
 * is decoded back to raw bytes before being written.
 *
 * @param model - Serialized store contents to persist.
 * @throws If the directory cannot be created or any file cannot be written.
 */
export async function saveHNSWLibModelToLocal(model: HNSWLibModel) {
  // The directory does not exist on a fresh instance; writeFile would fail with ENOENT.
  await fs.ensureDir(storesDir);

  await Promise.all([
    fs.writeFile(
      path.join(storesDir, HNSWLibModelFilesName.args),
      model.args,
      'utf-8',
    ),
    fs.writeFile(
      path.join(storesDir, HNSWLibModelFilesName.docstore),
      model.docstore,
      'utf-8',
    ),
    fs.writeFile(
      path.join(storesDir, HNSWLibModelFilesName.hnswlibIndex),
      Buffer.from(model.hnswlibIndex, 'hex'),
    ),
  ]);
}
|
53 |
+
|
54 |
+
/**
 * Serializes a vector store by saving it to `storesDir` and reading the
 * written files back.
 *
 * @param store - The HNSWLib store to serialize.
 * @returns The contents of the saved store files.
 */
export async function vectorStoreToHNSWLibModel(
  store: StoreTypeHNSWLib,
): Promise<HNSWLibModel> {
  await store.save(storesDir);
  const model = await readHNSWLibModelFromLocal();
  return model;
}
|
60 |
+
|
61 |
+
/**
 * Reads the three store files from `storesDir` in parallel.
 *
 * `args` and `docstore` are read as UTF-8 text; the index file is read with
 * the 'hex' encoding so its bytes can travel as a string.
 *
 * @returns The file contents keyed by model field.
 */
export async function readHNSWLibModelFromLocal(): Promise<HNSWLibModel> {
  const inStore = (fileName: string) => path.join(storesDir, fileName);

  const pendingArgs = fs.readFile(inStore(HNSWLibModelFilesName.args), 'utf-8');
  const pendingDocstore = fs.readFile(
    inStore(HNSWLibModelFilesName.docstore),
    'utf-8',
  );
  const pendingIndex = fs.readFile(
    inStore(HNSWLibModelFilesName.hnswlibIndex),
    'hex',
  );

  const [args, docstore, hnswlibIndex] = await Promise.all([
    pendingArgs,
    pendingDocstore,
    pendingIndex,
  ]);

  return { args, docstore, hnswlibIndex };
}
|
src/utils/index.ts
CHANGED
@@ -89,3 +89,4 @@ export function throttle<T extends (...args: any[]) => any>(
|
|
89 |
|
90 |
export const DEFAULT_TEMPERATURE =
|
91 |
parseFloat(process.env.NEXT_PUBLIC_DEFAULT_TEMPERATURE || "1");
|
|
|
|
89 |
|
90 |
export const DEFAULT_TEMPERATURE =
|
91 |
parseFloat(process.env.NEXT_PUBLIC_DEFAULT_TEMPERATURE || "1");
|
92 |
+
|