Spaces:
Running
Running
matt HOFFNER
committed on
Commit
•
a73e8b4
1
Parent(s):
da2af98
refactor
Browse files- app/api/chat/route.ts +4 -1
- app/tools/odds.ts +48 -0
- package-lock.json +47 -0
- package.json +1 -0
- pages/api/functions/embed.ts +42 -0
- pages/api/functions/index.ts +6 -97
- pages/api/functions/utils.ts +40 -0
- pages/api/functions/vector-store.ts +23 -0
app/api/chat/route.ts
CHANGED
@@ -2,9 +2,11 @@ import { Configuration, OpenAIApi } from "openai-edge";
|
|
2 |
import { OpenAIStream, StreamingTextResponse } from "ai";
|
3 |
import { createUrlSurfer } from "@/app/tools/surfer";
|
4 |
import { createSearchApi } from "@/app/tools/search";
|
|
|
5 |
|
6 |
const [, urlSurferSchema] = createUrlSurfer();
|
7 |
const [, serpApiSchema] = createSearchApi({ apiKey: process.env.SERP_API_KEY || '' });
|
|
|
8 |
|
9 |
const config = new Configuration({
|
10 |
apiKey: process.env.OPENAI_API_KEY,
|
@@ -13,7 +15,8 @@ const openai = new OpenAIApi(config);
|
|
13 |
|
14 |
const functions: any[] = [
|
15 |
urlSurferSchema,
|
16 |
-
serpApiSchema
|
|
|
17 |
];
|
18 |
|
19 |
export async function POST(req: Request) {
|
|
|
2 |
import { OpenAIStream, StreamingTextResponse } from "ai";
|
3 |
import { createUrlSurfer } from "@/app/tools/surfer";
|
4 |
import { createSearchApi } from "@/app/tools/search";
|
5 |
+
import { createOddsApi } from "@/app/tools/odds";
|
6 |
|
7 |
const [, urlSurferSchema] = createUrlSurfer();
|
8 |
const [, serpApiSchema] = createSearchApi({ apiKey: process.env.SERP_API_KEY || '' });
|
9 |
+
const [, oddsApiSchema] = createOddsApi({ apiKey: process.env.ODDS_API_KEY || '' });
|
10 |
|
11 |
const config = new Configuration({
|
12 |
apiKey: process.env.OPENAI_API_KEY,
|
|
|
15 |
|
16 |
const functions: any[] = [
|
17 |
urlSurferSchema,
|
18 |
+
serpApiSchema,
|
19 |
+
oddsApiSchema
|
20 |
];
|
21 |
|
22 |
export async function POST(req: Request) {
|
app/tools/odds.ts
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { Tool } from 'openai-function-calling-tools';
|
2 |
+
import { z } from 'zod';
|
3 |
+
|
4 |
+
/**
 * Builds the `oddsApi` function-calling tool, backed by The Odds API (v4).
 *
 * The tool takes a free-text `input`, picks a sport and a betting market from
 * keywords found in it, and returns the API's JSON response as a string.
 *
 * Keyword mapping (case-insensitive):
 * - sport: "nba" -> `basketball_nba`, "nfl" -> `americanfootball_nfl`,
 *   anything else -> `upcoming`
 * - market: "spread" -> `spreads`, "o/u" -> `totals`, anything else -> `h2h`
 *
 * @param apiKey - The Odds API key sent with every request.
 * @returns The `.tool` value produced by `Tool` for this schema/executor.
 */
function createOddsApi({ apiKey }: { apiKey: string }) {
  const paramsSchema = z.object({
    input: z.string(),
  });
  const name = 'oddsApi';
  const description = 'A realtime Sports Odds API. Useful for when you need to answer questions about sports odds, currently NBA and NFL. Input should be a sport and a corresponding game. Outputs a JSON array of results.';

  const execute = async ({ input }: z.infer<typeof paramsSchema>) => {
    try {
      // Match keywords case-insensitively so "NBA" / "NFL" / "Spread" are recognised.
      const query = input.toLowerCase();

      // Other sport keys exist (e.g. americanfootball_nfl_super_bowl_winner) but are not mapped yet.
      let sportKey = 'upcoming';
      if (query.includes('nba')) {
        sportKey = 'basketball_nba';
      } else if (query.includes('nfl')) {
        sportKey = 'americanfootball_nfl';
      }

      // Market keys as accepted by the v4 `markets` parameter.
      let markets = 'h2h';
      if (query.includes('spread')) {
        markets = 'spreads';
      } else if (query.includes('o/u')) {
        markets = 'totals';
      }

      // URLSearchParams percent-encodes every value, including the API key.
      const params = new URLSearchParams({
        apiKey,
        oddsFormat: 'american',
        dateFormat: 'iso',
        markets,
        regions: 'us',
      });

      const response = await fetch(`https://api.the-odds-api.com/v4/sports/${sportKey}/odds?${params.toString()}`);
      if (!response.ok) {
        // Surface quota/auth failures instead of passing an error payload off as odds data.
        throw new Error(`HTTP ${response.status} ${response.statusText}`);
      }

      const oddsResponse: unknown = await response.json();
      return JSON.stringify(oddsResponse);
    } catch (error) {
      throw new Error(`Error in oddsApi: ${error}`);
    }
  };

  return new Tool(paramsSchema, name, description, execute).tool;
}
|
47 |
+
|
48 |
+
export { createOddsApi };
|
package-lock.json
CHANGED
@@ -30,6 +30,7 @@
|
|
30 |
"react-markdown": "^9.0.0",
|
31 |
"serpapi": "^2.0.0",
|
32 |
"sonner": "^1.1.0",
|
|
|
33 |
"zod": "^3.22.4",
|
34 |
"zod-to-json-schema": "^3.21.4"
|
35 |
},
|
@@ -7792,6 +7793,52 @@
|
|
7792 |
"integrity": "sha512-N+8UisAXDGk8PFXP4HAzVR9nbfmVJ3zYLAWiTIoqC5v5isinhr+r5uaO8+7r3BMfuNIufIsA7RdpVgacC2cSpw==",
|
7793 |
"dev": true
|
7794 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7795 |
"node_modules/through": {
|
7796 |
"version": "2.3.8",
|
7797 |
"resolved": "https://registry.npmjs.org/through/-/through-2.3.8.tgz",
|
|
|
30 |
"react-markdown": "^9.0.0",
|
31 |
"serpapi": "^2.0.0",
|
32 |
"sonner": "^1.1.0",
|
33 |
+
"the-odds-api": "^2.1.0",
|
34 |
"zod": "^3.22.4",
|
35 |
"zod-to-json-schema": "^3.21.4"
|
36 |
},
|
|
|
7793 |
"integrity": "sha512-N+8UisAXDGk8PFXP4HAzVR9nbfmVJ3zYLAWiTIoqC5v5isinhr+r5uaO8+7r3BMfuNIufIsA7RdpVgacC2cSpw==",
|
7794 |
"dev": true
|
7795 |
},
|
7796 |
+
"node_modules/the-odds-api": {
|
7797 |
+
"version": "2.1.0",
|
7798 |
+
"resolved": "https://registry.npmjs.org/the-odds-api/-/the-odds-api-2.1.0.tgz",
|
7799 |
+
"integrity": "sha512-Xil75sSw/WJSD4Af5314AQKw7KFNIEiU9NQxVRYbouDzUmCy2HbaL/6PLRP3ExqYx6xp/3D3vL7rmHHXqHYLPw==",
|
7800 |
+
"dependencies": {
|
7801 |
+
"node-fetch": "^2.3.0"
|
7802 |
+
}
|
7803 |
+
},
|
7804 |
+
"node_modules/the-odds-api/node_modules/node-fetch": {
|
7805 |
+
"version": "2.7.0",
|
7806 |
+
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz",
|
7807 |
+
"integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==",
|
7808 |
+
"dependencies": {
|
7809 |
+
"whatwg-url": "^5.0.0"
|
7810 |
+
},
|
7811 |
+
"engines": {
|
7812 |
+
"node": "4.x || >=6.0.0"
|
7813 |
+
},
|
7814 |
+
"peerDependencies": {
|
7815 |
+
"encoding": "^0.1.0"
|
7816 |
+
},
|
7817 |
+
"peerDependenciesMeta": {
|
7818 |
+
"encoding": {
|
7819 |
+
"optional": true
|
7820 |
+
}
|
7821 |
+
}
|
7822 |
+
},
|
7823 |
+
"node_modules/the-odds-api/node_modules/tr46": {
|
7824 |
+
"version": "0.0.3",
|
7825 |
+
"resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz",
|
7826 |
+
"integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw=="
|
7827 |
+
},
|
7828 |
+
"node_modules/the-odds-api/node_modules/webidl-conversions": {
|
7829 |
+
"version": "3.0.1",
|
7830 |
+
"resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz",
|
7831 |
+
"integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ=="
|
7832 |
+
},
|
7833 |
+
"node_modules/the-odds-api/node_modules/whatwg-url": {
|
7834 |
+
"version": "5.0.0",
|
7835 |
+
"resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz",
|
7836 |
+
"integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==",
|
7837 |
+
"dependencies": {
|
7838 |
+
"tr46": "~0.0.3",
|
7839 |
+
"webidl-conversions": "^3.0.0"
|
7840 |
+
}
|
7841 |
+
},
|
7842 |
"node_modules/through": {
|
7843 |
"version": "2.3.8",
|
7844 |
"resolved": "https://registry.npmjs.org/through/-/through-2.3.8.tgz",
|
package.json
CHANGED
@@ -31,6 +31,7 @@
|
|
31 |
"react-markdown": "^9.0.0",
|
32 |
"serpapi": "^2.0.0",
|
33 |
"sonner": "^1.1.0",
|
|
|
34 |
"zod": "^3.22.4",
|
35 |
"zod-to-json-schema": "^3.21.4"
|
36 |
},
|
|
|
31 |
"react-markdown": "^9.0.0",
|
32 |
"serpapi": "^2.0.0",
|
33 |
"sonner": "^1.1.0",
|
34 |
+
"the-odds-api": "^2.1.0",
|
35 |
"zod": "^3.22.4",
|
36 |
"zod-to-json-schema": "^3.21.4"
|
37 |
},
|
pages/api/functions/embed.ts
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { createSearchApi } from '../../../app/tools/search';
|
2 |
+
import { createOddsApi } from '@/app/tools/odds';
|
3 |
+
import { handleContentText } from './utils';
|
4 |
+
import { similaritySearch } from './vector-store';
|
5 |
+
|
6 |
+
const urlRegex = /(https?:\/\/[^\s]+)/g;
|
7 |
+
|
8 |
+
const [serpApi] =
|
9 |
+
createSearchApi({
|
10 |
+
apiKey: process.env.SERP_API_KEY || "",
|
11 |
+
});
|
12 |
+
|
13 |
+
const [oddsApi] = createOddsApi({ apiKey: process.env.ODDS_API_KEY || "" });
|
14 |
+
type FunctionOutput = any;
|
15 |
+
type FunctionInput = any;
|
16 |
+
|
17 |
+
/**
 * Handler for the `oddsApi` function: calls the odds tool with `input`, then
 * runs a similarity search for `input` over the tool's output.
 */
export const odds: FunctionOutput = async ({ input }: FunctionInput) => {
  const oddsPayload = await oddsApi({ input });
  return similaritySearch(input, oddsPayload);
};
|
22 |
+
|
23 |
+
/**
 * Handler for the `surfer` function: extracts the first URL from `input`,
 * loads that page's text, and runs a similarity search for the rest of the
 * prompt over it.
 *
 * If `input` contains no URL, or the page yields no text, a plain message
 * containing the prompt is returned instead of search results.
 */
export const surfer: FunctionOutput = async ({ input }: FunctionInput) => {
  const urls = input.match(urlRegex);
  const targetUrl = urls ? urls[0] : null;
  const promptWithoutUrl = urls ? input.replace(urlRegex, '').trim() : input;

  // With no URL there is nothing to fetch; skip the request so the fallback
  // message below is returned rather than an exception from fetching `null`.
  const content: string = targetUrl ? await handleContentText(targetUrl) : '';
  if (!content) {
    return `Couldn't find ${targetUrl}, here is the prompt: ${promptWithoutUrl}`;
  }

  return similaritySearch(promptWithoutUrl, content);
};
|
36 |
+
|
37 |
+
/**
 * Handler for the `searchApi` function: calls the search tool with `input`,
 * then runs a similarity search for `input` over the returned text.
 */
export const serp: FunctionOutput = async ({ input }: FunctionInput) => {
  const searchText: string = await serpApi({ input });
  return similaritySearch(input, searchText);
};
|
42 |
+
|
pages/api/functions/index.ts
CHANGED
@@ -1,13 +1,5 @@
|
|
1 |
import { NextApiRequest, NextApiResponse } from 'next';
|
2 |
-
import
|
3 |
-
import { JSDOM } from 'jsdom';
|
4 |
-
// @ts-ignore
|
5 |
-
import pdfParse from 'pdf-parse';
|
6 |
-
import puppeteer from 'puppeteer';
|
7 |
-
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
|
8 |
-
import { MemoryVectorStore } from 'langchain/vectorstores/memory';
|
9 |
-
import { HuggingFaceTransformersEmbeddings } from "langchain/embeddings/hf_transformers";
|
10 |
-
import { createSearchApi } from '../../../app/tools/search'
|
11 |
|
12 |
export const config = {
|
13 |
api: {
|
@@ -17,95 +9,12 @@ export const config = {
|
|
17 |
},
|
18 |
};
|
19 |
|
20 |
-
|
21 |
-
const VECTOR_STORE_SIZE = 10;
|
22 |
-
const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: DEFAULT_CHUNK_SIZE });
|
23 |
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
const model = new HuggingFaceTransformersEmbeddings({
|
30 |
-
modelName: "Xenova/all-MiniLM-L6-v2",
|
31 |
-
});
|
32 |
-
|
33 |
-
const urlRegex = /(https?:\/\/[^\s]+)/g;
|
34 |
-
|
35 |
-
const [serpApi] =
|
36 |
-
createSearchApi({
|
37 |
-
apiKey: process.env.SERP_API_KEY || "",
|
38 |
-
});
|
39 |
-
|
40 |
-
const handleContentText = async (targetUrl: string) => {
|
41 |
-
const response = await fetch(targetUrl);
|
42 |
-
const status = response.status;
|
43 |
-
const contentType = response.headers.get('content-type') || '';
|
44 |
-
let content;
|
45 |
-
|
46 |
-
if (status >= 400) {
|
47 |
-
// If status is 400 or greater, try using puppeteer
|
48 |
-
const browser = await puppeteer.launch();
|
49 |
-
const page = await browser.newPage();
|
50 |
-
await page.goto(targetUrl, { waitUntil: 'networkidle0' }); // waits for the network to be idle before considering the navigation to be finished.
|
51 |
-
content = await page.evaluate(() => document.body.innerText);
|
52 |
-
await browser.close();
|
53 |
-
return content;
|
54 |
-
} else if (contentType.includes('application/pdf')) {
|
55 |
-
const buffer = await response.arrayBuffer();
|
56 |
-
content = await extractTextFromPDF(buffer as any);
|
57 |
-
} else if (contentType.includes('text/html')) {
|
58 |
-
const html = await response.text();
|
59 |
-
const dom = new JSDOM(html);
|
60 |
-
const scripts = dom.window.document.querySelectorAll('script, style');
|
61 |
-
scripts.forEach(element => element.remove());
|
62 |
-
content = dom.window.document.body.textContent || '';
|
63 |
-
} else {
|
64 |
-
content = await response.text();
|
65 |
-
}
|
66 |
-
return content.trim();
|
67 |
-
}
|
68 |
-
|
69 |
-
|
70 |
-
const surferEmbedApi = async ({ input }: any) => {
|
71 |
-
const urls = input.match(urlRegex);
|
72 |
-
const targetUrl = urls ? urls[0] : null;
|
73 |
-
const promptWithoutUrl = urls ? input.replace(urlRegex, '').trim() : input;
|
74 |
-
|
75 |
-
const content: string = await handleContentText(targetUrl)
|
76 |
-
if (!content) {
|
77 |
-
return `Couldn't find ${targetUrl}, here is the prompt: ${promptWithoutUrl}`;
|
78 |
-
}
|
79 |
-
|
80 |
-
const documents = await textSplitter.createDocuments([content]);
|
81 |
-
|
82 |
-
const vectorStore = await MemoryVectorStore.fromTexts(
|
83 |
-
// @ts-ignore
|
84 |
-
[...documents.map(doc => doc.pageContent)],
|
85 |
-
// @ts-ignore
|
86 |
-
[...documents.map((v, k) => k)],
|
87 |
-
model
|
88 |
-
)
|
89 |
-
const queryResult = await vectorStore.similaritySearch(promptWithoutUrl, VECTOR_STORE_SIZE);
|
90 |
-
return `Here is the context: ${JSON.stringify(queryResult.map(result => result.pageContent))} from using the prompt to lookup relevant information. Here is the prompt: ${promptWithoutUrl}`;
|
91 |
-
}
|
92 |
-
|
93 |
-
const serpEmbedApi = async ({ input }: any) => {
|
94 |
-
const content: string = await serpApi({input})
|
95 |
-
const documents = await textSplitter.createDocuments([content]);
|
96 |
-
const vectorStore = await MemoryVectorStore.fromTexts(
|
97 |
-
// @ts-ignore
|
98 |
-
[...documents.map(doc => doc.pageContent)],
|
99 |
-
// @ts-ignore
|
100 |
-
[...documents.map((v, k) => k)],
|
101 |
-
model
|
102 |
-
)
|
103 |
-
const queryResult = await vectorStore.similaritySearch(input, VECTOR_STORE_SIZE);
|
104 |
-
return queryResult;
|
105 |
-
}
|
106 |
-
const handlers: any = {
|
107 |
-
'searchApi': serpEmbedApi,
|
108 |
-
'surfer': surferEmbedApi
|
109 |
};
|
110 |
|
111 |
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
|
|
|
1 |
import { NextApiRequest, NextApiResponse } from 'next';
|
2 |
+
import { odds, serp, surfer } from './embed';
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
export const config = {
|
5 |
api: {
|
|
|
9 |
},
|
10 |
};
|
11 |
|
12 |
+
type FunctionHandler = any;
|
|
|
|
|
13 |
|
14 |
+
const handlers: FunctionHandler = {
|
15 |
+
'searchApi': serp,
|
16 |
+
'surfer': surfer,
|
17 |
+
'oddsApi': odds
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
};
|
19 |
|
20 |
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
|
pages/api/functions/utils.ts
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import fetch from 'node-fetch';
|
2 |
+
import { JSDOM } from 'jsdom';
|
3 |
+
import pdfParse from 'pdf-parse';
|
4 |
+
import puppeteer from 'puppeteer';
|
5 |
+
|
6 |
+
|
7 |
+
/**
 * Parses a PDF held in memory with `pdf-parse` and returns its `text` field.
 *
 * @param buffer - Raw PDF bytes.
 * @returns The text reported by the parser.
 */
export const extractTextFromPDF = async (buffer: Buffer): Promise<string> => {
  const { text } = await pdfParse(buffer);
  return text;
};
|
11 |
+
|
12 |
+
/**
 * Fetches `targetUrl` and returns its text content, trimmed.
 *
 * Strategy, chosen from the HTTP response:
 * - status >= 400: retry in a headless browser (Puppeteer) and read
 *   `document.body.innerText`;
 * - `application/pdf`: extract text with `extractTextFromPDF`;
 * - `text/html`: strip `<script>`/`<style>` elements and read the body text;
 * - anything else: use the raw response text.
 *
 * @param targetUrl - Absolute URL to load. An empty or missing value yields `''`.
 * @returns The extracted text, or `''` when there is no URL to load.
 */
export const handleContentText = async (targetUrl: string) => {
  // Callers can end up with no URL at runtime; report "no content" instead of
  // letting the fetch throw on an invalid URL.
  if (!targetUrl) {
    return '';
  }

  const response = await fetch(targetUrl);
  const contentType = response.headers.get('content-type') || '';
  let content: string;

  if (response.status >= 400) {
    // If status is 400 or greater, try using puppeteer.
    const browser = await puppeteer.launch();
    try {
      const page = await browser.newPage();
      // Wait for the network to be idle before considering navigation finished.
      await page.goto(targetUrl, { waitUntil: 'networkidle0' });
      content = await page.evaluate(() => document.body.innerText);
    } finally {
      // Always release the browser process, even when navigation fails.
      await browser.close();
    }
  } else if (contentType.includes('application/pdf')) {
    const buffer = await response.arrayBuffer();
    content = await extractTextFromPDF(buffer as any);
  } else if (contentType.includes('text/html')) {
    const html = await response.text();
    const dom = new JSDOM(html);
    // Drop script/style nodes so their source does not end up in the text.
    dom.window.document
      .querySelectorAll('script, style')
      .forEach(element => element.remove());
    content = dom.window.document.body.textContent || '';
  } else {
    content = await response.text();
  }

  return content.trim();
};
|
40 |
+
|
pages/api/functions/vector-store.ts
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
|
2 |
+
import { MemoryVectorStore } from 'langchain/vectorstores/memory';
|
3 |
+
import { HuggingFaceTransformersEmbeddings } from "langchain/embeddings/hf_transformers";
|
4 |
+
|
5 |
+
const DEFAULT_CHUNK_SIZE = 1000;
|
6 |
+
const VECTOR_STORE_SIZE = 3;
|
7 |
+
const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: DEFAULT_CHUNK_SIZE });
|
8 |
+
|
9 |
+
const model = new HuggingFaceTransformersEmbeddings({
|
10 |
+
modelName: "Xenova/all-MiniLM-L6-v2",
|
11 |
+
});
|
12 |
+
|
13 |
+
/**
 * Splits `content` into chunks, embeds them into a fresh in-memory vector
 * store, and returns the `VECTOR_STORE_SIZE` chunks most similar to `input`.
 *
 * @param input - Query text to search with.
 * @param content - Text to split, embed and search over.
 * @returns The documents returned by the store's similarity search.
 */
export const similaritySearch = async (input: string, content: string) => {
  const chunks = await textSplitter.createDocuments([content]);

  // Each chunk's position in the split is passed as its metadata entry.
  const chunkTexts = chunks.map(({ pageContent }) => pageContent);
  const chunkPositions = chunks.map((_chunk, position) => position);

  const store = await MemoryVectorStore.fromTexts(chunkTexts, chunkPositions, model);
  return store.similaritySearch(input, VECTOR_STORE_SIZE);
};
|