matt HOFFNER committed on
Commit
81c1854
·
1 Parent(s): f16ab24

use less langchain

Browse files
package-lock.json CHANGED
@@ -19,6 +19,7 @@
19
  "eslint": "8.40.0",
20
  "eslint-config-next": "13.4.2",
21
  "fs-extra": "^11.1.1",
 
22
  "langchain": "^0.0.90",
23
  "next": "13.4.2",
24
  "pdfjs-dist": "^3.7.107",
@@ -911,6 +912,16 @@
911
  "node": ">=16.15"
912
  }
913
  },
 
 
 
 
 
 
 
 
 
 
914
  "node_modules/@humanwhocodes/config-array": {
915
  "version": "0.11.10",
916
  "resolved": "https://registry.npmjs.org/@humanwhocodes/config-array/-/config-array-0.11.10.tgz",
@@ -1952,6 +1963,14 @@
1952
  "resolved": "https://registry.npmjs.org/binary-search/-/binary-search-1.3.6.tgz",
1953
  "integrity": "sha512-nbE1WxOTTrUWIfsfZ4aHGYu5DOuNkbxGokjV6Z2kxfJK3uaAb8zNK1muzOeipoLHZjInT4Br88BHpzevc681xA=="
1954
  },
 
 
 
 
 
 
 
 
1955
  "node_modules/bl": {
1956
  "version": "4.1.0",
1957
  "resolved": "https://registry.npmjs.org/bl/-/bl-4.1.0.tgz",
@@ -2190,6 +2209,13 @@
2190
  "resolved": "https://registry.npmjs.org/chownr/-/chownr-1.1.4.tgz",
2191
  "integrity": "sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg=="
2192
  },
 
 
 
 
 
 
 
2193
  "node_modules/client-only": {
2194
  "version": "0.0.1",
2195
  "resolved": "https://registry.npmjs.org/client-only/-/client-only-0.0.1.tgz",
@@ -3257,6 +3283,11 @@
3257
  "node": ">= 12"
3258
  }
3259
  },
 
 
 
 
 
3260
  "node_modules/filelist": {
3261
  "version": "1.0.4",
3262
  "resolved": "https://registry.npmjs.org/filelist/-/filelist-1.0.4.tgz",
@@ -3767,6 +3798,16 @@
3767
  "integrity": "sha512-8Rf9Y83NBReMnx0gFzA8JImQACstCYWUplepDa9xprwwtmgEZUF0h/i5xSA625zB/I37EtrswSST6OXxwaaIJQ==",
3768
  "optional": true
3769
  },
 
 
 
 
 
 
 
 
 
 
3770
  "node_modules/hoist-non-react-statics": {
3771
  "version": "3.3.2",
3772
  "resolved": "https://registry.npmjs.org/hoist-non-react-statics/-/hoist-non-react-statics-3.3.2.tgz",
 
19
  "eslint": "8.40.0",
20
  "eslint-config-next": "13.4.2",
21
  "fs-extra": "^11.1.1",
22
+ "hnswlib-node": "^1.4.2",
23
  "langchain": "^0.0.90",
24
  "next": "13.4.2",
25
  "pdfjs-dist": "^3.7.107",
 
912
  "node": ">=16.15"
913
  }
914
  },
915
+ "node_modules/@huggingface/inference": {
916
+ "version": "1.8.0",
917
+ "resolved": "https://registry.npmjs.org/@huggingface/inference/-/inference-1.8.0.tgz",
918
+ "integrity": "sha512-Dkh7PiyMf6TINRocQsdceiR5LcqJiUHgWjaBMRpCUOCbs+GZA122VH9q+wodoSptj6rIQf7wIwtDsof+/gd0WA==",
919
+ "optional": true,
920
+ "peer": true,
921
+ "engines": {
922
+ "node": ">=18"
923
+ }
924
+ },
925
  "node_modules/@humanwhocodes/config-array": {
926
  "version": "0.11.10",
927
  "resolved": "https://registry.npmjs.org/@humanwhocodes/config-array/-/config-array-0.11.10.tgz",
 
1963
  "resolved": "https://registry.npmjs.org/binary-search/-/binary-search-1.3.6.tgz",
1964
  "integrity": "sha512-nbE1WxOTTrUWIfsfZ4aHGYu5DOuNkbxGokjV6Z2kxfJK3uaAb8zNK1muzOeipoLHZjInT4Br88BHpzevc681xA=="
1965
  },
1966
+ "node_modules/bindings": {
1967
+ "version": "1.5.0",
1968
+ "resolved": "https://registry.npmjs.org/bindings/-/bindings-1.5.0.tgz",
1969
+ "integrity": "sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ==",
1970
+ "dependencies": {
1971
+ "file-uri-to-path": "1.0.0"
1972
+ }
1973
+ },
1974
  "node_modules/bl": {
1975
  "version": "4.1.0",
1976
  "resolved": "https://registry.npmjs.org/bl/-/bl-4.1.0.tgz",
 
2209
  "resolved": "https://registry.npmjs.org/chownr/-/chownr-1.1.4.tgz",
2210
  "integrity": "sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg=="
2211
  },
2212
+ "node_modules/chromadb": {
2213
+ "version": "1.5.2",
2214
+ "resolved": "https://registry.npmjs.org/chromadb/-/chromadb-1.5.2.tgz",
2215
+ "integrity": "sha512-x/rOD7Oo1RiYA+vPK+Ma7CliCHlx26OjUt5J7Z9HZ5Ud1qDrPlvctBycK9Il3zqza96yeUoPQ7gCXHVKNoyvRQ==",
2216
+ "optional": true,
2217
+ "peer": true
2218
+ },
2219
  "node_modules/client-only": {
2220
  "version": "0.0.1",
2221
  "resolved": "https://registry.npmjs.org/client-only/-/client-only-0.0.1.tgz",
 
3283
  "node": ">= 12"
3284
  }
3285
  },
3286
+ "node_modules/file-uri-to-path": {
3287
+ "version": "1.0.0",
3288
+ "resolved": "https://registry.npmjs.org/file-uri-to-path/-/file-uri-to-path-1.0.0.tgz",
3289
+ "integrity": "sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw=="
3290
+ },
3291
  "node_modules/filelist": {
3292
  "version": "1.0.4",
3293
  "resolved": "https://registry.npmjs.org/filelist/-/filelist-1.0.4.tgz",
 
3798
  "integrity": "sha512-8Rf9Y83NBReMnx0gFzA8JImQACstCYWUplepDa9xprwwtmgEZUF0h/i5xSA625zB/I37EtrswSST6OXxwaaIJQ==",
3799
  "optional": true
3800
  },
3801
+ "node_modules/hnswlib-node": {
3802
+ "version": "1.4.2",
3803
+ "resolved": "https://registry.npmjs.org/hnswlib-node/-/hnswlib-node-1.4.2.tgz",
3804
+ "integrity": "sha512-76PIzOaNcX8kOpKwlFPl07uelpctqDMzbiC+Qsk2JWNVkzeU/6iXRk4tfE9z3DoK1RCBrOaFXmQ6RFb1BVF9LA==",
3805
+ "hasInstallScript": true,
3806
+ "dependencies": {
3807
+ "bindings": "^1.5.0",
3808
+ "node-addon-api": "^6.0.0"
3809
+ }
3810
+ },
3811
  "node_modules/hoist-non-react-statics": {
3812
  "version": "3.3.2",
3813
  "resolved": "https://registry.npmjs.org/hoist-non-react-statics/-/hoist-non-react-statics-3.3.2.tgz",
package.json CHANGED
@@ -19,6 +19,7 @@
19
  "eslint": "8.40.0",
20
  "eslint-config-next": "13.4.2",
21
  "fs-extra": "^11.1.1",
 
22
  "langchain": "^0.0.90",
23
  "next": "13.4.2",
24
  "pdfjs-dist": "^3.7.107",
 
19
  "eslint": "8.40.0",
20
  "eslint-config-next": "13.4.2",
21
  "fs-extra": "^11.1.1",
22
+ "hnswlib-node": "^1.4.2",
23
  "langchain": "^0.0.90",
24
  "next": "13.4.2",
25
  "pdfjs-dist": "^3.7.107",
src/components/ChatWindow.jsx CHANGED
@@ -3,6 +3,7 @@ import Image from "next/image";
3
  import { useCallback, useEffect, useState } from "react";
4
  import MessageList from './MessageList';
5
  import {FileLoader} from './FileLoader';
 
6
  import Loader from "./Loader";
7
 
8
  function ChatWindow({
@@ -19,16 +20,32 @@ function ChatWindow({
19
 
20
  const isReady = loadingStatus.progress === 1;
21
 
22
- const handleSubmit = useCallback(() => {
23
  if (isGenerating || !isReady) {
24
  return;
25
  }
26
 
27
  if (fileId) {
28
- console.log('we have a fileId, so this should be contextual chat')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  }
30
-
31
- send(userInput, maxTokens, stopStrings);
32
  setUserInput("");
33
  }, [
34
  userInput,
 
3
  import { useCallback, useEffect, useState } from "react";
4
  import MessageList from './MessageList';
5
  import {FileLoader} from './FileLoader';
6
+ import { db } from '@/utils/db-client';
7
  import Loader from "./Loader";
8
 
9
  function ChatWindow({
 
20
 
21
  const isReady = loadingStatus.progress === 1;
22
 
23
+ const handleSubmit = useCallback(async () => {
24
  if (isGenerating || !isReady) {
25
  return;
26
  }
27
 
28
  if (fileId) {
29
+ const similarityMatches = 2;
30
+ const fileContents = await db.docs.get(fileId);
31
+ const vectorStore = await HNSWLib.fromDocuments(fileContents, new XenovaTransformersEmbeddings());
32
+ const result = await vectorStore.similaritySearch(userInput, similarityMatches);
33
+ const qaPrompt =
34
+ `You are an AI assistant providing helpful advice. You are given the following extracted parts of a long document and a question. Provide a conversational answer based on the context provided.
35
+ You should only provide hyperlinks that reference the context below. Do NOT make up hyperlinks.
36
+ If you can't find the answer in the context below, just say "Hmm, I'm not sure." Don't try to make up an answer.
37
+ If the question is not related to the context, politely respond that you are tuned to only answer questions that are related to the context.
38
+ Question: ${userInput}
39
+ =========
40
+ ${result}
41
+ =========
42
+ Answer:
43
+ `
44
+ send(qaPrompt, maxTokens, stopStrings);
45
+ } else {
46
+ send(userInput, maxTokens, stopStrings);
47
  }
48
+
 
49
  setUserInput("");
50
  }, [
51
  userInput,
src/pages/api/docChat.ts DELETED
@@ -1,58 +0,0 @@
1
- import type { NextApiRequest, NextApiResponse } from 'next';
2
-
3
- import { storesDir } from '@/utils/file-handler';
4
-
5
- import { makeChain } from '@/utils/make-chain';
6
- import XenovaTransformersEmbeddings from '../../embed/hf';
7
- import { HNSWLib } from 'langchain/vectorstores/hnswlib';
8
-
9
- let vectorStore: HNSWLib;
10
-
11
- export default async function handler(
12
- req: NextApiRequest,
13
- res: NextApiResponse,
14
- ) {
15
- const { prompt, messages } = req.body;
16
-
17
- if (!prompt) {
18
- return res.status(400).json({ message: 'No question in the request' });
19
- }
20
- // OpenAI recommends replacing newlines with spaces for best results
21
- const sanitizedQuestion = prompt.trim().replaceAll('\n', ' ');
22
-
23
- res.writeHead(200, {
24
- 'Content-Type': 'text/event-stream',
25
- 'Cache-Control': 'no-cache, no-transform',
26
- Connection: 'keep-alive',
27
- });
28
-
29
- const sendData = (data: string) => {
30
- res.write(`${data}\n\n`);
31
- };
32
-
33
- // load store
34
- if (!vectorStore) {
35
- vectorStore = await HNSWLib.load(storesDir, new XenovaTransformersEmbeddings());
36
- }
37
- //create chain
38
- const chain = makeChain(vectorStore, (token: string) => {
39
- // skipping stremaing for now
40
- // sendData(JSON.stringify({ data: token }));
41
- });
42
-
43
- try {
44
- //Ask a question
45
- const response = await chain.call({
46
- question: sanitizedQuestion,
47
- chat_history: messages || [],
48
- });
49
-
50
- console.log('response', response);
51
- sendData(response.text);
52
- } catch (error) {
53
- console.log('error', error);
54
- } finally {
55
- // sendData('[DONE]');
56
- res.end();
57
- }
58
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/pages/api/docHandle.ts CHANGED
@@ -1,10 +1,7 @@
1
  import type { NextApiRequest, NextApiResponse } from 'next';
2
  import {
3
- readHNSWLibModelFromLocal,
4
- storesDir,
5
  vectorStoreToHNSWLibModel,
6
  } from '@/utils/file-handler';
7
- import fs from 'fs-extra';
8
  import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
9
  import { HNSWLib } from 'langchain/vectorstores/hnswlib';
10
  import XenovaTransformersEmbeddings from '../../embed/hf'
@@ -27,20 +24,6 @@ export default async function handler(
27
  return res.status(400).json({ message: 'No question in the request' });
28
  }
29
 
30
- /*
31
- use dexie instead to get contents
32
- const exists = await fs.exists(storesDir);
33
- console.log(exists);
34
-
35
- if (exists) {
36
- console.log('read from ' + storesDir);
37
- const model = await readHNSWLibModelFromLocal();
38
- return res.status(200).send({
39
- ...model,
40
- });
41
- }
42
- */
43
-
44
  const vectorStore = await handleDocs(text);
45
  const model = await vectorStoreToHNSWLibModel(vectorStore);
46
  res.status(200).send({
 
1
  import type { NextApiRequest, NextApiResponse } from 'next';
2
  import {
 
 
3
  vectorStoreToHNSWLibModel,
4
  } from '@/utils/file-handler';
 
5
  import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
6
  import { HNSWLib } from 'langchain/vectorstores/hnswlib';
7
  import XenovaTransformersEmbeddings from '../../embed/hf'
 
24
  return res.status(400).json({ message: 'No question in the request' });
25
  }
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  const vectorStore = await handleDocs(text);
28
  const model = await vectorStoreToHNSWLibModel(vectorStore);
29
  res.status(200).send({
src/utils/make-chain.ts DELETED
@@ -1,57 +0,0 @@
1
- import { CallbackManager } from 'langchain/callbacks';
2
- import { ConversationalRetrievalQAChain } from 'langchain/chains';
3
- import { OpenAIChat } from 'langchain/llms';
4
- import { PromptTemplate } from 'langchain/prompts';
5
- import { BufferMemory } from "langchain/memory";
6
- import { HNSWLib } from 'langchain/vectorstores/hnswlib';
7
-
8
- export const defaultPrompts = {
9
- CONDENSE_PROMPT: `Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.
10
-
11
- Chat History:
12
- {chat_history}
13
- Follow Up Input: {question}
14
- Standalone question:`,
15
- QA_PROMPT: `You are an AI assistant providing helpful advice. You are given the following extracted parts of a long document and a question. Provide a conversational answer based on the context provided.
16
- You should only provide hyperlinks that reference the context below. Do NOT make up hyperlinks.
17
- If you can't find the answer in the context below, just say "Hmm, I'm not sure." Don't try to make up an answer.
18
- If the question is not related to the context, politely respond that you are tuned to only answer questions that are related to the context.
19
-
20
- Question: {question}
21
- =========
22
- {context}
23
- =========
24
- Answer:`,
25
- };
26
-
27
- const CONDENSE_PROMPT = PromptTemplate.fromTemplate(
28
- defaultPrompts.CONDENSE_PROMPT,
29
- );
30
-
31
- const QA_PROMPT = PromptTemplate.fromTemplate(defaultPrompts.QA_PROMPT);
32
-
33
- export const makeChain = (
34
- vectorstore: HNSWLib,
35
- onTokenStream?: (token: string) => void,
36
- ) => {
37
-
38
- const model = new OpenAIChat({
39
- temperature: 0.8,
40
- modelName: "OpenAIModelID.GPT_3_5",
41
- streaming: false,
42
- callbackManager: onTokenStream
43
- ? CallbackManager.fromHandlers({
44
- async handleLLMNewToken(token) {
45
- onTokenStream(token);
46
- },
47
- })
48
- : undefined,
49
- })
50
-
51
- return ConversationalRetrievalQAChain.fromLLM(
52
- model, vectorstore.asRetriever(),
53
- {
54
- memory: new BufferMemory({
55
- memoryKey: "chat_history", // Must be set to "chat_history"
56
- }), qaTemplate: defaultPrompts.QA_PROMPT, questionGeneratorTemplate: defaultPrompts.CONDENSE_PROMPT })
57
- };