Spaces:
Paused
Paused
matt HOFFNER
committed on
Commit
·
872630d
1
Parent(s):
31998f6
init
Browse files- .gitignore +1 -0
- Dockerfile +61 -0
- README copy.md +14 -0
- next-env.d.ts +5 -0
- next.config.js +36 -0
- package-lock.json +0 -0
- package.json +31 -0
- public/next.svg +1 -0
- public/vercel.svg +1 -0
- screenshot.png +0 -0
- src/app/api/chat/route.ts +45 -0
- src/app/favicon.ico +0 -0
- src/app/globals.css +107 -0
- src/app/home.tsx +126 -0
- src/app/layout.tsx +22 -0
- src/app/page.module.css +361 -0
- src/app/page.tsx +7 -0
- src/components/FileEmbedder.tsx +14 -0
- src/components/FileLoader.tsx +59 -0
- src/components/embeddingsWorker.js +100 -0
- src/components/embeddingsWorker.spec.mjs +107 -0
- tsconfig.json +41 -0
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
node_modules
|
Dockerfile
ADDED
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM node:18 AS base

# Install dependencies only when needed
FROM base AS deps

WORKDIR /app

# Install dependencies based on the preferred package manager
COPY package.json yarn.lock* package-lock.json* pnpm-lock.yaml* ./
RUN \
  if [ -f yarn.lock ]; then yarn --frozen-lockfile; \
  elif [ -f package-lock.json ]; then npm ci --legacy-peer-deps; \
  elif [ -f pnpm-lock.yaml ]; then yarn global add pnpm && pnpm i --frozen-lockfile; \
  else echo "Lockfile not found." && exit 1; \
  fi

# Uncomment the following lines if you want to use a secret at buildtime,
# for example to access your private npm packages
# RUN --mount=type=secret,id=HF_EXAMPLE_SECRET,mode=0444,required=true \
# $(cat /run/secrets/HF_EXAMPLE_SECRET)

# Rebuild the source code only when needed
FROM base AS builder
WORKDIR /app
COPY --from=deps /app/node_modules ./node_modules
COPY . .

# Next.js collects completely anonymous telemetry data about general usage.
# Learn more here: https://nextjs.org/telemetry
# Uncomment the following line in case you want to disable telemetry during the build.
# ENV NEXT_TELEMETRY_DISABLED=1

# RUN yarn build

# If you use yarn, comment out this line and use the line above
RUN npm run build

# Production image, copy all the files and run next
FROM base AS runner
WORKDIR /app

ENV NODE_ENV=production
# Uncomment the following line in case you want to disable telemetry during runtime.
# ENV NEXT_TELEMETRY_DISABLED=1

RUN addgroup --system --gid 1001 nodejs
RUN adduser --system --uid 1001 nextjs

COPY --from=builder /app/public ./public

# The container is started with `npm run start` (i.e. `next start`), which needs
# the package manifest, the Next.js config, the installed dependencies and the
# complete production build - not only the static assets.
COPY --from=builder /app/package.json ./package.json
COPY --from=builder /app/next.config.js ./next.config.js
COPY --from=builder /app/node_modules ./node_modules
# Owned by the runtime user so the server can write inside .next while running.
COPY --from=builder --chown=nextjs:nodejs /app/.next ./.next

USER nextjs

EXPOSE 3000

ENV PORT=3000

CMD ["npm", "run", "start"]
|
README copy.md
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# edge-chat
|
2 |
+
|
3 |
+
Simple demo using transformers.js embeddings, a simple vector store and PDF text extraction with Web Workers. No file data is stored on the server.
|
4 |
+
|
5 |
+
![](./screenshot.png)
|
6 |
+
|
7 |
+
|
8 |
+
## Key files:
|
9 |
+
|
10 |
+
### embeddingsWorker.js
|
11 |
+
|
12 |
+
### FileLoader.tsx
|
13 |
+
|
14 |
+
### home.tsx
|
next-env.d.ts
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/// <reference types="next" />
|
2 |
+
/// <reference types="next/image-types/global" />
|
3 |
+
|
4 |
+
// NOTE: This file should not be edited
|
5 |
+
// see https://nextjs.org/docs/basic-features/typescript for more information.
|
next.config.js
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
module.exports = {
|
2 |
+
webpack: (config, { isServer }) => {
|
3 |
+
if (!isServer) {
|
4 |
+
// Web Worker configuration
|
5 |
+
config.module.rules.push({
|
6 |
+
test: /\.worker\.js$/,
|
7 |
+
loader: 'worker-loader',
|
8 |
+
options: {
|
9 |
+
publicPath: '/_next/static/worker/',
|
10 |
+
filename: 'static/worker/[hash].worker.js',
|
11 |
+
},
|
12 |
+
});
|
13 |
+
|
14 |
+
// Configuration for .node files
|
15 |
+
config.module.rules.push({
|
16 |
+
test: /\.node$/,
|
17 |
+
loader: 'node-loader',
|
18 |
+
});
|
19 |
+
|
20 |
+
// Resolve aliases to prevent bundling certain server-side modules in client-side code
|
21 |
+
config.resolve.alias = {
|
22 |
+
...config.resolve.alias,
|
23 |
+
// Mock sharp module
|
24 |
+
"sharp$": false,
|
25 |
+
// Mock onnxruntime-node module
|
26 |
+
"onnxruntime-node$": false,
|
27 |
+
};
|
28 |
+
}
|
29 |
+
|
30 |
+
config.resolve.alias.canvas = false
|
31 |
+
config.resolve.alias.encoding = false
|
32 |
+
|
33 |
+
return config;
|
34 |
+
},
|
35 |
+
};
|
36 |
+
|
package-lock.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
package.json
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"name": "edge-chat",
|
3 |
+
"version": "0.1.0",
|
4 |
+
"private": true,
|
5 |
+
"scripts": {
|
6 |
+
"dev": "next dev -p 4000",
|
7 |
+
"build": "next build",
|
8 |
+
"start": "next start",
|
9 |
+
"lint": "next lint"
|
10 |
+
},
|
11 |
+
"dependencies": {
|
12 |
+
"@xenova/transformers": "^2.14.0",
|
13 |
+
"ai": "^2.2.31",
|
14 |
+
"critters": "^0.0.20",
|
15 |
+
"next": "14.0.3",
|
16 |
+
"openai": "^4.24.7",
|
17 |
+
"pdfjs-dist": "^3.7.107",
|
18 |
+
"react": "^18",
|
19 |
+
"react-dom": "^18",
|
20 |
+
"worker-loader": "^3.0.8"
|
21 |
+
},
|
22 |
+
"devDependencies": {
|
23 |
+
"@types/node": "^20",
|
24 |
+
"@types/react": "^18",
|
25 |
+
"@types/react-dom": "^18",
|
26 |
+
"eslint": "^8",
|
27 |
+
"eslint-config-next": "14.0.4",
|
28 |
+
"node-loader": "^2.0.0",
|
29 |
+
"typescript": "^5"
|
30 |
+
}
|
31 |
+
}
|
public/next.svg
ADDED
public/vercel.svg
ADDED
screenshot.png
ADDED
src/app/api/chat/route.ts
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import OpenAI from 'openai';
|
2 |
+
import { OpenAIStream, StreamingTextResponse } from 'ai';
|
3 |
+
|
4 |
+
// Module-level OpenAI client; the API key is read from the OPENAI_API_KEY
// environment variable.
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });

// Next.js route segment option: run this handler on the edge runtime.
export const runtime = 'edge';
|
9 |
+
|
10 |
+
/**
 * Chat endpoint: builds one prompt out of the earlier conversation, the
 * client-supplied vector-store results and the latest user message, sends it
 * to the chat completions API and streams the answer back.
 *
 * Request body (JSON):
 *  - `messages`: chat history; the last entry is treated as the current input.
 *  - `data.vectorStoreResults` (optional): context text inserted verbatim into
 *    the prompt.
 *
 * @param req Incoming request carrying the JSON body described above.
 * @returns A streaming text response, or a 400 response when `messages` is
 *          not an array.
 */
export async function POST(req: Request) {
  const { messages, data } = await req.json();

  // Reject malformed bodies explicitly instead of failing on `.slice` below.
  if (!Array.isArray(messages)) {
    return new Response('Request body must contain a "messages" array.', { status: 400 });
  }

  // `data` may be absent from the body; fall back to an empty context rather
  // than throwing on the property access or interpolating "undefined".
  const vectorStoreResultsString = data?.vectorStoreResults ?? '';

  // Everything except the last message is flattened into a plain transcript.
  const previousMessages = messages.slice(0, -1).map((message: any) => {
    return message.role === "user" ? `User: ${message.content}\n` : `Assistant: ${message.content}\n`;
  }).join("");
  const lastMessage = messages[messages.length - 1]?.content || 'No message found';

  const response = await openai.chat.completions.create({
    model: "gpt-3.5-turbo-1106",
    stream: true,
    messages: [
      {
        role: "system",
        content:
          "Use the following pieces of context (or previous conversation if needed) to answer the user's question in markdown format.",
      },
      {
        role: "user",
        content: `Use the following pieces of context (or previous conversation if needed) to answer the user's question in markdown format. \nIf you don't know the answer, just say that you don't know, don't try to make up an answer.
\n----------------\n
PREVIOUS CONVERSATION:
${previousMessages}
\n----------------\n
CONTEXT:
${vectorStoreResultsString}
USER INPUT: ${lastMessage}`,
      },
    ],
  });

  const stream = OpenAIStream(response);
  return new StreamingTextResponse(stream);
}
|
45 |
+
|
src/app/favicon.ico
ADDED
src/app/globals.css
ADDED
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
:root {
|
2 |
+
--max-width: 1100px;
|
3 |
+
--border-radius: 12px;
|
4 |
+
--font-mono: ui-monospace, Menlo, Monaco, 'Cascadia Mono', 'Segoe UI Mono',
|
5 |
+
'Roboto Mono', 'Oxygen Mono', 'Ubuntu Monospace', 'Source Code Pro',
|
6 |
+
'Fira Mono', 'Droid Sans Mono', 'Courier New', monospace;
|
7 |
+
|
8 |
+
--foreground-rgb: 0, 0, 0;
|
9 |
+
--background-start-rgb: 214, 219, 220;
|
10 |
+
--background-end-rgb: 255, 255, 255;
|
11 |
+
|
12 |
+
--primary-glow: conic-gradient(
|
13 |
+
from 180deg at 50% 50%,
|
14 |
+
#16abff33 0deg,
|
15 |
+
#0885ff33 55deg,
|
16 |
+
#54d6ff33 120deg,
|
17 |
+
#0071ff33 160deg,
|
18 |
+
transparent 360deg
|
19 |
+
);
|
20 |
+
--secondary-glow: radial-gradient(
|
21 |
+
rgba(255, 255, 255, 1),
|
22 |
+
rgba(255, 255, 255, 0)
|
23 |
+
);
|
24 |
+
|
25 |
+
--tile-start-rgb: 239, 245, 249;
|
26 |
+
--tile-end-rgb: 228, 232, 233;
|
27 |
+
--tile-border: conic-gradient(
|
28 |
+
#00000080,
|
29 |
+
#00000040,
|
30 |
+
#00000030,
|
31 |
+
#00000020,
|
32 |
+
#00000010,
|
33 |
+
#00000010,
|
34 |
+
#00000080
|
35 |
+
);
|
36 |
+
|
37 |
+
--callout-rgb: 238, 240, 241;
|
38 |
+
--callout-border-rgb: 172, 175, 176;
|
39 |
+
--card-rgb: 180, 185, 188;
|
40 |
+
--card-border-rgb: 131, 134, 135;
|
41 |
+
}
|
42 |
+
|
43 |
+
@media (prefers-color-scheme: dark) {
|
44 |
+
:root {
|
45 |
+
--foreground-rgb: 255, 255, 255;
|
46 |
+
--background-start-rgb: 0, 0, 0;
|
47 |
+
--background-end-rgb: 0, 0, 0;
|
48 |
+
|
49 |
+
--primary-glow: radial-gradient(rgba(1, 65, 255, 0.4), rgba(1, 65, 255, 0));
|
50 |
+
--secondary-glow: linear-gradient(
|
51 |
+
to bottom right,
|
52 |
+
rgba(1, 65, 255, 0),
|
53 |
+
rgba(1, 65, 255, 0),
|
54 |
+
rgba(1, 65, 255, 0.3)
|
55 |
+
);
|
56 |
+
|
57 |
+
--tile-start-rgb: 2, 13, 46;
|
58 |
+
--tile-end-rgb: 2, 5, 19;
|
59 |
+
--tile-border: conic-gradient(
|
60 |
+
#ffffff80,
|
61 |
+
#ffffff40,
|
62 |
+
#ffffff30,
|
63 |
+
#ffffff20,
|
64 |
+
#ffffff10,
|
65 |
+
#ffffff10,
|
66 |
+
#ffffff80
|
67 |
+
);
|
68 |
+
|
69 |
+
--callout-rgb: 20, 20, 20;
|
70 |
+
--callout-border-rgb: 108, 108, 108;
|
71 |
+
--card-rgb: 100, 100, 100;
|
72 |
+
--card-border-rgb: 200, 200, 200;
|
73 |
+
}
|
74 |
+
}
|
75 |
+
|
76 |
+
* {
|
77 |
+
box-sizing: border-box;
|
78 |
+
padding: 0;
|
79 |
+
margin: 0;
|
80 |
+
}
|
81 |
+
|
82 |
+
html,
|
83 |
+
body {
|
84 |
+
max-width: 100vw;
|
85 |
+
overflow-x: hidden;
|
86 |
+
}
|
87 |
+
|
88 |
+
body {
|
89 |
+
color: rgb(var(--foreground-rgb));
|
90 |
+
background: linear-gradient(
|
91 |
+
to bottom,
|
92 |
+
transparent,
|
93 |
+
rgb(var(--background-end-rgb))
|
94 |
+
)
|
95 |
+
rgb(var(--background-start-rgb));
|
96 |
+
}
|
97 |
+
|
98 |
+
a {
|
99 |
+
color: inherit;
|
100 |
+
text-decoration: none;
|
101 |
+
}
|
102 |
+
|
103 |
+
@media (prefers-color-scheme: dark) {
|
104 |
+
html {
|
105 |
+
color-scheme: dark;
|
106 |
+
}
|
107 |
+
}
|
src/app/home.tsx
ADDED
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"use client";
|
2 |
+
|
3 |
+
import { FormEvent, useEffect, useRef, useState } from 'react';
|
4 |
+
import { FileLoader } from '../components/FileLoader';
|
5 |
+
import styles from './page.module.css';
|
6 |
+
import { useChat } from 'ai/react';
|
7 |
+
|
8 |
+
/**
 * Chat page.
 *
 * Lets the user load a file through `FileLoader`, forwards the extracted text
 * to the embeddings Web Worker, and on every chat submit first asks the worker
 * for similar documents so the results can be attached to the chat request as
 * `data.vectorStoreResults`.
 */
const Home: React.FC = () => {
  // True from the start of a submit's worker lookup until it resolves or fails.
  const [searchInProgress, setSearchInProgress] = useState(false);
  const [fileText, setFileText] = useState<string>('');
  const embeddingsWorkerRef = useRef<Worker | null>(null);
  const { isLoading, messages, input, handleInputChange, handleSubmit } = useChat();

  // Create the worker once on mount (browser only) and terminate it on unmount.
  useEffect(() => {
    if (typeof window === 'undefined') return;

    const EmbeddingsWorker = require('worker-loader!../components/embeddingsWorker.js').default;
    const worker: Worker = new EmbeddingsWorker();
    embeddingsWorkerRef.current = worker;

    return () => {
      worker.terminate();
      embeddingsWorkerRef.current = null;
    };
  }, []);

  // Whenever new file text arrives, hand it to the worker for storage.
  useEffect(() => {
    if (fileText && embeddingsWorkerRef.current) {
      embeddingsWorkerRef.current.postMessage({
        action: 'addDocumentsToStore',
        documents: [fileText]
      });
    }
  }, [fileText]);

  /**
   * Posts a `searchSimilarDocuments` request (current input, topK 5) to the
   * worker and resolves with the `results` of the first `searchResults`
   * message it sends back. Rejects immediately when no worker exists, so the
   * caller is never left waiting on a promise that cannot settle.
   */
  const handleSearch = () => {
    return new Promise<unknown>((resolve, reject) => {
      const worker = embeddingsWorkerRef.current;
      if (!worker) {
        reject(new Error('Embeddings worker is not available'));
        return;
      }

      const handleMessage = (event: MessageEvent) => {
        if (event.data.action === 'searchResults') {
          worker.removeEventListener('message', handleMessage);
          resolve(event.data.results);
        }
      };

      worker.addEventListener('message', handleMessage);
      worker.postMessage({
        action: 'searchSimilarDocuments',
        query: input,
        topK: 5
      });
    });
  };

  /**
   * Form submit handler: runs the worker search, then delegates to `useChat`'s
   * `handleSubmit` with the serialized results attached as request data.
   */
  const modifiedHandleSubmit = async (e: FormEvent<HTMLFormElement>) => {
    e.preventDefault();

    if (input.trim() === '' || searchInProgress) {
      return; // Prevent empty submissions or when a search is already in progress
    }

    setSearchInProgress(true);

    try {
      const results = await handleSearch();
      const serializedResults = JSON.stringify(results);

      const chatRequestOptions = {
        data: { vectorStoreResults: serializedResults },
      };

      handleSubmit(e, chatRequestOptions);
    } catch (error) {
      console.error('Error during search:', error);
    } finally {
      // Single place where the flag is cleared, on success and on failure.
      setSearchInProgress(false);
    }
  };

  return (
    <main className={styles.main}>
      <div className={styles.description}>
        <p>
          Upload a PDF to start the analysis.
        </p>
        <FileLoader setFileText={setFileText} />
      </div>

      {fileText && (
        <div className={styles.center}>
          <p>Processed Text Complete</p>
        </div>
      )}

      {/* Parentheses are required: `a || b && <x/>` parses as `a || (b && <x/>)`,
          which renders nothing whenever `a` is true. */}
      {(isLoading || searchInProgress) && (
        <div className={styles.spinner}>
          <div>...</div>
        </div>
      )}

      <div className={styles.chatContainer}>
        <div className={styles.messagesContainer}>
          {messages.map(m => (
            <div key={m.id} className={m.role === 'user' ? styles.userMessage : styles.aiMessage}>
              <span className={styles.messageRole}>{m.role === 'user' ? 'You: ' : 'AI: '}</span>
              <span className={styles.messageContent}>{m.content}</span>
            </div>
          ))}
        </div>

        <form onSubmit={modifiedHandleSubmit} className={styles.chatForm}>
          <input
            className={styles.chatInput}
            value={input}
            onChange={handleInputChange}
            placeholder="Say something..."
          />
          <button type="submit" className={styles.sendButton}>Send</button>
        </form>
      </div>

    </main>
  );
}
|
125 |
+
|
126 |
+
export default Home;
|
src/app/layout.tsx
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import type { Metadata } from 'next'
|
2 |
+
import { Inter } from 'next/font/google'
|
3 |
+
import './globals.css'
|
4 |
+
|
5 |
+
// Inter font (latin subset); its generated class name is applied to <body>.
const inter = Inter({ subsets: ['latin'] })

// Page metadata. Replaces the create-next-app placeholder strings with the
// project's own name and summary.
export const metadata: Metadata = {
  title: 'edge-chat',
  description: 'Simple demo using transformers.js embeddings, a simple vector store and PDF text extraction with Web Workers',
}
|
11 |
+
|
12 |
+
/**
 * Root layout: wraps every page in the <html>/<body> shell and applies the
 * Inter font class to the body.
 */
export default function RootLayout(props: { children: React.ReactNode }) {
  const bodyClassName = inter.className

  return (
    <html lang="en">
      <body className={bodyClassName}>{props.children}</body>
    </html>
  )
}
|
src/app/page.module.css
ADDED
@@ -0,0 +1,361 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
.main {
|
2 |
+
display: flex;
|
3 |
+
flex-direction: column;
|
4 |
+
justify-content: flex-start; /* changed from space-between to start to ensure content is at the top */
|
5 |
+
align-items: center;
|
6 |
+
padding: 4%; /* reduced from 6rem and changed to a percentage */
|
7 |
+
min-height: 100vh;
|
8 |
+
width: 100%; /* ensure it takes full width */
|
9 |
+
}
|
10 |
+
|
11 |
+
@media (max-width: 768px) {
|
12 |
+
.main {
|
13 |
+
padding: 2%; /* even less padding on small screens */
|
14 |
+
}
|
15 |
+
}
|
16 |
+
|
17 |
+
.description {
|
18 |
+
display: inherit;
|
19 |
+
justify-content: inherit;
|
20 |
+
align-items: inherit;
|
21 |
+
font-size: 0.85rem;
|
22 |
+
max-width: var(--max-width);
|
23 |
+
z-index: 2;
|
24 |
+
font-family: var(--font-mono);
|
25 |
+
}
|
26 |
+
|
27 |
+
.description a {
|
28 |
+
display: flex;
|
29 |
+
justify-content: center;
|
30 |
+
align-items: center;
|
31 |
+
gap: 0.5rem;
|
32 |
+
}
|
33 |
+
|
34 |
+
.description p {
|
35 |
+
position: relative;
|
36 |
+
margin: 1.5rem;
|
37 |
+
padding: 1rem;
|
38 |
+
background-color: rgba(var(--callout-rgb), 0.5);
|
39 |
+
border: 1px solid rgba(var(--callout-border-rgb), 0.3);
|
40 |
+
border-radius: var(--border-radius);
|
41 |
+
}
|
42 |
+
|
43 |
+
.code {
|
44 |
+
font-weight: 700;
|
45 |
+
font-family: var(--font-mono);
|
46 |
+
}
|
47 |
+
|
48 |
+
.grid {
|
49 |
+
display: grid;
|
50 |
+
grid-template-columns: repeat(4, minmax(25%, auto));
|
51 |
+
max-width: 100%;
|
52 |
+
width: var(--max-width);
|
53 |
+
}
|
54 |
+
|
55 |
+
.card {
|
56 |
+
padding: 1rem 1.2rem;
|
57 |
+
border-radius: var(--border-radius);
|
58 |
+
background: rgba(var(--card-rgb), 0);
|
59 |
+
border: 1px solid rgba(var(--card-border-rgb), 0);
|
60 |
+
transition: background 200ms, border 200ms;
|
61 |
+
}
|
62 |
+
|
63 |
+
.card span {
|
64 |
+
display: inline-block;
|
65 |
+
transition: transform 200ms;
|
66 |
+
}
|
67 |
+
|
68 |
+
.card h2 {
|
69 |
+
font-weight: 600;
|
70 |
+
margin-bottom: 0.7rem;
|
71 |
+
}
|
72 |
+
|
73 |
+
.card p {
|
74 |
+
margin: 0;
|
75 |
+
opacity: 0.6;
|
76 |
+
font-size: 0.9rem;
|
77 |
+
line-height: 1.5;
|
78 |
+
max-width: 30ch;
|
79 |
+
}
|
80 |
+
|
81 |
+
.center {
|
82 |
+
display: flex;
|
83 |
+
justify-content: center;
|
84 |
+
align-items: center;
|
85 |
+
position: relative;
|
86 |
+
padding: 4rem 0;
|
87 |
+
}
|
88 |
+
|
89 |
+
.center::before {
|
90 |
+
background: var(--secondary-glow);
|
91 |
+
border-radius: 50%;
|
92 |
+
width: 480px;
|
93 |
+
height: 360px;
|
94 |
+
margin-left: -400px;
|
95 |
+
}
|
96 |
+
|
97 |
+
.center::after {
|
98 |
+
background: var(--primary-glow);
|
99 |
+
width: 240px;
|
100 |
+
height: 180px;
|
101 |
+
z-index: -1;
|
102 |
+
}
|
103 |
+
|
104 |
+
.center::before,
|
105 |
+
.center::after {
|
106 |
+
content: '';
|
107 |
+
left: 50%;
|
108 |
+
position: absolute;
|
109 |
+
filter: blur(45px);
|
110 |
+
transform: translateZ(0);
|
111 |
+
}
|
112 |
+
|
113 |
+
.logo {
|
114 |
+
position: relative;
|
115 |
+
}
|
116 |
+
/* Enable hover only on non-touch devices */
|
117 |
+
@media (hover: hover) and (pointer: fine) {
|
118 |
+
.card:hover {
|
119 |
+
background: rgba(var(--card-rgb), 0.1);
|
120 |
+
border: 1px solid rgba(var(--card-border-rgb), 0.15);
|
121 |
+
}
|
122 |
+
|
123 |
+
.card:hover span {
|
124 |
+
transform: translateX(4px);
|
125 |
+
}
|
126 |
+
}
|
127 |
+
|
128 |
+
@media (prefers-reduced-motion) {
|
129 |
+
.card:hover span {
|
130 |
+
transform: none;
|
131 |
+
}
|
132 |
+
}
|
133 |
+
|
134 |
+
/* Mobile */
|
135 |
+
@media (max-width: 700px) {
|
136 |
+
.content {
|
137 |
+
padding: 4rem;
|
138 |
+
}
|
139 |
+
|
140 |
+
.grid {
|
141 |
+
grid-template-columns: 1fr;
|
142 |
+
margin-bottom: 120px;
|
143 |
+
max-width: 320px;
|
144 |
+
text-align: center;
|
145 |
+
}
|
146 |
+
|
147 |
+
.card {
|
148 |
+
padding: 1rem 2.5rem;
|
149 |
+
}
|
150 |
+
|
151 |
+
.card h2 {
|
152 |
+
margin-bottom: 0.5rem;
|
153 |
+
}
|
154 |
+
|
155 |
+
.center {
|
156 |
+
padding: 8rem 0 6rem;
|
157 |
+
}
|
158 |
+
|
159 |
+
.center::before {
|
160 |
+
transform: none;
|
161 |
+
height: 300px;
|
162 |
+
}
|
163 |
+
|
164 |
+
.description {
|
165 |
+
font-size: 0.8rem;
|
166 |
+
}
|
167 |
+
|
168 |
+
.description a {
|
169 |
+
padding: 1rem;
|
170 |
+
}
|
171 |
+
|
172 |
+
.description p,
|
173 |
+
.description div {
|
174 |
+
display: flex;
|
175 |
+
justify-content: center;
|
176 |
+
position: fixed;
|
177 |
+
width: 100%;
|
178 |
+
}
|
179 |
+
|
180 |
+
.description p {
|
181 |
+
align-items: center;
|
182 |
+
inset: 0 0 auto;
|
183 |
+
padding: 2rem 1rem 1.4rem;
|
184 |
+
border-radius: 0;
|
185 |
+
border: none;
|
186 |
+
border-bottom: 1px solid rgba(var(--callout-border-rgb), 0.25);
|
187 |
+
background: linear-gradient(
|
188 |
+
to bottom,
|
189 |
+
rgba(var(--background-start-rgb), 1),
|
190 |
+
rgba(var(--callout-rgb), 0.5)
|
191 |
+
);
|
192 |
+
background-clip: padding-box;
|
193 |
+
backdrop-filter: blur(24px);
|
194 |
+
}
|
195 |
+
|
196 |
+
.description div {
|
197 |
+
align-items: flex-end;
|
198 |
+
pointer-events: none;
|
199 |
+
inset: auto 0 0;
|
200 |
+
padding: 2rem;
|
201 |
+
height: 200px;
|
202 |
+
background: linear-gradient(
|
203 |
+
to bottom,
|
204 |
+
transparent 0%,
|
205 |
+
rgb(var(--background-end-rgb)) 40%
|
206 |
+
);
|
207 |
+
z-index: 1;
|
208 |
+
}
|
209 |
+
}
|
210 |
+
|
211 |
+
/* Tablet and Smaller Desktop */
|
212 |
+
@media (min-width: 701px) and (max-width: 1120px) {
|
213 |
+
.grid {
|
214 |
+
grid-template-columns: repeat(2, 50%);
|
215 |
+
}
|
216 |
+
}
|
217 |
+
|
218 |
+
@media (prefers-color-scheme: dark) {
|
219 |
+
.vercelLogo {
|
220 |
+
filter: invert(1);
|
221 |
+
}
|
222 |
+
|
223 |
+
.logo {
|
224 |
+
filter: invert(1) drop-shadow(0 0 0.3rem #ffffff70);
|
225 |
+
}
|
226 |
+
}
|
227 |
+
|
228 |
+
@keyframes rotate {
|
229 |
+
from {
|
230 |
+
transform: rotate(360deg);
|
231 |
+
}
|
232 |
+
to {
|
233 |
+
transform: rotate(0deg);
|
234 |
+
}
|
235 |
+
}
|
236 |
+
|
237 |
+
.spinner {
|
238 |
+
display: flex;
|
239 |
+
justify-content: center;
|
240 |
+
align-items: center;
|
241 |
+
/* Add more styles as needed */
|
242 |
+
}
|
243 |
+
|
244 |
+
.spinner div {
|
245 |
+
border: 4px solid #f3f3f3;
|
246 |
+
border-top: 4px solid #3498db;
|
247 |
+
border-radius: 50%;
|
248 |
+
width: 40px;
|
249 |
+
height: 40px;
|
250 |
+
animation: spin 2s linear infinite;
|
251 |
+
}
|
252 |
+
|
253 |
+
@keyframes spin {
|
254 |
+
0% { transform: rotate(0deg); }
|
255 |
+
100% { transform: rotate(360deg); }
|
256 |
+
}
|
257 |
+
|
258 |
+
.chatContainer {
|
259 |
+
display: flex;
|
260 |
+
flex-direction: column;
|
261 |
+
width: 90%; /* Responsive width */
|
262 |
+
margin-bottom: 2px;
|
263 |
+
margin-top: 100px;
|
264 |
+
box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
|
265 |
+
background-color: white;
|
266 |
+
min-height: 60vh; /* Minimum height */
|
267 |
+
max-height: 80vh; /* Maximum height relative to the viewport */
|
268 |
+
overflow: hidden; /* Hide overflow */
|
269 |
+
}
|
270 |
+
|
271 |
+
.messagesContainer {
|
272 |
+
flex-grow: 1; /* Take up available space */
|
273 |
+
overflow-y: auto; /* Make only this part scrollable */
|
274 |
+
padding: 10px;
|
275 |
+
background-color: #f9f9f9;
|
276 |
+
}
|
277 |
+
|
278 |
+
.userMessage, .aiMessage {
|
279 |
+
margin: 5px 0;
|
280 |
+
}
|
281 |
+
|
282 |
+
.userMessage {
|
283 |
+
text-align: right;
|
284 |
+
}
|
285 |
+
|
286 |
+
.aiMessage {
|
287 |
+
text-align: left;
|
288 |
+
}
|
289 |
+
|
290 |
+
.messageRole {
|
291 |
+
font-weight: bold;
|
292 |
+
}
|
293 |
+
|
294 |
+
.messageContent {
|
295 |
+
display: block;
|
296 |
+
margin-top: 2px;
|
297 |
+
}
|
298 |
+
|
299 |
+
.chatForm {
|
300 |
+
display: flex;
|
301 |
+
padding: 10px;
|
302 |
+
background-color: white; /* Light background for form */
|
303 |
+
border-top: 1px solid #ddd;
|
304 |
+
}
|
305 |
+
|
306 |
+
.chatInput {
|
307 |
+
flex-grow: 1;
|
308 |
+
padding: 10px;
|
309 |
+
margin-right: 10px;
|
310 |
+
border: 1px solid #ddd;
|
311 |
+
border-radius: 4px;
|
312 |
+
background-color: white; /* Light background for input */
|
313 |
+
color: black; /* Dark text for input */
|
314 |
+
}
|
315 |
+
|
316 |
+
.sendButton {
|
317 |
+
padding: 10px 20px;
|
318 |
+
background-color: #4CAF50;
|
319 |
+
color: white;
|
320 |
+
border: none;
|
321 |
+
border-radius: 4px;
|
322 |
+
cursor: pointer;
|
323 |
+
}
|
324 |
+
|
325 |
+
.sendButton:hover {
|
326 |
+
background-color: #45a049;
|
327 |
+
}
|
328 |
+
|
329 |
+
/* Dark mode styles */
|
330 |
+
@media (prefers-color-scheme: dark) {
|
331 |
+
.chatContainer {
|
332 |
+
background-color: black; /* Dark background for dark mode */
|
333 |
+
color: white; /* Light text for dark mode */
|
334 |
+
}
|
335 |
+
|
336 |
+
.messagesContainer {
|
337 |
+
background-color: #333; /* Darker shade for the message container */
|
338 |
+
}
|
339 |
+
|
340 |
+
.chatForm {
|
341 |
+
background-color: #222; /* Darker background for form in dark mode */
|
342 |
+
border-top: 1px solid #555; /* Darker border for form */
|
343 |
+
}
|
344 |
+
|
345 |
+
.chatInput {
|
346 |
+
background-color: #222; /* Darker input field */
|
347 |
+
color: white; /* Light text for input field */
|
348 |
+
border: 1px solid #555; /* Darker border for input */
|
349 |
+
}
|
350 |
+
|
351 |
+
.sendButton {
|
352 |
+
background-color: #555; /* Darker button color */
|
353 |
+
color: white; /* Light text for button */
|
354 |
+
}
|
355 |
+
|
356 |
+
.sendButton:hover {
|
357 |
+
background-color: #666; /* Slightly lighter on hover */
|
358 |
+
}
|
359 |
+
|
360 |
+
/* Add more dark mode specific styles as needed */
|
361 |
+
}
|
src/app/page.tsx
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import Home from './home'
|
2 |
+
|
3 |
+
/** Route entry point: renders the client-side `Home` component. */
function ParentComponent() {
  return <Home />;
}

export default ParentComponent;
|
src/components/FileEmbedder.tsx
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"use client"
|
2 |
+
|
3 |
+
/** Props for {@link FileEmbedder}. */
interface FileEmbedderProps {
  /** Called with the change event of the underlying file input. */
  onFileSelect: (event: React.ChangeEvent<HTMLInputElement>) => void;
}

/** Bare file picker restricted to `.pdf` and `text/plain` files. */
const FileEmbedder: React.FC<FileEmbedderProps> = (props) => (
  <input
    type="file"
    accept=".pdf, text/plain"
    onChange={props.onFileSelect}
  />
);

export default FileEmbedder;
|
14 |
+
|
src/components/FileLoader.tsx
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"use client";
|
2 |
+
|
3 |
+
import React, { useState, useEffect } from 'react';
|
4 |
+
// @ts-ignore
|
5 |
+
import * as PDFJS from 'pdfjs-dist/build/pdf';
|
6 |
+
import FileEmbedder from './FileEmbedder';
|
7 |
+
|
8 |
+
// Derive the worker URL from the installed library's own version so the worker
// script and the API cannot drift apart when pdfjs-dist is upgraded.
PDFJS.GlobalWorkerOptions.workerSrc = `https://cdnjs.cloudflare.com/ajax/libs/pdf.js/${PDFJS.version}/pdf.worker.min.js`;
|
9 |
+
|
10 |
+
/** Props for {@link FileLoader}. */
interface FileLoaderProps {
  /** Receives the text extracted from the selected file. */
  setFileText: (text: string) => void;
}

/**
 * File picker that extracts text from the chosen file and passes it to
 * `setFileText`.
 *
 * PDFs are read page by page with PDF.js; plain-text files (which the picker
 * also offers) are read directly. Files of any other type are ignored.
 */
export const FileLoader: React.FC<FileLoaderProps> = ({ setFileText }) => {
  const [selectedFile, setSelectedFile] = useState<File | null>(null);

  // Handle file processing
  useEffect(() => {
    if (!selectedFile) return;

    const isPdf = selectedFile.type === "application/pdf";
    const isPlainText = selectedFile.type === "text/plain";
    if (!isPdf && !isPlainText) return;

    // Flipped by the cleanup below, so an extraction still running for a
    // previously selected file cannot overwrite the text of a newer one.
    let cancelled = false;

    // Concatenates the text items of every page, separated by single spaces.
    const extractPdfText = async (file: File): Promise<string> => {
      const fileData = new Uint8Array(await file.arrayBuffer());
      const pdf = await PDFJS.getDocument({ data: fileData }).promise;
      const maxPages = pdf.numPages;
      const pageTexts: string[] = [];

      for (let pageNo = 1; pageNo <= maxPages; pageNo++) {
        const page = await pdf.getPage(pageNo);
        const tokenizedText = await page.getTextContent();
        const pageText = tokenizedText.items.map((token: any) => token.str).join(' ');
        pageTexts.push(pageText);
      }

      return pageTexts.join(' ');
    };

    const processFile = async (file: File) => {
      try {
        const documentText = isPdf ? await extractPdfText(file) : await file.text();
        if (!cancelled) {
          setFileText(documentText);
        }
      } catch (error) {
        console.error('File processing error:', error);
      }
    };

    processFile(selectedFile);

    return () => {
      cancelled = true;
    };
  }, [selectedFile, setFileText]);

  // Handle file selection
  const handleFileChange = (event: React.ChangeEvent<HTMLInputElement>) => {
    // `files` can be null or empty; store null rather than undefined then.
    setSelectedFile(event.target.files?.[0] ?? null);
  };

  return (
    <div>
      <FileEmbedder onFileSelect={handleFileChange} />
      {/* Optionally display some status or progress indicator */}
    </div>
  );
};
|
src/components/embeddingsWorker.js
ADDED
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { pipeline } from "@xenova/transformers";
|
2 |
+
|
3 |
+
// Chunk length, in string characters, that the message handler passes to
// addDocumentsToStore when documents are split before embedding.
const CHUNK_SIZE = 1000;
|
4 |
+
|
5 |
+
/**
 * In-memory store that keeps documents and their embedding vectors in two
 * parallel arrays and ranks them against a query by cosine similarity.
 */
export class SimpleVectorStore {
  constructor() {
    this.documents = [];
    this.embeddings = [];
  }

  /**
   * Appends a document together with its embedding; both share the same index.
   * @param {ArrayLike<number>} embedding - Vector representing the document.
   * @param {*} document - The document (or chunk) to store.
   */
  addDocument(embedding, document) {
    this.documents.push(document);
    this.embeddings.push(embedding);
  }

  /**
   * Scores every stored embedding against the query and returns the best ones.
   * @param {ArrayLike<number>} queryEmbedding - Vector to compare against.
   * @param {number} topK - Maximum number of results to return.
   * @returns {Promise<Array<{document: *, score: number}>>} Matches ordered
   *   from highest to lowest score.
   */
  async similaritySearch(queryEmbedding, topK) {
    const ranked = [];
    for (let position = 0; position < this.embeddings.length; position++) {
      ranked.push({
        score: cosineSimilarity(this.embeddings[position], queryEmbedding),
        index: position,
      });
    }

    ranked.sort((left, right) => right.score - left.score);

    const best = ranked.slice(0, topK);
    return best.map(({ index, score }) => ({
      document: this.documents[index],
      score,
    }));
  }
}
|
31 |
+
|
32 |
+
/**
 * Computes the cosine similarity of two numeric vectors.
 *
 * The dot product is taken over the indices of `vecA`, so both vectors are
 * expected to have the same length.
 *
 * @param {ArrayLike<number>} vecA - First vector (plain or typed array).
 * @param {ArrayLike<number>} vecB - Second vector (plain or typed array).
 * @returns {number} The cosine of the angle between the vectors, or 0 when
 *   either vector has zero magnitude.
 */
export function cosineSimilarity(vecA, vecB) {
  let dotProduct = 0;
  let sumSquaresA = 0;
  for (let i = 0; i < vecA.length; i++) {
    dotProduct += vecA[i] * vecB[i];
    sumSquaresA += vecA[i] * vecA[i];
  }

  let sumSquaresB = 0;
  for (let i = 0; i < vecB.length; i++) {
    sumSquaresB += vecB[i] * vecB[i];
  }

  const denominator = Math.sqrt(sumSquaresA) * Math.sqrt(sumSquaresB);

  // A zero vector would give 0 / 0 = NaN, and NaN scores make a numeric sort
  // comparator inconsistent. Treat "no direction" as "no similarity".
  if (denominator === 0) {
    return 0;
  }
  return dotProduct / denominator;
}
|
38 |
+
|
39 |
+
/**
 * Embeds text with a feature-extraction pipeline and keeps the resulting
 * vectors in a SimpleVectorStore so they can be searched by similarity.
 */
class EmbeddingsWorker {
  /**
   * @param {string} [modelName] - Model identifier handed to `pipeline`.
   */
  constructor(modelName = "Xenova/all-MiniLM-L6-v2") {
    this.modelName = modelName;
    this.client = null;
    // Pending (or settled) model load, shared so that concurrent callers do
    // not each start their own load.
    this.clientPromise = null;
    this.vectorStore = new SimpleVectorStore();
  }

  /**
   * Loads the pipeline on first use; later calls reuse the same instance.
   * Callers that arrive while the load is still in flight wait on the same
   * promise instead of starting a second load.
   */
  async loadClient() {
    if (this.client) {
      return;
    }
    if (!this.clientPromise) {
      this.clientPromise = pipeline("feature-extraction", this.modelName);
    }
    try {
      this.client = await this.clientPromise;
    } catch (error) {
      // Forget the failed attempt so that a later call can retry.
      this.clientPromise = null;
      throw error;
    }
  }

  /**
   * Embeds each text with mean pooling and normalization.
   * @param {string[]} texts - Texts to embed.
   * @returns {Promise<Array>} The `data` of each pipeline response, in input order.
   */
  async _embed(texts) {
    await this.loadClient();
    const embedResults = await Promise.all(
      texts.map(async (text) => {
        const response = await this.client(text, {
          pooling: "mean",
          normalize: true
        });
        return response.data;
      })
    );
    return embedResults;
  }

  /**
   * Splits every document into chunks, embeds the chunks and stores each
   * chunk next to its embedding.
   * @param {string[]} docs - Documents to index.
   * @param {number} [chunkSize=1000] - Chunk length in characters.
   */
  async addDocumentsToStore(docs, chunkSize = 1000) {
    for (const doc of docs) {
      const chunks = this.chunkText(doc, chunkSize);
      const embeddings = await this._embed(chunks);
      embeddings.forEach((embedding, index) => {
        this.vectorStore.addDocument(embedding, chunks[index]);
      });
    }
  }

  /**
   * Cuts `text` into consecutive pieces of at most `size` characters.
   * @param {string} text - Text to split.
   * @param {number} size - Positive chunk length.
   * @returns {string[]} The chunks in order; empty for empty text.
   * @throws {RangeError} If `size` is not a positive number.
   */
  chunkText(text, size) {
    // A zero or negative step would never advance the loop below.
    if (!(size > 0)) {
      throw new RangeError(`chunk size must be a positive number, got ${size}`);
    }
    const chunks = [];
    for (let i = 0; i < text.length; i += size) {
      chunks.push(text.substring(i, i + size));
    }
    return chunks;
  }

  /**
   * Embeds the query and returns the closest stored chunks.
   * @param {string} query - Search text.
   * @param {number} topK - Maximum number of results.
   * @returns {Promise<Array>} Whatever the vector store's similaritySearch returns.
   */
  async searchSimilarDocuments(query, topK) {
    const queryEmbedding = await this._embed([query]);
    return this.vectorStore.similaritySearch(queryEmbedding[0], topK);
  }
}
|
89 |
+
|
90 |
+
const worker = new EmbeddingsWorker();

/**
 * Message protocol handled by this worker:
 *  - { action: 'addDocumentsToStore', documents }  -> replies { action: 'documentsAdded' }
 *  - { action: 'searchSimilarDocuments', query, topK } -> replies { action: 'searchResults', results }
 * If handling a request throws, the failure is logged and reported back as
 * { action: 'error', request, message } so the sender is not left waiting
 * for a reply that never arrives.
 */
self.addEventListener('message', async (event) => {
  try {
    if (event.data.action === 'addDocumentsToStore') {
      await worker.addDocumentsToStore(event.data.documents, CHUNK_SIZE);
      self.postMessage({ action: 'documentsAdded' });
    } else if (event.data.action === 'searchSimilarDocuments') {
      const results = await worker.searchSimilarDocuments(event.data.query, event.data.topK);
      self.postMessage({ action: 'searchResults', results });
    }
  } catch (error) {
    console.error('Embeddings worker error:', error);
    self.postMessage({
      action: 'error',
      request: event.data ? event.data.action : undefined,
      message: error instanceof Error ? error.message : String(error)
    });
  }
});
|
src/components/embeddingsWorker.spec.mjs
ADDED
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { pipeline } from "@xenova/transformers";
|
2 |
+
|
3 |
+
/**
 * In-memory vector store used by this spec: documents and their embeddings
 * live in two parallel arrays and are ranked by cosine similarity.
 */
export class SimpleVectorStore {
  constructor() {
    this.documents = [];
    this.embeddings = [];
  }

  /**
   * Stores a document and its embedding under the same index.
   * @param {ArrayLike<number>} embedding - Vector representing the document.
   * @param {*} document - The document to store.
   */
  addDocument(embedding, document) {
    this.embeddings.push(embedding);
    this.documents.push(document);
  }

  /**
   * Ranks all stored embeddings against the query.
   * @param {ArrayLike<number>} queryEmbedding - Vector to compare against.
   * @param {number} topK - Maximum number of results to return.
   * @returns {Promise<Array<{document: *, score: number}>>} Matches ordered
   *   from highest to lowest score.
   */
  async similaritySearch(queryEmbedding, topK) {
    const scores = this.embeddings.map((emb, index) => ({
      score: cosineSimilarity(emb, queryEmbedding),
      index
    }));

    scores.sort((a, b) => b.score - a.score);

    return scores.slice(0, topK).map((entry) => ({
      document: this.documents[entry.index],
      score: entry.score
    }));
  }
}
|
31 |
+
|
32 |
+
/**
 * Computes the cosine similarity of two numeric vectors.
 *
 * The dot product is taken over the indices of `vecA`, so both vectors are
 * expected to have the same length.
 *
 * @param {ArrayLike<number>} vecA - First vector (plain or typed array).
 * @param {ArrayLike<number>} vecB - Second vector (plain or typed array).
 * @returns {number} The cosine of the angle between the vectors, or 0 when
 *   either vector has zero magnitude.
 */
export function cosineSimilarity(vecA, vecB) {
  let dotProduct = 0;
  let sumSquaresA = 0;
  for (let i = 0; i < vecA.length; i++) {
    dotProduct += vecA[i] * vecB[i];
    sumSquaresA += vecA[i] * vecA[i];
  }

  let sumSquaresB = 0;
  for (let i = 0; i < vecB.length; i++) {
    sumSquaresB += vecB[i] * vecB[i];
  }

  const denominator = Math.sqrt(sumSquaresA) * Math.sqrt(sumSquaresB);

  // A zero vector would give 0 / 0 = NaN, and NaN scores make a numeric sort
  // comparator inconsistent. Treat "no direction" as "no similarity".
  if (denominator === 0) {
    return 0;
  }
  return dotProduct / denominator;
}
|
39 |
+
|
40 |
+
/**
 * Spec-local copy of the embeddings worker: embeds text with a
 * feature-extraction pipeline and stores the vectors in a SimpleVectorStore.
 */
class EmbeddingsWorker {
  /**
   * @param {string} [modelName] - Model identifier handed to `pipeline`.
   */
  constructor(modelName = "Xenova/all-MiniLM-L6-v2") {
    this.modelName = modelName;
    this.client = null;
    this.vectorStore = new SimpleVectorStore();
  }

  /** Loads the pipeline on first use; later calls reuse the same instance. */
  async loadClient() {
    if (!this.client) {
      // "feature-extraction" is the task that produces embeddings and is what
      // src/components/embeddingsWorker.js requests; "embeddings" is not a task name.
      this.client = await pipeline("feature-extraction", this.modelName);
    }
  }

  /**
   * Embeds each text with mean pooling and normalization.
   * @param {string[]} texts - Texts to embed.
   * @returns {Promise<Array>} The `data` of each pipeline response, in input order.
   */
  async _embed(texts) {
    await this.loadClient();
    return Promise.all(
      texts.map(async (text) => {
        const response = await this.client(text, {
          pooling: "mean",
          normalize: true
        });
        return response.data;
      })
    );
  }

  /**
   * Embeds every document and stores it next to its embedding.
   * @param {string[]} docs - Documents to index.
   */
  async addDocumentsToStore(docs) {
    const embeddings = await this._embed(docs);
    embeddings.forEach((embedding, index) => {
      this.vectorStore.addDocument(embedding, docs[index]);
    });
  }

  /**
   * Embeds the query and returns the closest stored documents.
   * @param {string} query - Search text.
   * @param {number} topK - Maximum number of results.
   * @returns {Promise<Array>} Whatever the vector store's similaritySearch returns.
   */
  async searchSimilarDocuments(query, topK) {
    const queryEmbedding = await this._embed([query]);
    return this.vectorStore.similaritySearch(queryEmbedding[0], topK);
  }
}
|
81 |
+
|
82 |
+
/**
 * Smoke test for SimpleVectorStore and cosineSimilarity using three
 * orthogonal unit vectors as mock embeddings. Results are printed with
 * console.log; nothing is asserted.
 * @returns {Promise<void>} Resolves once the similarity search has been logged.
 */
async function testVectorStore() {
  const store = new SimpleVectorStore();

  // Mock embeddings (simple vectors for testing)
  const mockEmbeddings = [
    [1, 0, 0],
    [0, 1, 0],
    [0, 0, 1]
  ];

  // Add mock embeddings to the store
  mockEmbeddings.forEach((emb, index) => {
    store.addDocument(emb, `Document ${index + 1}`);
  });

  // Test cosine similarity directly
  const cosSimTest = cosineSimilarity([1, 0, 0], [0, 1, 0]);
  console.log('Cosine Similarity Test:', cosSimTest); // Should be 0 for orthogonal vectors

  // Perform a similarity search. similaritySearch is async, so it has to be
  // awaited; otherwise the pending Promise is logged instead of the results.
  const results = await store.similaritySearch([1, 0, 0], 2);
  console.log('Similarity Search Results:', results);
}
|
105 |
+
|
106 |
+
// Run the test function
|
107 |
+
// Runs as soon as this module is evaluated; the output is only logged, so it has to be inspected by hand.
testVectorStore();
|
tsconfig.json
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"compilerOptions": {
|
3 |
+
"target": "es5",
|
4 |
+
"lib": [
|
5 |
+
"dom",
|
6 |
+
"dom.iterable",
|
7 |
+
"esnext"
|
8 |
+
],
|
9 |
+
"allowJs": true,
|
10 |
+
"skipLibCheck": true,
|
11 |
+
"strict": true,
|
12 |
+
"noEmit": true,
|
13 |
+
"esModuleInterop": true,
|
14 |
+
"module": "esnext",
|
15 |
+
"moduleResolution": "node",
|
16 |
+
"resolveJsonModule": true,
|
17 |
+
"isolatedModules": true,
|
18 |
+
"jsx": "preserve",
|
19 |
+
"incremental": true,
|
20 |
+
"plugins": [
|
21 |
+
{
|
22 |
+
"name": "next"
|
23 |
+
}
|
24 |
+
],
|
25 |
+
"paths": {
|
26 |
+
"@/*": [
|
27 |
+
"./src/*"
|
28 |
+
]
|
29 |
+
},
|
30 |
+
"forceConsistentCasingInFileNames": true
|
31 |
+
},
|
32 |
+
"include": [
|
33 |
+
"next-env.d.ts",
|
34 |
+
"**/*.ts",
|
35 |
+
"**/*.tsx",
|
36 |
+
".next/types/**/*.ts"
|
37 |
+
],
|
38 |
+
"exclude": [
|
39 |
+
"node_modules"
|
40 |
+
]
|
41 |
+
}
|