Spaces:
Running
Running
Commit
·
8066b54
0
Parent(s):
initial deploy
Browse files- .gitignore +6 -0
- Dockerfile +15 -0
- README.md +8 -0
- backend/main.py +128 -0
- backend/openai_utils.py +34 -0
- backend/static/index.html +274 -0
- backend/text_utils.py +44 -0
- backend/vector_store.py +73 -0
- frontend/dist/css/style.css +120 -0
- frontend/dist/index.html +45 -0
- frontend/dist/js/app.js +90 -0
- frontend/package-lock.json +6 -0
.gitignore
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
__pycache__/
*.py[cod]
.env
temp_*
*.txt
# The Dockerfile runs `COPY requirements.txt .`, so that file must stay tracked
# even though other .txt files (uploaded documents) are ignored.
!requirements.txt
*.pdf
|
Dockerfile
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.9-slim
|
2 |
+
|
3 |
+
WORKDIR /app
|
4 |
+
|
5 |
+
# Copy requirements from root directory
|
6 |
+
COPY requirements.txt .
|
7 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
8 |
+
|
9 |
+
# Copy backend files
|
10 |
+
COPY backend/ .
|
11 |
+
|
12 |
+
# Copy frontend files
|
13 |
+
COPY frontend/dist/ ./static/
|
14 |
+
|
15 |
+
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
|
README.md
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: FastAPI RAG Application
|
3 |
+
emoji: 📚
|
4 |
+
colorFrom: blue
|
5 |
+
colorTo: green
|
6 |
+
sdk: docker
|
7 |
+
app_port: 7860
|
8 |
+
---
|
backend/main.py
ADDED
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import FastAPI, UploadFile, File, HTTPException
|
2 |
+
from fastapi.middleware.cors import CORSMiddleware
|
3 |
+
from fastapi.staticfiles import StaticFiles
|
4 |
+
from fastapi.responses import FileResponse
|
5 |
+
from pydantic import BaseModel
|
6 |
+
from typing import List
|
7 |
+
import os
|
8 |
+
from dotenv import load_dotenv
|
9 |
+
|
10 |
+
# Import our local utilities instead of aimakerspace
|
11 |
+
from text_utils import CharacterTextSplitter, TextFileLoader, PDFLoader
|
12 |
+
from openai_utils import SystemRolePrompt, UserRolePrompt, ChatOpenAI
|
13 |
+
from vector_store import VectorDatabase
|
14 |
+
|
15 |
+
# Load environment variables from a local .env file, if one exists.
load_dotenv()

app = FastAPI()

# Configure CORS.
# Credentials are disabled: a wildcard origin combined with credentialed
# requests is not a valid CORS configuration, and this API does not use
# cookies or any other browser-managed credentials.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Prompt templates
system_template = """\
Use the following context to answer a users question. If you cannot find the answer in the context, say you don't know the answer."""
system_role_prompt = SystemRolePrompt(system_template)

# {context} and {question} are filled in per request by the /query handler.
user_prompt_template = """\
Context:
{context}

Question:
{question}
"""
user_role_prompt = UserRolePrompt(user_prompt_template)

# Initialize components shared by all requests
text_splitter = CharacterTextSplitter()
chat_openai = ChatOpenAI()

# Store vector databases for each session, keyed by the session id handed
# out by /upload. Held in process memory only.
vector_dbs = {}
|
48 |
+
|
49 |
+
class QueryRequest(BaseModel):
    """JSON body accepted by the POST /query endpoint."""

    # Session identifier previously returned by POST /upload.
    session_id: str
    # The user's question about the uploaded document.
    query: str
|
52 |
+
|
53 |
+
@app.post("/upload")
async def upload_file(file: UploadFile = File(...)):
    """Index an uploaded .pdf or text file and open a new query session.

    The upload is written to a temporary file, loaded, split into chunks and
    embedded into a fresh ``VectorDatabase`` that is stored in ``vector_dbs``
    under a newly generated session id.

    Returns:
        A dict with the new ``session_id`` and a status ``message``.

    Raises:
        HTTPException: status 500 carrying the error text if any step fails.
    """
    import tempfile
    import uuid

    # The client-supplied name is only inspected for its extension; it is
    # never used as a filesystem path, so names such as "../../x" cannot
    # escape the temp directory and concurrent uploads cannot collide.
    is_pdf = (file.filename or "").lower().endswith(".pdf")
    file_path = None

    try:
        # Save file temporarily
        content = await file.read()
        with tempfile.NamedTemporaryFile(
            prefix="temp_", suffix=".pdf" if is_pdf else ".txt", delete=False
        ) as tmp:
            tmp.write(content)
            file_path = tmp.name

        # Process file
        loader = PDFLoader(file_path) if is_pdf else TextFileLoader(file_path)
        documents = loader.load_documents()
        texts = text_splitter.split_texts(documents)

        # Create vector database
        vector_db = await VectorDatabase().abuild_from_list(texts)

        # Generate session ID and store vector_db
        session_id = str(uuid.uuid4())
        vector_dbs[session_id] = vector_db

        return {"session_id": session_id, "message": "File processed successfully"}

    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Cleanup runs on success and on failure so temp files never pile up.
        if file_path is not None and os.path.exists(file_path):
            os.remove(file_path)
|
83 |
+
|
84 |
+
@app.post("/query")
async def query(request: QueryRequest):
    """Answer a question against the document indexed for a session.

    Retrieves the top 4 chunks for the query from the session's vector
    database, formats them into the user prompt and asks the chat model.

    Returns:
        A dict with the model's ``answer`` and the list of ``context``
        chunks that were supplied to it.

    Raises:
        HTTPException: status 404 if the session id is unknown; status 500
            carrying the error text if retrieval or completion fails.
    """
    # Looked up outside the try block so the 404 reaches the client as a 404
    # instead of being rewrapped as a 500 by the generic handler below.
    vector_db = vector_dbs.get(request.session_id)
    if vector_db is None:
        raise HTTPException(status_code=404, detail="Session not found")

    try:
        # Retrieve context; each result is a (text, similarity) pair.
        context_list = await vector_db.search_by_text(request.query, k=4)
        context_texts = [str(context[0]) for context in context_list]
        context_prompt = "\n".join(context_texts)

        # Generate prompts
        formatted_system_prompt = system_role_prompt.create_message()
        formatted_user_prompt = user_role_prompt.create_message(
            question=request.query,
            context=context_prompt,
        )

        # Get response
        response = await chat_openai.acomplete(
            [formatted_system_prompt, formatted_user_prompt]
        )

        return {
            "answer": str(response),
            "context": context_texts,
        }

    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
|
114 |
+
|
115 |
+
# Optional: Cleanup endpoint
|
116 |
+
@app.delete("/session/{session_id}")
async def cleanup_session(session_id: str):
    """Drop the stored vector database for ``session_id``.

    Raises:
        HTTPException: status 404 if no such session is stored.
    """
    # Guard clause: unknown sessions are rejected before anything is removed.
    if session_id not in vector_dbs:
        raise HTTPException(status_code=404, detail="Session not found")

    del vector_dbs[session_id]
    return {"message": "Session cleaned up successfully"}
|
122 |
+
|
123 |
+
# Serve static files from static directory
|
124 |
+
app.mount("/static", StaticFiles(directory="static"), name="static")


@app.get("/")
async def read_root():
    """Return the UI entry page stored in the static directory."""
    index_page = "static/index.html"
    return FileResponse(index_page)
|
backend/openai_utils.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import List, Dict
|
2 |
+
from openai import AsyncOpenAI
|
3 |
+
import os
|
4 |
+
|
5 |
+
class SystemRolePrompt:
    """Wrap a fixed system prompt as a chat message."""

    def __init__(self, template: str):
        self.template = template

    def create_message(self) -> Dict[str, str]:
        """Return the template, unchanged, as a ``system`` role message."""
        return dict(role="system", content=self.template)
|
14 |
+
|
15 |
+
class UserRolePrompt:
    """Wrap a ``str.format`` template as a chat message from the user."""

    def __init__(self, template: str):
        self.template = template

    def create_message(self, **kwargs) -> Dict[str, str]:
        """Fill the template with ``kwargs`` and return a ``user`` role message."""
        rendered = self.template.format(**kwargs)
        return dict(role="user", content=rendered)
|
24 |
+
|
25 |
+
class ChatOpenAI:
    """Thin async wrapper around the OpenAI chat completions endpoint."""

    def __init__(self, model_name: str = "gpt-3.5-turbo"):
        """Create the client.

        Args:
            model_name: Chat model to request. Defaults to the model this
                class previously hard-coded, so ``ChatOpenAI()`` is unchanged.
        """
        self.model_name = model_name
        # The API key is read from the OPENAI_API_KEY environment variable.
        self.client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))

    async def acomplete(self, messages: List[Dict[str, str]]) -> str:
        """Send ``messages`` to the chat model and return the first reply's text.

        Args:
            messages: Chat messages as ``{"role": ..., "content": ...}`` dicts.

        Returns:
            The content of the first choice, or ``""`` when the reply
            carries no content, so the declared ``str`` return type holds.
        """
        response = await self.client.chat.completions.create(
            model=self.model_name,
            messages=messages,
        )
        return response.choices[0].message.content or ""
|
backend/static/index.html
ADDED
@@ -0,0 +1,274 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!DOCTYPE html>
|
2 |
+
<html lang="en">
|
3 |
+
<head>
|
4 |
+
<meta charset="UTF-8">
|
5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
6 |
+
<title>RAG Application</title>
|
7 |
+
<style>
|
8 |
+
body {
|
9 |
+
font-family: Arial, sans-serif;
|
10 |
+
max-width: 800px;
|
11 |
+
margin: 0 auto;
|
12 |
+
padding: 20px;
|
13 |
+
}
|
14 |
+
.container {
|
15 |
+
display: flex;
|
16 |
+
flex-direction: column;
|
17 |
+
gap: 20px;
|
18 |
+
}
|
19 |
+
.query-section {
|
20 |
+
display: none;
|
21 |
+
}
|
22 |
+
.response-section {
|
23 |
+
margin-top: 20px;
|
24 |
+
border-top: 1px solid #ccc;
|
25 |
+
padding-top: 20px;
|
26 |
+
}
|
27 |
+
textarea {
|
28 |
+
width: 100%;
|
29 |
+
height: 100px;
|
30 |
+
}
|
31 |
+
button {
|
32 |
+
padding: 10px 20px;
|
33 |
+
background-color: #007bff;
|
34 |
+
color: white;
|
35 |
+
border: none;
|
36 |
+
border-radius: 4px;
|
37 |
+
cursor: pointer;
|
38 |
+
}
|
39 |
+
button:disabled {
|
40 |
+
background-color: #ccc;
|
41 |
+
}
|
42 |
+
.context {
|
43 |
+
background-color: #f8f9fa;
|
44 |
+
padding: 10px;
|
45 |
+
margin-top: 10px;
|
46 |
+
border-radius: 4px;
|
47 |
+
}
|
48 |
+
.loading {
|
49 |
+
opacity: 0.5;
|
50 |
+
pointer-events: none;
|
51 |
+
}
|
52 |
+
|
53 |
+
.spinner {
|
54 |
+
display: none;
|
55 |
+
margin-left: 10px;
|
56 |
+
color: #007bff;
|
57 |
+
}
|
58 |
+
|
59 |
+
.loading .spinner {
|
60 |
+
display: inline;
|
61 |
+
}
|
62 |
+
|
63 |
+
/* Radio toggle styles */
|
64 |
+
.toggle-radio {
|
65 |
+
display: none; /* Initially hidden */
|
66 |
+
align-items: center;
|
67 |
+
gap: 10px;
|
68 |
+
margin: 10px 0;
|
69 |
+
}
|
70 |
+
|
71 |
+
.toggle-radio input[type="radio"] {
|
72 |
+
margin-right: 5px;
|
73 |
+
}
|
74 |
+
|
75 |
+
.toggle-radio label {
|
76 |
+
display: flex;
|
77 |
+
align-items: center;
|
78 |
+
margin-right: 15px;
|
79 |
+
color: #666;
|
80 |
+
cursor: pointer;
|
81 |
+
}
|
82 |
+
|
83 |
+
#contextSection {
|
84 |
+
display: none; /* Initially hidden */
|
85 |
+
}
|
86 |
+
|
87 |
+
/* Initially hide these sections */
|
88 |
+
.query-section,
|
89 |
+
.response-section,
|
90 |
+
.toggle-radio,
|
91 |
+
#contextSection {
|
92 |
+
display: none;
|
93 |
+
}
|
94 |
+
</style>
|
95 |
+
</head>
|
96 |
+
<body>
|
97 |
+
<div class="container">
|
98 |
+
<h1>RAG Application</h1>
|
99 |
+
|
100 |
+
<div class="upload-section">
|
101 |
+
<h2>Upload Document</h2>
|
102 |
+
<input type="file" id="fileInput" accept=".txt,.pdf">
|
103 |
+
<button id="uploadButton" onclick="uploadFile()">Upload</button>
|
104 |
+
<span id="uploadSpinner" class="spinner">Processing...</span>
|
105 |
+
</div>
|
106 |
+
|
107 |
+
<div id="querySection" class="query-section">
|
108 |
+
<h2>Ask a Question</h2>
|
109 |
+
<textarea id="queryInput" placeholder="Enter your question here..."></textarea>
|
110 |
+
<button id="queryButton" onclick="submitQuery()">Submit Query</button>
|
111 |
+
<span id="querySpinner" class="spinner">Processing...</span>
|
112 |
+
</div>
|
113 |
+
|
114 |
+
<div id="responseSection" class="response-section">
|
115 |
+
<h2>Response</h2>
|
116 |
+
<div id="answer"></div>
|
117 |
+
|
118 |
+
<!-- Add radio toggle -->
|
119 |
+
<div class="toggle-radio">
|
120 |
+
<span>Context: </span>
|
121 |
+
<label>
|
122 |
+
<input type="radio" name="contextToggle" value="show" onchange="toggleContext(this)"> Show
|
123 |
+
</label>
|
124 |
+
<label>
|
125 |
+
<input type="radio" name="contextToggle" value="hide" onchange="toggleContext(this)" checked> Hide
|
126 |
+
</label>
|
127 |
+
</div>
|
128 |
+
|
129 |
+
<div id="contextSection">
|
130 |
+
<h3>Context</h3>
|
131 |
+
<div id="context"></div>
|
132 |
+
</div>
|
133 |
+
</div>
|
134 |
+
</div>
|
135 |
+
|
136 |
+
<script>
|
137 |
+
let currentSessionId = null;
|
138 |
+
|
139 |
+
/**
 * Switch the upload or query controls in or out of their busy state.
 *
 * @param {boolean} isLoading - true to disable the button and show the spinner.
 * @param {string} action - 'upload' selects the upload controls; any other
 *     value selects the query controls.
 */
function setLoading(isLoading, action) {
    const prefix = action === 'upload' ? 'upload' : 'query';
    const button = document.getElementById(`${prefix}Button`);
    const spinner = document.getElementById(`${prefix}Spinner`);
    const busy = Boolean(isLoading);

    button.classList.toggle('loading', busy);
    spinner.style.display = busy ? 'inline' : 'none';
    button.disabled = busy;
}
|
153 |
+
|
154 |
+
/**
 * Upload the selected file to /upload and, on success, remember the returned
 * session id and reveal the query section.
 */
async function uploadFile() {
    const selectedFile = document.getElementById('fileInput').files[0];
    if (!selectedFile) {
        alert('Please select a file first');
        return;
    }

    setLoading(true, 'upload');

    // Hide all sections during upload and processing
    for (const id of ['querySection', 'responseSection', 'contextSection']) {
        document.getElementById(id).style.display = 'none';
    }
    document.querySelector('.toggle-radio').style.display = 'none';

    // Clear any previous content
    for (const id of ['answer', 'context']) {
        document.getElementById(id).innerHTML = '';
    }
    document.getElementById('queryInput').value = '';

    const payload = new FormData();
    payload.append('file', selectedFile);

    try {
        const response = await fetch('/upload', { method: 'POST', body: payload });
        if (!response.ok) {
            throw new Error(`HTTP error! status: ${response.status}`);
        }

        const data = await response.json();
        currentSessionId = data.session_id;

        // Only show query section after successful upload
        document.getElementById('querySection').style.display = 'block';

        alert('File uploaded successfully! You can now ask questions.');
    } catch (error) {
        console.error('Error:', error);
        alert('Error uploading file: ' + error.message);
    } finally {
        setLoading(false, 'upload');
    }
}
|
202 |
+
|
203 |
+
/**
 * Show or hide the context section according to the chosen radio button.
 *
 * @param {HTMLInputElement} radio - the radio input that changed; the section
 *     is shown only when its value is 'show'.
 */
function toggleContext(radio) {
    const visible = radio.value === 'show';
    document.getElementById('contextSection').style.display = visible ? 'block' : 'none';
}
|
207 |
+
|
208 |
+
/**
 * Send the current question to /query and render the answer and its context.
 *
 * The answer and context strings come from the model and from the uploaded
 * document, so they are inserted as text nodes, never parsed as HTML.
 */
async function submitQuery() {
    if (!currentSessionId) {
        alert('Please upload a document first');
        return;
    }

    const queryInput = document.getElementById('queryInput');
    const query = queryInput.value.trim();

    if (!query) {
        alert('Please enter a query');
        return;
    }

    setLoading(true, 'query');

    // Show response section but hide context initially
    document.getElementById('responseSection').style.display = 'block';
    document.getElementById('contextSection').style.display = 'none';
    document.querySelector('.toggle-radio').style.display = 'none';
    // Reset radio to "Hide"
    document.querySelector('input[value="hide"]').checked = true;

    try {
        const response = await fetch('/query', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json'
            },
            body: JSON.stringify({
                session_id: currentSessionId,
                query: query
            })
        });

        if (!response.ok) {
            throw new Error(`HTTP error! status: ${response.status}`);
        }

        const data = await response.json();

        // Display response: <p><strong>Answer:</strong> …text…</p>
        const answerEl = document.getElementById('answer');
        answerEl.textContent = '';
        const paragraph = document.createElement('p');
        const label = document.createElement('strong');
        label.textContent = 'Answer:';
        paragraph.append(label, ` ${data.answer}`);
        answerEl.appendChild(paragraph);

        // Display context inside a single <div class="context"> wrapper.
        const contextEl = document.getElementById('context');
        contextEl.textContent = '';
        const contextBox = document.createElement('div');
        contextBox.className = 'context';

        if (data.context && Array.isArray(data.context)) {
            const validContexts = data.context.filter(c => c != null && c !== '');
            if (validContexts.length > 0) {
                validContexts.forEach((text, index) => {
                    if (index > 0) {
                        // Blank line between consecutive context chunks.
                        contextBox.append(
                            document.createElement('br'),
                            document.createElement('br')
                        );
                    }
                    contextBox.append(String(text));
                });
            } else {
                contextBox.textContent = 'No context available';
            }

            // Show the radio toggle only after we have a valid response with context
            document.querySelector('.toggle-radio').style.display = 'flex';
        } else {
            contextBox.textContent = 'No context available';
        }
        contextEl.appendChild(contextBox);
    } catch (error) {
        console.error('Error:', error);
        alert('Error submitting query: ' + error.message);
    } finally {
        setLoading(false, 'query');
    }
}
|
272 |
+
</script>
|
273 |
+
</body>
|
274 |
+
</html>
|
backend/text_utils.py
ADDED
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import List
|
2 |
+
import PyPDF2
|
3 |
+
import io
|
4 |
+
|
5 |
+
class CharacterTextSplitter:
    """Split documents into fixed-size, overlapping character chunks."""

    def __init__(self, chunk_size=1000, chunk_overlap=200):
        """Configure the splitter.

        Args:
            chunk_size: Maximum number of characters per chunk; must be > 0.
            chunk_overlap: Characters shared by consecutive chunks; must be
                >= 0 and smaller than ``chunk_size``.

        Raises:
            ValueError: If the sizes would prevent the window from advancing.
        """
        if chunk_size <= 0:
            raise ValueError("chunk_size must be positive")
        if not 0 <= chunk_overlap < chunk_size:
            # With overlap >= size the window start never moves forward and
            # split_texts would loop forever.
            raise ValueError("chunk_overlap must satisfy 0 <= chunk_overlap < chunk_size")
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap

    def split_texts(self, documents: List[str]) -> List[str]:
        """Return the chunks of every non-blank document, in order.

        Args:
            documents: Full document texts. Empty or whitespace-only
                documents are skipped.

        Returns:
            A flat list of chunks, each at most ``chunk_size`` characters,
            with consecutive chunks of one document overlapping by
            ``chunk_overlap`` characters.
        """
        step = self.chunk_size - self.chunk_overlap
        texts = []
        for doc in documents:
            if not doc.strip():
                continue

            start = 0
            while start < len(doc):
                end = start + self.chunk_size
                texts.append(doc[start:end])
                if end >= len(doc):
                    # This chunk already reaches the end of the document; a
                    # further window would only repeat its tail.
                    break
                start += step

        return texts
|
25 |
+
|
26 |
+
class TextFileLoader:
    """Load a UTF-8 text file as a single document."""

    def __init__(self, file_path: str):
        self.file_path = file_path

    def load_documents(self) -> List[str]:
        """Read the whole file and return its text in a one-element list."""
        with open(self.file_path, 'r', encoding='utf-8') as handle:
            contents = handle.read()
        return [contents]
|
33 |
+
|
34 |
+
class PDFLoader:
    """Load the extractable text of a PDF file as a single document."""

    def __init__(self, file_path: str):
        self.file_path = file_path

    def load_documents(self) -> List[str]:
        """Extract the text of every page and return it in a one-element list.

        Each page's text is followed by a newline. A page for which the
        extractor yields nothing contributes only that newline.
        """
        with open(self.file_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            # Collect per-page text and join once rather than growing a
            # string with += inside the loop; `or ""` guards against a page
            # whose extraction yields no string.
            page_texts = [
                (page.extract_text() or "") + "\n" for page in pdf_reader.pages
            ]
        return ["".join(page_texts)]
|
backend/vector_store.py
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import List, Tuple
|
2 |
+
import numpy as np
|
3 |
+
from openai import AsyncOpenAI
|
4 |
+
import os
|
5 |
+
|
6 |
+
def cosine_similarity(a, b):
    """Calculate cosine similarity between two vectors.

    Args:
        a: First vector (any sequence NumPy can convert to a float array).
        b: Second vector, same length as ``a``.

    Returns:
        The cosine of the angle between ``a`` and ``b`` as a float, or
        ``0.0`` when either vector has zero norm (the ratio is undefined
        there and would otherwise evaluate to NaN).
    """
    a = np.asarray(a, dtype=float)
    b = np.asarray(b, dtype=float)
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    if denominator == 0:
        return 0.0
    return float(np.dot(a, b) / denominator)
|
11 |
+
|
12 |
+
class VectorDatabase:
    """In-memory store pairing text chunks with their OpenAI embeddings.

    ``self.texts[i]`` is always the text whose embedding is
    ``self.embeddings[i]``.
    """

    def __init__(self):
        self.embeddings = []
        self.texts = []
        # The API key is read from the OPENAI_API_KEY environment variable.
        self.client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))

    async def abuild_from_list(self, texts):
        """Embed ``texts`` and replace the store's contents with the result.

        Blank texts, and texts for which the API returns no embedding, are
        left out of both ``self.texts`` and ``self.embeddings`` so the two
        lists stay index-aligned.

        Args:
            texts: Iterable of text chunks to embed.

        Returns:
            ``self``, to allow chaining.

        Raises:
            Exception: Whatever the embeddings call raises, after printing it.
                The store's previous contents are left untouched in that case.
        """
        kept_texts = []
        kept_embeddings = []

        try:
            for text in texts:
                if not text.strip():  # Skip empty texts
                    continue

                response = await self.client.embeddings.create(
                    model="text-embedding-ada-002",
                    input=text.replace("\n", " ")  # Replace newlines with spaces
                )
                if response and response.data:
                    kept_texts.append(text)
                    kept_embeddings.append(response.data[0].embedding)
                else:
                    print(f"Warning: No embedding generated for text: {text[:100]}...")
        except Exception as e:
            print(f"Error in abuild_from_list: {str(e)}")
            raise

        # Commit both lists together so they can never get out of step.
        self.texts = kept_texts
        self.embeddings = kept_embeddings
        return self

    async def search_by_text(self, query, k=4):
        """Return the ``k`` stored texts most similar to ``query``.

        Args:
            query: The search string; a blank query yields no results.
            k: Maximum number of results.

        Returns:
            Up to ``k`` ``(text, similarity)`` tuples, most similar first.
            Empty when the query is blank or no query embedding is returned.

        Raises:
            Exception: Whatever the embeddings call raises, after printing it.
        """
        if not query.strip():
            return []

        try:
            # Get query embedding
            response = await self.client.embeddings.create(
                model="text-embedding-ada-002",
                input=query.replace("\n", " ")  # Replace newlines with spaces
            )

            if not response or not response.data:
                print("Warning: No embedding generated for query")
                return []

            query_embedding = response.data[0].embedding

            # Calculate similarities, pairing each text with its own embedding.
            similarities = [
                (text, cosine_similarity(query_embedding, embedding))
                for text, embedding in zip(self.texts, self.embeddings)
                if embedding  # Check if embedding exists
            ]

            # Sort by similarity, best match first
            similarities.sort(key=lambda pair: pair[1], reverse=True)

            # Return top k results
            return similarities[:k]

        except Exception as e:
            print(f"Error in search_by_text: {str(e)}")
            raise
|
frontend/dist/css/style.css
ADDED
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
body {
|
2 |
+
font-family: Arial, sans-serif;
|
3 |
+
max-width: 800px;
|
4 |
+
margin: 0 auto;
|
5 |
+
padding: 20px;
|
6 |
+
}
|
7 |
+
|
8 |
+
.container {
|
9 |
+
max-width: 800px;
|
10 |
+
margin: 0 auto;
|
11 |
+
padding: 20px;
|
12 |
+
}
|
13 |
+
|
14 |
+
.query-section {
|
15 |
+
display: none;
|
16 |
+
}
|
17 |
+
|
18 |
+
#responseSection {
|
19 |
+
display: none;
|
20 |
+
margin-top: 20px;
|
21 |
+
border-top: 1px solid #ccc;
|
22 |
+
padding-top: 20px;
|
23 |
+
}
|
24 |
+
|
25 |
+
textarea {
|
26 |
+
width: 100%;
|
27 |
+
height: 100px;
|
28 |
+
}
|
29 |
+
|
30 |
+
button {
|
31 |
+
padding: 10px 20px;
|
32 |
+
background-color: #007bff;
|
33 |
+
color: white;
|
34 |
+
border: none;
|
35 |
+
border-radius: 4px;
|
36 |
+
cursor: pointer;
|
37 |
+
}
|
38 |
+
|
39 |
+
button:disabled {
|
40 |
+
background-color: #ccc;
|
41 |
+
}
|
42 |
+
|
43 |
+
.context {
|
44 |
+
background-color: #f8f9fa;
|
45 |
+
padding: 10px;
|
46 |
+
margin-top: 10px;
|
47 |
+
border-radius: 4px;
|
48 |
+
}
|
49 |
+
|
50 |
+
.loading {
|
51 |
+
opacity: 0.5;
|
52 |
+
pointer-events: none;
|
53 |
+
}
|
54 |
+
|
55 |
+
.spinner {
|
56 |
+
display: none;
|
57 |
+
}
|
58 |
+
|
59 |
+
.loading .spinner {
|
60 |
+
display: inline;
|
61 |
+
}
|
62 |
+
|
63 |
+
/* Switch styles */
|
64 |
+
.switch-container {
|
65 |
+
margin: 10px 0;
|
66 |
+
}
|
67 |
+
|
68 |
+
.switch {
|
69 |
+
position: relative;
|
70 |
+
display: inline-block;
|
71 |
+
width: 60px;
|
72 |
+
height: 34px;
|
73 |
+
}
|
74 |
+
|
75 |
+
.switch input {
|
76 |
+
opacity: 0;
|
77 |
+
width: 0;
|
78 |
+
height: 0;
|
79 |
+
}
|
80 |
+
|
81 |
+
.slider {
|
82 |
+
position: absolute;
|
83 |
+
cursor: pointer;
|
84 |
+
top: 0;
|
85 |
+
left: 0;
|
86 |
+
right: 0;
|
87 |
+
bottom: 0;
|
88 |
+
background-color: #ccc;
|
89 |
+
transition: .4s;
|
90 |
+
border-radius: 34px;
|
91 |
+
}
|
92 |
+
|
93 |
+
.slider:before {
|
94 |
+
position: absolute;
|
95 |
+
content: "";
|
96 |
+
height: 26px;
|
97 |
+
width: 26px;
|
98 |
+
left: 4px;
|
99 |
+
bottom: 4px;
|
100 |
+
background-color: white;
|
101 |
+
transition: .4s;
|
102 |
+
border-radius: 50%;
|
103 |
+
}
|
104 |
+
|
105 |
+
input:checked + .slider {
|
106 |
+
background-color: #007bff;
|
107 |
+
}
|
108 |
+
|
109 |
+
input:checked + .slider:before {
|
110 |
+
transform: translateX(26px);
|
111 |
+
}
|
112 |
+
|
113 |
+
.switch-label {
|
114 |
+
font-size: 0.9em;
|
115 |
+
color: #666;
|
116 |
+
}
|
117 |
+
|
118 |
+
#contextSection {
|
119 |
+
display: none;
|
120 |
+
}
|
frontend/dist/index.html
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>RAG Application</title>
    <!-- This page is served at "/", while its assets are exposed by the
         backend under the "/static" mount, so asset URLs carry that prefix. -->
    <link rel="stylesheet" href="static/css/style.css">
</head>
<body>
    <div class="container">
        <h1>RAG Application</h1>

        <div class="upload-section">
            <h2>Upload Document</h2>
            <input type="file" id="fileInput" accept=".txt,.pdf">
            <button id="uploadButton" onclick="uploadFile()">Upload</button>
            <span id="uploadSpinner" class="spinner">Processing...</span>
        </div>

        <div id="querySection" class="query-section">
            <h2>Ask a Question</h2>
            <textarea id="queryInput" placeholder="Enter your question here..."></textarea>
            <button id="queryButton" onclick="submitQuery()">Submit Query</button>
            <span id="querySpinner" class="spinner">Processing...</span>
        </div>

        <div id="responseSection">
            <h2>Response</h2>
            <div id="answer"></div>
            <div class="switch-container">
                <label class="switch">
                    <input type="checkbox" id="contextToggle" onchange="toggleContext()">
                    <span class="slider"></span>
                </label>
                <span class="switch-label">Show Context</span>
            </div>
            <div id="contextSection">
                <h3>Context</h3>
                <div id="context"></div>
            </div>
        </div>
    </div>
    <script src="static/js/app.js"></script>
</body>
</html>
|
frontend/dist/js/app.js
ADDED
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
let sessionId = null;
|
2 |
+
|
3 |
+
/**
 * Put the upload or query controls into, or take them out of, a busy state.
 *
 * @param {boolean} isLoading - true disables the button and shows the spinner.
 * @param {string} action - 'upload' targets the upload controls; anything
 *     else targets the query controls.
 */
function setLoading(isLoading, action) {
    const isUpload = action === 'upload';
    const button = document.getElementById(isUpload ? 'uploadButton' : 'queryButton');
    const spinner = document.getElementById(isUpload ? 'uploadSpinner' : 'querySpinner');
    const busy = Boolean(isLoading);

    button.classList.toggle('loading', busy);
    spinner.style.display = busy ? 'inline' : 'none';
    button.disabled = busy;
}
|
17 |
+
|
18 |
+
/**
 * Upload the selected file to /upload. On success the returned session id is
 * stored in `sessionId` and the query section is revealed; on any failure
 * the user is alerted and the query section stays as it was.
 */
async function uploadFile() {
    const fileInput = document.getElementById('fileInput');

    if (!fileInput.files.length) {
        alert('Please select a file first');
        return;
    }

    const formData = new FormData();
    formData.append('file', fileInput.files[0]);

    // Shows the spinner and disables the button so the same file cannot be
    // submitted twice while processing is in flight.
    setLoading(true, 'upload');

    try {
        const response = await fetch('/upload', {
            method: 'POST',
            body: formData
        });

        // fetch() resolves on HTTP error statuses too; without this check a
        // failed upload would leave sessionId undefined and still unlock the
        // query form.
        if (!response.ok) {
            throw new Error(`HTTP error! status: ${response.status}`);
        }

        const data = await response.json();
        sessionId = data.session_id;
        document.getElementById('querySection').style.display = 'block';
    } catch (error) {
        alert('Error uploading file: ' + error.message);
    } finally {
        setLoading(false, 'upload');
    }
}
|
49 |
+
|
50 |
+
/**
 * Send the current question to /query and render the answer and its context.
 *
 * Context strings originate from the uploaded document, so they are inserted
 * as text nodes and never parsed as HTML.
 */
async function submitQuery() {
    if (!sessionId) {
        alert('Please upload a document first');
        return;
    }

    const query = document.getElementById('queryInput').value.trim();
    if (!query) {
        alert('Please enter a query');
        return;
    }

    setLoading(true, 'query');

    try {
        const response = await fetch('/query', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json'
            },
            body: JSON.stringify({
                session_id: sessionId,
                query: query
            })
        });

        // fetch() resolves on HTTP error statuses too; surface them instead
        // of trying to render an error payload as an answer.
        if (!response.ok) {
            throw new Error(`HTTP error! status: ${response.status}`);
        }

        const data = await response.json();
        document.getElementById('answer').textContent = data.answer;

        const contextEl = document.getElementById('context');
        contextEl.textContent = '';
        const contexts = Array.isArray(data.context) ? data.context : [];
        contexts.forEach((text, index) => {
            if (index > 0) {
                // Blank line between consecutive context chunks.
                contextEl.append(
                    document.createElement('br'),
                    document.createElement('br')
                );
            }
            contextEl.append(String(text));
        });

        // The stylesheet hides #responseSection by default, so it has to be
        // revealed explicitly once there is something to show.
        document.getElementById('responseSection').style.display = 'block';
    } catch (error) {
        alert('Error submitting query: ' + error.message);
    } finally {
        setLoading(false, 'query');
    }
}
|
83 |
+
|
84 |
+
/**
 * Show the context section while the "contextToggle" checkbox is checked,
 * and hide it otherwise.
 */
function toggleContext() {
    const contextSection = document.getElementById('contextSection');
    const showContext = document.getElementById('contextToggle').checked;
    if (showContext) {
        contextSection.style.display = 'block';
    } else {
        contextSection.style.display = 'none';
    }
}
|
88 |
+
|
89 |
+
// Initially hide the query section
|
90 |
+
document.getElementById('querySection').style.display = 'none';
|
frontend/package-lock.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"name": "frontend",
|
3 |
+
"lockfileVersion": 2,
|
4 |
+
"requires": true,
|
5 |
+
"packages": {}
|
6 |
+
}
|