Spaces:
Runtime error
Runtime error
Initial commit
Browse files
- Dockerfile +42 -42
- backend/app/api/endpoints/documents.py +21 -17
- docker-compose.yml +16 -7
- frontend/components/DocumentList.tsx +12 -0
- frontend/components/DocumentUpload.tsx +2 -0
Dockerfile
CHANGED
@@ -1,43 +1,43 @@
|
|
1 |
-
#
|
2 |
-
FROM node:18-alpine AS frontend-build
|
3 |
-
|
4 |
-
WORKDIR /app/frontend
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
#
|
12 |
-
FROM python:3.11-slim AS backend
|
13 |
-
|
14 |
-
WORKDIR /app/backend
|
15 |
-
|
16 |
-
# System
|
17 |
-
RUN apt-get update && apt-get install -y gcc curl && rm -rf /var/lib/apt/lists/*
|
18 |
-
|
19 |
-
#
|
20 |
-
COPY backend/requirements.txt ./
|
21 |
-
RUN pip install --no-cache-dir -r requirements.txt
|
22 |
-
|
23 |
-
#
|
24 |
-
COPY backend/ ./
|
25 |
-
|
26 |
-
#
|
27 |
-
COPY --from=frontend-build /app/frontend/out /app/frontend_out
|
28 |
-
|
29 |
-
#
|
30 |
-
RUN mkdir -p /app/backend/
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
# Expose
|
35 |
-
EXPOSE 8000
|
36 |
-
|
37 |
-
#
|
38 |
-
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
|
39 |
-
|
40 |
-
|
41 |
-
# Start FastAPI
|
42 |
-
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
|
43 |
|
|
|
1 |
+
# ---------- Front-end build stage ----------
|
2 |
+
FROM node:18-alpine AS frontend-build
|
3 |
+
|
4 |
+
WORKDIR /app/frontend
|
5 |
+
COPY frontend/package*.json ./
|
6 |
+
RUN npm ci --only=production
|
7 |
+
COPY frontend/ ./
|
8 |
+
RUN npm run build && npm run export
|
9 |
+
|
10 |
+
|
11 |
+
# ---------- Back-end stage ----------
|
12 |
+
FROM python:3.11-slim AS backend
|
13 |
+
|
14 |
+
WORKDIR /app/backend # <- matches compose volumes
|
15 |
+
|
16 |
+
# System build deps
|
17 |
+
RUN apt-get update && apt-get install -y gcc curl && rm -rf /var/lib/apt/lists/*
|
18 |
+
|
19 |
+
# Python deps
|
20 |
+
COPY backend/requirements.txt ./
|
21 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
22 |
+
|
23 |
+
# App source code
|
24 |
+
COPY backend/ ./
|
25 |
+
|
26 |
+
# Static export from previous stage
|
27 |
+
COPY --from=frontend-build /app/frontend/out /app/frontend_out
|
28 |
+
|
29 |
+
# Create folders & DB file and make them writable
|
30 |
+
RUN mkdir -p /app/backend/uploads /app/backend/chroma_db \
|
31 |
+
&& touch /app/backend/pdf_chatbot.db \
|
32 |
+
&& chmod -R 777 /app/backend/uploads /app/backend/chroma_db /app/backend/pdf_chatbot.db
|
33 |
+
|
34 |
+
# Expose API port
|
35 |
+
EXPOSE 8000
|
36 |
+
|
37 |
+
# Small health-check
|
38 |
+
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
|
39 |
+
CMD curl -f http://localhost:8000/health || exit 1
|
40 |
+
|
41 |
+
# Start FastAPI
|
42 |
+
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
|
43 |
|
backend/app/api/endpoints/documents.py
CHANGED
@@ -14,6 +14,8 @@ from app.services.pdf_processor import PDFProcessor
|
|
14 |
from app.services.vector_store import VectorStore
|
15 |
from app.models.document import ChatMessage
|
16 |
import shutil
|
|
|
|
|
17 |
|
18 |
router = APIRouter()
|
19 |
pdf_processor = PDFProcessor()
|
@@ -32,11 +34,11 @@ async def upload_document(
|
|
32 |
if doc_count >= 3:
|
33 |
raise HTTPException(status_code=400, detail="You can only upload up to 3 documents.")
|
34 |
# Validate file type
|
35 |
-
if not file.filename.lower().endswith('.pdf'):
|
36 |
raise HTTPException(status_code=400, detail="Only PDF files are allowed")
|
37 |
|
38 |
# Generate unique filename
|
39 |
-
file_extension = os.path.splitext(file.filename)[1]
|
40 |
unique_filename = f"{uuid.uuid4()}{file_extension}"
|
41 |
file_path = os.path.join(settings.UPLOAD_DIR, unique_filename)
|
42 |
|
@@ -150,7 +152,7 @@ def delete_document(document_id: int, db: Session = Depends(get_db)):
|
|
150 |
vector_store.delete_document(str(document_id))
|
151 |
|
152 |
# Delete file from filesystem
|
153 |
-
if os.path.exists(document.file_path):
|
154 |
os.remove(document.file_path)
|
155 |
|
156 |
# Delete from database
|
@@ -168,7 +170,7 @@ def delete_document(document_id: int, db: Session = Depends(get_db)):
|
|
168 |
|
169 |
|
170 |
@router.post("/clear_all")
|
171 |
-
def clear_all_data(db: Session = Depends(get_db)):
|
172 |
"""Admin endpoint to clear all documents, chat messages, uploaded files, and vector store."""
|
173 |
try:
|
174 |
# Delete all documents and chat messages from DB
|
@@ -177,17 +179,20 @@ def clear_all_data(db: Session = Depends(get_db)):
|
|
177 |
db.commit()
|
178 |
# Delete all files in uploads directory
|
179 |
upload_dir = settings.UPLOAD_DIR
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
|
|
|
|
|
|
189 |
# Clear ChromaDB vector store using the singleton
|
190 |
-
vector_store.clear_all
|
191 |
return {"success": True, "message": "All documents, chat messages, uploads, and vectors cleared."}
|
192 |
except Exception as e:
|
193 |
return {"success": False, "message": f"Error clearing data: {str(e)}"}
|
@@ -199,9 +204,8 @@ def get_document_stats(db: Session = Depends(get_db)):
|
|
199 |
try:
|
200 |
total_documents = db.query(Document).count()
|
201 |
processed_documents = db.query(Document).filter(Document.processed == True).count()
|
202 |
-
|
203 |
-
|
204 |
-
).scalar() or 0
|
205 |
|
206 |
vector_stats = vector_store.get_collection_stats()
|
207 |
|
|
|
14 |
from app.services.vector_store import VectorStore
|
15 |
from app.models.document import ChatMessage
|
16 |
import shutil
|
17 |
+
import asyncio
|
18 |
+
from concurrent.futures import ThreadPoolExecutor
|
19 |
|
20 |
router = APIRouter()
|
21 |
pdf_processor = PDFProcessor()
|
|
|
34 |
if doc_count >= 3:
|
35 |
raise HTTPException(status_code=400, detail="You can only upload up to 3 documents.")
|
36 |
# Validate file type
|
37 |
+
if not file.filename or not file.filename.lower().endswith('.pdf'):
|
38 |
raise HTTPException(status_code=400, detail="Only PDF files are allowed")
|
39 |
|
40 |
# Generate unique filename
|
41 |
+
file_extension = os.path.splitext(str(file.filename))[1]
|
42 |
unique_filename = f"{uuid.uuid4()}{file_extension}"
|
43 |
file_path = os.path.join(settings.UPLOAD_DIR, unique_filename)
|
44 |
|
|
|
152 |
vector_store.delete_document(str(document_id))
|
153 |
|
154 |
# Delete file from filesystem
|
155 |
+
if isinstance(document.file_path, str) and os.path.exists(document.file_path):
|
156 |
os.remove(document.file_path)
|
157 |
|
158 |
# Delete from database
|
|
|
170 |
|
171 |
|
172 |
@router.post("/clear_all")
|
173 |
+
async def clear_all_data(db: Session = Depends(get_db)):
|
174 |
"""Admin endpoint to clear all documents, chat messages, uploaded files, and vector store."""
|
175 |
try:
|
176 |
# Delete all documents and chat messages from DB
|
|
|
179 |
db.commit()
|
180 |
# Delete all files in uploads directory
|
181 |
upload_dir = settings.UPLOAD_DIR
|
182 |
+
loop = asyncio.get_event_loop()
|
183 |
+
def remove_uploads():
|
184 |
+
for filename in os.listdir(upload_dir):
|
185 |
+
file_path = os.path.join(upload_dir, filename)
|
186 |
+
try:
|
187 |
+
if os.path.isfile(file_path) or os.path.islink(file_path):
|
188 |
+
os.unlink(file_path)
|
189 |
+
elif os.path.isdir(file_path):
|
190 |
+
shutil.rmtree(file_path)
|
191 |
+
except Exception as e:
|
192 |
+
print(f"Failed to delete {file_path}: {e}")
|
193 |
+
await loop.run_in_executor(None, remove_uploads)
|
194 |
# Clear ChromaDB vector store using the singleton
|
195 |
+
await loop.run_in_executor(None, vector_store.clear_all)
|
196 |
return {"success": True, "message": "All documents, chat messages, uploads, and vectors cleared."}
|
197 |
except Exception as e:
|
198 |
return {"success": False, "message": f"Error clearing data: {str(e)}"}
|
|
|
204 |
try:
|
205 |
total_documents = db.query(Document).count()
|
206 |
processed_documents = db.query(Document).filter(Document.processed == True).count()
|
207 |
+
from sqlalchemy import func as sa_func
|
208 |
+
total_size = db.query(sa_func.sum(Document.file_size)).scalar() or 0
|
|
|
209 |
|
210 |
vector_stats = vector_store.get_collection_stats()
|
211 |
|
docker-compose.yml
CHANGED
@@ -1,25 +1,34 @@
|
|
1 |
-
version:
|
2 |
|
3 |
services:
|
4 |
backend:
|
5 |
build:
|
6 |
-
context: ./backend
|
7 |
dockerfile: Dockerfile
|
8 |
ports:
|
9 |
- "8000:8000"
|
|
|
|
|
10 |
environment:
|
11 |
-
|
12 |
-
-
|
13 |
-
|
|
|
|
|
|
|
14 |
- MAX_FILE_SIZE=10485760
|
15 |
- ALLOWED_EXTENSIONS=[".pdf"]
|
16 |
-
- BACKEND_CORS_ORIGINS=["http://localhost:3000","http://localhost:3001",
|
|
|
17 |
env_file:
|
18 |
-
- ./backend/.env
|
|
|
|
|
19 |
volumes:
|
20 |
- ./backend/uploads:/app/backend/uploads
|
21 |
- ./backend/chroma_db:/app/backend/chroma_db
|
22 |
- ./backend/pdf_chatbot.db:/app/backend/pdf_chatbot.db
|
|
|
23 |
restart: unless-stopped
|
24 |
|
25 |
frontend:
|
|
|
1 |
+
version: "3.8"
|
2 |
|
3 |
services:
|
4 |
backend:
|
5 |
build:
|
6 |
+
context: ./backend # <- your backend folder on host
|
7 |
dockerfile: Dockerfile
|
8 |
ports:
|
9 |
- "8000:8000"
|
10 |
+
|
11 |
+
# ── Environment the code expects ─────────────────────────────
|
12 |
environment:
|
13 |
+
# ▼ SQLite file lives in the same folder as your code
|
14 |
+
- DATABASE_URL=sqlite:///./pdf_chatbot.db
|
15 |
+
# ▼ Chroma + uploads are relative to the WORKDIR (/app/backend)
|
16 |
+
- CHROMA_PERSIST_DIRECTORY=./chroma_db
|
17 |
+
- UPLOAD_DIR=./uploads
|
18 |
+
# other settings
|
19 |
- MAX_FILE_SIZE=10485760
|
20 |
- ALLOWED_EXTENSIONS=[".pdf"]
|
21 |
+
- BACKEND_CORS_ORIGINS=["http://localhost:3000","http://localhost:3001",
|
22 |
+
"http://127.0.0.1:3000","http://127.0.0.1:3001"]
|
23 |
env_file:
|
24 |
+
- ./backend/.env # optional extra secrets
|
25 |
+
|
26 |
+
# ── Persist data on host in the same structure you already have ──
|
27 |
volumes:
|
28 |
- ./backend/uploads:/app/backend/uploads
|
29 |
- ./backend/chroma_db:/app/backend/chroma_db
|
30 |
- ./backend/pdf_chatbot.db:/app/backend/pdf_chatbot.db
|
31 |
+
|
32 |
restart: unless-stopped
|
33 |
|
34 |
frontend:
|
frontend/components/DocumentList.tsx
CHANGED
@@ -14,6 +14,18 @@ export default function DocumentList({ onDocumentChange }: { onDocumentChange?:
|
|
14 |
loadStats()
|
15 |
}, [])
|
16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
const loadDocuments = async () => {
|
18 |
try {
|
19 |
setLoading(true)
|
|
|
14 |
loadStats()
|
15 |
}, [])
|
16 |
|
17 |
+
// Re-fetch stats and documents when onDocumentChange is triggered
|
18 |
+
useEffect(() => {
|
19 |
+
if (!onDocumentChange) return;
|
20 |
+
const handler = () => {
|
21 |
+
loadDocuments();
|
22 |
+
loadStats();
|
23 |
+
};
|
24 |
+
// Listen for custom event
|
25 |
+
window.addEventListener('documentChange', handler);
|
26 |
+
return () => window.removeEventListener('documentChange', handler);
|
27 |
+
}, [onDocumentChange]);
|
28 |
+
|
29 |
const loadDocuments = async () => {
|
30 |
try {
|
31 |
setLoading(true)
|
frontend/components/DocumentUpload.tsx
CHANGED
@@ -55,6 +55,7 @@ export default function DocumentUpload({ disabled, onDocumentChange }: DocumentU
|
|
55 |
)
|
56 |
)
|
57 |
if (onDocumentChange) onDocumentChange();
|
|
|
58 |
} catch (error: any) {
|
59 |
setUploadStatuses(prev =>
|
60 |
prev.map(upload =>
|
@@ -94,6 +95,7 @@ export default function DocumentUpload({ disabled, onDocumentChange }: DocumentU
|
|
94 |
}
|
95 |
setUploadStatuses(prev => prev.filter(upload => upload.file !== file))
|
96 |
if (onDocumentChange) onDocumentChange();
|
|
|
97 |
}
|
98 |
|
99 |
const formatFileSize = (bytes: number) => {
|
|
|
55 |
)
|
56 |
)
|
57 |
if (onDocumentChange) onDocumentChange();
|
58 |
+
window.dispatchEvent(new Event('documentChange'));
|
59 |
} catch (error: any) {
|
60 |
setUploadStatuses(prev =>
|
61 |
prev.map(upload =>
|
|
|
95 |
}
|
96 |
setUploadStatuses(prev => prev.filter(upload => upload.file !== file))
|
97 |
if (onDocumentChange) onDocumentChange();
|
98 |
+
window.dispatchEvent(new Event('documentChange'));
|
99 |
}
|
100 |
|
101 |
const formatFileSize = (bytes: number) => {
|