Spaces:
Running
Running
feat: lightweight RAG with Pinecone + Gemini embeddings + Render deploy
Browse files- .env.example +6 -10
- .github/workflows/deploy.yml +28 -32
- Dockerfile +1 -1
- README.md +92 -53
- app.py +30 -38
- config.py +3 -5
- models.py +29 -4
- rag/embeddings.py +98 -49
- rag/generator.py +8 -5
- rag/retriever.py +57 -66
- render.yaml +19 -0
- requirements.txt +3 -4
- templates/profile.html +39 -7
.env.example
CHANGED
|
@@ -1,15 +1,11 @@
|
|
| 1 |
-
MODEL_NAME=
|
| 2 |
-
OLLAMA_BASE_URL=
|
| 3 |
-
CHROMA_DB_PATH=
|
| 4 |
UPLOAD_FOLDER=uploads
|
| 5 |
|
| 6 |
-
OAUTHLIB_INSECURE_TRANSPORT=
|
| 7 |
-
OAUTHLIB_RELAX_TOKEN_SCOPE=
|
| 8 |
-
TUNNEL_URL=
|
| 9 |
|
| 10 |
SECRET_KEY=
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
UPLOAD_FOLDER=uploads
|
| 2 |
|
| 3 |
+
OAUTHLIB_INSECURE_TRANSPORT=1
|
| 4 |
+
OAUTHLIB_RELAX_TOKEN_SCOPE=1
|
|
|
|
| 5 |
|
| 6 |
SECRET_KEY=
|
| 7 |
+
ENCRYPTION_KEY=
|
| 8 |
+
MONGO_URI=
|
|
|
|
| 9 |
|
| 10 |
+
GOOGLE_CLIENT_ID=
|
| 11 |
+
GOOGLE_CLIENT_SECRET=
|
.github/workflows/deploy.yml
CHANGED
|
@@ -1,47 +1,43 @@
|
|
| 1 |
-
name:
|
| 2 |
|
| 3 |
on:
|
| 4 |
push:
|
| 5 |
branches: [ "main" ]
|
|
|
|
|
|
|
| 6 |
|
| 7 |
env:
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
SERVICE: rag-pdf-assistant
|
| 11 |
-
IMAGE: ${{ secrets.GCP_REGION }}-docker.pkg.dev/${{ secrets.GCP_PROJECT_ID }}/rag-app/rag-pdf-assistant
|
| 12 |
|
| 13 |
jobs:
|
| 14 |
-
|
| 15 |
runs-on: ubuntu-latest
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
steps:
|
| 18 |
-
-
|
|
|
|
| 19 |
|
| 20 |
-
- name:
|
| 21 |
-
uses:
|
| 22 |
with:
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
uses: google-github-actions/setup-gcloud@v2
|
| 27 |
-
|
| 28 |
-
- name: Configure Docker for Artifact Registry
|
| 29 |
-
run: gcloud auth configure-docker ${{ env.REGION }}-docker.pkg.dev --quiet
|
| 30 |
|
| 31 |
-
- name:
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
docker push ${{ env.IMAGE }}:${{ github.sha }}
|
| 37 |
-
docker push ${{ env.IMAGE }}:latest
|
| 38 |
|
| 39 |
-
- name:
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
--allow-unauthenticated \
|
| 47 |
-
--set-env-vars "SECRET_KEY=${{ secrets.SECRET_KEY }},ENCRYPTION_KEY=${{ secrets.ENCRYPTION_KEY }},MONGO_URI=${{ secrets.MONGO_URI }},GOOGLE_CLIENT_ID=${{ secrets.GOOGLE_CLIENT_ID }},GOOGLE_CLIENT_SECRET=${{ secrets.GOOGLE_CLIENT_SECRET }}"
|
|
|
|
| 1 |
+
name: Docker Image CI
|
| 2 |
|
| 3 |
on:
|
| 4 |
push:
|
| 5 |
branches: [ "main" ]
|
| 6 |
+
pull_request:
|
| 7 |
+
branches: [ "main" ]
|
| 8 |
|
| 9 |
env:
|
| 10 |
+
REGISTRY: ghcr.io
|
| 11 |
+
IMAGE_NAME: ${{ github.repository }}
|
|
|
|
|
|
|
| 12 |
|
| 13 |
jobs:
|
| 14 |
+
build-and-push:
|
| 15 |
runs-on: ubuntu-latest
|
| 16 |
+
permissions:
|
| 17 |
+
contents: read
|
| 18 |
+
packages: write
|
| 19 |
|
| 20 |
steps:
|
| 21 |
+
- name: Checkout repository
|
| 22 |
+
uses: actions/checkout@v4
|
| 23 |
|
| 24 |
+
- name: Log in to GitHub Container Registry
|
| 25 |
+
uses: docker/login-action@v3
|
| 26 |
with:
|
| 27 |
+
registry: ${{ env.REGISTRY }}
|
| 28 |
+
username: ${{ github.actor }}
|
| 29 |
+
password: ${{ secrets.GITHUB_TOKEN }}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
+
- name: Extract metadata (tags, labels)
|
| 32 |
+
id: meta
|
| 33 |
+
uses: docker/metadata-action@v5
|
| 34 |
+
with:
|
| 35 |
+
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
|
|
|
|
|
|
|
| 36 |
|
| 37 |
+
- name: Build and push Docker image
|
| 38 |
+
uses: docker/build-push-action@v5
|
| 39 |
+
with:
|
| 40 |
+
context: .
|
| 41 |
+
push: true
|
| 42 |
+
tags: ${{ steps.meta.outputs.tags }}
|
| 43 |
+
labels: ${{ steps.meta.outputs.labels }}
|
|
|
|
|
|
Dockerfile
CHANGED
|
@@ -17,7 +17,7 @@ RUN pip install --no-cache-dir -r requirements.txt
|
|
| 17 |
COPY . .
|
| 18 |
|
| 19 |
# Create persistent storage directories
|
| 20 |
-
RUN mkdir -p uploads
|
| 21 |
|
| 22 |
# Command to run the application using Gunicorn for production
|
| 23 |
CMD ["gunicorn", "--bind", "0.0.0.0:5000", "--workers", "2", "app:app"]
|
|
|
|
| 17 |
COPY . .
|
| 18 |
|
| 19 |
# Create persistent storage directories
|
| 20 |
+
RUN mkdir -p uploads
|
| 21 |
|
| 22 |
# Command to run the application using Gunicorn for production
|
| 23 |
CMD ["gunicorn", "--bind", "0.0.0.0:5000", "--workers", "2", "app:app"]
|
README.md
CHANGED
|
@@ -1,21 +1,22 @@
|
|
| 1 |
# π RAG PDF Assistant
|
| 2 |
|
| 3 |
-
A **Retrieval-Augmented Generation (RAG)**
|
| 4 |
|
| 5 |
---
|
| 6 |
|
| 7 |
## π Features
|
| 8 |
|
| 9 |
-
- π **Multi-format Uploads**: Support for PDF, DOCX, and
|
| 10 |
- π¬ **Interactive Chat**: Query and chat with your documents using AI.
|
| 11 |
-
- π **RAG-based Retrieval**: Fast and accurate
|
| 12 |
-
- π§ **Multiple LLM Support**: Powered by Groq API (Llama 3
|
| 13 |
- π **Robust Authentication**: Supports Google OAuth as well as standard Email/Password login.
|
| 14 |
- πΌοΈ **User Profiles**: Custom profile picture uploads & Google profile pic sync.
|
| 15 |
-
- π€ **Data Isolation**: Per-user
|
| 16 |
-
-
|
| 17 |
-
-
|
| 18 |
- π± **Responsive UI**: Minimal and modern front-end for seamless user experience.
|
|
|
|
| 19 |
|
| 20 |
---
|
| 21 |
|
|
@@ -25,9 +26,10 @@ A **Retrieval-Augmented Generation (RAG)** based document assistant built with F
|
|
| 25 |
|-------|------------|
|
| 26 |
| **Backend** | Flask (Python) |
|
| 27 |
| **Authentication** | Flask-Login + Flask-Dance (Google OAuth) |
|
| 28 |
-
| **Embeddings** |
|
| 29 |
-
| **Vector Store** |
|
| 30 |
-
| **LLMs** | Groq API & Google Gemini |
|
|
|
|
| 31 |
| **Frontend** | HTML, CSS, Vanilla JS |
|
| 32 |
|
| 33 |
---
|
|
@@ -37,14 +39,16 @@ A **Retrieval-Augmented Generation (RAG)** based document assistant built with F
|
|
| 37 |
```text
|
| 38 |
RAG_App/
|
| 39 |
βββ app.py # Main Flask application & routes
|
| 40 |
-
βββ models.py #
|
| 41 |
-
βββ config.py # Configuration & env variables
|
| 42 |
βββ requirements.txt # Python dependencies
|
| 43 |
-
βββ .
|
|
|
|
|
|
|
| 44 |
βββ rag/
|
| 45 |
β βββ chunker.py # Document parsing & chunking logic
|
| 46 |
-
β βββ embeddings.py #
|
| 47 |
-
β βββ retriever.py #
|
| 48 |
β βββ generator.py # LLM integration for answer generation
|
| 49 |
βββ templates/
|
| 50 |
β βββ index.html # File management & upload dashboard
|
|
@@ -52,10 +56,12 @@ RAG_App/
|
|
| 52 |
β βββ login.html # User login page
|
| 53 |
β βββ register.html # User registration page
|
| 54 |
β βββ admin.html # Admin dashboard
|
| 55 |
-
β βββ profile.html # User profile &
|
| 56 |
βββ static/ # Static assets (CSS, JS, profile_pics)
|
| 57 |
βββ uploads/ # User-uploaded files (isolated per user)
|
| 58 |
-
βββ
|
|
|
|
|
|
|
| 59 |
```
|
| 60 |
|
| 61 |
---
|
|
@@ -65,6 +71,7 @@ RAG_App/
|
|
| 65 |
### 1. Clone the Repository
|
| 66 |
```bash
|
| 67 |
git clone https://github.com/param20h/PDF-Assistant-RAG.git
|
|
|
|
| 68 |
```
|
| 69 |
|
| 70 |
### 2. Create and Activate Virtual Environment
|
|
@@ -84,14 +91,27 @@ pip install -r requirements.txt
|
|
| 84 |
```
|
| 85 |
|
| 86 |
### 4. Configure Environment Variables
|
| 87 |
-
Create a `.env` file
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
```env
|
| 89 |
-
SECRET_KEY=
|
| 90 |
-
|
|
|
|
| 91 |
GOOGLE_CLIENT_ID=your_google_oauth_client_id
|
| 92 |
GOOGLE_CLIENT_SECRET=your_google_oauth_client_secret
|
| 93 |
```
|
| 94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
|
| 96 |
### 5. Run the Application
|
| 97 |
```bash
|
|
@@ -103,59 +123,78 @@ Visit `http://localhost:5000` in your web browser.
|
|
| 103 |
|
| 104 |
---
|
| 105 |
|
| 106 |
-
## π
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
|
|
|
| 113 |
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
|
|
|
|
|
|
| 118 |
4. Set the Authorized redirect URI to: `http://localhost:5000/login/google/authorized`
|
| 119 |
-
5. Copy your `Client ID` and `Client Secret` into the `.env` file
|
| 120 |
|
| 121 |
---
|
| 122 |
|
| 123 |
## π How It Works (The RAG Pipeline)
|
| 124 |
|
| 125 |
-
1. **Upload**: User uploads a document (PDF, DOCX, TXT).
|
| 126 |
2. **Chunking**: The document is parsed and split into manageable textual chunks.
|
| 127 |
-
3. **Embedding**: Chunks are converted to
|
| 128 |
-
4. **Vector Storage**: Vectors are
|
| 129 |
5. **Querying**: The user submits a question.
|
| 130 |
-
6. **Retrieval**:
|
| 131 |
7. **Generation**: The retrieved context is passed to the selected LLM (Groq or Gemini) to generate an accurate, grounded answer.
|
| 132 |
|
| 133 |
---
|
| 134 |
|
| 135 |
-
##
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
|
| 137 |
| Tool | Purpose |
|
| 138 |
|------|---------|
|
| 139 |
-
| `GitHub Actions` | CI/CD Pipeline
|
| 140 |
-
| `Bandit` | SAST
|
| 141 |
| `Gitleaks` | Hardcoded secret and credential detection |
|
| 142 |
| `Trivy` | Container and dependency vulnerability checking |
|
| 143 |
| `Snyk` | Advanced dependency vulnerability scanning |
|
| 144 |
-
| `OWASP ZAP` | DAST
|
| 145 |
| `SonarCloud` | Overall code quality and security analysis |
|
| 146 |
-
| `
|
| 147 |
-
|
| 148 |
-
---
|
| 149 |
-
|
| 150 |
-
## π³ Docker Deployment
|
| 151 |
-
|
| 152 |
-
To spin up the application along with its dedicated MongoDB container, simply run:
|
| 153 |
-
|
| 154 |
-
```bash
|
| 155 |
-
docker-compose up -d --build
|
| 156 |
-
```
|
| 157 |
-
|
| 158 |
-
Then visit `http://localhost:5000` in your web browser!
|
| 159 |
|
| 160 |
---
|
| 161 |
|
|
|
|
| 1 |
# π RAG PDF Assistant
|
| 2 |
|
| 3 |
+
A **Retrieval-Augmented Generation (RAG)** document assistant built with Flask, Pinecone, Gemini Embeddings, Groq API, and Google Gemini. Upload PDFs, DOCX, TXT, or MD files and intuitively chat with them using modern AI models.
|
| 4 |
|
| 5 |
---
|
| 6 |
|
| 7 |
## π Features
|
| 8 |
|
| 9 |
+
- π **Multi-format Uploads**: Support for PDF, DOCX, TXT, and Markdown files.
|
| 10 |
- π¬ **Interactive Chat**: Query and chat with your documents using AI.
|
| 11 |
+
- π **RAG-based Retrieval**: Fast and accurate semantic search using Pinecone vector database.
|
| 12 |
+
- π§ **Multiple LLM Support**: Powered by Groq API (Llama 3) and Google Gemini.
|
| 13 |
- π **Robust Authentication**: Supports Google OAuth as well as standard Email/Password login.
|
| 14 |
- πΌοΈ **User Profiles**: Custom profile picture uploads & Google profile pic sync.
|
| 15 |
+
- π€ **Data Isolation**: Per-user namespaces in Pinecone for complete privacy.
|
| 16 |
+
- π‘οΈ **Admin Dashboard**: Admin panel to monitor users and uploaded files.
|
| 17 |
+
- ποΈ **Data Management**: Intuitive UI to delete files and clear vector stores.
|
| 18 |
- π± **Responsive UI**: Minimal and modern front-end for seamless user experience.
|
| 19 |
+
- ☁️ **Lightweight & Cloud-Native**: Zero local ML models — all embeddings and LLM calls are cloud-based API calls, requiring minimal server RAM.
|
| 20 |
|
| 21 |
---
|
| 22 |
|
|
|
|
| 26 |
|-------|------------|
|
| 27 |
| **Backend** | Flask (Python) |
|
| 28 |
| **Authentication** | Flask-Login + Flask-Dance (Google OAuth) |
|
| 29 |
+
| **Embeddings** | Google Gemini (`gemini-embedding-001`) |
|
| 30 |
+
| **Vector Store** | Pinecone (Serverless) |
|
| 31 |
+
| **LLMs** | Groq API (Llama 3.3 70B) & Google Gemini |
|
| 32 |
+
| **User Database** | MongoDB Atlas |
|
| 33 |
| **Frontend** | HTML, CSS, Vanilla JS |
|
| 34 |
|
| 35 |
---
|
|
|
|
| 39 |
```text
|
| 40 |
RAG_App/
|
| 41 |
βββ app.py # Main Flask application & routes
|
| 42 |
+
βββ models.py # MongoDB user model & encrypted key storage
|
| 43 |
+
βββ config.py # Configuration & env variables
|
| 44 |
βββ requirements.txt # Python dependencies
|
| 45 |
+
βββ render.yaml # Render deployment blueprint
|
| 46 |
+
βββ Dockerfile # Docker containerization
|
| 47 |
+
βββ .env.example # Environment variable template
|
| 48 |
βββ rag/
|
| 49 |
β βββ chunker.py # Document parsing & chunking logic
|
| 50 |
+
β βββ embeddings.py # Gemini embeddings + Pinecone upsert
|
| 51 |
+
β βββ retriever.py # Pinecone semantic search & retrieval
|
| 52 |
β βββ generator.py # LLM integration for answer generation
|
| 53 |
βββ templates/
|
| 54 |
β βββ index.html # File management & upload dashboard
|
|
|
|
| 56 |
β βββ login.html # User login page
|
| 57 |
β βββ register.html # User registration page
|
| 58 |
β βββ admin.html # Admin dashboard
|
| 59 |
+
β βββ profile.html # User profile & API key settings
|
| 60 |
βββ static/ # Static assets (CSS, JS, profile_pics)
|
| 61 |
βββ uploads/ # User-uploaded files (isolated per user)
|
| 62 |
+
βββ .github/workflows/
|
| 63 |
+
βββ devsecops.yml # Security scanning pipeline
|
| 64 |
+
βββ deploy.yml # Docker build & GHCR push pipeline
|
| 65 |
```
|
| 66 |
|
| 67 |
---
|
|
|
|
| 71 |
### 1. Clone the Repository
|
| 72 |
```bash
|
| 73 |
git clone https://github.com/param20h/PDF-Assistant-RAG.git
|
| 74 |
+
cd PDF-Assistant-RAG
|
| 75 |
```
|
| 76 |
|
| 77 |
### 2. Create and Activate Virtual Environment
|
|
|
|
| 91 |
```
|
| 92 |
|
| 93 |
### 4. Configure Environment Variables
|
| 94 |
+
Create a `.env` file using the template:
|
| 95 |
+
```bash
|
| 96 |
+
cp .env.example .env
|
| 97 |
+
```
|
| 98 |
+
|
| 99 |
+
Fill in the required server-side variables:
|
| 100 |
```env
|
| 101 |
+
SECRET_KEY=your_secure_random_key
|
| 102 |
+
ENCRYPTION_KEY=your_fernet_key
|
| 103 |
+
MONGO_URI=mongodb+srv://user:pass@cluster.mongodb.net/rag_app
|
| 104 |
GOOGLE_CLIENT_ID=your_google_oauth_client_id
|
| 105 |
GOOGLE_CLIENT_SECRET=your_google_oauth_client_secret
|
| 106 |
```
|
| 107 |
+
|
| 108 |
+
> **Generate keys:**
|
| 109 |
+
> ```bash
|
| 110 |
+
> # SECRET_KEY
|
| 111 |
+
> python -c "import secrets; print(secrets.token_hex(32))"
|
| 112 |
+
> # ENCRYPTION_KEY
|
| 113 |
+
> python -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())"
|
| 114 |
+
> ```
|
| 115 |
|
| 116 |
### 5. Run the Application
|
| 117 |
```bash
|
|
|
|
| 123 |
|
| 124 |
---
|
| 125 |
|
| 126 |
+
## π User Setup (Per-User API Keys)
|
| 127 |
+
|
| 128 |
+
After registering/logging in, each user must add their own API keys on the **Profile** page:
|
| 129 |
+
|
| 130 |
+
| Service | Required? | Where to Get | Notes |
|
| 131 |
+
|---------|-----------|--------------|-------|
|
| 132 |
+
| **Gemini API Key** | ✅ Required | [aistudio.google.com](https://aistudio.google.com) | Free — used for embeddings & chat |
|
| 133 |
+
| **Pinecone API Key** | ✅ Required | [app.pinecone.io](https://app.pinecone.io) | Free tier available |
|
| 134 |
+
| **Pinecone Index Name** | ✅ Required | Pinecone Dashboard | Create: dim `3072`, metric `cosine` |
|
| 135 |
+
| **Groq API Key** | Optional | [console.groq.com](https://console.groq.com) | For Llama 3 chat generation |
|
| 136 |
|
| 137 |
+
### π² Pinecone Index Setup
|
| 138 |
+
1. Create a free account at [pinecone.io](https://app.pinecone.io)
|
| 139 |
+
2. Create a **Serverless** index with:
|
| 140 |
+
- **Dimension**: `3072`
|
| 141 |
+
- **Metric**: `cosine`
|
| 142 |
+
3. Copy your API key and index name into the Profile page
|
| 143 |
|
| 144 |
+
---
|
| 145 |
+
|
| 146 |
+
## π Google OAuth Setup
|
| 147 |
+
1. Go to **Google Cloud Console** → [console.cloud.google.com](https://console.cloud.google.com)
|
| 148 |
+
2. Create a new project and navigate to **APIs & Services** → **Credentials**
|
| 149 |
+
3. Click **Create Credentials** → **OAuth Client ID**
|
| 150 |
4. Set the Authorized redirect URI to: `http://localhost:5000/login/google/authorized`
|
| 151 |
+
5. Copy your `Client ID` and `Client Secret` into the `.env` file
|
| 152 |
|
| 153 |
---
|
| 154 |
|
| 155 |
## π How It Works (The RAG Pipeline)
|
| 156 |
|
| 157 |
+
1. **Upload**: User uploads a document (PDF, DOCX, TXT, or MD).
|
| 158 |
2. **Chunking**: The document is parsed and split into manageable textual chunks.
|
| 159 |
+
3. **Embedding**: Chunks are converted to 3072-dimensional vectors using `gemini-embedding-001`.
|
| 160 |
+
4. **Vector Storage**: Vectors are stored in the user's Pinecone namespace.
|
| 161 |
5. **Querying**: The user submits a question.
|
| 162 |
+
6. **Retrieval**: Pinecone retrieves the most semantically relevant chunks.
|
| 163 |
7. **Generation**: The retrieved context is passed to the selected LLM (Groq or Gemini) to generate an accurate, grounded answer.
|
| 164 |
|
| 165 |
---
|
| 166 |
|
| 167 |
+
## π Deployment
|
| 168 |
+
|
| 169 |
+
### Deploy to Render (Recommended — Free)
|
| 170 |
+
1. Push your code to GitHub
|
| 171 |
+
2. Go to [Render](https://dashboard.render.com) → **New** → **Web Service**
|
| 172 |
+
3. Connect your GitHub repository
|
| 173 |
+
4. Render auto-detects `render.yaml` and configures everything
|
| 174 |
+
5. Add environment variables: `SECRET_KEY`, `ENCRYPTION_KEY`, `MONGO_URI`, `GOOGLE_CLIENT_ID`, `GOOGLE_CLIENT_SECRET`
|
| 175 |
+
6. Update Google OAuth redirect URI to: `https://your-app.onrender.com/login/google/authorized`
|
| 176 |
+
7. Deploy!
|
| 177 |
+
|
| 178 |
+
### Deploy with Docker
|
| 179 |
+
```bash
|
| 180 |
+
docker build -t rag-app .
|
| 181 |
+
docker run -p 5000:5000 --env-file .env rag-app
|
| 182 |
+
```
|
| 183 |
+
|
| 184 |
+
---
|
| 185 |
+
|
| 186 |
+
## π DevSecOps Pipeline
|
| 187 |
|
| 188 |
| Tool | Purpose |
|
| 189 |
|------|---------|
|
| 190 |
+
| `GitHub Actions` | CI/CD Pipeline |
|
| 191 |
+
| `Bandit` | SAST — Python security vulnerability scanning |
|
| 192 |
| `Gitleaks` | Hardcoded secret and credential detection |
|
| 193 |
| `Trivy` | Container and dependency vulnerability checking |
|
| 194 |
| `Snyk` | Advanced dependency vulnerability scanning |
|
| 195 |
+
| `OWASP ZAP` | DAST — Dynamic web security scanning |
|
| 196 |
| `SonarCloud` | Overall code quality and security analysis |
|
| 197 |
+
| `GHCR` | Docker image hosting via GitHub Container Registry |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198 |
|
| 199 |
---
|
| 200 |
|
app.py
CHANGED
|
@@ -26,16 +26,14 @@ def patched_fetch(self, *args, **kwargs):
|
|
| 26 |
return original_fetch(self, *args, **kwargs)
|
| 27 |
OAuth2Session.fetch_token = patched_fetch
|
| 28 |
|
| 29 |
-
import
|
| 30 |
-
import shutil
|
| 31 |
-
from flask import Flask, request, jsonify, render_template, redirect, url_for
|
| 32 |
from flask_login import LoginManager, login_user, logout_user, login_required, current_user
|
| 33 |
from flask_dance.contrib.google import make_google_blueprint, google
|
| 34 |
from flask_dance.consumer import oauth_authorized
|
| 35 |
from dotenv import load_dotenv
|
| 36 |
from models import User
|
| 37 |
from rag.chunker import load_and_chunk
|
| 38 |
-
from rag.embeddings import store_embeddings
|
| 39 |
from rag.retriever import retrieve_chunks
|
| 40 |
from rag.generator import generate_answer
|
| 41 |
from config import SECRET_KEY, MONGO_URI, GOOGLE_CLIENT_ID, GOOGLE_CLIENT_SECRET
|
|
@@ -47,10 +45,7 @@ app = Flask(__name__)
|
|
| 47 |
app.config["SECRET_KEY"] = SECRET_KEY
|
| 48 |
app.config["UPLOAD_FOLDER"] = "uploads"
|
| 49 |
|
| 50 |
-
META_PATH = os.path.join("vectorstore", "metadata.pkl")
|
| 51 |
-
|
| 52 |
os.makedirs("uploads", exist_ok=True)
|
| 53 |
-
os.makedirs("vectorstore", exist_ok=True)
|
| 54 |
|
| 55 |
# ββ Google Blueprint ββββββββββββββββββββββββββββββββββ
|
| 56 |
google_bp = make_google_blueprint(
|
|
@@ -172,10 +167,9 @@ def get_user_upload_folder(username):
|
|
| 172 |
os.makedirs(folder, exist_ok=True)
|
| 173 |
return folder
|
| 174 |
|
| 175 |
-
def
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
return os.path.join(path, "metadata.pkl")
|
| 179 |
|
| 180 |
# ββ Auth Routes βββββββββββββββββββββββββββββββββββββββ
|
| 181 |
|
|
@@ -254,7 +248,6 @@ def admin_dashboard():
|
|
| 254 |
@app.route("/download/<username>/<filename>")
|
| 255 |
@login_required
|
| 256 |
def download_file(username, filename):
|
| 257 |
-
# Only the owner or an admin can download
|
| 258 |
if current_user.username != username and not current_user.is_admin:
|
| 259 |
return "Unauthorized", 403
|
| 260 |
|
|
@@ -263,7 +256,6 @@ def download_file(username, filename):
|
|
| 263 |
if not os.path.exists(filepath):
|
| 264 |
return "File not found", 404
|
| 265 |
|
| 266 |
-
from flask import send_file
|
| 267 |
return send_file(filepath, as_attachment=True)
|
| 268 |
|
| 269 |
@app.route("/profile", methods=["GET"])
|
|
@@ -278,19 +270,31 @@ def update_settings():
|
|
| 278 |
data = request.get_json()
|
| 279 |
current_user.preferred_model = data.get("preferred_model", "groq")
|
| 280 |
|
| 281 |
-
#
|
| 282 |
groq_req = data.get("groq_key", "").strip()
|
| 283 |
-
gemini_req = data.get("gemini_key", "").strip()
|
| 284 |
-
|
| 285 |
if groq_req == "DELETE":
|
| 286 |
current_user.set_groq_key(None)
|
| 287 |
elif groq_req:
|
| 288 |
current_user.set_groq_key(groq_req)
|
| 289 |
|
|
|
|
|
|
|
| 290 |
if gemini_req == "DELETE":
|
| 291 |
current_user.set_gemini_key(None)
|
| 292 |
elif gemini_req:
|
| 293 |
current_user.set_gemini_key(gemini_req)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 294 |
|
| 295 |
current_user.save()
|
| 296 |
return jsonify({"message": "Settings updated successfully!"}), 200
|
|
@@ -311,8 +315,8 @@ def get_files():
|
|
| 311 |
@login_required
|
| 312 |
def upload():
|
| 313 |
try:
|
| 314 |
-
if not
|
| 315 |
-
return jsonify({"error": "β οΈ Please add your
|
| 316 |
|
| 317 |
if "pdf" not in request.files:
|
| 318 |
return jsonify({"error": "No file found"}), 400
|
|
@@ -329,9 +333,8 @@ def upload():
|
|
| 329 |
filepath = os.path.join(folder, file.filename)
|
| 330 |
file.save(filepath)
|
| 331 |
|
| 332 |
-
meta_path = get_user_meta_path(current_user.username)
|
| 333 |
chunks = load_and_chunk(filepath)
|
| 334 |
-
store_embeddings(chunks, file.filename,
|
| 335 |
|
| 336 |
return jsonify({"message": f"{file.filename} uploaded successfully!"}), 200
|
| 337 |
|
|
@@ -342,8 +345,8 @@ def upload():
|
|
| 342 |
@login_required
|
| 343 |
def ask():
|
| 344 |
try:
|
| 345 |
-
if not
|
| 346 |
-
return jsonify({"error": "β οΈ Please add your
|
| 347 |
|
| 348 |
data = request.get_json()
|
| 349 |
question = data.get("question", "").strip()
|
|
@@ -352,8 +355,7 @@ def ask():
|
|
| 352 |
if not question:
|
| 353 |
return jsonify({"error": "Question cannot be empty"}), 400
|
| 354 |
|
| 355 |
-
|
| 356 |
-
context_chunks = retrieve_chunks(question, filename, meta_path)
|
| 357 |
answer = generate_answer(question, context_chunks, current_user)
|
| 358 |
|
| 359 |
username = current_user.username
|
|
@@ -410,13 +412,8 @@ def delete():
|
|
| 410 |
|
| 411 |
os.remove(filepath)
|
| 412 |
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
with open(meta_path, "rb") as f:
|
| 416 |
-
metadata = pickle.load(f)
|
| 417 |
-
new_metadata = [m for m in metadata if m["filename"] != filename]
|
| 418 |
-
with open(meta_path, "wb") as f:
|
| 419 |
-
pickle.dump(new_metadata, f)
|
| 420 |
|
| 421 |
return jsonify({"message": f"{filename} deleted successfully!"}), 200
|
| 422 |
|
|
@@ -427,13 +424,8 @@ def delete():
|
|
| 427 |
@login_required
|
| 428 |
def clear_vectorstore():
|
| 429 |
try:
|
| 430 |
-
|
| 431 |
-
|
| 432 |
-
|
| 433 |
-
if os.path.exists(vectorstore_path):
|
| 434 |
-
shutil.rmtree(vectorstore_path)
|
| 435 |
-
os.makedirs(vectorstore_path, exist_ok=True)
|
| 436 |
-
|
| 437 |
return jsonify({"message": "Vector store cleared successfully!"}), 200
|
| 438 |
|
| 439 |
except Exception as e:
|
|
|
|
| 26 |
return original_fetch(self, *args, **kwargs)
|
| 27 |
OAuth2Session.fetch_token = patched_fetch
|
| 28 |
|
| 29 |
+
from flask import Flask, request, jsonify, render_template, redirect, url_for, send_file
|
|
|
|
|
|
|
| 30 |
from flask_login import LoginManager, login_user, logout_user, login_required, current_user
|
| 31 |
from flask_dance.contrib.google import make_google_blueprint, google
|
| 32 |
from flask_dance.consumer import oauth_authorized
|
| 33 |
from dotenv import load_dotenv
|
| 34 |
from models import User
|
| 35 |
from rag.chunker import load_and_chunk
|
| 36 |
+
from rag.embeddings import store_embeddings, delete_embeddings, clear_all_embeddings
|
| 37 |
from rag.retriever import retrieve_chunks
|
| 38 |
from rag.generator import generate_answer
|
| 39 |
from config import SECRET_KEY, MONGO_URI, GOOGLE_CLIENT_ID, GOOGLE_CLIENT_SECRET
|
|
|
|
| 45 |
app.config["SECRET_KEY"] = SECRET_KEY
|
| 46 |
app.config["UPLOAD_FOLDER"] = "uploads"
|
| 47 |
|
|
|
|
|
|
|
| 48 |
os.makedirs("uploads", exist_ok=True)
|
|
|
|
| 49 |
|
| 50 |
# ββ Google Blueprint ββββββββββββββββββββββββββββββββββ
|
| 51 |
google_bp = make_google_blueprint(
|
|
|
|
| 167 |
os.makedirs(folder, exist_ok=True)
|
| 168 |
return folder
|
| 169 |
|
| 170 |
+
def user_has_rag_keys(user):
|
| 171 |
+
"""Check if user has all required keys for RAG operations."""
|
| 172 |
+
return (user.get_gemini_key() and user.get_pinecone_key() and user.pinecone_index_name)
|
|
|
|
| 173 |
|
| 174 |
# ββ Auth Routes βββββββββββββββββββββββββββββββββββββββ
|
| 175 |
|
|
|
|
| 248 |
@app.route("/download/<username>/<filename>")
|
| 249 |
@login_required
|
| 250 |
def download_file(username, filename):
|
|
|
|
| 251 |
if current_user.username != username and not current_user.is_admin:
|
| 252 |
return "Unauthorized", 403
|
| 253 |
|
|
|
|
| 256 |
if not os.path.exists(filepath):
|
| 257 |
return "File not found", 404
|
| 258 |
|
|
|
|
| 259 |
return send_file(filepath, as_attachment=True)
|
| 260 |
|
| 261 |
@app.route("/profile", methods=["GET"])
|
|
|
|
| 270 |
data = request.get_json()
|
| 271 |
current_user.preferred_model = data.get("preferred_model", "groq")
|
| 272 |
|
| 273 |
+
# ββ Groq Key ββ
|
| 274 |
groq_req = data.get("groq_key", "").strip()
|
|
|
|
|
|
|
| 275 |
if groq_req == "DELETE":
|
| 276 |
current_user.set_groq_key(None)
|
| 277 |
elif groq_req:
|
| 278 |
current_user.set_groq_key(groq_req)
|
| 279 |
|
| 280 |
+
# ββ Gemini Key ββ
|
| 281 |
+
gemini_req = data.get("gemini_key", "").strip()
|
| 282 |
if gemini_req == "DELETE":
|
| 283 |
current_user.set_gemini_key(None)
|
| 284 |
elif gemini_req:
|
| 285 |
current_user.set_gemini_key(gemini_req)
|
| 286 |
+
|
| 287 |
+
# ββ Pinecone Key ββ
|
| 288 |
+
pinecone_req = data.get("pinecone_key", "").strip()
|
| 289 |
+
if pinecone_req == "DELETE":
|
| 290 |
+
current_user.set_pinecone_key(None)
|
| 291 |
+
elif pinecone_req:
|
| 292 |
+
current_user.set_pinecone_key(pinecone_req)
|
| 293 |
+
|
| 294 |
+
# ββ Pinecone Index Name ββ
|
| 295 |
+
pinecone_index = data.get("pinecone_index", "").strip()
|
| 296 |
+
if pinecone_index:
|
| 297 |
+
current_user.pinecone_index_name = pinecone_index
|
| 298 |
|
| 299 |
current_user.save()
|
| 300 |
return jsonify({"message": "Settings updated successfully!"}), 200
|
|
|
|
| 315 |
@login_required
|
| 316 |
def upload():
|
| 317 |
try:
|
| 318 |
+
if not user_has_rag_keys(current_user):
|
| 319 |
+
return jsonify({"error": "β οΈ Please add your Gemini API key, Pinecone API key, and Pinecone index name in the Profile page to upload and chat."}), 400
|
| 320 |
|
| 321 |
if "pdf" not in request.files:
|
| 322 |
return jsonify({"error": "No file found"}), 400
|
|
|
|
| 333 |
filepath = os.path.join(folder, file.filename)
|
| 334 |
file.save(filepath)
|
| 335 |
|
|
|
|
| 336 |
chunks = load_and_chunk(filepath)
|
| 337 |
+
store_embeddings(chunks, file.filename, current_user)
|
| 338 |
|
| 339 |
return jsonify({"message": f"{file.filename} uploaded successfully!"}), 200
|
| 340 |
|
|
|
|
| 345 |
@login_required
|
| 346 |
def ask():
|
| 347 |
try:
|
| 348 |
+
if not user_has_rag_keys(current_user):
|
| 349 |
+
return jsonify({"error": "β οΈ Please add your Gemini API key, Pinecone API key, and Pinecone index name in the Profile page to upload and chat."}), 400
|
| 350 |
|
| 351 |
data = request.get_json()
|
| 352 |
question = data.get("question", "").strip()
|
|
|
|
| 355 |
if not question:
|
| 356 |
return jsonify({"error": "Question cannot be empty"}), 400
|
| 357 |
|
| 358 |
+
context_chunks = retrieve_chunks(question, filename, current_user)
|
|
|
|
| 359 |
answer = generate_answer(question, context_chunks, current_user)
|
| 360 |
|
| 361 |
username = current_user.username
|
|
|
|
| 412 |
|
| 413 |
os.remove(filepath)
|
| 414 |
|
| 415 |
+
# Delete vectors from Pinecone
|
| 416 |
+
delete_embeddings(filename, current_user)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 417 |
|
| 418 |
return jsonify({"message": f"{filename} deleted successfully!"}), 200
|
| 419 |
|
|
|
|
| 424 |
@login_required
|
| 425 |
def clear_vectorstore():
|
| 426 |
try:
|
| 427 |
+
# Clear all vectors in user's Pinecone namespace
|
| 428 |
+
clear_all_embeddings(current_user)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 429 |
return jsonify({"message": "Vector store cleared successfully!"}), 200
|
| 430 |
|
| 431 |
except Exception as e:
|
config.py
CHANGED
|
@@ -12,15 +12,13 @@ MONGO_URI = os.getenv("MONGO_URI", "mongodb://localhost:27017/rag_app")
|
|
| 12 |
UPLOAD_FOLDER = "uploads"
|
| 13 |
ALLOWED_EXTENSIONS = {"pdf", "docx", "txt", "md"}
|
| 14 |
|
| 15 |
-
# ββ
|
| 16 |
-
|
| 17 |
-
CHROMA_DB_PATH = "vectorstore"
|
| 18 |
-
TOP_K = 50
|
| 19 |
CHUNK_SIZE = 500
|
| 20 |
CHUNK_OVERLAP = 50
|
| 21 |
|
| 22 |
# ββ Groq Config ββββββββββββββββββββββββββββββββββββββ
|
| 23 |
-
GROQ_MODEL = "llama-3.3-70b-versatile"
|
| 24 |
|
| 25 |
# ββ Google OAuth Config ββββββββββββββββββββββββββββββ
|
| 26 |
GOOGLE_CLIENT_ID = os.getenv("GOOGLE_CLIENT_ID")
|
|
|
|
| 12 |
UPLOAD_FOLDER = "uploads"
|
| 13 |
ALLOWED_EXTENSIONS = {"pdf", "docx", "txt", "md"}
|
| 14 |
|
| 15 |
+
# ββ RAG Config βββββββββββββββββββββββββββββββββββββββ
|
| 16 |
+
TOP_K = 5
|
|
|
|
|
|
|
| 17 |
CHUNK_SIZE = 500
|
| 18 |
CHUNK_OVERLAP = 50
|
| 19 |
|
| 20 |
# ββ Groq Config ββββββββββββββββββββββββββββββββββββββ
|
| 21 |
+
GROQ_MODEL = "llama-3.3-70b-versatile"
|
| 22 |
|
| 23 |
# ββ Google OAuth Config ββββββββββββββββββββββββββββββ
|
| 24 |
GOOGLE_CLIENT_ID = os.getenv("GOOGLE_CLIENT_ID")
|
models.py
CHANGED
|
@@ -7,15 +7,19 @@ from config import ENCRYPTION_KEY, MONGO_URI
|
|
| 7 |
|
| 8 |
# Connect to MongoDB
|
| 9 |
mongo_client = pymongo.MongoClient(MONGO_URI)
|
| 10 |
-
|
| 11 |
-
|
|
|
|
| 12 |
db = mongo_client["rag_app"]
|
| 13 |
|
| 14 |
users_collection = db["users"]
|
| 15 |
cipher_suite = Fernet(ENCRYPTION_KEY)
|
| 16 |
|
| 17 |
class User(UserMixin):
|
| 18 |
-
def __init__(self, username, email, password=None, _id=None, google_id=None,
|
|
|
|
|
|
|
|
|
|
| 19 |
self.username = username
|
| 20 |
self.email = email
|
| 21 |
self.password = password
|
|
@@ -23,6 +27,8 @@ class User(UserMixin):
|
|
| 23 |
self.profile_pic = profile_pic
|
| 24 |
self.groq_api_key = groq_api_key
|
| 25 |
self.gemini_api_key = gemini_api_key
|
|
|
|
|
|
|
| 26 |
self.preferred_model = preferred_model
|
| 27 |
self.is_admin = is_admin
|
| 28 |
if _id:
|
|
@@ -31,7 +37,7 @@ class User(UserMixin):
|
|
| 31 |
self.id = None
|
| 32 |
|
| 33 |
def get_id(self):
|
| 34 |
-
return self.id or self.username
|
| 35 |
|
| 36 |
def save(self):
|
| 37 |
user_data = {
|
|
@@ -42,6 +48,8 @@ class User(UserMixin):
|
|
| 42 |
"profile_pic": self.profile_pic,
|
| 43 |
"groq_api_key": self.groq_api_key,
|
| 44 |
"gemini_api_key": self.gemini_api_key,
|
|
|
|
|
|
|
| 45 |
"preferred_model": self.preferred_model,
|
| 46 |
"is_admin": self.is_admin
|
| 47 |
}
|
|
@@ -58,6 +66,7 @@ class User(UserMixin):
|
|
| 58 |
def check_password(self, password):
|
| 59 |
return check_password_hash(self.password, password)
|
| 60 |
|
|
|
|
| 61 |
def set_groq_key(self, api_key):
|
| 62 |
if api_key:
|
| 63 |
self.groq_api_key = cipher_suite.encrypt(api_key.encode('utf-8')).decode('utf-8')
|
|
@@ -72,6 +81,7 @@ class User(UserMixin):
|
|
| 72 |
return None
|
| 73 |
return None
|
| 74 |
|
|
|
|
| 75 |
def set_gemini_key(self, api_key):
|
| 76 |
if api_key:
|
| 77 |
self.gemini_api_key = cipher_suite.encrypt(api_key.encode('utf-8')).decode('utf-8')
|
|
@@ -86,6 +96,21 @@ class User(UserMixin):
|
|
| 86 |
return None
|
| 87 |
return None
|
| 88 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
@classmethod
|
| 90 |
def get(cls, user_id):
|
| 91 |
try:
|
|
|
|
| 7 |
|
| 8 |
# Connect to MongoDB
|
| 9 |
mongo_client = pymongo.MongoClient(MONGO_URI)
|
| 10 |
+
try:
|
| 11 |
+
db = mongo_client.get_default_database()
|
| 12 |
+
except pymongo.errors.ConfigurationError:
|
| 13 |
db = mongo_client["rag_app"]
|
| 14 |
|
| 15 |
users_collection = db["users"]
|
| 16 |
cipher_suite = Fernet(ENCRYPTION_KEY)
|
| 17 |
|
| 18 |
class User(UserMixin):
|
| 19 |
+
def __init__(self, username, email, password=None, _id=None, google_id=None,
|
| 20 |
+
profile_pic=None, groq_api_key=None, gemini_api_key=None,
|
| 21 |
+
pinecone_api_key=None, pinecone_index_name=None,
|
| 22 |
+
preferred_model="groq", is_admin=False):
|
| 23 |
self.username = username
|
| 24 |
self.email = email
|
| 25 |
self.password = password
|
|
|
|
| 27 |
self.profile_pic = profile_pic
|
| 28 |
self.groq_api_key = groq_api_key
|
| 29 |
self.gemini_api_key = gemini_api_key
|
| 30 |
+
self.pinecone_api_key = pinecone_api_key
|
| 31 |
+
self.pinecone_index_name = pinecone_index_name or ""
|
| 32 |
self.preferred_model = preferred_model
|
| 33 |
self.is_admin = is_admin
|
| 34 |
if _id:
|
|
|
|
| 37 |
self.id = None
|
| 38 |
|
| 39 |
def get_id(self):
|
| 40 |
+
return self.id or self.username
|
| 41 |
|
| 42 |
def save(self):
|
| 43 |
user_data = {
|
|
|
|
| 48 |
"profile_pic": self.profile_pic,
|
| 49 |
"groq_api_key": self.groq_api_key,
|
| 50 |
"gemini_api_key": self.gemini_api_key,
|
| 51 |
+
"pinecone_api_key": self.pinecone_api_key,
|
| 52 |
+
"pinecone_index_name": self.pinecone_index_name,
|
| 53 |
"preferred_model": self.preferred_model,
|
| 54 |
"is_admin": self.is_admin
|
| 55 |
}
|
|
|
|
| 66 |
def check_password(self, password):
|
| 67 |
return check_password_hash(self.password, password)
|
| 68 |
|
| 69 |
+
# ── Groq Key ─────────────────────────────────────
|
| 70 |
def set_groq_key(self, api_key):
|
| 71 |
if api_key:
|
| 72 |
self.groq_api_key = cipher_suite.encrypt(api_key.encode('utf-8')).decode('utf-8')
|
|
|
|
| 81 |
return None
|
| 82 |
return None
|
| 83 |
|
| 84 |
+
# ── Gemini Key ───────────────────────────────────
|
| 85 |
def set_gemini_key(self, api_key):
|
| 86 |
if api_key:
|
| 87 |
self.gemini_api_key = cipher_suite.encrypt(api_key.encode('utf-8')).decode('utf-8')
|
|
|
|
| 96 |
return None
|
| 97 |
return None
|
| 98 |
|
| 99 |
+
# ── Pinecone Key ─────────────────────────────────
|
| 100 |
+
def set_pinecone_key(self, api_key):
|
| 101 |
+
if api_key:
|
| 102 |
+
self.pinecone_api_key = cipher_suite.encrypt(api_key.encode('utf-8')).decode('utf-8')
|
| 103 |
+
else:
|
| 104 |
+
self.pinecone_api_key = None
|
| 105 |
+
|
| 106 |
+
def get_pinecone_key(self):
|
| 107 |
+
if self.pinecone_api_key:
|
| 108 |
+
try:
|
| 109 |
+
return cipher_suite.decrypt(self.pinecone_api_key.encode('utf-8')).decode('utf-8')
|
| 110 |
+
except Exception:
|
| 111 |
+
return None
|
| 112 |
+
return None
|
| 113 |
+
|
| 114 |
@classmethod
|
| 115 |
def get(cls, user_id):
|
| 116 |
try:
|
rag/embeddings.py
CHANGED
|
@@ -1,49 +1,98 @@
|
|
| 1 |
-
import
|
| 2 |
-
|
| 3 |
-
import
|
| 4 |
-
import
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
return
|
| 15 |
-
|
| 16 |
-
# ββ
|
| 17 |
-
def
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from google import genai
|
| 2 |
+
from google.genai import types
|
| 3 |
+
from pinecone import Pinecone
|
| 4 |
+
from config import CHUNK_SIZE, CHUNK_OVERLAP
|
| 5 |
+
|
| 6 |
+
# ── Gemini Embedding ─────────────────────────────────
|
| 7 |
+
def embed_text(text, gemini_key):
|
| 8 |
+
"""Generate embedding using Gemini's free gemini-embedding-001 model."""
|
| 9 |
+
client = genai.Client(api_key=gemini_key)
|
| 10 |
+
result = client.models.embed_content(
|
| 11 |
+
model="gemini-embedding-001",
|
| 12 |
+
contents=text
|
| 13 |
+
)
|
| 14 |
+
return result.embeddings[0].values # 3072-dimensional vector
|
| 15 |
+
|
| 16 |
+
# ── Get Pinecone Index ───────────────────────────────
|
| 17 |
+
def get_pinecone_index(pinecone_key, index_name):
|
| 18 |
+
"""Connect to user's Pinecone index."""
|
| 19 |
+
pc = Pinecone(api_key=pinecone_key)
|
| 20 |
+
return pc.Index(index_name)
|
| 21 |
+
|
| 22 |
+
# ── Store Embeddings in Pinecone ─────────────────────
|
| 23 |
+
def store_embeddings(chunks, filename, user):
|
| 24 |
+
"""Embed chunks using Gemini and upsert into user's Pinecone index."""
|
| 25 |
+
gemini_key = user.get_gemini_key()
|
| 26 |
+
pinecone_key = user.get_pinecone_key()
|
| 27 |
+
index_name = user.pinecone_index_name
|
| 28 |
+
|
| 29 |
+
if not gemini_key:
|
| 30 |
+
raise ValueError("Gemini API key is required for embeddings. Please add it in your Profile.")
|
| 31 |
+
if not pinecone_key or not index_name:
|
| 32 |
+
raise ValueError("Pinecone API key and index name are required. Please add them in your Profile.")
|
| 33 |
+
|
| 34 |
+
index = get_pinecone_index(pinecone_key, index_name)
|
| 35 |
+
namespace = user.username
|
| 36 |
+
|
| 37 |
+
# Batch upsert vectors
|
| 38 |
+
batch_size = 50
|
| 39 |
+
for i in range(0, len(chunks), batch_size):
|
| 40 |
+
batch = chunks[i:i + batch_size]
|
| 41 |
+
vectors = []
|
| 42 |
+
|
| 43 |
+
for j, chunk in enumerate(batch):
|
| 44 |
+
embedding = embed_text(chunk["text"], gemini_key)
|
| 45 |
+
vector_id = f"{filename}_{i + j}"
|
| 46 |
+
|
| 47 |
+
vectors.append({
|
| 48 |
+
"id": vector_id,
|
| 49 |
+
"values": embedding,
|
| 50 |
+
"metadata": {
|
| 51 |
+
"text": chunk["text"],
|
| 52 |
+
"filename": filename,
|
| 53 |
+
"page": chunk["page"],
|
| 54 |
+
"chunk_index": i + j
|
| 55 |
+
}
|
| 56 |
+
})
|
| 57 |
+
|
| 58 |
+
index.upsert(vectors=vectors, namespace=namespace)
|
| 59 |
+
|
| 60 |
+
# ── Delete Vectors by Filename ───────────────────────
|
| 61 |
+
def delete_embeddings(filename, user):
|
| 62 |
+
"""Delete all vectors for a specific file from user's Pinecone index."""
|
| 63 |
+
pinecone_key = user.get_pinecone_key()
|
| 64 |
+
index_name = user.pinecone_index_name
|
| 65 |
+
|
| 66 |
+
if not pinecone_key or not index_name:
|
| 67 |
+
return
|
| 68 |
+
|
| 69 |
+
index = get_pinecone_index(pinecone_key, index_name)
|
| 70 |
+
namespace = user.username
|
| 71 |
+
|
| 72 |
+
try:
|
| 73 |
+
dummy_vector = [0.0] * 3072
|
| 74 |
+
results = index.query(
|
| 75 |
+
vector=dummy_vector,
|
| 76 |
+
top_k=10000,
|
| 77 |
+
namespace=namespace,
|
| 78 |
+
filter={"filename": {"$eq": filename}},
|
| 79 |
+
include_metadata=False
|
| 80 |
+
)
|
| 81 |
+
|
| 82 |
+
if results.matches:
|
| 83 |
+
ids_to_delete = [match.id for match in results.matches]
|
| 84 |
+
index.delete(ids=ids_to_delete, namespace=namespace)
|
| 85 |
+
except Exception as e:
|
| 86 |
+
print(f"Error deleting embeddings: {e}")
|
| 87 |
+
|
| 88 |
+
# ── Clear All Vectors for User ───────────────────────
|
| 89 |
+
def clear_all_embeddings(user):
|
| 90 |
+
"""Delete all vectors in user's namespace."""
|
| 91 |
+
pinecone_key = user.get_pinecone_key()
|
| 92 |
+
index_name = user.pinecone_index_name
|
| 93 |
+
|
| 94 |
+
if not pinecone_key or not index_name:
|
| 95 |
+
return
|
| 96 |
+
|
| 97 |
+
index = get_pinecone_index(pinecone_key, index_name)
|
| 98 |
+
index.delete(delete_all=True, namespace=user.username)
|
rag/generator.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
| 1 |
import os
|
| 2 |
-
|
|
|
|
| 3 |
from dotenv import load_dotenv
|
| 4 |
|
| 5 |
-
load_dotenv()
|
| 6 |
|
| 7 |
from groq import Groq
|
| 8 |
from config import GROQ_MODEL
|
|
@@ -44,9 +45,11 @@ Answer:"""
|
|
| 44 |
if not key:
|
| 45 |
return "β No Gemini API key available. Please add it in your Profile settings."
|
| 46 |
|
| 47 |
-
genai.
|
| 48 |
-
|
| 49 |
-
|
|
|
|
|
|
|
| 50 |
return response.text
|
| 51 |
else:
|
| 52 |
key = user.get_groq_key() if user else None
|
|
|
|
| 1 |
import os
|
| 2 |
+
from google import genai
|
| 3 |
+
from google.genai import types
|
| 4 |
from dotenv import load_dotenv
|
| 5 |
|
| 6 |
+
load_dotenv()
|
| 7 |
|
| 8 |
from groq import Groq
|
| 9 |
from config import GROQ_MODEL
|
|
|
|
| 45 |
if not key:
|
| 46 |
return "β No Gemini API key available. Please add it in your Profile settings."
|
| 47 |
|
| 48 |
+
client = genai.Client(api_key=key, http_options=types.HttpOptions(api_version="v1"))
|
| 49 |
+
response = client.models.generate_content(
|
| 50 |
+
model="gemini-2.0-flash",
|
| 51 |
+
contents=prompt
|
| 52 |
+
)
|
| 53 |
return response.text
|
| 54 |
else:
|
| 55 |
key = user.get_groq_key() if user else None
|
rag/retriever.py
CHANGED
|
@@ -1,80 +1,71 @@
|
|
| 1 |
-
import
|
| 2 |
-
|
| 3 |
-
import
|
| 4 |
-
import
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
return []
|
| 16 |
|
| 17 |
-
|
|
|
|
|
|
|
| 18 |
|
| 19 |
-
if not
|
| 20 |
return []
|
| 21 |
|
| 22 |
-
|
|
|
|
|
|
|
| 23 |
|
| 24 |
-
|
|
|
|
|
|
|
| 25 |
|
| 26 |
-
|
| 27 |
-
|
|
|
|
|
|
|
| 28 |
|
| 29 |
-
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
# ── Fix: check distances is not empty ──
|
| 38 |
-
if len(distances) == 0 or len(distances[0]) == 0:
|
| 39 |
-
return []
|
| 40 |
|
| 41 |
-
|
|
|
|
| 42 |
|
| 43 |
-
chunks = []
|
| 44 |
-
for i, idx in enumerate(indices[0]):
|
| 45 |
-
# ── Fix: skip invalid indices ──
|
| 46 |
-
if idx == -1 or idx >= len(metadata):
|
| 47 |
-
continue
|
| 48 |
-
|
| 49 |
-
if filename and metadata[idx]["filename"] != filename:
|
| 50 |
-
continue
|
| 51 |
-
|
| 52 |
-
raw_score = float(distances[0][i])
|
| 53 |
-
confidence = round((1 - (raw_score / max_distance)) * 100, 2)
|
| 54 |
-
|
| 55 |
-
chunks.append({
|
| 56 |
-
"text": metadata[idx]["text"],
|
| 57 |
-
"filename": metadata[idx]["filename"],
|
| 58 |
-
"page": metadata[idx]["page"],
|
| 59 |
-
"score": raw_score,
|
| 60 |
-
"confidence": confidence
|
| 61 |
-
})
|
| 62 |
-
|
| 63 |
-
if len(chunks) == TOP_K:
|
| 64 |
-
break
|
| 65 |
-
|
| 66 |
-
# fallback: if no specific good match, and the user asks a very generic question
|
| 67 |
-
# and we have chunks for this file, just return the first chunk of the file
|
| 68 |
-
if not chunks and filename:
|
| 69 |
-
for idx in range(len(metadata)):
|
| 70 |
-
if metadata[idx]["filename"] == filename:
|
| 71 |
chunks.append({
|
| 72 |
-
"text": metadata
|
| 73 |
-
"filename": metadata
|
| 74 |
-
"page": metadata
|
| 75 |
-
"score":
|
| 76 |
-
"confidence":
|
| 77 |
})
|
| 78 |
-
break
|
| 79 |
|
| 80 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from google import genai
|
| 2 |
+
from google.genai import types
|
| 3 |
+
from pinecone import Pinecone
|
| 4 |
+
from config import TOP_K
|
| 5 |
+
|
| 6 |
+
# ── Gemini Embedding ─────────────────────────────────
|
| 7 |
+
def embed_query(query, gemini_key):
|
| 8 |
+
"""Generate query embedding using Gemini's gemini-embedding-001."""
|
| 9 |
+
client = genai.Client(api_key=gemini_key)
|
| 10 |
+
result = client.models.embed_content(
|
| 11 |
+
model="gemini-embedding-001",
|
| 12 |
+
contents=query
|
| 13 |
+
)
|
| 14 |
+
return result.embeddings[0].values
|
| 15 |
+
|
| 16 |
+
# ── Retrieve Chunks from Pinecone ────────────────────
|
| 17 |
+
def retrieve_chunks(query, filename=None, user=None):
|
| 18 |
+
"""Query user's Pinecone index for relevant chunks."""
|
| 19 |
+
if not user:
|
| 20 |
return []
|
| 21 |
|
| 22 |
+
gemini_key = user.get_gemini_key()
|
| 23 |
+
pinecone_key = user.get_pinecone_key()
|
| 24 |
+
index_name = user.pinecone_index_name
|
| 25 |
|
| 26 |
+
if not gemini_key or not pinecone_key or not index_name:
|
| 27 |
return []
|
| 28 |
|
| 29 |
+
try:
|
| 30 |
+
# Generate query embedding
|
| 31 |
+
query_embedding = embed_query(query, gemini_key)
|
| 32 |
|
| 33 |
+
# Connect to Pinecone
|
| 34 |
+
pc = Pinecone(api_key=pinecone_key)
|
| 35 |
+
index = pc.Index(index_name)
|
| 36 |
|
| 37 |
+
# Build metadata filter
|
| 38 |
+
filter_dict = None
|
| 39 |
+
if filename:
|
| 40 |
+
filter_dict = {"filename": {"$eq": filename}}
|
| 41 |
|
| 42 |
+
# Query Pinecone
|
| 43 |
+
results = index.query(
|
| 44 |
+
vector=query_embedding,
|
| 45 |
+
top_k=TOP_K,
|
| 46 |
+
namespace=user.username,
|
| 47 |
+
filter=filter_dict,
|
| 48 |
+
include_metadata=True
|
| 49 |
+
)
|
| 50 |
|
| 51 |
+
# Format results
|
| 52 |
+
chunks = []
|
| 53 |
+
if results.matches:
|
| 54 |
+
max_score = max(m.score for m in results.matches) if results.matches else 1
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
|
| 56 |
+
for match in results.matches:
|
| 57 |
+
confidence = round((match.score / max_score) * 100, 2) if max_score > 0 else 0
|
| 58 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
chunks.append({
|
| 60 |
+
"text": match.metadata.get("text", ""),
|
| 61 |
+
"filename": match.metadata.get("filename", ""),
|
| 62 |
+
"page": match.metadata.get("page", 1),
|
| 63 |
+
"score": round(match.score, 4),
|
| 64 |
+
"confidence": confidence
|
| 65 |
})
|
|
|
|
| 66 |
|
| 67 |
+
return chunks
|
| 68 |
+
|
| 69 |
+
except Exception as e:
|
| 70 |
+
print(f"Retrieval error: {e}")
|
| 71 |
+
return []
|
render.yaml
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
services:
|
| 2 |
+
- type: web
|
| 3 |
+
name: rag-pdf-assistant
|
| 4 |
+
runtime: python
|
| 5 |
+
buildCommand: pip install -r requirements.txt
|
| 6 |
+
startCommand: gunicorn app:app --bind 0.0.0.0:$PORT --workers 2
|
| 7 |
+
envVars:
|
| 8 |
+
- key: SECRET_KEY
|
| 9 |
+
generateValue: true
|
| 10 |
+
- key: ENCRYPTION_KEY
|
| 11 |
+
sync: false
|
| 12 |
+
- key: MONGO_URI
|
| 13 |
+
sync: false
|
| 14 |
+
- key: GOOGLE_CLIENT_ID
|
| 15 |
+
sync: false
|
| 16 |
+
- key: GOOGLE_CLIENT_SECRET
|
| 17 |
+
sync: false
|
| 18 |
+
- key: PYTHON_VERSION
|
| 19 |
+
value: 3.10.12
|
requirements.txt
CHANGED
|
@@ -1,8 +1,6 @@
|
|
| 1 |
flask
|
| 2 |
python-dotenv
|
| 3 |
pymupdf
|
| 4 |
-
faiss-cpu
|
| 5 |
-
sentence-transformers
|
| 6 |
flask-login
|
| 7 |
pymongo
|
| 8 |
werkzeug
|
|
@@ -11,6 +9,7 @@ python-docx
|
|
| 11 |
groq
|
| 12 |
requests
|
| 13 |
requests-oauthlib
|
| 14 |
-
google-
|
| 15 |
cryptography
|
| 16 |
-
gunicorn
|
|
|
|
|
|
| 1 |
flask
|
| 2 |
python-dotenv
|
| 3 |
pymupdf
|
|
|
|
|
|
|
| 4 |
flask-login
|
| 5 |
pymongo
|
| 6 |
werkzeug
|
|
|
|
| 9 |
groq
|
| 10 |
requests
|
| 11 |
requests-oauthlib
|
| 12 |
+
google-genai
|
| 13 |
cryptography
|
| 14 |
+
gunicorn
|
| 15 |
+
pinecone
|
templates/profile.html
CHANGED
|
@@ -55,14 +55,32 @@
|
|
| 55 |
</select>
|
| 56 |
|
| 57 |
<label style="color: var(--text-main); font-weight: 500; font-size: 14px;">Groq API Key
|
| 58 |
-
(Optional)</label>
|
| 59 |
<input type="password" id="groqKey" name="groq_key" placeholder="Enter your Groq API Key..."
|
| 60 |
-
value="{{ current_user.groq_api_key
|
| 61 |
|
| 62 |
<label style="color: var(--text-main); font-weight: 500; font-size: 14px;">Gemini API Key
|
| 63 |
-
(
|
| 64 |
<input type="password" id="geminiKey" name="gemini_key" placeholder="Enter your Gemini API Key..."
|
| 65 |
-
value="{{ current_user.gemini_api_key
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
|
| 67 |
<button type="submit" style="width: 100%; margin-top: 15px;">Save Settings</button>
|
| 68 |
</form>
|
|
@@ -100,6 +118,11 @@
|
|
| 100 |
}
|
| 101 |
}
|
| 102 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
document.getElementById("settingsForm").addEventListener("submit", async (e) => {
|
| 104 |
e.preventDefault()
|
| 105 |
const statusDiv = document.getElementById("settingsStatus")
|
|
@@ -107,13 +130,21 @@
|
|
| 107 |
statusDiv.style.color = "var(--primary-color)"
|
| 108 |
|
| 109 |
const preferredModel = document.getElementById("preferredModel").value
|
| 110 |
-
const groqKey = document.getElementById("groqKey").value
|
| 111 |
-
const geminiKey = document.getElementById("geminiKey").value
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
|
| 113 |
const response = await fetch("/update_settings", {
|
| 114 |
method: "POST",
|
| 115 |
headers: { "Content-Type": "application/json" },
|
| 116 |
-
body: JSON.stringify(
|
| 117 |
})
|
| 118 |
|
| 119 |
const data = await response.json()
|
|
@@ -141,6 +172,7 @@
|
|
| 141 |
let icon = "π"
|
| 142 |
if (file.endsWith(".docx")) icon = "π"
|
| 143 |
if (file.endsWith(".txt")) icon = "π"
|
|
|
|
| 144 |
|
| 145 |
filesList.innerHTML += `
|
| 146 |
<li class="file-item">
|
|
|
|
| 55 |
</select>
|
| 56 |
|
| 57 |
<label style="color: var(--text-main); font-weight: 500; font-size: 14px;">Groq API Key
|
| 58 |
+
(Optional — for chat generation)</label>
|
| 59 |
<input type="password" id="groqKey" name="groq_key" placeholder="Enter your Groq API Key..."
|
| 60 |
+
value="" data-has-key="{{ 'true' if current_user.groq_api_key else 'false' }}" style="margin-top: 10px;">
|
| 61 |
|
| 62 |
<label style="color: var(--text-main); font-weight: 500; font-size: 14px;">Gemini API Key
|
| 63 |
+
(Required — used for embeddings & chat)</label>
|
| 64 |
<input type="password" id="geminiKey" name="gemini_key" placeholder="Enter your Gemini API Key..."
|
| 65 |
+
value="" data-has-key="{{ 'true' if current_user.gemini_api_key else 'false' }}" style="margin-top: 10px;">
|
| 66 |
+
|
| 67 |
+
<hr style="border: 1px solid var(--glass-border); margin: 20px 0;">
|
| 68 |
+
|
| 69 |
+
<h3 style="margin-bottom: 10px; color: var(--text-main);">🌲 Pinecone Vector Database</h3>
|
| 70 |
+
<p style="color: var(--text-muted); font-size: 13px; margin-bottom: 15px;">
|
| 71 |
+
Create a free index at <a href="https://app.pinecone.io" target="_blank" style="color: var(--primary-color);">pinecone.io</a>
|
| 72 |
+
(Serverless, Dimension: 3072, Metric: Cosine)
|
| 73 |
+
</p>
|
| 74 |
+
|
| 75 |
+
<label style="color: var(--text-main); font-weight: 500; font-size: 14px;">Pinecone API Key
|
| 76 |
+
(Required)</label>
|
| 77 |
+
<input type="password" id="pineconeKey" name="pinecone_key" placeholder="Enter your Pinecone API Key..."
|
| 78 |
+
value="" data-has-key="{{ 'true' if current_user.pinecone_api_key else 'false' }}" style="margin-top: 10px;">
|
| 79 |
+
|
| 80 |
+
<label style="color: var(--text-main); font-weight: 500; font-size: 14px;">Pinecone Index Name
|
| 81 |
+
(Required)</label>
|
| 82 |
+
<input type="text" id="pineconeIndex" name="pinecone_index" placeholder="e.g. rag-app"
|
| 83 |
+
value="{{ current_user.pinecone_index_name or '' }}" style="margin-top: 10px;">
|
| 84 |
|
| 85 |
<button type="submit" style="width: 100%; margin-top: 15px;">Save Settings</button>
|
| 86 |
</form>
|
|
|
|
| 118 |
}
|
| 119 |
}
|
| 120 |
|
| 121 |
+
// Show placeholder for existing keys
|
| 122 |
+
document.querySelectorAll('[data-has-key="true"]').forEach(input => {
|
| 123 |
+
input.placeholder = "•••••••• (key saved — leave blank to keep)"
|
| 124 |
+
})
|
| 125 |
+
|
| 126 |
document.getElementById("settingsForm").addEventListener("submit", async (e) => {
|
| 127 |
e.preventDefault()
|
| 128 |
const statusDiv = document.getElementById("settingsStatus")
|
|
|
|
| 130 |
statusDiv.style.color = "var(--primary-color)"
|
| 131 |
|
| 132 |
const preferredModel = document.getElementById("preferredModel").value
|
| 133 |
+
const groqKey = document.getElementById("groqKey").value.trim()
|
| 134 |
+
const geminiKey = document.getElementById("geminiKey").value.trim()
|
| 135 |
+
const pineconeKey = document.getElementById("pineconeKey").value.trim()
|
| 136 |
+
const pineconeIndex = document.getElementById("pineconeIndex").value.trim()
|
| 137 |
+
|
| 138 |
+
// Only send keys if user actually typed a new value
|
| 139 |
+
const payload = { preferred_model: preferredModel, pinecone_index: pineconeIndex }
|
| 140 |
+
if (groqKey) payload.groq_key = groqKey
|
| 141 |
+
if (geminiKey) payload.gemini_key = geminiKey
|
| 142 |
+
if (pineconeKey) payload.pinecone_key = pineconeKey
|
| 143 |
|
| 144 |
const response = await fetch("/update_settings", {
|
| 145 |
method: "POST",
|
| 146 |
headers: { "Content-Type": "application/json" },
|
| 147 |
+
body: JSON.stringify(payload)
|
| 148 |
})
|
| 149 |
|
| 150 |
const data = await response.json()
|
|
|
|
| 172 |
let icon = "π"
|
| 173 |
if (file.endsWith(".docx")) icon = "π"
|
| 174 |
if (file.endsWith(".txt")) icon = "π"
|
| 175 |
+
if (file.endsWith(".md")) icon = "π"
|
| 176 |
|
| 177 |
filesList.innerHTML += `
|
| 178 |
<li class="file-item">
|