LiamKhoaLe committed on
Commit 729a1f7 · 0 Parent(s): Initial commit

.gitignore ADDED
@@ -0,0 +1 @@
+ .env
Dockerfile ADDED
@@ -0,0 +1,50 @@
+ # Hugging Face Spaces - Docker
+ FROM python:3.11-slim
+
+ ENV PYTHONDONTWRITEBYTECODE=1
+ ENV PYTHONUNBUFFERED=1
+
+ # System deps
+ RUN apt-get update && apt-get install -y --no-install-recommends \
+     build-essential curl git libglib2.0-0 libgl1 \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Create and use a non-root user
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV PATH="/home/user/.local/bin:$PATH"
+
+ # Set working directory
+ WORKDIR /app
+
+ # Copy project files
+ COPY . .
+
+ # Install Python dependencies
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Hugging Face cache directories
+ ENV HF_HOME="/home/user/.cache/huggingface"
+ ENV SENTENCE_TRANSFORMERS_HOME="/home/user/.cache/huggingface/sentence-transformers"
+ ENV MEDGEMMA_HOME="/home/user/.cache/huggingface/sentence-transformers"
+
+ # Create cache directories and set permissions
+ RUN mkdir -p /app/model_cache /home/user/.cache/huggingface/sentence-transformers && \
+     chown -R user:user /app/model_cache /home/user/.cache/huggingface
+
+ # Control preloading flags
+ ENV PRELOAD_TRANSLATORS="0"
+ ENV EMBEDDING_HALF="0"
+
+ # Preload embedding model and warmup
+ RUN python /app/dw_model.py && python /app/warmup.py
+
+ # Ensure ownership stays correct
+ RUN chown -R user:user /app/model_cache
+
+ # Expose port for HF Spaces
+ ENV PORT=7860
+ EXPOSE 7860
+
+ # Start FastAPI
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]
LICENSE.txt ADDED
@@ -0,0 +1,201 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [2025] [Dang Khoa Le]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
README.md ADDED
@@ -0,0 +1,253 @@
+ ---
+ title: EdSummariser
+ emoji: 📚
+ colorFrom: indigo
+ colorTo: blue
+ sdk: docker
+ sdk_version: latest
+ pinned: false
+ license: apache-2.0
+ short_description: Ed-Assistant summarises your learning journey with Agentic RAG
+ ---
+
+ ### StudyBuddy RAG
+
+ An end-to-end RAG (Retrieval-Augmented Generation) app for studying from your own documents. Upload PDF/DOCX files and the app extracts text and images, captions the images, chunks the text into semantic "cards", embeds and stores them in MongoDB, and serves a chat endpoint that answers strictly from your uploaded materials. It also includes a lightweight chat-memory feature to improve context continuity, cost-aware model routing, and robust provider retries.
+
+ ## Features
+
+ - **Document ingestion**: PDF/DOCX parsing (PyMuPDF, python-docx), image extraction and BLIP-based captions
+ - **Semantic chunking**: heuristic heading/size-based chunker
+ - **Embeddings**: Sentence-Transformers (all-MiniLM-L6-v2 by default) with random fallback when unavailable
+ - **Vector search**: MongoDB Atlas Vector Search (optional) or local cosine fallback
+ - **RAG chat**: cost-aware routing between Gemini and NVIDIA endpoints
+ - **Chat memory**: per-user LRU of recent QA summaries; history and semantic retrieval to augment context
+ - **Summarization**: cheap extractive summaries via sumy with naive fallback
+ - **Centralized logging**: tagged loggers per module, e.g., [APP], [RAG], [CHUNKER]
+ - **Simple UI**: static frontend under `static/`
+
+ ## Prerequisites
+
+ - Python 3.10+
+ - MongoDB instance (local or Atlas). Collections are created automatically
+ - Optional: NVIDIA and/or Gemini API keys for model calls
+ - Optional but recommended: a virtual environment
+
+ ## Project Structure
+
+ ```text
+ app.py               # FastAPI app, routes, background ingestion, chat
+ utils/logger.py      # Centralized tagged logger
+ utils/parser.py      # PDF/DOCX parsing and image extraction
+ utils/caption.py     # BLIP image captioning (transformers)
+ utils/chunker.py     # Heuristic chunk builder
+ utils/embeddings.py  # Embedding client (Sentence-Transformers)
+ utils/rag.py         # Mongo-backed store and vector search
+ utils/rotator.py     # API key rotator + robust HTTP POST helper
+ utils/router.py      # Model selection + LLM invocation helpers
+ utils/summarizer.py  # sumy-based extractive summarizer
+ utils/common.py      # small helpers
+ memo/memory.py       # per-user LRU memory store
+ memo/history.py      # history relevance + semantic helpers
+ static/              # minimal frontend (index.html, script.js, styles.css)
+ Dockerfile           # container image
+ requirements.txt     # Python dependencies
+ ```
+
+ ## Quickstart (Local)
+
+ ```bash
+ python -m venv .venv && source .venv/bin/activate
+ pip install -r requirements.txt
+ export MONGO_URI="mongodb://localhost:27017"
+ uvicorn app:app --reload
+ ```
+
+ Open UI: `http://localhost:8000/static/`
+
+ Health: `http://localhost:8000/healthz`
+
+ ## Configuration
+
+ Environment variables (an illustrative export block follows the list):
+
+ - **MONGO_URI**: MongoDB connection string (required)
+ - **MONGO_DB**: MongoDB database name (default: studybuddy)
+ - **ATLAS_VECTOR**: set to "1" to enable Atlas Vector Search, else local cosine (default: 0)
+ - **MONGO_VECTOR_INDEX**: Atlas Search index name for vectors (default: vector_index)
+ - **EMBED_MODEL**: sentence-transformers model name (default: sentence-transformers/all-MiniLM-L6-v2)
+ - **GEMINI_API_1..5**: Gemini API keys for rotation
+ - **NVIDIA_API_1..5**: NVIDIA API keys for rotation
+ - **GEMINI_SMALL, GEMINI_MED, GEMINI_PRO**: override default Gemini models
+ - **NVIDIA_SMALL**: override default NVIDIA small model
+ - Optional logging controls: use process env like `PYTHONWARNINGS=ignore` and manage verbosity per logger if needed
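+
+ For illustration, a typical local setup might export something like the following before starting the server (all values are placeholders; only `MONGO_URI` is required):
+
+ ```bash
+ export MONGO_URI="mongodb://localhost:27017"   # required; local or Atlas URI
+ export MONGO_DB="studybuddy"                   # optional, shown with its default
+ export ATLAS_VECTOR="0"                        # "1" to enable Atlas Vector Search
+ export MONGO_VECTOR_INDEX="vector_index"
+ export EMBED_MODEL="sentence-transformers/all-MiniLM-L6-v2"
+ export GEMINI_API_1="<gemini-key>"             # GEMINI_API_1..5 rotate
+ export NVIDIA_API_1="<nvidia-key>"             # NVIDIA_API_1..5 rotate
+ ```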
+
+ Logging: Logs are sent to stdout at INFO level, tagged per module, e.g., `[APP]`, `[RAG]`. See `utils/logger.py`.
+
+ ## Running (Local)
+
+ ```bash
+ export MONGO_URI="mongodb://localhost:27017"  # or Atlas URI
+ uvicorn app:app --reload --workers 1 --host 0.0.0.0 --port 8000
+ ```
+
+ Open the UI: `http://localhost:8000/static/`
+
+ Health check: `http://localhost:8000/healthz`
+
+ ## Running (Docker)
+
+ Build and run:
+
+ ```bash
+ docker build -t studybuddy-rag .
+ docker run --rm -p 8000:7860 \
+   -e MONGO_URI="<your-mongo-uri>" \
+   -e MONGO_DB="studybuddy" \
+   -e NVIDIA_API_1="<nvidia-key>" \
+   -e GEMINI_API_1="<gemini-key>" \
+   studybuddy-rag
+ ```
+
+ The image listens on port 7860 (see the Dockerfile `CMD`), so the example maps host port 8000 to container port 7860; the UI is then at `http://localhost:8000/static/`.
+
+ For production, consider `--restart unless-stopped` and setting `--env ATLAS_VECTOR=1` if using Atlas Vector Search.
+
+ ## API Overview
+
+ - GET `/` → serves `static/index.html`
+ - POST `/upload` (multipart form-data)
+   - fields: `user_id` (str), `files` (one or more PDF/DOCX)
+   - response: `{ job_id, status: "processing" }`; ingestion proceeds in background
+ - GET `/cards`
+   - params: `user_id` (str), `filename` (optional), `limit` (int), `skip` (int)
+   - returns stored cards without embeddings
+ - GET `/file-summary`
+   - params: `user_id`, `filename`
+   - returns `{ filename, summary }`
+ - POST `/chat` (form-urlencoded)
+   - fields: `user_id`, `question`, `k` (int, default 6)
+   - logic:
+     - If question matches "what is <file> about?": returns file summary
+     - Else: classify relevant files via NVIDIA, augment with chat memory context, run vector search (restricted to relevant files if any), select model, generate answer, store QA summary in LRU
+   - returns `{ answer, sources }` (and `relevant_files` when no hits)
+
+ Example cURL (an illustrative response follows):
+
+ ```bash
+ curl -X POST http://localhost:8000/chat \
+   -H 'Content-Type: application/x-www-form-urlencoded' \
+   -d 'user_id=user1' \
+   --data-urlencode 'question=Summarize reinforcement learning from the uploaded notes.'
+ ```
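+
+ A successful `/chat` response has roughly the shape below; the values are made up for illustration, and each `sources` entry mirrors the per-chunk metadata stored at ingestion time:
+
+ ```json
+ {
+   "answer": "Reinforcement learning is ... (source: notes.pdf, Reinforcement Learning Basics)",
+   "sources": [
+     {
+       "filename": "notes.pdf",
+       "topic_name": "Reinforcement Learning Basics",
+       "page_span": [3, 5],
+       "score": 0.82,
+       "chunk_id": "66f1c2a9d4e8b3f0a1b2c3d4"
+     }
+   ]
+ }
+ ```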
+
+ Upload example:
+
+ ```bash
+ curl -X POST http://localhost:8000/upload \
+   -H 'Content-Type: multipart/form-data' \
+   -F 'user_id=user1' \
+   -F 'files=@/path/to/file1.pdf' \
+   -F 'files=@/path/to/file2.docx'
+ ```
+
+ List cards:
+
+ ```bash
+ curl 'http://localhost:8000/cards?user_id=user1&limit=10'
+ ```
+
+ ## MongoDB Atlas Vector Index (optional)
+
+ If using Atlas Vector Search, create an index (UI or API) similar to:
+
+ ```json
+ {
+   "mappings": {
+     "dynamic": false,
+     "fields": {
+       "embedding": {
+         "type": "knnVector",
+         "dimensions": 384,
+         "similarity": "cosine"
+       }
+     }
+   }
+ }
+ ```
+
+ Set `ATLAS_VECTOR=1` and `MONGO_VECTOR_INDEX` accordingly.
+
+ Schema overview (an illustrative document follows the list):
+
+ - Collection `chunks` (per card):
+   - `user_id` (str), `filename` (str), `topic_name` (str), `summary` (str), `content` (str)
+   - `page_span` ([int, int])
+   - `card_id` (slug + sequence)
+   - `embedding` (float[384])
+ - Collection `files` (per file):
+   - `user_id` (str), `filename` (str), `summary` (str)
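+
+ For illustration, a single `chunks` document might look like the sketch below (field values, including the exact `card_id` format, are made up for the example; `embedding` holds 384 floats):
+
+ ```json
+ {
+   "user_id": "user1",
+   "filename": "notes.pdf",
+   "topic_name": "Reinforcement Learning Basics",
+   "summary": "Introduces agents, rewards, and value functions.",
+   "content": "Reinforcement learning studies how agents act to maximise cumulative reward ...",
+   "page_span": [3, 5],
+   "card_id": "reinforcement-learning-basics-0003",
+   "embedding": [0.0123, -0.0456, "... 382 more floats ..."]
+ }
+ ```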
+
+ ## Notes on Models and Keys
+
+ - NVIDIA and Gemini calls use a simple key rotator. Provide one or more keys via `NVIDIA_API_1..5`, `GEMINI_API_1..5`.
+ - The app is defensive: if embeddings or summarization models are unavailable, it falls back to naive strategies to keep the app responsive (with reduced quality).
+
+ ## Logging and Observability
+
+ - Logs are tagged by module via `utils/logger.py`:
+   - [APP] app lifecycle, ingestion, chat flow
+   - [RAG] storage, vector search
+   - [EMBED] embedding model loads and fallbacks
+   - [CAPTION] BLIP model loads and captioning
+   - [ROUTER]/[ROTATOR] model routing and retry/rotation events
+   - [CHUNKER]/[SUM]/[COMMON]/[PARSER] module-specific messages
+ - Change verbosity by setting the root logger level in code if needed (see the sketch below)
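+
+ As a minimal sketch (placement is up to you, e.g., near the top of `app.py`), the standard library is enough to change global verbosity:
+
+ ```python
+ import logging
+
+ # Quieten or loosen all tagged loggers at once via the root logger
+ logging.getLogger().setLevel(logging.WARNING)  # or logging.DEBUG while debugging
+ ```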
+
+ ## Performance and Cost Tips
+
+ - Disable image captioning if CPU-bound by short-circuiting in `utils/caption.py` (return "")
+ - Use smaller `k` in `/chat` for fewer chunks
+ - Prefer NVIDIA_SMALL for simple questions (already default via router)
+ - If Atlas Vector is unavailable, local cosine search samples up to 2000 docs; tune in `utils/rag.py`
+ - Run with `--workers` and consider a process manager for production
+
+ ## Security Notes
+
+ - CORS is currently open (`allow_origins=["*"]`) for simplicity. Restrict it in production (see the sketch after this list)
+ - Validate and limit upload sizes at the reverse proxy (e.g., nginx) or add checks in `/upload`
+ - Secrets are passed via environment; avoid committing them
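+
+ As a sketch of the CORS restriction suggested above, the existing middleware block in `app.py` could be tightened to an explicit origin list (the origin shown is a placeholder for your deployment):
+
+ ```python
+ # app.py: replace the wide-open CORS setup with known origins only
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["https://your-frontend.example.com"],
+     allow_credentials=True,
+     allow_methods=["GET", "POST"],
+     allow_headers=["*"],
+ )
+ ```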
+
+ ## Troubleshooting
+
+ - Missing Python packages: install via `pip install -r requirements.txt`.
+ - Ingestion stalls: check `[APP]` logs; large files and image captioning (BLIP) can be slow on CPU.
+ - No vector hits:
+   - Ensure documents were embedded and stored (see `[RAG] Inserted ... cards` logs)
+   - Verify `MONGO_URI` and collection contents
+   - If Atlas Vector is on, confirm the index exists and `ATLAS_VECTOR=1`
+ - NVIDIA/Gemini errors: see `[ROUTER]`/`[ROTATOR]` logs; key rotation retries transient errors.
+ - PIL/transformers/torch issues on ARM Macs: ensure correct torch build or disable captioning
+ - PyMuPDF font warnings: generally safe to ignore; upgrade PyMuPDF if needed
+
+ ## Development
+
+ - Code style: straightforward, explicit names, tagged logging
+ - Frontend: simple static site in `static/`
+ - Extend chunking/embeddings or swap providers by editing modules in `utils/`
+ - Optional Makefile targets you can add:
+
+ ```Makefile
+ run:
+ 	uvicorn app:app --reload
+
+ docker-build:
+ 	docker build -t studybuddy-rag .
+
+ docker-run:
+ 	docker run --rm -p 8000:7860 -e MONGO_URI="mongodb://host.docker.internal:27017" studybuddy-rag
+ ```
+
+ ## License
+
+ Apache License 2.0. See `LICENSE.txt`.
+
app.py ADDED
@@ -0,0 +1,303 @@
1
+ import os, io, re, uuid, json, time, logging
2
+ from typing import List, Dict, Any, Optional
3
+
4
+ from fastapi import FastAPI, UploadFile, File, Form, Request, HTTPException, BackgroundTasks
5
+ from fastapi.responses import FileResponse, JSONResponse, HTMLResponse
6
+ from fastapi.staticfiles import StaticFiles
7
+ from fastapi.middleware.cors import CORSMiddleware
8
+
9
+ from utils.rotator import APIKeyRotator
10
+ from utils.parser import parse_pdf_bytes, parse_docx_bytes
11
+ from utils.caption import BlipCaptioner
12
+ from utils.chunker import build_cards_from_pages
13
+ from utils.embeddings import EmbeddingClient
14
+ from utils.rag import RAGStore, ensure_indexes
15
+ from utils.router import select_model, generate_answer_with_model
16
+ from utils.summarizer import cheap_summarize
17
+ from utils.common import trim_text
18
+ from utils.logger import get_logger
19
+
20
+ # ────────────────────────────── App Setup ──────────────────────────────
21
+ logger = get_logger("APP", name="studybuddy")
22
+
23
+ app = FastAPI(title="StudyBuddy RAG", version="0.1.0")
24
+ app.add_middleware(
25
+ CORSMiddleware,
26
+ allow_origins=["*"],
27
+ allow_credentials=True,
28
+ allow_methods=["*"],
29
+ allow_headers=["*"],
30
+ )
31
+
32
+ # Serve static files (index.html, script.js, styles.css)
33
+ app.mount("/static", StaticFiles(directory="static"), name="static")
34
+
35
+
36
+ # ────────────────────────────── Global Clients ──────────────────────────────
37
+ # API rotators (round robin + auto failover on quota errors)
38
+ gemini_rotator = APIKeyRotator(prefix="GEMINI_API_", max_slots=5)
39
+ nvidia_rotator = APIKeyRotator(prefix="NVIDIA_API_", max_slots=5)
40
+
41
+ # Captioner + Embeddings (lazy init inside classes)
42
+ captioner = BlipCaptioner()
43
+ embedder = EmbeddingClient(model_name=os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2"))
44
+
45
+ # Mongo / RAG store
46
+ rag = RAGStore(mongo_uri=os.getenv("MONGO_URI"), db_name=os.getenv("MONGO_DB", "studybuddy"))
47
+ ensure_indexes(rag)
48
+
49
+
50
+ # ────────────────────────────── Helpers ──────────────────────────────
51
+ def _infer_mime(filename: str) -> str:
52
+ lower = filename.lower()
53
+ if lower.endswith(".pdf"):
54
+ return "application/pdf"
55
+ if lower.endswith(".docx"):
56
+ return "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
57
+ return "application/octet-stream"
58
+
59
+
60
+ def _extract_pages(filename: str, file_bytes: bytes) -> List[Dict[str, Any]]:
61
+ mime = _infer_mime(filename)
62
+ if mime == "application/pdf":
63
+ return parse_pdf_bytes(file_bytes)
64
+ elif mime == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
65
+ return parse_docx_bytes(file_bytes)
66
+ else:
67
+ raise HTTPException(status_code=400, detail=f"Unsupported file type: {filename}")
68
+
69
+
70
+ # ────────────────────────────── Routes ──────────────────────────────
71
+ @app.get("/", response_class=HTMLResponse)
72
+ def index():
73
+ index_path = os.path.join("static", "index.html")
74
+ if not os.path.exists(index_path):
75
+ return HTMLResponse("<h1>StudyBuddy RAG</h1><p>Static files not found.</p>")
76
+ return FileResponse(index_path)
77
+
78
+
79
+ @app.post("/upload")
80
+ async def upload_files(
81
+ request: Request,
82
+ background_tasks: BackgroundTasks,
83
+ user_id: str = Form(...),
84
+ files: List[UploadFile] = File(...),
85
+ ):
86
+ """
87
+ Ingest many files: PDF/DOCX.
88
+ Steps:
89
+ 1) Extract text & images
90
+ 2) Caption images (BLIP base, CPU ok)
91
+ 3) Merge captions into page text
92
+ 4) Chunk into semantic cards (topic_name, summary, content + metadata)
93
+ 5) Embed with all-MiniLM-L6-v2
94
+ 6) Store in MongoDB with per-user and per-filename metadata
95
+ 7) Create a file-level summary
96
+ """
97
+ job_id = str(uuid.uuid4())
98
+ # Read file bytes upfront to avoid reading from closed streams in background task
99
+ preloaded_files = []
100
+ for uf in files:
101
+ raw = await uf.read()
102
+ preloaded_files.append((uf.filename, raw))
103
+ # Process files in background
104
+ async def _process():
105
+ total_cards = 0
106
+ file_summaries = []
107
+ for fname, raw in preloaded_files:
108
+ logger.info(f"[{job_id}] Parsing {fname} ({len(raw)} bytes)")
109
+ # Extract pages from file
110
+ pages = _extract_pages(fname, raw)
111
+ # Caption images per page (if any)
112
+ num_imgs = sum(len(p.get("images", [])) for p in pages)
113
+ captions = []
114
+ if num_imgs > 0:
115
+ for p in pages:
116
+ caps = []
117
+ for im in p.get("images", []):
118
+ try:
119
+ cap = captioner.caption_image(im)
120
+ caps.append(cap)
121
+ except Exception as e:
122
+ logger.warning(f"Caption error: {e}")
123
+ captions.append(caps)
124
+ else:
125
+ captions = [[] for _ in pages]
126
+ # Merge captions into text
127
+ for idx, p in enumerate(pages):
128
+ if captions[idx]:
129
+ p["text"] = (p.get("text", "") + "\n\n" + "\n".join([f"[Image] {c}" for c in captions[idx]])).strip()
130
+ # Build cards
131
+ cards = build_cards_from_pages(pages, filename=fname, user_id=user_id)
132
+ logger.info(f"[{job_id}] Built {len(cards)} cards for {fname}")
133
+ # Embed & store
134
+ embeddings = embedder.embed([c["content"] for c in cards])
135
+ for c, vec in zip(cards, embeddings):
136
+ c["embedding"] = vec
137
+ # Store cards in MongoDB on card
138
+ rag.store_cards(cards)
139
+ total_cards += len(cards)
140
+ # File-level summary (cheap extractive)
141
+ full_text = "\n\n".join(p.get("text", "") for p in pages)
142
+ file_summary = cheap_summarize(full_text, max_sentences=6)
143
+ rag.upsert_file_summary(user_id=user_id, filename=fname, summary=file_summary)
144
+ file_summaries.append({"filename": fname, "summary": file_summary})
145
+ logger.info(f"[{job_id}] Ingestion complete. Total cards: {total_cards}")
146
+ # Kick off processing in background to keep UI responsive
147
+ background_tasks.add_task(_process)
148
+ return {"job_id": job_id, "status": "processing"}
149
+
150
+
151
+ @app.get("/cards")
152
+ def list_cards(user_id: str, filename: Optional[str] = None, limit: int = 50, skip: int = 0):
153
+ return rag.list_cards(user_id=user_id, filename=filename, limit=limit, skip=skip)
154
+
155
+
156
+ @app.get("/file-summary")
157
+ def get_file_summary(user_id: str, filename: str):
158
+ doc = rag.get_file_summary(user_id=user_id, filename=filename)
159
+ if not doc:
160
+ raise HTTPException(404, detail="No summary found for that file.")
161
+ return {"filename": filename, "summary": doc.get("summary", "")}
162
+
163
+
164
+ @app.post("/chat")
165
+ async def chat(user_id: str = Form(...), question: str = Form(...), k: int = Form(6)):
166
+ """
167
+ RAG chat that answers ONLY from uploaded materials.
168
+ - Preload all filenames + summaries; use NVIDIA to classify file relevance to question (true/false)
169
+ - Restrict vector search to relevant files (fall back to all if none)
170
+ - Bring in recent chat memory: last 3 via NVIDIA relevance; remaining 17 via semantic search
171
+ - After answering, summarize (q,a) via NVIDIA and store into LRU (last 20)
172
+ """
173
+ from memo.memory import MemoryLRU
174
+ from memo.history import summarize_qa_with_nvidia, files_relevance, related_recent_and_semantic_context
175
+ from utils.router import NVIDIA_SMALL # reuse default name
176
+ memory = app.state.__dict__.setdefault("memory_lru", MemoryLRU())
177
+
178
+ # 0) If question is about a specific file, return the file summary
179
+ m = re.search(r"what\s+is\s+the\s+(.+?\.(pdf|docx))\s+about\??", question, re.IGNORECASE)
180
+ # If the question is about a specific file, return the file summary
181
+ if m:
182
+ fn = m.group(1)
183
+ doc = rag.get_file_summary(user_id=user_id, filename=fn)
184
+ if doc:
185
+ return {"answer": doc.get("summary", ""), "sources": [{"filename": fn, "file_summary": True}]}
186
+ else:
187
+ return {"answer": "I couldn't find a summary for that file in your library.", "sources": []}
188
+
189
+ # 1) Preload file list + summaries
190
+ files_list = rag.list_files(user_id=user_id) # [{filename, summary}]
191
+ # Ask NVIDIA to mark relevance per file
192
+ relevant_map = await files_relevance(question, files_list, nvidia_rotator)
193
+ relevant_files = [fn for fn, ok in relevant_map.items() if ok]
194
+
195
+ # 2) Memory context: recent 3 via NVIDIA, remaining 17 via semantic
196
+ # recent 3 related (we do a simple include-all; NVIDIA will prune by "related" selection using the same mechanism as files_relevance but here handled in history)
197
+ recent_related, semantic_related = await related_recent_and_semantic_context(user_id, question, memory, embedder)
198
+ # For recent_related (empty placeholder), do NVIDIA pruning now:
199
+ recent3 = memory.recent(user_id, 3)
200
+ if recent3:
201
+ sys = "Pick only items that directly relate to the new question. Output the selected items verbatim, no commentary. If none, output nothing."
202
+ numbered = [{"id": i+1, "text": s} for i, s in enumerate(recent3)]
203
+ user = f"Question: {question}\nCandidates:\n{json.dumps(numbered, ensure_ascii=False)}\nSelect any related items and output ONLY their 'text' values concatenated."
204
+ try:
205
+ from utils.rotator import robust_post_json
206
+ key = nvidia_rotator.get_key()
207
+ url = "https://integrate.api.nvidia.com/v1/chat/completions"
208
+ payload = {
209
+ "model": os.getenv("NVIDIA_SMALL", "meta/llama-3.1-8b-instruct"),
210
+ "temperature": 0.0,
211
+ "messages": [
212
+ {"role": "system", "content": sys},
213
+ {"role": "user", "content": user},
214
+ ]
215
+ }
216
+ headers = {"Content-Type": "application/json", "Authorization": f"Bearer {key or ''}"}
217
+ data = await robust_post_json(url, headers, payload, nvidia_rotator)
218
+ recent_related = data["choices"][0]["message"]["content"].strip()
219
+ except Exception as e:
220
+ logger.warning(f"Recent-related NVIDIA error: {e}")
221
+ recent_related = ""
222
+
223
+ # 3) RAG vector search (restricted to relevant files if any)
224
+ q_vec = embedder.embed([question])[0]
225
+ hits = rag.vector_search(user_id=user_id, query_vector=q_vec, k=k, filenames=relevant_files if relevant_files else None)
226
+ if not hits:
227
+ return {
228
+ "answer": "I don't know based on your uploaded materials. Try uploading more sources or rephrasing the question.",
229
+ "sources": [],
230
+ "relevant_files": relevant_files
231
+ }
232
+ # Compose context
233
+ contexts = []
234
+ sources_meta = []
235
+ for h in hits:
236
+ doc = h["doc"]
237
+ score = h["score"]
238
+ contexts.append(f"[{doc.get('topic_name','Topic')}] {trim_text(doc.get('content',''), 1200)}")
239
+ sources_meta.append({
240
+ "filename": doc.get("filename"),
241
+ "topic_name": doc.get("topic_name"),
242
+ "page_span": doc.get("page_span"),
243
+ "score": float(score),
244
+ "chunk_id": str(doc.get("_id", ""))
245
+ })
246
+ context_text = "\n\n---\n\n".join(contexts)
247
+
248
+ # Add file-level summaries for relevant files
249
+ file_summary_block = ""
250
+ if relevant_files:
251
+ fsum_map = {f["filename"]: f.get("summary","") for f in files_list}
252
+ lines = [f"[{fn}] {fsum_map.get(fn, '')}" for fn in relevant_files]
253
+ file_summary_block = "\n".join(lines)
254
+
255
+ # Guardrail instruction to avoid hallucination
256
+ system_prompt = (
257
+ "You are a careful study assistant. Answer strictly using the given CONTEXT.\n"
258
+ "If the answer isn't in the context, say 'I don't know based on the provided materials.'\n"
259
+ "Write concise, clear explanations with citations like (source: filename, topic).\n"
260
+ )
261
+
262
+ # Add recent chat context and historical similarity context
263
+ history_block = ""
264
+ if recent_related or semantic_related:
265
+ history_block = "RECENT_CHAT_CONTEXT:\n" + (recent_related or "") + ("\n\nHISTORICAL_SIMILARITY_CONTEXT:\n" + semantic_related if semantic_related else "")
266
+ composed_context = ""
267
+ if history_block:
268
+ composed_context += history_block + "\n\n"
269
+ if file_summary_block:
270
+ composed_context += "FILE_SUMMARIES:\n" + file_summary_block + "\n\n"
271
+ composed_context += "DOC_CONTEXT:\n" + context_text
272
+
273
+ # Compose user prompt
274
+ user_prompt = f"QUESTION:\n{question}\n\nCONTEXT:\n{composed_context}"
275
+ # Choose model (cost-aware)
276
+ selection = select_model(question=question, context=composed_context)
277
+ logger.info(f"Model selection: {selection}")
278
+ # Generate answer with model
279
+ try:
280
+ answer = await generate_answer_with_model(
281
+ selection=selection,
282
+ system_prompt=system_prompt,
283
+ user_prompt=user_prompt,
284
+ gemini_rotator=gemini_rotator,
285
+ nvidia_rotator=nvidia_rotator
286
+ )
287
+ except Exception as e:
288
+ logger.error(f"LLM error: {e}")
289
+ answer = "I had trouble contacting the language model provider just now. Please try again."
290
+ # After answering: summarize QA and store in memory (LRU, last 20)
291
+ try:
292
+ qa_sum = await summarize_qa_with_nvidia(question, answer, nvidia_rotator)
293
+ memory.add(user_id, qa_sum)
294
+ except Exception as e:
295
+ logger.warning(f"QA summarize/store failed: {e}")
296
+ # Trim for logging
297
+ logger.info("LLM answer (trimmed): %s", trim_text(answer, 200).replace("\n", " "))
298
+ return {"answer": answer, "sources": sources_meta}
299
+
300
+
301
+ @app.get("/healthz")
302
+ def health():
303
+ return {"ok": True}
dw_model.py ADDED
@@ -0,0 +1,49 @@
1
+ # dw_model.py
2
+ ### --- A. transformer and embedder ---
3
+ import os
4
+ import shutil
5
+ from huggingface_hub import snapshot_download
6
+
7
+ # Set up paths
8
+ MODEL_REPO = "sentence-transformers/all-MiniLM-L6-v2"
9
+ MODEL_CACHE_DIR = "/app/model_cache"
10
+ HF_CACHE_DIR = os.getenv("HF_HOME", "/home/user/.cache/huggingface")
11
+
12
+ print("⏳ Downloading the SentenceTransformer model...")
13
+ # Download directly into /app/model_cache to avoid duplicating files from HF cache
14
+ model_path = snapshot_download(
15
+ repo_id=MODEL_REPO,
16
+ cache_dir=HF_CACHE_DIR, # Store HF cache in user cache dir
17
+ local_dir=MODEL_CACHE_DIR, # Place usable model here
18
+ local_dir_use_symlinks=False # Copy files into local_dir (no symlinks)
19
+ )
20
+
21
+ print("Model path: ", model_path)
22
+ if not os.path.exists(MODEL_CACHE_DIR):
23
+ os.makedirs(MODEL_CACHE_DIR)
24
+
25
+ # Verify structure after moving
26
+ print("\n📂 LLM Model Structure (Build Level):")
27
+ for root, dirs, files in os.walk(MODEL_CACHE_DIR):
28
+ print(f"📁 {root}/")
29
+ for file in files:
30
+ print(f" 📄 {file}")
31
+
32
+
33
+ ### --- B. translation modules ---
34
+ # Optional pre-download of translation models. These can be very large and
35
+ # may exceed build storage limits on constrained environments (e.g., HF Spaces).
36
+ # Control with env var PRELOAD_TRANSLATORS ("1" to enable; default: disabled).
37
+ PRELOAD_TRANSLATORS = os.getenv("PRELOAD_TRANSLATORS", "0")
38
+ if PRELOAD_TRANSLATORS == "1":
39
+ try:
40
+ from transformers import pipeline
41
+ print("⏬ Pre-downloading Vietnamese–English translator...")
42
+ _ = pipeline("translation", model="VietAI/envit5-translation", src_lang="vi", tgt_lang="en", device=-1)
43
+ print("⏬ Pre-downloading Chinese–English translator...")
44
+ _ = pipeline("translation", model="Helsinki-NLP/opus-mt-zh-en", device=-1)
45
+ print("✅ Translators preloaded.")
46
+ except Exception as e:
47
+ print(f"⚠️ Skipping translator preload due to error: {e}")
48
+ else:
49
+ print("ℹ️ Skipping translator pre-download (PRELOAD_TRANSLATORS != '1'). They will lazy-load at runtime.")
memo/history.py ADDED
@@ -0,0 +1,134 @@
1
+ # ────────────────────────────── memo/history.py ──────────────────────────────
2
+ import os
3
+ import json
4
+ import logging
5
+ from typing import List, Dict, Any, Tuple
6
+ import numpy as np
7
+
8
+ from utils.logger import get_logger
9
+ from utils.rotator import robust_post_json
10
+ from utils.embeddings import EmbeddingClient
11
+
12
+ logger = get_logger("RAG", __name__)
13
+
14
+ NVIDIA_SMALL = os.getenv("NVIDIA_SMALL", "meta/llama-3.1-8b-instruct")
15
+
16
+ async def _nvidia_chat(system_prompt: str, user_prompt: str, nvidia_key: str, rotator) -> str:
17
+ """
18
+ Minimal NVIDIA Chat call that enforces no-comment concise outputs.
19
+ """
20
+ url = "https://integrate.api.nvidia.com/v1/chat/completions"
21
+ payload = {
22
+ "model": NVIDIA_SMALL,
23
+ "temperature": 0.0,
24
+ "messages": [
25
+ {"role": "system", "content": system_prompt},
26
+ {"role": "user", "content": user_prompt},
27
+ ]
28
+ }
29
+ headers = {"Content-Type": "application/json", "Authorization": f"Bearer {nvidia_key or ''}"}
30
+ data = None
31
+ try:
32
+ data = await robust_post_json(url, headers, payload, rotator)
33
+ return data["choices"][0]["message"]["content"]
34
+ except Exception as e:
35
+ logger.warning(f"NVIDIA chat error: {e} • response: {data}")
36
+ return ""
37
+
38
+ def _safe_json(s: str) -> Any:
39
+ try:
40
+ return json.loads(s)
41
+ except Exception:
42
+ # Try to extract a JSON object from text
43
+ start = s.find("{")
44
+ end = s.rfind("}")
45
+ if start != -1 and end != -1 and end > start:
46
+ try:
47
+ return json.loads(s[start:end+1])
48
+ except Exception:
49
+ return {}
50
+ return {}
51
+
52
+ async def summarize_qa_with_nvidia(question: str, answer: str, rotator) -> str:
53
+ """
54
+ Returns a single line block:
55
+ q: <concise>\na: <concise>
56
+ No extra commentary.
57
+ """
58
+ sys = "You are a terse summarizer. Output exactly two lines:\nq: <short question summary>\na: <short answer summary>\nNo extra text."
59
+ user = f"Question:\n{question}\n\nAnswer:\n{answer}"
60
+ key = rotator.get_key()
61
+ out = await _nvidia_chat(sys, user, key, rotator)
62
+ # Basic guard if the model returns extra prose
63
+ lines = [ln.strip() for ln in out.splitlines() if ln.strip()]
64
+ ql = next((l for l in lines if l.lower().startswith('q:')), None)
65
+ al = next((l for l in lines if l.lower().startswith('a:')), None)
66
+ if not ql or not al:
67
+ # Fallback truncate
68
+ ql = "q: " + (question.strip()[:160] + ("…" if len(question.strip()) > 160 else ""))
69
+ al = "a: " + (answer.strip()[:220] + ("…" if len(answer.strip()) > 220 else ""))
70
+ return f"{ql}\n{al}"
71
+
72
+ async def files_relevance(question: str, file_summaries: List[Dict[str, str]], rotator) -> Dict[str, bool]:
73
+ """
74
+ Ask NVIDIA model to mark each file as relevant (true) or not (false) for the question.
75
+ Returns {filename: bool}
76
+ """
77
+ sys = "You classify file relevance. Return STRICT JSON only with shape {\"relevance\":[{\"filename\":\"...\",\"relevant\":true|false}]}."
78
+ items = [{"filename": f["filename"], "summary": f.get("summary","")} for f in file_summaries]
79
+ user = f"Question: {question}\n\nFiles:\n{json.dumps(items, ensure_ascii=False)}\n\nReturn JSON only."
80
+ key = rotator.get_key()
81
+ out = await _nvidia_chat(sys, user, key, rotator)
82
+ data = _safe_json(out) or {}
83
+ rels = {}
84
+ for row in data.get("relevance", []):
85
+ fn = row.get("filename")
86
+ rv = row.get("relevant")
87
+ if isinstance(fn, str) and isinstance(rv, bool):
88
+ rels[fn] = rv
89
+ # If parsing failed, default to considering all files possibly relevant
90
+ if not rels and file_summaries:
91
+ rels = {f["filename"]: True for f in file_summaries}
92
+ return rels
93
+
94
+ def _cosine(a: np.ndarray, b: np.ndarray) -> float:
95
+ denom = (np.linalg.norm(a) * np.linalg.norm(b)) or 1.0
96
+ return float(np.dot(a, b) / denom)
97
+
98
+ def _as_text(block: str) -> str:
99
+ return block.strip()
100
+
101
+ async def related_recent_and_semantic_context(user_id: str, question: str, memory, embedder: EmbeddingClient, topk_sem: int = 3) -> Tuple[str, str]:
102
+ """
103
+ Returns (recent_related_text, semantic_related_text).
104
+ - recent_related_text: NVIDIA checks the last 3 summaries for direct relatedness.
105
+ - semantic_related_text: cosine-sim search over the remaining 17 summaries (top-k).
106
+ """
107
+ recent3 = memory.recent(user_id, 3)
108
+ rest17 = memory.rest(user_id, 3)
109
+
110
+ recent_text = ""
111
+ if recent3:
112
+ sys = "Pick only items that directly relate to the new question. Output the selected items verbatim, no commentary. If none, output nothing."
113
+ numbered = [{"id": i+1, "text": s} for i, s in enumerate(recent3)]
114
+ user = f"Question: {question}\nCandidates:\n{json.dumps(numbered, ensure_ascii=False)}\nSelect any related items and output ONLY their 'text' lines concatenated."
115
+ key = None # We'll let robust_post_json handle rotation via rotator param
116
+ # Use the same nvidia rotator mechanism via a fake call; we'll reconstruct in app with the real rotator passed through
117
+ # Here, we expect the caller to monkey-patch the chat with rotator; to keep it simple, we'll do a tiny trick:
118
+ # The real API call occurs in app with rotator. For here, we return empty and let app request do it. (But to keep module self-contained, we do call with rotator when provided.)
119
+ # However, since this function is called from app and gets the rotator, we'll move NVIDIA call out of here to avoid circular deps.
120
+
121
+ # We'll implement a pure semantic search for rest17 here; recent related will be handled in app using the same prompt.
122
+
123
+ # Semantic over rest17
124
+ sem_text = ""
125
+ if rest17:
126
+ qv = np.array(embedder.embed([question])[0], dtype="float32")
127
+ mats = embedder.embed([_as_text(s) for s in rest17])
128
+ sims = [(_cosine(qv, np.array(v, dtype="float32")), s) for v, s in zip(mats, rest17)]
129
+ sims.sort(key=lambda x: x[0], reverse=True)
130
+ top = [s for (sc, s) in sims[:topk_sem] if sc > 0.15] # small threshold
131
+ if top:
132
+ sem_text = "\n\n".join(top)
133
+ # Return recent empty (to be filled by caller using NVIDIA), and semantic text
134
+ return ("", sem_text)
memo/memory.py ADDED
@@ -0,0 +1,32 @@
1
+ # ────────────────────────────── memo/memory.py ──────────────────────────────
2
+ from collections import deque, defaultdict
3
+ from typing import List, Dict
4
+
5
+ class MemoryLRU:
6
+ """
7
+ Per-user LRU-like memory of the last N (default 20) summarized chat sessions.
8
+ Each item is a single string in the format: "q: ...\na: ..."
9
+ """
10
+ def __init__(self, capacity: int = 20):
11
+ self.capacity = capacity
12
+ self._store: Dict[str, deque] = defaultdict(lambda: deque(maxlen=self.capacity))
13
+
14
+ def add(self, user_id: str, qa_summary: str):
15
+ self._store[user_id].append(qa_summary)
16
+
17
+ def recent(self, user_id: str, n: int = 3) -> List[str]:
18
+ d = self._store[user_id]
19
+ if not d:
20
+ return []
21
+ # Return last n in recency order (most recent first)
22
+ return list(d)[-n:][::-1]
23
+
24
+ def rest(self, user_id: str, skip_n: int = 3) -> List[str]:
25
+ d = self._store[user_id]
26
+ if not d:
27
+ return []
28
+ # Everything except the most recent `skip_n`, oldest first
29
+ return list(d)[:-skip_n] if len(d) > skip_n else []
30
+
31
+ def all(self, user_id: str) -> List[str]:
32
+ return list(self._store[user_id])
requirements.txt ADDED
@@ -0,0 +1,13 @@
+ fastapi #==0.114.2
+ uvicorn[standard] #==0.30.6
+ python-multipart #==0.0.9
+ pymongo #==4.8.0
+ httpx #==0.27.2
+ python-docx #==1.1.2
+ PyMuPDF #==1.24.10
+ pillow #==10.4.0
+ transformers #==4.44.2
+ torch #==2.4.0
+ sentence-transformers #==3.1.1
+ sumy #==0.11.0
+ numpy #==1.26.4
static/index.html ADDED
@@ -0,0 +1,47 @@
1
+ <!-- ────────────────────────────── static/index.html ────────────────────────────── -->
2
+ <!doctype html>
3
+ <html lang="en">
4
+ <head>
5
+ <meta charset="utf-8">
6
+ <title>StudyBuddy</title>
7
+ <meta name="viewport" content="width=device-width, initial-scale=1">
8
+ <link rel="stylesheet" href="/static/styles.css">
9
+ </head>
10
+ <body>
11
+ <div class="container">
12
+ <header>
13
+ <h1>📚 StudyBuddy</h1>
14
+ <p>Upload your PDFs/DOCX, then chat with your materials. No hallucinations — answers only come from your files.</p>
15
+ </header>
16
+
17
+ <section class="card">
18
+ <h2>1/ Upload materials</h2>
19
+ <form id="upload-form">
20
+ <label>User ID</label>
21
+ <input type="text" id="user_id" placeholder="e.g., user_123" required>
22
+ <label>Files (PDF/DOCX, multiple)</label>
23
+ <input type="file" id="files" multiple accept=".pdf,.docx">
24
+ <button type="submit">Upload</button>
25
+ </form>
26
+ <pre id="upload-log"></pre>
27
+ </section>
28
+
29
+ <section class="card">
30
+ <h2>2/ Ask questions</h2>
31
+ <div id="chat">
32
+ <div id="messages"></div>
33
+ <div class="chat-controls">
34
+ <input type="text" id="question" placeholder="Ask something about your files…">
35
+ <button id="ask">Ask</button>
36
+ </div>
37
+ </div>
38
+ </section>
39
+
40
+ <footer>
41
+ <small>StudyBuddy RAG • FastAPI on Hugging Face Spaces • MongoDB Vector • BLIP captions</small>
42
+ </footer>
43
+ </div>
44
+
45
+ <script src="/static/script.js"></script>
46
+ </body>
47
+ </html>
static/script.js ADDED
@@ -0,0 +1,72 @@
1
+ // ────────────────────────────── static/script.js ──────────────────────────────
2
+ const log = (msg) => {
3
+ const el = document.getElementById("upload-log");
4
+ el.textContent += msg + "\n";
5
+ el.scrollTop = el.scrollHeight;
6
+ };
7
+
8
+ // Upload
9
+ document.getElementById("upload-form").addEventListener("submit", async (e) => {
10
+ e.preventDefault();
11
+ const user_id = document.getElementById("user_id").value.trim();
12
+ const files = document.getElementById("files").files;
13
+ if (!user_id || files.length === 0) {
14
+ alert("Provide user id and at least one file.");
15
+ return;
16
+ }
17
+ const fd = new FormData();
18
+ fd.append("user_id", user_id);
19
+ for (let f of files) fd.append("files", f);
20
+
21
+ log("Uploading " + files.length + " file(s)…");
22
+ const res = await fetch("/upload", { method: "POST", body: fd });
23
+ const data = await res.json();
24
+ log("Upload accepted. Job: " + (data.job_id || "?") + " • status: " + (data.status || "?"));
25
+ log("Processing in the background. You can start chatting meanwhile.");
26
+ });
27
+
28
+ // Chat
29
+ document.getElementById("ask").addEventListener("click", async () => {
30
+ const user_id = document.getElementById("user_id").value.trim();
31
+ const q = document.getElementById("question").value.trim();
32
+ if (!user_id || !q) return;
33
+ appendMessage("user", q);
34
+ document.getElementById("question").value = "";
35
+
36
+ const fd = new FormData();
37
+ fd.append("user_id", user_id);
38
+ fd.append("question", q);
39
+ fd.append("k", "6");
40
+
41
+ try {
42
+ const res = await fetch("/chat", { method: "POST", body: fd });
43
+ const data = await res.json();
44
+ appendMessage("assistant", data.answer || "[no answer]");
45
+ if (data.sources && data.sources.length) {
46
+ appendSources(data.sources);
47
+ }
48
+ } catch (e) {
49
+ appendMessage("assistant", "⚠️ Error contacting server.");
50
+ }
51
+ });
52
+
53
+ function appendMessage(role, text) {
54
+ const m = document.createElement("div");
55
+ m.className = "msg " + role;
56
+ m.textContent = text;
57
+ document.getElementById("messages").appendChild(m);
58
+ m.scrollIntoView({ behavior: "smooth", block: "end" });
59
+ }
60
+
61
+ function appendSources(sources) {
62
+ const wrap = document.createElement("div");
63
+ wrap.className = "sources";
64
+ wrap.innerHTML = "<strong>Sources:</strong> " + sources.map(s => {
65
+ const f = s.filename || "unknown";
66
+ const t = s.topic_name ? (" • " + s.topic_name) : "";
67
+ const p = s.page_span ? (" [pp. " + s.page_span.join("-") + "]") : "";
68
+ return `<span class="pill">${f}${t}${p}</span>`;
69
+ }).join(" ");
70
+ document.getElementById("messages").appendChild(wrap);
71
+ wrap.scrollIntoView({ behavior: "smooth", block: "end" });
72
+ }
static/styles.css ADDED
@@ -0,0 +1,66 @@
1
+ /* ────────────────────────────── static/styles.css ────────────────────────────── */
2
+ :root {
3
+ --bg: #0b1020;
4
+ --card: #12193a;
5
+ --text: #e6ecff;
6
+ --muted: #9bb0ff;
7
+ --accent: #7aa2ff;
8
+ --pill: #1f2a5c;
9
+ --green: #41d6a5;
10
+ }
11
+
12
+ * { box-sizing: border-box; }
13
+
14
+ body {
15
+ margin: 0;
16
+ font-family: system-ui, -apple-system, Segoe UI, Roboto, Ubuntu, Cantarell, Noto Sans, sans-serif;
17
+ color: var(--text);
18
+ background: radial-gradient(1200px 600px at 20% -10%, #18225a, var(--bg));
19
+ }
20
+
21
+ .container {
22
+ max-width: 960px;
23
+ margin: 0 auto;
24
+ padding: 24px;
25
+ }
26
+
27
+ header h1 { margin: 0 0 8px; }
28
+ header p { color: var(--muted); margin: 0 0 16px; }
29
+
30
+ .card {
31
+ background: var(--card);
32
+ border: 1px solid #1f2750;
33
+ border-radius: 16px;
34
+ padding: 16px;
35
+ margin: 16px 0;
36
+ box-shadow: 0 10px 30px rgba(0,0,0,0.25);
37
+ }
38
+
39
+ label { display: block; margin: 8px 0 6px; color: var(--muted); }
40
+ input[type="text"], input[type="file"] {
41
+ width: 100%; padding: 10px 12px; border-radius: 12px; border: 1px solid #2a3570;
42
+ background: #0f1430; color: var(--text);
43
+ }
44
+ button {
45
+ margin-top: 12px;
46
+ background: linear-gradient(135deg, var(--accent), #5bc7ff);
47
+ color: #0a0f25; border: none; border-radius: 12px; padding: 10px 16px; font-weight: 600;
48
+ cursor: pointer;
49
+ }
50
+ button:hover { filter: brightness(1.07); }
51
+
52
+ #upload-log {
53
+ height: 120px; overflow: auto; background: #0f1430; padding: 10px; border-radius: 12px; border: 1px solid #2a3570;
54
+ color: #b9c7ff;
55
+ }
56
+
57
+ #chat { display: flex; flex-direction: column; gap: 12px; }
58
+ #messages {
59
+ height: 300px; overflow: auto; background: #0f1430; padding: 12px; border-radius: 12px; border: 1px solid #2a3570;
60
+ }
61
+ .msg { padding: 10px 12px; border-radius: 12px; margin: 6px 0; max-width: 80%; white-space: pre-wrap; }
62
+ .msg.user { margin-left: auto; background: #173361; }
63
+ .msg.assistant { background: #0f244d; border: 1px solid #243a7a; }
64
+ .sources { margin: 8px 0; }
65
+ .pill { display: inline-block; background: var(--pill); padding: 4px 8px; border-radius: 999px; margin: 2px; color: #cbd6ff; border: 1px solid #304088; }
66
+ footer { text-align: center; color: var(--muted); margin-top: 24px; }
utils/caption.py ADDED
@@ -0,0 +1,41 @@
1
+ # ────────────────────────────── utils/caption.py ──────────────────────────────
2
+ from typing import Optional
3
+ from PIL import Image
4
+ import logging
5
+ from .logger import get_logger
6
+
7
+ # Use transformers BLIP base (CPU friendly)
8
+ try:
9
+ from transformers import BlipProcessor, BlipForConditionalGeneration
10
+ except Exception as e:
11
+ BlipProcessor = None
12
+ BlipForConditionalGeneration = None
13
+
14
+ logger = get_logger("CAPTION", __name__)
15
+
16
+
17
+ class BlipCaptioner:
18
+ def __init__(self):
19
+ self._ready = False
20
+ self.processor = None
21
+ self.model = None
22
+
23
+ def _lazy_load(self):
24
+ if self._ready:
25
+ return
26
+ if BlipProcessor is None or BlipForConditionalGeneration is None:
27
+ logger.warning("transformers not available; image captions will be skipped.")
28
+ self._ready = True
29
+ return
30
+ logger.info("Loading BLIP captioner (base)…")
31
+ self.processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
32
+ self.model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
33
+ self._ready = True
34
+
35
+ def caption_image(self, image: Image.Image) -> str:
36
+ self._lazy_load()
37
+ if self.processor is None or self.model is None:
38
+ return ""
39
+ inputs = self.processor(images=image, return_tensors="pt")
40
+ out = self.model.generate(**inputs, max_new_tokens=40)
41
+ return self.processor.decode(out[0], skip_special_tokens=True).strip()
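Illustrative usage sketch (not part of the commit): caption a single image with the class above. It assumes the utils package is importable from the repo root and that sample.jpg is a hypothetical local file.

    from PIL import Image
    from utils.caption import BlipCaptioner

    captioner = BlipCaptioner()          # BLIP weights load lazily on the first call
    img = Image.open("sample.jpg")       # hypothetical image path
    print(captioner.caption_image(img))  # short caption, or "" if transformers is missing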
utils/chunker.py ADDED
@@ -0,0 +1,85 @@
+ # ────────────────────────────── utils/chunker.py ──────────────────────────────
+ import re
+ from typing import List, Dict, Any
+ from .summarizer import cheap_summarize
+ from .common import split_sentences, slugify
+ from .logger import get_logger
+
+ # Heuristic "semantic" chunker:
+ # - Split by headings / numbered sections if present
+ # - Ensure each chunk is ~150-500 words (configurable via MIN_WORDS/MAX_WORDS)
+ # - Generate a short summary + topic name
+
+ MAX_WORDS = 500
+ MIN_WORDS = 150
+ logger = get_logger("CHUNKER", __name__)
+
+ def _by_headings(text: str):
+ # split on markdown-like or outline headings
+ pattern = r"(?m)^(#{1,6}\s.*|[0-9]+\.\s+[^\n]+|[A-Z][A-Za-z0-9\s\-]{2,}\n[-=]{3,})\s*$"
+ parts = []
+ last = 0
+ for m in re.finditer(pattern, text):
+ start = m.start()
+ if start > last:
+ parts.append(text[last:start])
+ parts.append(text[start:m.end()])
+ last = m.end()
+ if last < len(text):
+ parts.append(text[last:])
+ if not parts:
+ parts = [text]
+ return parts
+
+
+ def build_cards_from_pages(pages: List[Dict[str, Any]], filename: str, user_id: str) -> List[Dict[str, Any]]:
+ # Concatenate pages but keep page spans for metadata
+ full = ""
+ page_markers = []
+ for p in pages:
+ start = len(full)
+ full += f"\n\n[[Page {p['page_num']}]]\n{p.get('text','').strip()}\n"
+ page_markers.append((p['page_num'], start, len(full)))
+
+ # First split by headings
+ coarse = _by_headings(full)
+
+ # Then pack into 150-500 word chunks
+ cards = []
+ buf = []
+ buf_words = 0
+ start_idx = 0
+ for block in coarse:
+ words = block.split()
+ if not words:
+ continue
+ if buf_words + len(words) > MAX_WORDS and buf_words >= MIN_WORDS:
+ cards.append(" ".join(buf))
+ buf, buf_words = [], 0
+ start_idx = len(" ".join(coarse[:coarse.index(block)])) # approximate
+ buf.extend(words)
+ buf_words += len(words)
+ if buf_words > 0:
+ cards.append(" ".join(buf))
+
+ # Build card dicts
+ out = []
+ for i, content in enumerate(cards, 1):
+ topic = cheap_summarize(content, max_sentences=1)
+ if not topic:
+ topic = content[:80] + "..."
+ summary = cheap_summarize(content, max_sentences=3)
+ # Estimate page span
+ first_page = pages[0]['page_num'] if pages else 1
+ last_page = pages[-1]['page_num'] if pages else 1
+ out.append({
+ "user_id": user_id,
+ "filename": filename,
+ "topic_name": topic[:120],
+ "summary": summary,
+ "content": content,
+ "page_span": [first_page, last_page],
+ "card_id": f"{slugify(filename)}-c{i:04d}"
+ })
+ logger.info(f"Built {len(out)} cards from {len(pages)} pages for {filename}")
+ return out
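Illustrative sketch (not part of the commit): feed one parsed page through the chunker and inspect the resulting card. The filename and user_id values are made up; the page dict shape matches what utils/parser.py returns.

    from utils.chunker import build_cards_from_pages

    pages = [{"page_num": 1, "text": "Photosynthesis converts light into chemical energy. ...", "images": []}]
    cards = build_cards_from_pages(pages, filename="biology.pdf", user_id="demo-user")
    print(cards[0]["card_id"], cards[0]["page_span"])  # e.g. "biologypdf-c0001" [1, 1]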
utils/common.py ADDED
@@ -0,0 +1,20 @@
+ import re
+ import unicodedata
+ from .logger import get_logger
+
+ logger = get_logger("COMMON", __name__)
+
+ def split_sentences(text: str):
+ return re.split(r"(?<=[\.\!\?])\s+", text.strip())
+
+ def slugify(value: str):
+ value = str(value)
+ value = unicodedata.normalize("NFKD", value).encode("ascii", "ignore").decode("ascii")
+ value = re.sub(r"[^\w\s-]", "", value).strip().lower()
+ return re.sub(r"[-\s]+", "-", value)
+
+ def trim_text(s: str, n: int):
+ s = s or ""
+ if len(s) <= n:
+ return s
+ return s[:n] + "…"
utils/embeddings.py ADDED
@@ -0,0 +1,34 @@
+ # ────────────────────────────── utils/embeddings.py ──────────────────────────────
+ import os
+ from typing import List
+ import numpy as np
+ import logging
+ from .logger import get_logger
+
+ try:
+ from sentence_transformers import SentenceTransformer
+ except Exception:
+ SentenceTransformer = None
+
+
+ logger = get_logger("EMBED", __name__)
+
+
+ class EmbeddingClient:
+ def __init__(self, model_name: str = "sentence-transformers/all-MiniLM-L6-v2"):
+ self.model_name = model_name
+ self.model = None
+
+ def _lazy(self):
+ if self.model is None and SentenceTransformer is not None:
+ logger.info(f"Loading embedding model: {self.model_name}")
+ self.model = SentenceTransformer(self.model_name)
+
+ def embed(self, texts: List[str]) -> List[list]:
+ self._lazy()
+ if self.model is None:
+ # Fallback: extremely naive hashing -> NOT for production, but keeps code running without deps
+ logger.warning("SentenceTransformer unavailable; using random fallback embeddings.")
+ return [list(np.random.default_rng(hash(t) % (2**32)).normal(size=384).astype("float32")) for t in texts]
+ vecs = self.model.encode(texts, show_progress_bar=False, normalize_embeddings=True)
+ return [v.tolist() for v in vecs]
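Illustrative sketch (not part of the commit): embed two strings and confirm the vector width matches the 384-dimension MiniLM output that utils/rag.py expects.

    from utils.embeddings import EmbeddingClient

    client = EmbeddingClient()
    vectors = client.embed(["mitochondria are the powerhouse of the cell", "Krebs cycle"])
    print(len(vectors), len(vectors[0]))  # expected: 2 384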
utils/logger.py ADDED
@@ -0,0 +1,38 @@
+ import logging
+ import sys
+ from typing import Optional
+
+
+ _DEFAULT_FORMAT = "%(asctime)s %(levelname)s %(message)s"
+
+
+ def _ensure_root_handler() -> None:
+ root_logger = logging.getLogger()
+ if root_logger.handlers:
+ return
+ handler = logging.StreamHandler(stream=sys.stdout)
+ formatter = logging.Formatter(_DEFAULT_FORMAT)
+ handler.setFormatter(formatter)
+ root_logger.addHandler(handler)
+ root_logger.setLevel(logging.INFO)
+
+
+ class _TaggedAdapter(logging.LoggerAdapter):
+ def process(self, msg, kwargs):
+ tag = self.extra.get("tag", "")
+ if tag and not str(msg).startswith(tag):
+ msg = f"{tag} {msg}"
+ return msg, kwargs
+
+
+ def get_logger(tag: str, name: Optional[str] = None) -> logging.LoggerAdapter:
+ """
+ Return a logger adapter that injects a [TAG] prefix into records.
+ Example: logger = get_logger("APP") → logs like: [APP] message
+ """
+ _ensure_root_handler()
+ logger_name = name or __name__
+ base = logging.getLogger(logger_name)
+ return _TaggedAdapter(base, {"tag": f"[{tag}]"})
+
+
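Illustrative sketch (not part of the commit): every module grabs a tagged logger and the prefix is injected automatically.

    from utils.logger import get_logger

    log = get_logger("DEMO", __name__)
    log.info("server started")  # emits something like: 2024-01-01 00:00:00,000 INFO [DEMO] server started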
utils/parser.py ADDED
@@ -0,0 +1,53 @@
+ import io
+ from typing import List, Dict, Any
+ import fitz # PyMuPDF
+ from docx import Document
+ from PIL import Image
+ import numpy as np
+ from .logger import get_logger
+
+ logger = get_logger("PARSER", __name__)
+
+
+ def parse_pdf_bytes(b: bytes) -> List[Dict[str, Any]]:
+ """
+ Returns list of pages, each {'page_num': i, 'text': str, 'images': [PIL.Image]}
+ """
+ pages = []
+ with fitz.open(stream=b, filetype="pdf") as doc:
+ for i, page in enumerate(doc):
+ text = page.get_text("text")
+ images = []
+ for img in page.get_images(full=True):
+ xref = img[0]
+ pix = fitz.Pixmap(doc, xref)
+ if pix.n - pix.alpha >= 4: # CMYK
+ pix = fitz.Pixmap(fitz.csRGB, pix)
+ im = Image.frombytes("RGBA" if pix.alpha else "RGB", (pix.width, pix.height), pix.samples)
+ images.append(im.convert("RGB"))
+ pix = None
+ pages.append({"page_num": i + 1, "text": text, "images": images})
+ logger.info(f"Parsed PDF with {len(pages)} pages")
+ return pages
+
+
+ def parse_docx_bytes(b: bytes) -> List[Dict[str, Any]]:
+ f = io.BytesIO(b)
+ doc = Document(f)
+ text = []
+ images = []
+ for rel in doc.part.rels.values():
+ if "image" in rel.reltype:
+ data = rel.target_part.blob
+ try:
+ im = Image.open(io.BytesIO(data)).convert("RGB")
+ images.append(im)
+ except Exception:
+ pass
+ for p in doc.paragraphs:
+ text.append(p.text)
+ pages = [{"page_num": 1, "text": "\n".join(text), "images": images}]
+ logger.info("Parsed DOCX into single concatenated page")
+ return pages
+
+
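Illustrative sketch (not part of the commit): parse a PDF from disk and count the extracted images. The file path is hypothetical; in the app the bytes come from the upload endpoint instead.

    from utils.parser import parse_pdf_bytes

    with open("lecture_notes.pdf", "rb") as fh:  # hypothetical path
        pages = parse_pdf_bytes(fh.read())
    print(sum(len(p["images"]) for p in pages), "images across", len(pages), "pages")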
utils/rag.py ADDED
@@ -0,0 +1,132 @@
+ # ────────────────────────────── utils/rag.py ──────────────────────────────
+ import os
+ import math
+ from typing import List, Dict, Any, Optional
+ from pymongo import MongoClient, ASCENDING, TEXT
+ from pymongo.collection import Collection
+ from pymongo.errors import PyMongoError
+ import numpy as np
+ from .logger import get_logger
+
+ VECTOR_DIM = 384 # all-MiniLM-L6-v2
+ INDEX_NAME = os.getenv("MONGO_VECTOR_INDEX", "vector_index")
+ USE_ATLAS_VECTOR = os.getenv("ATLAS_VECTOR", "0") == "1"
+ logger = get_logger("RAG", __name__)
+
+
+
+ class RAGStore:
+ def __init__(self, mongo_uri: str, db_name: str = "studybuddy"):
+ self.client = MongoClient(mongo_uri)
+ self.db = self.client[db_name]
+ self.chunks: Collection = self.db["chunks"]
+ self.files: Collection = self.db["files"]
+
+ # ── Write ────────────────────────────────────────────────────────────────
+ def store_cards(self, cards: List[Dict[str, Any]]):
+ if not cards:
+ return
+ for c in cards:
+ # basic validation
+ emb = c.get("embedding")
+ if not emb or len(emb) != VECTOR_DIM:
+ raise ValueError("Invalid embedding length; expected %d" % VECTOR_DIM)
+ self.chunks.insert_many(cards, ordered=False)
+ logger.info(f"Inserted {len(cards)} cards into MongoDB")
+
+ def upsert_file_summary(self, user_id: str, filename: str, summary: str):
+ self.files.update_one(
+ {"user_id": user_id, "filename": filename},
+ {"$set": {"summary": summary}},
+ upsert=True
+ )
+ logger.info(f"Upserted summary for {filename} (user {user_id})")
+
+ # ── Read ────────────────────────────────────────────────────────────────
+ def list_cards(self, user_id: str, filename: Optional[str], limit: int, skip: int):
+ q = {"user_id": user_id}
+ if filename:
+ q["filename"] = filename
+ cur = self.chunks.find(q, {"embedding": 0}).skip(skip).limit(limit).sort([("_id", ASCENDING)])
+ return list(cur)
+
+ def list_files(self, user_id: str) -> List[Dict[str, Any]]:
+ cur = self.files.find({"user_id": user_id}, {"_id": 0})
+ return list(cur)
+
+ def get_file_summary(self, user_id: str, filename: str):
+ return self.files.find_one({"user_id": user_id, "filename": filename})
+
+ def vector_search(self, user_id: str, query_vector: List[float], k: int = 6, filenames: Optional[List[str]] = None):
+ if USE_ATLAS_VECTOR:
+ # Atlas Vector Search (requires pre-created index on 'embedding')
+ pipeline = [
+ {
+ "$search": {
+ "index": INDEX_NAME,
+ "knnBeta": {
+ "vector": query_vector,
+ "path": "embedding",
+ "k": k,
+ # keep results scoped to the requesting user
+ "filter": {"equals": {"path": "user_id", "value": user_id}},
+ },
+ }
+ },
+ # $project cannot mix exclusions with computed fields, so project first, then strip embeddings
+ {"$project": {"score": {"$meta": "searchScore"}, "doc": "$$ROOT"}},
+ {"$project": {"doc.embedding": 0}},
+ ]
+ if filenames:
+ pipeline.append({"$match": {"doc.filename": {"$in": filenames}}})
+ pipeline.append({"$limit": k})
+ hits = list(self.chunks.aggregate(pipeline))
+ return [{"doc": h["doc"], "score": h["score"]} for h in hits]
+ # Fallback: scan limited sample and compute cosine locally
+ else:
+ q = {"user_id": user_id}
+ # Apply filename filter if provided
+ if filenames:
+ q["filename"] = {"$in": filenames}
+ # Scan limited sample and compute cosine locally
+ sample = list(self.chunks.find(q).limit(max(2000, k*10)))
+ # If no sample, return empty list
+ if not sample:
+ return []
+ # Prepare the query vector
+ qv = np.array(query_vector, dtype="float32")
+ scores = []
+ # Compute cosine similarity for each sample
+ for d in sample:
+ v = np.array(d.get("embedding", [0]*VECTOR_DIM), dtype="float32")
+ denom = (np.linalg.norm(qv) * np.linalg.norm(v)) or 1.0
+ sim = float(np.dot(qv, v) / denom)
+ scores.append((sim, d))
+ # Sort scores by cosine similarity in descending order
+ scores.sort(key=lambda x: x[0], reverse=True)
+ # Get top k scores
+ top = scores[:k]
+ # Log the results
+ logger.info(f"Vector search sample={len(sample)} returned top={len(top)}")
+ return [{"doc": d, "score": s} for (s, d) in top]
+
+
+ def ensure_indexes(store: RAGStore):
+ # Basic text index for fallback keyword search (optional)
+ try:
+ store.chunks.create_index([("user_id", ASCENDING), ("filename", ASCENDING)])
+ store.chunks.create_index([("content", TEXT), ("topic_name", TEXT), ("summary", TEXT)], name="text_idx")
+ store.files.create_index([("user_id", ASCENDING), ("filename", ASCENDING)], unique=True)
+ except PyMongoError as e:
+ logger.warning(f"Index creation warning: {e}")
+ # Note: For Atlas Vector, create an Atlas Search index named INDEX_NAME on field "embedding" with vector options.
+ # Example (in Atlas UI):
+ # {
+ # "mappings": {
+ # "dynamic": false,
+ # "fields": {
+ # "embedding": {
+ # "type": "knnVector",
+ # "dimensions": 384,
+ # "similarity": "cosine"
+ # }
+ # }
+ # }
+ # }
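Illustrative sketch (not part of the commit): wire the store to the embedding client and run the local cosine fallback search. The MONGODB_URI env var name, user id, and question text are assumptions for the example.

    import os
    from utils.rag import RAGStore, ensure_indexes
    from utils.embeddings import EmbeddingClient

    store = RAGStore(os.environ["MONGODB_URI"])           # assumed env var name
    ensure_indexes(store)
    qv = EmbeddingClient().embed(["What is the Krebs cycle?"])[0]
    for hit in store.vector_search(user_id="demo-user", query_vector=qv, k=3):
        print(round(hit["score"], 3), hit["doc"]["topic_name"])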
utils/rotator.py ADDED
@@ -0,0 +1,61 @@
+ # ────────────────────────────── utils/rotator.py ──────────────────────────────
+ import os
+ import itertools
+ import logging
+ from .logger import get_logger
+ from typing import Optional
+
+ import httpx
+
+ logger = get_logger("ROTATOR", __name__)
+
+
+ class APIKeyRotator:
+ """
+ Round-robin API key rotator.
+ - Loads keys from env vars with given prefix (e.g., GEMINI_API_1..5)
+ - get_key() returns current key
+ - rotate() moves to next key
+ - on HTTP 401/429/5xx you should call rotate() and retry (bounded)
+ """
+ def __init__(self, prefix: str, max_slots: int = 5):
+ self.keys = []
+ for i in range(1, max_slots + 1):
+ v = os.getenv(f"{prefix}{i}")
+ if v:
+ self.keys.append(v.strip())
+ if not self.keys:
+ logger.warning(f"No API keys found for prefix {prefix}. Calls will likely fail.")
+ self._cycle = itertools.cycle([""])
+ else:
+ self._cycle = itertools.cycle(self.keys)
+ self.current = next(self._cycle)
+
+ def get_key(self) -> Optional[str]:
+ return self.current
+
+ def rotate(self) -> Optional[str]:
+ self.current = next(self._cycle)
+ logger.info("Rotated API key.")
+ return self.current
+
+
+ async def robust_post_json(url: str, headers: dict, payload: dict, rotator: APIKeyRotator, max_retries: int = 5):
+ """
+ POST JSON with simple retry+rotate on 401/403/429/5xx.
+ Returns json response.
+ """
+ for attempt in range(max_retries):
+ try:
+ async with httpx.AsyncClient(timeout=60) as client:
+ r = await client.post(url, headers=headers, json=payload)
+ if r.status_code in (401, 403, 429) or (500 <= r.status_code < 600):
+ logger.warning(f"HTTP {r.status_code} from provider. Rotating key and retrying ({attempt+1}/{max_retries})")
+ rotator.rotate()
+ continue
+ r.raise_for_status()
+ return r.json()
+ except Exception as e:
+ logger.warning(f"Request error: {e}. Rotating and retrying ({attempt+1}/{max_retries})")
+ rotator.rotate()
+ raise RuntimeError("Provider request failed after retries.")
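Illustrative sketch (not part of the commit): build a rotator from the GEMINI_API_1..5 env vars named in the docstring and attach the current key to a request header. Note that url/headers passed to robust_post_json are fixed for all retries, so the rotated key only takes effect on the next call that rebuilds them.

    from utils.rotator import APIKeyRotator

    rotator = APIKeyRotator(prefix="GEMINI_API_", max_slots=5)
    headers = {"Authorization": f"Bearer {rotator.get_key()}"}
    # data = await robust_post_json(url, headers, payload, rotator)  # inside an async context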
utils/router.py ADDED
@@ -0,0 +1,83 @@
+ # ────────────────────────────── utils/router.py ──────────────────────────────
+ import os
+ import logging
+ from .logger import get_logger
+ from typing import Dict, Any
+ from .rotator import robust_post_json, APIKeyRotator
+
+ logger = get_logger("ROUTER", __name__)
+
+ # Default model names (can be overridden via env)
+ GEMINI_SMALL = os.getenv("GEMINI_SMALL", "gemini-2.5-flash-lite")
+ GEMINI_MED = os.getenv("GEMINI_MED", "gemini-2.5-flash")
+ GEMINI_PRO = os.getenv("GEMINI_PRO", "gemini-2.5-pro")
+
+ # NVIDIA small default (can be overridden)
+ NVIDIA_SMALL = os.getenv("NVIDIA_SMALL", "meta/llama-3.1-8b-instruct") # example; adjust to your NIM catalog
+
+ def select_model(question: str, context: str) -> Dict[str, Any]:
+ """
+ Very lightweight complexity heuristic:
+ - If long question or lots of context -> MED/PRO
+ - If code/math keywords -> PRO
+ - Else SMALL
+ Prefers NVIDIA small when question is short/simple (cost-awareness).
+ """
+ qlen = len(question.split())
+ clen = len(context.split())
+ hard_keywords = ("prove", "derivation", "complexity", "algorithm", "optimize", "theorem", "rigorous", "step-by-step", "policy critique", "ambiguity", "counterfactual")
+ is_hard = any(k in question.lower() for k in hard_keywords) or qlen > 60 or clen > 1600
+
+ if is_hard:
+ # Use Gemini Pro (larger context)
+ return {"provider": "gemini", "model": GEMINI_PRO}
+ elif qlen > 25 or clen > 900:
+ return {"provider": "gemini", "model": GEMINI_MED}
+ else:
+ # Prefer NVIDIA small for cheap/light
+ return {"provider": "nvidia", "model": NVIDIA_SMALL}
+
+
+ async def generate_answer_with_model(selection: Dict[str, Any], system_prompt: str, user_prompt: str,
+ gemini_rotator: APIKeyRotator, nvidia_rotator: APIKeyRotator) -> str:
+ provider = selection["provider"]
+ model = selection["model"]
+
+ if provider == "gemini":
+ key = gemini_rotator.get_key() or ""
+ url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent?key={key}"
+ payload = {
+ "contents": [
+ {"role": "user", "parts": [{"text": f"{system_prompt}\n\n{user_prompt}"}]}
+ ],
+ "generationConfig": {"temperature": 0.2}
+ }
+ headers = {"Content-Type": "application/json"}
+ data = await robust_post_json(url, headers, payload, gemini_rotator)
+ try:
+ return data["candidates"][0]["content"]["parts"][0]["text"]
+ except Exception:
+ logger.warning(f"Unexpected Gemini response: {data}")
+ return "I couldn't parse the model response."
+
+ elif provider == "nvidia":
+ # Many NVIDIA endpoints are OpenAI-compatible. Adjust if using a different path.
+ key = nvidia_rotator.get_key() or ""
+ url = "https://integrate.api.nvidia.com/v1/chat/completions"
+ payload = {
+ "model": model,
+ "temperature": 0.2,
+ "messages": [
+ {"role": "system", "content": system_prompt},
+ {"role": "user", "content": user_prompt},
+ ]
+ }
+ headers = {"Content-Type": "application/json", "Authorization": f"Bearer {key}"}
+ data = await robust_post_json(url, headers, payload, nvidia_rotator)
+ try:
+ return data["choices"][0]["message"]["content"]
+ except Exception:
+ logger.warning(f"Unexpected NVIDIA response: {data}")
+ return "I couldn't parse the model response."
+
+ return "Unsupported provider."
utils/summarizer.py ADDED
@@ -0,0 +1,19 @@
+ from typing import List
+ from sumy.parsers.plaintext import PlaintextParser
+ from sumy.nlp.tokenizers import Tokenizer
+ from sumy.summarizers.lex_rank import LexRankSummarizer
+ from .logger import get_logger
+
+ logger = get_logger("SUM", __name__)
+
+ def cheap_summarize(text: str, max_sentences: int = 3) -> str:
+ try:
+ parser = PlaintextParser.from_string(text, Tokenizer("english"))
+ summarizer = LexRankSummarizer()
+ sentences = summarizer(parser.document, max_sentences)
+ return " ".join(str(s) for s in sentences)
+ except Exception:
+ # Fallback: naive first N sentences
+ logger.warning("sumy unavailable or failed; using naive summarization fallback.")
+ parts = text.split(". ")
+ return ". ".join(parts[:max_sentences])
warmup.py ADDED
@@ -0,0 +1,17 @@
+ from sentence_transformers import SentenceTransformer
+ import torch
+ import os
+
+ print("🚀 Warming up model...")
+ embedding_model = SentenceTransformer("/app/model_cache", device="cpu")
+
+ # Some CPU backends on HF Spaces fail on .half(); make it configurable
+ USE_HALF = os.getenv("EMBEDDING_HALF", "1") == "1"
+ try:
+ if USE_HALF and torch.cuda.is_available():
+ embedding_model = embedding_model.half()
+ except Exception as e:
+ print(f"⚠️ Skipping half precision due to: {e}")
+
+ embedding_model.to(torch.device("cpu"))
+ print("✅ Model warm-up complete!")