Commit 729a1f7 · Initial commit
Files changed:

- .gitignore +1 -0
- Dockerfile +50 -0
- LICENSE.txt +201 -0
- README.md +253 -0
- app.py +303 -0
- dw_model.py +49 -0
- memo/history.py +134 -0
- memo/memory.py +32 -0
- requirements.txt +13 -0
- static/index.html +47 -0
- static/script.js +72 -0
- static/styles.css +66 -0
- utils/caption.py +41 -0
- utils/chunker.py +85 -0
- utils/common.py +20 -0
- utils/embeddings.py +34 -0
- utils/logger.py +38 -0
- utils/parser.py +53 -0
- utils/rag.py +132 -0
- utils/rotator.py +61 -0
- utils/router.py +83 -0
- utils/summarizer.py +19 -0
- warmup.py +17 -0
.gitignore
ADDED
@@ -0,0 +1 @@
.env
Dockerfile
ADDED
@@ -0,0 +1,50 @@
# Hugging Face Spaces - Docker
FROM python:3.11-slim

ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

# System deps
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential curl git libglib2.0-0 libgl1 \
    && rm -rf /var/lib/apt/lists/*

# Create and use a non-root user
RUN useradd -m -u 1000 user
USER user
ENV PATH="/home/user/.local/bin:$PATH"

# Set working directory
WORKDIR /app

# Copy project files
COPY . .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Hugging Face cache directories
ENV HF_HOME="/home/user/.cache/huggingface"
ENV SENTENCE_TRANSFORMERS_HOME="/home/user/.cache/huggingface/sentence-transformers"
ENV MEDGEMMA_HOME="/home/user/.cache/huggingface/sentence-transformers"

# Create cache directories and set permissions
RUN mkdir -p /app/model_cache /home/user/.cache/huggingface/sentence-transformers && \
    chown -R user:user /app/model_cache /home/user/.cache/huggingface

# Control preloading flags
ENV PRELOAD_TRANSLATORS="0"
ENV EMBEDDING_HALF="0"

# Preload embedding model and warmup
RUN python /app/dw_model.py && python /app/warmup.py

# Ensure ownership stays correct
RUN chown -R user:user /app/model_cache

# Expose port for HF Spaces
ENV PORT=7860
EXPOSE 7860

# Start FastAPI
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]
LICENSE.txt
ADDED
@@ -0,0 +1,201 @@
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!) The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [2025] [Dang Khoa Le]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
README.md
ADDED
@@ -0,0 +1,253 @@
---
title: EdSummariser
emoji: 📚
colorFrom: indigo
colorTo: blue
sdk: docker
sdk_version: latest
pinned: false
license: apache-2.0
short_description: Ed-Assistant summarizes your learning journey with Agentic RAG
---

# StudyBuddy RAG

An end-to-end RAG (Retrieval-Augmented Generation) app for studying from your own documents. Upload PDF/DOCX files; the app extracts text and images, captions the images, chunks the text into semantic "cards", embeds and stores them in MongoDB, and serves a chat endpoint that answers strictly from your uploaded materials. Includes a lightweight chat-memory feature to improve context continuity, cost-aware model routing, and robust provider retries.

## Features

- **Document ingestion**: PDF/DOCX parsing (PyMuPDF, python-docx), image extraction and BLIP-based captions
- **Semantic chunking**: heuristic heading/size-based chunker
- **Embeddings**: Sentence-Transformers (all-MiniLM-L6-v2 by default) with a random fallback when unavailable
- **Vector search**: MongoDB Atlas Vector Search (optional) or a local cosine fallback
- **RAG chat**: cost-aware routing between Gemini and NVIDIA endpoints
- **Chat memory**: per-user LRU of recent QA summaries; history and semantic retrieval to augment context
- **Summarization**: cheap extractive summaries via sumy with a naive fallback
- **Centralized logging**: tagged loggers per module, e.g., [APP], [RAG], [CHUNKER]
- **Simple UI**: static frontend under `static/`

## Prerequisites

- Python 3.10+
- MongoDB instance (local or Atlas). Collections are created automatically
- Optional: NVIDIA and/or Gemini API keys for model calls
- Optional but recommended: a virtual environment

## Project Structure

```text
app.py                 # FastAPI app, routes, background ingestion, chat
utils/logger.py        # Centralized tagged logger
utils/parser.py        # PDF/DOCX parsing and image extraction
utils/caption.py       # BLIP image captioning (transformers)
utils/chunker.py       # Heuristic chunk builder
utils/embeddings.py    # Embedding client (Sentence-Transformers)
utils/rag.py           # Mongo-backed store and vector search
utils/rotator.py       # API key rotator + robust HTTP POST helper
utils/router.py        # Model selection + LLM invocation helpers
utils/summarizer.py    # sumy-based extractive summarizer
utils/common.py        # Small helpers
memo/memory.py         # Per-user LRU memory store
memo/history.py        # History relevance + semantic helpers
static/                # Minimal frontend (index.html, script.js, styles.css)
Dockerfile             # Container image
requirements.txt       # Python dependencies
```

## Quickstart (Local)

```bash
python -m venv .venv && source .venv/bin/activate
pip install -r requirements.txt
export MONGO_URI="mongodb://localhost:27017"
uvicorn app:app --reload
```

Open UI: `http://localhost:8000/static/`

Health: `http://localhost:8000/healthz`

## Configuration

Environment variables:

- **MONGO_URI**: MongoDB connection string (required)
- **MONGO_DB**: MongoDB database name (default: studybuddy)
- **ATLAS_VECTOR**: set to "1" to enable Atlas Vector Search, else local cosine (default: 0)
- **MONGO_VECTOR_INDEX**: Atlas Search index name for vectors (default: vector_index)
- **EMBED_MODEL**: sentence-transformers model name (default: sentence-transformers/all-MiniLM-L6-v2)
- **GEMINI_API_1..5**: Gemini API keys for rotation
- **NVIDIA_API_1..5**: NVIDIA API keys for rotation
- **GEMINI_SMALL, GEMINI_MED, GEMINI_PRO**: override default Gemini models
- **NVIDIA_SMALL**: override the default NVIDIA small model
- Optional logging controls: use process env like `PYTHONWARNINGS=ignore` and manage verbosity per logger if needed

Logging: logs are sent to stdout at INFO level, tagged per module, e.g., `[APP]`, `[RAG]`. See `utils/logger.py`.

## Running (Local)

```bash
export MONGO_URI="mongodb://localhost:27017"  # or Atlas URI
uvicorn app:app --reload --workers 1 --host 0.0.0.0 --port 8000
```

Open the UI: `http://localhost:8000/static/`

Health check: `http://localhost:8000/healthz`

## Running (Docker)

Build and run:

```bash
docker build -t studybuddy-rag .
docker run --rm -p 7860:7860 \
  -e MONGO_URI="<your-mongo-uri>" \
  -e MONGO_DB="studybuddy" \
  -e NVIDIA_API_1="<nvidia-key>" \
  -e GEMINI_API_1="<gemini-key>" \
  studybuddy-rag
```

The container listens on port 7860 (see the Dockerfile). For production, consider `--restart unless-stopped` and setting `--env ATLAS_VECTOR=1` if using Atlas Vector Search.

## API Overview

- GET `/` → serves `static/index.html`
- POST `/upload` (multipart form-data)
  - fields: `user_id` (str), `files` (one or more PDF/DOCX)
  - response: `{ job_id, status: "processing" }`; ingestion proceeds in the background
- GET `/cards`
  - params: `user_id` (str), `filename` (optional), `limit` (int), `skip` (int)
  - returns stored cards without embeddings
- GET `/file-summary`
  - params: `user_id`, `filename`
  - returns `{ filename, summary }`
- POST `/chat` (form-urlencoded)
  - fields: `user_id`, `question`, `k` (int, default 6)
  - logic:
    - If the question matches "what is <file> about?": returns the file summary
    - Else: classify relevant files via NVIDIA, augment with chat memory context, run vector search (restricted to relevant files if any), select a model, generate the answer, store the QA summary in the LRU
  - returns `{ answer, sources }` (and `relevant_files` when there are no hits)

Example cURL:

```bash
curl -X POST http://localhost:8000/chat \
  -H 'Content-Type: application/x-www-form-urlencoded' \
  -d 'user_id=user1' \
  --data-urlencode 'question=Summarize reinforcement learning from the uploaded notes.'
```

Upload example:

```bash
curl -X POST http://localhost:8000/upload \
  -H 'Content-Type: multipart/form-data' \
  -F 'user_id=user1' \
  -F 'files=@/path/to/file1.pdf' \
  -F 'files=@/path/to/file2.docx'
```

List cards:

```bash
curl 'http://localhost:8000/cards?user_id=user1&limit=10'
```

## MongoDB Atlas Vector Index (optional)

If using Atlas Vector Search, create an index (UI or API) similar to:

```json
{
  "mappings": {
    "dynamic": false,
    "fields": {
      "embedding": {
        "type": "knnVector",
        "dimensions": 384,
        "similarity": "cosine"
      }
    }
  }
}
```

Set `ATLAS_VECTOR=1` and `MONGO_VECTOR_INDEX` accordingly.

Schema overview:

- Collection `chunks` (per card):
  - `user_id` (str), `filename` (str), `topic_name` (str), `summary` (str), `content` (str)
  - `page_span` ([int, int])
  - `card_id` (slug + sequence)
  - `embedding` (float[384])
- Collection `files` (per file):
  - `user_id` (str), `filename` (str), `summary` (str)
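When `ATLAS_VECTOR` is not enabled, retrieval falls back to cosine similarity computed in the app. The real implementation lives in `utils/rag.py` (not shown in this commit view; it samples up to 2000 docs). A minimal sketch of that fallback over the `chunks` schema above, returning hits in the same `{score, doc}` shape the chat route consumes:

```python
import numpy as np

def local_cosine_search(docs, query_vector, k=6):
    """Brute-force cosine fallback over sampled chunk docs.
    A sketch only; not utils/rag.py verbatim."""
    q = np.asarray(query_vector, dtype="float32")
    qn = np.linalg.norm(q) or 1.0
    scored = []
    for doc in docs:  # each doc carries an "embedding" field (float[384])
        v = np.asarray(doc["embedding"], dtype="float32")
        denom = (np.linalg.norm(v) * qn) or 1.0
        scored.append((float(np.dot(q, v) / denom), doc))
    scored.sort(key=lambda t: t[0], reverse=True)
    return [{"score": s, "doc": d} for s, d in scored[:k]]
```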

## Notes on Models and Keys

- NVIDIA and Gemini calls use a simple key rotator. Provide one or more keys via `NVIDIA_API_1..5`, `GEMINI_API_1..5`; a sketch of the rotation pattern follows below.
- The app is defensive: if embedding or summarization models are unavailable, it falls back to naive strategies to keep the app responsive (with reduced quality).
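The rotator itself (`utils/rotator.py`) is not shown in this commit view; the following is a minimal sketch of the round-robin pattern its usage in `app.py` implies. The class name and failover policy here are assumptions, not the repo's actual code:

```python
import os
import itertools

class RoundRobinKeys:
    """Hypothetical stand-in for utils/rotator.py's APIKeyRotator:
    cycles through keys found in env vars like NVIDIA_API_1..NVIDIA_API_5."""
    def __init__(self, prefix: str, max_slots: int = 5):
        keys = [os.getenv(f"{prefix}{i}") for i in range(1, max_slots + 1)]
        self._keys = [k for k in keys if k]
        self._cycle = itertools.cycle(self._keys) if self._keys else None

    def get_key(self):
        # Returns the next key, or None when no keys are configured;
        # callers retry with the next key on quota/transient errors.
        return next(self._cycle) if self._cycle else None
```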

## Logging and Observability

- Logs are tagged by module via `utils/logger.py`:
  - [APP] app lifecycle, ingestion, chat flow
  - [RAG] storage, vector search
  - [EMBED] embedding model loads and fallbacks
  - [CAPTION] BLIP model loads and captioning
  - [ROUTER]/[ROTATOR] model routing and retry/rotation events
  - [CHUNKER]/[SUM]/[COMMON]/[PARSER] module-specific messages
- Change verbosity by setting the root logger level in code if needed

## Performance and Cost Tips

- Disable image captioning if CPU-bound by short-circuiting in `utils/caption.py` (return "")
- Use a smaller `k` in `/chat` for fewer chunks
- Prefer NVIDIA_SMALL for simple questions (already the default via the router)
- If Atlas Vector is unavailable, local cosine search samples up to 2000 docs; tune in `utils/rag.py`
- Run with `--workers` and consider a process manager for production

## Security Notes

- CORS is currently open (`allow_origins=["*"]`) for simplicity. Restrict it in production
- Validate and limit upload sizes at the reverse proxy (e.g., nginx) or add checks in `/upload`
- Secrets are passed via environment variables; avoid committing them

## Troubleshooting

- Missing Python packages: install via `pip install -r requirements.txt`.
- Ingestion stalls: check `[APP]` logs; large files and image captioning (BLIP) can be slow on CPU.
- No vector hits:
  - Ensure documents were embedded and stored (see `[RAG] Inserted ... cards` logs)
  - Verify `MONGO_URI` and collection contents
  - If Atlas Vector is on, confirm the index exists and `ATLAS_VECTOR=1`
- NVIDIA/Gemini errors: see `[ROUTER]`/`[ROTATOR]` logs; key rotation retries transient errors.
- PIL/transformers/torch issues on ARM Macs: ensure the correct torch build or disable captioning
- PyMuPDF font warnings: generally safe to ignore; upgrade PyMuPDF if needed

## Development

- Code style: straightforward, explicit names, tagged logging
- Frontend: simple static site in `static/`
- Extend chunking/embeddings or swap providers by editing modules in `utils/`
- Optional Makefile targets you can add:

```Makefile
run:
	uvicorn app:app --reload

docker-build:
	docker build -t studybuddy-rag .

docker-run:
	docker run --rm -p 7860:7860 -e MONGO_URI="mongodb://host.docker.internal:27017" studybuddy-rag
```

## License

Apache License 2.0. See `LICENSE.txt`.
app.py
ADDED
@@ -0,0 +1,303 @@
import os, io, re, uuid, json, time, logging
from typing import List, Dict, Any, Optional

from fastapi import FastAPI, UploadFile, File, Form, Request, HTTPException, BackgroundTasks
from fastapi.responses import FileResponse, JSONResponse, HTMLResponse
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware

from utils.rotator import APIKeyRotator
from utils.parser import parse_pdf_bytes, parse_docx_bytes
from utils.caption import BlipCaptioner
from utils.chunker import build_cards_from_pages
from utils.embeddings import EmbeddingClient
from utils.rag import RAGStore, ensure_indexes
from utils.router import select_model, generate_answer_with_model
from utils.summarizer import cheap_summarize
from utils.common import trim_text
from utils.logger import get_logger

# ────────────────────────────── App Setup ──────────────────────────────
logger = get_logger("APP", name="studybuddy")

app = FastAPI(title="StudyBuddy RAG", version="0.1.0")
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Serve static files (index.html, script.js, styles.css)
app.mount("/static", StaticFiles(directory="static"), name="static")


# ────────────────────────────── Global Clients ──────────────────────────────
# API rotators (round robin + auto failover on quota errors)
gemini_rotator = APIKeyRotator(prefix="GEMINI_API_", max_slots=5)
nvidia_rotator = APIKeyRotator(prefix="NVIDIA_API_", max_slots=5)

# Captioner + Embeddings (lazy init inside classes)
captioner = BlipCaptioner()
embedder = EmbeddingClient(model_name=os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2"))

# Mongo / RAG store
rag = RAGStore(mongo_uri=os.getenv("MONGO_URI"), db_name=os.getenv("MONGO_DB", "studybuddy"))
ensure_indexes(rag)


# ────────────────────────────── Helpers ──────────────────────────────
def _infer_mime(filename: str) -> str:
    lower = filename.lower()
    if lower.endswith(".pdf"):
        return "application/pdf"
    if lower.endswith(".docx"):
        return "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    return "application/octet-stream"


def _extract_pages(filename: str, file_bytes: bytes) -> List[Dict[str, Any]]:
    mime = _infer_mime(filename)
    if mime == "application/pdf":
        return parse_pdf_bytes(file_bytes)
    elif mime == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
        return parse_docx_bytes(file_bytes)
    else:
        raise HTTPException(status_code=400, detail=f"Unsupported file type: {filename}")


# ────────────────────────────── Routes ──────────────────────────────
@app.get("/", response_class=HTMLResponse)
def index():
    index_path = os.path.join("static", "index.html")
    if not os.path.exists(index_path):
        return HTMLResponse("<h1>StudyBuddy RAG</h1><p>Static files not found.</p>")
    return FileResponse(index_path)


@app.post("/upload")
async def upload_files(
    request: Request,
    background_tasks: BackgroundTasks,
    user_id: str = Form(...),
    files: List[UploadFile] = File(...),
):
    """
    Ingest many files: PDF/DOCX.
    Steps:
      1) Extract text & images
      2) Caption images (BLIP base, CPU ok)
      3) Merge captions into page text
      4) Chunk into semantic cards (topic_name, summary, content + metadata)
      5) Embed with all-MiniLM-L6-v2
      6) Store in MongoDB with per-user and per-filename metadata
      7) Create a file-level summary
    """
    job_id = str(uuid.uuid4())
    # Read file bytes upfront to avoid reading from closed streams in the background task
    preloaded_files = []
    for uf in files:
        raw = await uf.read()
        preloaded_files.append((uf.filename, raw))

    # Process files in background
    async def _process():
        total_cards = 0
        file_summaries = []
        for fname, raw in preloaded_files:
            logger.info(f"[{job_id}] Parsing {fname} ({len(raw)} bytes)")
            # Extract pages from file
            pages = _extract_pages(fname, raw)
            # Caption images per page (if any)
            num_imgs = sum(len(p.get("images", [])) for p in pages)
            captions = []
            if num_imgs > 0:
                for p in pages:
                    caps = []
                    for im in p.get("images", []):
                        try:
                            cap = captioner.caption_image(im)
                            caps.append(cap)
                        except Exception as e:
                            logger.warning(f"Caption error: {e}")
                    captions.append(caps)
            else:
                captions = [[] for _ in pages]
            # Merge captions into text
            for idx, p in enumerate(pages):
                if captions[idx]:
                    p["text"] = (p.get("text", "") + "\n\n" + "\n".join([f"[Image] {c}" for c in captions[idx]])).strip()
            # Build cards
            cards = build_cards_from_pages(pages, filename=fname, user_id=user_id)
            logger.info(f"[{job_id}] Built {len(cards)} cards for {fname}")
            # Embed & store
            embeddings = embedder.embed([c["content"] for c in cards])
            for c, vec in zip(cards, embeddings):
                c["embedding"] = vec
            # Store cards in MongoDB
            rag.store_cards(cards)
            total_cards += len(cards)
            # File-level summary (cheap extractive)
            full_text = "\n\n".join(p.get("text", "") for p in pages)
            file_summary = cheap_summarize(full_text, max_sentences=6)
            rag.upsert_file_summary(user_id=user_id, filename=fname, summary=file_summary)
            file_summaries.append({"filename": fname, "summary": file_summary})
        logger.info(f"[{job_id}] Ingestion complete. Total cards: {total_cards}")

    # Kick off processing in background to keep UI responsive
    background_tasks.add_task(_process)
    return {"job_id": job_id, "status": "processing"}


@app.get("/cards")
def list_cards(user_id: str, filename: Optional[str] = None, limit: int = 50, skip: int = 0):
    return rag.list_cards(user_id=user_id, filename=filename, limit=limit, skip=skip)


@app.get("/file-summary")
def get_file_summary(user_id: str, filename: str):
    doc = rag.get_file_summary(user_id=user_id, filename=filename)
    if not doc:
        raise HTTPException(404, detail="No summary found for that file.")
    return {"filename": filename, "summary": doc.get("summary", "")}


@app.post("/chat")
async def chat(user_id: str = Form(...), question: str = Form(...), k: int = Form(6)):
    """
    RAG chat that answers ONLY from uploaded materials.
    - Preload all filenames + summaries; use NVIDIA to classify file relevance to the question (true/false)
    - Restrict vector search to relevant files (fall back to all if none)
    - Bring in recent chat memory: last 3 via NVIDIA relevance; remaining 17 via semantic search
    - After answering, summarize (q, a) via NVIDIA and store into the LRU (last 20)
    """
    from memo.memory import MemoryLRU
    from memo.history import summarize_qa_with_nvidia, files_relevance, related_recent_and_semantic_context
    from utils.router import NVIDIA_SMALL  # reuse default name
    memory = app.state.__dict__.setdefault("memory_lru", MemoryLRU())

    # 0) If the question is about a specific file, return the file summary
    m = re.search(r"what\s+is\s+the\s+(.+?\.(pdf|docx))\s+about\??", question, re.IGNORECASE)
    if m:
        fn = m.group(1)
        doc = rag.get_file_summary(user_id=user_id, filename=fn)
        if doc:
            return {"answer": doc.get("summary", ""), "sources": [{"filename": fn, "file_summary": True}]}
        else:
            return {"answer": "I couldn't find a summary for that file in your library.", "sources": []}

    # 1) Preload file list + summaries
    files_list = rag.list_files(user_id=user_id)  # [{filename, summary}]
    # Ask NVIDIA to mark relevance per file
    relevant_map = await files_relevance(question, files_list, nvidia_rotator)
    relevant_files = [fn for fn, ok in relevant_map.items() if ok]

    # 2) Memory context: recent 3 via NVIDIA relevance, remaining 17 via semantic search.
    # related_recent_and_semantic_context returns an empty placeholder for the recent
    # part; the NVIDIA pruning of the last 3 summaries happens here in app.py.
    recent_related, semantic_related = await related_recent_and_semantic_context(user_id, question, memory, embedder)
    recent3 = memory.recent(user_id, 3)
    if recent3:
        sys = "Pick only items that directly relate to the new question. Output the selected items verbatim, no commentary. If none, output nothing."
        numbered = [{"id": i+1, "text": s} for i, s in enumerate(recent3)]
        user = f"Question: {question}\nCandidates:\n{json.dumps(numbered, ensure_ascii=False)}\nSelect any related items and output ONLY their 'text' values concatenated."
        try:
            from utils.rotator import robust_post_json
            key = nvidia_rotator.get_key()
            url = "https://integrate.api.nvidia.com/v1/chat/completions"
            payload = {
                "model": os.getenv("NVIDIA_SMALL", "meta/llama-3.1-8b-instruct"),
                "temperature": 0.0,
                "messages": [
                    {"role": "system", "content": sys},
                    {"role": "user", "content": user},
                ]
            }
            headers = {"Content-Type": "application/json", "Authorization": f"Bearer {key or ''}"}
            data = await robust_post_json(url, headers, payload, nvidia_rotator)
            recent_related = data["choices"][0]["message"]["content"].strip()
        except Exception as e:
            logger.warning(f"Recent-related NVIDIA error: {e}")
            recent_related = ""

    # 3) RAG vector search (restricted to relevant files if any)
    q_vec = embedder.embed([question])[0]
    hits = rag.vector_search(user_id=user_id, query_vector=q_vec, k=k, filenames=relevant_files if relevant_files else None)
    if not hits:
        return {
            "answer": "I don't know based on your uploaded materials. Try uploading more sources or rephrasing the question.",
            "sources": [],
            "relevant_files": relevant_files
        }
    # Compose context
    contexts = []
    sources_meta = []
    for h in hits:
        doc = h["doc"]
        score = h["score"]
        contexts.append(f"[{doc.get('topic_name','Topic')}] {trim_text(doc.get('content',''), 1200)}")
        sources_meta.append({
            "filename": doc.get("filename"),
            "topic_name": doc.get("topic_name"),
            "page_span": doc.get("page_span"),
            "score": float(score),
            "chunk_id": str(doc.get("_id", ""))
        })
    context_text = "\n\n---\n\n".join(contexts)

    # Add file-level summaries for relevant files
    file_summary_block = ""
    if relevant_files:
        fsum_map = {f["filename"]: f.get("summary","") for f in files_list}
        lines = [f"[{fn}] {fsum_map.get(fn, '')}" for fn in relevant_files]
        file_summary_block = "\n".join(lines)

    # Guardrail instruction to avoid hallucination
    system_prompt = (
        "You are a careful study assistant. Answer strictly using the given CONTEXT.\n"
        "If the answer isn't in the context, say 'I don't know based on the provided materials.'\n"
        "Write concise, clear explanations with citations like (source: filename, topic).\n"
    )

    # Add recent chat context and historical similarity context
    history_block = ""
    if recent_related or semantic_related:
        history_block = "RECENT_CHAT_CONTEXT:\n" + (recent_related or "") + ("\n\nHISTORICAL_SIMILARITY_CONTEXT:\n" + semantic_related if semantic_related else "")
    composed_context = ""
    if history_block:
        composed_context += history_block + "\n\n"
    if file_summary_block:
        composed_context += "FILE_SUMMARIES:\n" + file_summary_block + "\n\n"
    composed_context += "DOC_CONTEXT:\n" + context_text

    # Compose user prompt
    user_prompt = f"QUESTION:\n{question}\n\nCONTEXT:\n{composed_context}"
    # Choose model (cost-aware)
    selection = select_model(question=question, context=composed_context)
    logger.info(f"Model selection: {selection}")
    # Generate answer with model
    try:
        answer = await generate_answer_with_model(
            selection=selection,
            system_prompt=system_prompt,
            user_prompt=user_prompt,
            gemini_rotator=gemini_rotator,
            nvidia_rotator=nvidia_rotator
        )
    except Exception as e:
        logger.error(f"LLM error: {e}")
        answer = "I had trouble contacting the language model provider just now. Please try again."
    # After answering: summarize QA and store in memory (LRU, last 20)
    try:
        qa_sum = await summarize_qa_with_nvidia(question, answer, nvidia_rotator)
        memory.add(user_id, qa_sum)
    except Exception as e:
        logger.warning(f"QA summarize/store failed: {e}")
    # Trim for logging
    logger.info("LLM answer (trimmed): %s", trim_text(answer, 200).replace("\n", " "))
    return {"answer": answer, "sources": sources_meta}


@app.get("/healthz")
def health():
    return {"ok": True}
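To exercise the two main routes end to end, here is a small client sketch using httpx (already in `requirements.txt`). The base URL and file path are placeholders, and ingestion runs in the background, so `/chat` only finds cards once processing has finished:

```python
import asyncio
import httpx

async def main():
    base = "http://localhost:8000"  # placeholder base URL
    async with httpx.AsyncClient(timeout=60) as client:
        # Upload a document (multipart form-data, matching /upload's fields)
        with open("notes.pdf", "rb") as f:  # placeholder file
            r = await client.post(f"{base}/upload",
                                  data={"user_id": "user1"},
                                  files={"files": ("notes.pdf", f, "application/pdf")})
        print(r.json())  # {'job_id': ..., 'status': 'processing'}

        # Ask a question once background ingestion has finished
        r = await client.post(f"{base}/chat",
                              data={"user_id": "user1",
                                    "question": "Summarize the uploaded notes.",
                                    "k": 6})
        print(r.json()["answer"])

asyncio.run(main())
```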
dw_model.py
ADDED
@@ -0,0 +1,49 @@
# dw_model.py
### --- A. transformer and embedder ---
import os
import shutil
from huggingface_hub import snapshot_download

# Set up paths
MODEL_REPO = "sentence-transformers/all-MiniLM-L6-v2"
MODEL_CACHE_DIR = "/app/model_cache"
HF_CACHE_DIR = os.getenv("HF_HOME", "/home/user/.cache/huggingface")

print("⏳ Downloading the SentenceTransformer model...")
# Download directly into /app/model_cache to avoid duplicating files from the HF cache
model_path = snapshot_download(
    repo_id=MODEL_REPO,
    cache_dir=HF_CACHE_DIR,        # Store HF cache in the user cache dir
    local_dir=MODEL_CACHE_DIR,     # Place the usable model here
    local_dir_use_symlinks=False   # Copy files into local_dir (no symlinks)
)

print("Model path: ", model_path)
if not os.path.exists(MODEL_CACHE_DIR):
    os.makedirs(MODEL_CACHE_DIR)

# Verify structure after download
print("\n📂 LLM Model Structure (Build Level):")
for root, dirs, files in os.walk(MODEL_CACHE_DIR):
    print(f"📁 {root}/")
    for file in files:
        print(f"  📄 {file}")


### --- B. translation modules ---
# Optional pre-download of translation models. These can be very large and
# may exceed build storage limits on constrained environments (e.g., HF Spaces).
# Control with env var PRELOAD_TRANSLATORS ("1" to enable; default: disabled).
PRELOAD_TRANSLATORS = os.getenv("PRELOAD_TRANSLATORS", "0")
if PRELOAD_TRANSLATORS == "1":
    try:
        from transformers import pipeline
        print("⏬ Pre-downloading Vietnamese–English translator...")
        _ = pipeline("translation", model="VietAI/envit5-translation", src_lang="vi", tgt_lang="en", device=-1)
        print("⏬ Pre-downloading Chinese–English translator...")
        _ = pipeline("translation", model="Helsinki-NLP/opus-mt-zh-en", device=-1)
        print("✅ Translators preloaded.")
    except Exception as e:
        print(f"⚠️ Skipping translator preload due to error: {e}")
else:
    print("ℹ️ Skipping translator pre-download (PRELOAD_TRANSLATORS != '1'). They will lazy-load at runtime.")
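A runtime-consumption sketch: `utils/embeddings.py` itself is not shown in this commit view, so how it resolves the model is an assumption. With `HF_HOME` / `SENTENCE_TRANSFORMERS_HOME` set as in the Dockerfile, loading by model name should resolve from the pre-populated cache instead of re-downloading; pointing at `/app/model_cache` directly would be an alternative.

```python
import os
from sentence_transformers import SentenceTransformer

# Resolves via the caches populated at build time by dw_model.py (assumption)
model_name = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
model = SentenceTransformer(model_name)
vecs = model.encode(["What is reinforcement learning?"])
print(len(vecs[0]))  # 384 dimensions for all-MiniLM-L6-v2
```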
memo/history.py
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ────────────────────────────── memo/history.py ──────────────────────────────
|
| 2 |
+
import os
|
| 3 |
+
import json
|
| 4 |
+
import logging
|
| 5 |
+
from typing import List, Dict, Any, Tuple
|
| 6 |
+
import numpy as np
|
| 7 |
+
|
| 8 |
+
from utils.logger import get_logger
|
| 9 |
+
from utils.rotator import robust_post_json
|
| 10 |
+
from utils.embeddings import EmbeddingClient
|
| 11 |
+
|
| 12 |
+
logger = get_logger("RAG", __name__)
|
| 13 |
+
|
| 14 |
+
NVIDIA_SMALL = os.getenv("NVIDIA_SMALL", "meta/llama-3.1-8b-instruct")
|
| 15 |
+
|
| 16 |
+
async def _nvidia_chat(system_prompt: str, user_prompt: str, nvidia_key: str, rotator) -> str:
|
| 17 |
+
"""
|
| 18 |
+
Minimal NVIDIA Chat call that enforces no-comment concise outputs.
|
| 19 |
+
"""
|
| 20 |
+
url = "https://integrate.api.nvidia.com/v1/chat/completions"
|
| 21 |
+
payload = {
|
| 22 |
+
"model": NVIDIA_SMALL,
|
| 23 |
+
"temperature": 0.0,
|
| 24 |
+
"messages": [
|
| 25 |
+
{"role": "system", "content": system_prompt},
|
| 26 |
+
{"role": "user", "content": user_prompt},
|
| 27 |
+
]
|
| 28 |
+
}
|
| 29 |
+
headers = {"Content-Type": "application/json", "Authorization": f"Bearer {nvidia_key or ''}"}
|
| 30 |
+
data = None
|
| 31 |
+
try:
|
| 32 |
+
data = await robust_post_json(url, headers, payload, rotator)
|
| 33 |
+
return data["choices"][0]["message"]["content"]
|
| 34 |
+
except Exception as e:
|
| 35 |
+
logger.warning(f"NVIDIA chat error: {e} • response: {data}")
|
| 36 |
+
return ""
|
| 37 |
+
|
| 38 |
+
def _safe_json(s: str) -> Any:
|
| 39 |
+
try:
|
| 40 |
+
return json.loads(s)
|
| 41 |
+
except Exception:
|
| 42 |
+
# Try to extract a JSON object from text
|
| 43 |
+
start = s.find("{")
|
| 44 |
+
end = s.rfind("}")
|
| 45 |
+
if start != -1 and end != -1 and end > start:
|
| 46 |
+
try:
|
| 47 |
+
return json.loads(s[start:end+1])
|
| 48 |
+
except Exception:
|
| 49 |
+
return {}
|
| 50 |
+
return {}
|
| 51 |
+
|
| 52 |
+
async def summarize_qa_with_nvidia(question: str, answer: str, rotator) -> str:
|
| 53 |
+
"""
|
| 54 |
+
Returns a single line block:
|
| 55 |
+
q: <concise>\na: <concise>
|
| 56 |
+
No extra commentary.
|
| 57 |
+
"""
|
| 58 |
+
sys = "You are a terse summarizer. Output exactly two lines:\nq: <short question summary>\na: <short answer summary>\nNo extra text."
|
| 59 |
+
user = f"Question:\n{question}\n\nAnswer:\n{answer}"
|
| 60 |
+
key = rotator.get_key()
|
| 61 |
+
out = await _nvidia_chat(sys, user, key, rotator)
|
| 62 |
+
# Basic guard if the model returns extra prose
|
| 63 |
+
    lines = [ln.strip() for ln in out.splitlines() if ln.strip()]
    ql = next((l for l in lines if l.lower().startswith('q:')), None)
    al = next((l for l in lines if l.lower().startswith('a:')), None)
    if not ql or not al:
        # Fallback: truncate the raw question/answer instead
        ql = "q: " + (question.strip()[:160] + ("…" if len(question.strip()) > 160 else ""))
        al = "a: " + (answer.strip()[:220] + ("…" if len(answer.strip()) > 220 else ""))
    return f"{ql}\n{al}"


async def files_relevance(question: str, file_summaries: List[Dict[str, str]], rotator) -> Dict[str, bool]:
    """
    Ask the NVIDIA model to mark each file as relevant (true) or not (false) for the question.
    Returns {filename: bool}.
    """
    sys = "You classify file relevance. Return STRICT JSON only with shape {\"relevance\":[{\"filename\":\"...\",\"relevant\":true|false}]}."
    items = [{"filename": f["filename"], "summary": f.get("summary", "")} for f in file_summaries]
    user = f"Question: {question}\n\nFiles:\n{json.dumps(items, ensure_ascii=False)}\n\nReturn JSON only."
    key = rotator.get_key()
    out = await _nvidia_chat(sys, user, key, rotator)
    data = _safe_json(out) or {}
    rels = {}
    for row in data.get("relevance", []):
        fn = row.get("filename")
        rv = row.get("relevant")
        if isinstance(fn, str) and isinstance(rv, bool):
            rels[fn] = rv
    # If parsing failed, default to considering all files possibly relevant
    if not rels and file_summaries:
        rels = {f["filename"]: True for f in file_summaries}
    return rels


def _cosine(a: np.ndarray, b: np.ndarray) -> float:
    denom = (np.linalg.norm(a) * np.linalg.norm(b)) or 1.0
    return float(np.dot(a, b) / denom)


def _as_text(block: str) -> str:
    return block.strip()


async def related_recent_and_semantic_context(user_id: str, question: str, memory, embedder: EmbeddingClient, topk_sem: int = 3) -> Tuple[str, str]:
    """
    Returns (recent_related_text, semantic_related_text).
    - recent_related_text: left empty here. The caller (app.py) holds the NVIDIA
      rotator and asks the model which of the last 3 summaries directly relate
      to the question; making that call here would create a circular dependency.
    - semantic_related_text: cosine-similarity search over the remaining 17
      summaries (top-k with a small threshold).
    """
    rest17 = memory.rest(user_id, 3)

    # Semantic search over the older summaries
    sem_text = ""
    if rest17:
        qv = np.array(embedder.embed([question])[0], dtype="float32")
        mats = embedder.embed([_as_text(s) for s in rest17])
        sims = [(_cosine(qv, np.array(v, dtype="float32")), s) for v, s in zip(mats, rest17)]
        sims.sort(key=lambda x: x[0], reverse=True)
        top = [s for (sc, s) in sims[:topk_sem] if sc > 0.15]  # small relevance threshold
        if top:
            sem_text = "\n\n".join(top)
    # Recent-related text is filled in by the caller (see docstring)
    return ("", sem_text)
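A minimal sketch of how the relevance gate above is meant to be driven from the app layer. The env-var prefix `NVIDIA_API_` and the file names are illustrative assumptions, not values confirmed by this commit; the call also requires a reachable NVIDIA endpoint and valid keys.

```python
# Editor's sketch (assumed prefix and data; requires network access + keys)
import asyncio
from utils.rotator import APIKeyRotator
from memo.history import files_relevance

async def main():
    rotator = APIKeyRotator(prefix="NVIDIA_API_", max_slots=5)  # hypothetical prefix
    summaries = [
        {"filename": "calculus.pdf", "summary": "Limits, derivatives, integrals."},
        {"filename": "biology.pdf", "summary": "Cell structure and genetics."},
    ]
    rels = await files_relevance("What is the chain rule?", summaries, rotator)
    print([fn for fn, ok in rels.items() if ok])  # ideally ["calculus.pdf"]

asyncio.run(main())
```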
memo/memory.py
ADDED
@@ -0,0 +1,32 @@
# ────────────────────────────── memo/memory.py ──────────────────────────────
from collections import deque, defaultdict
from typing import List, Dict

class MemoryLRU:
    """
    Per-user LRU-like memory of the last N (default 20) summarized chat sessions.
    Each item is a single string in the format: "q: ...\na: ..."
    """
    def __init__(self, capacity: int = 20):
        self.capacity = capacity
        self._store: Dict[str, deque] = defaultdict(lambda: deque(maxlen=self.capacity))

    def add(self, user_id: str, qa_summary: str):
        self._store[user_id].append(qa_summary)

    def recent(self, user_id: str, n: int = 3) -> List[str]:
        d = self._store[user_id]
        if not d:
            return []
        # Return last n in recency order (most recent first)
        return list(d)[-n:][::-1]

    def rest(self, user_id: str, skip_n: int = 3) -> List[str]:
        d = self._store[user_id]
        if not d:
            return []
        # Everything except the most recent `skip_n`, oldest first
        return list(d)[:-skip_n] if len(d) > skip_n else []

    def all(self, user_id: str) -> List[str]:
        return list(self._store[user_id])
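A minimal usage sketch of the memory split the context builder relies on: `recent()` feeds the recent-relatedness check, `rest()` feeds the semantic search. The user id and strings are illustrative.

```python
# Editor's sketch: how recent() and rest() partition the stored summaries
from memo.memory import MemoryLRU

mem = MemoryLRU(capacity=20)
for i in range(1, 6):
    mem.add("user_123", f"q: question {i}\na: answer {i}")

print(mem.recent("user_123", 3))  # summaries 5, 4, 3 (most recent first)
print(mem.rest("user_123", 3))    # summaries 1, 2 (oldest first)
```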
requirements.txt
ADDED
@@ -0,0 +1,13 @@
fastapi               #==0.114.2
uvicorn[standard]     #==0.30.6
python-multipart      #==0.0.9
pymongo               #==4.8.0
httpx                 #==0.27.2
python-docx           #==1.1.2
PyMuPDF               #==1.24.10
pillow                #==10.4.0
transformers          #==4.44.2
torch                 #==2.4.0
sentence-transformers #==3.1.1
sumy                  #==0.11.0
numpy                 #==1.26.4
static/index.html
ADDED
@@ -0,0 +1,47 @@
<!-- ────────────────────────────── static/index.html ────────────────────────────── -->
<!doctype html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <title>StudyBuddy</title>
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <link rel="stylesheet" href="/static/styles.css">
</head>
<body>
  <div class="container">
    <header>
      <h1>📚 StudyBuddy</h1>
      <p>Upload your PDFs/DOCX, then chat with your materials. No hallucinations — answers only come from your files.</p>
    </header>

    <section class="card">
      <h2>1/ Upload materials</h2>
      <form id="upload-form">
        <label>User ID</label>
        <input type="text" id="user_id" placeholder="e.g., user_123" required>
        <label>Files (PDF/DOCX, multiple)</label>
        <input type="file" id="files" multiple accept=".pdf,.docx">
        <button type="submit">Upload</button>
      </form>
      <pre id="upload-log"></pre>
    </section>

    <section class="card">
      <h2>2/ Ask questions</h2>
      <div id="chat">
        <div id="messages"></div>
        <div class="chat-controls">
          <input type="text" id="question" placeholder="Ask something about your files…">
          <button id="ask">Ask</button>
        </div>
      </div>
    </section>

    <footer>
      <small>StudyBuddy RAG • FastAPI on Hugging Face Spaces • MongoDB Vector • BLIP captions</small>
    </footer>
  </div>

  <script src="/static/script.js"></script>
</body>
</html>
static/script.js
ADDED
@@ -0,0 +1,72 @@
// ────────────────────────────── static/script.js ──────────────────────────────
const log = (msg) => {
  const el = document.getElementById("upload-log");
  el.textContent += msg + "\n";
  el.scrollTop = el.scrollHeight;
};

// Upload
document.getElementById("upload-form").addEventListener("submit", async (e) => {
  e.preventDefault();
  const user_id = document.getElementById("user_id").value.trim();
  const files = document.getElementById("files").files;
  if (!user_id || files.length === 0) {
    alert("Provide user id and at least one file.");
    return;
  }
  const fd = new FormData();
  fd.append("user_id", user_id);
  for (const f of files) fd.append("files", f);

  log("Uploading " + files.length + " file(s)…");
  try {
    const res = await fetch("/upload", { method: "POST", body: fd });
    const data = await res.json();
    log("Upload accepted. Job: " + (data.job_id || "?") + " • status: " + (data.status || "?"));
    log("Processing in the background. You can start chatting meanwhile.");
  } catch (err) {
    log("⚠️ Upload failed: " + err);
  }
});

// Chat
document.getElementById("ask").addEventListener("click", async () => {
  const user_id = document.getElementById("user_id").value.trim();
  const q = document.getElementById("question").value.trim();
  if (!user_id || !q) return;
  appendMessage("user", q);
  document.getElementById("question").value = "";

  const fd = new FormData();
  fd.append("user_id", user_id);
  fd.append("question", q);
  fd.append("k", "6");

  try {
    const res = await fetch("/chat", { method: "POST", body: fd });
    const data = await res.json();
    appendMessage("assistant", data.answer || "[no answer]");
    if (data.sources && data.sources.length) {
      appendSources(data.sources);
    }
  } catch (e) {
    appendMessage("assistant", "⚠️ Error contacting server.");
  }
});

function appendMessage(role, text) {
  const m = document.createElement("div");
  m.className = "msg " + role;
  m.textContent = text;
  document.getElementById("messages").appendChild(m);
  m.scrollIntoView({ behavior: "smooth", block: "end" });
}

function appendSources(sources) {
  const wrap = document.createElement("div");
  wrap.className = "sources";
  wrap.innerHTML = "<strong>Sources:</strong> " + sources.map(s => {
    const f = s.filename || "unknown";
    const t = s.topic_name ? (" • " + s.topic_name) : "";
    const p = s.page_span ? (" [pp. " + s.page_span.join("-") + "]") : "";
    return `<span class="pill">${f}${t}${p}</span>`;
  }).join(" ");
  document.getElementById("messages").appendChild(wrap);
  wrap.scrollIntoView({ behavior: "smooth", block: "end" });
}
static/styles.css
ADDED
@@ -0,0 +1,66 @@
/* ────────────────────────────── static/styles.css ────────────────────────────── */
:root {
  --bg: #0b1020;
  --card: #12193a;
  --text: #e6ecff;
  --muted: #9bb0ff;
  --accent: #7aa2ff;
  --pill: #1f2a5c;
  --green: #41d6a5;
}

* { box-sizing: border-box; }

body {
  margin: 0;
  font-family: system-ui, -apple-system, Segoe UI, Roboto, Ubuntu, Cantarell, Noto Sans, sans-serif;
  color: var(--text);
  background: radial-gradient(1200px 600px at 20% -10%, #18225a, var(--bg));
}

.container {
  max-width: 960px;
  margin: 0 auto;
  padding: 24px;
}

header h1 { margin: 0 0 8px; }
header p { color: var(--muted); margin: 0 0 16px; }

.card {
  background: var(--card);
  border: 1px solid #1f2750;
  border-radius: 16px;
  padding: 16px;
  margin: 16px 0;
  box-shadow: 0 10px 30px rgba(0,0,0,0.25);
}

label { display: block; margin: 8px 0 6px; color: var(--muted); }
input[type="text"], input[type="file"] {
  width: 100%; padding: 10px 12px; border-radius: 12px; border: 1px solid #2a3570;
  background: #0f1430; color: var(--text);
}
button {
  margin-top: 12px;
  background: linear-gradient(135deg, var(--accent), #5bc7ff);
  color: #0a0f25; border: none; border-radius: 12px; padding: 10px 16px; font-weight: 600;
  cursor: pointer;
}
button:hover { filter: brightness(1.07); }

#upload-log {
  height: 120px; overflow: auto; background: #0f1430; padding: 10px; border-radius: 12px; border: 1px solid #2a3570;
  color: #b9c7ff;
}

#chat { display: flex; flex-direction: column; gap: 12px; }
#messages {
  height: 300px; overflow: auto; background: #0f1430; padding: 12px; border-radius: 12px; border: 1px solid #2a3570;
}
.msg { padding: 10px 12px; border-radius: 12px; margin: 6px 0; max-width: 80%; white-space: pre-wrap; }
.msg.user { margin-left: auto; background: #173361; }
.msg.assistant { background: #0f244d; border: 1px solid #243a7a; }
.sources { margin: 8px 0; }
.pill { display: inline-block; background: var(--pill); padding: 4px 8px; border-radius: 999px; margin: 2px; color: #cbd6ff; border: 1px solid #304088; }
footer { text-align: center; color: var(--muted); margin-top: 24px; }
utils/caption.py
ADDED
@@ -0,0 +1,41 @@
# ────────────────────────────── utils/caption.py ──────────────────────────────
from PIL import Image
from .logger import get_logger

# Use transformers BLIP base (CPU friendly)
try:
    from transformers import BlipProcessor, BlipForConditionalGeneration
except Exception:
    BlipProcessor = None
    BlipForConditionalGeneration = None

logger = get_logger("CAPTION", __name__)


class BlipCaptioner:
    def __init__(self):
        self._ready = False
        self.processor = None
        self.model = None

    def _lazy_load(self):
        if self._ready:
            return
        if BlipProcessor is None or BlipForConditionalGeneration is None:
            logger.warning("transformers not available; image captions will be skipped.")
            self._ready = True
            return
        logger.info("Loading BLIP captioner (base)…")
        self.processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
        self.model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
        self._ready = True

    def caption_image(self, image: Image.Image) -> str:
        self._lazy_load()
        if self.processor is None or self.model is None:
            return ""
        inputs = self.processor(images=image, return_tensors="pt")
        out = self.model.generate(**inputs, max_new_tokens=40)
        return self.processor.decode(out[0], skip_special_tokens=True).strip()
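A minimal sketch of the lazy-loading captioner. The image path is illustrative; the BLIP weights download on the first `caption_image()` call, not at construction, which keeps startup cheap.

```python
# Editor's sketch (hypothetical file path; first call downloads BLIP weights)
from PIL import Image
from utils.caption import BlipCaptioner

captioner = BlipCaptioner()                       # no model load yet
img = Image.open("diagram.png").convert("RGB")    # hypothetical image
print(captioner.caption_image(img))               # e.g. "a diagram of a cell" (output varies)
```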
utils/chunker.py
ADDED
@@ -0,0 +1,85 @@
# ────────────────────────────── utils/chunker.py ──────────────────────────────
import re
from typing import List, Dict, Any
from .summarizer import cheap_summarize
from .common import slugify
from .logger import get_logger

# Heuristic "semantic" chunker:
# - Split by headings / numbered sections if present
# - Ensure each chunk is ~150-500 words (configurable via the constants below)
# - Generate a short summary + topic name

MAX_WORDS = 500
MIN_WORDS = 150
logger = get_logger("CHUNKER", __name__)

def _by_headings(text: str):
    # Split on markdown-like or outline headings
    pattern = r"(?m)^(#{1,6}\s.*|[0-9]+\.\s+[^\n]+|[A-Z][A-Za-z0-9\s\-]{2,}\n[-=]{3,})\s*$"
    parts = []
    last = 0
    for m in re.finditer(pattern, text):
        start = m.start()
        if start > last:
            parts.append(text[last:start])
        parts.append(text[start:m.end()])
        last = m.end()
    if last < len(text):
        parts.append(text[last:])
    if not parts:
        parts = [text]
    return parts


def build_cards_from_pages(pages: List[Dict[str, Any]], filename: str, user_id: str) -> List[Dict[str, Any]]:
    # Concatenate pages but keep page markers for metadata
    # (page_markers is currently unused; the page span below is a coarse estimate)
    full = ""
    page_markers = []
    for p in pages:
        start = len(full)
        full += f"\n\n[[Page {p['page_num']}]]\n{p.get('text','').strip()}\n"
        page_markers.append((p['page_num'], start, len(full)))

    # First split by headings
    coarse = _by_headings(full)

    # Then pack into 150-500 word chunks
    cards = []
    buf = []
    buf_words = 0
    for block in coarse:
        words = block.split()
        if not words:
            continue
        if buf_words + len(words) > MAX_WORDS and buf_words >= MIN_WORDS:
            cards.append(" ".join(buf))
            buf, buf_words = [], 0
        buf.extend(words)
        buf_words += len(words)
    if buf_words > 0:
        cards.append(" ".join(buf))

    # Build card dicts
    out = []
    for i, content in enumerate(cards, 1):
        topic = cheap_summarize(content, max_sentences=1)
        if not topic:
            topic = content[:80] + "..."
        summary = cheap_summarize(content, max_sentences=3)
        # Estimate page span (coarse: the file's first and last page)
        first_page = pages[0]['page_num'] if pages else 1
        last_page = pages[-1]['page_num'] if pages else 1
        out.append({
            "user_id": user_id,
            "filename": filename,
            "topic_name": topic[:120],
            "summary": summary,
            "content": content,
            "page_span": [first_page, last_page],
            "card_id": f"{slugify(filename)}-c{i:04d}"
        })
    logger.info(f"Built {len(out)} cards from {len(pages)} pages for {filename}")
    return out
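A minimal sketch of the card-building step with fake in-memory pages, so no real PDF is needed. The page texts are illustrative; the repeated sentences just give the packer enough words to form chunks.

```python
# Editor's sketch: exercising the heading split + word packing on fake pages
from utils.chunker import build_cards_from_pages

pages = [
    {"page_num": 1, "text": "1. Introduction\n" + "Neural networks learn representations. " * 40},
    {"page_num": 2, "text": "2. Training\n" + "Gradient descent minimizes the loss. " * 40},
]
cards = build_cards_from_pages(pages, filename="notes.pdf", user_id="user_123")
for c in cards:
    print(c["card_id"], c["topic_name"][:60], c["page_span"])
```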
utils/common.py
ADDED
@@ -0,0 +1,20 @@
import re
import unicodedata
from .logger import get_logger

logger = get_logger("COMMON", __name__)

def split_sentences(text: str):
    return re.split(r"(?<=[\.\!\?])\s+", text.strip())

def slugify(value: str):
    value = str(value)
    value = unicodedata.normalize("NFKD", value).encode("ascii", "ignore").decode("ascii")
    value = re.sub(r"[^\w\s-]", "", value).strip().lower()
    return re.sub(r"[-\s]+", "-", value)

def trim_text(s: str, n: int):
    s = s or ""
    if len(s) <= n:
        return s
    return s[:n] + "…"
utils/embeddings.py
ADDED
@@ -0,0 +1,34 @@
# ────────────────────────────── utils/embeddings.py ──────────────────────────────
from typing import List
import numpy as np
from .logger import get_logger

try:
    from sentence_transformers import SentenceTransformer
except Exception:
    SentenceTransformer = None


logger = get_logger("EMBED", __name__)


class EmbeddingClient:
    def __init__(self, model_name: str = "sentence-transformers/all-MiniLM-L6-v2"):
        self.model_name = model_name
        self.model = None

    def _lazy(self):
        if self.model is None and SentenceTransformer is not None:
            logger.info(f"Loading embedding model: {self.model_name}")
            self.model = SentenceTransformer(self.model_name)

    def embed(self, texts: List[str]) -> List[list]:
        self._lazy()
        if self.model is None:
            # Fallback: hash-seeded random vectors -> NOT for production, but keeps code running without deps
            logger.warning("SentenceTransformer unavailable; using random fallback embeddings.")
            return [list(np.random.default_rng(hash(t) % (2**32)).normal(size=384).astype("float32")) for t in texts]
        vecs = self.model.encode(texts, show_progress_bar=False, normalize_embeddings=True)
        return [v.tolist() for v in vecs]
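Because `encode()` is called with `normalize_embeddings=True`, the vectors come back unit-length, so cosine similarity reduces to a plain dot product. A minimal sketch (sentences are illustrative; the model downloads on first use):

```python
# Editor's sketch: normalized embeddings make cosine similarity a dot product
import numpy as np
from utils.embeddings import EmbeddingClient

embedder = EmbeddingClient()
vecs = embedder.embed([
    "What is the chain rule?",
    "Derivatives of composite functions",
    "Cell membranes",
])
q, a, b = (np.array(v, dtype="float32") for v in vecs)
print(float(q @ a), float(q @ b))  # the related pair should score noticeably higher
```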
utils/logger.py
ADDED
@@ -0,0 +1,38 @@
import logging
import sys
from typing import Optional


_DEFAULT_FORMAT = "%(asctime)s %(levelname)s %(message)s"


def _ensure_root_handler() -> None:
    root_logger = logging.getLogger()
    if root_logger.handlers:
        return
    handler = logging.StreamHandler(stream=sys.stdout)
    formatter = logging.Formatter(_DEFAULT_FORMAT)
    handler.setFormatter(formatter)
    root_logger.addHandler(handler)
    root_logger.setLevel(logging.INFO)


class _TaggedAdapter(logging.LoggerAdapter):
    def process(self, msg, kwargs):
        tag = self.extra.get("tag", "")
        if tag and not str(msg).startswith(tag):
            msg = f"{tag} {msg}"
        return msg, kwargs


def get_logger(tag: str, name: Optional[str] = None) -> logging.LoggerAdapter:
    """
    Return a logger adapter that injects a [TAG] prefix into records.
    Example: logger = get_logger("APP") → logs like: [APP] message
    """
    _ensure_root_handler()
    logger_name = name or __name__
    base = logging.getLogger(logger_name)
    return _TaggedAdapter(base, {"tag": f"[{tag}]"})
utils/parser.py
ADDED
@@ -0,0 +1,53 @@
import io
from typing import List, Dict, Any
import fitz  # PyMuPDF
from docx import Document
from PIL import Image
from .logger import get_logger

logger = get_logger("PARSER", __name__)


def parse_pdf_bytes(b: bytes) -> List[Dict[str, Any]]:
    """
    Returns a list of pages, each {'page_num': i, 'text': str, 'images': [PIL.Image]}
    """
    pages = []
    with fitz.open(stream=b, filetype="pdf") as doc:
        for i, page in enumerate(doc):
            text = page.get_text("text")
            images = []
            for img in page.get_images(full=True):
                xref = img[0]
                pix = fitz.Pixmap(doc, xref)
                if pix.n - pix.alpha >= 4:  # CMYK or similar; convert to RGB
                    pix = fitz.Pixmap(fitz.csRGB, pix)
                im = Image.frombytes("RGBA" if pix.alpha else "RGB", (pix.width, pix.height), pix.samples)
                images.append(im.convert("RGB"))
                pix = None  # release the pixmap
            pages.append({"page_num": i + 1, "text": text, "images": images})
    logger.info(f"Parsed PDF with {len(pages)} pages")
    return pages


def parse_docx_bytes(b: bytes) -> List[Dict[str, Any]]:
    f = io.BytesIO(b)
    doc = Document(f)
    text = []
    images = []
    for rel in doc.part.rels.values():
        if "image" in rel.reltype:
            data = rel.target_part.blob
            try:
                im = Image.open(io.BytesIO(data)).convert("RGB")
                images.append(im)
            except Exception:
                pass
    for p in doc.paragraphs:
        text.append(p.text)
    pages = [{"page_num": 1, "text": "\n".join(text), "images": images}]
    logger.info("Parsed DOCX into single concatenated page")
    return pages
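A minimal sketch of the parse step on its own; the path is illustrative. Each returned page dict carries the text plus any extracted images, ready for the chunker and the BLIP captioner.

```python
# Editor's sketch (hypothetical file path)
from utils.parser import parse_pdf_bytes

with open("notes.pdf", "rb") as fh:
    pages = parse_pdf_bytes(fh.read())
for p in pages:
    print(p["page_num"], len(p["text"]), f"{len(p['images'])} image(s)")
```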
utils/rag.py
ADDED
@@ -0,0 +1,132 @@
# ────────────────────────────── utils/rag.py ──────────────────────────────
import os
from typing import List, Dict, Any, Optional
from pymongo import MongoClient, ASCENDING, TEXT
from pymongo.collection import Collection
from pymongo.errors import PyMongoError
import numpy as np
from .logger import get_logger

VECTOR_DIM = 384  # all-MiniLM-L6-v2
INDEX_NAME = os.getenv("MONGO_VECTOR_INDEX", "vector_index")
USE_ATLAS_VECTOR = os.getenv("ATLAS_VECTOR", "0") == "1"
logger = get_logger("RAG", __name__)


class RAGStore:
    def __init__(self, mongo_uri: str, db_name: str = "studybuddy"):
        self.client = MongoClient(mongo_uri)
        self.db = self.client[db_name]
        self.chunks: Collection = self.db["chunks"]
        self.files: Collection = self.db["files"]

    # ── Write ────────────────────────────────────────────────────────────────
    def store_cards(self, cards: List[Dict[str, Any]]):
        if not cards:
            return
        for c in cards:
            # Basic validation
            emb = c.get("embedding")
            if not emb or len(emb) != VECTOR_DIM:
                raise ValueError("Invalid embedding length; expected %d" % VECTOR_DIM)
        self.chunks.insert_many(cards, ordered=False)
        logger.info(f"Inserted {len(cards)} cards into MongoDB")

    def upsert_file_summary(self, user_id: str, filename: str, summary: str):
        self.files.update_one(
            {"user_id": user_id, "filename": filename},
            {"$set": {"summary": summary}},
            upsert=True
        )
        logger.info(f"Upserted summary for {filename} (user {user_id})")

    # ── Read ────────────────────────────────────────────────────────────────
    def list_cards(self, user_id: str, filename: Optional[str], limit: int, skip: int):
        q = {"user_id": user_id}
        if filename:
            q["filename"] = filename
        cur = self.chunks.find(q, {"embedding": 0}).skip(skip).limit(limit).sort([("_id", ASCENDING)])
        return list(cur)

    def list_files(self, user_id: str) -> List[Dict[str, Any]]:
        cur = self.files.find({"user_id": user_id}, {"_id": 0})
        return list(cur)

    def get_file_summary(self, user_id: str, filename: str):
        return self.files.find_one({"user_id": user_id, "filename": filename})

    def vector_search(self, user_id: str, query_vector: List[float], k: int = 6, filenames: Optional[List[str]] = None):
        if USE_ATLAS_VECTOR:
            # Atlas Vector Search (requires a pre-created index on 'embedding')
            pipeline = [
                {
                    "$search": {
                        "index": INDEX_NAME,
                        "knnBeta": {
                            "vector": query_vector,
                            "path": "embedding",
                            "k": k,
                        },
                        "filter": {"equals": {"path": "user_id", "value": user_id}},
                    }
                },
                # $project cannot mix inclusion and exclusion, so project first, then drop the embedding
                {"$project": {"doc": "$$ROOT", "score": {"$meta": "searchScore"}}},
                {"$unset": "doc.embedding"},
            ]
            if filenames:
                pipeline.append({"$match": {"doc.filename": {"$in": filenames}}})
            pipeline.append({"$limit": k})
            hits = list(self.chunks.aggregate(pipeline))
            return [{"doc": h["doc"], "score": h["score"]} for h in hits]
        else:
            # Fallback: scan a limited sample and compute cosine similarity locally
            q = {"user_id": user_id}
            if filenames:
                q["filename"] = {"$in": filenames}
            sample = list(self.chunks.find(q).limit(max(2000, k * 10)))
            if not sample:
                return []
            qv = np.array(query_vector, dtype="float32")
            scores = []
            for d in sample:
                v = np.array(d.get("embedding", [0] * VECTOR_DIM), dtype="float32")
                denom = (np.linalg.norm(qv) * np.linalg.norm(v)) or 1.0
                sim = float(np.dot(qv, v) / denom)
                scores.append((sim, d))
            # Sort by cosine similarity, descending, and keep the top k
            scores.sort(key=lambda x: x[0], reverse=True)
            top = scores[:k]
            logger.info(f"Vector search sample={len(sample)} returned top={len(top)}")
            return [{"doc": d, "score": s} for (s, d) in top]


def ensure_indexes(store: RAGStore):
    # Basic indexes plus a text index for fallback keyword search (optional)
    try:
        store.chunks.create_index([("user_id", ASCENDING), ("filename", ASCENDING)])
        store.chunks.create_index([("content", TEXT), ("topic_name", TEXT), ("summary", TEXT)], name="text_idx")
        store.files.create_index([("user_id", ASCENDING), ("filename", ASCENDING)], unique=True)
    except PyMongoError as e:
        logger.warning(f"Index creation warning: {e}")
    # Note: For Atlas Vector, create an Atlas Search index named INDEX_NAME on field "embedding" with vector options.
    # Example (in Atlas UI):
    # {
    #   "mappings": {
    #     "dynamic": false,
    #     "fields": {
    #       "embedding": {
    #         "type": "knnVector",
    #         "dimensions": 384,
    #         "similarity": "cosine"
    #       }
    #     }
    #   }
    # }
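An end-to-end retrieval sketch under the local-cosine fallback (`ATLAS_VECTOR=0`). It assumes a reachable MongoDB at `MONGO_URI` and that cards with embeddings have already been stored for this user; ids and filenames are illustrative.

```python
# Editor's sketch (assumes MONGO_URI is set and cards already exist in Mongo)
import os
from utils.embeddings import EmbeddingClient
from utils.rag import RAGStore, ensure_indexes

store = RAGStore(os.environ["MONGO_URI"])
ensure_indexes(store)
embedder = EmbeddingClient()

qv = embedder.embed(["What is the chain rule?"])[0]   # 384-dim query vector
hits = store.vector_search("user_123", qv, k=3, filenames=["calculus.pdf"])
for h in hits:
    print(round(h["score"], 3), h["doc"]["topic_name"])
```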
utils/rotator.py
ADDED
@@ -0,0 +1,61 @@
# ────────────────────────────── utils/rotator.py ──────────────────────────────
import os
import itertools
from typing import Optional

import httpx

from .logger import get_logger

logger = get_logger("ROTATOR", __name__)


class APIKeyRotator:
    """
    Round-robin API key rotator.
    - Loads keys from env vars with the given prefix (e.g., GEMINI_API_1..5)
    - get_key() returns the current key
    - rotate() moves to the next key
    - on HTTP 401/429/5xx you should call rotate() and retry (bounded)
    """
    def __init__(self, prefix: str, max_slots: int = 5):
        self.keys = []
        for i in range(1, max_slots + 1):
            v = os.getenv(f"{prefix}{i}")
            if v:
                self.keys.append(v.strip())
        if not self.keys:
            logger.warning(f"No API keys found for prefix {prefix}. Calls will likely fail.")
            self._cycle = itertools.cycle([""])
        else:
            self._cycle = itertools.cycle(self.keys)
        self.current = next(self._cycle)

    def get_key(self) -> Optional[str]:
        return self.current

    def rotate(self) -> Optional[str]:
        self.current = next(self._cycle)
        logger.info("Rotated API key.")
        return self.current


async def robust_post_json(url: str, headers: dict, payload: dict, rotator: APIKeyRotator, max_retries: int = 5):
    """
    POST JSON with simple retry+rotate on 401/403/429/5xx.
    Returns the JSON response.
    Note: the caller bakes the current key into url/headers, so rotation here
    mainly helps transient errors; for a key-specific failure the caller must
    rebuild the request with the new key.
    """
    for attempt in range(max_retries):
        try:
            async with httpx.AsyncClient(timeout=60) as client:
                r = await client.post(url, headers=headers, json=payload)
                if r.status_code in (401, 403, 429) or (500 <= r.status_code < 600):
                    logger.warning(f"HTTP {r.status_code} from provider. Rotating key and retrying ({attempt+1}/{max_retries})")
                    rotator.rotate()
                    continue
                r.raise_for_status()
                return r.json()
        except Exception as e:
            logger.warning(f"Request error: {e}. Rotating and retrying ({attempt+1}/{max_retries})")
            rotator.rotate()
    raise RuntimeError("Provider request failed after retries.")
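A minimal rotator sketch. The prefix `GEMINI_API_` mirrors the docstring's example; the key values are illustrative.

```python
# Editor's sketch: round-robin cycling over keys loaded from the environment
import os
from utils.rotator import APIKeyRotator

os.environ["GEMINI_API_1"] = "key-one"   # illustrative values
os.environ["GEMINI_API_2"] = "key-two"

rot = APIKeyRotator(prefix="GEMINI_API_", max_slots=5)
print(rot.get_key())  # key-one
print(rot.rotate())   # key-two
print(rot.rotate())   # key-one (wraps around)
```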
utils/router.py
ADDED
@@ -0,0 +1,83 @@
# ────────────────────────────── utils/router.py ──────────────────────────────
import os
from typing import Dict, Any

from .logger import get_logger
from .rotator import robust_post_json, APIKeyRotator

logger = get_logger("ROUTER", __name__)

# Default model names (can be overridden via env)
GEMINI_SMALL = os.getenv("GEMINI_SMALL", "gemini-2.5-flash-lite")
GEMINI_MED = os.getenv("GEMINI_MED", "gemini-2.5-flash")
GEMINI_PRO = os.getenv("GEMINI_PRO", "gemini-2.5-pro")

# NVIDIA small default (can be overridden via env)
NVIDIA_SMALL = os.getenv("NVIDIA_SMALL", "meta/llama-3.1-8b-instruct")  # example; adjust to your NIM catalog

def select_model(question: str, context: str) -> Dict[str, Any]:
    """
    Very lightweight complexity heuristic:
    - If long question or lots of context -> MED/PRO
    - If code/math keywords -> PRO
    - Else SMALL
    Prefers NVIDIA small when the question is short/simple (cost-awareness).
    """
    qlen = len(question.split())
    clen = len(context.split())
    hard_keywords = ("prove", "derivation", "complexity", "algorithm", "optimize", "theorem", "rigorous", "step-by-step", "policy critique", "ambiguity", "counterfactual")
    is_hard = any(k in question.lower() for k in hard_keywords) or qlen > 60 or clen > 1600

    if is_hard:
        # Use Gemini Pro (larger context)
        return {"provider": "gemini", "model": GEMINI_PRO}
    elif qlen > 25 or clen > 900:
        return {"provider": "gemini", "model": GEMINI_MED}
    else:
        # Prefer NVIDIA small for cheap/light questions
        return {"provider": "nvidia", "model": NVIDIA_SMALL}


async def generate_answer_with_model(selection: Dict[str, Any], system_prompt: str, user_prompt: str,
                                     gemini_rotator: APIKeyRotator, nvidia_rotator: APIKeyRotator) -> str:
    provider = selection["provider"]
    model = selection["model"]

    if provider == "gemini":
        key = gemini_rotator.get_key() or ""
        url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent?key={key}"
        payload = {
            "contents": [
                {"role": "user", "parts": [{"text": f"{system_prompt}\n\n{user_prompt}"}]}
            ],
            "generationConfig": {"temperature": 0.2}
        }
        headers = {"Content-Type": "application/json"}
        data = await robust_post_json(url, headers, payload, gemini_rotator)
        try:
            return data["candidates"][0]["content"]["parts"][0]["text"]
        except Exception:
            logger.warning(f"Unexpected Gemini response: {data}")
            return "I couldn't parse the model response."

    elif provider == "nvidia":
        # Many NVIDIA endpoints are OpenAI-compatible. Adjust if using a different path.
        key = nvidia_rotator.get_key() or ""
        url = "https://integrate.api.nvidia.com/v1/chat/completions"
        payload = {
            "model": model,
            "temperature": 0.2,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ]
        }
        headers = {"Content-Type": "application/json", "Authorization": f"Bearer {key}"}
        data = await robust_post_json(url, headers, payload, nvidia_rotator)
        try:
            return data["choices"][0]["message"]["content"]
        except Exception:
            logger.warning(f"Unexpected NVIDIA response: {data}")
            return "I couldn't parse the model response."

    return "Unsupported provider."
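The routing heuristic in action. `select_model` is pure (no network calls), so it can be sanity-checked directly; the expected outputs below assume the default model names, with no env overrides.

```python
# Editor's sketch: the word-count + keyword heuristic, with default env
from utils.router import select_model

print(select_model("What is a derivative?", ""))
# {'provider': 'nvidia', 'model': 'meta/llama-3.1-8b-instruct'}  (short/simple)

print(select_model("Prove the chain rule rigorously, step-by-step.", ""))
# {'provider': 'gemini', 'model': 'gemini-2.5-pro'}  (hard keywords trigger PRO)
```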
utils/summarizer.py
ADDED
@@ -0,0 +1,19 @@
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lex_rank import LexRankSummarizer
from .logger import get_logger

logger = get_logger("SUM", __name__)

def cheap_summarize(text: str, max_sentences: int = 3) -> str:
    try:
        parser = PlaintextParser.from_string(text, Tokenizer("english"))
        summarizer = LexRankSummarizer()
        sentences = summarizer(parser.document, max_sentences)
        return " ".join(str(s) for s in sentences)
    except Exception:
        # Fallback: naive first-N-sentences split
        logger.warning("sumy unavailable or failed; using naive summarization fallback.")
        parts = text.split(". ")
        return ". ".join(parts[:max_sentences])
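A minimal sketch of the extractive summarizer. LexRank needs nltk's punkt tokenizer data; if it is missing, the naive first-N-sentences fallback kicks in, so the call succeeds either way. The text is illustrative.

```python
# Editor's sketch: extractive summary with graceful fallback
from utils.summarizer import cheap_summarize

text = ("Photosynthesis converts light energy into chemical energy. "
        "It occurs in chloroplasts. Chlorophyll absorbs light. "
        "The Calvin cycle fixes carbon dioxide into sugars.")
print(cheap_summarize(text, max_sentences=2))
```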
warmup.py
ADDED
@@ -0,0 +1,17 @@
from sentence_transformers import SentenceTransformer
import torch
import os

print("🚀 Warming up model...")
embedding_model = SentenceTransformer("/app/model_cache", device="cpu")

# Some CPU backends on HF Spaces fail on .half(); make it configurable
USE_HALF = os.getenv("EMBEDDING_HALF", "1") == "1"
try:
    if USE_HALF and torch.cuda.is_available():
        embedding_model = embedding_model.half()
except Exception as e:
    print(f"⚠️ Skipping half precision due to: {e}")

embedding_model.to(torch.device("cpu"))
print("✅ Model warm-up complete!")