Spaces:
Sleeping
Sleeping
Commit
·
cf16f9f
1
Parent(s):
58a3663
Move to remote embedder API
Browse files- .DS_Store +0 -0
- .dockerignore +1 -0
- .gitignore +1 -0
- Dockerfile +4 -17
- dw_model.py +0 -30
- requirements.txt +0 -1
- utils/rag/embeddings.py +22 -18
- warmup.py +0 -17
.DS_Store
CHANGED
|
Binary files a/.DS_Store and b/.DS_Store differ
|
|
|
.dockerignore
CHANGED
|
@@ -25,6 +25,7 @@ coverage.xml
|
|
| 25 |
*.md
|
| 26 |
!README.md
|
| 27 |
ingestion_pipeline
|
|
|
|
| 28 |
exefiles
|
| 29 |
diagram
|
| 30 |
diagram.svg
|
|
|
|
| 25 |
*.md
|
| 26 |
!README.md
|
| 27 |
ingestion_pipeline
|
| 28 |
+
embedder
|
| 29 |
exefiles
|
| 30 |
diagram
|
| 31 |
diagram.svg
|
.gitignore
CHANGED
|
@@ -3,4 +3,5 @@ diagram
|
|
| 3 |
|
| 4 |
# For ingestion pipeline
|
| 5 |
ingestion_pipeline
|
|
|
|
| 6 |
exefiles
|
|
|
|
| 3 |
|
| 4 |
# For ingestion pipeline
|
| 5 |
ingestion_pipeline
|
| 6 |
+
embedder
|
| 7 |
exefiles
|
Dockerfile
CHANGED
|
@@ -23,25 +23,12 @@ COPY . .
|
|
| 23 |
# Install Python dependencies
|
| 24 |
RUN pip install --upgrade pip && pip install --no-cache-dir -r requirements.txt
|
| 25 |
|
| 26 |
-
#
|
| 27 |
ENV HF_HOME="/home/user/.cache/huggingface"
|
| 28 |
-
ENV SENTENCE_TRANSFORMERS_HOME="/home/user/.cache/huggingface/sentence-transformers"
|
| 29 |
-
ENV MEDGEMMA_HOME="/home/user/.cache/huggingface/sentence-transformers"
|
| 30 |
|
| 31 |
-
#
|
| 32 |
-
RUN mkdir -p /
|
| 33 |
-
chown -R user:user /
|
| 34 |
-
|
| 35 |
-
# Control preloading flags
|
| 36 |
-
ENV PRELOAD_TRANSLATORS="0"
|
| 37 |
-
ENV EMBEDDING_HALF="0"
|
| 38 |
-
|
| 39 |
-
# Preload embedding model and warmup
|
| 40 |
-
RUN test -f /app/dw_model.py && python /app/dw_model.py || true
|
| 41 |
-
RUN test -f /app/warmup.py && python /app/warmup.py || true
|
| 42 |
-
|
| 43 |
-
# Ensure ownership stays correct
|
| 44 |
-
RUN chown -R user:user /app/model_cache
|
| 45 |
|
| 46 |
# Expose port for HF Spaces
|
| 47 |
ENV PORT=7860
|
|
|
|
| 23 |
# Install Python dependencies
|
| 24 |
RUN pip install --upgrade pip && pip install --no-cache-dir -r requirements.txt
|
| 25 |
|
| 26 |
+
# Optional: general HF cache directory (kept for other models like BLIP)
|
| 27 |
ENV HF_HOME="/home/user/.cache/huggingface"
|
|
|
|
|
|
|
| 28 |
|
| 29 |
+
# Ensure cache directory ownership
|
| 30 |
+
RUN mkdir -p /home/user/.cache/huggingface && \
|
| 31 |
+
chown -R user:user /home/user/.cache/huggingface
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
# Expose port for HF Spaces
|
| 34 |
ENV PORT=7860
|
dw_model.py
DELETED
|
@@ -1,30 +0,0 @@
|
|
| 1 |
-
# dw_model.py
|
| 2 |
-
### --- A. transformer and embedder ---
|
| 3 |
-
import os
|
| 4 |
-
import shutil
|
| 5 |
-
from huggingface_hub import snapshot_download
|
| 6 |
-
|
| 7 |
-
# Set up paths
|
| 8 |
-
MODEL_REPO = "sentence-transformers/all-MiniLM-L6-v2"
|
| 9 |
-
MODEL_CACHE_DIR = "/app/model_cache"
|
| 10 |
-
HF_CACHE_DIR = os.getenv("HF_HOME", "/home/user/.cache/huggingface")
|
| 11 |
-
|
| 12 |
-
print("⏳ Downloading the SentenceTransformer model...")
|
| 13 |
-
# Download directly into /app/model_cache to avoid duplicating files from HF cache
|
| 14 |
-
model_path = snapshot_download(
|
| 15 |
-
repo_id=MODEL_REPO,
|
| 16 |
-
cache_dir=HF_CACHE_DIR, # Store HF cache in user cache dir
|
| 17 |
-
local_dir=MODEL_CACHE_DIR, # Place usable model here
|
| 18 |
-
local_dir_use_symlinks=False # Copy files into local_dir (no symlinks)
|
| 19 |
-
)
|
| 20 |
-
|
| 21 |
-
print("Model path: ", model_path)
|
| 22 |
-
if not os.path.exists(MODEL_CACHE_DIR):
|
| 23 |
-
os.makedirs(MODEL_CACHE_DIR)
|
| 24 |
-
|
| 25 |
-
# Verify structure after moving
|
| 26 |
-
print("\n📂 LLM Model Structure (Build Level):")
|
| 27 |
-
for root, dirs, files in os.walk(MODEL_CACHE_DIR):
|
| 28 |
-
print(f"📁 {root}/")
|
| 29 |
-
for file in files:
|
| 30 |
-
print(f" 📄 {file}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
|
@@ -8,7 +8,6 @@ PyMuPDF==1.24.10
|
|
| 8 |
pillow==10.4.0
|
| 9 |
transformers==4.44.2
|
| 10 |
torch==2.2.2
|
| 11 |
-
sentence-transformers==3.1.1
|
| 12 |
sumy==0.11.0
|
| 13 |
numpy==1.26.4
|
| 14 |
reportlab==4.0.9
|
|
|
|
| 8 |
pillow==10.4.0
|
| 9 |
transformers==4.44.2
|
| 10 |
torch==2.2.2
|
|
|
|
| 11 |
sumy==0.11.0
|
| 12 |
numpy==1.26.4
|
| 13 |
reportlab==4.0.9
|
utils/rag/embeddings.py
CHANGED
|
@@ -2,32 +2,36 @@
|
|
| 2 |
import os
|
| 3 |
from typing import List
|
| 4 |
import numpy as np
|
|
|
|
| 5 |
from ..logger import get_logger
|
| 6 |
|
| 7 |
-
try:
|
| 8 |
-
from sentence_transformers import SentenceTransformer
|
| 9 |
-
except Exception:
|
| 10 |
-
SentenceTransformer = None
|
| 11 |
-
|
| 12 |
|
| 13 |
logger = get_logger("EMBED", __name__)
|
| 14 |
|
| 15 |
|
| 16 |
class EmbeddingClient:
|
| 17 |
-
def __init__(self, model_name: str = "sentence-transformers/all-MiniLM-L6-v2"):
|
| 18 |
self.model_name = model_name
|
| 19 |
-
self.model = None
|
| 20 |
-
|
| 21 |
-
def _lazy(self):
|
| 22 |
-
if self.model is None and SentenceTransformer is not None:
|
| 23 |
-
logger.info(f"Loading embedding model: {self.model_name}")
|
| 24 |
-
self.model = SentenceTransformer(self.model_name)
|
| 25 |
|
| 26 |
def embed(self, texts: List[str]) -> List[list]:
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
|
|
|
| 31 |
return [list(np.random.default_rng(hash(t) % (2**32)).normal(size=384).astype("float32")) for t in texts]
|
| 32 |
-
|
| 33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
import os
|
| 3 |
from typing import List
|
| 4 |
import numpy as np
|
| 5 |
+
import httpx
|
| 6 |
from ..logger import get_logger
|
| 7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
logger = get_logger("EMBED", __name__)
|
| 10 |
|
| 11 |
|
| 12 |
class EmbeddingClient:
|
| 13 |
+
def __init__(self, model_name: str = "sentence-transformers/all-MiniLM-L6-v2", api_url: str | None = None):
|
| 14 |
self.model_name = model_name
|
| 15 |
+
self.api_url = api_url or os.getenv("EMBEDDER_URL")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
def embed(self, texts: List[str]) -> List[list]:
|
| 18 |
+
if not texts:
|
| 19 |
+
return []
|
| 20 |
+
|
| 21 |
+
if not self.api_url:
|
| 22 |
+
logger.warning("EMBEDDER_URL not set; using random fallback embeddings.")
|
| 23 |
return [list(np.random.default_rng(hash(t) % (2**32)).normal(size=384).astype("float32")) for t in texts]
|
| 24 |
+
|
| 25 |
+
url = self.api_url.rstrip("/") + "/embed"
|
| 26 |
+
try:
|
| 27 |
+
with httpx.Client(timeout=30.0) as client:
|
| 28 |
+
resp = client.post(url, json={"texts": texts})
|
| 29 |
+
resp.raise_for_status()
|
| 30 |
+
data = resp.json()
|
| 31 |
+
vectors = data.get("vectors")
|
| 32 |
+
if not isinstance(vectors, list):
|
| 33 |
+
raise ValueError("Invalid response: 'vectors' field missing or not a list")
|
| 34 |
+
return vectors
|
| 35 |
+
except Exception as e:
|
| 36 |
+
logger.error(f"Embedding API call failed: {e}; falling back to random embeddings.")
|
| 37 |
+
return [list(np.random.default_rng(hash(t) % (2**32)).normal(size=384).astype("float32")) for t in texts]
|
warmup.py
DELETED
|
@@ -1,17 +0,0 @@
|
|
| 1 |
-
from sentence_transformers import SentenceTransformer
|
| 2 |
-
import torch
|
| 3 |
-
import os
|
| 4 |
-
|
| 5 |
-
print("🚀 Warming up model...")
|
| 6 |
-
embedding_model = SentenceTransformer("/app/model_cache", device="cpu")
|
| 7 |
-
|
| 8 |
-
# Some CPU backends on HF Spaces fail on .half(); make it configurable
|
| 9 |
-
USE_HALF = os.getenv("EMBEDDING_HALF", "1") == "1"
|
| 10 |
-
try:
|
| 11 |
-
if USE_HALF and torch.cuda.is_available():
|
| 12 |
-
embedding_model = embedding_model.half()
|
| 13 |
-
except Exception as e:
|
| 14 |
-
print(f"⚠️ Skipping half precision due to: {e}")
|
| 15 |
-
|
| 16 |
-
embedding_model.to(torch.device("cpu"))
|
| 17 |
-
print("✅ Model warm-up complete!")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|