Spaces:
Sleeping
Sleeping
Commit
·
11bb7a3
1
Parent(s):
ae9bdd2
fix: added haystack-ai-integrations in requirements
Browse files
- .gitignore +0 -0
- Dockerfile +6 -10
- pipelines.py +4 -5
- requirements.txt +7 -4
.gitignore
CHANGED
|
Binary files a/.gitignore and b/.gitignore differ
|
|
|
Dockerfile
CHANGED
|
@@ -1,25 +1,21 @@
|
|
| 1 |
-
FROM python:3.11
|
| 2 |
|
| 3 |
-
# Install system dependencies
|
| 4 |
RUN apt-get update && \
|
| 5 |
apt-get install -y \
|
| 6 |
tesseract-ocr libtesseract-dev poppler-utils \
|
|
|
|
| 7 |
build-essential pkg-config libgl1 && \
|
|
|
|
| 8 |
rm -rf /var/lib/apt/lists/*
|
| 9 |
|
| 10 |
WORKDIR /app
|
| 11 |
|
| 12 |
-
# Install PyTorch CPU first
|
| 13 |
-
RUN pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu
|
| 14 |
-
|
| 15 |
-
# Install project dependencies
|
| 16 |
COPY requirements.txt .
|
| 17 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 18 |
|
| 19 |
-
# Copy app code
|
| 20 |
COPY . .
|
| 21 |
|
| 22 |
-
ENV PYTHONUNBUFFERED=1
|
|
|
|
| 23 |
|
| 24 |
-
|
| 25 |
-
CMD ["sh", "-c", "uvicorn main:app --host 0.0.0.0 --port ${PORT}"]
|
|
|
|
| 1 |
+
FROM python:3.11-slim
|
| 2 |
|
|
|
|
| 3 |
RUN apt-get update && \
|
| 4 |
apt-get install -y \
|
| 5 |
tesseract-ocr libtesseract-dev poppler-utils \
|
| 6 |
+
tesseract-ocr-eng tesseract-ocr-fra \
|
| 7 |
build-essential pkg-config libgl1 && \
|
| 8 |
+
apt-get clean && \
|
| 9 |
rm -rf /var/lib/apt/lists/*
|
| 10 |
|
| 11 |
WORKDIR /app
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
COPY requirements.txt .
|
| 14 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 15 |
|
|
|
|
| 16 |
COPY . .
|
| 17 |
|
| 18 |
+
ENV PYTHONUNBUFFERED=1 \
|
| 19 |
+
PORT=8000
|
| 20 |
|
| 21 |
+
CMD ["sh", "-c", "uvicorn main:app --host 0.0.0.0 --port ${PORT}"]
|
|
|
pipelines.py
CHANGED
|
@@ -5,9 +5,9 @@ from haystack.dataclasses import Document
|
|
| 5 |
from haystack.document_stores.in_memory import InMemoryDocumentStore
|
| 6 |
from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
|
| 7 |
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
|
| 8 |
-
|
| 9 |
-
from haystack_integrations.components.
|
| 10 |
-
from
|
| 11 |
from haystack.components.preprocessors import DocumentSplitter
|
| 12 |
|
| 13 |
# Set up logging
|
|
@@ -19,8 +19,7 @@ document_store = InMemoryDocumentStore()
|
|
| 19 |
doc_embedder = SentenceTransformersDocumentEmbedder(model="BAAI/bge-large-en-v1.5")
|
| 20 |
text_embedder = SentenceTransformersTextEmbedder(model="BAAI/bge-large-en-v1.5")
|
| 21 |
retriever = InMemoryEmbeddingRetriever(document_store=document_store, top_k=5)
|
| 22 |
-
|
| 23 |
-
ranker = SentenceTransformersRanker(model="sentence-transformers/all-MiniLM-L6-v2")
|
| 24 |
|
| 25 |
# Initialize generator
|
| 26 |
generator = GoogleAIGeminiGenerator(
|
|
|
|
| 5 |
from haystack.document_stores.in_memory import InMemoryDocumentStore
|
| 6 |
from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
|
| 7 |
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
|
| 8 |
+
from haystack.components.rankers import SentenceTransformersSimilarityRanker
|
| 9 |
+
# from haystack_integrations.components.generators.google_ai import GoogleAIGeminiGenerator
|
| 10 |
+
from haystack_google_generative_ai.generators import GoogleAIGeminiGenerator
|
| 11 |
from haystack.components.preprocessors import DocumentSplitter
|
| 12 |
|
| 13 |
# Set up logging
|
|
|
|
| 19 |
doc_embedder = SentenceTransformersDocumentEmbedder(model="BAAI/bge-large-en-v1.5")
|
| 20 |
text_embedder = SentenceTransformersTextEmbedder(model="BAAI/bge-large-en-v1.5")
|
| 21 |
retriever = InMemoryEmbeddingRetriever(document_store=document_store, top_k=5)
|
| 22 |
+
reranker = SentenceTransformersSimilarityRanker(model="cross-encoder/ms-marco-MiniLM-L-6-v2")
|
|
|
|
| 23 |
|
| 24 |
# Initialize generator
|
| 25 |
generator = GoogleAIGeminiGenerator(
|
requirements.txt
CHANGED
|
@@ -5,12 +5,15 @@ python-multipart==0.0.9
|
|
| 5 |
pillow==10.3.0
|
| 6 |
pdfplumber==0.11.0
|
| 7 |
pytesseract==0.3.10
|
| 8 |
-
|
| 9 |
-
# Sentence Transformers (explicit, though Haystack pulls it too)
|
| 10 |
sentence-transformers==3.0.1
|
| 11 |
|
| 12 |
-
#
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
# gRPC (needed by Google / Haystack)
|
| 16 |
grpcio
|
|
|
|
| 5 |
pillow==10.3.0
|
| 6 |
pdfplumber==0.11.0
|
| 7 |
pytesseract==0.3.10
|
|
|
|
|
|
|
| 8 |
sentence-transformers==3.0.1
|
| 9 |
|
| 10 |
+
# Google Generative AI
|
| 11 |
+
google-generativeai==0.7.2
|
| 12 |
+
|
| 13 |
+
# Haystack v2 core + integrations
|
| 14 |
+
haystack-ai==2.1.0
|
| 15 |
+
haystack-google-generative-ai # Required for Gemini integration
|
| 16 |
+
google-generativeai==0.7.2 # Ensure compatibility
|
| 17 |
|
| 18 |
# gRPC (needed by Google / Haystack)
|
| 19 |
grpcio
|