Spaces:
Sleeping
Sleeping
Commit
·
d7a821e
1
Parent(s):
61b8083
new changes made
Browse files
- app.py +13 -4
- pipelines.py +30 -15
- requirements.txt +2 -0
app.py
CHANGED
|
@@ -29,10 +29,14 @@ app.add_middleware(
|
|
| 29 |
|
| 30 |
@app.post("/upload")
|
| 31 |
async def upload_files(
|
| 32 |
-
session_id: str = Form(
|
| 33 |
files: List[UploadFile] = File(...)
|
| 34 |
):
|
| 35 |
-
"""Upload and process files"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
if not files:
|
| 37 |
raise HTTPException(400, detail="No files uploaded")
|
| 38 |
|
|
@@ -42,11 +46,16 @@ async def upload_files(
|
|
| 42 |
content = await f.read()
|
| 43 |
pairs.append((f.filename or "unnamed", content))
|
| 44 |
except Exception as e:
|
| 45 |
-
logger.error(f"Failed to
|
| 46 |
continue
|
| 47 |
|
| 48 |
added = ingestion.ingest_files(session_id, pairs)
|
| 49 |
-
return {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
|
| 51 |
@app.post("/query")
|
| 52 |
async def query(
|
|
|
|
| 29 |
|
| 30 |
@app.post("/upload")
|
| 31 |
async def upload_files(
|
| 32 |
+
session_id: str = Form(default=""), # Allow empty string
|
| 33 |
files: List[UploadFile] = File(...)
|
| 34 |
):
|
| 35 |
+
"""Upload and process files with automatic session management"""
|
| 36 |
+
# Generate session ID if not provided
|
| 37 |
+
if not session_id.strip():
|
| 38 |
+
session_id = str(uuid.uuid4())
|
| 39 |
+
|
| 40 |
if not files:
|
| 41 |
raise HTTPException(400, detail="No files uploaded")
|
| 42 |
|
|
|
|
| 46 |
content = await f.read()
|
| 47 |
pairs.append((f.filename or "unnamed", content))
|
| 48 |
except Exception as e:
|
| 49 |
+
logger.error(f"Failed to process {f.filename}: {str(e)}")
|
| 50 |
continue
|
| 51 |
|
| 52 |
added = ingestion.ingest_files(session_id, pairs)
|
| 53 |
+
return {
|
| 54 |
+
"status": "success",
|
| 55 |
+
"session_id": session_id,
|
| 56 |
+
"documents_added": added,
|
| 57 |
+
"message": f"Use this session_id for queries: {session_id}"
|
| 58 |
+
}
|
| 59 |
|
| 60 |
@app.post("/query")
|
| 61 |
async def query(
|
pipelines.py
CHANGED
|
@@ -3,15 +3,16 @@ os.environ["HAYSTACK_TELEMETRY_ENABLED"] = "False"
|
|
| 3 |
os.environ["HF_HOME"] = "/app/cache"
|
| 4 |
os.environ["HUGGINGFACE_HUB_CACHE"] = "/app/cache"
|
| 5 |
import logging
|
| 6 |
-
from haystack.utils import Secret
|
| 7 |
import time
|
|
|
|
| 8 |
from haystack.dataclasses import Document
|
| 9 |
from haystack.document_stores.in_memory import InMemoryDocumentStore
|
| 10 |
from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
|
| 11 |
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
|
| 12 |
from haystack.components.rankers import SentenceTransformersSimilarityRanker
|
| 13 |
-
from haystack_integrations.components.generators.google_ai import GoogleAIGeminiGenerator
|
| 14 |
from haystack.components.preprocessors import DocumentSplitter
|
|
|
|
|
|
|
| 15 |
|
| 16 |
# Set up logging
|
| 17 |
logger = logging.getLogger(__name__)
|
|
@@ -27,7 +28,7 @@ doc_embedder = SentenceTransformersDocumentEmbedder(
|
|
| 27 |
text_embedder = SentenceTransformersTextEmbedder(
|
| 28 |
model="BAAI/bge-base-en-v1.5"
|
| 29 |
)
|
| 30 |
-
retriever = InMemoryEmbeddingRetriever(document_store=document_store, top_k=3)
|
| 31 |
|
| 32 |
def initialize_ranker():
|
| 33 |
attempts = 0
|
|
@@ -59,16 +60,30 @@ except Exception as e:
|
|
| 59 |
return {"documents": documents[:3]}
|
| 60 |
reranker = DummyRanker()
|
| 61 |
|
| 62 |
-
#
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
)
|
| 73 |
|
| 74 |
splitter = DocumentSplitter(
|
|
@@ -167,8 +182,8 @@ def query_rag(question: str, session_id: str):
|
|
| 167 |
prompt = f"Context:\n{context}\n\nQuestion: {question}\nAnswer:"
|
| 168 |
|
| 169 |
# Handle generator response safely
|
| 170 |
-
response = generator.
|
| 171 |
-
answer = response.
|
| 172 |
|
| 173 |
# Format sources
|
| 174 |
sources = [
|
|
|
|
| 3 |
os.environ["HF_HOME"] = "/app/cache"
|
| 4 |
os.environ["HUGGINGFACE_HUB_CACHE"] = "/app/cache"
|
| 5 |
import logging
|
|
|
|
| 6 |
import time
|
| 7 |
+
from haystack.utils import Secret
|
| 8 |
from haystack.dataclasses import Document
|
| 9 |
from haystack.document_stores.in_memory import InMemoryDocumentStore
|
| 10 |
from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
|
| 11 |
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
|
| 12 |
from haystack.components.rankers import SentenceTransformersSimilarityRanker
|
|
|
|
| 13 |
from haystack.components.preprocessors import DocumentSplitter
|
| 14 |
+
import google.generativeai as genai
|
| 15 |
+
from google.generativeai import types
|
| 16 |
|
| 17 |
# Set up logging
|
| 18 |
logger = logging.getLogger(__name__)
|
|
|
|
| 28 |
text_embedder = SentenceTransformersTextEmbedder(
|
| 29 |
model="BAAI/bge-base-en-v1.5"
|
| 30 |
)
|
| 31 |
+
retriever = InMemoryEmbeddingRetriever(document_store=document_store, top_k=3)
|
| 32 |
|
| 33 |
def initialize_ranker():
|
| 34 |
attempts = 0
|
|
|
|
| 60 |
return {"documents": documents[:3]}
|
| 61 |
reranker = DummyRanker()
|
| 62 |
|
| 63 |
+
# Configure GenAI SDK
|
| 64 |
+
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
|
| 65 |
+
|
| 66 |
+
# Initialize generator with safety settings
|
| 67 |
+
generator = genai.GenerativeModel(
|
| 68 |
+
model_name="gemini-2.0-flash",
|
| 69 |
+
safety_settings=[
|
| 70 |
+
types.SafetySetting(
|
| 71 |
+
category=types.HarmCategory.HARM_CATEGORY_HARASSMENT,
|
| 72 |
+
threshold=types.HarmBlockThreshold.BLOCK_NONE
|
| 73 |
+
),
|
| 74 |
+
types.SafetySetting(
|
| 75 |
+
category=types.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
|
| 76 |
+
threshold=types.HarmBlockThreshold.BLOCK_NONE
|
| 77 |
+
),
|
| 78 |
+
types.SafetySetting(
|
| 79 |
+
category=types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
|
| 80 |
+
threshold=types.HarmBlockThreshold.BLOCK_NONE
|
| 81 |
+
),
|
| 82 |
+
types.SafetySetting(
|
| 83 |
+
category=types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
|
| 84 |
+
threshold=types.HarmBlockThreshold.BLOCK_NONE
|
| 85 |
+
)
|
| 86 |
+
]
|
| 87 |
)
|
| 88 |
|
| 89 |
splitter = DocumentSplitter(
|
|
|
|
| 182 |
prompt = f"Context:\n{context}\n\nQuestion: {question}\nAnswer:"
|
| 183 |
|
| 184 |
# Handle generator response safely
|
| 185 |
+
response = generator.generate_content(prompt)
|
| 186 |
+
answer = response.text if response and hasattr(response, 'text') else "No response generated"
|
| 187 |
|
| 188 |
# Format sources
|
| 189 |
sources = [
|
requirements.txt
CHANGED
|
@@ -11,3 +11,5 @@ python-dotenv
|
|
| 11 |
haystack-ai
|
| 12 |
google-ai-haystack
|
| 13 |
sentence-transformers
|
|
|
|
|
|
|
|
|
| 11 |
haystack-ai
|
| 12 |
google-ai-haystack
|
| 13 |
sentence-transformers
|
| 14 |
+
|
| 15 |
+
google-generativeai
|