HarshKalia-24 committed
Commit: d7a821e
Parent: 61b8083

new changes made

Files changed (3)
  1. app.py +13 -4
  2. pipelines.py +30 -15
  3. requirements.txt +2 -0
app.py CHANGED
@@ -29,10 +29,14 @@ app.add_middleware(
 
 @app.post("/upload")
 async def upload_files(
-    session_id: str = Form(default_factory=lambda: str(uuid.uuid4())),
+    session_id: str = Form(default=""),  # Allow empty string
     files: List[UploadFile] = File(...)
 ):
-    """Upload and process files"""
+    """Upload and process files with automatic session management"""
+    # Generate session ID if not provided
+    if not session_id.strip():
+        session_id = str(uuid.uuid4())
+
     if not files:
         raise HTTPException(400, detail="No files uploaded")
 
@@ -42,11 +46,16 @@ async def upload_files(
             content = await f.read()
             pairs.append((f.filename or "unnamed", content))
         except Exception as e:
-            logger.error(f"Failed to read {f.filename}: {e}")
+            logger.error(f"Failed to process {f.filename}: {str(e)}")
             continue
 
     added = ingestion.ingest_files(session_id, pairs)
-    return {"status": "success", "documents_added": added, "session_id": session_id}
+    return {
+        "status": "success",
+        "session_id": session_id,
+        "documents_added": added,
+        "message": f"Use this session_id for queries: {session_id}"
+    }
 
 @app.post("/query")
 async def query(
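
With this change a client no longer has to mint its own session ID: posting to /upload with an empty session_id returns a server-generated UUID that can be reused for follow-up requests. A minimal client sketch using the requests library, assuming the app is reachable at http://localhost:7860 and that /query accepts session_id and question form fields (neither the host nor the /query field names are shown in this diff):

import requests

BASE_URL = "http://localhost:7860"  # assumed address of the FastAPI app

# Upload a file without supplying a session_id; the server now generates one.
with open("report.pdf", "rb") as fh:
    resp = requests.post(
        f"{BASE_URL}/upload",
        data={"session_id": ""},  # empty string -> server creates a UUID
        files=[("files", ("report.pdf", fh, "application/pdf"))],
    )
resp.raise_for_status()
session_id = resp.json()["session_id"]

# Reuse the returned session_id for a query (field names are assumptions).
result = requests.post(
    f"{BASE_URL}/query",
    data={"session_id": session_id, "question": "What does the report cover?"},
).json()
print(result)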
pipelines.py CHANGED
@@ -3,15 +3,16 @@ os.environ["HAYSTACK_TELEMETRY_ENABLED"] = "False"
 os.environ["HF_HOME"] = "/app/cache"
 os.environ["HUGGINGFACE_HUB_CACHE"] = "/app/cache"
 import logging
-from haystack.utils import Secret
 import time
+from haystack.utils import Secret
 from haystack.dataclasses import Document
 from haystack.document_stores.in_memory import InMemoryDocumentStore
 from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
 from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
 from haystack.components.rankers import SentenceTransformersSimilarityRanker
-from haystack_integrations.components.generators.google_ai import GoogleAIGeminiGenerator
 from haystack.components.preprocessors import DocumentSplitter
+import google.generativeai as genai
+from google.generativeai import types
 
 # Set up logging
 logger = logging.getLogger(__name__)
@@ -27,7 +28,7 @@ doc_embedder = SentenceTransformersDocumentEmbedder(
 text_embedder = SentenceTransformersTextEmbedder(
     model="BAAI/bge-base-en-v1.5"
 )
-retriever = InMemoryEmbeddingRetriever(document_store=document_store, top_k=3)  # Reduce top_k
+retriever = InMemoryEmbeddingRetriever(document_store=document_store, top_k=3)
 
 def initialize_ranker():
     attempts = 0
@@ -59,16 +60,30 @@ except Exception as e:
             return {"documents": documents[:3]}
     reranker = DummyRanker()
 
-# Initialize generator
-generator = GoogleAIGeminiGenerator(
-    api_key=Secret.from_env_var("GOOGLE_API_KEY"),
-    model="gemini-1.5-flash-latest",
-    safety_settings={
-        "HARASSMENT": "BLOCK_NONE",
-        "HATE_SPEECH": "BLOCK_NONE",
-        "SEXUAL": "BLOCK_NONE",
-        "DANGEROUS": "BLOCK_NONE"
-    }
+# Configure GenAI SDK
+genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
+
+# Initialize generator with safety settings
+generator = genai.GenerativeModel(
+    model_name="gemini-2.0-flash",
+    safety_settings=[
+        types.SafetySetting(
+            category=types.HarmCategory.HARM_CATEGORY_HARASSMENT,
+            threshold=types.HarmBlockThreshold.BLOCK_NONE
+        ),
+        types.SafetySetting(
+            category=types.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
+            threshold=types.HarmBlockThreshold.BLOCK_NONE
+        ),
+        types.SafetySetting(
+            category=types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
+            threshold=types.HarmBlockThreshold.BLOCK_NONE
+        ),
+        types.SafetySetting(
+            category=types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
+            threshold=types.HarmBlockThreshold.BLOCK_NONE
+        )
+    ]
 )
 
 splitter = DocumentSplitter(
@@ -167,8 +182,8 @@ def query_rag(question: str, session_id: str):
     prompt = f"Context:\n{context}\n\nQuestion: {question}\nAnswer:"
 
     # Handle generator response safely
-    response = generator.run(parts=[prompt])
-    answer = response.get("replies", [""])[0] if response and response.get("replies") else "No response generated"
+    response = generator.generate_content(prompt)
+    answer = response.text if response and hasattr(response, 'text') else "No response generated"
 
     # Format sources
     sources = [
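
For reference, the new generation path can be exercised on its own, outside the retrieval pipeline. Below is a minimal sketch, assuming GOOGLE_API_KEY is set and that the installed google-generativeai version exposes SafetySetting, HarmCategory, and HarmBlockThreshold under google.generativeai.types, as this diff assumes. It guards response.text with try/except, since that property can raise when the reply is blocked or empty, which a plain hasattr check may not shield against:

import os
import google.generativeai as genai
from google.generativeai import types

genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

# Mirrors the commit's generator setup, trimmed to a single safety setting.
model = genai.GenerativeModel(
    model_name="gemini-2.0-flash",
    safety_settings=[
        types.SafetySetting(
            category=types.HarmCategory.HARM_CATEGORY_HARASSMENT,
            threshold=types.HarmBlockThreshold.BLOCK_NONE,
        ),
    ],
)

prompt = "Context:\n(example context)\n\nQuestion: What is retrieval-augmented generation?\nAnswer:"
response = model.generate_content(prompt)

# response.text can raise if the candidate was blocked or empty, so try/except
# is a safer guard than checking hasattr(response, 'text').
try:
    answer = response.text
except (ValueError, AttributeError):
    answer = "No response generated"
print(answer)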
requirements.txt CHANGED
@@ -11,3 +11,5 @@ python-dotenv
 haystack-ai
 google-ai-haystack
 sentence-transformers
+
+google-generativeai