Seif-aber committed on
Commit
355fe19
·
1 Parent(s): c77e641

document q&a assistant with Gemini & RAG

Browse files
Files changed (3) hide show
  1. app.py +50 -0
  2. requirements.txt +7 -0
  3. utils.py +64 -0
app.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ from utils import load_data, get_gemini_embedding
4
+
5
+
6
def process_document(doc, question):
    """Answer ``question`` from the content of a single uploaded document.

    The upload is written into a private, per-call temporary directory under
    ``data`` so that only this file is indexed. Writing straight into the
    shared ``data`` directory would let leftovers or concurrent uploads from
    other sessions leak into the index.

    Args:
        doc: Uploaded file object exposing ``name`` and ``getbuffer()``
            (this is how the caller's ``st.file_uploader`` result is used).
        question: The question to ask about the document.

    Returns:
        The object returned by ``query_engine.query(question)``.

    Raises:
        RuntimeError: If the document could not be loaded or the query
            engine could not be created (the helpers signal this by
            returning a falsy value).
    """
    import tempfile

    # Keep only the last path component so a crafted upload name cannot
    # point outside the working directory. The extension is preserved
    # because the loader receives the file under this name.
    file_name = os.path.basename(doc.name.replace("\\", "/")) or "uploaded_document"

    os.makedirs("data", exist_ok=True)
    # The temporary directory (and the file in it) is removed on exit,
    # whether or not an error occurred.
    with tempfile.TemporaryDirectory(dir="data") as work_dir:
        upload_path = os.path.join(work_dir, file_name)
        with open(upload_path, "wb") as f:
            f.write(doc.getbuffer())

        documents = load_data(work_dir)
        if not documents:
            raise RuntimeError(
                "No content could be loaded from the uploaded document."
            )

        query_engine = get_gemini_embedding(documents)
        if query_engine is None or query_engine is False:
            raise RuntimeError(
                "The query engine could not be created for this document."
            )

        return query_engine.query(question)
18
+
19
+
20
def main():
    """Render the Streamlit page and answer a question about an upload.

    Shows a file uploader and a question box. When "Get Answer" is pressed,
    both inputs are validated, the document is processed via
    ``process_document`` and the answer text is written to the page.
    Processing failures are reported with ``st.error`` instead of surfacing
    as an unhandled exception.
    """
    st.set_page_config(page_title="Document Q&A Assistant")
    st.title("Smart Document Question-Answering")

    # Create data directory if not exists
    os.makedirs("data", exist_ok=True)

    doc = st.file_uploader(
        "Upload your document (PDF, CSV, or TXT)", type=["pdf", "csv", "txt"]
    )

    question = st.text_input(
        "What would you like to know about your document?",
        placeholder="Enter your question here...",
    )

    if not st.button("Get Answer"):
        return

    if not doc:
        st.error("Please upload a document first.")
        return
    # A question made only of whitespace is treated as missing.
    if not question or not question.strip():
        st.error("Please enter a question.")
        return

    try:
        with st.spinner("Analyzing your document..."):
            response = process_document(doc, question)
            answer = response.response
    except Exception as exc:
        # Top-level UI boundary: show a readable message rather than a
        # raw traceback on the page.
        st.error(f"Could not answer the question: {exc}")
        return

    st.write(answer)


if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ llama-index
2
+ google-generativeai
3
+ llama-index-llms-gemini
4
+ pypdf
5
+ python-dotenv
6
+ llama-index-embeddings-gemini
7
+ streamlit
utils.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, Settings
2
+ from llama_index.core.node_parser import SentenceSplitter
3
+ from llama_index.embeddings.gemini import GeminiEmbedding
4
+ from llama_index.llms.gemini import Gemini
5
+ import logging
6
+ import os
7
+
8
+ GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
9
+
10
+ # Configure logging
11
+ logger = logging.getLogger(__name__)
12
+ logging.basicConfig(level=logging.INFO)
13
+
14
def load_data(data_path: str) -> "list | bool":
    """
    Load documents from a directory.

    Args:
        data_path (str): Path to the directory containing documents

    Returns:
        list | bool: The list returned by ``SimpleDirectoryReader.load_data()``
        on success, or ``False`` if loading fails. Callers must check for
        ``False`` before using the result.
    """
    try:
        logger.info("Loading documents from %s", data_path)
        documents = SimpleDirectoryReader(data_path).load_data()
    except Exception as e:
        # logger.exception keeps the traceback, which a plain error message
        # built from str(e) would discard.
        logger.exception("Failed to load data: %s", e)
        return False

    logger.info("Successfully loaded %d documents", len(documents))
    return documents
33
+
34
def get_gemini_embedding(documents: list):
    """
    Create a query engine using Gemini embeddings.

    Builds a ``VectorStoreIndex`` over ``documents`` using the Gemini
    embedding model and returns a query engine backed by the Gemini LLM.
    As a side effect this assigns ``Settings.llm``, ``Settings.embed_model``
    and ``Settings.node_parser``.

    Args:
        documents (list): Loaded documents to index (for example the result
            of ``load_data``).

    Returns:
        The query engine from ``index.as_query_engine()``, or ``False`` if
        ``documents`` is empty/falsy or setup fails.
    """
    if not documents:
        # Covers both an empty list and the ``False`` failure value that
        # ``load_data`` returns; there is nothing to index.
        logger.error("Failed to setup Gemini embedding: no documents provided")
        return False

    try:
        logger.info("Initializing Gemini embedding model and LLM")
        # Pass the same key to both clients; previously only the LLM got it,
        # so the embedding client never saw GEMINI_API_KEY.
        gemini_embedding_model = GeminiEmbedding(
            model_name="models/embedding-001",
            api_key=GEMINI_API_KEY,
        )
        llm = Gemini(model="models/gemini-1.5-flash", api_key=GEMINI_API_KEY)

        # Configure global settings
        Settings.llm = llm
        Settings.embed_model = gemini_embedding_model
        Settings.node_parser = SentenceSplitter(chunk_size=1000, chunk_overlap=20)

        logger.info("Creating vector store index")
        index = VectorStoreIndex.from_documents(
            documents=documents,
            embed_model=gemini_embedding_model,
        )

        logger.info("Creating query engine")
        return index.as_query_engine()
    except Exception as e:
        # Keep the traceback in the log; the caller only receives False.
        logger.exception("Failed to setup Gemini embedding: %s", e)
        return False