Spaces:

maaz77
/

pdf_query

Sleeping

App Files Files Community

maaz77 committed on May 16

Commit

a36ad88

•

1 Parent(s): acbfde3

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -16

app.py CHANGED Viewed

@@ -1,7 +1,8 @@
 import streamlit as st
 import pdfplumber
 from sentence_transformers import SentenceTransformer
-from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
 from llama_index.llms.huggingface import HuggingFaceLLM as LlamaHuggingFaceLLM
 from llama_index.core.prompts.prompts import SimpleInputPrompt
 from llama_index.legacy.embeddings.langchain import LangchainEmbedding
@@ -9,7 +10,7 @@ import torch
 # Setup for caching the index and LLM to avoid reloading
 @st.cache(allow_output_mutation=True, suppress_st_warning=True)
-def setup_llama_index():
     # Define and configure the embedding model
     embed_model = LangchainEmbedding(SentenceTransformer('sentence-transformers/all-mpnet-base-v2'))
@@ -26,8 +27,7 @@ def setup_llama_index():
         model_kwargs={"torch_dtype": torch.float16, "load_in_8bit": True}
     )
-    # Load documents and create the index
-    documents = SimpleDirectoryReader('/content/data').load_data()  # Assuming document data is in this directory
     service_context = ServiceContext.from_defaults(chunk_size=1024, llm=llama_llm, embed_model=embed_model)
     index = VectorStoreIndex.from_documents(documents, service_context=service_context)
     return index.as_query_engine()
@@ -45,25 +45,26 @@ def extract_text_from_pdf(file):
 def main():
     st.title('PDF Reader and Question Answering with RAG-like Model')
-    # Load the query engine only once
-    query_engine = setup_llama_index()
     uploaded_file = st.file_uploader("Upload your PDF", type=["pdf"])
     if uploaded_file is not None:
         document_text = extract_text_from_pdf(uploaded_file)
         if document_text:
             st.text_area("Extracted Text", document_text, height=300)
         else:
             st.error("No text could be extracted from the PDF. Please check the file and try again.")
-        question = st.text_input("Ask a question based on the PDF")
-        if st.button("Get Answer"):
-            if question:
-                # Simulate RAG-like query using the index and LLM
-                response = query_engine.query(question)
-                st.text_area("Answer", response, height=150)
-            else:
-                st.error("Please enter a question to get an answer.")
 if __name__ == "__main__":
     main()

+import os
 import streamlit as st
 import pdfplumber
 from sentence_transformers import SentenceTransformer
+from llama_index.core import VectorStoreIndex, ServiceContext
 from llama_index.llms.huggingface import HuggingFaceLLM as LlamaHuggingFaceLLM
 from llama_index.core.prompts.prompts import SimpleInputPrompt
 from llama_index.legacy.embeddings.langchain import LangchainEmbedding
 # Setup for caching the index and LLM to avoid reloading
 @st.cache(allow_output_mutation=True, suppress_st_warning=True)
+def setup_llama_index(documents):
     # Define and configure the embedding model
     embed_model = LangchainEmbedding(SentenceTransformer('sentence-transformers/all-mpnet-base-v2'))
         model_kwargs={"torch_dtype": torch.float16, "load_in_8bit": True}
     )
+    # Create the index
     service_context = ServiceContext.from_defaults(chunk_size=1024, llm=llama_llm, embed_model=embed_model)
     index = VectorStoreIndex.from_documents(documents, service_context=service_context)
     return index.as_query_engine()
 def main():
     st.title('PDF Reader and Question Answering with RAG-like Model')
     uploaded_file = st.file_uploader("Upload your PDF", type=["pdf"])
     if uploaded_file is not None:
         document_text = extract_text_from_pdf(uploaded_file)
         if document_text:
             st.text_area("Extracted Text", document_text, height=300)
+            # Process the uploaded document
+            documents = [document_text]
+            query_engine = setup_llama_index(documents)
+            question = st.text_input("Ask a question based on the PDF")
+            if st.button("Get Answer"):
+                if question:
+                    # Simulate RAG-like query using the index and LLM
+                    response = query_engine.query(question)
+                    st.text_area("Answer", response, height=150)
+                else:
+                    st.error("Please enter a question to get an answer.")
         else:
             st.error("No text could be extracted from the PDF. Please check the file and try again.")
 if __name__ == "__main__":
     main()