maaz77 committed
Commit a36ad88
1 Parent(s): acbfde3

Update app.py

Files changed (1)
  1. app.py +17 -16
app.py CHANGED
@@ -1,7 +1,8 @@
+import os
 import streamlit as st
 import pdfplumber
 from sentence_transformers import SentenceTransformer
-from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
+from llama_index.core import VectorStoreIndex, ServiceContext
 from llama_index.llms.huggingface import HuggingFaceLLM as LlamaHuggingFaceLLM
 from llama_index.core.prompts.prompts import SimpleInputPrompt
 from llama_index.legacy.embeddings.langchain import LangchainEmbedding
@@ -9,7 +10,7 @@ import torch
 
 # Setup for caching the index and LLM to avoid reloading
 @st.cache(allow_output_mutation=True, suppress_st_warning=True)
-def setup_llama_index():
+def setup_llama_index(documents):
     # Define and configure the embedding model
     embed_model = LangchainEmbedding(SentenceTransformer('sentence-transformers/all-mpnet-base-v2'))
 
@@ -26,8 +27,7 @@ def setup_llama_index():
         model_kwargs={"torch_dtype": torch.float16, "load_in_8bit": True}
     )
 
-    # Load documents and create the index
-    documents = SimpleDirectoryReader('/content/data').load_data()  # Assuming document data is in this directory
+    # Create the index
     service_context = ServiceContext.from_defaults(chunk_size=1024, llm=llama_llm, embed_model=embed_model)
     index = VectorStoreIndex.from_documents(documents, service_context=service_context)
     return index.as_query_engine()
@@ -45,25 +45,26 @@ def extract_text_from_pdf(file):
 def main():
     st.title('PDF Reader and Question Answering with RAG-like Model')
 
-    # Load the query engine only once
-    query_engine = setup_llama_index()
-
     uploaded_file = st.file_uploader("Upload your PDF", type=["pdf"])
     if uploaded_file is not None:
         document_text = extract_text_from_pdf(uploaded_file)
         if document_text:
             st.text_area("Extracted Text", document_text, height=300)
+
+            # Process the uploaded document
+            documents = [document_text]
+            query_engine = setup_llama_index(documents)
+
+            question = st.text_input("Ask a question based on the PDF")
+            if st.button("Get Answer"):
+                if question:
+                    # Simulate RAG-like query using the index and LLM
+                    response = query_engine.query(question)
+                    st.text_area("Answer", response, height=150)
+                else:
+                    st.error("Please enter a question to get an answer.")
         else:
             st.error("No text could be extracted from the PDF. Please check the file and try again.")
 
-    question = st.text_input("Ask a question based on the PDF")
-    if st.button("Get Answer"):
-        if question:
-            # Simulate RAG-like query using the index and LLM
-            response = query_engine.query(question)
-            st.text_area("Answer", response, height=150)
-        else:
-            st.error("Please enter a question to get an answer.")
-
 if __name__ == "__main__":
     main()
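
One caveat on the new code path, flagged here rather than edited into the commit: VectorStoreIndex.from_documents expects llama_index Document objects, while documents = [document_text] passes a list of raw strings, so the indexing call is likely to fail at runtime. Below is a minimal sketch of the wrapping the new setup_llama_index(documents) signature seems to assume; build_query_engine is a hypothetical name, and the LLM/ServiceContext wiring is omitted, so llama_index defaults apply.

import streamlit as st
from llama_index.core import Document, VectorStoreIndex

# st.cache (with allow_output_mutation/suppress_st_warning) is deprecated
# in Streamlit 1.x; cache_resource is the usual replacement for heavyweight
# objects such as an index or query engine.
@st.cache_resource
def build_query_engine(raw_texts):
    # Wrap each raw string in a Document before indexing; the commit
    # passes the strings through unwrapped.
    documents = [Document(text=t) for t in raw_texts]
    index = VectorStoreIndex.from_documents(documents)
    return index.as_query_engine()

# Usage at the call site in main():
#   query_engine = build_query_engine([document_text])

A similar mismatch applies to LangchainEmbedding(SentenceTransformer(...)): that wrapper expects a LangChain Embeddings object, not a bare SentenceTransformer, so passing the model name to LangChain's HuggingFaceEmbeddings (or using llama_index's own HuggingFaceEmbedding) would match its interface.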