Update app.py
Browse files
app.py
CHANGED
@@ -1,7 +1,8 @@
|
|
|
|
1 |
import streamlit as st
|
2 |
import pdfplumber
|
3 |
from sentence_transformers import SentenceTransformer
|
4 |
-
from llama_index.core import VectorStoreIndex,
|
5 |
from llama_index.llms.huggingface import HuggingFaceLLM as LlamaHuggingFaceLLM
|
6 |
from llama_index.core.prompts.prompts import SimpleInputPrompt
|
7 |
from llama_index.legacy.embeddings.langchain import LangchainEmbedding
|
@@ -9,7 +10,7 @@ import torch
|
|
9 |
|
10 |
# Setup for caching the index and LLM to avoid reloading
|
11 |
@st.cache(allow_output_mutation=True, suppress_st_warning=True)
|
12 |
-
def setup_llama_index():
|
13 |
# Define and configure the embedding model
|
14 |
embed_model = LangchainEmbedding(SentenceTransformer('sentence-transformers/all-mpnet-base-v2'))
|
15 |
|
@@ -26,8 +27,7 @@ def setup_llama_index():
|
|
26 |
model_kwargs={"torch_dtype": torch.float16, "load_in_8bit": True}
|
27 |
)
|
28 |
|
29 |
-
#
|
30 |
-
documents = SimpleDirectoryReader('/content/data').load_data() # Assuming document data is in this directory
|
31 |
service_context = ServiceContext.from_defaults(chunk_size=1024, llm=llama_llm, embed_model=embed_model)
|
32 |
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
|
33 |
return index.as_query_engine()
|
@@ -45,25 +45,26 @@ def extract_text_from_pdf(file):
|
|
45 |
def main():
    """Render the Streamlit page: PDF upload, text preview, and question box.

    The query engine is obtained from ``setup_llama_index()`` before any
    widget other than the title is drawn. The uploaded PDF's text is only
    displayed here; it is not handed to the query engine, so answers come
    from whatever ``setup_llama_index()`` indexed.
    """
    st.title('PDF Reader and Question Answering with RAG-like Model')

    # Obtain the query engine up front (setup_llama_index is cache-decorated).
    engine = setup_llama_index()

    pdf_upload = st.file_uploader("Upload your PDF", type=["pdf"])
    if pdf_upload is not None:
        extracted = extract_text_from_pdf(pdf_upload)
        if not extracted:
            st.error("No text could be extracted from the PDF. Please check the file and try again.")
        else:
            st.text_area("Extracted Text", extracted, height=300)

    # The question widgets are always rendered, whether or not a file was uploaded.
    user_question = st.text_input("Ask a question based on the PDF")
    ask_clicked = st.button("Get Answer")
    if ask_clicked and not user_question:
        st.error("Please enter a question to get an answer.")
    elif ask_clicked:
        # Simulate RAG-like query using the index and LLM
        answer = engine.query(user_question)
        st.text_area("Answer", answer, height=150)
|
67 |
-
|
68 |
# Call main() only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()
|
|
|
1 |
+
import os
|
2 |
import streamlit as st
|
3 |
import pdfplumber
|
4 |
from sentence_transformers import SentenceTransformer
|
5 |
+
from llama_index.core import VectorStoreIndex, ServiceContext
|
6 |
from llama_index.llms.huggingface import HuggingFaceLLM as LlamaHuggingFaceLLM
|
7 |
from llama_index.core.prompts.prompts import SimpleInputPrompt
|
8 |
from llama_index.legacy.embeddings.langchain import LangchainEmbedding
|
|
|
10 |
|
11 |
# Setup for caching the index and LLM to avoid reloading
|
12 |
@st.cache(allow_output_mutation=True, suppress_st_warning=True)
|
13 |
+
def setup_llama_index(documents):
|
14 |
# Define and configure the embedding model
|
15 |
embed_model = LangchainEmbedding(SentenceTransformer('sentence-transformers/all-mpnet-base-v2'))
|
16 |
|
|
|
27 |
model_kwargs={"torch_dtype": torch.float16, "load_in_8bit": True}
|
28 |
)
|
29 |
|
30 |
+
# Create the index
|
|
|
31 |
service_context = ServiceContext.from_defaults(chunk_size=1024, llm=llama_llm, embed_model=embed_model)
|
32 |
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
|
33 |
return index.as_query_engine()
|
|
|
45 |
def main():
    """Render the Streamlit page for uploading a PDF and asking questions about it.

    Flow:
      1. Show the title and a PDF uploader.
      2. Extract text from the uploaded file with ``extract_text_from_pdf``.
      3. Display the extracted text, build a query engine over it with
         ``setup_llama_index``, and show a question box plus a button.
      4. When the button is pressed with a non-empty question, run the query
         and display the answer; otherwise show an error message.

    Nothing is returned; all output goes through Streamlit widgets.
    """
    # Imported locally so this block does not depend on the file-level
    # import list, which only brings in VectorStoreIndex and ServiceContext.
    from llama_index.core import Document

    st.title('PDF Reader and Question Answering with RAG-like Model')

    uploaded_file = st.file_uploader("Upload your PDF", type=["pdf"])
    if uploaded_file is None:
        # Nothing uploaded yet: only the title and uploader are shown.
        return

    document_text = extract_text_from_pdf(uploaded_file)
    if not document_text:
        st.error("No text could be extracted from the PDF. Please check the file and try again.")
        return

    st.text_area("Extracted Text", document_text, height=300)

    # Process the uploaded document.
    # setup_llama_index forwards `documents` to VectorStoreIndex.from_documents,
    # which works on Document objects rather than raw strings, so wrap the
    # extracted text before handing it over.
    documents = [Document(text=document_text)]
    query_engine = setup_llama_index(documents)

    question = st.text_input("Ask a question based on the PDF")
    if not st.button("Get Answer"):
        return

    if not question:
        st.error("Please enter a question to get an answer.")
        return

    # Simulate RAG-like query using the index and LLM
    response = query_engine.query(question)
    # The query result is a response object; show its string form explicitly.
    st.text_area("Answer", str(response), height=150)
|
68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
# Call main() only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()
|