# pdfsense / app.py
# AkashVD26's picture
# better UI for HF space
# 55d5c44
# Importing libraries
import os
import tempfile

import streamlit as st
from dotenv import load_dotenv
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.history_aware_retriever import create_history_aware_retriever
from langchain.chains.retrieval import create_retrieval_chain
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
# Load environment variables
load_dotenv()
# Re-export each credential only when it is actually configured.  Assigning
# the result of os.getenv() straight into os.environ raises TypeError when the
# variable is missing (os.environ values must be str, not None), which used to
# crash the app on start-up even when only the optional token was absent.
for _env_name in ("HF_TOKEN", "GROQ_API_KEY"):
    _env_value = os.getenv(_env_name)
    if _env_value is not None:
        os.environ[_env_name] = _env_value
# Embeddings and LLM initialization
@st.cache_resource(show_spinner=False)
def _load_embeddings():
    """Create the sentence-embedding model once and reuse it across reruns.

    Streamlit re-executes the whole script on every widget interaction, so
    constructing the model at module level would reload it for each chat
    message.  ``show_spinner=False`` keeps the cached call from rendering a
    spinner element ahead of ``st.set_page_config``.

    Returns:
        The ``HuggingFaceEmbeddings`` instance for ``all-MiniLM-L6-v2``.
    """
    return HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")


embeddings = _load_embeddings()
llm = ChatGroq(model="Gemma2-9b-It")
# Page configuration.  The icon and title literals had been mis-decoded
# (UTF-8 bytes read through a single-byte code page); they are restored here
# to the intended scroll emoji and mathematical-bold lettering.
st.set_page_config(page_title="PDFSense", page_icon="📜")
# Streamlit app title
st.title("📜 𝐏𝐃𝐅𝐒𝐞𝐧𝐬𝐞 : 𝐏𝐃𝐅 𝐐𝐮𝐞𝐬𝐭𝐢𝐨𝐧 𝐀𝐧𝐬𝐰𝐞𝐫𝐢𝐧𝐠 𝐚𝐬𝐬𝐢𝐬𝐭𝐚𝐧𝐭 𝐰𝐢𝐭𝐡 𝐂𝐡𝐚𝐭 𝐇𝐢𝐬𝐭𝐨𝐫𝐲")
# PDF upload widget -- created before anything else in the body so it sits at
# the top of the page.
uploaded_files = st.file_uploader(
    label="Drop PDF files here",
    type="pdf",
    accept_multiple_files=True,
)

# Seed the conversation with the assistant's greeting when the session has no
# stored messages yet.
if "messages" not in st.session_state:
    greeting = {
        "role": "assistant",
        "content": "Hi! I am PDFSense. Upload your PDF and ask me anything related to it.",
    }
    st.session_state["messages"] = [greeting]

# Troubleshooting hint shown under the uploader.
st.text("If the application fails to read the PDFs, try refreshing the webpage.")
# Process PDFs if uploaded
def _load_documents(files):
    """Parse every uploaded PDF into LangChain documents.

    PyPDFLoader reads from a filesystem path, so each upload is spilled to its
    own uniquely named temporary file.  A unique name avoids two sessions
    overwriting one shared path, and the ``finally`` removes the file even
    when parsing fails.

    Args:
        files: Uploaded file objects exposing ``getvalue()``.

    Returns:
        A flat list with the documents loaded from all files, in upload order.
    """
    documents = []
    for uploaded in files:
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
            tmp.write(uploaded.getvalue())
            tmp_path = tmp.name
        # The handle is closed before loading so the loader can reopen the path.
        try:
            documents.extend(PyPDFLoader(tmp_path).load())
        finally:
            os.remove(tmp_path)  # Clean up temporary file
    return documents


def _build_retriever(files):
    """Split the uploaded PDFs, embed the chunks and return a FAISS retriever.

    Args:
        files: Uploaded file objects to index.

    Returns:
        A retriever over the FAISS index, or ``None`` when the PDFs yielded no
        chunks (an index cannot be built from zero chunks).
    """
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=5000, chunk_overlap=500)
    splits = text_splitter.split_documents(_load_documents(files))
    if not splits:
        return None
    return FAISS.from_documents(splits, embeddings).as_retriever()


def _build_rag_chain(retriever):
    """Assemble the history-aware retrieval + question-answering chain.

    Args:
        retriever: Retriever over the indexed PDF chunks.

    Returns:
        A chain that is invoked with ``input`` and ``chat_history`` and whose
        result is read for its ``answer`` entry.
    """
    # Prompt used to rewrite the question in light of the conversation so far
    context_prompt = ChatPromptTemplate.from_messages([
        ("system", "Refactor the question using chat history for context."),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ])
    history_aware_ret = create_history_aware_retriever(llm, retriever, context_prompt)
    system_prompt = (
        "You are PDFSense, a PDF reading assistant. Use the following context to answer the question: "
        "{context}. If unsure, respond with 'I don't know.'"
    )
    prompt = ChatPromptTemplate.from_messages([
        ("system", system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ])
    qa_chain = create_stuff_documents_chain(llm, prompt)
    return create_retrieval_chain(history_aware_ret, qa_chain)


if uploaded_files:
    # The script reruns on every chat message; rebuild the index only when the
    # set of uploaded files changes instead of re-embedding on each rerun.
    upload_key = tuple(
        (getattr(f, "file_id", None), f.name, f.size) for f in uploaded_files
    )
    if st.session_state.get("retriever_key") != upload_key:
        st.session_state["retriever"] = _build_retriever(uploaded_files)
        st.session_state["retriever_key"] = upload_key
    retriever = st.session_state["retriever"]
    if retriever is None:
        st.error("No readable text was found in the uploaded PDFs.")
        st.stop()

    rag_chain = _build_rag_chain(retriever)

    # Display chat history
    for msg in st.session_state["messages"]:
        st.chat_message(msg["role"]).write(msg["content"])

    # User input handling
    if user_input := st.chat_input(placeholder="Ask a question about your uploaded PDF..."):
        # Snapshot the history BEFORE recording the new question: the question
        # is passed separately as "input", so including it here as well would
        # present it to the model twice.
        chat_history = [
            {"role": msg["role"], "content": msg["content"]}
            for msg in st.session_state["messages"]
        ]
        st.session_state["messages"].append({"role": "user", "content": user_input})
        st.chat_message("user").write(user_input)

        # Run retrieval and answer generation using invoke()
        with st.chat_message("assistant"):
            result = rag_chain.invoke({"input": user_input, "chat_history": chat_history})
            # Extract and display only the answer
            answer = result.get("answer", "I don't know.")
            st.session_state["messages"].append({"role": "assistant", "content": answer})
            st.write(answer)
else:
    st.error("Enter PDFs.")