Spaces:

jarif
/

AI-Powered-PDF-Document-Search-and-QA

Running

App Files Community

AI-Powered-PDF-Document-Search-and-QA / app.py

jarif

Update app.py

d153de8 verified 5 months ago

raw

history blame

3.24 kB

	import os
	import logging
	import faiss
	import streamlit as st
	from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
	from langchain_community.embeddings import HuggingFaceEmbeddings
	from langchain.vectorstores import FAISS
	from langchain_community.llms import HuggingFacePipeline
	from langchain.chains import RetrievalQA

	# Set up logging: configure the root logger at INFO level and create a
	# module-level logger that the functions below use for info and exception
	# messages.
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	# HuggingFace model checkpoint passed to both AutoTokenizer.from_pretrained
	# and AutoModelForSeq2SeqLM.from_pretrained in load_llm().
	# NOTE(review): the value has no organisation prefix, so it presumably
	# refers to a local "LaMini-T5-738M" directory next to the app - confirm
	# that the model folder is shipped with the deployment.
	checkpoint = "LaMini-T5-738M"

	@st.cache_resource
	def load_llm():
	    """Build the LangChain LLM wrapper used for answer generation.

	    The tokenizer and the seq2seq model are both loaded from the
	    module-level ``checkpoint``, combined into a transformers
	    ``text2text-generation`` pipeline and wrapped in ``HuggingFacePipeline``.
	    The function is decorated with ``st.cache_resource``.

	    Returns:
	        HuggingFacePipeline: LangChain LLM backed by the generation pipeline.
	    """
	    # Tokenizer first, then model: keeps failures surfacing from the same
	    # call as before when the checkpoint cannot be found.
	    seq2seq_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
	    seq2seq_model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

	    # Sampling configuration forwarded to the pipeline.
	    generation_settings = {
	        "max_length": 256,
	        "do_sample": True,
	        "temperature": 0.3,
	        "top_p": 0.95,
	    }
	    text_generator = pipeline(
	        "text2text-generation",
	        model=seq2seq_model,
	        tokenizer=seq2seq_tokenizer,
	        **generation_settings,
	    )
	    return HuggingFacePipeline(pipeline=text_generator)

	def load_faiss_index():
	    """Load the saved FAISS vector store and return it as a retriever.

	    The store is restored with ``FAISS.load_local`` instead of wrapping a raw
	    ``faiss.read_index`` result. The LangChain ``FAISS`` constructor expects
	    ``(embedding_function, index, docstore, index_to_docstore_id)``, so
	    ``FAISS(index, embeddings)`` fails with ``TypeError``. ``RetrievalQA``
	    also needs a retriever rather than a vector store, hence the final
	    ``as_retriever()`` call.

	    Returns:
	        A LangChain retriever over the FAISS store, ready to be passed as
	        ``retriever=`` to ``RetrievalQA.from_chain_type``.

	    Raises:
	        RuntimeError: If ``faiss_index/index.faiss`` does not exist.
	        Exception: Any error raised while loading is reported with
	            ``st.error``, logged with its traceback, and re-raised.
	    """
	    index_path = "faiss_index/index.faiss"
	    if not os.path.exists(index_path):
	        st.error(f"FAISS index not found at {index_path}. Please ensure the file exists.")
	        raise RuntimeError(f"FAISS index not found at {index_path}.")

	    # load_local takes the folder and the file stem separately.
	    index_dir = os.path.dirname(index_path)
	    index_name = os.path.splitext(os.path.basename(index_path))[0]

	    try:
	        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
	        # NOTE(review): assumes the index was written with FAISS.save_local,
	        # i.e. "<index_name>.pkl" (docstore + id mapping) sits next to the
	        # .faiss file - confirm against the ingestion script.
	        # NOTE(security): load_local unpickles that .pkl file, which is why
	        # the opt-in flag is required. Only load index folders produced by
	        # this project; never point this at untrusted files.
	        vector_store = FAISS.load_local(
	            index_dir,
	            embeddings,
	            index_name=index_name,
	            allow_dangerous_deserialization=True,
	        )
	        logger.info(f"FAISS index loaded successfully from {index_path}")
	        return vector_store.as_retriever()
	    except Exception as e:
	        st.error(f"Failed to load FAISS index: {e}")
	        logger.exception("Exception in load_faiss_index")
	        raise

	def process_answer(instruction):
	    """Run the retrieval QA chain for one question.

	    Args:
	        instruction: The question text passed to the chain's ``invoke``.

	    Returns:
	        tuple: ``(answer, output)`` where ``answer`` is the ``'result'``
	        entry of the chain output and ``output`` is the full chain output.
	        If anything raises, the error is shown with ``st.error``, logged,
	        and a fixed error message plus an empty dict is returned instead.
	    """
	    try:
	        # The retriever is loaded before the LLM, matching the original order.
	        doc_retriever = load_faiss_index()
	        language_model = load_llm()
	        chain = RetrievalQA.from_chain_type(
	            llm=language_model,
	            chain_type="stuff",
	            retriever=doc_retriever,
	            return_source_documents=True,
	        )
	        response = chain.invoke(instruction)
	        return response['result'], response
	    except Exception as e:
	        st.error(f"An error occurred while processing the answer: {e}")
	        logger.exception("Exception in process_answer")
	        return "An error occurred while processing your request.", {}

	def main():
	    """Render the Streamlit page and answer the submitted question.

	    Draws the title, an "About the App" expander and a question box. When
	    "Ask" is clicked with a non-blank question, the question is passed to
	    ``process_answer`` and both the answer and the full chain output are
	    written to the page. A blank question produces a warning instead of
	    running the model. Unexpected errors are shown with ``st.error`` and
	    logged with their traceback.
	    """
	    st.title("Search Your PDF 📚📝")

	    with st.expander("About the App"):
	        # The literal's content lines stay at their original column so the
	        # string passed to st.markdown is unchanged.
	        st.markdown(
	            """
	This is a Generative AI powered Question and Answering app that responds to questions about your PDF File.
	"""
	        )

	    question = st.text_area("Enter your Question")

	    if not st.button("Ask"):
	        return

	    # Guard against an empty submission: without it, a blank question would
	    # still load the retriever and the LLM and run the chain on "".
	    if not question or not question.strip():
	        st.warning("Please enter a question before clicking Ask.")
	        return

	    st.info("Your Question: " + question)
	    st.info("Your Answer")
	    try:
	        answer, metadata = process_answer(question)
	        st.write(answer)
	        st.write(metadata)
	    except Exception as e:
	        # process_answer already handles its own failures; this is a last
	        # safety net for errors raised while rendering the output.
	        st.error(f"An unexpected error occurred: {e}")
	        logger.exception("Unexpected error in main function")

	# Start the app only when this file is executed as the entry script.
	if __name__ == "__main__":
	    main()