Spaces:

jarif
/

AI-Powered-PDF-Document-Search-and-QA

Sleeping

File size: 3,236 Bytes

01aade3
 
a144f48
d153de8
01aade3
 
a144f48
d153de8
 
01aade3
 
 
 
 
d153de8
01aade3
 
 
 
d153de8
01aade3
 
 
 
 
 
 
 
 
 
 
 
 
a144f48
d153de8
b5d8569
a144f48
b5d8569
 
d153de8
01aade3
d153de8
a144f48
8b99414
d153de8
8b99414
01aade3
a144f48
 
01aade3
 
 
d153de8
01aade3
a144f48
01aade3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d153de8
01aade3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90c47ef

import os
import logging
import faiss
import streamlit as st
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain_community.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA

# Set up logging
# Configure the root logger at INFO level and create a module-level logger
# named after this module; the functions below log through `logger`.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# HuggingFace model checkpoint
# Passed to `from_pretrained` for both the tokenizer and the seq2seq model.
# NOTE(review): the value has no organisation prefix, so it presumably refers
# to a local directory named "LaMini-T5-738M" next to the app -- confirm that
# this is intended rather than a Hub repository id.
checkpoint = "LaMini-T5-738M"

@st.cache_resource
def load_llm():
    """Build the text-generation LLM wrapper used by the QA chain.

    Loads the tokenizer and the seq2seq model from ``checkpoint``, wraps them
    in a ``text2text-generation`` pipeline and returns that pipeline wrapped
    in a ``HuggingFacePipeline``. The function is decorated with
    ``st.cache_resource``.
    """
    # Sampling / length settings forwarded to the transformers pipeline.
    generation_settings = {
        "max_length": 256,
        "do_sample": True,
        "temperature": 0.3,
        "top_p": 0.95,
    }

    # Tokenizer first, then model -- same load order as before.
    tok = AutoTokenizer.from_pretrained(checkpoint)
    seq2seq_model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

    generator = pipeline(
        'text2text-generation',
        model=seq2seq_model,
        tokenizer=tok,
        **generation_settings,
    )
    return HuggingFacePipeline(pipeline=generator)

def load_faiss_index():
    """Load the persisted FAISS vector store and return it as a retriever.

    The previous implementation called ``FAISS(index, embeddings)``, which
    does not match the LangChain ``FAISS`` constructor (it expects the
    embedding function first and also requires a docstore and an
    index-to-docstore-id mapping), and it returned a vector store where
    ``RetrievalQA`` needs a retriever. The store is now restored with
    ``FAISS.load_local`` and exposed through ``as_retriever()``.

    Returns:
        A retriever backed by the FAISS store saved in ``faiss_index/``.

    Raises:
        RuntimeError: If ``faiss_index/index.faiss`` does not exist.
        Exception: Any error raised while loading the embeddings or the
            store is reported via ``st.error``, logged, and re-raised.
    """
    index_dir = "faiss_index"
    index_path = "faiss_index/index.faiss"
    if not os.path.exists(index_path):
        st.error(f"FAISS index not found at {index_path}. Please ensure the file exists.")
        raise RuntimeError(f"FAISS index not found at {index_path}.")

    try:
        # NOTE(review): this must be the same embedding model that was used
        # when the index was built -- confirm against the ingestion script.
        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        # SECURITY: load_local unpickles the docstore stored next to the
        # index (index.pkl). Only enable this for index files produced by
        # this application; never point it at untrusted data.
        # NOTE(review): `allow_dangerous_deserialization` is only accepted by
        # recent langchain-community releases -- confirm the pinned version.
        vector_store = FAISS.load_local(
            index_dir,
            embeddings,
            allow_dangerous_deserialization=True,
        )
        logger.info(f"FAISS index loaded successfully from {index_path}")
        return vector_store.as_retriever()
    except Exception as e:
        st.error(f"Failed to load FAISS index: {e}")
        logger.exception("Exception in load_faiss_index")
        raise

def process_answer(instruction):
    """Answer ``instruction`` with a retrieval-augmented QA chain.

    Builds a "stuff" ``RetrievalQA`` chain from the FAISS retriever and the
    LLM, invokes it with the question, and returns a tuple of
    ``(answer_text, full_chain_output)``.

    Any exception is shown with ``st.error``, logged, and converted into a
    fallback tuple of a generic error message and an empty dict, so this
    function does not raise.
    """
    try:
        # Retriever is loaded before the LLM, matching the original order.
        doc_retriever = load_faiss_index()
        chain_kwargs = dict(
            llm=load_llm(),
            chain_type="stuff",
            retriever=doc_retriever,
            return_source_documents=True,
        )
        qa_chain = RetrievalQA.from_chain_type(**chain_kwargs)
        response = qa_chain.invoke(instruction)
        return response['result'], response
    except Exception as exc:
        st.error(f"An error occurred while processing the answer: {exc}")
        logger.exception("Exception in process_answer")
        return "An error occurred while processing your request.", {}

def main():
    """Render the Streamlit UI and answer the user's question.

    Shows the title, an "About" expander and a question text area. When the
    "Ask" button is pressed, the question is sent to ``process_answer`` and
    the answer plus the raw chain output are written to the page.

    An empty or whitespace-only question is rejected with a warning instead
    of being sent through retrieval and generation.
    """
    st.title("Search Your PDF 📚📝")

    with st.expander("About the App"):
        st.markdown(
            """
            This is a Generative AI powered Question and Answering app that responds to questions about your PDF File.
            """
        )

    question = st.text_area("Enter your Question")

    if st.button("Ask"):
        # Guard: do not load models or query the index for a blank question.
        if not question or not question.strip():
            st.warning("Please enter a question before clicking Ask.")
            return

        st.info("Your Question: " + question)
        st.info("Your Answer")
        try:
            answer, metadata = process_answer(question)
            st.write(answer)
            st.write(metadata)
        except Exception as e:
            # Boundary handler: surface anything unexpected in the UI and log
            # the traceback rather than crashing the script run.
            st.error(f"An unexpected error occurred: {e}")
            logger.exception("Unexpected error in main function")

# Run the app only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()