|
import os |
|
import logging |
|
import faiss |
|
import streamlit as st |
|
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline |
|
from langchain_community.embeddings import HuggingFaceEmbeddings |
|
from langchain.vectorstores import FAISS |
|
from langchain_community.llms import HuggingFacePipeline |
|
from langchain.chains import RetrievalQA |
|
|
|
|
|
# Configure the root logger once at import time so INFO-level records are emitted.
logging.basicConfig(level=logging.INFO)

# Module-level logger used by the loader and QA helpers below.
logger = logging.getLogger(__name__)

# Model identifier passed to `from_pretrained` when the LLM is loaded.
checkpoint = "LaMini-T5-738M"
|
|
|
@st.cache_resource
def load_llm():
    """Build the text2text-generation pipeline and wrap it for LangChain.

    The tokenizer and seq2seq model are both loaded from the module-level
    ``checkpoint``. The function is decorated with ``st.cache_resource``.

    Returns:
        A ``HuggingFacePipeline`` wrapping the configured transformers pipeline.
    """
    # Generation settings forwarded unchanged to the transformers pipeline.
    generation_settings = {
        "max_length": 256,
        "do_sample": True,
        "temperature": 0.3,
        "top_p": 0.95,
    }
    text_generator = pipeline(
        'text2text-generation',
        # Keyword arguments are evaluated left to right: tokenizer first, then model.
        tokenizer=AutoTokenizer.from_pretrained(checkpoint),
        model=AutoModelForSeq2SeqLM.from_pretrained(checkpoint),
        **generation_settings,
    )
    return HuggingFacePipeline(pipeline=text_generator)
|
|
|
@st.cache_resource
def load_faiss_index():
    """Load the persisted FAISS vector store and return it as a retriever.

    The store is restored with ``FAISS.load_local`` from the folder that
    contains ``index.faiss``. Unlike wrapping a raw ``faiss.read_index``
    result, this also restores the docstore and id mapping saved alongside
    the index, which the vector store needs to return documents. The result
    is cached with ``st.cache_resource`` so the embedding model and index
    are not reloaded on every question.

    Returns:
        The retriever produced by ``as_retriever()`` on the loaded store.

    Raises:
        RuntimeError: If ``faiss_index/index.faiss`` does not exist.
        Exception: Any failure while loading is shown with ``st.error``,
            logged with its traceback, and re-raised unchanged.
    """
    index_path = "faiss_index/index.faiss"
    if not os.path.exists(index_path):
        st.error(f"FAISS index not found at {index_path}. Please ensure the file exists.")
        raise RuntimeError(f"FAISS index not found at {index_path}.")

    try:
        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        # NOTE(security): load_local unpickles the docstore stored next to the
        # index. Only enable this for index folders produced by a trusted
        # ingestion step, never for files received from untrusted sources.
        vector_store = FAISS.load_local(
            os.path.dirname(index_path),
            embeddings,
            allow_dangerous_deserialization=True,
        )
        logger.info("FAISS index loaded successfully from %s", index_path)
        # RetrievalQA expects a retriever, not the vector store itself.
        return vector_store.as_retriever()
    except Exception as e:
        st.error(f"Failed to load FAISS index: {e}")
        logger.exception("Exception in load_faiss_index")
        raise
|
|
|
def process_answer(instruction):
    """Answer a question with the retrieval-augmented QA chain.

    Args:
        instruction: The user's question, passed to the chain as its query.

    Returns:
        A ``(answer, full_result)`` tuple. ``answer`` is the chain's
        ``'result'`` entry and ``full_result`` is the complete chain output
        (source documents are requested). On any failure the error is shown
        with ``st.error`` and logged, and a fallback message with an empty
        dict is returned instead of raising.
    """
    try:
        store = load_faiss_index()
        # RetrievalQA needs a retriever. If the loader handed back a vector
        # store, adapt it; an object without as_retriever is used as-is.
        retriever = store.as_retriever() if hasattr(store, "as_retriever") else store
        llm = load_llm()
        qa = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=retriever,
            return_source_documents=True,
        )
        # Pass the input under the chain's explicit "query" key.
        generated_text = qa.invoke({"query": instruction})
        answer = generated_text['result']
        return answer, generated_text
    except Exception as e:
        st.error(f"An error occurred while processing the answer: {e}")
        logger.exception("Exception in process_answer")
        return "An error occurred while processing your request.", {}
|
|
|
def main():
    """Render the Streamlit page and answer the submitted question.

    Shows the title, an "About the App" expander, and a question text area.
    When "Ask" is clicked with a non-blank question, the question is echoed,
    ``process_answer`` is called, and both the answer and the full chain
    output are written to the page. A blank question produces a warning and
    the QA pipeline is not run.
    """
    st.title("Search Your PDF ππ")

    with st.expander("About the App"):
        st.markdown(
            """
            This is a Generative AI powered Question and Answering app that responds to questions about your PDF File.
            """
        )

    question = st.text_area("Enter your Question")

    if st.button("Ask"):
        # Skip model loading and generation when there is nothing to ask.
        if not question or not question.strip():
            st.warning("Please enter a question before clicking Ask.")
            return

        st.info("Your Question: " + question)
        st.info("Your Answer")
        try:
            answer, metadata = process_answer(question)
            st.write(answer)
            st.write(metadata)
        except Exception as e:
            # Last-resort boundary so a failure is reported on the page.
            st.error(f"An unexpected error occurred: {e}")
            logger.exception("Unexpected error in main function")
|
|
|
# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
|