File size: 3,236 Bytes
01aade3 a144f48 d153de8 01aade3 a144f48 d153de8 01aade3 d153de8 01aade3 d153de8 01aade3 a144f48 d153de8 b5d8569 a144f48 b5d8569 d153de8 01aade3 d153de8 a144f48 8b99414 d153de8 8b99414 01aade3 a144f48 01aade3 d153de8 01aade3 a144f48 01aade3 d153de8 01aade3 90c47ef |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
import os
import logging
import faiss
import streamlit as st
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain_community.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
# Configure the root logger at INFO level and create this module's logger,
# which the functions below use to record load successes and exceptions.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Model identifier passed to `from_pretrained` for both the tokenizer and the
# seq2seq model in load_llm().
# NOTE(review): there is no organisation prefix, so this presumably refers to
# a local directory named "LaMini-T5-738M" - confirm it exists where the app runs.
checkpoint = "LaMini-T5-738M"
@st.cache_resource
def load_llm():
    """Build the LangChain LLM wrapper used for answer generation.

    The tokenizer and the seq2seq model are both loaded from the module-level
    ``checkpoint`` and combined into a ``text2text-generation`` pipeline,
    which is then wrapped in ``HuggingFacePipeline``. The function is
    decorated with ``st.cache_resource`` so Streamlit can reuse the loaded
    model instead of rebuilding it.

    Returns:
        A ``HuggingFacePipeline`` instance wrapping the generation pipeline.
    """
    # Sampling configuration forwarded unchanged to the transformers pipeline.
    generation_settings = {
        "max_length": 256,
        "do_sample": True,
        "temperature": 0.3,
        "top_p": 0.95,
    }
    text_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    seq2seq_model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
    generator = pipeline(
        'text2text-generation',
        model=seq2seq_model,
        tokenizer=text_tokenizer,
        **generation_settings,
    )
    return HuggingFacePipeline(pipeline=generator)
def load_faiss_index():
    """Load the persisted FAISS vector store and return it as a retriever.

    The store is read from the directory containing
    ``faiss_index/index.faiss``. It is loaded through LangChain's
    ``FAISS.load_local`` rather than ``faiss.read_index``: a raw faiss index
    only holds vectors, while the LangChain wrapper also needs the docstore
    and the id mapping in order to return documents. Calling
    ``FAISS(index, embeddings)`` directly does not match the wrapper's
    constructor and fails before any query can run.

    Returns:
        A LangChain retriever backed by the loaded FAISS store, suitable for
        ``RetrievalQA.from_chain_type(retriever=...)``.

    Raises:
        RuntimeError: If ``faiss_index/index.faiss`` does not exist.
        Exception: Any error raised while loading is shown in the Streamlit
            UI, logged with its traceback, and re-raised unchanged.
    """
    index_path = "faiss_index/index.faiss"
    if not os.path.exists(index_path):
        st.error(f"FAISS index not found at {index_path}. Please ensure the file exists.")
        raise RuntimeError(f"FAISS index not found at {index_path}.")
    try:
        # NOTE(review): this must be the same embedding model that was used
        # when the index was built - confirm against the ingestion script.
        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        # NOTE(security): load_local unpickles the docstore stored next to
        # index.faiss. Only load index directories produced by trusted code;
        # the flag below explicitly opts in to that deserialisation.
        vector_store = FAISS.load_local(
            os.path.dirname(index_path),
            embeddings,
            allow_dangerous_deserialization=True,
        )
        logger.info("FAISS index loaded successfully from %s", index_path)
        # RetrievalQA expects a retriever, not the vector store itself.
        return vector_store.as_retriever()
    except Exception as e:
        st.error(f"Failed to load FAISS index: {e}")
        logger.exception("Exception in load_faiss_index")
        raise
def process_answer(instruction):
    """Answer a question with the retrieval-backed QA chain.

    Args:
        instruction: The question text handed to the chain.

    Returns:
        A tuple ``(answer, chain_output)``. ``answer`` is the ``'result'``
        entry of the chain output and ``chain_output`` is the complete
        mapping returned by the chain. If anything fails, the error is shown
        in the Streamlit UI and logged, and a fallback message together with
        an empty dict is returned instead.
    """
    try:
        # Retriever is loaded before the LLM, as in the original flow.
        doc_retriever = load_faiss_index()
        language_model = load_llm()
        chain_output = RetrievalQA.from_chain_type(
            llm=language_model,
            chain_type="stuff",
            retriever=doc_retriever,
            return_source_documents=True,
        ).invoke(instruction)
        return chain_output['result'], chain_output
    except Exception as err:
        st.error(f"An error occurred while processing the answer: {err}")
        logger.exception("Exception in process_answer")
        return "An error occurred while processing your request.", {}
def main():
    """Render the Streamlit page and answer the question when "Ask" is clicked.

    The page shows a title, a collapsible description, and a text area for
    the question. Once the button is pressed, the question is echoed back,
    then the answer and the full chain output are written to the page. Any
    exception raised while doing so is reported in the UI and logged.
    """
    st.title("Search Your PDF ππ")
    about_text = (
        "\n"
        "This is a Generative AI powered Question and Answering app that "
        "responds to questions about your PDF File.\n"
    )
    with st.expander("About the App"):
        st.markdown(about_text)
    question = st.text_area("Enter your Question")
    # Nothing more to render until the user submits the question.
    if not st.button("Ask"):
        return
    st.info("Your Question: " + question)
    st.info("Your Answer")
    try:
        answer, metadata = process_answer(question)
        for item in (answer, metadata):
            st.write(item)
    except Exception as err:
        st.error(f"An unexpected error occurred: {err}")
        logger.exception("Unexpected error in main function")
# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()
|