|
import os |
|
import tempfile |
|
import gradio as gr |
|
from langchain_community.vectorstores import FAISS |
|
from langchain_groq import ChatGroq |
|
from langchain_community.embeddings import HuggingFaceBgeEmbeddings |
|
from langchain.text_splitter import RecursiveCharacterTextSplitter |
|
from langchain_core.runnables import RunnablePassthrough |
|
from langchain.document_loaders import PyPDFLoader |
|
from langchain import hub |
|
|
|
|
|
# SECURITY: a Groq API key was previously hardcoded on this line. A secret
# committed to source control must be considered compromised — rotate it and
# supply the key via the environment instead (e.g. `export GROQ_API_KEY=...`).
if "GROQ_API_KEY" not in os.environ:
    raise RuntimeError(
        "GROQ_API_KEY is not set. Export it in the environment before running."
    )

# Chat model used to answer questions over the retrieved PDF context.
llm = ChatGroq(model="llama3-8b-8192")

# Small English BGE embedding model, run on CPU. Normalizing embeddings makes
# inner-product similarity equivalent to cosine similarity in FAISS.
model_name = "BAAI/bge-small-en"
hf_embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs={'device': 'cpu'},
    encode_kwargs={'normalize_embeddings': True},
)
|
|
|
|
|
def process_pdf(file):
    """Build a RAG chain from an uploaded PDF and store it in ``rag_chain``.

    Args:
        file: Raw PDF bytes from the ``gr.File(type="binary")`` input, or
            ``None`` when nothing was uploaded.

    Returns:
        A status message string for the UI.

    Side effects:
        Sets the module-level global ``rag_chain`` used by ``ask_question``.
    """
    if file is None:
        return "Please upload a PDF file."

    # PyPDFLoader needs a path on disk, so persist the uploaded bytes to a
    # temporary file first (delete=False so it survives the `with` exit on
    # Windows, where an open file cannot be reopened by the loader).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
        temp_file.write(file)
        temp_file_path = temp_file.name

    try:
        loader = PyPDFLoader(temp_file_path)
        docs = loader.load()
    finally:
        # Fix: the temp file was previously never removed, leaking one file
        # per upload. Clean it up even if loading fails.
        os.remove(temp_file_path)

    # Chunk the document; 200-char overlap preserves context across splits.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    splits = text_splitter.split_documents(docs)

    # Index the chunks and expose them through a retriever.
    vectorstore = FAISS.from_documents(documents=splits, embedding=hf_embeddings)
    retriever = vectorstore.as_retriever()

    # Community-maintained RAG prompt from the LangChain hub.
    prompt = hub.pull("rlm/rag-prompt")

    def format_docs(docs):
        # Concatenate retrieved chunks into a single context string.
        return "\n\n".join(doc.page_content for doc in docs)

    global rag_chain
    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
    )

    return "PDF processed successfully! Now ask questions."
|
|
|
|
|
def ask_question(query):
    """Answer *query* using the RAG chain built by ``process_pdf``.

    Returns a hint message when no PDF has been processed yet (i.e. the
    module-level ``rag_chain`` global does not exist).
    """
    if "rag_chain" not in globals():
        return "Please upload and process a PDF first."
    return rag_chain.invoke(query).content
|
|
|
|
|
with gr.Blocks() as demo:
    # Two-stage UI: first process a PDF, then ask questions against it.
    gr.Markdown("# π PDF Chatbot with RAG")
    gr.Markdown("Upload a PDF and ask questions!")

    # Stage 1: upload + process.
    uploaded_pdf = gr.File(label="Upload PDF", type="binary")
    process_btn = gr.Button("Process PDF")
    status_box = gr.Textbox(label="Status", interactive=False)

    # Stage 2: question + answer.
    question_box = gr.Textbox(label="Ask a Question")
    ask_btn = gr.Button("Submit")
    answer_box = gr.Textbox(label="AI Response")

    # Wire the buttons to the backend handlers defined above.
    process_btn.click(process_pdf, inputs=uploaded_pdf, outputs=status_box)
    ask_btn.click(ask_question, inputs=question_box, outputs=answer_box)

demo.launch()