import os

import gradio as gr
from groq import Groq
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

# Read the Groq API key from the environment (export GROQ_API_KEY before running).
client = Groq(api_key=os.environ["GROQ_API_KEY"])


# Load a PDF and build a FAISS vector store over its chunks.
def process_pdf(file):
    loader = PyPDFLoader(file.name)
    documents = loader.load()

    # Split documents into overlapping chunks for retrieval.
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_documents(documents)

    # Embed the chunks and index them in FAISS.
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    db = FAISS.from_documents(chunks, embeddings)
    return db


# Query handler: retrieve relevant chunks and answer with the Groq API.
def query_pdf(pdf, question):
    if pdf is None or question.strip() == "":
        return "Please upload a PDF and enter a query."

    db = process_pdf(pdf)
    retriever = db.as_retriever()
    relevant_docs = retriever.invoke(question)  # get_relevant_documents is deprecated
    context = "\n\n".join(doc.page_content for doc in relevant_docs)

    # Send the question plus retrieved context to the Groq API.
    chat_completion = client.chat.completions.create(
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {
                "role": "user",
                "content": f"Answer the following question based on the context below:\n\n{context}\n\nQuestion: {question}",
            },
        ],
        model="llama3-8b-8192",
    )
    return chat_completion.choices[0].message.content


# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## 🔍 RAG-based PDF QA with Groq & FAISS")
    with gr.Row():
        pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
        query_input = gr.Textbox(label="Ask a question", placeholder="What is the document about?")
    answer_output = gr.Textbox(label="Answer")
    submit_btn = gr.Button("Get Answer")
    submit_btn.click(fn=query_pdf, inputs=[pdf_input, query_input], outputs=answer_output)

demo.launch(share=True, debug=True)
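
# Note: query_pdf re-loads and re-embeds the PDF on every question. A minimal
# sketch of a cached variant (assuming the process_pdf helper above; the names
# `db_state` and `ask` are hypothetical, introduced only for illustration)
# would hold the FAISS index in gr.State so embedding runs once per upload:
#
#     with gr.Blocks() as demo:
#         db_state = gr.State()  # per-session cache for the vector store
#         ...
#         pdf_input.upload(fn=process_pdf, inputs=pdf_input, outputs=db_state)
#
#         def ask(db, question):
#             if db is None or question.strip() == "":
#                 return "Please upload a PDF and enter a query."
#             docs = db.as_retriever().invoke(question)
#             ...  # build the context and call the Groq API as in query_pdf
#
#         submit_btn.click(fn=ask, inputs=[db_state, query_input], outputs=answer_output)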