# NOTE: "Spaces: Sleeping" banner below was a Hugging Face Spaces page artifact
# captured when this file was scraped; it is not part of the program.
import os

import faiss
import fitz  # PyMuPDF
import gradio as gr
import numpy as np
from groq import Groq
from sentence_transformers import SentenceTransformer

# --- Groq client -------------------------------------------------------------
# Fail fast at startup when the key is missing, with an actionable message
# (the original just said "No API key found").
key = os.getenv("GROQ_API_KEY")
if not key:
    raise ValueError("No API key found — set the GROQ_API_KEY environment variable")
groq_client = Groq(api_key=key)
model = "llama3-8b-8192"

# Sentence embedder used for both document chunks and user queries.
embedder = SentenceTransformer('all-MiniLM-L6-v2')

# Global in-memory state shared by the upload handler and the chat handler.
state = {
    "document_chunks": [],  # raw text of every non-empty PDF page
    "metadata": [],         # parallel list: {"file": <name>, "page": <1-based>}
    "index": None,          # FAISS L2 index over the chunk embeddings
    "embeddings": None,     # embedding matrix backing the index
}
# Extract text from PDF using file path
def extract_text_from_pdf(file_path):
    """Extract non-empty page texts from a PDF.

    Args:
        file_path: Path to a PDF file on disk.

    Returns:
        A list of dicts, one per page that contains text:
        ``{"text": <stripped page text>, "page": <1-based page number>}``.
    """
    texts = []
    # Context manager closes the document handle even on error — the original
    # opened the document and never closed it (resource leak).
    with fitz.open(file_path) as doc:
        for page_number, page in enumerate(doc, start=1):
            text = page.get_text().strip()
            if text:  # skip blank / image-only pages
                texts.append({"text": text, "page": page_number})
    return texts
# Process PDFs
def process_pdfs(files):
    """Chunk the uploaded PDFs per page and (re)build the FAISS index.

    Args:
        files: Uploaded Gradio file objects; each exposes a ``.name`` path.

    Returns:
        A status message string for the UI.
    """
    state["document_chunks"] = []
    state["metadata"] = []
    for file in files:
        file_name = os.path.basename(file.name)
        for chunk in extract_text_from_pdf(file.name):
            state["document_chunks"].append(chunk['text'])
            state["metadata"].append({"file": file_name, "page": chunk['page']})

    # Guard: encoding an empty list would crash on `embeddings.shape[1]`.
    # Also drop any stale index from a previous upload.
    if not state["document_chunks"]:
        state["index"] = None
        state["embeddings"] = None
        return "⚠️ No extractable text found in the uploaded file(s)."

    embeddings = embedder.encode(state["document_chunks"], show_progress_bar=True)
    dim = embeddings.shape[1]
    index = faiss.IndexFlatL2(dim)
    # FAISS requires a contiguous float32 matrix.
    index.add(np.asarray(embeddings, dtype="float32"))
    state["index"] = index
    state["embeddings"] = embeddings
    return "✅ Book(s) loaded successfully!"
# Retrieve top chunks
def retrieve_chunks(question, top_k=3):
    """Return the ``top_k`` most similar (chunk, metadata) pairs for a question.

    Args:
        question: Natural-language query string.
        top_k: Maximum number of chunks to retrieve.

    Returns:
        List of ``(chunk_text, metadata_dict)`` tuples, best match first;
        empty list when no index has been built yet.
    """
    if state["index"] is None:
        return []
    q_embedding = embedder.encode([question])
    _, indices = state["index"].search(np.asarray(q_embedding, dtype="float32"), top_k)
    # FAISS pads with -1 when the index holds fewer than top_k vectors; a -1
    # would silently return the *last* chunk via negative indexing, so drop it.
    return [
        (state["document_chunks"][i], state["metadata"][i])
        for i in indices[0]
        if i >= 0
    ]
# Generate answer with source references
def generate_answer(context, question):
    """Ask the Groq model to answer ``question`` using the retrieved context.

    Args:
        context: List of ``(chunk_text, metadata)`` pairs from retrieval.
        question: The user's question.

    Returns:
        The model's answer text (prompted to cite file name and page number).
    """
    # Append a citation line to each chunk so the model can quote its sources.
    annotated_chunks = []
    for chunk, meta in context:
        annotated_chunks.append(f"{chunk}\n\n[Source: {meta['file']}, Page: {meta['page']}]")
    context_text = "\n\n".join(annotated_chunks)

    prompt = f"""You are a helpful assistant. Use the context below to answer the question.
Include the source references (file name and page number) in your answer.
Context:
{context_text}
Question:
{question}
Answer (with sources):"""

    completion = groq_client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0.2  # low temperature keeps the answer close to the context
    )
    return completion.choices[0].message.content
# Chat function for ChatInterface
def chatbot_interface_fn(message, history):
    """Gradio ChatInterface callback: answer ``message`` from the loaded PDFs.

    ``history`` is supplied by Gradio but unused — retrieval is stateless.
    """
    if not state["document_chunks"]:
        # Mojibake repaired: the scraped source showed "β οΈ" for this glyph.
        return "⚠️ Please upload PDF files first."
    context = retrieve_chunks(message)
    return generate_answer(context, message)
# Gradio UI
# NOTE(review): the emoji below replace mojibake ("π") in the scraped source;
# the exact original glyphs are unrecoverable — adjust if the originals differ.
with gr.Blocks(title="RAG Chatbot") as demo:
    gr.Markdown("# 📚 Enhanced RAG Chatbot\nUpload books and chat naturally!")

    with gr.Row():
        pdf_input = gr.File(file_types=[".pdf"], file_count="multiple", label="📄 Upload PDFs")
        upload_btn = gr.Button("Upload & Process PDFs")

    status = gr.Textbox(label="Status", interactive=False)
    upload_btn.click(process_pdfs, inputs=[pdf_input], outputs=[status])

    gr.ChatInterface(
        fn=chatbot_interface_fn,
        chatbot=gr.Chatbot(height=400, type="messages"),
        textbox=gr.Textbox(placeholder="Ask about the PDFs...", scale=7),
        title="📖 PDF Chat",
        description="Ask questions based on uploaded PDF content.",
        submit_btn="Send"
    )

if __name__ == "__main__":
    demo.launch()