# RAG-CHATBOT / app.py
import gradio as gr
import fitz  # PyMuPDF
import re
import faiss
import torch
import numpy as np
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM
# ===============================
# MODEL LOADING
# ===============================
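# all-MiniLM-L6-v2: compact sentence-embedding model (384-dim vectors) used for retrieval.
# TinyLlama-1.1B-Chat: small instruction-tuned LLM loaded in float32 so it runs on a free CPU.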
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
LLM_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(LLM_NAME)
llm = AutoModelForCausalLM.from_pretrained(
    LLM_NAME,
    torch_dtype=torch.float32
)
llm.eval()
# ===============================
# PDF PROCESSING
# ===============================
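# Extract raw text from the PDF, normalise whitespace, then split it into
# overlapping character windows so answers are not cut off at chunk boundaries.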
def extract_text_from_pdf(pdf_path):
    """Extract the raw text from every page of the PDF."""
    doc = fitz.open(pdf_path)
    text = ""
    for page in doc:
        text += page.get_text()
    doc.close()
    return text
def clean_text(text):
    return re.sub(r"\s+", " ", text).strip()
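# Example: with chunk_size=500 and overlap=50, a 1,000-character text yields
# windows [0:500], [450:950], and [900:1000]; consecutive chunks share 50 characters.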
def chunk_text(text, chunk_size=500, overlap=50):
    """Split text into overlapping character windows for retrieval."""
    chunks = []
    start = 0
    while start < len(text):
        end = start + chunk_size
        chunks.append(text[start:end])
        # Step back by `overlap` characters so adjacent chunks share context
        start = end - overlap
    return chunks
# ===============================
# VECTOR DB (FAISS)
# ===============================
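# Chunks are embedded once and stored in an exact L2 index (IndexFlatL2);
# a question is answered by nearest-neighbour search over those embeddings.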
def build_faiss_index(chunks):
    """Embed the chunks and store them in an exact L2 FAISS index."""
    embeddings = embedding_model.encode(chunks)
    embeddings = np.array(embeddings).astype("float32")
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return index, chunks
def retrieve_relevant_chunks(query, index, chunks, top_k=3):
    """Return the top_k chunks closest to the query embedding."""
    query_embedding = embedding_model.encode([query]).astype("float32")
    _, indices = index.search(query_embedding, top_k)
    # FAISS pads the result with -1 when fewer than top_k vectors are indexed
    return [chunks[i] for i in indices[0] if i != -1]
# ===============================
# LLM ANSWER
# ===============================
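# The retrieved chunks are injected into the prompt as context and the model is
# instructed to answer only from that context (the core RAG prompting step).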
def generate_answer(question, context_chunks):
    """Generate an answer from the retrieved chunks with the local LLM."""
    context = "\n\n".join(context_chunks)
    prompt = f"""
Answer the question strictly using the given context.
If the answer is not found, say:
"Information not found in the document."

Context:
{context}

Question:
{question}

Answer:
"""
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
    with torch.no_grad():
        output = llm.generate(
            **inputs,
            max_new_tokens=200,
            do_sample=True,  # temperature is only applied when sampling is enabled
            temperature=0.2,
            pad_token_id=tokenizer.eos_token_id,  # silences the missing-pad-token warning
        )
    decoded = tokenizer.decode(output[0], skip_special_tokens=True)
    # Keep only the text that follows the final "Answer:" marker
    return decoded.split("Answer:")[-1].strip()
# ===============================
# MAIN PIPELINE
# ===============================
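# Runs the full RAG flow for one question; the FAISS index is rebuilt from the
# uploaded PDF on every call, which is acceptable for small documents on CPU.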
def pdf_rag_chat(pdf_file, question):
    """Full pipeline: extract, clean, chunk, index, retrieve, answer."""
    if pdf_file is None or question.strip() == "":
        return "Please upload a PDF and enter a question."
    # Recent Gradio versions pass gr.File values as a filepath string,
    # while older versions pass a tempfile-like object with a .name attribute.
    pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
    text = extract_text_from_pdf(pdf_path)
    text = clean_text(text)
    chunks = chunk_text(text)
    index, chunks = build_faiss_index(chunks)
    context = retrieve_relevant_chunks(question, index, chunks)
    return generate_answer(question, context)
# ===============================
# GRADIO UI (GRADIO 6 SAFE)
# ===============================
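# Two-column layout: PDF upload and question on the left, generated answer on the right.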
with gr.Blocks() as demo:
    gr.Markdown("""
# 📄 PDF RAG Chatbot (Open-Source AI)

Upload a **PDF** and ask questions based **only on its content**.
Built using **Retrieval Augmented Generation (RAG)** and
**open-source Hugging Face models**, running on **free CPU**.
""")
    with gr.Row():
        with gr.Column(scale=1):
            pdf_input = gr.File(
                label="📤 Upload PDF",
                file_types=[".pdf"]
            )
            question_input = gr.Textbox(
                label="❓ Ask a question",
                placeholder="e.g. What is the objective of the project?",
                lines=2
            )
            submit_btn = gr.Button("🔍 Get Answer")
        with gr.Column(scale=2):
            answer_output = gr.Textbox(
                label="📌 Answer",
                lines=10
            )
    submit_btn.click(
        fn=pdf_rag_chat,
        inputs=[pdf_input, question_input],
        outputs=answer_output
    )
    gr.Markdown("""
---
**© Simranpreet Kaur**
**NIELIT Ropar | AIML Six Months Training | 2026**
""")
demo.launch()
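# On Hugging Face Spaces this file is executed directly; locally, run `python app.py`.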