# ragchatbot / app.py
# shah1zil - "Update app.py" (commit f99a711, verified)
import streamlit as st
import fitz # PyMuPDF
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
import pickle
from groq import Groq
import os
def extract_text_from_pdf(uploaded_file) -> str:
    """Return the concatenated text of every page of an uploaded PDF.

    Args:
        uploaded_file: The object returned by ``st.file_uploader``; it must
            expose ``getvalue()`` (Streamlit documents its upload object as
            a ``BytesIO`` subclass).

    Returns:
        The text of all pages joined in page order. May be empty, e.g. for
        PDFs whose pages contain no extractable text.
    """
    # getvalue() returns the full buffer regardless of the current stream
    # position, whereas read() returns b"" once the stream has been consumed.
    pdf_bytes = uploaded_file.getvalue()
    # The context manager closes the PyMuPDF document when extraction is done.
    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
        return "".join(page.get_text() for page in doc)


def chunk_text(text, chunk_size=500, overlap=50):
    """Split ``text`` into overlapping chunks for embedding.

    Args:
        text: The text to split.
        chunk_size: Maximum chunk length handed to the splitter.
        overlap: Amount of content shared between consecutive chunks.

    Returns:
        The list of chunk strings produced by
        ``RecursiveCharacterTextSplitter.split_text``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=overlap,
    )
    return text_splitter.split_text(text)


# Streamlit App
st.title("RAG-based PDF Query App")
st.write("Upload a PDF, extract its content, and query it using Groq API.")

# Upload PDF
uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])
if uploaded_file is not None:
    # Extract text from the PDF and split it into chunks.
    pdf_text = extract_text_from_pdf(uploaded_file)
    chunks = chunk_text(pdf_text)
    if not chunks:
        # Building a FAISS index from zero texts fails, so report the problem
        # and end this script run instead of surfacing a traceback.
        st.error(
            "No extractable text was found in this PDF, so it cannot be indexed."
        )
        st.stop()
    st.success("PDF uploaded and extracted successfully!")

    # Create Embeddings & Store in FAISS
    # NOTE(review): Streamlit re-executes the script on each interaction, so
    # the model load and embedding below are repeated every time; consider
    # caching them (st.cache_resource / st.session_state).
    embedding_model = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    vector_store = FAISS.from_texts(chunks, embedding_model)

    # Save FAISS index
    # NOTE(review): the index is pickled to a fixed path in the working
    # directory, so anything running this script there shares (and
    # overwrites) the same file. The query section reads it back with
    # pickle.load, which is only safe while this app is the sole writer.
    with open("faiss_index.pkl", "wb") as f:
        pickle.dump(vector_store, f)
    st.success("Document processed and stored in vector database!")
# Query Section
# Reads the user's question, retrieves the three most similar chunks from the
# persisted FAISS index, and asks the Groq chat model to answer with those
# chunks prepended to the question.
query = st.text_input("Enter your query:")
if st.button("Search"):
    # Treat whitespace-only input the same as an empty query.
    question = (query or "").strip()
    # .get() avoids a KeyError traceback when the variable is not configured.
    GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

    if not question:
        st.warning("Please enter a query to search.")
    elif not os.path.isfile("faiss_index.pkl"):
        # The index file only exists after a PDF has been processed.
        st.warning("Please upload a PDF before searching.")
    elif not GROQ_API_KEY:
        st.error("The GROQ_API_KEY environment variable is not set.")
    else:
        # Load FAISS index
        # NOTE(review): pickle.load can execute arbitrary code embedded in
        # the file; this is only safe while faiss_index.pkl is written
        # exclusively by this app.
        with open("faiss_index.pkl", "rb") as f:
            vector_store = pickle.load(f)

        docs = vector_store.similarity_search(question, k=3)
        context = "\n".join(doc.page_content for doc in docs)

        client = Groq(api_key=GROQ_API_KEY)
        response = client.chat.completions.create(
            messages=[{"role": "user", "content": context + "\n\n" + question}],
            model="llama-3.3-70b-versatile",
        )
        st.subheader("Response:")
        st.write(response.choices[0].message.content)