import os
import pickle

import fitz  # PyMuPDF
import streamlit as st
from groq import Groq
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
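
# Assumed dependencies (not pinned anywhere in the original): streamlit,
# pymupdf, langchain, sentence-transformers, faiss-cpu, and groq. On recent
# LangChain releases, HuggingFaceEmbeddings and FAISS are imported from
# langchain_community rather than langchain.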

# Streamlit app
st.title("RAG-based PDF Query App")
st.write("Upload a PDF, extract its content, and query it using the Groq API.")

# Upload PDF
uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])

if uploaded_file is not None:
    # Extract text from the uploaded PDF
    def extract_text_from_pdf(uploaded_file):
        doc = fitz.open(stream=uploaded_file.read(), filetype="pdf")
        text = ""
        for page in doc:
            text += page.get_text()
        return text

    pdf_text = extract_text_from_pdf(uploaded_file)
    st.success("PDF uploaded and extracted successfully!")
# Chunk & Tokenize Text | |
def chunk_text(text, chunk_size=500, overlap=50): | |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=overlap) | |
return text_splitter.split_text(text) | |
chunks = chunk_text(pdf_text) | |

    # Embed each chunk with a small sentence-transformer (384-dim vectors)
    # and index the vectors in FAISS
    embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vector_store = FAISS.from_texts(chunks, embedding_model)

    # Persist the FAISS index to disk
    with open("faiss_index.pkl", "wb") as f:
        pickle.dump(vector_store, f)
    st.success("Document processed and stored in vector database!")

    # Query section
    query = st.text_input("Enter your query:")
    if st.button("Search"):
        if query:
            # Reload the persisted FAISS index
            with open("faiss_index.pkl", "rb") as f:
                vector_store = pickle.load(f)

            # Retrieve the top-3 most relevant chunks for the query
            docs = vector_store.similarity_search(query, k=3)
            context = "\n".join(doc.page_content for doc in docs)

            # Ask the Groq model, prepending the retrieved context to the question
            groq_api_key = os.environ.get("GROQ_API_KEY")  # set this in your environment
            if not groq_api_key:
                st.error("GROQ_API_KEY environment variable is not set.")
                st.stop()
            client = Groq(api_key=groq_api_key)
            response = client.chat.completions.create(
                messages=[{"role": "user", "content": context + "\n\n" + query}],
                model="llama-3.3-70b-versatile",
            )
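
            # The prompt above simply prepends the retrieved chunks to the user's
            # question; a system message telling the model to answer only from the
            # supplied context is a common refinement, not part of the original app.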

            st.subheader("Response:")
            st.write(response.choices[0].message.content)
        else:
            st.warning("Please enter a query to search.")
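
# To run locally (assuming this file is saved as app.py):
#   export GROQ_API_KEY="your-groq-api-key"
#   streamlit run app.py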