Spaces:
Sleeping
Sleeping
import os | |
import streamlit as st | |
from PyPDF2 import PdfReader | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain.embeddings import HuggingFaceEmbeddings | |
from langchain.vectorstores import FAISS | |
from groq import Groq | |
import requests | |
# Helper function to download and load the PDF from Google Drive | |
def load_pdf_from_drive(
    output_path="downloaded_document.pdf",
    drive_link="https://drive.google.com/file/d/1SzVEuEdKi4dHeKgDrUbmoq1MShB-hyG4/view?usp=drive_link",
    timeout=30,
):
    """Download a PDF shared via a Google Drive link and save it locally.

    Args:
        output_path: Where the downloaded bytes are written.
        drive_link: A Drive share link of the form ``.../file/d/<id>/...``.
            Defaults to the document this app was written for.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        ``output_path``, so the call can be chained into a PDF loader.

    Raises:
        ValueError: If ``drive_link`` contains no ``/d/<id>`` segment, or the
            downloaded payload does not look like a PDF.
        requests.HTTPError: If Drive answers with an error status.
    """
    # The file ID is the path segment that follows "/d/" in the share link.
    _, marker, tail = drive_link.partition("/d/")
    file_id = tail.split("/")[0]
    if not marker or not file_id:
        raise ValueError(f"Cannot find a Google Drive file ID in: {drive_link}")

    download_url = f"https://drive.google.com/uc?export=download&id={file_id}"
    # Without a timeout a stalled connection would hang the app indefinitely.
    response = requests.get(download_url, timeout=timeout)
    response.raise_for_status()

    # A 200 response can still carry an HTML page (e.g. a sign-in or
    # confirmation page) instead of the file; refuse to save that as a PDF.
    if b"%PDF-" not in response.content[:1024]:
        raise ValueError("Google Drive did not return a PDF document.")

    with open(output_path, "wb") as f:
        f.write(response.content)
    return output_path
# Helper function to parse the PDF | |
def load_pdf_content(pdf_path):
    """Return the text of every page of the PDF at ``pdf_path``, concatenated.

    Args:
        pdf_path: Path to the PDF file, passed straight to ``PdfReader``.

    Returns:
        A single string with the extracted text of all pages in page order
        (no separator is inserted between pages).
    """
    reader = PdfReader(pdf_path)
    # `or ""` keeps a page with no extractable text (a falsy/None result)
    # from breaking the join; join also avoids repeated string reallocation.
    return "".join(page.extract_text() or "" for page in reader.pages)
# Define the Streamlit app
st.title("RAG-Based Application with Groq API")
st.write("Processing a predefined PDF document from Google Drive to create a vector database and interact with it.")
st.write("Downloading and processing the document...")


@st.cache_resource
def _build_vector_store():
    """Download the PDF, chunk it, embed it and build the FAISS index.

    Streamlit re-executes this script on every user interaction; caching the
    result means the download and embedding run once per server process
    instead of once per question.

    Returns:
        A ``(text_chunks, embedding_function, faiss_index)`` tuple.

    Raises:
        ValueError: If no text could be extracted from the document.
    """
    # Download and load content from the PDF
    pdf_path = load_pdf_from_drive()
    document_text = load_pdf_content(pdf_path)

    # Split the text into manageable chunks
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_text(document_text)
    if not chunks:
        # Nothing to index (e.g. a scanned, image-only PDF); stop here with a
        # clear message rather than handing an empty list to FAISS.
        raise ValueError("No text could be extracted from the PDF document.")

    # Initialize embedding function and create the FAISS vector database
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    index = FAISS.from_texts(chunks, embedding=embeddings)

    # Save the FAISS index
    index.save_local("faiss_index")
    return chunks, embeddings, index


try:
    text_chunks, embedding_function, faiss_index = _build_vector_store()
except ValueError as err:
    st.error(str(err))
    st.stop()

st.write(f"Document split into {len(text_chunks)} chunks.")
st.write("Vector database created successfully.")
# Initialize Groq client for querying
# The API key is read from the environment (e.g. a Hugging Face Space secret)
# so that it never lives in source control. A key that was previously
# committed to this file must be treated as compromised and revoked.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    st.error("The GROQ_API_KEY environment variable is not set.")
    st.stop()
client = Groq(api_key=GROQ_API_KEY)
# Chat interaction setup
st.write("Ask a question related to the document:")
user_query = st.text_input("Your question:")
if user_query:
    # Retrieval step: fetch the chunks most similar to the question so the
    # model answers from the document rather than from its own knowledge.
    relevant_docs = faiss_index.similarity_search(user_query, k=4)
    context = "\n\n".join(doc.page_content for doc in relevant_docs)

    query_response = client.chat.completions.create(
        messages=[
            {
                "role": "system",
                "content": (
                    "Answer the user's question using only the provided context. "
                    "If the context does not contain the answer, say so."
                ),
            },
            {
                "role": "user",
                "content": f"Context:\n{context}\n\nQuestion: {user_query}",
            },
        ],
        model="llama-3.3-70b-versatile",
    )
    st.write("Response:")
    st.write(query_response.choices[0].message.content)