# app.py — Streamlit RAG Q&A app: Google Drive PDF -> FAISS retrieval -> Groq LLM.
import os
import streamlit as st
import gdown
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
import faiss
from groq import Groq
# Constants
# Public "share" URL of the source PDF; the file id is extracted from it at
# download time (see download_file_from_drive).
DOCUMENT_LINK = "https://drive.google.com/file/d/1dbVp5inTNxd1SWrYbSgStX-kVp18P8TH/view?usp=sharing"
# Local path the PDF is saved to; also used to skip re-downloading on reruns.
PDF_PATH = "document.pdf"
# Function to download file from Google Drive
def download_file_from_drive(drive_link: str, output_path: str) -> None:
    """Download a publicly shared Google Drive file to a local path.

    Parameters
    ----------
    drive_link : str
        A Google Drive share URL of the form
        ``https://drive.google.com/file/d/<FILE_ID>/...``.
    output_path : str
        Local filesystem path the downloaded file is written to.

    Raises
    ------
    ValueError
        If *drive_link* does not contain a ``/d/<FILE_ID>`` segment
        (the original code raised an opaque ``IndexError`` here).
    """
    # The direct-download endpoint accepts only the opaque file id,
    # not the human-facing "view" URL, so extract it first.
    parts = drive_link.split("/d/")
    if len(parts) < 2 or not parts[1]:
        raise ValueError(f"Not a recognizable Google Drive share link: {drive_link!r}")
    file_id = parts[1].split("/")[0]
    gdown.download(f"https://drive.google.com/uc?id={file_id}", output_path, quiet=False)
# Function to extract text from PDF
def extract_text_from_pdf(pdf_path: str) -> str:
    """Return the concatenated text of every page of the PDF at *pdf_path*.

    ``page.extract_text()`` returns ``None`` for pages with no extractable
    text (e.g. scanned/image-only pages); such pages contribute an empty
    string instead of crashing the ``str`` concatenation with ``TypeError``.
    """
    reader = PdfReader(pdf_path)
    # str.join is linear; the original `text += ...` loop is quadratic
    # across many pages.
    return "".join(page.extract_text() or "" for page in reader.pages)
# Function to create a vector store using FAISS
def create_vector_store(text: str, model_name: str = "all-MiniLM-L6-v2"):
    """Embed the non-empty lines of *text* and index them with FAISS.

    Parameters
    ----------
    text : str
        Raw document text; it is split on newlines and blank lines are
        dropped, so each retrievable unit is one non-empty line.
    model_name : str
        SentenceTransformer model used for the embeddings.

    Returns
    -------
    tuple[faiss.IndexFlatL2, list[str]]
        The populated L2 index and the sentence list it was built from,
        in matching order (index position i corresponds to sentences[i]).

    Raises
    ------
    ValueError
        If *text* contains no non-empty lines (the original code would
        fail with ``IndexError`` on ``embeddings[0]``).
    """
    model = SentenceTransformer(model_name)
    sentences = [line.strip() for line in text.split("\n") if line.strip()]
    if not sentences:
        raise ValueError("Document contains no extractable text to index.")
    embeddings = model.encode(sentences, convert_to_tensor=False)
    # FAISS expects a contiguous float32 matrix; coerce explicitly rather
    # than relying on the encoder's default dtype.
    embeddings = embeddings.astype("float32")
    # Embedding dimension comes from the matrix itself (rows x dim).
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return index, sentences
# ---- One-time setup, cached across Streamlit reruns ------------------------
@st.cache_resource(show_spinner=False)
def _load_resources():
    """Download the PDF, build the FAISS store, and create the shared clients.

    Streamlit re-executes this whole script on every widget interaction;
    without caching, each keystroke/submit would re-download the document,
    re-parse the PDF, rebuild the index, and reload the embedding model.
    ``st.cache_resource`` makes these expensive steps run once per process.
    """
    if not os.path.exists(PDF_PATH):
        st.sidebar.text("Downloading document...")
        download_file_from_drive(DOCUMENT_LINK, PDF_PATH)
    st.sidebar.text("Extracting text from document...")
    doc_text = extract_text_from_pdf(PDF_PATH)
    st.sidebar.text("Creating vector store...")
    vec_index, vec_sentences = create_vector_store(doc_text)
    # One shared query encoder: the original re-instantiated
    # SentenceTransformer on every submit, reloading model weights per query.
    query_encoder = SentenceTransformer("all-MiniLM-L6-v2")
    st.sidebar.text("Initializing Groq Client...")
    groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
    return vec_index, vec_sentences, query_encoder, groq_client


index, sentences, embed_model, client = _load_resources()

# ---- Streamlit App Frontend ------------------------------------------------
st.title("Electricity-Related Q&A Application")
st.write("You can ask any question about electricity load balancing for your house appliances,")
st.write("electricity load consumption calculation etc...")

user_input = st.text_input("Ask a question:")
if st.button("Submit") and user_input:
    # Embed the query with the shared encoder and retrieve the 3 nearest lines.
    query_embedding = embed_model.encode([user_input])
    distances, indices = index.search(query_embedding, k=3)
    # FAISS pads `indices` with -1 when the index holds fewer than k vectors;
    # filtering prevents `sentences[-1]` from sneaking into the context.
    relevant_context = "\n".join(sentences[i] for i in indices[0] if i >= 0)
    # Interact with Groq model
    st.text("Generating response...")
    chat_completion = client.chat.completions.create(
        messages=[
            {"role": "user", "content": f"{user_input}\nContext:\n{relevant_context}"}
        ],
        model="llama-3.3-70b-versatile",
    )
    response = chat_completion.choices[0].message.content
    st.write(response)

st.sidebar.text("Ready to Deploy!")