"""Streamlit RAG app: Q&A over a Google-Drive-hosted PDF about electricity usage.

Pipeline: download PDF -> extract text -> embed lines with SentenceTransformers ->
index in FAISS -> retrieve top-k context for a question -> answer via Groq LLM.
"""

import os

import faiss
import gdown
import streamlit as st
from groq import Groq
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer

# Constants
DOCUMENT_LINK = "https://drive.google.com/file/d/1dbVp5inTNxd1SWrYbSgStX-kVp18P8TH/view?usp=sharing"
PDF_PATH = "document.pdf"
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"


def download_file_from_drive(drive_link: str, output_path: str) -> None:
    """Download a publicly shared Google Drive file to ``output_path``.

    The file id is parsed out of the standard ``.../file/d/<id>/view`` share URL.
    """
    file_id = drive_link.split("/d/")[1].split("/")[0]
    gdown.download(f"https://drive.google.com/uc?id={file_id}", output_path, quiet=False)


def extract_text_from_pdf(pdf_path: str) -> str:
    """Return the concatenated text of every page in the PDF.

    ``page.extract_text()`` returns ``None`` for pages with no extractable text
    (e.g. scanned images); treat those as empty instead of crashing.
    """
    reader = PdfReader(pdf_path)
    return "".join(page.extract_text() or "" for page in reader.pages)


@st.cache_resource
def load_embedding_model(model_name: str = EMBEDDING_MODEL_NAME) -> SentenceTransformer:
    """Load the sentence-embedding model once per server process.

    Without caching, the model was re-instantiated (a costly load) on every
    Streamlit rerun and again for every submitted question.
    """
    return SentenceTransformer(model_name)


@st.cache_resource
def create_vector_store(text: str, model_name: str = EMBEDDING_MODEL_NAME):
    """Embed each non-empty line of ``text`` and index the vectors in FAISS.

    Returns:
        (index, sentences): an ``IndexFlatL2`` over the line embeddings, and the
        list of lines in the same order, so search hits map back to text.
    """
    model = load_embedding_model(model_name)
    sentences = [line.strip() for line in text.split("\n") if line.strip()]
    embeddings = model.encode(sentences, convert_to_tensor=False)
    index = faiss.IndexFlatL2(embeddings[0].shape[0])
    index.add(embeddings)
    return index, sentences


# --- One-time setup (cached across reruns where possible) ---

# Download the document if not already downloaded
if not os.path.exists(PDF_PATH):
    st.sidebar.text("Downloading document...")
    download_file_from_drive(DOCUMENT_LINK, PDF_PATH)

# Extract text from the document
st.sidebar.text("Extracting text from document...")
text = extract_text_from_pdf(PDF_PATH)

# Create a vector store
st.sidebar.text("Creating vector store...")
index, sentences = create_vector_store(text)

# Initialize Groq API Client
# NOTE(review): if GROQ_API_KEY is unset this passes None and fails at request
# time — confirm the deployment environment always provides it.
st.sidebar.text("Initializing Groq Client...")
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# --- Streamlit App Frontend ---

st.title("Electricity-Related Q&A Application")
st.write("You can ask any question about electricity load balancing for your house appliances,")
st.write("electricity load consumption calculation etc...")

user_input = st.text_input("Ask a question:")

if st.button("Submit") and user_input:
    # Search for relevant content (reuse the cached model; never ask FAISS for
    # more neighbours than there are indexed sentences, which would yield -1
    # placeholder indices and silently wrong context).
    query_embedding = load_embedding_model().encode([user_input])
    k = min(3, len(sentences))
    distances, indices = index.search(query_embedding, k=k)

    # Fetch relevant sentences (skip any -1 "no result" slots defensively)
    relevant_context = "\n".join(sentences[i] for i in indices[0] if i >= 0)

    # Interact with Groq model
    st.text("Generating response...")
    chat_completion = client.chat.completions.create(
        messages=[
            {"role": "user", "content": f"{user_input}\nContext:\n{relevant_context}"}
        ],
        model="llama-3.3-70b-versatile",
    )
    response = chat_completion.choices[0].message.content
    st.write(response)

st.sidebar.text("Ready to Deploy!")