import os
import streamlit as st
from groq import Groq
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import whisper
from gtts import gTTS
from tempfile import NamedTemporaryFile
import json
import gdown
# Initialize Groq client (read the API key from the environment instead of hard-coding it)
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
# Load embedding model
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Load Whisper model
whisper_model = whisper.load_model("base")

# Initialize FAISS
embedding_dimension = 384  # Dimension of embeddings from all-MiniLM-L6-v2
index = faiss.IndexFlatL2(embedding_dimension)
metadata = []
# List of Google Drive PDF links
google_drive_links = [
    "https://drive.google.com/file/d/1l7uT2KK-g_r853KZtqY3yjYL0JQRGRFg/view?usp=sharing"
]
# Streamlit App Configuration
st.set_page_config(page_title="Voice/Text Chatbot with RAG PDF Query", page_icon="📖", layout="wide")

# Title
st.markdown("<h1 style='text-align: center; color: #006400;'>Quranic-Therapy: AI-Driven Mental Health and Wellness</h1>", unsafe_allow_html=True)
st.markdown("---")

# Sidebar for PDF Upload
st.sidebar.header("Upload Your PDF File")
uploaded_file = st.sidebar.file_uploader("Upload a PDF file", type="pdf")
# Function to extract the file ID from a Google Drive share link
def extract_file_id(drive_link):
    return drive_link.split("/d/")[1].split("/view")[0]

# Function to download a PDF from Google Drive
def download_pdf_from_google_drive(file_id, output_path):
    download_url = f"https://drive.google.com/uc?id={file_id}"
    gdown.download(download_url, output_path, quiet=False)
# Function for text extraction from a PDF
def extract_text_from_pdf(file):
    reader = PdfReader(file)
    text = ""
    for page in reader.pages:
        text += page.extract_text() or ""  # extract_text() can return None for image-only pages
    return text
# Function for text-to-speech
def text_to_speech(response_text):
    tts = gTTS(text=response_text, lang="en")
    audio_file = NamedTemporaryFile(delete=False, suffix=".mp3")
    tts.save(audio_file.name)
    return audio_file.name
# Save embeddings and metadata
def save_database(faiss_index, metadata, file_path="vector_database.json"):
    all_embeddings = []
    for i in range(faiss_index.ntotal):
        all_embeddings.append(faiss_index.reconstruct(i).tolist())
    data = {
        "embeddings": all_embeddings,
        "metadata": metadata
    }
    with open(file_path, "w") as f:
        json.dump(data, f)
    st.success(f"Vector database saved to {file_path}!")
# Process Google Drive PDFs
st.sidebar.header("Processing Google Drive PDFs")
with st.spinner("Downloading and processing Google Drive PDFs..."):
    for link in google_drive_links:
        file_id = extract_file_id(link)
        output_pdf_path = f"downloaded_{file_id}.pdf"

        # Download the PDF (skip if it is already on disk)
        if not os.path.exists(output_pdf_path):
            download_pdf_from_google_drive(file_id, output_pdf_path)

        # Extract text and process
        pdf_text = extract_text_from_pdf(output_pdf_path)
        if pdf_text.strip():
            # Split text into fixed-size chunks and create embeddings
            chunk_size = 500
            chunks = [pdf_text[i:i + chunk_size] for i in range(0, len(pdf_text), chunk_size)]
            embeddings = embedding_model.encode(chunks, convert_to_numpy=True)
            index.add(embeddings)

            # Store metadata
            metadata.extend([{"chunk": chunk, "source": f"Google Drive: {output_pdf_path}"} for chunk in chunks])
# PDF Text Processing for the uploaded file
if uploaded_file:
    pdf_text = extract_text_from_pdf(uploaded_file)
    if pdf_text.strip():
        st.success("PDF text successfully extracted!")
        with st.expander("View Extracted Text", expanded=False):
            st.write(pdf_text[:3000] + "..." if len(pdf_text) > 3000 else pdf_text)

        # Split text into fixed-size chunks and create embeddings
        chunk_size = 500
        chunks = [pdf_text[i:i + chunk_size] for i in range(0, len(pdf_text), chunk_size)]
        embeddings = embedding_model.encode(chunks, convert_to_numpy=True)
        index.add(embeddings)

        # Store metadata and persist the database
        metadata.extend([{"chunk": chunk, "source": uploaded_file.name} for chunk in chunks])
        save_database(index, metadata)
        st.success(f"Processed {len(chunks)} chunks and stored embeddings in FAISS!")
# Main Chatbot Interface
st.header("🤖 Chatbot Interface")

# Input Method Selection
input_method = st.radio("Select Input Method:", options=["Text", "Audio"])
if input_method == "Text": | |
st.subheader("π¬ Text Query Input") | |
text_query = st.text_input("Enter your query:") | |
if st.button("Submit Text Query"): | |
if text_query: | |
try: | |
# Search FAISS for nearest chunks | |
query_embedding = embedding_model.encode([text_query], convert_to_numpy=True) | |
distances, indices = index.search(query_embedding, k=5) | |
relevant_chunks = [metadata[idx]["chunk"] for idx in indices[0]] | |
# Generate response using Groq API | |
prompt = f"Use these references to answer the query:\n\n{relevant_chunks}\n\nQuery: {text_query}" | |
chat_completion = client.chat.completions.create( | |
messages=[{"role": "user", "content": prompt}], | |
model="llama-3.3-70b-versatile", | |
) | |
response = chat_completion.choices[0].message.content | |
# Display text response | |
st.write(f"**Chatbot Response:** {response}") | |
# Generate and play audio response | |
response_audio_path = text_to_speech(response) | |
st.audio(response_audio_path, format="audio/mp3", start_time=0) | |
except Exception as e: | |
st.error(f"Error processing your query: {e}") | |
elif input_method == "Audio": | |
st.subheader("π€ Audio Query Input") | |
uploaded_audio = st.file_uploader("Upload your audio file", type=["m4a", "mp3", "wav"]) | |
if uploaded_audio: | |
try: | |
audio_data = uploaded_audio.read() | |
audio_file = NamedTemporaryFile(delete=False, suffix=".m4a") | |
audio_file.write(audio_data) | |
audio_file_path = audio_file.name | |
st.success("Audio file uploaded successfully!") | |
# Transcribe the audio using Whisper model | |
transcription = whisper_model.transcribe(audio_file_path)["text"] | |
st.write(f"**You said:** {transcription}") | |
# Search FAISS for nearest chunks | |
query_embedding = embedding_model.encode([transcription], convert_to_numpy=True) | |
distances, indices = index.search(query_embedding, k=5) | |
relevant_chunks = [metadata[idx]["chunk"] for idx in indices[0]] | |
# Generate response using Groq API | |
prompt = f"Use these references to answer the query:\n\n{relevant_chunks}\n\nQuery: {transcription}" | |
chat_completion = client.chat.completions.create( | |
messages=[{"role": "user", "content": prompt}], | |
model="llama-3.3-70b-versatile", | |
) | |
response = chat_completion.choices[0].message.content | |
# Display text response | |
st.write(f"**Chatbot Response:** {response}") | |
# Generate and play audio response | |
response_audio_path = text_to_speech(response) | |
st.audio(response_audio_path, format="audio/mp3", start_time=0) | |
except Exception as e: | |
st.error(f"Error processing your query: {e}") | |
# Footer
st.markdown("<p style='text-align: center;'>Quran is the therapy we all need</p>", unsafe_allow_html=True)