import os
import streamlit as st
from groq import Groq
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import whisper
from gtts import gTTS
from tempfile import NamedTemporaryFile
import json
import gdown
# Initialize Groq client
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))  # read the API key from the environment rather than hardcoding it
# Load embedding model
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
# Load Whisper model
whisper_model = whisper.load_model("base")
# Initialize FAISS
embedding_dimension = 384 # Dimension of embeddings from the model
index = faiss.IndexFlatL2(embedding_dimension)
metadata = []
# List of Google Drive PDF links
google_drive_links = [
"https://drive.google.com/file/d/1l7uT2KK-g_r853KZtqY3yjYL0JQRGRFg/view?usp=sharing"
]
# Streamlit App Configuration
st.set_page_config(page_title="Voice/Text Chatbot with RAG PDF Query", page_icon="🤖", layout="wide")
# Title
st.markdown("<h1 style='text-align: center; color: #006400;'>Quranic-Therapy: AI-Driven Mental Health and Wellness </h1>", unsafe_allow_html=True)
st.markdown("---")
# Sidebar for PDF Upload
st.sidebar.header("Upload Your PDF File")
uploaded_file = st.sidebar.file_uploader("Upload a PDF file", type="pdf")
# Function to extract file ID from Google Drive link
def extract_file_id(drive_link):
    return drive_link.split("/d/")[1].split("/view")[0]
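# Example: a share link of the form "https://drive.google.com/file/d/<FILE_ID>/view?usp=sharing" yields "<FILE_ID>"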
# Function to download PDF from Google Drive
def download_pdf_from_google_drive(file_id, output_path):
    download_url = f"https://drive.google.com/uc?id={file_id}"
    gdown.download(download_url, output_path, quiet=False)
# Function for text extraction from PDF
def extract_text_from_pdf(file):
    reader = PdfReader(file)
    text = ""
    for page in reader.pages:
        text += page.extract_text() or ""  # extract_text() can return None for image-only pages
    return text
# Function for text-to-speech
def text_to_speech(response_text):
    tts = gTTS(text=response_text, lang="en")
    audio_file = NamedTemporaryFile(delete=False, suffix=".mp3")
    tts.save(audio_file.name)
    return audio_file.name
# Save embeddings and metadata
def save_database(faiss_index, metadata, file_path="vector_database.json"):
    all_embeddings = []
    for i in range(faiss_index.ntotal):
        all_embeddings.append(faiss_index.reconstruct(i).tolist())
    data = {
        "embeddings": all_embeddings,
        "metadata": metadata
    }
    with open(file_path, "w") as f:
        json.dump(data, f)
    st.success(f"Vector database saved to {file_path}!")
# Process Google Drive PDFs
st.sidebar.header("Processing Google Drive PDFs")
with st.spinner("Downloading and processing Google Drive PDFs..."):
    for link in google_drive_links:
        file_id = extract_file_id(link)
        output_pdf_path = f"downloaded_{file_id}.pdf"
        # Download PDF
        if not os.path.exists(output_pdf_path):  # Avoid re-downloading
            download_pdf_from_google_drive(file_id, output_pdf_path)
        # Extract text and process
        pdf_text = extract_text_from_pdf(output_pdf_path)
        if pdf_text.strip():
            # Split text into chunks and create embeddings
            chunk_size = 500
            chunks = [pdf_text[i:i + chunk_size] for i in range(0, len(pdf_text), chunk_size)]
            embeddings = embedding_model.encode(chunks, convert_to_numpy=True)
            index.add(embeddings)
            # Store metadata
            metadata.extend([{"chunk": chunk, "source": f"Google Drive: {output_pdf_path}"} for chunk in chunks])
# PDF Text Processing
if uploaded_file:
    pdf_text = extract_text_from_pdf(uploaded_file)
    if pdf_text.strip():
        st.success("PDF text successfully extracted!")
        with st.expander("View Extracted Text", expanded=False):
            st.write(pdf_text[:3000] + "..." if len(pdf_text) > 3000 else pdf_text)
        # Split text into chunks and create embeddings
        chunk_size = 500
        chunks = [pdf_text[i:i + chunk_size] for i in range(0, len(pdf_text), chunk_size)]
        embeddings = embedding_model.encode(chunks, convert_to_numpy=True)
        index.add(embeddings)
        # Store metadata
        metadata.extend([{"chunk": chunk, "source": uploaded_file.name} for chunk in chunks])
        save_database(index, metadata)
        st.success(f"Processed {len(chunks)} chunks and stored embeddings in FAISS!")
# Main Chatbot Interface
st.header("🤖 Chatbot Interface")
# Input Method Selection
input_method = st.radio("Select Input Method:", options=["Text", "Audio"])
if input_method == "Text":
st.subheader("π¬ Text Query Input")
text_query = st.text_input("Enter your query:")
if st.button("Submit Text Query"):
if text_query:
try:
# Search FAISS for nearest chunks
query_embedding = embedding_model.encode([text_query], convert_to_numpy=True)
distances, indices = index.search(query_embedding, k=5)
relevant_chunks = [metadata[idx]["chunk"] for idx in indices[0]]
# Generate response using Groq API
prompt = f"Use these references to answer the query:\n\n{relevant_chunks}\n\nQuery: {text_query}"
chat_completion = client.chat.completions.create(
messages=[{"role": "user", "content": prompt}],
model="llama-3.3-70b-versatile",
)
response = chat_completion.choices[0].message.content
# Display text response
st.write(f"**Chatbot Response:** {response}")
# Generate and play audio response
response_audio_path = text_to_speech(response)
st.audio(response_audio_path, format="audio/mp3", start_time=0)
except Exception as e:
st.error(f"Error processing your query: {e}")
elif input_method == "Audio":
st.subheader("π€ Audio Query Input")
uploaded_audio = st.file_uploader("Upload your audio file", type=["m4a", "mp3", "wav"])
if uploaded_audio:
try:
audio_data = uploaded_audio.read()
audio_file = NamedTemporaryFile(delete=False, suffix=".m4a")
audio_file.write(audio_data)
audio_file_path = audio_file.name
st.success("Audio file uploaded successfully!")
# Transcribe the audio using Whisper model
transcription = whisper_model.transcribe(audio_file_path)["text"]
st.write(f"**You said:** {transcription}")
# Search FAISS for nearest chunks
query_embedding = embedding_model.encode([transcription], convert_to_numpy=True)
distances, indices = index.search(query_embedding, k=5)
relevant_chunks = [metadata[idx]["chunk"] for idx in indices[0]]
# Generate response using Groq API
prompt = f"Use these references to answer the query:\n\n{relevant_chunks}\n\nQuery: {transcription}"
chat_completion = client.chat.completions.create(
messages=[{"role": "user", "content": prompt}],
model="llama-3.3-70b-versatile",
)
response = chat_completion.choices[0].message.content
# Display text response
st.write(f"**Chatbot Response:** {response}")
# Generate and play audio response
response_audio_path = text_to_speech(response)
st.audio(response_audio_path, format="audio/mp3", start_time=0)
except Exception as e:
st.error(f"Error processing your query: {e}")
# Footer
st.markdown("<p style='text-align: center;'> Quran is the therapy we all need </p>", unsafe_allow_html=True)