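"""Quranic-Therapy: a Streamlit voice/text chatbot with RAG over PDF sources.

Reference PDFs are pulled from Google Drive (and can also be uploaded in the
sidebar), chunked, embedded with sentence-transformers, and indexed in FAISS.
Queries arrive as text or as audio transcribed with Whisper; the nearest
chunks are retrieved and passed to the Groq chat API, and the reply is read
back with gTTS.
"""
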
import os
import streamlit as st
from groq import Groq
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import whisper
from gtts import gTTS
from tempfile import NamedTemporaryFile
import json
import gdown

# Initialize Groq client (read the API key from the environment instead of hardcoding a secret)
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Load embedding model
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Load Whisper model
whisper_model = whisper.load_model("base")

# Initialize FAISS
embedding_dimension = 384  # Dimension of embeddings from the model
index = faiss.IndexFlatL2(embedding_dimension)
metadata = []

# List of Google Drive PDF links
google_drive_links = [
    "https://drive.google.com/file/d/1l7uT2KK-g_r853KZtqY3yjYL0JQRGRFg/view?usp=sharing"
]

# Streamlit App Configuration
st.set_page_config(page_title="Voice/Text Chatbot with RAG PDF Query", page_icon="🔊", layout="wide")

# Title
st.markdown("<h1 style='text-align: center; color: #006400;'>Quranic-Therapy: AI-Driven Mental Health and Wellness </h1>", unsafe_allow_html=True)
st.markdown("---")

# Sidebar for PDF Upload
st.sidebar.header("Upload Your PDF File")
uploaded_file = st.sidebar.file_uploader("Upload a PDF file", type="pdf")

# Function to extract file ID from Google Drive link
def extract_file_id(drive_link):
    return drive_link.split("/d/")[1].split("/view")[0]

# Function to download PDF from Google Drive
def download_pdf_from_google_drive(file_id, output_path):
    download_url = f"https://drive.google.com/uc?id={file_id}"
    gdown.download(download_url, output_path, quiet=False)

# Function for text extraction from PDF
def extract_text_from_pdf(file):
    reader = PdfReader(file)
    text = ""
    for page in reader.pages:
        text += page.extract_text() or ""  # extract_text() can return None for image-only pages
    return text

# Function for text-to-speech
def text_to_speech(response_text):
    tts = gTTS(text=response_text, lang="en")
    audio_file = NamedTemporaryFile(delete=False, suffix=".mp3")
    tts.save(audio_file.name)
    return audio_file.name

# Save embeddings and metadata
def save_database(faiss_index, metadata, file_path="vector_database.json"):
    all_embeddings = []
    for i in range(faiss_index.ntotal):
        all_embeddings.append(faiss_index.reconstruct(i).tolist())
    data = {
        "embeddings": all_embeddings,
        "metadata": metadata
    }
    with open(file_path, "w") as f:
        json.dump(data, f)
    st.success(f"Vector database saved to {file_path}!")

# Process Google Drive PDFs
st.sidebar.header("Processing Google Drive PDFs")
with st.spinner("Downloading and processing Google Drive PDFs..."):
    for link in google_drive_links:
        file_id = extract_file_id(link)
        output_pdf_path = f"downloaded_{file_id}.pdf"

        # Download PDF
        if not os.path.exists(output_pdf_path):  # Avoid re-downloading
            download_pdf_from_google_drive(file_id, output_pdf_path)

        # Extract text and process
        pdf_text = extract_text_from_pdf(output_pdf_path)
        if pdf_text.strip():
            # Split text into chunks and create embeddings
            chunk_size = 500
            chunks = [pdf_text[i:i + chunk_size] for i in range(0, len(pdf_text), chunk_size)]
            embeddings = embedding_model.encode(chunks, convert_to_numpy=True)
            index.add(embeddings)

            # Store metadata
            metadata.extend([{"chunk": chunk, "source": f"Google Drive: {output_pdf_path}"} for chunk in chunks])

# PDF Text Processing
if uploaded_file:
    pdf_text = extract_text_from_pdf(uploaded_file)
    if pdf_text.strip():
        st.success("PDF text successfully extracted!")
        with st.expander("View Extracted Text", expanded=False):
            st.write(pdf_text[:3000] + "..." if len(pdf_text) > 3000 else pdf_text)

        # Split text into chunks and create embeddings
        chunk_size = 500
        chunks = [pdf_text[i:i + chunk_size] for i in range(0, len(pdf_text), chunk_size)]
        embeddings = embedding_model.encode(chunks, convert_to_numpy=True)
        index.add(embeddings)

        # Store metadata
        metadata.extend([{"chunk": chunk, "source": uploaded_file.name} for chunk in chunks])
        save_database(index, metadata)

        st.success(f"Processed {len(chunks)} chunks and stored embeddings in FAISS!")

# Main Chatbot Interface
st.header("🤖 Chatbot Interface")

# Input Method Selection
input_method = st.radio("Select Input Method:", options=["Text", "Audio"])

if input_method == "Text":
    st.subheader("💬 Text Query Input")
    text_query = st.text_input("Enter your query:")
    if st.button("Submit Text Query"):
        if text_query:
            try:
                # Search FAISS for nearest chunks
                query_embedding = embedding_model.encode([text_query], convert_to_numpy=True)
                distances, indices = index.search(query_embedding, k=5)
                relevant_chunks = [metadata[idx]["chunk"] for idx in indices[0] if idx != -1]

                # Generate response using Groq API
                context = "\n\n".join(relevant_chunks)
                prompt = f"Use these references to answer the query:\n\n{context}\n\nQuery: {text_query}"
                chat_completion = client.chat.completions.create(
                    messages=[{"role": "user", "content": prompt}],
                    model="llama-3.3-70b-versatile",
                )
                response = chat_completion.choices[0].message.content

                # Display text response
                st.write(f"**Chatbot Response:** {response}")

                # Generate and play audio response
                response_audio_path = text_to_speech(response)
                st.audio(response_audio_path, format="audio/mp3", start_time=0)

            except Exception as e:
                st.error(f"Error processing your query: {e}")

elif input_method == "Audio":
    st.subheader("🎤 Audio Query Input")
    uploaded_audio = st.file_uploader("Upload your audio file", type=["m4a", "mp3", "wav"])

    if uploaded_audio:
        try:
            audio_data = uploaded_audio.read()
            # Persist the upload to disk so Whisper can read it; keep the original extension
            suffix = os.path.splitext(uploaded_audio.name)[1] or ".m4a"
            audio_file = NamedTemporaryFile(delete=False, suffix=suffix)
            audio_file.write(audio_data)
            audio_file.close()  # flush to disk before transcription
            audio_file_path = audio_file.name

            st.success("Audio file uploaded successfully!")

            # Transcribe the audio using Whisper model
            transcription = whisper_model.transcribe(audio_file_path)["text"]
            st.write(f"**You said:** {transcription}")

            # Search FAISS for nearest chunks
            query_embedding = embedding_model.encode([transcription], convert_to_numpy=True)
            distances, indices = index.search(query_embedding, k=5)
            relevant_chunks = [metadata[idx]["chunk"] for idx in indices[0] if idx != -1]

            # Generate response using Groq API
            context = "\n\n".join(relevant_chunks)
            prompt = f"Use these references to answer the query:\n\n{context}\n\nQuery: {transcription}"
            chat_completion = client.chat.completions.create(
                messages=[{"role": "user", "content": prompt}],
                model="llama-3.3-70b-versatile",
            )
            response = chat_completion.choices[0].message.content

            # Display text response
            st.write(f"**Chatbot Response:** {response}")

            # Generate and play audio response
            response_audio_path = text_to_speech(response)
            st.audio(response_audio_path, format="audio/mp3", start_time=0)

        except Exception as e:
            st.error(f"Error processing your query: {e}")

# Footer
st.markdown("<p style='text-align: center;'> Quran is the therapy we all need </p>", unsafe_allow_html=True)