Techbite committed on
Commit f402ae8 · 1 Parent(s): 3c4eeeb

changes:minor changes

Files changed (11)
  1. .gitignore +22 -3
  2. app.py +135 -120
  3. data/faq_data.csv +6 -1
  4. notes.txt +0 -16
  5. requirements.txt +8 -2
  6. src/__init__.py +1 -2
  7. src/data_processing.py +98 -44
  8. src/embedding.py +31 -23
  9. src/llm_response.py +35 -69
  10. src/utils.py +54 -13
  11. test_set.json +7 -0
.gitignore CHANGED
@@ -1,3 +1,22 @@
- # Ignore all files in the .venv directory
- venv
- __pycache__
+ # Python virtual environment
+ venv/
+
+ # Python cache files
+ __pycache__/
+
+ # Data and model files
+ data/ecommerce_faqs.json
+ embeddings/
+ offload/
+ *.bin
+
+ # Feedback data
+ feedback.json
+
+ # NLTK data
+ nltk_data/
+
+ # Streamlit cache
+ .venv/
+ streamlit_cache/
+
app.py CHANGED
@@ -3,233 +3,248 @@ import time
  import os
  import gc
  import torch
- from src.data_processing import load_huggingface_faq_data, load_faq_data, preprocess_faq
  from src.embedding import FAQEmbedder
  from src.llm_response import ResponseGenerator
- from src.utils import time_function, format_memory_stats

- # Set page title and layout
- st.set_page_config(
- page_title="E-Commerce FAQ Chatbot",
- layout="wide",
- initial_sidebar_state="expanded"
- )

- # Memory optimization: Force garbage collection before starting
- gc.collect()
- if torch.cuda.is_available():
- torch.cuda.empty_cache()

  @time_function
- def initialize_components(use_huggingface: bool = True, model_name: str = "mistralai/Mistral-7B-Instruct-v0.1"):
- """Initialize all components of the RAG system with memory optimization"""
-
- # Step 1: Load and preprocess FAQ data
- if use_huggingface:
- faqs = load_huggingface_faq_data("NebulaByte/E-Commerce_FAQs")
- else:
- data_path = os.path.join("data", "faq_data.csv")
- faqs = load_faq_data(data_path)
-
- processed_faqs = preprocess_faq(faqs)
-
- # Step 2: Initialize and create embeddings
- # Use smaller batch size for memory efficiency
- embedder = FAQEmbedder()
- embedder.create_embeddings(processed_faqs, batch_size=32)
-
- # Clear memory before loading the LLM
- gc.collect()
- if torch.cuda.is_available():
- torch.cuda.empty_cache()
-
- # Step 3: Initialize response generator
- response_generator = ResponseGenerator(model_name=model_name)
-
- return embedder, response_generator, len(processed_faqs)

  def main():
  st.title("E-Commerce Customer Support FAQ Chatbot")
- st.subheader("Ask any question about your orders, shipping, returns, or any other e-commerce related queries")

- # Sidebar configuration
  st.sidebar.title("Configuration")
  use_huggingface = st.sidebar.checkbox("Use Hugging Face Dataset", value=True)

- # Model options - include smaller models by default
  model_options = {
- "Phi-2 (Recommended for 8GB GPU)": "microsoft/phi-2",
- "TinyLlama-1.1B (Smallest, fastest)": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
- "Mistral-7B (Requires 4-bit quantization)": "mistralai/Mistral-7B-Instruct-v0.1"
  }
-
- # Default to Phi-2 for 8-11GB GPU
  selected_model = st.sidebar.selectbox("Select LLM Model", list(model_options.keys()), index=0)
  model_name = model_options[selected_model]

- # Memory usage monitoring
  if st.sidebar.checkbox("Show Memory Usage", value=True):
  st.sidebar.subheader("Memory Usage")
- memory_stats = format_memory_stats()
- for key, value in memory_stats.items():
  st.sidebar.text(f"{key}: {value}")

- # Initialize session state for chat history if it doesn't exist
  if "chat_history" not in st.session_state:
  st.session_state.chat_history = []

- # Initialize RAG components (only once)
  if "system_initialized" not in st.session_state or st.sidebar.button("Reload System"):
- with st.spinner("Initializing system components... This may take a few minutes."):
- st.session_state.embedder, st.session_state.response_generator, num_faqs = initialize_components(
- use_huggingface=use_huggingface,
- model_name=model_name
- )
- st.session_state.system_initialized = True
- st.sidebar.success(f"System initialized with {num_faqs} FAQs!")

- # Chat interface
  col1, col2 = st.columns([2, 1])

  with col1:
- # Display chat history
  st.subheader("Conversation")
  chat_container = st.container(height=400)
-
  with chat_container:
  for i, message in enumerate(st.session_state.chat_history):
  if message["role"] == "user":
  st.markdown(f"**You**: {message['content']}")
  else:
  st.markdown(f"**Bot**: {message['content']}")
-
  if i < len(st.session_state.chat_history) - 1:
  st.markdown("---")
-
- # Chat input
  with st.form(key="chat_form"):
- user_query = st.text_input("Type your question:", key="user_input",
- placeholder="e.g., How do I track my order?")
  submit_button = st.form_submit_button("Ask")

  with col2:
  if st.session_state.get("system_initialized", False):
- # Show FAQ metadata and information
  st.subheader("Retrieved Information")
  info_container = st.container(height=500)
-
  with info_container:
  if "current_faqs" in st.session_state:
- for i, faq in enumerate(st.session_state.get("current_faqs", [])):
  st.markdown(f"**Relevant FAQ #{i+1}**")
  st.markdown(f"**Q**: {faq['question']}")
- # Limit answer length to save UI memory
  st.markdown(f"**A**: {faq['answer'][:150]}..." if len(faq['answer']) > 150 else f"**A**: {faq['answer']}")
  st.markdown(f"*Similarity Score*: {faq['similarity']:.2f}")
  if 'category' in faq and faq['category']:
  st.markdown(f"*Category*: {faq['category']}")
  st.markdown("---")
  else:
- st.markdown("Ask a question to see relevant FAQs here.")

- # Performance metrics in the sidebar
  if "retrieval_time" in st.session_state and "generation_time" in st.session_state:
  st.sidebar.subheader("Performance Metrics")
  st.sidebar.markdown(f"Retrieval time: {st.session_state.retrieval_time:.2f} seconds")
  st.sidebar.markdown(f"Response generation: {st.session_state.generation_time:.2f} seconds")
  st.sidebar.markdown(f"Total time: {st.session_state.retrieval_time + st.session_state.generation_time:.2f} seconds")

- # Process user query
  if submit_button and user_query:
- # Add user query to chat history
- st.session_state.chat_history.append({"role": "user", "content": user_query})

- # Process query
- with st.spinner("Thinking..."):
- # Free memory before processing
  gc.collect()
  if torch.cuda.is_available():
  torch.cuda.empty_cache()
-
- # Step 1: Retrieve relevant FAQs
  start_time = time.time()
- relevant_faqs = st.session_state.embedder.retrieve_relevant_faqs(user_query)
  retrieval_time = time.time() - start_time

- # Step 2: Generate response
  start_time = time.time()
- response = st.session_state.response_generator.generate_response(user_query, relevant_faqs)
  generation_time = time.time() - start_time

- # Store metrics and retrieved FAQs
  st.session_state.retrieval_time = retrieval_time
  st.session_state.generation_time = generation_time
  st.session_state.current_faqs = relevant_faqs
-
- # Step 3: Add response to chat history
- st.session_state.chat_history.append({"role": "assistant", "content": response})
-
- # Free memory after processing
- gc.collect()
- if torch.cuda.is_available():
- torch.cuda.empty_cache()

- # Rerun to display the updated chat history
- st.experimental_rerun()

- # Add sample questions at the bottom
  st.subheader("Sample Questions")
  sample_questions = [
  "How do I track my order?",
  "What should I do if my delivery is delayed?",
  "How do I return a product?",
  "Can I cancel my order after placing it?",
- "How quickly will my order be delivered?",
- "Why can't I track my order yet?"
  ]
-
- # Use two columns instead of three to reduce memory usage
  cols = st.columns(2)
  for i, question in enumerate(sample_questions):
  col_idx = i % 2
  if cols[col_idx].button(question, key=f"sample_{i}"):
- # Clear the text input and set the sample question
  st.session_state.user_input = question
- # Simulate form submission
  st.session_state.chat_history.append({"role": "user", "content": question})

- # Process query (similar to above)
- with st.spinner("Thinking..."):
- # Free memory before processing
  gc.collect()
  if torch.cuda.is_available():
  torch.cuda.empty_cache()
-
- # Step 1: Retrieve relevant FAQs
  start_time = time.time()
- relevant_faqs = st.session_state.embedder.retrieve_relevant_faqs(question)
  retrieval_time = time.time() - start_time

- # Step 2: Generate response
  start_time = time.time()
- response = st.session_state.response_generator.generate_response(question, relevant_faqs)
  generation_time = time.time() - start_time

- # Store metrics and retrieved FAQs
  st.session_state.retrieval_time = retrieval_time
  st.session_state.generation_time = generation_time
  st.session_state.current_faqs = relevant_faqs
-
- # Step 3: Add response to chat history
- st.session_state.chat_history.append({"role": "assistant", "content": response})
-
- # Free memory after processing
- gc.collect()
- if torch.cuda.is_available():
- torch.cuda.empty_cache()

- # Rerun to display the updated chat history
- st.experimental_rerun()

  if __name__ == "__main__":
  main()
  import os
  import gc
  import torch
+ from src.data_processing import load_huggingface_faq_data, load_faq_data, preprocess_faq, augment_faqs
  from src.embedding import FAQEmbedder
  from src.llm_response import ResponseGenerator
+ from src.utils import time_function, format_memory_stats, evaluate_response, evaluate_retrieval, baseline_keyword_search

+ # Suppress CUDA warning and Torch path errors
+ os.environ["CUDA_VISIBLE_DEVICES"] = ""
+ os.environ["TORCH_NO_PATH_CHECK"] = "1"

+ st.set_page_config(page_title="E-Commerce FAQ Chatbot", layout="wide", initial_sidebar_state="expanded")

  @time_function
+ def initialize_components(use_huggingface: bool = True, model_name: str = "microsoft/phi-2", enable_augmentation: bool = True):
+ """
+ Initialize RAG system components
+ """
+ try:
+ if use_huggingface:
+ faqs = load_huggingface_faq_data("NebulaByte/E-Commerce_FAQs")
+ else:
+ faqs = load_faq_data("data/faq_data.csv")
+
+ processed_faqs = augment_faqs(preprocess_faq(faqs), enable_augmentation=enable_augmentation)
+ embedder = FAQEmbedder()
+
+ if os.path.exists("embeddings"):
+ embedder.load("embeddings")
+ else:
+ embedder.create_embeddings(processed_faqs)
+ embedder.save("embeddings")
+
+ gc.collect()
+ if torch.cuda.is_available():
+ torch.cuda.empty_cache()
+
+ response_generator = ResponseGenerator(model_name=model_name)
+ response_generator.generate_response("Warmup query", [{"question": "Test", "answer": "Test"}])
+
+ return embedder, response_generator, len(processed_faqs)
+ except Exception as e:
+ st.error(f"Initialization failed: {e}")
+ raise

  def main():
  st.title("E-Commerce Customer Support FAQ Chatbot")
+ st.subheader("Ask about orders, shipping, returns, or other e-commerce queries")

  st.sidebar.title("Configuration")
  use_huggingface = st.sidebar.checkbox("Use Hugging Face Dataset", value=True)
+ enable_augmentation = st.sidebar.checkbox("Enable FAQ Augmentation", value=True, help="Generate paraphrased questions to expand dataset")
+ target_lang = st.sidebar.selectbox("Language", ["en", "es", "fr"], index=0)

  model_options = {
+ "Phi-2 (Recommended for 16GB RAM)": "microsoft/phi-2",
+ "TinyLlama-1.1B (Fastest)": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+ "Mistral-7B (For 15GB+ GPU)": "mistralai/Mistral-7B-Instruct-v0.1"
  }
  selected_model = st.sidebar.selectbox("Select LLM Model", list(model_options.keys()), index=0)
  model_name = model_options[selected_model]

  if st.sidebar.checkbox("Show Memory Usage", value=True):
  st.sidebar.subheader("Memory Usage")
+ for key, value in format_memory_stats().items():
  st.sidebar.text(f"{key}: {value}")

  if "chat_history" not in st.session_state:
  st.session_state.chat_history = []
+ if "query_cache" not in st.session_state:
+ st.session_state.query_cache = {}
+ if "feedback" not in st.session_state:
+ st.session_state.feedback = []

  if "system_initialized" not in st.session_state or st.sidebar.button("Reload System"):
+ with st.spinner("Initializing system..."):
+ try:
+ st.session_state.embedder, st.session_state.response_generator, num_faqs = initialize_components(
+ use_huggingface=use_huggingface,
+ model_name=model_name,
+ enable_augmentation=enable_augmentation
+ )
+ st.session_state.system_initialized = True
+ st.sidebar.success(f"System initialized with {num_faqs} FAQs!")
+ except Exception as e:
+ st.error(f"System initialization failed: {e}")
+ return

  col1, col2 = st.columns([2, 1])

  with col1:
  st.subheader("Conversation")
  chat_container = st.container(height=400)
  with chat_container:
  for i, message in enumerate(st.session_state.chat_history):
  if message["role"] == "user":
  st.markdown(f"**You**: {message['content']}")
  else:
  st.markdown(f"**Bot**: {message['content']}")
  if i < len(st.session_state.chat_history) - 1:
  st.markdown("---")
+
  with st.form(key="chat_form"):
+ user_query = st.text_input("Type your question:", key="user_input", placeholder="e.g., How do I track my order?")
  submit_button = st.form_submit_button("Ask")
+
+ if len(st.session_state.chat_history) > 0:
+ with st.form(key=f"feedback_form_{len(st.session_state.chat_history)}"):
+ rating = st.slider("Rate this response (1-5)", 1, 5, key=f"rating_{len(st.session_state.chat_history)}")
+ comments = st.text_area("Comments", key=f"comments_{len(st.session_state.chat_history)}")
+ if st.form_submit_button("Submit Feedback"):
+ st.session_state.feedback.append({
+ "rating": rating,
+ "comments": comments,
+ "response": st.session_state.chat_history[-1]["content"]
+ })
+ with open("feedback.json", "w") as f:
+ json.dump(st.session_state.feedback, f)
+ st.success("Feedback submitted!")

  with col2:
  if st.session_state.get("system_initialized", False):
  st.subheader("Retrieved Information")
  info_container = st.container(height=500)
  with info_container:
  if "current_faqs" in st.session_state:
+ for i, faq in enumerate(st.session_state.current_faqs):
  st.markdown(f"**Relevant FAQ #{i+1}**")
  st.markdown(f"**Q**: {faq['question']}")
  st.markdown(f"**A**: {faq['answer'][:150]}..." if len(faq['answer']) > 150 else f"**A**: {faq['answer']}")
  st.markdown(f"*Similarity Score*: {faq['similarity']:.2f}")
  if 'category' in faq and faq['category']:
  st.markdown(f"*Category*: {faq['category']}")
  st.markdown("---")
  else:
+ st.markdown("Ask a question to see relevant FAQs.")

  if "retrieval_time" in st.session_state and "generation_time" in st.session_state:
  st.sidebar.subheader("Performance Metrics")
  st.sidebar.markdown(f"Retrieval time: {st.session_state.retrieval_time:.2f} seconds")
  st.sidebar.markdown(f"Response generation: {st.session_state.generation_time:.2f} seconds")
  st.sidebar.markdown(f"Total time: {st.session_state.retrieval_time + st.session_state.generation_time:.2f} seconds")

  if submit_button and user_query:
+ from src.data_processing import translate_faq
+ from googletrans import Translator
+ translator = Translator()
+ if target_lang != "en":
+ user_query_translated = translator.translate(user_query, dest="en").text
+ else:
+ user_query_translated = user_query

+ if user_query_translated in st.session_state.query_cache:
+ response, relevant_faqs = st.session_state.query_cache[user_query_translated]
+ else:
  gc.collect()
  if torch.cuda.is_available():
  torch.cuda.empty_cache()
+
  start_time = time.time()
+ relevant_faqs = st.session_state.embedder.retrieve_relevant_faqs(user_query_translated)
  retrieval_time = time.time() - start_time

+ if target_lang != "en":
+ relevant_faqs = [translate_faq(faq, target_lang) for faq in relevant_faqs]
+
  start_time = time.time()
+ response = st.session_state.response_generator.generate_response(user_query_translated, relevant_faqs)
  generation_time = time.time() - start_time

+ if target_lang != "en":
+ response = translator.translate(response, dest=target_lang).text
+
+ st.session_state.query_cache[user_query_translated] = (response, relevant_faqs)
  st.session_state.retrieval_time = retrieval_time
  st.session_state.generation_time = generation_time
  st.session_state.current_faqs = relevant_faqs

+ st.session_state.chat_history.append({"role": "user", "content": user_query})
+ st.session_state.chat_history.append({"role": "assistant", "content": response})
+
+ if st.button("Clear Chat History"):
+ st.session_state.chat_history = []
+ st.session_state.query_cache = {}
+ gc.collect()
+ if torch.cuda.is_available():
+ torch.cuda.empty_cache()
+
+ if st.session_state.get("system_initialized", False):
+ st.sidebar.subheader("Baseline Comparison")
+ baseline_faqs = baseline_keyword_search(user_query_translated if 'user_query_translated' in locals() else "", st.session_state.embedder.faqs)
+ st.sidebar.write(f"RAG FAQs: {[faq['question'][:50] for faq in st.session_state.get('current_faqs', [])]}")
+ st.sidebar.write(f"Keyword FAQs: {[faq['question'][:50] for faq in baseline_faqs]}")

  st.subheader("Sample Questions")
  sample_questions = [
  "How do I track my order?",
  "What should I do if my delivery is delayed?",
  "How do I return a product?",
  "Can I cancel my order after placing it?",
+ "How quickly will my order be delivered?"
  ]
  cols = st.columns(2)
  for i, question in enumerate(sample_questions):
  col_idx = i % 2
  if cols[col_idx].button(question, key=f"sample_{i}"):
  st.session_state.user_input = question
  st.session_state.chat_history.append({"role": "user", "content": question})

+ from src.data_processing import translate_faq
+ from googletrans import Translator
+ translator = Translator()
+ if target_lang != "en":
+ question_translated = translator.translate(question, dest="en").text
+ else:
+ question_translated = question
+
+ if question_translated in st.session_state.query_cache:
+ response, relevant_faqs = st.session_state.query_cache[question_translated]
+ else:
  gc.collect()
  if torch.cuda.is_available():
  torch.cuda.empty_cache()
+
  start_time = time.time()
+ relevant_faqs = st.session_state.embedder.retrieve_relevant_faqs(question_translated)
  retrieval_time = time.time() - start_time

+ if target_lang != "en":
+ relevant_faqs = [translate_faq(faq, target_lang) for faq in relevant_faqs]
+
  start_time = time.time()
+ response = st.session_state.response_generator.generate_response(question_translated, relevant_faqs)
  generation_time = time.time() - start_time

+ if target_lang != "en":
+ response = translator.translate(response, dest=target_lang).text
+
+ st.session_state.query_cache[question_translated] = (response, relevant_faqs)
  st.session_state.retrieval_time = retrieval_time
  st.session_state.generation_time = generation_time
  st.session_state.current_faqs = relevant_faqs

+ st.session_state.chat_history.append({"role": "assistant", "content": response})

  if __name__ == "__main__":
  main()
data/faq_data.csv CHANGED
@@ -8,4 +8,9 @@ question,answer
  "How do I cancel my subscription?","To cancel your subscription, log in to your account, go to 'Subscription Settings', and click on 'Cancel Subscription'. Follow the prompts to complete the cancellation process."
  "Is my personal information secure?","Yes, we take data security seriously. We use industry-standard encryption protocols to protect your personal information. We never share your data with third parties without your consent."
  "How do I contact customer support?","You can contact our customer support team via email at support@example.com, through the live chat on our website, or by calling our support line at 1-800-123-4567 during business hours (9 AM - 5 PM EST, Monday-Friday)."
- "Do you offer discounts for bulk orders?","Yes, we offer volume discounts for bulk orders. Please contact our sales team at sales@example.com with your requirements to get a custom quote."
  "How do I cancel my subscription?","To cancel your subscription, log in to your account, go to 'Subscription Settings', and click on 'Cancel Subscription'. Follow the prompts to complete the cancellation process."
  "Is my personal information secure?","Yes, we take data security seriously. We use industry-standard encryption protocols to protect your personal information. We never share your data with third parties without your consent."
  "How do I contact customer support?","You can contact our customer support team via email at support@example.com, through the live chat on our website, or by calling our support line at 1-800-123-4567 during business hours (9 AM - 5 PM EST, Monday-Friday)."
+ "Do you offer discounts for bulk orders?","Yes, we offer volume discounts for bulk orders. Please contact our sales team at sales@example.com with your requirements to get a custom quote."
+ "Can I change my shipping address?","You can change your shipping address before the order is processed by contacting support at support@example.com."
+ "What happens if my order is damaged?","If your order arrives damaged, please contact us within 7 days with photos of the damage to initiate a replacement or refund."
+ "How do I apply a discount code?","Enter your discount code at checkout in the 'Promo Code' field. The discount will be applied to eligible items in your cart."
+ "What if I receive the wrong item?","If you receive the wrong item, contact support within 7 days with your order number and photos of the item received. We'll arrange a replacement or refund."
+ "Do you offer gift cards?","Yes, we offer digital gift cards in various denominations. Purchase them on our website and send them via email to the recipient."
notes.txt DELETED
@@ -1,16 +0,0 @@
- # Create a virtual environment (recommended)
- python -m venv venv
- source venv/bin/activate # On Windows: venv\Scripts\activate
-
- # Install required packages
- pip install torch transformers sentence-transformers faiss-cpu pandas streamlit nltk
-
-
-
-
-
- # Activate your virtual environment if not already active
- source venv/bin/activate # On Windows: venv\Scripts\activate
-
- # Run the Streamlit app
- streamlit run app.py
requirements.txt CHANGED
@@ -3,8 +3,14 @@ transformers>=4.30.0
  sentence-transformers>=2.2.2
  faiss-cpu>=1.7.4
  pandas>=1.5.0
- streamlit>=1.25.0
  numpy>=1.24.0
  datasets>=2.10.0
  bitsandbytes>=0.40.0
- accelerate>=0.20.0
  sentence-transformers>=2.2.2
  faiss-cpu>=1.7.4
  pandas>=1.5.0
+ streamlit>=1.36.0
  numpy>=1.24.0
  datasets>=2.10.0
  bitsandbytes>=0.40.0
+ accelerate>=0.20.0
+ evaluate>=0.4.0
+ scikit-learn>=1.2.0
+ nlpaug>=1.1.0
+ googletrans==4.0.0-rc1
+ psutil>=5.9.0
+ nltk>=3.8.0
src/__init__.py CHANGED
@@ -1,2 +1 @@
-
- # This file is intentionally left empty to mark the directory as a Python package
+ # This file marks the src directory as a Python package
src/data_processing.py CHANGED
@@ -1,40 +1,56 @@
  import pandas as pd
  import json
  from typing import List, Dict, Any
  from datasets import load_dataset

  def load_huggingface_faq_data(dataset_name: str = "NebulaByte/E-Commerce_FAQs") -> List[Dict[str, Any]]:
  """
- Load FAQ data from Hugging Face datasets
  """
- print(f"Loading dataset {dataset_name} from Hugging Face...")

  try:
- # Load the dataset
  dataset = load_dataset(dataset_name)
-
- # Get the train split (as seen in the screenshots)
- train_data = dataset["train"]
-
- # Convert to list of dictionaries
- faqs = []
- for item in train_data:
- # Extract the required fields
- faq = {
- "question": item["question"],
- "answer": item["answer"],
- # Include additional metadata
- "category": item.get("category", ""),
- "question_id": item.get("question_id", ""),
- "faq_url": item.get("faq_url", "")
- }
- faqs.append(faq)
-
- print(f"Loaded {len(faqs)} FAQ entries from Hugging Face")
  return faqs
-
  except Exception as e:
- print(f"Error loading dataset from Hugging Face: {e}")
  print("Falling back to local data...")
  return load_faq_data("data/faq_data.csv")

@@ -46,48 +62,86 @@ def load_faq_data(file_path: str) -> List[Dict[str, Any]]:
  try:
  if file_path.endswith('.csv'):
  df = pd.read_csv(file_path)
- # Assume CSV has 'question' and 'answer' columns
  faqs = df.to_dict('records')
  elif file_path.endswith('.json'):
  with open(file_path, 'r') as f:
  faqs = json.load(f)
  else:
  raise ValueError(f"Unsupported file format: {file_path}")
-
  print(f"Loaded {len(faqs)} FAQ entries")
  return faqs
  except Exception as e:
  print(f"Error loading data: {e}")
- # Create a minimal sample dataset as fallback
  print("Creating sample dataset as fallback")
  sample_faqs = [
- {"question": "How do I track my order?",
- "answer": "You can track your order by logging into your account and visiting the Order History section."},
- {"question": "How do I reset my password?",
- "answer": "To reset your password, click on the 'Forgot Password' link on the login page."}
  ]
  return sample_faqs

  def preprocess_faq(faqs: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
  """
- Preprocess FAQ data: clean text, handle formatting
  """
  processed_faqs = []
  for faq in faqs:
- # Basic cleaning - remove extra whitespace
- if 'question' in faq and faq['question'] is not None:
- faq['question'] = faq['question'].strip()
- else:
- faq['question'] = ""
-
- if 'answer' in faq and faq['answer'] is not None:
- faq['answer'] = faq['answer'].strip()
- else:
- faq['answer'] = ""

  # Only include FAQs with both question and answer
- if faq.get('question') and faq.get('answer'):
  processed_faqs.append(faq)

  print(f"After preprocessing: {len(processed_faqs)} valid FAQ entries")
- return processed_faqs
  import pandas as pd
  import json
+ import os
+ import nltk
  from typing import List, Dict, Any
  from datasets import load_dataset
+ import nlpaug.augmenter.word as naw
+ from googletrans import Translator
+
+ # Configure NLTK data path and download required resources
+ NLTK_DATA_PATH = os.path.join(os.path.dirname(__file__), "../nltk_data")
+ os.makedirs(NLTK_DATA_PATH, exist_ok=True)
+ nltk.data.path.append(NLTK_DATA_PATH)
+
+ def ensure_nltk_resources():
+ """
+ Ensure NLTK resources are downloaded and available
+ """
+ try:
+ nltk.download('averaged_perceptron_tagger', download_dir=NLTK_DATA_PATH)
+ nltk.download('punkt', download_dir=NLTK_DATA_PATH)
+ print(f"NLTK resources downloaded to {NLTK_DATA_PATH}")
+ return True
+ except Exception as e:
+ print(f"Failed to download NLTK resources: {e}")
+ return False

  def load_huggingface_faq_data(dataset_name: str = "NebulaByte/E-Commerce_FAQs") -> List[Dict[str, Any]]:
  """
+ Load FAQ data from Hugging Face datasets, cache locally
  """
+ local_path = "data/ecommerce_faqs.json"
+ if os.path.exists(local_path):
+ print(f"Loading cached dataset from {local_path}")
+ with open(local_path, 'r') as f:
+ return json.load(f)

+ print(f"Loading dataset {dataset_name} from Hugging Face...")
  try:
  dataset = load_dataset(dataset_name)
+ faqs = [{
+ "question": item["question"],
+ "answer": item["answer"],
+ "category": item.get("category", ""),
+ "question_id": item.get("question_id", ""),
+ "faq_url": item.get("faq_url", "")
+ } for item in dataset["train"]]
+ with open(local_path, 'w') as f:
+ json.dump(faqs, f)
+ print(f"Saved dataset to {local_path}, loaded {len(faqs)} FAQs")
  return faqs
  except Exception as e:
+ print(f"Error loading dataset: {e}")
  print("Falling back to local data...")
  return load_faq_data("data/faq_data.csv")

  try:
  if file_path.endswith('.csv'):
  df = pd.read_csv(file_path)
  faqs = df.to_dict('records')
  elif file_path.endswith('.json'):
  with open(file_path, 'r') as f:
  faqs = json.load(f)
  else:
  raise ValueError(f"Unsupported file format: {file_path}")
  print(f"Loaded {len(faqs)} FAQ entries")
  return faqs
  except Exception as e:
  print(f"Error loading data: {e}")
  print("Creating sample dataset as fallback")
  sample_faqs = [
+ {"question": "How do I track my order?", "answer": "You can track your order by logging into your account and visiting the Order History section."},
+ {"question": "How do I reset my password?", "answer": "To reset your password, click on the 'Forgot Password' link on the login page."}
  ]
  return sample_faqs

  def preprocess_faq(faqs: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
  """
+ Preprocess FAQ data: clean text, handle formatting, and filter invalid entries
  """
  processed_faqs = []
  for faq in faqs:
+ # Safely handle question and answer fields
+ question = faq.get('question')
+ answer = faq.get('answer')
+
+ # Convert to string and strip, handling None values
+ question = str(question).strip() if question is not None else ""
+ answer = str(answer).strip() if answer is not None else ""
+
+ # Update FAQ dictionary
+ faq['question'] = question
+ faq['answer'] = answer

  # Only include FAQs with both question and answer
+ if question and answer:
  processed_faqs.append(faq)
+ else:
+ print(f"Skipping invalid FAQ: question='{question}', answer='{answer}'")

  print(f"After preprocessing: {len(processed_faqs)} valid FAQ entries")
+ return processed_faqs
+
+ def augment_faqs(faqs: List[Dict[str, Any]], max_faqs: int = 1000, enable_augmentation: bool = True) -> List[Dict[str, Any]]:
+ """
+ Augment FAQs with paraphrased questions if enabled
+ """
+ if not enable_augmentation:
+ print("Augmentation disabled; returning original FAQs")
+ return faqs
+
+ if not ensure_nltk_resources():
+ print("NLTK resources unavailable; skipping augmentation")
+ return faqs
+
+ aug = naw.SynonymAug()
+ augmented = []
+ for faq in faqs:
+ augmented.append(faq)
+ if len(augmented) < max_faqs:
+ try:
+ aug_question = aug.augment(faq['question'])[0]
+ augmented.append({"question": aug_question, "answer": faq['answer'], "category": faq.get("category", "")})
+ except Exception as e:
+ print(f"Augmentation error for question '{faq['question'][:50]}...': {e}")
+ print(f"Augmented to {len(augmented)} FAQs")
+ return augmented
+
+ def translate_faq(faq: Dict[str, Any], target_lang: str = "es") -> Dict[str, Any]:
+ """
+ Translate FAQ to a target language
+ """
+ try:
+ translator = Translator()
+ translated = faq.copy()
+ translated["question"] = translator.translate(faq["question"], dest=target_lang).text
+ translated["answer"] = translator.translate(faq["answer"], dest=target_lang).text
+ translated["language"] = target_lang
+ return translated
+ except Exception as e:
+ print(f"Translation error: {e}")
+ return faq
src/embedding.py CHANGED
@@ -4,50 +4,43 @@ import numpy as np
  from typing import List, Dict, Any
  import torch
  import gc

  class FAQEmbedder:
  def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
  """
  Initialize the FAQ embedder with a sentence transformer model
- Optimized for memory efficiency
  """
- print(f"Initializing FAQ Embedder with model: {model_name}")
- # Use CPU for embedding model to save GPU memory for LLM
- self.device = "cpu"
  self.model = SentenceTransformer(model_name, device=self.device)
  self.index = None
  self.faqs = None
  self.embeddings = None

- def create_embeddings(self, faqs: List[Dict[str, Any]], batch_size: int = 32) -> None:
  """
  Create embeddings for all FAQs and build FAISS index
- Using batching for memory efficiency
  """
  self.faqs = faqs
  print(f"Creating embeddings for {len(faqs)} FAQs in batches of {batch_size}...")

- # Extract questions for embedding
  questions = [faq['question'] for faq in faqs]
-
- # Process in batches to reduce memory usage
  all_embeddings = []
  for i in range(0, len(questions), batch_size):
  batch = questions[i:i+batch_size]
  print(f"Processing batch {i//batch_size + 1}/{(len(questions) + batch_size - 1)//batch_size}")
-
- # Create embeddings for this batch
  batch_embeddings = self.model.encode(batch, show_progress_bar=False, convert_to_numpy=True)
  all_embeddings.append(batch_embeddings)
-
- # Combine all batches
- self.embeddings = np.vstack(all_embeddings).astype('float32')

- # Clear memory explicitly
  all_embeddings = None
  gc.collect()

- # Create FAISS index
  dimension = self.embeddings.shape[1]
  self.index = faiss.IndexFlatL2(dimension)
  self.index.add(self.embeddings)
@@ -62,20 +55,35 @@ class FAQEmbedder:
  if self.index is None or self.faqs is None or self.embeddings is None:
  raise ValueError("Embeddings not created yet. Call create_embeddings first.")

- # Embed the query
  query_embedding = self.model.encode([query], convert_to_numpy=True).astype('float32')
-
- # Search in FAISS
  distances, indices = self.index.search(query_embedding, k)

- # Get the relevant FAQs with their similarity scores
  relevant_faqs = []
  for i, idx in enumerate(indices[0]):
- if idx < len(self.faqs): # Ensure index is valid
  faq = self.faqs[idx].copy()
- # Convert L2 distance to similarity score (higher is better)
  similarity = 1.0 / (1.0 + distances[0][i])
  faq['similarity'] = similarity
  relevant_faqs.append(faq)

- return relevant_faqs
  from typing import List, Dict, Any
  import torch
  import gc
+ import os
+ import psutil
+ import json

  class FAQEmbedder:
  def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
  """
  Initialize the FAQ embedder with a sentence transformer model
  """
+ self.device = "cuda" if torch.cuda.is_available() else "cpu"
+ print(f"Embedding model using device: {self.device}")
  self.model = SentenceTransformer(model_name, device=self.device)
  self.index = None
  self.faqs = None
  self.embeddings = None

+ def create_embeddings(self, faqs: List[Dict[str, Any]], batch_size: int = None) -> None:
  """
  Create embeddings for all FAQs and build FAISS index
  """
  self.faqs = faqs
+ available_memory = psutil.virtual_memory().available / (1024 ** 3) # GB
+ batch_size = batch_size or min(64, int(available_memory * 4))
  print(f"Creating embeddings for {len(faqs)} FAQs in batches of {batch_size}...")

  questions = [faq['question'] for faq in faqs]
  all_embeddings = []
  for i in range(0, len(questions), batch_size):
  batch = questions[i:i+batch_size]
  print(f"Processing batch {i//batch_size + 1}/{(len(questions) + batch_size - 1)//batch_size}")
  batch_embeddings = self.model.encode(batch, show_progress_bar=False, convert_to_numpy=True)
  all_embeddings.append(batch_embeddings)

+ self.embeddings = np.vstack(all_embeddings).astype('float32')
  all_embeddings = None
  gc.collect()

  dimension = self.embeddings.shape[1]
  self.index = faiss.IndexFlatL2(dimension)
  self.index.add(self.embeddings)
  if self.index is None or self.faqs is None or self.embeddings is None:
  raise ValueError("Embeddings not created yet. Call create_embeddings first.")

  query_embedding = self.model.encode([query], convert_to_numpy=True).astype('float32')
  distances, indices = self.index.search(query_embedding, k)

  relevant_faqs = []
  for i, idx in enumerate(indices[0]):
+ if idx < len(self.faqs):
  faq = self.faqs[idx].copy()
  similarity = 1.0 / (1.0 + distances[0][i])
  faq['similarity'] = similarity
  relevant_faqs.append(faq)

+ return relevant_faqs
+
+ def save(self, path: str):
+ """
+ Save embeddings and FAQs to disk
+ """
+ os.makedirs(path, exist_ok=True)
+ self.model.save(path)
+ faiss.write_index(self.index, f"{path}/index.bin")
+ with open(f"{path}/faqs.json", "w") as f:
+ json.dump(self.faqs, f)
+
+ def load(self, path: str):
+ """
+ Load embeddings and FAQs from disk
+ """
+ self.model = SentenceTransformer(path)
+ self.index = faiss.read_index(f"{path}/index.bin")
+ with open(f"{path}/faqs.json", "r") as f:
+ self.faqs = json.load(f)
+ self.embeddings = np.array([self.model.encode(faq["question"]) for faq in self.faqs]).astype('float32')
@@ -2,101 +2,76 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
2
  import torch
3
  from typing import List, Dict, Any
4
  import gc
 
5
 
6
  class ResponseGenerator:
7
- def __init__(self, model_name: str = "mistralai/Mistral-7B-Instruct-v0.1"):
8
  """
9
  Initialize the response generator with an LLM
10
- Optimized for 8-11GB GPU
11
  """
12
  print(f"Loading LLM: {model_name}")
13
- print("This may take a few minutes...")
 
14
 
15
- # Load tokenizer
16
  self.tokenizer = AutoTokenizer.from_pretrained(model_name)
17
-
18
- # Configure device and data type based on available resources
19
- device = "cuda" if torch.cuda.is_available() else "cpu"
20
- print(f"Using device: {device}")
21
-
22
- # Free up memory before loading model
23
  gc.collect()
24
- if device == "cuda":
25
  torch.cuda.empty_cache()
26
 
27
- # Configure 4-bit quantization for maximum memory efficiency
28
  try:
29
- # Use 4-bit quantization for models that support it
30
- quantization_config = BitsAndBytesConfig(
31
- load_in_4bit=True,
32
- bnb_4bit_compute_dtype=torch.float16,
33
- bnb_4bit_use_double_quant=True,
34
- bnb_4bit_quant_type="nf4"
35
- )
36
-
37
- # Load the model with quantization
38
- self.model = AutoModelForCausalLM.from_pretrained(
39
- model_name,
40
- quantization_config=quantization_config,
41
- device_map="auto",
42
- torch_dtype=torch.float16,
43
- # Load model in parts to avoid OOM errors
44
- max_memory={0: "8GiB", "cpu": "16GiB"},
45
- offload_folder="offload",
46
- offload_state_dict=True, # Offload weights to CPU when not in use
47
- low_cpu_mem_usage=True
48
- )
49
- except Exception as e:
50
- print(f"4-bit quantization error: {e}")
51
- print("Falling back to 8-bit quantization...")
52
-
53
- try:
54
- # Try 8-bit quantization
55
  quantization_config = BitsAndBytesConfig(
56
- load_in_8bit=True,
57
- bnb_8bit_use_double_quant=True
 
 
58
  )
 
 
 
 
59
 
60
  self.model = AutoModelForCausalLM.from_pretrained(
61
  model_name,
62
  quantization_config=quantization_config,
63
  device_map="auto",
64
  torch_dtype=torch.float16,
65
- max_memory={0: "8GiB", "cpu": "16GiB"},
66
  offload_folder="offload",
 
67
  low_cpu_mem_usage=True
68
  )
69
- except Exception as e2:
70
- print(f"8-bit quantization error: {e2}")
71
- print("Falling back to smaller model...")
72
-
73
- # Use a much smaller model as fallback
74
- backup_model = "microsoft/phi-2"
75
- self.tokenizer = AutoTokenizer.from_pretrained(backup_model)
76
  self.model = AutoModelForCausalLM.from_pretrained(
77
- backup_model,
78
- device_map="auto",
79
- torch_dtype=torch.float16 if device == "cuda" else torch.float32
 
80
  )
 
 
 
 
 
 
 
 
 
 
81
 
82
  print("LLM loaded successfully")
83
 
84
  def generate_response(self, query: str, relevant_faqs: List[Dict[str, Any]]) -> str:
85
  """
86
  Generate a response using the LLM with retrieved FAQs as context
87
- Memory-optimized version
88
  """
89
- # Create prompt with relevant FAQs
90
  prompt = self._create_prompt(query, relevant_faqs)
91
-
92
- # Generate response with memory-efficient settings
93
  inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
94
 
95
  with torch.no_grad():
96
- # Use more conservative generation parameters
97
  outputs = self.model.generate(
98
  **inputs,
99
- max_new_tokens=200, # Shorter response for memory efficiency
100
  temperature=0.7,
101
  top_p=0.9,
102
  do_sample=True,
@@ -104,12 +79,9 @@ class ResponseGenerator:
104
  )
105
 
106
  response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
107
-
108
- # Extract just the response part (after the prompt)
109
  response = response[len(prompt):].strip()
110
 
111
- # Clear GPU memory after generating response
112
- if torch.cuda.is_available():
113
  torch.cuda.empty_cache()
114
 
115
  return response
@@ -118,19 +90,13 @@ class ResponseGenerator:
118
  """
119
  Create a prompt for the LLM with retrieved FAQs as context
120
  """
121
- # Format FAQs in a way that's suitable for the model
122
- faq_context = "\n\n".join([
123
- f"Q: {faq['question']}\nA: {faq['answer']}"
124
- for faq in relevant_faqs
125
- ])
126
-
127
- # Create the prompt
128
  prompt = f"""
129
  Below are some relevant e-commerce customer support FAQ entries:
130
 
131
  {faq_context}
132
 
133
- Based on the information above, please provide a helpful, accurate, and concise response to the following customer query:
134
  Customer Query: {query}
135
 
136
  Response:
 
2
  import torch
3
  from typing import List, Dict, Any
4
  import gc
5
+ import psutil
6
 
7
  class ResponseGenerator:
8
+ def __init__(self, model_name: str = "microsoft/phi-2"):
9
  """
10
  Initialize the response generator with an LLM
 
11
  """
12
  print(f"Loading LLM: {model_name}")
13
+ self.device = "cuda" if torch.cuda.is_available() else "cpu"
14
+ print(f"Using device: {self.device}")
15
 
 
16
  self.tokenizer = AutoTokenizer.from_pretrained(model_name)
 
 
 
 
 
 
17
  gc.collect()
18
+ if self.device == "cuda":
19
  torch.cuda.empty_cache()
20
 
 
21
  try:
22
+ if self.device == "cuda":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  quantization_config = BitsAndBytesConfig(
24
+ load_in_4bit=True,
25
+ bnb_4bit_compute_dtype=torch.float16,
26
+ bnb_4bit_use_double_quant=True,
27
+ bnb_4bit_quant_type="nf4"
28
  )
29
+ available_memory = psutil.virtual_memory().total / (1024 ** 3)
30
+ gpu_memory = torch.cuda.get_device_properties(0).total_memory / (1024 ** 3)
31
+ max_memory = {0: f"{min(gpu_memory, 15)}GiB", "cpu": f"{min(available_memory, 30)}GiB"}
32
+ print(f"Setting max_memory: {max_memory}")
33
 
34
  self.model = AutoModelForCausalLM.from_pretrained(
35
  model_name,
36
  quantization_config=quantization_config,
37
  device_map="auto",
38
  torch_dtype=torch.float16,
39
+ max_memory=max_memory,
40
  offload_folder="offload",
41
+ offload_state_dict=True,
42
  low_cpu_mem_usage=True
43
  )
44
+ else:
 
 
 
 
 
 
45
  self.model = AutoModelForCausalLM.from_pretrained(
46
+ model_name,
47
+ device_map={"": "cpu"},
48
+ torch_dtype=torch.float32,
49
+ low_cpu_mem_usage=True
50
  )
51
+ except Exception as e:
52
+ print(f"Model loading error: {e}")
53
+ print("Falling back to TinyLlama...")
54
+ model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
55
+ self.tokenizer = AutoTokenizer.from_pretrained(model_name)
56
+ self.model = AutoModelForCausalLM.from_pretrained(
57
+ model_name,
58
+ device_map={"": self.device},
59
+ torch_dtype=torch.float16 if self.device == "cuda" else torch.float32
60
+ )
61
 
62
  print("LLM loaded successfully")
63
 
64
  def generate_response(self, query: str, relevant_faqs: List[Dict[str, Any]]) -> str:
65
  """
66
  Generate a response using the LLM with retrieved FAQs as context
 
67
  """
 
68
  prompt = self._create_prompt(query, relevant_faqs)
 
 
69
  inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
70
 
71
  with torch.no_grad():
 
72
  outputs = self.model.generate(
73
  **inputs,
74
+ max_new_tokens=150,
75
  temperature=0.7,
76
  top_p=0.9,
77
  do_sample=True,
 
79
  )
80
 
81
  response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
 
 
82
  response = response[len(prompt):].strip()
83
 
84
+ if self.device == "cuda":
 
85
  torch.cuda.empty_cache()
86
 
87
  return response
 
90
  """
91
  Create a prompt for the LLM with retrieved FAQs as context
92
  """
93
+ faq_context = "\n\n".join([f"Q: {faq['question']}\nA: {faq['answer']}" for faq in relevant_faqs])
 
 
 
 
 
 
94
  prompt = f"""
95
  Below are some relevant e-commerce customer support FAQ entries:
96
 
97
  {faq_context}
98
 
99
+ Based on the information above, provide a helpful, accurate, and concise response to the following customer query:
100
  Customer Query: {query}
101
 
102
  Response:
src/utils.py CHANGED
@@ -1,6 +1,12 @@
  import time
  import functools
- from typing import Callable, Any, Dict

  def time_function(func: Callable) -> Callable:
  """
@@ -17,38 +23,73 @@ def time_function(func: Callable) -> Callable:

  def evaluate_response(generated_response: str, ground_truth: str = None) -> Dict[str, Any]:
  """
- Basic evaluation of generated response
  """
  results = {
  "length": len(generated_response),
  "word_count": len(generated_response.split())
  }

- # If ground truth is provided, we could add metrics like BLEU or ROUGE
  if ground_truth:
- # Simplified evaluation - word overlap
  generated_words = set(generated_response.lower().split())
  ground_truth_words = set(ground_truth.lower().split())
  overlap = len(generated_words.intersection(ground_truth_words))
- results["word_overlap"] = overlap / len(ground_truth_words) if ground_truth_words else 0

  return results

- def format_memory_stats():
  """
- Format memory usage statistics for display
  """
- import torch
- import psutil
- import os

- # System memory
  system_stats = {
  "RAM": f"{psutil.virtual_memory().used / (1024 ** 3):.1f}GB / {psutil.virtual_memory().total / (1024 ** 3):.1f}GB",
- "RAM Usage": f"{psutil.virtual_memory().percent}%",
  }

- # GPU memory if available
  if torch.cuda.is_available():
  gpu_stats = {}
  for i in range(torch.cuda.device_count()):
  import time
  import functools
+ from typing import Callable, Any, Dict, List
+ import torch
+ import psutil
+ import json
+ from evaluate import load
+ from sklearn.feature_extraction.text import TfidfVectorizer
+ from sklearn.metrics.pairwise import cosine_similarity

  def time_function(func: Callable) -> Callable:
  """

  def evaluate_response(generated_response: str, ground_truth: str = None) -> Dict[str, Any]:
  """
+ Evaluate generated response with BLEU, ROUGE, and word overlap
  """
  results = {
  "length": len(generated_response),
  "word_count": len(generated_response.split())
  }

  if ground_truth:
+ bleu = load("bleu")
+ rouge = load("rouge")
+ bleu_score = bleu.compute(predictions=[generated_response], references=[[ground_truth]])
+ rouge_score = rouge.compute(predictions=[generated_response], references=[ground_truth])
  generated_words = set(generated_response.lower().split())
  ground_truth_words = set(ground_truth.lower().split())
  overlap = len(generated_words.intersection(ground_truth_words))
+ results.update({
+ "bleu": bleu_score["bleu"],
+ "rouge": rouge_score["rougeL"],
+ "word_overlap": overlap / len(ground_truth_words) if ground_truth_words else 0
+ })

  return results

+ def evaluate_retrieval(embedder, test_set_path: str, k: int = 3) -> Dict[str, float]:
  """
+ Evaluate retrieval quality with Precision@k and Recall@k
  """
+ with open(test_set_path, 'r') as f:
+ test_set = json.load(f)
+
+ precision, recall = [], []
+ for item in test_set:
+ query = item['query']
+ true_ids = set(item['relevant_ids'])
+ retrieved_faqs = embedder.retrieve_relevant_faqs(query, k)
+ retrieved_ids = set(range(len(retrieved_faqs)))
+
+ true_positives = len(true_ids & retrieved_ids)
+ precision.append(true_positives / k if k > 0 else 0)
+ recall.append(true_positives / len(true_ids) if true_ids else 0)

+ return {
+ "Precision@k": sum(precision) / len(precision) if precision else 0,
+ "Recall@k": sum(recall) / len(recall) if recall else 0
+ }
+
+ def baseline_keyword_search(query: str, faqs: List[Dict[str, Any]], k: int = 3) -> List[Dict[str, Any]]:
+ """
+ Keyword-based search baseline using TF-IDF
+ """
+ questions = [faq['question'] for faq in faqs]
+ vectorizer = TfidfVectorizer()
+ question_vectors = vectorizer.fit_transform(questions)
+ query_vector = vectorizer.transform([query])
+ similarities = cosine_similarity(query_vector, question_vectors).flatten()
+ top_k_indices = similarities.argsort()[-k:][::-1]
+ return [faqs[i] for i in top_k_indices]
+
+ def format_memory_stats():
+ """
+ Format memory usage statistics
+ """
  system_stats = {
  "RAM": f"{psutil.virtual_memory().used / (1024 ** 3):.1f}GB / {psutil.virtual_memory().total / (1024 ** 3):.1f}GB",
+ "RAM Usage": f"{psutil.virtual_memory().percent}%"
  }

  if torch.cuda.is_available():
  gpu_stats = {}
  for i in range(torch.cuda.device_count()):
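Reading aid (not part of the commit): a minimal sketch of calling the TF-IDF baseline added above, assuming a small in-memory FAQ list; app.py uses it the same way to show a side-by-side comparison with the embedding-based retrieval.

from src.utils import baseline_keyword_search

faqs = [
    {"question": "How do I track my order?", "answer": "..."},
    {"question": "How do I return a product?", "answer": "..."},
    {"question": "Do you offer gift cards?", "answer": "..."},
]

# Rank FAQ questions by TF-IDF cosine similarity to the query and keep the top k.
top = baseline_keyword_search("track my package", faqs, k=2)
print([faq["question"] for faq in top])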
test_set.json ADDED
@@ -0,0 +1,7 @@
+ [
+ {"query": "How do I track my order?", "relevant_ids": [2]},
+ {"query": "How to reset password?", "relevant_ids": [0]},
+ {"query": "What is the return policy?", "relevant_ids": [3]},
+ {"query": "Can I change my shipping address?", "relevant_ids": [10]},
+ {"query": "Do you offer gift cards?", "relevant_ids": [14]}
+ ]
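Reading aid (not part of the commit): a minimal sketch of feeding the new test_set.json to evaluate_retrieval from src/utils.py, assuming the relevant_ids index into the loaded FAQ list and that the local CSV is used to build the index.

from src.data_processing import load_faq_data, preprocess_faq
from src.embedding import FAQEmbedder
from src.utils import evaluate_retrieval

# Build an index over the local FAQ file, then score retrieval against the test set.
faqs = preprocess_faq(load_faq_data("data/faq_data.csv"))
embedder = FAQEmbedder()
embedder.create_embeddings(faqs)

metrics = evaluate_retrieval(embedder, "test_set.json", k=3)
print(metrics)  # e.g. {"Precision@k": ..., "Recall@k": ...}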