Spaces:

MLBench
/

getscenes

Sleeping

App Files Files Community

saim1309 committed on Feb 19

Commit

eb656a6

verified ·

1 Parent(s): 3784225

Update app.py

Browse files

Files changed (1) hide show

app.py +122 -53

app.py CHANGED Viewed

@@ -5,6 +5,7 @@ import re
 import os
 from datetime import datetime, timedelta
 import uuid
 from typing import Dict
 from config import (
@@ -17,15 +18,16 @@ from utils import (
     get_embedding, cosine_similarity, find_top_k_matches,
     classify_intent, should_include_email, classify_user_type
 )
-from scraper import scrape_workshops_from_squarespace
 from database import (
     fetch_all_embeddings,
     fetch_row_by_id,
     fetch_all_faq_embeddings,
     get_session_state,
     update_session_state,
-    log_question
 )
 # ============================================================================
 # CONFIGURATION
@@ -37,8 +39,7 @@ if not OPENAI_API_KEY:
 openai.api_key = OPENAI_API_KEY
-# Store session ID for the conversation
-session_id = str(uuid.uuid4())
 # Cache for workshop data and embeddings
 workshop_cache = {
@@ -222,7 +223,7 @@ def generate_enriched_links(row):
     markdown = f"🎧 [Watch {guest_name}'s episode here]({base_url}) - {short_summary}"
     return [markdown]
-def build_enhanced_prompt(user_question, context_results, top_workshops, user_preference=None, user_type='unknown', enriched_podcast_links=None, wants_details=False, current_topic=None, mode="Mode B", is_low_confidence=False):
     """Builds the system prompt with strict formatting rules."""
     # Dynamic Links from Structured Knowledge
@@ -278,11 +279,13 @@ def build_enhanced_prompt(user_question, context_results, top_workshops, user_pr
         # Mandatory Hyperlink Enforcement
         workshop_text = f"We are constantly updating our schedule! You can view and [register for upcoming {label}workshops here]({link})."
-    # Handle missing podcast data strictly
     if not enriched_podcast_links:
-        single_podcast = "Our latest industry insights are available on YouTube: https://www.youtube.com/@GetSceneStudios"
     else:
-        single_podcast = enriched_podcast_links[0]
     # --- EMOTIONAL / SUPPORT MODE CHECK ---
     is_emotional = detect_response_type(user_question) == "support"
@@ -442,6 +445,39 @@ CRITICAL: The user is a BEGINNER. You MUST prioritize the Free Online Class abov
     elif user_type == 'current_student':
         user_type_instruction = "USER TYPE: EXISTING STUDENT. Focus on GSP membership benefits, advanced mentorships (WAM), and specialized recurring workshops."
     if mode == "Mode A":
         # Recommendation Mode: Existing checklist applies
         prompt = f"""{PERSONA_INSTRUCTION}
@@ -462,6 +498,7 @@ CRITICAL INSTRUCTIONS (RECOMMENDATION MODE):
 - For each recommendation, add a tiny bit of "mentor advice" on why it helps.
 - Use ONLY the provided links - do not invent recommendations.
 - **MANDATORY: Use direct hyperlinks.** For ANY mention of signing up, classes, kids programs, the Summit, or the free class, you MUST include the direct [Title](Link) format.
 - **NEVER say "check our website"** or "visit the link below". Embed the link directly into the relevant part of your mentor advice.
 - Focus on clean, readable formatting.{preference_instruction}
@@ -481,7 +518,7 @@ REQUIRED RESPONSE FORMAT (STRICT):
 Here's your path forward:
 1. Free Online Class (Mandatory First Step): {free_class_url}
 2. Recommended Podcast Episode (For Industry Mindset):
-{single_podcast}
 3. Recommended Workshop/Next Step:
 {workshop_text}{email_contact}
@@ -503,7 +540,7 @@ CRITICAL INSTRUCTIONS (FRONT DESK MODE):
 - Answer the user's question directly using the provided information but keep it punchy—**no essays**.
 - **MANDATORY: Provide direct hyperlinks** for ANY mention of registration, classes, kids programs, the Summit, or more information. Use EXACTLY these links as relevant:
     - Free Online Class: [{free_class_url}]({free_class_url})
-    - Recommended for you: {single_podcast}
     - Upcoming Workshops: {workshop_text}
     - Southeast Actor Summit: [Southeast Actor Summit Registration](https://www.getscenestudios.com/southeast-actor-summit)
 - **NEVER say "go to the website"** or "check our site". Always provide the specific hyperlink directly in your answer.
@@ -523,9 +560,9 @@ CRITICAL ROLE GUARD (FINAL AUTHORITY):
 USER'S QUESTION: {user_question}
-REQUIRED RESPONSE FORMAT:
 [Routing Question]
 [Helpful, punchy response with links]
 [Next step guidance]{email_contact}"""
     return prompt
@@ -546,6 +583,7 @@ def detect_question_category(question):
         'pricing': ['price', 'cost', 'pricing', '$', 'money', 'payment', 'fee'],
         'classes': ['class', 'workshop', 'training', 'course', 'learn'],
         'membership': ['membership', 'join', 'member', 'gsp', 'plus'],
         'technical': ['self-tape', 'equipment', 'lighting', 'editing', 'camera']
     }
@@ -726,32 +764,40 @@ def process_question(question: str, current_session_id: str):
             top_faqs.append((score, entry_id, question_text, answer_text))
             top_faqs.sort(reverse=True)
-        faq_threshold = 0.85
-        ambiguous_threshold = 0.65
         is_low_confidence = False  # Default safe initialization
         context_results = None
         if top_faqs and top_faqs[0][0] >= faq_threshold:
             best_score, faq_id, question_text, answer_text = top_faqs[0]
             print(f"DEBUG: Processing FAQ match through LLM and Truth Sheet rules...")
             context_results = answer_text
         elif activated_mode == "Mode A":
             # Mode A: Any score < 0.85 triggers Clarification -> Email
             clarification_count = session_state.get('clarification_count', 0)
-            if clarification_count == 0:
                  update_session_state(current_session_id, increment_clarification=True, increment_count=False)
                  return "I want to make sure I give you the best advice. Are you looking for classes in [Atlanta](https://www.getscenestudios.com/instudio), [Online](https://www.getscenestudios.com/online), or something else like getting an agent? You can also start right now with our [Free Online Class](https://www.getscenestudios.com/online)!"
-            else:
                  update_session_state(current_session_id, reset_clarification=True)
                  return "I'm still not quite sure, and I want to make sure you get the right answer! Please email our team at info@getscenestudios.com and we'll help you directly. In the meantime, you can explore or [register for our Online Path](https://www.getscenestudios.com/online) or [In-Studio classes in Atlanta](https://www.getscenestudios.com/instudio)."
         elif top_faqs and top_faqs[0][0] >= ambiguous_threshold:
-            # Mode B: Ambiguous Score (0.65 - 0.85) -> Ask "Did you mean?"
-            update_session_state(current_session_id, increment_clarification=True, increment_count=False)
-            best_match_q = top_faqs[0][2]
-            return f"Did you mean: {best_match_q}?"
         else:
             # 5. HALLUCINATION GUARD: Check if query is acting-related before blocking
@@ -760,7 +806,7 @@ def process_question(question: str, current_session_id: str):
             has_session_context = (current_topic is not None) or (user_preference is not None)
             FOLLOWUP_KEYWORDS = ['yes', 'no', 'sure', 'okay', 'thanks', 'thank you', 'please', 'go ahead', 'continue', 'more']
-            ACTING_KEYWORDS = ['class', 'workshop', 'coaching', 'studio', 'acting', 'online', 'person', 'atlanta', 'training', 'prefer', 'preference', 'format', 'recommendation', 'online class', 'online workshop','instudio class','instudio workshop', 'actor', 'scene', 'audition', 'theatre', 'film', 'tv', 'commercial', 'agent', 'rep', 'manager', 'instructor', 'role', 'auditing', 'audit', 'representation', 'summit', 'sign up', 'sign-up', 'register', 'enroll', 'schedule', 'cancel', 'reschedule', 'how do i']
             is_acting_related = (
                 len(categories) > 0 or
@@ -786,14 +832,40 @@ def process_question(question: str, current_session_id: str):
         top_workshops = find_top_workshops(user_embedding, k=3)
         top_podcasts = find_top_k_matches(user_embedding, podcast_data, k=3)
         enriched_podcast_links = []
         for _, podcast_id, _ in top_podcasts:
             row = fetch_row_by_id("podcast_episodes", podcast_id)
-            enriched_podcast_links.extend(generate_enriched_links(row))
         if not enriched_podcast_links:
             fallback = fetch_row_by_id("podcast_episodes", podcast_data[0][0])
             enriched_podcast_links = generate_enriched_links(fallback)
         # Brevity & Detail Detection
         wants_details = any(syn in question.lower() for syn in DETAIL_SYNONYMS)
@@ -805,22 +877,22 @@ def process_question(question: str, current_session_id: str):
             top_workshops,
             user_preference=user_preference,
             user_type=user_type,
-            enriched_podcast_links=enriched_podcast_links,
             wants_details=wants_details,
             current_topic=current_topic,
             mode=activated_mode,
-            is_low_confidence=is_low_confidence
         )
-        # Invoke LLM
-        print(f"DEBUG FINAL PROMPT:\n{final_prompt}\n--- END PROMPT ---")
         response = openai.chat.completions.create(
             model=GEN_MODEL,
-            messages=[
-                {"role": "system", "content": final_prompt},
-                {"role": "user", "content": question}
-            ]
         )
         answer_text = response.choices[0].message.content.strip()
@@ -867,21 +939,23 @@ def process_question(question: str, current_session_id: str):
 # GRADIO INTERFACE
 # ============================================================================
-def chat_with_bot(message, history):
     """
     Process message directly without Flask API
     Args:
         message: User's current message
-        history: Chat history (list of message dictionaries)
     Returns:
-        Updated history with new exchange
     """
-    global session_id
     if not message.strip():
-        return history
     try:
         # Process question directly
@@ -889,16 +963,15 @@ def chat_with_bot(message, history):
     except Exception as e:
         bot_reply = f"❌ Error: {str(e)}"
-    # Append to history in Gradio 6.0 format
     history.append({"role": "user", "content": message})
     history.append({"role": "assistant", "content": bot_reply})
-    return history
 def reset_session():
     """Reset session ID for new conversation"""
-    global session_id
-    session_id = str(uuid.uuid4())
-    return [] #, f"🔄 New session started: {session_id[:8]}..."
 # Create Gradio interface
 with gr.Blocks(title="Get Scene Studios Chatbot") as demo:
@@ -931,11 +1004,14 @@ with gr.Blocks(title="Get Scene Studios Chatbot") as demo:
         clear_btn = gr.Button("Clear Chat 🗑️", scale=1)
         reset_btn = gr.Button("New Session 🔄", scale=1)
     # Event handlers
     submit_btn.click(
         fn=chat_with_bot,
-        inputs=[msg, chatbot],
-        outputs=[chatbot]
     ).then(
         fn=lambda: "",
         inputs=None,
@@ -944,8 +1020,8 @@ with gr.Blocks(title="Get Scene Studios Chatbot") as demo:
     msg.submit(
         fn=chat_with_bot,
-        inputs=[msg, chatbot],
-        outputs=[chatbot]
     ).then(
         fn=lambda: "",
         inputs=None,
@@ -961,16 +1037,9 @@ with gr.Blocks(title="Get Scene Studios Chatbot") as demo:
     reset_btn.click(
         fn=reset_session,
         inputs=None,
-        outputs=[chatbot]
     )
 # Launch the app
-if __name__ == "__main__":
-    print("\n" + "="*60)
-    print("🎬 Get Scene Studios Chatbot")
-    print("="*60)
-    print("\n✅ No Flask API needed - all processing is done directly!")
-    print("🌐 Gradio interface will open in your browser")
-    print("="*60 + "\n")
     demo.launch()

 import os
 from datetime import datetime, timedelta
 import uuid
+import random
 from typing import Dict
 from config import (
     get_embedding, cosine_similarity, find_top_k_matches,
     classify_intent, should_include_email, classify_user_type
 )
 from database import (
     fetch_all_embeddings,
     fetch_row_by_id,
     fetch_all_faq_embeddings,
     get_session_state,
     update_session_state,
+    log_question,
+    get_recent_history
 )
+from scraper import scrape_workshops_from_squarespace
 # ============================================================================
 # CONFIGURATION
 openai.api_key = OPENAI_API_KEY
+# Removed global session_id for multi-user compatibility
 # Cache for workshop data and embeddings
 workshop_cache = {
     markdown = f"🎧 [Watch {guest_name}'s episode here]({base_url}) - {short_summary}"
     return [markdown]
+def build_enhanced_prompt(user_question, context_results, top_workshops, user_preference=None, user_type='unknown', enriched_podcast_links=None, wants_details=False, current_topic=None, mode="Mode B", is_low_confidence=False, is_faq_match=False):
     """Builds the system prompt with strict formatting rules."""
     # Dynamic Links from Structured Knowledge
         # Mandatory Hyperlink Enforcement
         workshop_text = f"We are constantly updating our schedule! You can view and [register for upcoming {label}workshops here]({link})."
+    # Pass multiple podcast options to the LLM for variety
+    podcast_options = ""
     if not enriched_podcast_links:
+        podcast_options = "Our latest industry insights are available on YouTube: https://www.youtube.com/@GetSceneStudios"
     else:
+        # Provide up to 3 options
+        podcast_options = "\n".join(enriched_podcast_links[:3])
     # --- EMOTIONAL / SUPPORT MODE CHECK ---
     is_emotional = detect_response_type(user_question) == "support"
     elif user_type == 'current_student':
         user_type_instruction = "USER TYPE: EXISTING STUDENT. Focus on GSP membership benefits, advanced mentorships (WAM), and specialized recurring workshops."
+    # --- FAQ MATCH MODE (Highest Priority) ---
+    if is_faq_match:
+        prompt = f"""{PERSONA_INSTRUCTION}
+{truth_sheet_snippet}
+{BUSINESS_RULES_INSTRUCTION}
+{user_type_instruction}
+{context_snippet}{retrieved_info}
+CRITICAL INSTRUCTIONS (FAQ MODE):
+- You are answering a question that has a direct match in our FAQ.
+- Answer the user's question directly and punchily using ONLY the provided information.
+- **DO NOT** use the structured 1. 2. 3. format.
+- **DO NOT** ask a routing question.
+- **MANDATORY: Use direct hyperlinks.** For ANY mention of signing up, classes, kids programs, or the free class, you MUST include the direct [Title](Link) format.
+- Focus on being a helpful guide. {preference_instruction}
+CRITICAL ROLE GUARD (FINAL AUTHORITY):
+- Corey Lawson: Instructor/Actor [NOT an Agent]
+- Jacob Lawson: Agent/Owner [NOT an Instructor]
+- Jesse Malinowski: Founder/Mentor [NOT an Agent]
+- Alex White: Agent [NOT an Instructor/Mentor]
+- THE TRUTH SHEET IS THE ABSOLUTE AUTHORITY.
+USER'S QUESTION: {user_question}
+REQUIRED RESPONSE FORMAT:
+[Punchy, helpful answer based on FAQ with relevant links]{email_contact}"""
+        return prompt
     if mode == "Mode A":
         # Recommendation Mode: Existing checklist applies
         prompt = f"""{PERSONA_INSTRUCTION}
 - For each recommendation, add a tiny bit of "mentor advice" on why it helps.
 - Use ONLY the provided links - do not invent recommendations.
 - **MANDATORY: Use direct hyperlinks.** For ANY mention of signing up, classes, kids programs, the Summit, or the free class, you MUST include the direct [Title](Link) format.
+- **CRITICAL: PRESERVE URLS.** You MUST include the full URL in parentheses `(https://...)`. DO NOT output just the bracketed text `[Title]`. If you fail to include the URL, the link will be broken.
 - **NEVER say "check our website"** or "visit the link below". Embed the link directly into the relevant part of your mentor advice.
 - Focus on clean, readable formatting.{preference_instruction}
 Here's your path forward:
 1. Free Online Class (Mandatory First Step): {free_class_url}
 2. Recommended Podcast Episode (For Industry Mindset):
+{podcast_options}
 3. Recommended Workshop/Next Step:
 {workshop_text}{email_contact}
 - Answer the user's question directly using the provided information but keep it punchy—**no essays**.
 - **MANDATORY: Provide direct hyperlinks** for ANY mention of registration, classes, kids programs, the Summit, or more information. Use EXACTLY these links as relevant:
     - Free Online Class: [{free_class_url}]({free_class_url})
+    - Recommended for you: {podcast_options}
     - Upcoming Workshops: {workshop_text}
     - Southeast Actor Summit: [Southeast Actor Summit Registration](https://www.getscenestudios.com/southeast-actor-summit)
 - **NEVER say "go to the website"** or "check our site". Always provide the specific hyperlink directly in your answer.
 USER'S QUESTION: {user_question}
 [Routing Question]
 [Helpful, punchy response with links]
+**IMPORTANT: You MUST choose the most relevant podcast from the list provided above and include its FULL Markdown link including the URL in your response.**
 [Next step guidance]{email_contact}"""
     return prompt
         'pricing': ['price', 'cost', 'pricing', '$', 'money', 'payment', 'fee'],
         'classes': ['class', 'workshop', 'training', 'course', 'learn'],
         'membership': ['membership', 'join', 'member', 'gsp', 'plus'],
+        'podcast': ['podcast', 'podcasts', 'youtube', 'watch', 'listen', 'episode', 'episodes'],
         'technical': ['self-tape', 'equipment', 'lighting', 'editing', 'camera']
     }
             top_faqs.append((score, entry_id, question_text, answer_text))
             top_faqs.sort(reverse=True)
+        faq_threshold = 0.50
+        ambiguous_threshold = 0.60
         is_low_confidence = False  # Default safe initialization
         context_results = None
+        is_faq_match = False
         if top_faqs and top_faqs[0][0] >= faq_threshold:
             best_score, faq_id, question_text, answer_text = top_faqs[0]
             print(f"DEBUG: Processing FAQ match through LLM and Truth Sheet rules...")
             context_results = answer_text
+            is_faq_match = True
         elif activated_mode == "Mode A":
             # Mode A: No FAQ match at or above faq_threshold triggers Clarification -> Email
+            # EXCEPTION: If they specifically ask for podcasts or recommendations, let it through to LLM path
+            is_recommendation_query = any(k in question.lower() for k in ['podcast', 'reccomend', 'recommend', 'path', 'help', 'advice', 'guide'])
             clarification_count = session_state.get('clarification_count', 0)
+            if clarification_count == 0 and not is_recommendation_query:
                  update_session_state(current_session_id, increment_clarification=True, increment_count=False)
                  return "I want to make sure I give you the best advice. Are you looking for classes in [Atlanta](https://www.getscenestudios.com/instudio), [Online](https://www.getscenestudios.com/online), or something else like getting an agent? You can also start right now with our [Free Online Class](https://www.getscenestudios.com/online)!"
+            elif clarification_count > 0 and not is_recommendation_query:
                  update_session_state(current_session_id, reset_clarification=True)
                  return "I'm still not quite sure, and I want to make sure you get the right answer! Please email our team at info@getscenestudios.com and we'll help you directly. In the meantime, you can explore or [register for our Online Path](https://www.getscenestudios.com/online) or [In-Studio classes in Atlanta](https://www.getscenestudios.com/instudio)."
+            # Else: is_recommendation_query is True, fall through to LLM path
         elif top_faqs and top_faqs[0][0] >= ambiguous_threshold:
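+            # Mode B: Ambiguous Score -> Use best FAQ match as context for LLM
+            # NOTE(review): unreachable as written - faq_threshold (0.50) is below ambiguous_threshold (0.60), so any score >= 0.60 is already handled by the FAQ-match branch above.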
+            # Instead of asking "Did you mean?", answer naturally using the FAQ content
+            best_score, faq_id, question_text, answer_text = top_faqs[0]
+            print(f"DEBUG: Ambiguous FAQ match (score={best_score:.2f}), using as LLM context: {question_text[:60]}...")
+            context_results = answer_text
+            is_faq_match = True
         else:
             # 5. HALLUCINATION GUARD: Check if query is acting-related before blocking
             has_session_context = (current_topic is not None) or (user_preference is not None)
             FOLLOWUP_KEYWORDS = ['yes', 'no', 'sure', 'okay', 'thanks', 'thank you', 'please', 'go ahead', 'continue', 'more']
+            ACTING_KEYWORDS = ['class', 'workshop', 'coaching', 'studio', 'acting', 'online', 'person', 'atlanta', 'training', 'prefer', 'preference', 'format', 'recommendation', 'online class', 'online workshop','instudio class','instudio workshop', 'actor', 'scene', 'audition', 'theatre', 'film', 'tv', 'commercial', 'agent', 'rep', 'manager', 'instructor', 'role', 'auditing', 'audit', 'representation', 'summit', 'sign up', 'sign-up', 'register', 'enroll', 'schedule', 'cancel', 'reschedule', 'how do i', 'podcast', 'podcasts', 'youtube', 'episode', 'episodes', 'watch']
             is_acting_related = (
                 len(categories) > 0 or
         top_workshops = find_top_workshops(user_embedding, k=3)
         top_podcasts = find_top_k_matches(user_embedding, podcast_data, k=3)
+        # Get chat history for rotation logic
+        chat_history = get_recent_history(current_session_id, limit=5)
+        history_text = " ".join([m['content'] for m in chat_history]).lower()
         enriched_podcast_links = []
         for _, podcast_id, _ in top_podcasts:
             row = fetch_row_by_id("podcast_episodes", podcast_id)
+            links = generate_enriched_links(row)
+            enriched_podcast_links.extend(links)
         if not enriched_podcast_links:
             fallback = fetch_row_by_id("podcast_episodes", podcast_data[0][0])
             enriched_podcast_links = generate_enriched_links(fallback)
+        # Diversity Logic: Shuffle and prioritize unseen podcasts
+        random.shuffle(enriched_podcast_links)
+        seen_links = []
+        unseen_links = []
+        for link in enriched_podcast_links:
+            # Extract guest name or unique part to check history
+            # e.g. "🎧 [Watch Haillie Ricardo's episode here]..."
+            match = re.search(r'Watch (.*)\'s episode', link)
+            if match:
+                guest_name = match.group(1).lower()
+                if guest_name in history_text:
+                    seen_links.append(link)
+                else:
+                    unseen_links.append(link)
+            else:
+                unseen_links.append(link)
+        # Combine: Unseen first, then seen
+        final_podcast_options = unseen_links + seen_links
         # Brevity & Detail Detection
         wants_details = any(syn in question.lower() for syn in DETAIL_SYNONYMS)
             top_workshops,
             user_preference=user_preference,
             user_type=user_type,
+            enriched_podcast_links=final_podcast_options,
             wants_details=wants_details,
             current_topic=current_topic,
             mode=activated_mode,
+            is_low_confidence=is_low_confidence,
+            is_faq_match=is_faq_match
         )
+        # LLM messages
+        messages = [{"role": "system", "content": final_prompt}]
+        messages.extend(chat_history)
+        messages.append({"role": "user", "content": question})
         response = openai.chat.completions.create(
             model=GEN_MODEL,
+            messages=messages
         )
         answer_text = response.choices[0].message.content.strip()
 # GRADIO INTERFACE
 # ============================================================================
+def chat_with_bot(message, history, session_id):
     """
     Process message directly without Flask API
     Args:
         message: User's current message
+        history: Chat history
+        session_id: Per-user session ID state
     Returns:
+        Updated history and session_id
     """
+    if not session_id:
+        session_id = str(uuid.uuid4())
     if not message.strip():
+        return history, session_id
     try:
         # Process question directly
     except Exception as e:
         bot_reply = f"❌ Error: {str(e)}"
+    # Append to history
     history.append({"role": "user", "content": message})
     history.append({"role": "assistant", "content": bot_reply})
+    return history, session_id
 def reset_session():
     """Reset session ID for new conversation"""
+    new_id = str(uuid.uuid4())
+    return [], new_id
 # Create Gradio interface
 with gr.Blocks(title="Get Scene Studios Chatbot") as demo:
         clear_btn = gr.Button("Clear Chat 🗑️", scale=1)
         reset_btn = gr.Button("New Session 🔄", scale=1)
+    # Session state
+    session_state = gr.State("")
     # Event handlers
     submit_btn.click(
         fn=chat_with_bot,
+        inputs=[msg, chatbot, session_state],
+        outputs=[chatbot, session_state]
     ).then(
         fn=lambda: "",
         inputs=None,
     msg.submit(
         fn=chat_with_bot,
+        inputs=[msg, chatbot, session_state],
+        outputs=[chatbot, session_state]
     ).then(
         fn=lambda: "",
         inputs=None,
     reset_btn.click(
         fn=reset_session,
         inputs=None,
+        outputs=[chatbot, session_state]
     )
 # Launch the app
+if __name__ == "__main__":
     demo.launch()