Spaces:
Sleeping
Sleeping
Upload 4 files
Browse files- admin_app.py +122 -0
- config.py +68 -0
- database.py +253 -0
- utils.py +163 -0
admin_app.py
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
import pandas as pd
import os
from database import (
    fetch_all_faq_metadata, fetch_all_podcast_metadata,
    add_faq_entry, update_faq_entry, delete_faq_entry,
    bulk_update_faqs, bulk_update_podcasts
)
from utils import recalculate_all_embeddings
from config import OPENAI_API_KEY

from dotenv import load_dotenv

# Load environment variables from a local .env file (no-op when the vars
# are already set, e.g. via Hugging Face Secrets).
load_dotenv()

# Basic Admin Credentials (ADMIN_PASS MUST be set in Hugging Face Secrets or .env;
# ADMIN_USER falls back to "admin").
ADMIN_USER = os.environ.get("ADMIN_USER", "admin")
ADMIN_PASS = os.environ.get("ADMIN_PASS")

# Fail fast at import time rather than launching an unprotected dashboard.
if not ADMIN_PASS:
    raise ValueError("CRITICAL SECURITY ERROR: ADMIN_PASS environment variable is not set. Please add it to your Hugging Face Secrets.")
|
| 25 |
+
|
| 26 |
+
def get_faqs():
    """Return the full FAQ table as a pandas DataFrame for the admin grid."""
    return pd.DataFrame(fetch_all_faq_metadata())
|
| 29 |
+
|
| 30 |
+
def get_podcasts():
    """Return the full podcast table as a pandas DataFrame for the admin grid."""
    return pd.DataFrame(fetch_all_podcast_metadata())
|
| 33 |
+
|
| 34 |
+
def handle_faq_upload(file):
    """Parse an uploaded CSV/Excel file and bulk-insert its rows as FAQs.

    Returns a human-readable status string for the admin UI; any parsing
    or database error is reported rather than raised.
    """
    if file is None:
        return "No file uploaded."
    try:
        # CSV when the name says so; everything else goes to the Excel reader.
        reader = pd.read_csv if file.name.endswith('.csv') else pd.read_excel
        frame = reader(file.name)
        bulk_update_faqs(frame.to_dict('records'))
        return f"Successfully uploaded {len(frame)} FAQs. Don't forget to Sync & Embed!"
    except Exception as e:
        return f"Error: {e}"
|
| 43 |
+
|
| 44 |
+
def handle_podcast_upload(file):
    """Parse an uploaded CSV/Excel file and bulk-insert its rows as podcasts.

    Returns a human-readable status string for the admin UI; any parsing
    or database error is reported rather than raised.
    """
    if file is None:
        return "No file uploaded."
    try:
        # CSV when the name says so; everything else goes to the Excel reader.
        reader = pd.read_csv if file.name.endswith('.csv') else pd.read_excel
        frame = reader(file.name)
        bulk_update_podcasts(frame.to_dict('records'))
        return f"Successfully uploaded {len(frame)} Podcasts. Don't forget to Sync & Embed!"
    except Exception as e:
        return f"Error: {e}"
|
| 53 |
+
|
| 54 |
+
def run_sync():
    """Trigger a full embedding recalculation and report success or failure."""
    try:
        recalculate_all_embeddings()
    except Exception as e:
        return f"Sync Failed: {e}"
    return "Sync Complete! All missing embeddings have been generated."
|
| 60 |
+
|
| 61 |
+
# Admin dashboard layout: three tabs (FAQ management, podcast management,
# embedding sync) wired to the handler functions above.
with gr.Blocks(title="Get Scene Admin Dashboard") as demo:
    gr.Markdown("# 🎭 Get Scene Admin Dashboard")
    gr.Markdown("Manage FAQs, Podcasts, and Knowledge Embeddings.")

    with gr.Tabs():
        # Tab 1: FAQs — editable grid plus single-entry and bulk-upload forms.
        with gr.TabItem("Manage FAQs"):
            with gr.Row():
                faq_df = gr.Dataframe(
                    value=get_faqs(),
                    headers=["id", "question", "answer"],
                    datatype=["number", "str", "str"],
                    interactive=True,
                    label="FAQ Database"
                )

            with gr.Row():
                with gr.Column():
                    gr.Markdown("### Add New FAQ")
                    new_q = gr.Textbox(label="Question")
                    new_a = gr.TextArea(label="Answer")
                    add_btn = gr.Button("Add Entry", variant="primary")

                with gr.Column():
                    gr.Markdown("### Bulk Upload")
                    faq_file = gr.File(label="Upload CSV/Excel (Columns: Question, Answer)")
                    upload_faq_btn = gr.Button("Bulk Upload FAQs")
                    faq_upload_status = gr.Textbox(label="Status", interactive=False)

            def add_and_refresh(q, a):
                # Insert the new FAQ, refresh the grid, and clear both inputs.
                add_faq_entry(q, a)
                return get_faqs(), "", ""

            add_btn.click(add_and_refresh, [new_q, new_a], [faq_df, new_q, new_a])
            upload_faq_btn.click(handle_faq_upload, [faq_file], [faq_upload_status])

        # Tab 2: Podcasts — read-only grid plus bulk upload.
        with gr.TabItem("Podcasts"):
            pod_df = gr.Dataframe(
                value=get_podcasts(),
                headers=["id", "guest_name", "youtube_url", "summary"],
                datatype=["number", "str", "str", "str"],
                label="Podcast Episodes"
            )
            gr.Markdown("### Bulk Upload Podcasts")
            pod_file = gr.File(label="Upload CSV/Excel (Columns: Guest Name, YouTube URL, Summary)")
            upload_pod_btn = gr.Button("Bulk Upload Podcasts")
            pod_upload_status = gr.Textbox(label="Status", interactive=False)

            upload_pod_btn.click(handle_podcast_upload, [pod_file], [pod_upload_status])

        # Tab 3: Sync — regenerate embeddings for any rows whose embedding is NULL.
        with gr.TabItem("Sync & Embed"):
            gr.Markdown("### Recalculate Embeddings")
            gr.Markdown("When you change text or upload new data, the 'embeddings' (AI understanding) must be recalculated for the chatbot to recognize the new information.")
            sync_btn = gr.Button("🔄 Sync & Recalculate Embeddings", variant="primary", scale=2)
            sync_status = gr.Textbox(label="Sync Status", interactive=False)

            sync_btn.click(run_sync, None, [sync_status])

if __name__ == "__main__":
    # Gradio's built-in basic auth protects the dashboard; ADMIN_PASS is
    # validated at import time above.
    demo.launch(auth=(ADMIN_USER, ADMIN_PASS), server_name="0.0.0.0")
|
config.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
from datetime import timedelta
from dotenv import load_dotenv

load_dotenv()

# API Keys
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Database — SQLite file path, relative to the working directory.
DB_PATH = "getscene_ai.sqlite"

# Models
EMBED_MODEL = "text-embedding-3-small"  # embeddings for FAQ/podcast retrieval
GEN_MODEL = "gpt-4o"                    # main answer generation
FAST_MODEL = "gpt-4o-mini"              # cheap classification calls

# Caching
CACHE_DURATION = timedelta(hours=24)

# Keyword Lists — matched (lowercased) against user questions to route
# responses; see utils.should_include_email and related helpers.

# Signals that the user is expressing frustration or self-doubt.
EMOTIONAL_KEYWORDS = [
    'stuck', 'frustrated', 'discouraged', 'overwhelmed', 'scared',
    'nervous', 'anxious', 'worried', 'fear', 'doubt', 'confidence',
    'insecure', 'lost', 'confused', 'struggling', 'hard time',
    'giving up', 'burnout', 'rejection', 'failed', 'can\'t',
    'feeling', 'feel', 'emotional', 'depressed', 'sad', 'unmotivated',
    'hopeless', 'stressed', 'pressure', 'imposter'
]

# Signals that the user wants concrete career actions/recommendations.
ACTION_KEYWORDS = [
    'get an agent', 'find agent', 'need agent', 'want agent', 'sign with agent',
    'more auditions', 'book', 'booking', 'callbacks', 'improve',
    'better', 'self-tape', 'materials', 'headshots', 'reel',
    'network', 'connections', 'industry', 'career', 'strategy',
    'agent prep', 'total agent prep', 'workshop', 'class', 'training',
    'results', 'success', 'grow', 'advance', 'level up'
]

# Questions about studio policies (refunds, attendance, cancellation, ...).
POLICY_KEYWORDS = [
    'refund', 'refunds', 'money back',
    'attend', 'attendance', 'miss', 'missed', 'missing', 'absent',
    'late', 'lateness', 'tardy',
    'reschedule', 'change date', 'move class',
    'credit', 'credits',
    'cancel', 'cancellation', 'canceling',
    'policy', 'policies'
]

# Topics the bot should hand off to email support rather than answer itself.
EMAIL_ONLY_KEYWORDS = [
    'payment', 'pay', 'billing', 'charge', 'refund', 'money back',
    'attend', 'attendance', 'miss', 'missed', 'late', 'reschedule',
    'account', 'login', 'password', 'sign in', 'membership'
]

# Phrases indicating the user wants a longer, more detailed answer.
DETAIL_SYNONYMS = [
    'detail', 'details', 'explain', 'elaborate', 'tell me more',
    'more info', 'describe', 'thorough', 'comprehensive'
]

# System-prompt fragment defining the chatbot's voice and constraints.
PERSONA_INSTRUCTION = """
You are a warm, encouraging mentor at Get Scene Studios. Your goal is to help actors navigate their careers with confidence.
- User Context: The user is already on getscenestudios.com. Behave as if you are a guide right there with them.
- Negative Constraint: NEVER use the phrase "Visit the website" or "Check our site". Instead, use "You can see here..." or "Click this link below..." or similar language that implies current presence.
- Sound natural and human, not scripted or robotic. Use conversational transitions like "I'd suggest starting with..." or "A great way to approach this is..."
- Be encouraging but practical. Acknowledge that the acting journey is a marathon, not a sprint.
- Help the user THINK: Instead of just giving an answer, add a brief "mentorship flourish" that explains the value of a recommendation (e.g., "This workshop is great because it gets you comfortable with the pressure of a real callback.")
"""
|
database.py
ADDED
|
@@ -0,0 +1,253 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sqlite3
|
| 2 |
+
import json
|
| 3 |
+
from contextlib import contextmanager
|
| 4 |
+
from typing import List, Dict, Any, Tuple
|
| 5 |
+
from config import DB_PATH
|
| 6 |
+
|
| 7 |
+
@contextmanager
def get_db_connection():
    """Yield a SQLite connection with Row access, closing it on exit.

    Rows returned by cursors behave like dicts (sqlite3.Row), which the
    fetch_* helpers below rely on.
    """
    connection = sqlite3.connect(DB_PATH)
    connection.row_factory = sqlite3.Row
    try:
        yield connection
    finally:
        connection.close()
|
| 16 |
+
|
| 17 |
+
def fetch_all_embeddings(table: str) -> List[Tuple[int, str, List[float]]]:
    """Fetch all rows with a parseable embedding from *table*.

    Args:
        table: Table name. Must be a plain identifier — table names cannot
            be bound as SQL parameters, so the name is interpolated into
            the query text and validated first to prevent SQL injection.

    Returns:
        A list of (id, full_text, embedding-vector) tuples. Rows whose
        embedding column is NULL or not valid JSON are silently skipped.

    Raises:
        ValueError: If *table* is not a valid Python/SQL identifier.
    """
    if not table.isidentifier():
        raise ValueError(f"Invalid table name: {table!r}")
    with get_db_connection() as conn:
        cur = conn.cursor()
        cur.execute(f"SELECT id, full_text, embedding FROM {table}")
        rows = cur.fetchall()

        parsed = []
        for row in rows:
            try:
                parsed.append((row['id'], row['full_text'], json.loads(row['embedding'])))
            except (json.JSONDecodeError, TypeError):
                # NULL embedding (TypeError) or corrupt JSON — skip the row.
                continue
        return parsed
|
| 31 |
+
|
| 32 |
+
def fetch_row_by_id(table: str, row_id: int) -> Dict[str, Any]:
    """Fetch a single row by ID.

    Args:
        table: Table name. Must be a plain identifier — table names cannot
            be bound as SQL parameters, so the name is interpolated into
            the query text and validated first to prevent SQL injection.
        row_id: Primary-key value to look up (bound as a real parameter).

    Returns:
        The row as a plain dict, or {} when no row matches.

    Raises:
        ValueError: If *table* is not a valid Python/SQL identifier.
    """
    if not table.isidentifier():
        raise ValueError(f"Invalid table name: {table!r}")
    with get_db_connection() as conn:
        cur = conn.cursor()
        cur.execute(f"SELECT * FROM {table} WHERE id = ?", (row_id,))
        row = cur.fetchone()
        return dict(row) if row else {}
|
| 39 |
+
|
| 40 |
+
def fetch_all_faq_embeddings() -> List[Tuple[int, str, str, List[float]]]:
    """Return (id, question, answer, embedding) for every FAQ with a
    parseable embedding; rows with NULL/corrupt embeddings are skipped."""
    with get_db_connection() as conn:
        rows = conn.execute(
            "SELECT id, question, answer, embedding FROM faq_entries"
        ).fetchall()

        results = []
        for r in rows:
            try:
                vector = json.loads(r['embedding'])
            except (json.JSONDecodeError, TypeError):
                # NULL embedding or corrupt JSON — skip this FAQ.
                continue
            results.append((r['id'], r['question'], r['answer'], vector))
        return results
|
| 54 |
+
|
| 55 |
+
def log_question(
    question: str,
    session_id: str = None,
    category: str = None,
    answer: str = None,
    detected_mode: str = None,
    routing_question: str = None,
    rule_triggered: str = None,
    link_provided: bool = False
):
    """Log a user question to the database with comprehensive observability metadata.

    Args:
        question: The user's question
        session_id: Session identifier
        category: Question category (e.g., 'faq_match', 'llm_generated', 'policy_violation')
        answer: The bot's response
        detected_mode: Operating mode ('Mode A' or 'Mode B')
        routing_question: The routing question asked (if any)
        rule_triggered: Business rule that was triggered (e.g., 'audit_rule', 'free_class_first')
        link_provided: Whether a direct link was included in the response
            (stored as 0/1 in the integer column)
    """
    with get_db_connection() as conn:
        cur = conn.cursor()

        try:
            cur.execute("""
                INSERT INTO question_logs (
                    session_id, question, category, answer,
                    detected_mode, routing_question, rule_triggered, link_provided
                )
                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
            """, (
                session_id, question, category, answer,
                detected_mode, routing_question, rule_triggered,
                1 if link_provided else 0
            ))
        except sqlite3.OperationalError as e:
            # Fallback for older schema versions (shouldn't happen after migration):
            # if the extra columns are missing, degrade to logging only the question.
            print(f"⚠️ Logging error: {e}. Falling back to basic logging.")
            cur.execute("INSERT INTO question_logs (question) VALUES (?)", (question,))

        conn.commit()
|
| 98 |
+
|
| 99 |
+
def get_session_state(session_id: str) -> Dict[str, Any]:
    """Look up a session's state row; return a default state when absent."""
    with get_db_connection() as conn:
        cur = conn.cursor()
        cur.execute("SELECT * FROM user_sessions WHERE session_id = ?", (session_id,))
        row = cur.fetchone()

    if row is None:
        # Defaults for a brand-new session (knowledge_context is JSON text).
        return {"preference": None, "msg_count": 0, "clarification_count": 0, "knowledge_context": "{}"}
    return dict(row)
|
| 108 |
+
|
| 109 |
+
def update_session_state(session_id: str, preference: str = None, increment_count: bool = True, increment_clarification: bool = False, reset_clarification: bool = False, knowledge_update: Dict = None):
    """Update (or create) a session row, merging the knowledge dictionary.

    Args:
        session_id: Session identifier (primary key in user_sessions).
        preference: New preference value; when falsy, the stored one is kept.
        increment_count: Bump msg_count by one for this turn.
        increment_clarification: Bump clarification_count by one.
        reset_clarification: Zero clarification_count (wins over increment).
        knowledge_update: Dict merged into the stored knowledge_context JSON.

    Side effects:
        10-Message Memory Rule — once msg_count would exceed 10, the session's
        preference, knowledge and clarification counters are reset and the
        count restarts at 1.
    """
    with get_db_connection() as conn:
        cur = conn.cursor()

        # Check if the session already exists.
        cur.execute("SELECT preference, msg_count, clarification_count, knowledge_context FROM user_sessions WHERE session_id = ?", (session_id,))
        row = cur.fetchone()

        current_knowledge = {}
        if row:
            curr_pref, curr_count, curr_clarification, curr_knowledge_json = row
            try:
                current_knowledge = json.loads(curr_knowledge_json)
            except (json.JSONDecodeError, TypeError):
                # Was a bare `except:` — narrowed to the NULL/corrupt-JSON
                # cases so real errors (e.g. KeyboardInterrupt) propagate.
                current_knowledge = {}

            new_pref = preference if preference else curr_pref
            new_count = curr_count + 1 if increment_count else curr_count

            # 10-Message Memory Rule: Reset if we hit the limit
            if new_count > 10:
                print(f"🔄 Session {session_id} reached 10 messages. Resetting memory context.")
                new_count = 1
                new_pref = None
                current_knowledge = {}
                new_clarification = 0
            else:
                new_clarification = curr_clarification
                if reset_clarification:
                    new_clarification = 0
                elif increment_clarification:
                    new_clarification = curr_clarification + 1

            # Merge knowledge updates
            if knowledge_update:
                current_knowledge.update(knowledge_update)

            new_knowledge_json = json.dumps(current_knowledge)

            cur.execute("""
                UPDATE user_sessions
                SET preference = ?, msg_count = ?, clarification_count = ?, knowledge_context = ?, last_updated = CURRENT_TIMESTAMP
                WHERE session_id = ?
            """, (new_pref, new_count, new_clarification, new_knowledge_json, session_id))
        else:
            # First message of a new session.
            new_pref = preference
            new_count = 1 if increment_count else 0
            new_clarification = 1 if increment_clarification else 0

            if knowledge_update:
                current_knowledge.update(knowledge_update)
            new_knowledge_json = json.dumps(current_knowledge)

            cur.execute("""
                INSERT INTO user_sessions (session_id, preference, msg_count, clarification_count, knowledge_context)
                VALUES (?, ?, ?, ?, ?)
            """, (session_id, new_pref, new_count, new_clarification, new_knowledge_json))

        conn.commit()
|
| 169 |
+
|
| 170 |
+
def update_faq_entry(faq_id: int, question: str, answer: str):
    """Rewrite an FAQ's text and clear its stale embedding.

    The embedding is set to NULL so the next Sync & Embed pass regenerates
    it for the new text.
    """
    with get_db_connection() as conn:
        conn.execute(
            "UPDATE faq_entries SET question = ?, answer = ?, embedding = NULL WHERE id = ?",
            (question, answer, faq_id),
        )
        conn.commit()
|
| 179 |
+
|
| 180 |
+
def delete_faq_entry(faq_id: int):
    """Remove the FAQ row with the given id (no-op when it does not exist)."""
    with get_db_connection() as conn:
        conn.execute("DELETE FROM faq_entries WHERE id = ?", (faq_id,))
        conn.commit()
|
| 186 |
+
|
| 187 |
+
def add_faq_entry(question: str, answer: str):
    """Insert a new FAQ row; its embedding stays NULL until the next sync."""
    with get_db_connection() as conn:
        conn.execute(
            "INSERT INTO faq_entries (question, answer) VALUES (?, ?)",
            (question, answer),
        )
        conn.commit()
|
| 196 |
+
|
| 197 |
+
def bulk_update_faqs(entries: List[Dict[str, str]]):
    """Insert a batch of FAQ rows; accepts 'Question'/'question' key styles.

    Rows missing either field are skipped. Note this appends (plain INSERT)
    rather than upserting, so re-uploading the same file creates duplicates.
    """
    with get_db_connection() as conn:
        cur = conn.cursor()
        for entry in entries:
            q = entry.get('Question') or entry.get('question')
            a = entry.get('Answer') or entry.get('answer')
            if not (q and a):
                continue  # incomplete row — skip
            cur.execute(
                "INSERT INTO faq_entries (question, answer) VALUES (?, ?)",
                (q, a),
            )
        conn.commit()
|
| 210 |
+
|
| 211 |
+
def bulk_update_podcasts(entries: List[Dict[str, str]]):
    """Insert a batch of podcast rows; accepts title-case or snake_case keys.

    Rows missing any of guest/url/summary are skipped. Note this appends
    (plain INSERT) rather than upserting, so re-uploads create duplicates.
    """
    with get_db_connection() as conn:
        cur = conn.cursor()
        for entry in entries:
            guest = entry.get('Guest Name') or entry.get('guest_name')
            url = entry.get('YouTube URL') or entry.get('youtube_url')
            summary = entry.get('Summary') or entry.get('summary')
            if not (guest and url and summary):
                continue  # incomplete row — skip
            # Format full_text as required by the existing retrieval logic.
            full_text = f"Guest: {guest}. Summary: {summary}"
            # Store summary in highlight_json as a one-element list for
            # compatibility with the reader side.
            h_json = json.dumps([{"summary": summary}])
            cur.execute(
                "INSERT INTO podcast_episodes (guest_name, youtube_url, highlight_json, full_text) VALUES (?, ?, ?, ?)",
                (guest, url, h_json, full_text),
            )
        conn.commit()
|
| 229 |
+
|
| 230 |
+
def fetch_all_podcast_metadata() -> List[Dict[str, Any]]:
    """Fetch all podcast metadata for the admin table.

    Returns one dict per episode with id, guest_name, youtube_url,
    highlight_json, and a derived 'summary' field: the first element's
    summary when highlight_json parses to a list, otherwise the raw
    highlight_json text.
    """
    with get_db_connection() as conn:
        cur = conn.cursor()
        cur.execute("SELECT id, guest_name, youtube_url, highlight_json FROM podcast_episodes")
        rows = cur.fetchall()
        results = []
        for row in rows:
            d = dict(row)
            # Try to extract a plain summary from the JSON for the table view.
            try:
                h = json.loads(d['highlight_json'])
                d['summary'] = h[0]['summary'] if h and isinstance(h, list) else d['highlight_json']
            except (json.JSONDecodeError, TypeError, KeyError, IndexError):
                # Was a bare `except:` — narrowed to the failure modes the
                # parse/lookup above can actually raise (NULL column, corrupt
                # JSON, list items without a 'summary' key).
                d['summary'] = d['highlight_json']
            results.append(d)
        return results
|
| 247 |
+
|
| 248 |
+
def fetch_all_faq_metadata() -> List[Dict[str, Any]]:
    """Return every FAQ as a plain dict (id, question, answer) for the admin grid."""
    query = "SELECT id, question, answer FROM faq_entries"
    with get_db_connection() as conn:
        return [dict(r) for r in conn.execute(query).fetchall()]
|
utils.py
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import openai
|
| 2 |
+
import numpy as np
|
| 3 |
+
import re
|
| 4 |
+
from typing import List, Tuple
|
| 5 |
+
from config import EMBED_MODEL
|
| 6 |
+
|
| 7 |
+
def get_embedding(text: str) -> List[float]:
    """Generate an embedding for *text* via the OpenAI embeddings API.

    Newlines are flattened to spaces before embedding.
    """
    normalized = text.replace("\n", " ").strip()
    result = openai.embeddings.create(input=[normalized], model=EMBED_MODEL)
    return result.data[0].embedding
|
| 12 |
+
|
| 13 |
+
def cosine_similarity(a: List[float], b: List[float]) -> float:
    """Calculate cosine similarity between two vectors.

    Returns 0.0 when either vector has zero magnitude (avoids division
    by zero for NULL/degenerate embeddings).
    """
    a = np.array(a)
    b = np.array(b)
    # Compute each norm once — the original recomputed them in both the
    # zero-check and the final expression (up to four norm calls).
    norm_a = np.linalg.norm(a)
    norm_b = np.linalg.norm(b)
    if norm_a == 0 or norm_b == 0:
        return 0.0
    return np.dot(a, b) / (norm_a * norm_b)
|
| 20 |
+
|
| 21 |
+
def clean_time(time_str: str) -> str:
    """Normalize a time string like '7pm' or '10:30 am' to 'H:MM AM/PM'.

    Strings without a recognizable time are returned stripped of
    surrounding whitespace; empty/None input yields "".
    """
    if not time_str:
        return ""

    match = re.search(r'(\d{1,2}):?(\d{0,2})\s*(AM|PM)', time_str, re.IGNORECASE)
    if not match:
        # No recognizable time — just trim whitespace.
        return time_str.strip()

    hour, minute, meridiem = match.groups()
    # Missing minutes (e.g. '7pm') default to '00'.
    return f"{hour}:{minute or '00'} {meridiem.upper()}"
|
| 34 |
+
|
| 35 |
+
def find_top_k_matches(user_embedding, dataset, k=3):
    """Score every (id, text, embedding) entry against *user_embedding*
    and return the k highest-scoring (score, id, text) tuples."""
    scored = [
        (cosine_similarity(user_embedding, emb), entry_id, text)
        for entry_id, text, emb in dataset
    ]
    scored.sort(reverse=True)
    return scored[:k]
|
| 43 |
+
|
| 44 |
+
def classify_intent(question: str) -> str:
    """
    Classify the user's intent into:
    Mode A: Recommendation Mode (Workshops, Dates, Availability, Recommendations)
    Mode B: Front Desk Mode (Default - Everything else)
    """
    prompt = f"""Classify the following user question into one of two modes:
1. "Mode A - Recommendation Mode": Use this if the user is asking about workshops, specific dates, what's available this month, asking for recommendations, or career goals (like getting an agent).
2. "Mode B - Front Desk Mode": Use this for broad introductory questions, kids classes, signing up, summit, instructor roles, auditing, online vs in-studio, general policies, or specific questions about existing classes.

User Question: "{question}"

Response must be exactly "Mode A" or "Mode B"."""

    try:
        completion = openai.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": prompt}],
            temperature=0,
            max_tokens=5
        )
        label = completion.choices[0].message.content.strip()
        # Anything that isn't clearly Mode A falls through to Mode B.
        return "Mode A" if "Mode A" in label else "Mode B"
    except Exception as e:
        print(f"Error in intent classification: {e}")
        return "Mode B"  # Default to Front Desk Mode
|
| 72 |
+
|
| 73 |
+
def should_include_email(question: str) -> bool:
    """
    Determine if the contact email should be shown based on user intent.
    Allowed for: Payments, Refunds, Attendance issues, Account problems.
    """
    # Imported locally, matching the file's convention for config lookups
    # (presumably to avoid a circular import — TODO confirm). The redundant
    # local `import re` was removed: re is already imported at module top.
    from config import EMAIL_ONLY_KEYWORDS

    question_lower = question.lower()
    # Whole-word match so e.g. 'pay' does not fire inside 'player'.
    return any(
        re.search(rf'\b{re.escape(word)}\b', question_lower)
        for word in EMAIL_ONLY_KEYWORDS
    )
|
| 88 |
+
|
| 89 |
+
def classify_user_type(question: str, history: List[dict] = None) -> str:
    """
    Classify the user type into:
    - new_actor
    - experienced_actor
    - parent
    - current_student
    - unknown

    Uses gpt-4o-mini with the question plus up to the last 3 history turns
    (each truncated to 100 chars) as context; any API error yields 'unknown'.
    """
    history_str = ""
    if history:
        # Keep the prompt small: last 3 turns only, 100 chars per message.
        history_str = "\nConversation context:\n" + "\n".join([f"{m['role']}: {m['content'][:100]}..." for m in history[-3:]])

    prompt = f"""Classify the user into exactly one of these categories based on their question and context:
1. "new_actor": Just starting out, has no experience, or is asking how to begin.
2. "experienced_actor": Already has credits, mentions agents, looking for advanced workshops, or refers to their career progress.
3. "parent": Asking on behalf of their child, mentions "my kid", "my son", "my daughter", "teens".
4. "current_student": Refers to past/current classes at Get Scene, mentions a specific GSP membership, or asks about recurring student workshops.
5. "unknown": Not enough information yet.

User Question: "{question}"{history_str}

Response must be exactly one of: new_actor, experienced_actor, parent, current_student, unknown."""

    try:
        response = openai.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": prompt}],
            temperature=0,
            max_tokens=10
        )
        prediction = response.choices[0].message.content.strip().lower()
        # Substring match tolerates minor formatting noise around the label.
        valid_types = ["new_actor", "experienced_actor", "parent", "current_student", "unknown"]
        for t in valid_types:
            if t in prediction:
                return t
        return "unknown"
    except Exception as e:
        print(f"Error in user type classification: {e}")
        return "unknown"
|
| 129 |
+
|
| 130 |
+
def recalculate_all_embeddings():
    """Recalculate embeddings for all entries in faq_entries and podcast_episodes that are missing embeddings."""
    # Local imports — presumably to avoid a circular import between utils
    # and database at module load time (TODO confirm).
    from database import get_db_connection
    import json

    with get_db_connection() as conn:
        cur = conn.cursor()

        # 1. Update FAQs: only rows whose embedding is NULL, so edits that
        # NULLed the column (see update_faq_entry) get refreshed.
        print("Starting FAQ embedding recalculation...")
        cur.execute("SELECT id, question FROM faq_entries WHERE embedding IS NULL")
        faqs = cur.fetchall()
        for faq_id, question in faqs:
            try:
                emb = get_embedding(question)
                cur.execute("UPDATE faq_entries SET embedding = ? WHERE id = ?", (json.dumps(emb), faq_id))
                print(f"  ✓ Updated FAQ ID {faq_id}")
            except Exception as e:
                # Best-effort: one failed API call must not abort the batch.
                print(f"  ✗ Error updating FAQ ID {faq_id}: {e}")

        # 2. Update Podcasts: same missing-only strategy, keyed on full_text.
        print("Starting Podcast embedding recalculation...")
        cur.execute("SELECT id, full_text FROM podcast_episodes WHERE embedding IS NULL")
        podcasts = cur.fetchall()
        for pod_id, full_text in podcasts:
            try:
                emb = get_embedding(full_text)
                cur.execute("UPDATE podcast_episodes SET embedding = ? WHERE id = ?", (json.dumps(emb), pod_id))
                print(f"  ✓ Updated Podcast ID {pod_id}")
            except Exception as e:
                print(f"  ✗ Error updating Podcast ID {pod_id}: {e}")

        # Single commit at the end: all successful updates land atomically.
        conn.commit()
        print("Embedding recalculation complete.")
|