saim1309 committed
Commit 42db8ec · verified · 1 Parent(s): 9b9e546

Upload 4 files

Files changed (4)
  1. admin_app.py +122 -0
  2. config.py +68 -0
  3. database.py +253 -0
  4. utils.py +163 -0
admin_app.py ADDED
@@ -0,0 +1,122 @@
+ import gradio as gr
+ import pandas as pd
+ import os
+ from database import (
+     fetch_all_faq_metadata, fetch_all_podcast_metadata,
+     add_faq_entry, update_faq_entry, delete_faq_entry,
+     bulk_update_faqs, bulk_update_podcasts
+ )
+ from utils import recalculate_all_embeddings
+ from config import OPENAI_API_KEY
+
+ # Basic Admin Credentials
+
+ from dotenv import load_dotenv
+
+ # Load environment variables
+ load_dotenv()
+
+ # Basic Admin Credentials (MUST be set in Hugging Face Secrets or .env)
+ ADMIN_USER = os.environ.get("ADMIN_USER", "admin")
+ ADMIN_PASS = os.environ.get("ADMIN_PASS")
+
+ if not ADMIN_PASS:
+     raise ValueError("CRITICAL SECURITY ERROR: ADMIN_PASS environment variable is not set. Please add it to your Hugging Face Secrets.")
+
+ def get_faqs():
+     data = fetch_all_faq_metadata()
+     return pd.DataFrame(data)
+
+ def get_podcasts():
+     data = fetch_all_podcast_metadata()
+     return pd.DataFrame(data)
+
+ def handle_faq_upload(file):
+     if file is None:
+         return "No file uploaded."
+     try:
+         df = pd.read_csv(file.name) if file.name.endswith('.csv') else pd.read_excel(file.name)
+         bulk_update_faqs(df.to_dict('records'))
+         return f"Successfully uploaded {len(df)} FAQs. Don't forget to Sync & Embed!"
+     except Exception as e:
+         return f"Error: {e}"
+
+ def handle_podcast_upload(file):
+     if file is None:
+         return "No file uploaded."
+     try:
+         df = pd.read_csv(file.name) if file.name.endswith('.csv') else pd.read_excel(file.name)
+         bulk_update_podcasts(df.to_dict('records'))
+         return f"Successfully uploaded {len(df)} Podcasts. Don't forget to Sync & Embed!"
+     except Exception as e:
+         return f"Error: {e}"
+
+ def run_sync():
+     try:
+         recalculate_all_embeddings()
+         return "Sync Complete! All missing embeddings have been generated."
+     except Exception as e:
+         return f"Sync Failed: {e}"
+
+ with gr.Blocks(title="Get Scene Admin Dashboard") as demo:
+     gr.Markdown("# 🎭 Get Scene Admin Dashboard")
+     gr.Markdown("Manage FAQs, Podcasts, and Knowledge Embeddings.")
+
+     with gr.Tabs():
+         # Tab 1: FAQs
+         with gr.TabItem("Manage FAQs"):
+             with gr.Row():
+                 faq_df = gr.Dataframe(
+                     value=get_faqs(),
+                     headers=["id", "question", "answer"],
+                     datatype=["number", "str", "str"],
+                     interactive=True,
+                     label="FAQ Database"
+                 )
+
+             with gr.Row():
+                 with gr.Column():
+                     gr.Markdown("### Add New FAQ")
+                     new_q = gr.Textbox(label="Question")
+                     new_a = gr.TextArea(label="Answer")
+                     add_btn = gr.Button("Add Entry", variant="primary")
+
+                 with gr.Column():
+                     gr.Markdown("### Bulk Upload")
+                     faq_file = gr.File(label="Upload CSV/Excel (Columns: Question, Answer)")
+                     upload_faq_btn = gr.Button("Bulk Upload FAQs")
+                     faq_upload_status = gr.Textbox(label="Status", interactive=False)
+
+             def add_and_refresh(q, a):
+                 add_faq_entry(q, a)
+                 return get_faqs(), "", ""
+
+             add_btn.click(add_and_refresh, [new_q, new_a], [faq_df, new_q, new_a])
+             upload_faq_btn.click(handle_faq_upload, [faq_file], [faq_upload_status])
+
+         # Tab 2: Podcasts
+         with gr.TabItem("Podcasts"):
+             pod_df = gr.Dataframe(
+                 value=get_podcasts(),
+                 headers=["id", "guest_name", "youtube_url", "summary"],
+                 datatype=["number", "str", "str", "str"],
+                 label="Podcast Episodes"
+             )
+             gr.Markdown("### Bulk Upload Podcasts")
+             pod_file = gr.File(label="Upload CSV/Excel (Columns: Guest Name, YouTube URL, Summary)")
+             upload_pod_btn = gr.Button("Bulk Upload Podcasts")
+             pod_upload_status = gr.Textbox(label="Status", interactive=False)
+
+             upload_pod_btn.click(handle_podcast_upload, [pod_file], [pod_upload_status])
+
+         # Tab 3: Sync
+         with gr.TabItem("Sync & Embed"):
+             gr.Markdown("### Recalculate Embeddings")
+             gr.Markdown("When you change text or upload new data, the 'embeddings' (AI understanding) must be recalculated for the chatbot to recognize the new information.")
+             sync_btn = gr.Button("🔄 Sync & Recalculate Embeddings", variant="primary", scale=2)
+             sync_status = gr.Textbox(label="Sync Status", interactive=False)
+
+             sync_btn.click(run_sync, None, [sync_status])
+
+ if __name__ == "__main__":
+     demo.launch(auth=(ADMIN_USER, ADMIN_PASS), server_name="0.0.0.0")
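
For reference, a minimal sketch of an upload file that handle_faq_upload above would accept; the column names mirror the gr.File label and the keys bulk_update_faqs looks up, while the file name and rows are purely illustrative placeholders.

import pandas as pd

# Hypothetical example: two placeholder rows written to a CSV that the
# "Bulk Upload FAQs" button could ingest. Real rows would hold the studio's FAQs.
pd.DataFrame([
    {"Question": "Example question one?", "Answer": "Example answer one."},
    {"Question": "Example question two?", "Answer": "Example answer two."},
]).to_csv("faq_upload.csv", index=False)

# Podcast uploads follow the same pattern with columns: Guest Name, YouTube URL, Summary.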
config.py ADDED
@@ -0,0 +1,68 @@
+ import os
+ from datetime import timedelta
+ from dotenv import load_dotenv
+
+ load_dotenv()
+
+ # API Keys
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+
+ # Database
+ DB_PATH = "getscene_ai.sqlite"
+
+ # Models
+ EMBED_MODEL = "text-embedding-3-small"
+ GEN_MODEL = "gpt-4o"
+ FAST_MODEL = "gpt-4o-mini"
+
+ # Caching
+ CACHE_DURATION = timedelta(hours=24)
+
+ # Keyword Lists
+ EMOTIONAL_KEYWORDS = [
+     'stuck', 'frustrated', 'discouraged', 'overwhelmed', 'scared',
+     'nervous', 'anxious', 'worried', 'fear', 'doubt', 'confidence',
+     'insecure', 'lost', 'confused', 'struggling', 'hard time',
+     'giving up', 'burnout', 'rejection', 'failed', 'can\'t',
+     'feeling', 'feel', 'emotional', 'depressed', 'sad', 'unmotivated',
+     'hopeless', 'stressed', 'pressure', 'imposter'
+ ]
+
+ ACTION_KEYWORDS = [
+     'get an agent', 'find agent', 'need agent', 'want agent', 'sign with agent',
+     'more auditions', 'book', 'booking', 'callbacks', 'improve',
+     'better', 'self-tape', 'materials', 'headshots', 'reel',
+     'network', 'connections', 'industry', 'career', 'strategy',
+     'agent prep', 'total agent prep', 'workshop', 'class', 'training',
+     'results', 'success', 'grow', 'advance', 'level up'
+ ]
+
+ POLICY_KEYWORDS = [
+     'refund', 'refunds', 'money back',
+     'attend', 'attendance', 'miss', 'missed', 'missing', 'absent',
+     'late', 'lateness', 'tardy',
+     'reschedule', 'change date', 'move class',
+     'credit', 'credits',
+     'cancel', 'cancellation', 'canceling',
+     'policy', 'policies'
+ ]
+
+ EMAIL_ONLY_KEYWORDS = [
+     'payment', 'pay', 'billing', 'charge', 'refund', 'money back',
+     'attend', 'attendance', 'miss', 'missed', 'late', 'reschedule',
+     'account', 'login', 'password', 'sign in', 'membership'
+ ]
+
+ DETAIL_SYNONYMS = [
+     'detail', 'details', 'explain', 'elaborate', 'tell me more',
+     'more info', 'describe', 'thorough', 'comprehensive'
+ ]
+
+ PERSONA_INSTRUCTION = """
+ You are a warm, encouraging mentor at Get Scene Studios. Your goal is to help actors navigate their careers with confidence.
+ - User Context: The user is already on getscenestudios.com. Behave as if you are a guide right there with them.
+ - Negative Constraint: NEVER use the phrase "Visit the website" or "Check our site". Instead, use "You can see here..." or "Click this link below..." or similar language that implies current presence.
+ - Sound natural and human, not scripted or robotic. Use conversational transitions like "I'd suggest starting with..." or "A great way to approach this is..."
+ - Be encouraging but practical. Acknowledge that the acting journey is a marathon, not a sprint.
+ - Help the user THINK: Instead of just giving an answer, add a brief "mentorship flourish" that explains the value of a recommendation (e.g., "This workshop is great because it gets you comfortable with the pressure of a real callback.")
+ """
database.py ADDED
@@ -0,0 +1,253 @@
+ import sqlite3
+ import json
+ from contextlib import contextmanager
+ from typing import List, Dict, Any, Tuple
+ from config import DB_PATH
+
+ @contextmanager
+ def get_db_connection():
+     """Context manager for database connections."""
+     conn = sqlite3.connect(DB_PATH)
+     conn.row_factory = sqlite3.Row
+     try:
+         yield conn
+     finally:
+         conn.close()
+
+ def fetch_all_embeddings(table: str) -> List[Tuple[int, str, List[float]]]:
+     """Fetch all embeddings from a table."""
+     with get_db_connection() as conn:
+         cur = conn.cursor()
+         cur.execute(f"SELECT id, full_text, embedding FROM {table}")
+         rows = cur.fetchall()
+
+         parsed = []
+         for row in rows:
+             try:
+                 parsed.append((row['id'], row['full_text'], json.loads(row['embedding'])))
+             except (json.JSONDecodeError, TypeError):
+                 continue
+         return parsed
+
+ def fetch_row_by_id(table: str, row_id: int) -> Dict[str, Any]:
+     """Fetch a single row by ID."""
+     with get_db_connection() as conn:
+         cur = conn.cursor()
+         cur.execute(f"SELECT * FROM {table} WHERE id = ?", (row_id,))
+         row = cur.fetchone()
+         return dict(row) if row else {}
+
+ def fetch_all_faq_embeddings() -> List[Tuple[int, str, str, List[float]]]:
+     """Fetch all FAQ embeddings."""
+     with get_db_connection() as conn:
+         cur = conn.cursor()
+         cur.execute("SELECT id, question, answer, embedding FROM faq_entries")
+         rows = cur.fetchall()
+
+         parsed = []
+         for row in rows:
+             try:
+                 parsed.append((row['id'], row['question'], row['answer'], json.loads(row['embedding'])))
+             except (json.JSONDecodeError, TypeError):
+                 continue
+         return parsed
+
+ def log_question(
+     question: str,
+     session_id: str = None,
+     category: str = None,
+     answer: str = None,
+     detected_mode: str = None,
+     routing_question: str = None,
+     rule_triggered: str = None,
+     link_provided: bool = False
+ ):
+     """Log a user question to the database with comprehensive observability metadata.
+
+     Args:
+         question: The user's question
+         session_id: Session identifier
+         category: Question category (e.g., 'faq_match', 'llm_generated', 'policy_violation')
+         answer: The bot's response
+         detected_mode: Operating mode ('Mode A' or 'Mode B')
+         routing_question: The routing question asked (if any)
+         rule_triggered: Business rule that was triggered (e.g., 'audit_rule', 'free_class_first')
+         link_provided: Whether a direct link was included in the response
+     """
+     with get_db_connection() as conn:
+         cur = conn.cursor()
+
+         try:
+             cur.execute("""
+                 INSERT INTO question_logs (
+                     session_id, question, category, answer,
+                     detected_mode, routing_question, rule_triggered, link_provided
+                 )
+                 VALUES (?, ?, ?, ?, ?, ?, ?, ?)
+             """, (
+                 session_id, question, category, answer,
+                 detected_mode, routing_question, rule_triggered,
+                 1 if link_provided else 0
+             ))
+         except sqlite3.OperationalError as e:
+             # Fallback for older schema versions (shouldn't happen after migration)
+             print(f"⚠️ Logging error: {e}. Falling back to basic logging.")
+             cur.execute("INSERT INTO question_logs (question) VALUES (?)", (question,))
+
+         conn.commit()
+
+ def get_session_state(session_id: str) -> Dict[str, Any]:
+     """Get session state from DB"""
+     with get_db_connection() as conn:
+         cur = conn.cursor()
+         cur.execute("SELECT * FROM user_sessions WHERE session_id = ?", (session_id,))
+         row = cur.fetchone()
+         if row:
+             return dict(row)
+         return {"preference": None, "msg_count": 0, "clarification_count": 0, "knowledge_context": "{}"}
+
+ def update_session_state(session_id: str, preference: str = None, increment_count: bool = True, increment_clarification: bool = False, reset_clarification: bool = False, knowledge_update: Dict = None):
+     """Update session state with Knowledge Dictionary support"""
+     with get_db_connection() as conn:
+         cur = conn.cursor()
+
+         # Check if exists
+         cur.execute("SELECT preference, msg_count, clarification_count, knowledge_context FROM user_sessions WHERE session_id = ?", (session_id,))
+         row = cur.fetchone()
+
+         current_knowledge = {}
+         if row:
+             curr_pref, curr_count, curr_clarification, curr_knowledge_json = row
+             try:
+                 current_knowledge = json.loads(curr_knowledge_json)
+             except:
+                 current_knowledge = {}
+
+             new_pref = preference if preference else curr_pref
+             new_count = curr_count + 1 if increment_count else curr_count
+
+             # 10-Message Memory Rule: Reset if we hit the limit
+             if new_count > 10:
+                 print(f"🔄 Session {session_id} reached 10 messages. Resetting memory context.")
+                 new_count = 1
+                 new_pref = None
+                 current_knowledge = {}
+                 new_clarification = 0
+             else:
+                 new_clarification = curr_clarification
+                 if reset_clarification:
+                     new_clarification = 0
+                 elif increment_clarification:
+                     new_clarification = curr_clarification + 1
+
+             # Merge knowledge updates
+             if knowledge_update:
+                 current_knowledge.update(knowledge_update)
+
+             new_knowledge_json = json.dumps(current_knowledge)
+
+             cur.execute("""
+                 UPDATE user_sessions
+                 SET preference = ?, msg_count = ?, clarification_count = ?, knowledge_context = ?, last_updated = CURRENT_TIMESTAMP
+                 WHERE session_id = ?
+             """, (new_pref, new_count, new_clarification, new_knowledge_json, session_id))
+         else:
+             new_pref = preference
+             new_count = 1 if increment_count else 0
+             new_clarification = 1 if increment_clarification else 0
+
+             if knowledge_update:
+                 current_knowledge.update(knowledge_update)
+             new_knowledge_json = json.dumps(current_knowledge)
+
+             cur.execute("""
+                 INSERT INTO user_sessions (session_id, preference, msg_count, clarification_count, knowledge_context)
+                 VALUES (?, ?, ?, ?, ?)
+             """, (session_id, new_pref, new_count, new_clarification, new_knowledge_json))
+
+         conn.commit()
+
+ def update_faq_entry(faq_id: int, question: str, answer: str):
+     """Update an existing FAQ entry."""
+     with get_db_connection() as conn:
+         cur = conn.cursor()
+         cur.execute(
+             "UPDATE faq_entries SET question = ?, answer = ?, embedding = NULL WHERE id = ?",
+             (question, answer, faq_id)
+         )
+         conn.commit()
+
+ def delete_faq_entry(faq_id: int):
+     """Delete an FAQ entry."""
+     with get_db_connection() as conn:
+         cur = conn.cursor()
+         cur.execute("DELETE FROM faq_entries WHERE id = ?", (faq_id,))
+         conn.commit()
+
+ def add_faq_entry(question: str, answer: str):
+     """Add a new FAQ entry."""
+     with get_db_connection() as conn:
+         cur = conn.cursor()
+         cur.execute(
+             "INSERT INTO faq_entries (question, answer) VALUES (?, ?)",
+             (question, answer)
+         )
+         conn.commit()
+
+ def bulk_update_faqs(entries: List[Dict[str, str]]):
+     """Bulk update FAQs from a list of dictionaries."""
+     with get_db_connection() as conn:
+         cur = conn.cursor()
+         for entry in entries:
+             question = entry.get('Question') or entry.get('question')
+             answer = entry.get('Answer') or entry.get('answer')
+             if question and answer:
+                 cur.execute(
+                     "INSERT INTO faq_entries (question, answer) VALUES (?, ?)",
+                     (question, answer)
+                 )
+         conn.commit()
+
+ def bulk_update_podcasts(entries: List[Dict[str, str]]):
+     """Bulk update Podcasts from a list of dictionaries."""
+     with get_db_connection() as conn:
+         cur = conn.cursor()
+         for entry in entries:
+             guest = entry.get('Guest Name') or entry.get('guest_name')
+             url = entry.get('YouTube URL') or entry.get('youtube_url')
+             summary = entry.get('Summary') or entry.get('summary')
+             if guest and url and summary:
+                 # Format full_text as required by the existing logic
+                 full_text = f"Guest: {guest}. Summary: {summary}"
+                 # Store summary in highlight_json as a simple list for compatibility
+                 h_json = json.dumps([{"summary": summary}])
+                 cur.execute(
+                     "INSERT INTO podcast_episodes (guest_name, youtube_url, highlight_json, full_text) VALUES (?, ?, ?, ?)",
+                     (guest, url, h_json, full_text)
+                 )
+         conn.commit()
+
+ def fetch_all_podcast_metadata() -> List[Dict[str, Any]]:
+     """Fetch all podcast metadata for the admin table."""
+     with get_db_connection() as conn:
+         cur = conn.cursor()
+         cur.execute("SELECT id, guest_name, youtube_url, highlight_json FROM podcast_episodes")
+         rows = cur.fetchall()
+         results = []
+         for row in rows:
+             d = dict(row)
+             # Try to extract plain summary from JSON for the table
+             try:
+                 h = json.loads(d['highlight_json'])
+                 d['summary'] = h[0]['summary'] if h and isinstance(h, list) else d['highlight_json']
+             except:
+                 d['summary'] = d['highlight_json']
+             results.append(d)
+         return results
+
+ def fetch_all_faq_metadata() -> List[Dict[str, Any]]:
+     """Fetch all FAQ metadata for the admin table."""
+     with get_db_connection() as conn:
+         cur = conn.cursor()
+         cur.execute("SELECT id, question, answer FROM faq_entries")
+         return [dict(row) for row in cur.fetchall()]
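
Note that the table definitions themselves are not part of this commit; the queries above only assume they exist. A rough sketch of the minimal SQLite schema they imply follows, where column types and defaults are assumptions and the real migration may differ.

import sqlite3
from config import DB_PATH

# Assumed minimal schema inferred from the SELECT/INSERT/UPDATE statements in
# database.py; embeddings are stored as JSON text, matching json.loads/json.dumps above.
with sqlite3.connect(DB_PATH) as conn:
    conn.executescript("""
        CREATE TABLE IF NOT EXISTS faq_entries (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            question TEXT, answer TEXT, embedding TEXT
        );
        CREATE TABLE IF NOT EXISTS podcast_episodes (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            guest_name TEXT, youtube_url TEXT, highlight_json TEXT,
            full_text TEXT, embedding TEXT
        );
        CREATE TABLE IF NOT EXISTS question_logs (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            session_id TEXT, question TEXT, category TEXT, answer TEXT,
            detected_mode TEXT, routing_question TEXT, rule_triggered TEXT,
            link_provided INTEGER DEFAULT 0
        );
        CREATE TABLE IF NOT EXISTS user_sessions (
            session_id TEXT PRIMARY KEY,
            preference TEXT, msg_count INTEGER DEFAULT 0,
            clarification_count INTEGER DEFAULT 0,
            knowledge_context TEXT DEFAULT '{}',
            last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        );
    """)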
utils.py ADDED
@@ -0,0 +1,163 @@
+ import openai
+ import numpy as np
+ import re
+ from typing import List, Tuple
+ from config import EMBED_MODEL
+
+ def get_embedding(text: str) -> List[float]:
+     """Generate embedding for a given text."""
+     text_strip = text.replace("\n", " ").strip()
+     response = openai.embeddings.create(input=[text_strip], model=EMBED_MODEL)
+     return response.data[0].embedding
+
+ def cosine_similarity(a: List[float], b: List[float]) -> float:
+     """Calculate cosine similarity between two vectors."""
+     a = np.array(a)
+     b = np.array(b)
+     if np.linalg.norm(a) == 0 or np.linalg.norm(b) == 0:
+         return 0.0
+     return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
+
+ def clean_time(time_str: str) -> str:
+     """Clean up time string."""
+     if not time_str:
+         return ""
+
+     time_match = re.search(r'(\d{1,2}):?(\d{0,2})\s*(AM|PM)', time_str, re.IGNORECASE)
+     if time_match:
+         hour = time_match.group(1)
+         minute = time_match.group(2) or "00"
+         ampm = time_match.group(3).upper()
+         return f"{hour}:{minute} {ampm}"
+
+     return time_str.strip()
+
+ def find_top_k_matches(user_embedding, dataset, k=3):
+     """Find top k matching entries from a dataset."""
+     scored = []
+     for entry_id, text, emb in dataset:
+         score = cosine_similarity(user_embedding, emb)
+         scored.append((score, entry_id, text))
+     scored.sort(reverse=True)
+     return scored[:k]
+
+ def classify_intent(question: str) -> str:
+     """
+     Classify the user's intent into:
+     Mode A: Recommendation Mode (Workshops, Dates, Availability, Recommendations)
+     Mode B: Front Desk Mode (Default - Everything else)
+     """
+     prompt = f"""Classify the following user question into one of two modes:
+     1. "Mode A - Recommendation Mode": Use this if the user is asking about workshops, specific dates, what's available this month, asking for recommendations, or career goals (like getting an agent).
+     2. "Mode B - Front Desk Mode": Use this for broad introductory questions, kids classes, signing up, summit, instructor roles, auditing, online vs in-studio, general policies, or specific questions about existing classes.
+
+     User Question: "{question}"
+
+     Response must be exactly "Mode A" or "Mode B"."""
+
+     try:
+         response = openai.chat.completions.create(
+             model="gpt-4o-mini",
+             messages=[{"role": "user", "content": prompt}],
+             temperature=0,
+             max_tokens=5
+         )
+         prediction = response.choices[0].message.content.strip()
+         if "Mode A" in prediction:
+             return "Mode A"
+         return "Mode B"
+     except Exception as e:
+         print(f"Error in intent classification: {e}")
+         return "Mode B"  # Default to Front Desk Mode
+
+ def should_include_email(question: str) -> bool:
+     """
+     Determine if the contact email should be shown based on user intent.
+     Allowed for: Payments, Refunds, Attendance issues, Account problems.
+     """
+     from config import EMAIL_ONLY_KEYWORDS
+     import re
+
+     question_lower = question.lower()
+     for word in EMAIL_ONLY_KEYWORDS:
+         pattern = rf'\b{re.escape(word)}\b'
+         if re.search(pattern, question_lower):
+             return True
+
+     return False
+
+ def classify_user_type(question: str, history: List[dict] = None) -> str:
+     """
+     Classify the user type into:
+     - new_actor
+     - experienced_actor
+     - parent
+     - current_student
+     - unknown
+     """
+     history_str = ""
+     if history:
+         history_str = "\nConversation context:\n" + "\n".join([f"{m['role']}: {m['content'][:100]}..." for m in history[-3:]])
+
+     prompt = f"""Classify the user into exactly one of these categories based on their question and context:
+     1. "new_actor": Just starting out, has no experience, or is asking how to begin.
+     2. "experienced_actor": Already has credits, mentions agents, looking for advanced workshops, or refers to their career progress.
+     3. "parent": Asking on behalf of their child, mentions "my kid", "my son", "my daughter", "teens".
+     4. "current_student": Refers to past/current classes at Get Scene, mentions a specific GSP membership, or asks about recurring student workshops.
+     5. "unknown": Not enough information yet.
+
+     User Question: "{question}"{history_str}
+
+     Response must be exactly one of: new_actor, experienced_actor, parent, current_student, unknown."""
+
+     try:
+         response = openai.chat.completions.create(
+             model="gpt-4o-mini",
+             messages=[{"role": "user", "content": prompt}],
+             temperature=0,
+             max_tokens=10
+         )
+         prediction = response.choices[0].message.content.strip().lower()
+         valid_types = ["new_actor", "experienced_actor", "parent", "current_student", "unknown"]
+         for t in valid_types:
+             if t in prediction:
+                 return t
+         return "unknown"
+     except Exception as e:
+         print(f"Error in user type classification: {e}")
+         return "unknown"
+
+ def recalculate_all_embeddings():
+     """Recalculate embeddings for all entries in faq_entries and podcast_episodes that are missing embeddings."""
+     from database import get_db_connection
+     import json
+
+     with get_db_connection() as conn:
+         cur = conn.cursor()
+
+         # 1. Update FAQs
+         print("Starting FAQ embedding recalculation...")
+         cur.execute("SELECT id, question FROM faq_entries WHERE embedding IS NULL")
+         faqs = cur.fetchall()
+         for faq_id, question in faqs:
+             try:
+                 emb = get_embedding(question)
+                 cur.execute("UPDATE faq_entries SET embedding = ? WHERE id = ?", (json.dumps(emb), faq_id))
+                 print(f" ✓ Updated FAQ ID {faq_id}")
+             except Exception as e:
+                 print(f" ✗ Error updating FAQ ID {faq_id}: {e}")
+
+         # 2. Update Podcasts
+         print("Starting Podcast embedding recalculation...")
+         cur.execute("SELECT id, full_text FROM podcast_episodes WHERE embedding IS NULL")
+         podcasts = cur.fetchall()
+         for pod_id, full_text in podcasts:
+             try:
+                 emb = get_embedding(full_text)
+                 cur.execute("UPDATE podcast_episodes SET embedding = ? WHERE id = ?", (json.dumps(emb), pod_id))
+                 print(f" ✓ Updated Podcast ID {pod_id}")
+             except Exception as e:
+                 print(f" ✗ Error updating Podcast ID {pod_id}: {e}")
+
+         conn.commit()
+         print("Embedding recalculation complete.")
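
As a quick illustration of how these helpers compose with database.py (not part of the commit; it assumes OPENAI_API_KEY is configured, the FAQ table already has embeddings, and the example question is made up):

from database import fetch_all_faq_embeddings
from utils import get_embedding, find_top_k_matches

# Illustrative top-3 FAQ lookup: embed the user question, then score it against
# the stored FAQ question embeddings with cosine similarity.
user_question = "How do I sign up for a workshop?"
dataset = [(faq_id, q, emb) for faq_id, q, a, emb in fetch_all_faq_embeddings()]
for score, faq_id, text in find_top_k_matches(get_embedding(user_question), dataset, k=3):
    print(f"{score:.3f}  [{faq_id}] {text}")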