saim1309 committed on
Commit
75944f0
·
verified ·
1 Parent(s): d55ca03

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -840
app.py DELETED
@@ -1,840 +0,0 @@
1
- import gradio as gr
2
- import openai
3
- import json
4
- from datetime import datetime, timedelta
5
- import uuid
6
- from typing import Dict
7
-
8
- from config import OPENAI_API_KEY, DB_PATH, EMBED_MODEL
9
- from utils import get_embedding, cosine_similarity, find_top_k_matches
10
- from scraper import scrape_workshops_from_squarespace
11
- from database import (
12
- fetch_all_embeddings,
13
- fetch_row_by_id,
14
- fetch_all_faq_embeddings,
15
- get_session_state,
16
- update_session_state,
17
- log_question
18
- )
19
-
20
- # ============================================================================
21
- # CONFIGURATION
22
- # ============================================================================
23
-
24
# Fail fast at import time: nothing below can work without an API key.
if not OPENAI_API_KEY:
    raise ValueError("OPENAI_API_KEY not found in .env file")

openai.api_key = OPENAI_API_KEY


# Session ID for the conversation.
# NOTE(review): this is a single module-level value, so every client served
# by this process shares it until reset_session() rebinds it - confirm that
# is intended before deploying to more than one user.
session_id = str(uuid.uuid4())

# Cache for workshop data and embeddings (filled by get_current_workshops).
workshop_cache = {
    'data': [],                             # validated workshop dicts
    'embeddings': [],                       # one embedding per entry in 'data'
    'last_updated': None,                   # datetime of the last refresh
    'cache_duration': timedelta(hours=24),  # how long 'data' stays fresh
}
40
-
41
- # ============================================================================
42
- # KEYWORD LISTS FOR ROUTING
43
- # ============================================================================
44
-
45
# Substrings that mark a message as emotional (see detect_response_type).
EMOTIONAL_KEYWORDS = [
    'stuck', 'frustrated', 'discouraged', 'overwhelmed', 'scared',
    'nervous', 'anxious', 'worried', 'fear', 'doubt', 'confidence',
    'insecure', 'lost', 'confused', 'struggling', 'hard time',
    'giving up', 'burnout', 'rejection', 'failed', 'can\'t',
    'feeling', 'feel', 'emotional', 'depressed', 'sad', 'unmotivated',
    'hopeless', 'stressed', 'pressure', 'imposter'
]

# Substrings that mark a message as action/results oriented.
ACTION_KEYWORDS = [
    'get an agent', 'find agent', 'need agent', 'want agent', 'sign with agent',
    'more auditions', 'book', 'booking', 'callbacks', 'improve',
    'better', 'self-tape', 'materials', 'headshots', 'reel',
    'network', 'connections', 'industry', 'career', 'strategy',
    'agent prep', 'total agent prep', 'workshop', 'class', 'training',
    'results', 'success', 'grow', 'advance', 'level up'
]

# Whole words/phrases that route a message to the "email us" policy reply
# (see detect_policy_issue).
POLICY_KEYWORDS = [
    'refund', 'refunds', 'money back',
    'attend', 'attendance', 'miss', 'missed', 'missing', 'absent',
    'late', 'lateness', 'tardy',
    'reschedule', 'change date', 'move class',
    'credit', 'credits',
    'cancel', 'cancellation', 'canceling',
    'policy', 'policies'
]

# Substrings that signal the user wants a longer, more detailed answer.
DETAIL_SYNONYMS = [
    'detail', 'details', 'explain', 'elaborate', 'tell me more',
    'more info', 'describe', 'thorough', 'comprehensive'
]

# Persona preamble placed at the top of every system prompt.
PERSONA_INSTRUCTION = """
You are a warm, encouraging mentor at Get Scene Studios. Your goal is to help actors navigate their careers with confidence.
- Sound natural and human, not scripted or robotic. Use conversational transitions like "I'd suggest starting with..." or "A great way to approach this is..."
- Be encouraging but practical. Acknowledge that the acting journey is a marathon, not a sprint.
- Help the user THINK: Instead of just giving an answer, add a brief "mentorship flourish" that explains the value of a recommendation (e.g., "This workshop is great because it gets you comfortable with the pressure of a real callback.")
"""
84
-
85
- # ============================================================================
86
- # HELPER FUNCTIONS
87
- # ============================================================================
88
-
89
def calculate_workshop_confidence(w: Dict) -> float:
    """Score how complete a workshop record is.

    Each populated (truthy) field contributes a fixed weight; the weights sum
    to 1.0, so a record with every field present scores 1.0 and an empty
    record scores 0.0.

    Args:
        w: Workshop record; the keys read are listed in ``field_weights``.

    Returns:
        The summed weight of the populated fields, rounded to 2 decimals.
    """
    field_weights = (
        ('title', 0.3),
        ('instructor_name', 0.3),
        ('date', 0.2),
        ('time', 0.1),
        ('source_url', 0.1),
    )
    # Start from 0.0 so an empty record still returns a float.
    score = sum((weight for field, weight in field_weights if w.get(field)), 0.0)
    return round(score, 2)
98
-
99
- # ============================================================================
100
- # WORKSHOP FUNCTIONS
101
- # ============================================================================
102
-
103
def get_current_workshops():
    """Return ``(workshops, embeddings)`` for the current schedule, with caching.

    Serves ``workshop_cache`` while it is non-empty and younger than its
    ``cache_duration``. Otherwise both Squarespace pages are scraped, records
    scoring below 0.8 on ``calculate_workshop_confidence`` are rejected, an
    embedding is computed for each remaining record, and the cache is replaced.

    If the scrape yields no valid record, the previous cache contents are
    returned when there are any (even if stale), else ``([], [])``.

    Returns:
        A pair of parallel lists: workshop dicts and their embeddings.
    """
    now = datetime.now()

    # Check if cache is still valid
    last_updated = workshop_cache['last_updated']
    if (last_updated and
            now - last_updated < workshop_cache['cache_duration'] and
            workshop_cache['data']):
        print("Using cached workshop data")
        return workshop_cache['data'], workshop_cache['embeddings']

    print("Fetching fresh workshop data...")

    # Use robust Squarespace scraping system
    online_workshops = scrape_workshops_from_squarespace("https://www.getscenestudios.com/online")
    instudio_workshops = scrape_workshops_from_squarespace("https://www.getscenestudios.com/instudio")
    scraped = online_workshops + instudio_workshops

    # Data Integrity: Validate and score workshops
    valid_workshops = []
    total_score = 0
    for w in scraped:
        conf = calculate_workshop_confidence(w)
        if conf >= 0.8:
            valid_workshops.append(w)
            total_score += conf
        else:
            print(f"⚠️ Rejecting weak record (Confidence: {conf}): {w.get('title', 'Unknown')}", flush=True)

    avg_conf = total_score / len(valid_workshops) if valid_workshops else 0
    print(f"📊 DATA INTEGRITY: Found {len(scraped)} total, {len(valid_workshops)} valid (Confidence >= 0.8)", flush=True)
    print(f"📈 Retrieval Confidence: {avg_conf:.2f} (Average)", flush=True)

    if not valid_workshops:
        if workshop_cache['data']:
            print("Scraping failed, using cached data")
            return workshop_cache['data'], workshop_cache['embeddings']
        print("No workshop data available")
        return [], []

    # Generate embeddings for workshops; None marks a failure for now.
    workshop_embeddings = []
    for workshop in valid_workshops:
        try:
            workshop_embeddings.append(get_embedding(workshop['full_text']))
        except Exception as e:
            print(f"Error generating embedding for workshop: {e}")
            workshop_embeddings.append(None)

    # Failed records get a zero vector so the two lists stay parallel. Size it
    # from a real embedding so it matches whatever EMBED_MODEL returns; 1536
    # is only the fallback when every call failed.
    dim = next((len(e) for e in workshop_embeddings if e is not None), 1536)
    workshop_embeddings = [[0] * dim if e is None else e for e in workshop_embeddings]

    # Update cache (mutated in place, so no `global` statement is needed)
    workshop_cache['data'] = valid_workshops
    workshop_cache['embeddings'] = workshop_embeddings
    workshop_cache['last_updated'] = now

    print(f"Cached {len(valid_workshops)} workshops")
    return valid_workshops, workshop_embeddings
166
-
167
def find_top_workshops(user_embedding, k=3):
    """Return the ``k`` workshops most similar to ``user_embedding``.

    Args:
        user_embedding: Embedding of the user's question.
        k: Maximum number of results.

    Returns:
        Up to ``k`` tuples ``(score, index, full_text, workshop)`` in
        descending score order; ``[]`` when no workshops are available.
    """
    import math

    workshops, workshop_embeddings = get_current_workshops()

    if not workshops:
        return []

    scored = []
    for i, (workshop, emb) in enumerate(zip(workshops, workshop_embeddings)):
        try:
            score = cosine_similarity(user_embedding, emb)
            # get_current_workshops stores an all-zero vector when embedding
            # fails; if that yields NaN here it would make the sort order
            # undefined, so such entries are dropped.
            if math.isnan(score):
                continue
            scored.append((score, i, workshop['full_text'], workshop))
        except Exception as e:
            print(f"Error calculating similarity: {e}")
            continue

    # Sort on (score, index) only, so the dict payload is never compared.
    scored.sort(key=lambda item: (item[0], item[1]), reverse=True)
    return scored[:k]
185
-
186
- # ============================================================================
187
- # PROMPT BUILDING FUNCTIONS
188
- # ============================================================================
189
-
190
def generate_enriched_links(row):
    """Build the markdown link line for one podcast episode row.

    Args:
        row: Mapping with ``youtube_url``, ``guest_name`` and
            ``highlight_json`` (a JSON list whose first item may carry a
            ``summary``). May be ``None``.

    Returns:
        A one-element list with the markdown line, or ``[]`` when the row or
        its URL is missing (a link to ``None`` would be useless to the user).
    """
    if not row:
        return []

    base_url = row.get("youtube_url")
    if not base_url:
        return []
    guest_name = row.get("guest_name", "")

    # highlight_json may be NULL or malformed; treat both as "no highlights".
    try:
        highlights = json.loads(row.get("highlight_json") or "[]")
    except (TypeError, ValueError):
        highlights = []

    summary = ""
    if isinstance(highlights, list) and highlights and isinstance(highlights[0], dict):
        candidate = highlights[0].get("summary")
        if isinstance(candidate, str):
            summary = candidate

    # Truncate summary to first sentence only
    if summary:
        first_sentence = summary.split('.')[0] + '.'
        if len(first_sentence) > 120:
            short_summary = first_sentence[:117] + "..."
        else:
            short_summary = first_sentence
    else:
        short_summary = "Industry insights for actors"

    markdown = f"🎧 [Watch {guest_name}'s episode here]({base_url}) - {short_summary}"
    return [markdown]
209
-
210
def build_enhanced_prompt(user_question, context_results, top_workshops, user_preference=None, enriched_podcast_links=None, wants_details=False, current_topic=None):
    """Build the system prompt with strict formatting rules.

    Args:
        user_question: The user's message.
        context_results: Unused; kept so existing callers keep working.
        top_workshops: Tuples ``(score, index, full_text, workshop)``. The
            first 10 are filtered and at most 3 formatted lines are shown.
        user_preference: Format preference such as ``'online'`` or
            ``'instudio'``. Case, spaces and hyphens are ignored, so
            ``'In-Studio'`` is accepted too.
        enriched_podcast_links: Markdown podcast lines; only the first is used.
        wants_details: Ask for a thorough answer instead of a brief one.
        current_topic: Session topic, used when the question itself names none.

    Returns:
        The prompt string: a short support-mode prompt when
        ``detect_response_type`` classifies the question as "support",
        otherwise the standard three-recommendation prompt.
    """
    online_url = "https://www.getscenestudios.com/online"
    instudio_url = "https://www.getscenestudios.com/instudio"

    # Free classes are ONLY available online (never in-studio)
    free_class_url = online_url

    # Normalise the preference once. Comparing the raw value against the
    # display label would never match for in-studio ('instudio' vs
    # 'in-studio') and would filter out every workshop.
    pref_key = ''.join(ch for ch in (user_preference or '').lower() if ch.isalnum())
    pref_label = {'online': 'Online', 'instudio': 'In-Studio'}.get(pref_key)
    if user_preference and not pref_label:
        pref_label = user_preference.capitalize()

    # helper for clean links
    def format_workshop(w):
        if not w.get('title') or not w.get('instructor_name') or not w.get('date'):
            return None

        source_url = w.get('source_url') or ''
        link = instudio_url if "/instudio" in source_url else online_url

        # User Preference Filtering
        w_type = "Online" if "online" in source_url else "In-Studio"
        w_key = 'online' if w_type == "Online" else 'instudio'
        if user_preference and pref_key != w_key:
            return None

        if calculate_workshop_confidence(w) < 0.70:
            return None

        # R2: Force format inclusion into the title link for robustness
        line = f"- [{w['title']} ({w_type})]({link}) with {w['instructor_name']} on {w['date']}"
        if w.get('time'):
            line += f" at {w['time']}"
        return line

    # Prepare workshop list (check top 10, keep the first 3 that pass the filter)
    workshop_lines = []
    for _, _, _, w_data in (top_workshops or [])[:10]:
        formatted = format_workshop(w_data)
        if formatted:
            workshop_lines.append(formatted)

    if workshop_lines:
        workshop_text = "\n".join(workshop_lines[:3])
    else:
        # Improved fallback to avoid generic/placeholder-like feeling
        label = f"{pref_label} " if user_preference else ""
        link = instudio_url if pref_key == 'instudio' else online_url
        workshop_text = f"We are constantly updating our schedule! Check our current {label}availability and latest workshops at {link}"

    # Handle missing podcast data strictly
    if not enriched_podcast_links:
        single_podcast = "Our latest industry insights are available on YouTube: https://www.youtube.com/@GetSceneStudios"
    else:
        single_podcast = enriched_podcast_links[0]

    # --- EMOTIONAL / SUPPORT MODE CHECK ---
    if detect_response_type(user_question) == "support":
        return f"""{PERSONA_INSTRUCTION}

You are acting in SUPPORT MODE.

CRITICAL INSTRUCTIONS:
1. ACKNOWLEDGE their feelings first (e.g., "I hear how frustrating it is to feel stuck...").
2. Provide SUPPORTIVE language (2-3 sentences max).
3. Offer EXACTLY ONE gentle follow-up resource: either the podcast OR the free class.
4. DO NOT suggest paid workshops or upsell in this response.
5. KEEP IT BRIEF (≤150 words).

USER'S QUESTION: {user_question}

REQUIRED RESPONSE FORMAT:
[Your empathetic, supportive acknowledgment]

Here's a free resource that might help you move forward:
[Pick ONE: {single_podcast} OR Free Class at {free_class_url}]

Questions? Contact info@getscenestudios.com"""

    # --- STANDARD LOGIC FOR CONTEXT SNIPPET ---
    question_lower = user_question.lower()

    # Priority 1: Direct keywords in the current question (first match wins,
    # so the order of this table matters).
    topic_keywords = (
        ('agent', ['agent', 'representation', 'rep', 'manager']),
        ('beginner', ['beginner', 'new', 'start', 'beginning']),
        ('audition', ['callback', 'audition', 'tape', 'self-tape', 'booking']),
        ('mentorship', ['mentorship', 'coaching']),
        ('pricing', ['price', 'cost', 'how much']),
        ('classes', ['class', 'workshop', 'training', 'learn']),
        ('membership', ['membership', 'gsp', 'plus']),
    )
    detected_topic = next(
        (topic for topic, words in topic_keywords
         if any(word in question_lower for word in words)),
        None,
    )

    # Priority 2: Fallback to session context if current question is ambiguous
    if not detected_topic and current_topic:
        topic_map = {
            'agent_seeking': 'agent',
            'beginner': 'beginner',
            'audition_help': 'audition',
            'mentorship': 'mentorship',
            'pricing': 'pricing',
            'classes': 'classes',
            'membership': 'membership'
        }
        detected_topic = topic_map.get(current_topic)

    # Assign snippet based on topic
    if detected_topic == 'classes':
        link = online_url if pref_key == 'online' else instudio_url
        format_word = f"{pref_label.lower()} " if pref_label else ""
        context_snippet = f"Get Scene Studios offers world-class {format_word}acting workshops. Our sessions focus on camera technique and industry readiness. Full details at {link}."
    else:
        snippets = {
            'agent': "Get Scene Studios has helped 1000+ actors land representation. Total Agent Prep offers live practice with working agents (age 16+, limited to 12 actors).",
            'beginner': "Get Scene Studios specializes in getting actors audition-ready fast with camera technique and professional self-tape skills.",
            'audition': "Get Scene offers Crush the Callback (Zoom simulation) and Perfect Submission (self-tape mastery) for actors refining their technique.",
            'mentorship': "Working Actor Mentorship is a 6-month program ($3,000) with structured feedback and industry access.",
            'pricing': "Get Scene Studios pricing varies by program. Most workshops cap at 12-14 actors for personalized feedback.",
            'membership': "Get Scene Plus (GSP) is our membership program that provides ongoing access to industry pros and audition insights.",
        }
        context_snippet = snippets.get(
            detected_topic,
            "Get Scene Studios (founded by Jesse Malinowski) offers training for TV/film actors at all levels.",
        )

    if not user_preference:
        preference_instruction = """
IMPORTANT: We need to know if the user prefers "Online" or "In-Studio" workshops.
If their question implies a location or they haven't specified, ask: "Are you looking for Online or In-Studio training?" as part of your response.
"""
    else:
        preference_instruction = f"""
USER PREFERENCE KNOWN: {pref_label.upper()}
1. DO NOT ask "Online or In-Studio" again.
2. Ensure your recommendations align with {pref_label.upper()} where possible.
"""

    # Brevity & Cognitive Load: Direct instructions based on user intent.
    # The answer placeholder follows the same mode so the prompt never asks
    # for "detailed and thorough" and "≤150 words" at the same time.
    detail_instruction = "Answer the user's question briefly (2-3 sentences max, ≤150 words total)."
    answer_placeholder = "[Your brief answer to their question, ≤150 words total]"
    if wants_details:
        target = f" regarding {detected_topic or 'the current recommendations'}"
        detail_instruction = f"Provide a detailed and thorough explanation for the user's request{target}. Focus on being helpful and providing deep value as a mentor."
        answer_placeholder = "[Your detailed, thorough answer to their question]"

    prompt = f"""{PERSONA_INSTRUCTION}

{context_snippet}

CRITICAL INSTRUCTIONS:
- {detail_instruction}
- Use natural, human transitions between your answer and the recommendations.
- For each recommendation, add a tiny bit of "mentor advice" on why it helps.
- Then ALWAYS provide exactly these three numbered recommendations (1. 2. 3.):
- Use ONLY the provided links - do not invent recommendations
- Every workshop Title MUST be followed by its format in parentheses, e.g., "Workshop Name (Online)" or "Workshop Name (In-Studio)".
- Focus on clean, readable formatting.{preference_instruction}

USER'S QUESTION: {user_question}

REQUIRED RESPONSE FORMAT:
{answer_placeholder}

Here's your path forward:
1. Free class (start here, no credit card required): {free_class_url}
2. Recommended podcast episode:
{single_podcast}
3. Relevant paid workshop:
{workshop_text}

Questions? Contact info@getscenestudios.com"""

    return prompt
388
-
389
- # ============================================================================
390
- # DETECTION FUNCTIONS
391
- # ============================================================================
392
-
393
def detect_question_category(question):
    """Categorize a user question for context injection.

    Matching is case-insensitive *substring* matching, so short keywords can
    also fire inside longer words (for example 'rep' inside 'prepare').

    Args:
        question: The user's message.

    Returns:
        A list of every category with at least one keyword present, in the
        order the categories are declared below; ``[]`` when none match.
    """
    question_lower = question.lower()

    categories = {
        'agent_seeking': ['agent', 'representation', 'rep', 'manager', 'get an agent'],
        'beginner': ['beginner', 'new', 'start', 'beginning', 'first time', 'never acted'],
        'audition_help': ['audition', 'callback', 'tape', 'self-tape', 'submission'],
        'mentorship': ['mentorship', 'coaching', 'intensive', 'mentor', 'one-on-one'],
        'pricing': ['price', 'cost', 'pricing', '$', 'money', 'payment', 'fee'],
        'classes': ['class', 'workshop', 'training', 'course', 'learn'],
        'membership': ['membership', 'join', 'member', 'gsp', 'plus'],
        'technical': ['self-tape', 'equipment', 'lighting', 'editing', 'camera']
    }

    return [
        category
        for category, keywords in categories.items()
        if any(keyword in question_lower for keyword in keywords)
    ]
414
-
415
def detect_response_type(question):
    """Classify a question as emotional ("support") or action oriented ("standard").

    Counts how many EMOTIONAL_KEYWORDS and ACTION_KEYWORDS occur as substrings
    of the lower-cased question.

    Args:
        question: The user's message.

    Returns:
        "support" when at least one emotional keyword is present and emotional
        keywords are at least as numerous as action keywords; otherwise
        "standard".
    """
    # Map the typographic apostrophe to the ASCII one so "can’t" matches the
    # "can't" keyword.
    question_lower = question.lower().replace('\u2019', "'")

    emotional_count = sum(1 for word in EMOTIONAL_KEYWORDS if word in question_lower)
    action_count = sum(1 for word in ACTION_KEYWORDS if word in question_lower)

    if emotional_count > 0 and emotional_count >= action_count:
        return "support"
    return "standard"
425
-
426
def detect_policy_issue(question):
    """Tell whether a question touches a hard-policy topic (refunds, attendance, ...).

    Args:
        question: The user's message.

    Returns:
        True when any entry of POLICY_KEYWORDS occurs in the question as a
        whole word or phrase (case-insensitive), else False.
    """
    import re

    question_lower = question.lower()
    # Use regex word boundaries to prevent substring matches
    # (e.g., 'submission' matching 'miss').
    return any(
        re.search(rf'\b{re.escape(word)}\b', question_lower)
        for word in POLICY_KEYWORDS
    )
436
-
437
def detect_preference(question):
    """Detect whether the user states an Online or In-Studio preference.

    Whole words are matched, so the brand name "Get Scene Studios" or a word
    like "personal" is no longer read as an in-studio preference.

    Args:
        question: The user's message.

    Returns:
        'online' when only online is mentioned; 'instudio' when only an
        in-studio cue (studio / in-studio / in person / Atlanta) is mentioned;
        None when neither or both are mentioned.
    """
    import re

    q_lower = question.lower()
    mentions_online = re.search(r'\bonline\b', q_lower) is not None
    mentions_studio = re.search(
        r'\b(?:in[\s-]?studio|studio|in[\s-]?person|atlanta)\b', q_lower
    ) is not None

    if mentions_online and not mentions_studio:
        return 'online'
    if mentions_studio and not mentions_online:
        return 'instudio'
    return None
445
-
446
def get_contextual_business_info(categories):
    """Return business information for the detected question categories.

    Args:
        categories: Iterable of category names (as produced by
            detect_question_category). ``None`` is treated as empty.

    Returns:
        A dict mapping each known category, in input order, to its
        ``programs`` / ``key_info`` / ``journey`` entry. Categories without
        an entry in ``context_map`` are ignored.
    """
    context_map = {
        'agent_seeking': {
            'programs': ['Total Agent Prep', 'Working Actor Mentorship'],
            'key_info': 'Live pitch practice with real agents, Actors Access optimization',
            'journey': 'Total Agent Prep → GSP → Mentorship for sustained progress'
        },
        'beginner': {
            'programs': ['Free Classes', 'Get Scene 360', 'Get Scene Plus'],
            'key_info': 'Start with holistic foundation, build consistency',
            'journey': 'Free class → Get Scene 360 → GSP membership'
        },
        'audition_help': {
            'programs': ['Perfect Submission', 'Crush the Callback', 'Audition Insight'],
            'key_info': 'Self-tape mastery, callback simulation, pro feedback',
            'journey': 'Perfect Submission → GSP for ongoing Audition Insight'
        },
        'mentorship': {
            'programs': ['Working Actor Mentorship'],
            'key_info': '6-month intensive with structured feedback and accountability',
            'journey': 'Ready for commitment → WAM → Advanced workshops'
        }
    }

    return {
        category: context_map[category]
        for category in (categories or ())
        if category in context_map
    }
478
-
479
- # ============================================================================
480
- # MAIN CHATBOT LOGIC
481
- # ============================================================================
482
-
483
def update_knowledge_from_question(session_id: str, question: str):
    """Extract format and topic from a question and store them on the session.

    Args:
        session_id: Session whose knowledge should be updated.
        question: The user's message.

    Returns:
        The dict of updates that was written (keys ``format`` and/or
        ``topic``), or ``{}`` when nothing was detected. The session is only
        touched when there is something to write.
    """
    updates = {}

    # Extract Format
    pref = detect_preference(question)
    if pref:
        updates['format'] = pref

    # Extract Topic
    cats = detect_question_category(question)
    if cats:
        # Prioritize specific topics over generic ones; otherwise take the
        # first detected category.
        priority_topics = ('agent_seeking', 'beginner', 'audition_help', 'mentorship', 'pricing')
        updates['topic'] = next(
            (topic for topic in priority_topics if topic in cats),
            cats[0],
        )

    if updates:
        update_session_state(session_id, knowledge_update=updates, increment_count=False)
    return updates
508
-
509
def _journey_lines(question, template):
    """Return one formatted 'next step' line per program journey relevant to ``question``."""
    contextual_info = get_contextual_business_info(detect_question_category(question))
    return [template.format(journey=info['journey']) for info in contextual_info.values()]


def process_question(question: str, current_session_id: str):
    """Main function to process user questions - replaces Flask /ask endpoint.

    Flow: policy keywords get a fixed "email us" reply; otherwise session
    knowledge is updated, the question is embedded and compared with the FAQ
    embeddings. A sufficiently similar FAQ is answered directly; anything
    else that looks acting related is answered by the LLM with a prompt from
    build_enhanced_prompt; everything else gets a "not sure" reply.

    Args:
        question: The user's message.
        current_session_id: Session used for state and logging.

    Returns:
        The reply text to show the user.
    """
    if not question:
        return "Question is required"

    # 0. HARD POLICY CHECK
    if detect_policy_issue(question):
        log_question(question, current_session_id)
        return "Please email info@getscenestudios.com."

    # 1. Handle Session & Knowledge State
    update_knowledge_from_question(current_session_id, question)

    session_state = get_session_state(current_session_id) or {}

    try:
        knowledge = json.loads(session_state.get('knowledge_context', '{}'))
    except (AttributeError, TypeError, ValueError):
        # Missing, NULL or malformed stored context: start with none.
        knowledge = {}
    if not isinstance(knowledge, dict):
        knowledge = {}

    user_preference = knowledge.get('format')
    current_topic = knowledge.get('topic')

    if not user_preference:
        user_preference = session_state.get('preference')

    update_session_state(current_session_id, increment_count=True)

    # Create embedding of user question
    user_embedding = get_embedding(question)

    # Check FAQ embeddings first
    top_faqs = [
        (cosine_similarity(user_embedding, emb), entry_id, question_text, answer_text)
        for entry_id, question_text, answer_text, emb in fetch_all_faq_embeddings()
    ]
    top_faqs.sort(reverse=True)

    faq_threshold = 0.50  # Lowered from 0.85 to capture direct matches better
    ambiguous_threshold = 0.60  # Lowered from 0.70
    # NOTE(review): faq_threshold is now below ambiguous_threshold, so every
    # score >= 0.50 takes the direct-answer branch and the ambiguous branch
    # below can never run. Values are left as found - confirm the intent.

    question_lower = question.lower()

    # If high-confidence FAQ match found
    if top_faqs and top_faqs[0][0] >= faq_threshold:
        update_session_state(current_session_id, reset_clarification=True, increment_count=False)

        answer_text = top_faqs[0][3]

        mentor_framing_start = "That's a great question! Here's the information on that:"
        mentor_framing_end = "I hope that clears things up! Remember, every bit of knowledge helps you steer your career in the right direction."

        enhanced_answer = f"{mentor_framing_start}\n\n{answer_text}"

        next_steps = _journey_lines(question, "A great next step for you: {journey}")
        if next_steps:
            enhanced_answer += "\n\n" + "\n".join(next_steps)

        enhanced_answer += f"\n\n{mentor_framing_end}\n\nQuestions? Contact info@getscenestudios.com"

        # Log question
        log_question(question, current_session_id, answer=enhanced_answer)

        return enhanced_answer

    elif top_faqs and top_faqs[0][0] >= ambiguous_threshold:
        # AMBIGUOUS ZONE
        needs_clarification = False
        best_match_q = top_faqs[0][2]

        # 1. Never clarify if the best match question is identical to the user question
        if question_lower.strip('?') != best_match_q.lower().strip('?'):
            # 2. Check Format logic (only if locational)
            is_locational = any(w in question_lower for w in ['online', 'studio', 'person', 'atlanta', 'location', 'where'])
            if is_locational and not user_preference:
                needs_clarification = True

            # 3. Check Topic logic (only if generic)
            is_generic_query = any(w in question_lower for w in ['price', 'cost', 'how much', 'schedule', 'when'])
            if is_generic_query and not current_topic:
                needs_clarification = True

            # 4. Force resolve if already asked once
            if session_state.get('clarification_count', 0) > 0:
                needs_clarification = False

        if needs_clarification:
            update_session_state(current_session_id, increment_clarification=True, increment_count=False)
            return f"Did you mean: {best_match_q}?"

        # Auto-Resolve
        update_session_state(current_session_id, reset_clarification=True, increment_count=False)

        enhanced_answer = top_faqs[0][3]
        next_steps = _journey_lines(question, "Next step: Consider {journey}")
        if next_steps:
            enhanced_answer += "\n\n" + "\n".join(next_steps)
        enhanced_answer += "\n\nQuestions? Contact info@getscenestudios.com"

        log_question(question, current_session_id, answer=enhanced_answer)

        return enhanced_answer

    # 3. HALLUCINATION GUARD
    categories = detect_question_category(question)

    has_session_context = (current_topic is not None) or (user_preference is not None)

    FOLLOWUP_KEYWORDS = ['yes', 'no', 'sure', 'okay', 'thanks', 'thank you', 'please', 'go ahead', 'continue', 'more']
    is_acting_related = (
        len(categories) > 0 or
        detect_response_type(question) == "support" or
        any(k in question_lower for k in ACTION_KEYWORDS) or
        any(k in question_lower for k in DETAIL_SYNONYMS) or
        any(k in question_lower for k in ['class', 'workshop', 'coaching', 'studio', 'acting', 'online', 'person', 'atlanta', 'training', 'prefer', 'preference', 'format', 'recommendation', 'online class', 'online workshop', 'instudio class', 'instudio workshop', 'actor', 'scene', 'audition', 'theatre', 'film', 'tv', 'commercial', 'agent', 'rep', 'manager']) or
        (has_session_context and any(k == question_lower.strip('.!') for k in FOLLOWUP_KEYWORDS))
    )

    if not is_acting_related:
        return "I'm not exactly sure about that. Please email info@getscenestudios.com so a member of our team can get you the most accurate answer!"

    # 4. LLM PATH
    update_session_state(current_session_id, reset_clarification=True, increment_count=False)
    podcast_data = fetch_all_embeddings("podcast_episodes")
    top_workshops = find_top_workshops(user_embedding, k=10)
    top_podcasts = find_top_k_matches(user_embedding, podcast_data, k=3)

    enriched_podcast_links = []
    for _, podcast_id, _ in top_podcasts:
        row = fetch_row_by_id("podcast_episodes", podcast_id)
        enriched_podcast_links.extend(generate_enriched_links(row))

    # Fall back to the first stored episode, but only if one exists; with an
    # empty table build_enhanced_prompt supplies its own generic podcast line.
    if not enriched_podcast_links and podcast_data:
        fallback = fetch_row_by_id("podcast_episodes", podcast_data[0][0])
        enriched_podcast_links = generate_enriched_links(fallback)

    # 5. Brevity & Detail Detection
    wants_details = any(syn in question_lower for syn in DETAIL_SYNONYMS)

    final_prompt = build_enhanced_prompt(
        question,
        None,
        top_workshops,
        user_preference=user_preference,
        enriched_podcast_links=enriched_podcast_links,
        wants_details=wants_details,
        current_topic=current_topic
    )

    response = openai.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": final_prompt},
            {"role": "user", "content": question}
        ]
    )

    # message.content can be None; never call .strip() on it directly.
    reply = (response.choices[0].message.content or "").strip()

    # Log question together with the generated answer, as the FAQ paths do.
    log_question(question, current_session_id, answer=reply)

    return reply
705
-
706
- # ============================================================================
707
- # GRADIO INTERFACE
708
- # ============================================================================
709
-
710
def chat_with_bot(message, history):
    """
    Process message directly without Flask API

    Args:
        message: User's current message
        history: Chat history (list of message dictionaries); None is
            treated as an empty history

    Returns:
        Updated history with new exchange. A blank or missing message
        returns the history unchanged.
    """
    if history is None:
        history = []

    if not message or not message.strip():
        return history

    try:
        # Process question directly (session_id is the module-level session)
        bot_reply = process_question(message, session_id)
    except Exception as e:
        # Show the failure in the chat window instead of raising to the UI.
        bot_reply = f"❌ Error: {str(e)}"

    # Append to history in Gradio 6.0 format
    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": bot_reply})
    return history
736
-
737
def reset_session():
    """Reset session ID for new conversation.

    Rebinds the module-level ``session_id`` to a fresh UUID string.

    Returns:
        An empty list, used to clear the chat history component.
    """
    global session_id
    session_id = str(uuid.uuid4())
    return []
742
-
743
- # Create Gradio interface
744
with gr.Blocks(title="Get Scene Studios Chatbot") as demo:

    gr.Markdown(
        """
        # 🎬 Get Scene Studios AI Chatbot

        Ask questions about acting classes, workshops and more!
        """
    )

    # Chatbot interface
    chatbot = gr.Chatbot(
        label="Conversation",
        height=500
    )

    # Input area
    with gr.Row():
        msg = gr.Textbox(
            label="Your Message",
            lines=2,
            scale=4
        )
        submit_btn = gr.Button("Send 📤", scale=1, variant="primary")

    # Action buttons
    with gr.Row():
        clear_btn = gr.Button("Clear Chat 🗑️", scale=1)
        reset_btn = gr.Button("New Session 🔄", scale=1)

    # Event handlers
    # The Send button and Enter in the textbox do the same thing: run the
    # bot, then clear the input box.
    for send_event in (submit_btn.click, msg.submit):
        send_event(
            fn=chat_with_bot,
            inputs=[msg, chatbot],
            outputs=[chatbot]
        ).then(
            fn=lambda: "",
            inputs=None,
            outputs=[msg]
        )

    # Clears only the visible chat; the session ID is kept.
    clear_btn.click(
        fn=lambda: [],
        inputs=None,
        outputs=[chatbot]
    )

    # Clears the chat and starts a new session ID.
    reset_btn.click(
        fn=reset_session,
        inputs=None,
        outputs=[chatbot]
    )
830
-
831
- # Launch the app
832
# Launch the app only when run as a script, not when imported.
if __name__ == "__main__":
    print("\n" + "="*60)
    print("🎬 Get Scene Studios Chatbot")
    print("="*60)
    print("\n✅ No Flask API needed - all processing is done directly!")
    print("🌐 Gradio interface will open in your browser")
    print("="*60 + "\n")

    demo.launch()