| import json |
| import re |
|
|
|
|
def load_schema(schema_path):
    """Read the user profile schema stored as JSON at *schema_path*.

    Args:
        schema_path: Path of the JSON schema file; opened as UTF-8 text.

    Returns:
        The parsed JSON document, exactly as produced by ``json.load``.
    """
    with open(schema_path, encoding='utf-8') as schema_file:
        schema = json.load(schema_file)
    return schema
|
|
|
|
def create_empty_profile():
    """
    Build a blank user profile: the starting point for a user we know nothing about.

    Scalar fields start as None and multi-valued fields start as empty
    lists. Every call builds new dict and list objects, so separate
    profiles never share state.

    Returns:
        dict: Profile with the categories "demographics", "logistics",
        "status", "clinical" and "preferences", in that order.
    """
    demographics = dict(
        population=None,
        identity_factors=[],
        language=None,
        pronouns=None,
    )
    logistics = dict(
        zipcode=None,
        region=None,
        profession=None,
        accessibility_needs=[],
        insurance=None,
        treatment_history=None,
    )
    status = dict(
        current_state=None,
        crisis_level=None,
        temporary_factors=[],
    )
    clinical = dict(
        primary_focus=None,
        substances=[],
    )
    preferences = dict(
        setting=None,
        therapy_approach=None,
        scheduling=[],
        barriers=[],
        contact_channel=None,
    )
    return dict(
        demographics=demographics,
        logistics=logistics,
        status=status,
        clinical=clinical,
        preferences=preferences,
    )
|
|
|
|
def extract_profile_updates(schema, user_input):
    """
    Scan user input against the schema and return a dict of detected profile updates.

    For 'single' type fields, returns the value of the first matching option.
    For 'multi' type fields, returns a list of all matching option values, in
    schema order and without duplicates.
    For 'extracted' type fields, delegates to _extract_field(), which knows
    how to handle zipcode, region and treatment_history.

    An option matches when any of its keywords occurs in the input as a
    case-insensitive substring. Malformed schema data is skipped instead of
    raising: non-dict categories or field definitions, null "options" or
    "keywords", options without a "value", and non-string keywords.

    Args:
        schema: The loaded profile schema dict.
        user_input: The user's message text. None yields an empty result.

    Returns:
        dict: Nested dict mirroring the profile structure, containing only
        fields where matches were found.
    """
    if user_input is None:
        return {}

    input_lower = user_input.lower()
    updates = {}

    for category_name, category in schema.items():
        # Only dict entries can be categories; anything else (e.g. a
        # top-level string) would raise AttributeError on .items().
        if not isinstance(category, dict):
            continue

        category_updates = {}

        for field_name, field_def in category.items():
            if not isinstance(field_def, dict):
                continue

            field_type = field_def.get("type")

            if field_type == "extracted":
                value = _extract_field(field_name, field_def, user_input, input_lower)
                if value is not None:
                    category_updates[field_name] = value

            elif field_type in ("single", "multi"):
                matches = []
                # `or []` rather than a .get default: an explicit null in
                # the JSON schema must also behave like "no options".
                for option in field_def.get("options") or []:
                    if not isinstance(option, dict) or "value" not in option:
                        continue
                    value = option["value"]
                    if value in matches:
                        # Two options sharing a value must not yield duplicates.
                        continue
                    keywords = option.get("keywords") or []
                    if any(
                        isinstance(keyword, str) and keyword and keyword.lower() in input_lower
                        for keyword in keywords
                    ):
                        matches.append(value)

                if matches:
                    category_updates[field_name] = (
                        matches[0] if field_type == "single" else matches
                    )

        if category_updates:
            updates[category_name] = category_updates

    return updates


def _extract_field(field_name, field_def, user_input, input_lower):
    """
    Handle extraction for non-option fields like zipcode and treatment_history.

    Args:
        field_name: Name of the schema field; only "zipcode", "region" and
            "treatment_history" are recognised.
        field_def: The field's schema definition. For zipcode, an optional
            "pattern" regex overrides the default five-digit pattern.
        user_input: The user's message text with original casing (needed
            for the capitalised place-name heuristic).
        input_lower: Lower-cased copy of user_input, used for keyword checks.

    Returns:
        The matched zipcode text, the captured place name, the whole
        user_input for treatment_history, or None when nothing is detected
        or the field name is not recognised.
    """
    if field_name == "zipcode":
        # `or` rather than a .get default so that "pattern": null (or "")
        # in the schema still falls back instead of reaching re.search(None).
        pattern = field_def.get("pattern") or r"\b\d{5}\b"
        match = re.search(pattern, user_input)
        return match.group() if match else None

    if field_name == "region":
        # One or more capitalised words following a lower-case preposition.
        # Prepositions are tried in this fixed priority order over the whole
        # message, so "in X" wins over "near Y" even if "near Y" comes first.
        place = r"\s+([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)"
        for preposition in ("in", "near", "from"):
            match = re.search(r"\b" + preposition + place, user_input)
            if match:
                return match.group(1)
        return None

    if field_name == "treatment_history":
        history_keywords = (
            "rehab", "treatment before", "been to", "tried",
            "previous treatment", "went to", "was in",
            "12-step", "residential before", "relapsed",
        )
        # Any keyword hit stores the whole message as the raw history snippet.
        if any(keyword in input_lower for keyword in history_keywords):
            return user_input
        return None

    return None
|
|
|
|
def merge_profile(profile, updates):
    """
    Fold freshly extracted updates into an existing profile, in place.

    - Fields whose current value is a list accumulate: each new item
      (or the single new value) is appended unless it is already present.
    - All other fields are overwritten by the new value.
    - None values in updates are ignored, so they never clear stored data.
    - Categories or fields that the profile does not already contain are skipped.

    Args:
        profile: The current user profile dict (modified in place).
        updates: The updates dict from extract_profile_updates().

    Returns:
        dict: The updated profile (same object as input).
    """
    for section, changes in updates.items():
        if section in profile:
            target = profile[section]
            for key, incoming in changes.items():
                if key not in target or incoming is None:
                    continue

                current = target[key]
                if not isinstance(current, list):
                    # Scalar field: latest value wins.
                    target[key] = incoming
                    continue

                # List field: treat a lone value as a one-item batch.
                batch = incoming if isinstance(incoming, list) else [incoming]
                for item in batch:
                    if item not in current:
                        current.append(item)

    return profile
|
|
|
|
def profile_to_summary(profile):
    """
    Render the filled-in parts of a user profile as a short text block
    meant for injection into the system prompt.

    Categories appear in a fixed order under a bracketed label. Fields
    that are None or an empty list are left out, and a category with no
    remaining fields is omitted entirely. Field names are shown with
    underscores replaced by spaces and title-cased; list values are
    joined with ", ".

    Returns:
        str: A human-readable summary, or empty string if profile is empty.
    """
    section_titles = (
        ("demographics", "Demographics"),
        ("logistics", "Logistics & History"),
        ("status", "Current Status"),
        ("clinical", "Clinical Needs"),
        ("preferences", "Preferences & Barriers"),
    )

    summary_lines = []
    for section_key, title in section_titles:
        entries = []
        for field_name, value in profile.get(section_key, {}).items():
            is_list = isinstance(value, list)
            if value is None or (is_list and len(value) == 0):
                continue

            label = field_name.replace("_", " ").title()
            rendered = ', '.join(str(v) for v in value) if is_list else value
            entries.append(f" - {label}: {rendered}")

        if entries:
            summary_lines.append(f"[{title}]")
            summary_lines += entries

    if not summary_lines:
        return ""

    return (
        "USER PROFILE (already collected — DO NOT ask the user again for any of these details):\n"
        + "\n".join(summary_lines)
    )
|
|