# convosim-ui/models/business_logic_utils/prompt_generation.py
# aftorresc's picture
# Llama 3.1 Update and Spanish Scenarios (#17)
# f755aca verified
import random
from string import Formatter
from typing import Dict, Any, Union
from .config import DEFAULT_NAMES, TEMPLATE, SEED, SUBSEEDS, SUPPORTED_LANGUAGES, LATIN_AMERICAN_COUNTRIES
def get_random_default_name(language: str = "en", gender: str = None) -> str:
    """
    Pick a random default texter name for ``language``.

    The candidate names are looked up in ``DEFAULT_NAMES`` under the language
    code. When that entry is missing or empty, the English (``"en"``) entry is
    used instead; when that is missing or empty as well, the literal
    ``"DefaultName"`` is returned.

    Note: ``gender`` is accepted but is not consulted by this function.
    """
    # `or` covers both "key missing" (None) and "present but empty".
    candidates = DEFAULT_NAMES.get(language) or DEFAULT_NAMES.get("en", [])
    return random.choice(candidates) if candidates else "DefaultName"
def _get_subseed_description_(
scenario_config: Dict[str, str],
subseed_name: str,
SUBSEED_VALUES: Dict[str, Any],
language: str
) -> str:
if subseed_name not in scenario_config:
raise Exception(f"{subseed_name} not in scenario config")
subseed_value = scenario_config[subseed_name]
subseed_dict = SUBSEED_VALUES.get(subseed_value, {})
descriptions_dict = subseed_dict.get("description", {})
# Fallback to English or empty
descriptions_list = descriptions_dict.get(language, descriptions_dict.get("en", []))
subseed_descrip = random.choice(descriptions_list) if descriptions_list else ""
# Nested placeholders (e.g. {plan_details}, {injury_method})
format_opts = [fn for _, fn, _, _ in Formatter().parse(subseed_descrip) if fn is not None]
format_values = {}
for opt_name in format_opts:
nested_dict = subseed_dict.get(opt_name, {})
opt_list = nested_dict.get(language, nested_dict.get("en", []))
format_values[opt_name] = random.choice(opt_list) if opt_list else ""
return subseed_descrip.format(**format_values)
def get_seed_description(
    scenario_config: Dict[str, Any],
    texter_name: str,
    SUBSEEDS: Dict[str, Any],
    SEED: Union[str, Dict[str, str]],
    language: str = "en"
) -> str:
    """
    Render the scenario seed text for ``language`` with every placeholder filled.

    ``SEED`` is either a single format string or a dict of format strings keyed
    by language code (English is used when the language is absent, and an empty
    string when English is absent too). Placeholders are resolved as follows:

    * ``{texter_name}`` -> the ``texter_name`` argument.
    * ``{country}`` -> a random entry of ``LATIN_AMERICAN_COUNTRIES`` when
      ``language`` is ``"es"``, otherwise the string ``"Unknown"``.
    * anything else -> the subseed description produced by
      ``_get_subseed_description_``; a trailing ``_desc`` is removed from the
      placeholder name to obtain the subseed name.
    """
    # Choose the seed text: per-language dict or a plain string.
    if not isinstance(SEED, dict):
        seed_text = SEED
    elif language in SEED:
        seed_text = SEED[language]
    else:
        seed_text = SEED.get("en", "")

    # Collect every named field up front so a malformed seed fails before any
    # placeholder is resolved.
    placeholders = [
        parsed[1] for parsed in Formatter().parse(seed_text) if parsed[1] is not None
    ]

    filled = {}
    for placeholder in placeholders:
        if placeholder == "texter_name":
            filled[placeholder] = texter_name
            continue

        if placeholder == "country":
            # Only the Spanish seed is expected to use {country}; any other
            # language gets a fixed stand-in value.
            filled[placeholder] = (
                random.choice(LATIN_AMERICAN_COUNTRIES) if language == "es" else "Unknown"
            )
            continue

        # e.g. "crisis_desc" -> subseed "crisis"; names without the suffix are used as-is.
        subseed_key = placeholder[:-5] if placeholder.endswith('_desc') else placeholder
        filled[placeholder] = _get_subseed_description_(
            scenario_config,
            subseed_key,
            SUBSEEDS.get(subseed_key, {}),
            language
        )

    return seed_text.format(**filled)
def get_template(
    language: str = "en",
    texter_name: str = None,
    SEED: Union[str, Dict[str, str]] = SEED,
    **kwargs
) -> str:
    """
    Build the full conversation prompt for ``language``.

    The template is read from ``TEMPLATE["<LANG>_template"]["description"]``
    (language code upper-cased). Its ``{current_seed}`` placeholder is replaced
    by the seed text rendered from ``SEED`` and the scenario settings passed as
    keyword arguments. When ``texter_name`` is falsy, a random default name for
    the language is used. The result has every line stripped and blank lines
    removed.

    Raises:
        ValueError: If no non-empty template exists for ``language``.
    """
    template_entry = TEMPLATE.get(f"{language.upper()}_template", {})
    template = template_entry.get("description", "")
    if not template:
        raise ValueError(f"Template for language '{language}' not found.")

    # Fall back to a language-appropriate default name when none was supplied.
    name = texter_name if texter_name else get_random_default_name(language=language)

    # The remaining keyword arguments are the scenario configuration.
    scenario_seed = get_seed_description(
        scenario_config=dict(kwargs),
        texter_name=name,
        SUBSEEDS=SUBSEEDS,
        SEED=SEED,
        language=language
    )

    rendered = template.format(current_seed=scenario_seed)

    # Normalize whitespace: trim each line and drop the ones left empty.
    trimmed_lines = (raw_line.strip() for raw_line in rendered.split("\n"))
    return "\n".join(kept for kept in trimmed_lines if kept)