Spaces:
Running
Running
File size: 5,021 Bytes
1e91476 f755aca 1e91476 f755aca 1e91476 45eec60 f755aca 1e91476 45eec60 1e91476 45eec60 f755aca 45eec60 1e91476 45eec60 f755aca 1e91476 f755aca 1e91476 f755aca 1e91476 f755aca 1e91476 f755aca 1e91476 f755aca 45eec60 f755aca 45eec60 f755aca 1e91476 f755aca 1e91476 f755aca 1e91476 f755aca 1e91476 f755aca 1e91476 45eec60 f755aca 45eec60 f755aca 45eec60 f755aca 1e91476 45eec60 f755aca |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
import random
from string import Formatter
from typing import Dict, Any, Union
from .config import DEFAULT_NAMES, TEMPLATE, SEED, SUBSEEDS, SUPPORTED_LANGUAGES, LATIN_AMERICAN_COUNTRIES
def get_random_default_name(language: str = "en", gender: str = None) -> str:
    """
    Pick a random default name for ``language``.

    The names registered under ``language`` in ``DEFAULT_NAMES`` are used when
    that entry exists and is non-empty; otherwise the English ("en") entry is
    used. If no English names are available either, the literal string
    "DefaultName" is returned.

    ``gender`` is accepted but is not consulted by this function.
    """
    # `or` covers both a missing language key and an empty list of names.
    candidates = DEFAULT_NAMES.get(language) or DEFAULT_NAMES.get("en", [])
    return random.choice(candidates) if candidates else "DefaultName"
def _choose_localized_(options: Dict[str, Any], language: str) -> str:
    """Return a random entry of ``options[language]``, else of ``options["en"]``, else ""."""
    # A missing key and an empty list are treated alike, so a language that is
    # declared but has no entries still falls back to English.
    choices = options.get(language) or options.get("en") or []
    return random.choice(choices) if choices else ""


def _get_subseed_description_(
    scenario_config: Dict[str, str],
    subseed_name: str,
    SUBSEED_VALUES: Dict[str, Any],
    language: str
) -> str:
    """
    Build the description text for one sub-seed of the scenario.

    The value selected for ``subseed_name`` in ``scenario_config`` is looked up
    in ``SUBSEED_VALUES``. One of that value's "description" strings for
    ``language`` is picked at random, and every ``{placeholder}`` inside it is
    filled with a random entry stored under the placeholder's name in the same
    value dict (e.g. {plan_details}, {injury_method}).

    Args:
        scenario_config: Maps sub-seed names to the value chosen for each.
        subseed_name: The sub-seed to describe; must be a key of
            ``scenario_config``.
        SUBSEED_VALUES: Maps each possible value of this sub-seed to a dict
            holding a "description" entry and any nested placeholder entries,
            each of which maps a language code to a list of strings.
        language: Language code to pick texts for. English ("en") is used when
            the language has no (or only an empty) list of texts.

    Returns:
        The formatted description, or "" when no description is available.

    Raises:
        ValueError: If ``subseed_name`` is not present in ``scenario_config``.
    """
    if subseed_name not in scenario_config:
        raise ValueError(f"{subseed_name} not in scenario config")

    # Unknown values resolve to an empty dict and therefore to an empty text.
    value_spec = SUBSEED_VALUES.get(scenario_config[subseed_name], {})
    description = _choose_localized_(value_spec.get("description", {}), language)

    # Literal text segments are reported with a field name of None; skip them.
    placeholders = [
        field for _, field, _, _ in Formatter().parse(description) if field is not None
    ]
    replacements = {
        field: _choose_localized_(value_spec.get(field, {}), language)
        for field in placeholders
    }
    return description.format(**replacements)
def get_seed_description(
    scenario_config: Dict[str, Any],
    texter_name: str,
    SUBSEEDS: Dict[str, Any],
    SEED: Union[str, Dict[str, str]],
    language: str = "en"
) -> str:
    """
    Format the SEED text for ``language`` with values drawn from ``scenario_config``.

    Args:
        scenario_config: Maps sub-seed names to the value chosen for each.
        texter_name: Substituted for the ``{texter_name}`` placeholder.
        SUBSEEDS: Maps each sub-seed name to its table of possible values; the
            matching table is handed to ``_get_subseed_description_``.
        SEED: Either a single seed string, or a dict mapping language codes to
            seed strings.
        language: Language code to use. When ``SEED`` is a dict and has no
            (or an empty) text for this language, the English ("en") text is
            used; if that is unavailable too, the seed is "".

    Returns:
        The seed text with every placeholder filled in.

    Raises:
        Exception: Propagated from ``_get_subseed_description_`` when a
            placeholder's sub-seed is missing from ``scenario_config``.
    """
    # 1. Select the seed text. An empty localized entry is treated like a
    #    missing one so it falls back to English instead of yielding "".
    if isinstance(SEED, dict):
        seed_text = SEED.get(language) or SEED.get("en") or ""
    else:
        seed_text = SEED  # single string, used for every language

    # 2. Collect the placeholders in order of appearance; literal text segments
    #    are reported with a field name of None and are skipped.
    placeholders = [
        field for _, field, _, _ in Formatter().parse(seed_text) if field is not None
    ]

    # 3. Resolve a value for each placeholder.
    values = {}
    for field in placeholders:
        if field == "texter_name":
            values[field] = texter_name
        elif field == "country":
            # A country is only drawn for Spanish; any other language gets a
            # fixed filler so formatting cannot fail on this placeholder.
            if language == "es":
                values[field] = random.choice(LATIN_AMERICAN_COUNTRIES)
            else:
                values[field] = "Unknown"
        else:
            # "crisis_desc" -> sub-seed "crisis"; names without the suffix are
            # used unchanged.
            base_name = field[:-5] if field.endswith("_desc") else field
            values[field] = _get_subseed_description_(
                scenario_config,
                base_name,
                SUBSEEDS.get(base_name, {}),
                language
            )

    # 4. Fill the placeholders.
    return seed_text.format(**values)
def get_template(
    language: str = "en",
    texter_name: str = None,
    SEED: Union[str, Dict[str, str]] = SEED,
    **kwargs
) -> str:
    """
    Render the conversation template for ``language`` with the scenario seed filled in.

    The template is read from ``TEMPLATE["<LANGUAGE>_template"]["description"]``.
    Extra keyword arguments form the scenario config passed on to
    ``get_seed_description``; when ``texter_name`` is not given, a random default
    name for the language is used. Blank lines and surrounding whitespace on
    each line are removed from the result.

    Raises:
        ValueError: If no non-empty template exists for ``language``.
    """
    template = TEMPLATE.get(f"{language.upper()}_template", {}).get("description", "")
    if not template:
        raise ValueError(f"Template for language '{language}' not found.")

    # Fall back to a language-appropriate default when no name was supplied.
    name = texter_name or get_random_default_name(language=language)

    # The keyword arguments are copied so the seed builder gets its own dict.
    seed_text = get_seed_description(
        scenario_config=dict(kwargs),
        texter_name=name,
        SUBSEEDS=SUBSEEDS,
        SEED=SEED,
        language=language,
    )

    # Substitute the {current_seed} placeholder, then tidy the layout.
    rendered = template.format(current_seed=seed_text)
    stripped_lines = (raw.strip() for raw in rendered.split("\n"))
    return "\n".join(text for text in stripped_lines if text)