| | |
| |
|
| | import json |
| | from typing import List, Dict, Any |
| |
|
| | from .config import OUTPUT_JSONL, DEFAULT_SAMPLE_COUNT |
| | from .dictionary_access import load_all_dictionaries |
| |
|
| | |
| | from .templates_basic import generate_emotional_expression_samples |
| | from .templates_actions import generate_action_object_samples |
| | from .templates_context import generate_context_rich_samples |
| | from .templates_context_advanced import generate_advanced_context_samples |
| |
|
| | |
| | from .templates_identity import generate_identity_samples |
| | from .templates_intent import generate_intent_samples |
| | from .templates_behavior import generate_behavior_samples |
| | from .templates_memory import generate_memory_samples |
| | from .templates_thought import generate_thought_samples |
| | from .templates_safety_response import generate_safety_response_samples |
| |
|
| | |
| | from .training_builder import build_training_sample, save_samples |
| |
|
| |
|
def _as_training_sample(sample):
    """Wrap one raw generator sample via ``build_training_sample``.

    Every field is read from ``sample`` with ``.get``; a key that is absent
    falls back to the default written next to it below.
    """
    # Built on every call so that no default dict is shared between samples.
    fallback_intent = {
        "goal": "assist",
        "urgency": "1",
        "focus": "support"
    }
    fallback_behavior = {
        "tone": "warm",
        "pacing": "steady",
        "depth": "medium",
        "style": "natural",
        "clarity": "high"
    }

    return build_training_sample(
        user_text=sample.get("input", ""),
        identity=sample.get("identity", "A helpful, aligned Glyphic agent."),
        emotion=sample.get("emotion", "neutral"),
        sensory=sample.get("sensory", "none"),
        social=sample.get("social", "alone"),
        intent=sample.get("intent", fallback_intent),
        behavior=sample.get("behavior", fallback_behavior),
        memory_summary=sample.get("memory", ""),
        thought_chain=sample.get("thought_chain", ""),
        glyphic_output=sample.get("glyphic", ""),
        realized_output=sample.get("output", ""),
    )


def main() -> None:
    """Generate raw samples, wrap them as training samples, and save them.

    Steps, in order:
      1. Load the dictionaries with ``load_all_dictionaries``.
      2. Call each template generator with ``source_language="en"``; the
         first four get ``DEFAULT_SAMPLE_COUNT // 6`` samples each, the
         remaining six get ``DEFAULT_SAMPLE_COUNT // 12`` each.
      3. Concatenate the results in generator order.
      4. Pass every raw sample through ``build_training_sample``.
      5. Hand the result to ``save_samples`` together with
         ``str(OUTPUT_JSONL)`` and print a one-line summary.
    """
    lexicons = load_all_dictionaries()

    large_share = DEFAULT_SAMPLE_COUNT // 6
    small_share = DEFAULT_SAMPLE_COUNT // 12

    # (generator, sample budget) pairs. The order here is both the call
    # order and the order in which the batches are concatenated.
    generation_plan = (
        (generate_emotional_expression_samples, large_share),
        (generate_action_object_samples, large_share),
        (generate_context_rich_samples, large_share),
        (generate_advanced_context_samples, large_share),
        (generate_identity_samples, small_share),
        (generate_intent_samples, small_share),
        (generate_behavior_samples, small_share),
        (generate_memory_samples, small_share),
        (generate_thought_samples, small_share),
        (generate_safety_response_samples, small_share),
    )

    # Run every generator before combining anything.
    batches = [
        generate(dictionaries=lexicons, count=share, source_language="en")
        for generate, share in generation_plan
    ]

    # Left-to-right "+" chain over the batches.
    raw_samples = batches[0]
    for batch in batches[1:]:
        raw_samples = raw_samples + batch

    training_samples = [_as_training_sample(sample) for sample in raw_samples]

    save_samples(str(OUTPUT_JSONL), training_samples)

    print(f"Wrote {len(training_samples)} Glyphic training samples to {OUTPUT_JSONL}")


if __name__ == "__main__":
    main()
| |
|
| |
|