oceansweep's picture
Upload 155 files
43cd37c verified
raw
history blame
14.5 kB
# ccv3_parser.py
#
#
# Imports
from typing import Any, Dict, List, Optional, Union
import re
#
# External Imports
#
# Local Imports
from App_Function_Libraries.Personas.models import Lorebook, Asset, CharacterCardV3, CharacterCardV3Data, Decorator, \
LorebookEntry
from App_Function_Libraries.Personas.utils import validate_iso_639_1, extract_json_from_charx, parse_json_file, \
extract_text_chunks_from_png, decode_base64
#
############################################################################################################
#
# Classes:
class CCv3ParserError(Exception):
    """Custom exception for CCv3 Parser errors.

    Raised by CharacterCardV3Parser for unsupported input types, failed
    extraction/decoding of the input, and card data that fails validation.
    """
class CharacterCardV3Parser:
    """Parse Character Card V3 (CCv3) data from JSON, PNG/APNG or CHARX input.

    Usage: construct with the raw input and its type, call ``parse()``, then
    read the result with ``get_character_card()``. All validation and
    extraction failures are reported as ``CCv3ParserError``.
    """

    REQUIRED_SPEC = 'chara_card_v3'
    REQUIRED_VERSION = '3.0'

    def __init__(self, input_data: Union[str, bytes], input_type: str):
        """
        Initialize the parser with input data.

        :param input_data: The input data as a string or bytes.
        :param input_type: The type of the input data: 'json', 'png', 'apng', 'charx'
            (compared case-insensitively).
        """
        self.input_data = input_data
        self.input_type = input_type.lower()
        # Populated by parse(); stays None until a parse succeeds.
        self.character_card: Optional[CharacterCardV3] = None

    def parse(self):
        """
        Main method to parse the input data based on its type.

        :raises CCv3ParserError: If the input type is unsupported or parsing fails.
        """
        if self.input_type == 'json':
            self.parse_json_input()
        elif self.input_type in ('png', 'apng'):
            self.parse_png_apng_input()
        elif self.input_type == 'charx':
            self.parse_charx_input()
        else:
            raise CCv3ParserError(f"Unsupported input type: {self.input_type}")

    def parse_json_input(self):
        """
        Parse JSON input directly.

        String input is encoded to UTF-8 bytes before being handed to
        ``parse_json_file``; bytes input is passed through unchanged.

        :raises CCv3ParserError: On any failure; the original error is chained.
        """
        try:
            raw = self.input_data.encode('utf-8') if isinstance(self.input_data, str) else self.input_data
            data = parse_json_file(raw)
            self.character_card = self._build_character_card(data)
        except Exception as e:
            raise CCv3ParserError(f"Failed to parse JSON input: {e}") from e

    def parse_png_apng_input(self):
        """
        Parse PNG or APNG input by extracting the 'ccv3' tEXt chunk.

        The chunk payload is base64-decoded and then parsed as JSON.

        :raises CCv3ParserError: On any failure (including a missing 'ccv3'
            chunk); the original error is chained.
        """
        try:
            text_chunks = extract_text_chunks_from_png(self.input_data)
            if 'ccv3' not in text_chunks:
                raise CCv3ParserError("PNG/APNG does not contain 'ccv3' tEXt chunk.")
            ccv3_json_bytes = decode_base64(text_chunks['ccv3'])
            data = parse_json_file(ccv3_json_bytes)
            self.character_card = self._build_character_card(data)
        except Exception as e:
            raise CCv3ParserError(f"Failed to parse PNG/APNG input: {e}") from e

    def parse_charx_input(self):
        """
        Parse CHARX input by extracting 'card.json' from the ZIP archive.

        :raises CCv3ParserError: On any failure; the original error is chained.
        """
        try:
            data = extract_json_from_charx(self.input_data)
            self.character_card = self._build_character_card(data)
        except Exception as e:
            raise CCv3ParserError(f"Failed to parse CHARX input: {e}") from e

    def _build_character_card(self, data: Dict[str, Any]) -> CharacterCardV3:
        """
        Build the CharacterCardV3 object from parsed data.

        :param data: The decoded top-level card object.
        :return: The validated CharacterCardV3.
        :raises CCv3ParserError: If the spec, version, or any required field is
            missing or malformed.
        """
        if not isinstance(data, dict):
            raise CCv3ParserError("CharacterCardV3 must be a JSON object.")

        # Validate required fields
        spec = data.get('spec')
        spec_version = data.get('spec_version')
        if spec != self.REQUIRED_SPEC:
            raise CCv3ParserError(f"Invalid spec: Expected '{self.REQUIRED_SPEC}', got '{spec}'")
        if spec_version != self.REQUIRED_VERSION:
            # As per spec, should not reject but handle versions.
            # For now, proceed if version is >= 3.0.
            try:
                version_float = float(spec_version)
            except (TypeError, ValueError) as err:
                # TypeError covers a missing (None) or non-scalar spec_version.
                raise CCv3ParserError(f"Invalid spec_version format: '{spec_version}'") from err
            # Written as "not >=" so that NaN is rejected as well.
            if not version_float >= 3.0:
                raise CCv3ParserError(f"Unsupported spec_version: '{spec_version}' (must be >= '3.0')")

        data_field = data.get('data')
        if not data_field:
            raise CCv3ParserError("Missing 'data' field in CharacterCardV3 object.")
        if not isinstance(data_field, dict):
            raise CCv3ParserError("'data' field in CharacterCardV3 object must be a JSON object.")

        # Extract required fields
        required_fields = ['name', 'description', 'tags', 'creator', 'character_version',
                           'mes_example', 'extensions', 'system_prompt',
                           'post_history_instructions', 'first_mes',
                           'alternate_greetings', 'personality', 'scenario',
                           'creator_notes', 'group_only_greetings']
        for field_name in required_fields:
            if field_name not in data_field:
                raise CCv3ParserError(f"Missing required field in data: '{field_name}'")

        # Parse assets; a missing or null 'assets' falls back to the default icon.
        assets_data = data_field.get('assets')
        if assets_data is None:
            assets_data = [{
                'type': 'icon',
                'uri': 'ccdefault:',
                'name': 'main',
                'ext': 'png'
            }]
        assets = self._parse_assets(assets_data)

        # Parse creator_notes_multilingual
        creator_notes_multilingual = data_field.get('creator_notes_multilingual')
        if creator_notes_multilingual is not None:
            if not isinstance(creator_notes_multilingual, dict):
                raise CCv3ParserError("'creator_notes_multilingual' must be a dictionary.")
            # Validate ISO 639-1 codes
            for lang_code in creator_notes_multilingual:
                if not validate_iso_639_1(lang_code):
                    raise CCv3ParserError(f"Invalid language code in 'creator_notes_multilingual': '{lang_code}'")

        # Parse character_book (absent/empty values yield no lorebook)
        character_book_data = data_field.get('character_book')
        character_book = self._parse_lorebook(character_book_data) if character_book_data else None

        # Build CharacterCardV3Data
        character_card_data = CharacterCardV3Data(
            name=data_field['name'],
            description=data_field['description'],
            tags=data_field['tags'],
            creator=data_field['creator'],
            character_version=data_field['character_version'],
            mes_example=data_field['mes_example'],
            extensions=data_field['extensions'],
            system_prompt=data_field['system_prompt'],
            post_history_instructions=data_field['post_history_instructions'],
            first_mes=data_field['first_mes'],
            alternate_greetings=data_field['alternate_greetings'],
            personality=data_field['personality'],
            scenario=data_field['scenario'],
            creator_notes=data_field['creator_notes'],
            character_book=character_book,
            assets=assets,
            nickname=data_field.get('nickname'),
            creator_notes_multilingual=creator_notes_multilingual,
            source=data_field.get('source'),
            group_only_greetings=data_field['group_only_greetings'],
            creation_date=data_field.get('creation_date'),
            modification_date=data_field.get('modification_date')
        )
        return CharacterCardV3(
            spec=spec,
            spec_version=spec_version,
            data=character_card_data
        )

    def _parse_assets(self, assets_data: List[Dict[str, Any]]) -> List[Asset]:
        """
        Parse and validate assets.

        :param assets_data: List of asset objects, each with string 'type',
            'uri' and 'ext' fields and an optional 'name'.
        :return: List of Asset objects with the extension lower-cased.
        :raises CCv3ParserError: If the list or any asset is malformed.
        """
        if not isinstance(assets_data, (list, tuple)):
            raise CCv3ParserError("'assets' must be a list.")

        assets = []
        for asset_data in assets_data:
            if not isinstance(asset_data, dict):
                raise CCv3ParserError("Each asset must be a JSON object.")
            # Validate required fields
            for field in ('type', 'uri', 'ext'):
                if field not in asset_data:
                    raise CCv3ParserError(f"Asset missing required field: '{field}'")
                if not isinstance(asset_data[field], str):
                    raise CCv3ParserError(f"Asset field '{field}' must be a string.")
            # Optional 'name'
            name = asset_data.get('name', '')
            # Validate 'ext'; fullmatch (unlike match with '$') rejects a trailing newline.
            ext = asset_data['ext'].lower()
            if not re.fullmatch(r'[a-z0-9]+', ext):
                raise CCv3ParserError(f"Invalid file extension in asset: '{ext}'")
            assets.append(Asset(
                type=asset_data['type'],
                uri=asset_data['uri'],
                name=name,
                ext=ext
            ))
        return assets

    def _parse_lorebook(self, lorebook_data: Dict[str, Any]) -> Lorebook:
        """
        Parse and validate Lorebook object.

        :param lorebook_data: The decoded 'character_book' object.
        :return: A Lorebook; optional fields that are absent are passed as None
            ('extensions' defaults to an empty dict).
        :raises CCv3ParserError: If the lorebook or any entry is malformed.
        """
        if not isinstance(lorebook_data, dict):
            raise CCv3ParserError("Lorebook must be a JSON object.")

        # A missing or null 'entries' is treated as an empty list.
        entries_data = lorebook_data.get('entries')
        if entries_data is None:
            entries_data = []
        entries = self._parse_lorebook_entries(entries_data)

        return Lorebook(
            name=lorebook_data.get('name'),
            description=lorebook_data.get('description'),
            scan_depth=lorebook_data.get('scan_depth'),
            token_budget=lorebook_data.get('token_budget'),
            recursive_scanning=lorebook_data.get('recursive_scanning'),
            extensions=lorebook_data.get('extensions', {}),
            entries=entries
        )

    def _parse_lorebook_entries(self, entries_data: List[Dict[str, Any]]) -> List[LorebookEntry]:
        """
        Parse and validate Lorebook entries.

        :param entries_data: List of entry objects; each requires 'keys',
            'content', 'enabled' and 'insertion_order'.
        :return: List of LorebookEntry objects, with decorators extracted from
            each entry's content.
        :raises CCv3ParserError: If the list or any entry is malformed.
        """
        if not isinstance(entries_data, (list, tuple)):
            raise CCv3ParserError("'entries' field in Lorebook must be a list.")

        entries = []
        for entry_data in entries_data:
            if not isinstance(entry_data, dict):
                raise CCv3ParserError("Lorebook entry must be a JSON object.")
            # Validate required fields
            for field in ('keys', 'content', 'enabled', 'insertion_order'):
                if field not in entry_data:
                    raise CCv3ParserError(f"Lorebook entry missing required field: '{field}'")
            keys = entry_data['keys']
            if not isinstance(keys, list) or not all(isinstance(k, str) for k in keys):
                raise CCv3ParserError("'keys' field in Lorebook entry must be a list of strings.")
            if not isinstance(entry_data['content'], str):
                raise CCv3ParserError("'content' field in Lorebook entry must be a string.")
            if not isinstance(entry_data['enabled'], bool):
                raise CCv3ParserError("'enabled' field in Lorebook entry must be a boolean.")
            raw_order = entry_data['insertion_order']
            # bool is a subclass of int, so it has to be excluded explicitly.
            if isinstance(raw_order, bool) or not isinstance(raw_order, (int, float)):
                raise CCv3ParserError("'insertion_order' field in Lorebook entry must be a number.")
            try:
                insertion_order = int(raw_order)
            except (ValueError, OverflowError) as err:
                # int() rejects NaN and +/-infinity.
                raise CCv3ParserError(
                    "'insertion_order' field in Lorebook entry must be a finite number.") from err

            # Optional fields
            use_regex = entry_data.get('use_regex', False)
            constant = entry_data.get('constant')
            selective = entry_data.get('selective')
            secondary_keys = entry_data.get('secondary_keys')
            position = entry_data.get('position')
            name = entry_data.get('name')
            priority = entry_data.get('priority')
            entry_id = entry_data.get('id')
            comment = entry_data.get('comment')

            # Compare against None (not truthiness) so falsy values of the
            # wrong type, e.g. 0 or "", are still rejected.
            if selective is not None and not isinstance(selective, bool):
                raise CCv3ParserError("'selective' field in Lorebook entry must be a boolean.")
            if secondary_keys is not None:
                if not isinstance(secondary_keys, list) or not all(isinstance(k, str) for k in secondary_keys):
                    raise CCv3ParserError("'secondary_keys' field in Lorebook entry must be a list of strings.")
            if position is not None and not isinstance(position, str):
                raise CCv3ParserError("'position' field in Lorebook entry must be a string.")

            # Parse decorators from content
            decorators = self._extract_decorators(entry_data['content'])

            # Create LorebookEntry
            entries.append(LorebookEntry(
                keys=keys,
                content=entry_data['content'],
                enabled=entry_data['enabled'],
                insertion_order=insertion_order,
                use_regex=use_regex,
                constant=constant,
                selective=selective,
                secondary_keys=secondary_keys,
                position=position,
                decorators=decorators,
                name=name,
                priority=priority,
                id=entry_id,
                comment=comment
            ))
        return entries

    def _extract_decorators(self, content: str) -> List[Decorator]:
        """
        Extract decorators from the content field.

        Every line starting with '@@' is handed to ``_parse_decorator_line``;
        lines that yield no decorator are skipped.

        :param content: The lorebook entry content.
        :return: Decorators in the order they appear in the content.
        """
        decorators = []
        for line in content.splitlines():
            if not line.startswith('@@'):
                continue
            decorator = self._parse_decorator_line(line)
            if decorator:
                decorators.append(decorator)
        return decorators

    def _parse_decorator_line(self, line: str) -> Optional[Decorator]:
        """
        Parses a single decorator line.

        Example:
            @@decorator_name value
            @@@fallback_decorator value

        Primary ('@@') and fallback ('@@@') lines are parsed the same way and
        each returned as a standalone Decorator; fallbacks are not linked to
        the primary decorator that precedes them.

        :param line: One line of lorebook entry content.
        :return: The parsed Decorator, or None if the line is not a decorator
            line or carries no decorator name (e.g. a bare '@@').
        """
        if not line.startswith('@@'):
            return None
        name_value = line.lstrip('@').strip()
        # The name is everything up to the first space; the rest is the value.
        name, separator, remainder = name_value.partition(' ')
        if not name:
            # A bare '@@' / '@@@' has nothing to apply; ignore it.
            return None
        value = remainder if separator else None
        return Decorator(name=name, value=value)

    def get_character_card(self) -> Optional[CharacterCardV3]:
        """Returns the parsed CharacterCardV3 object, or None if nothing has been parsed yet."""
        return self.character_card
#
# End of ccv3_parser.py
############################################################################################################