| """ |
| EXP-05: Bit-Chain Compression/Expansion Losslessness Validation |
| |
| Tests whether STAT7 bit-chains can be compressed through the full pipeline |
| (fragments → clusters → glyphs → mist) and then expanded back to original |
| coordinates without information loss. |
| |
| Validates: |
| - Provenance chain integrity (all source IDs tracked) |
| - STAT7 coordinate reconstruction accuracy |
| - Luminosity decay through compression stages |
| - Narrative preservation (embeddings, affect survival) |
| - Compression ratio efficiency |
| |
| Status: Phase 2 validation experiment |
| """ |
|
|
| import json |
| import hashlib |
| import time |
| import uuid |
| import random |
| import sys |
| from datetime import datetime, timezone |
| from decimal import Decimal, ROUND_HALF_EVEN |
| from typing import Dict, List, Tuple, Any, Optional |
| from dataclasses import dataclass, asdict, field |
| from collections import defaultdict |
| import statistics |
| import math |
| from pathlib import Path |
|
|
| |
| from stat7_experiments import ( |
| normalize_float, |
| normalize_timestamp, |
| sort_json_keys, |
| canonical_serialize, |
| compute_address_hash, |
| Coordinates, |
| BitChain, |
| REALMS, |
| HORIZONS, |
| ENTITY_TYPES, |
| generate_random_bitchain, |
| ) |
|
|
|
|
| |
| |
| |
|
|
| @dataclass |
| class CompressionStage: |
| """Single stage in the compression pipeline.""" |
| stage_name: str |
| size_bytes: int |
| record_count: int |
| key_metadata: Dict[str, Any] |
| luminosity: float |
| provenance_intact: bool |
|
|
| def compression_ratio_from_original(self, original_bytes: int) -> float: |
| """Calculate compression ratio relative to original.""" |
| return original_bytes / max(self.size_bytes, 1) |
|
|
|
|
| @dataclass |
| class BitChainCompressionPath: |
| """Complete compression path for a single bit-chain.""" |
| original_bitchain: BitChain |
| original_address: str |
| original_stat7_dict: Dict[str, Any] |
| original_serialized_size: int |
| original_luminosity: float |
|
|
| |
| stages: List[CompressionStage] = field(default_factory=list) |
|
|
| |
| reconstructed_address: Optional[str] = None |
| coordinate_match_accuracy: float = 0.0 |
| can_expand_completely: bool = False |
|
|
| |
| final_compression_ratio: float = 0.0 |
| luminosity_final: float = 0.0 |
| narrative_preserved: bool = False |
| provenance_chain_complete: bool = False |
|
|
| def calculate_stats(self) -> Dict[str, Any]: |
| """Compute summary statistics for this compression path.""" |
| if not self.stages: |
| return {} |
|
|
| final_stage = self.stages[-1] |
| return { |
| 'original_realm': self.original_stat7_dict.get('realm'), |
| 'original_address': self.original_address[:16] + '...', |
| 'stages_count': len(self.stages), |
| 'final_stage': final_stage.stage_name, |
| 'compression_ratio': self.final_compression_ratio, |
| 'luminosity_decay': self.original_luminosity - self.luminosity_final, |
| 'coordinate_accuracy': round(self.coordinate_match_accuracy, 4), |
| 'provenance_intact': self.provenance_chain_complete, |
| 'narrative_preserved': self.narrative_preserved, |
| 'can_expand': self.can_expand_completely, |
| } |
|
|
|
|
| @dataclass |
| class CompressionExperimentResults: |
| """Complete results from EXP-05 compression/expansion validation.""" |
| start_time: str |
| end_time: str |
| total_duration_seconds: float |
| num_bitchains_tested: int |
|
|
| |
| compression_paths: List[BitChainCompressionPath] |
|
|
| |
| avg_compression_ratio: float |
| avg_luminosity_decay: float |
| avg_coordinate_accuracy: float |
| percent_provenance_intact: float |
| percent_narrative_preserved: float |
| percent_expandable: float |
|
|
| |
| is_lossless: bool |
| major_findings: List[str] = field(default_factory=list) |
|
|
| def to_dict(self) -> Dict[str, Any]: |
| """Convert to serializable dict.""" |
| return { |
| 'experiment': 'EXP-05', |
| 'test_type': 'Compression/Expansion Losslessness', |
| 'start_time': self.start_time, |
| 'end_time': self.end_time, |
| 'total_duration_seconds': round(self.total_duration_seconds, 3), |
| 'bitchains_tested': self.num_bitchains_tested, |
| 'aggregate_metrics': { |
| 'avg_compression_ratio': round(self.avg_compression_ratio, 3), |
| 'avg_luminosity_decay': round(self.avg_luminosity_decay, 4), |
| 'avg_coordinate_accuracy': round(self.avg_coordinate_accuracy, 4), |
| 'percent_provenance_intact': round(self.percent_provenance_intact, 1), |
| 'percent_narrative_preserved': round(self.percent_narrative_preserved, 1), |
| 'percent_expandable': round(self.percent_expandable, 1), |
| }, |
| 'compression_quality': { |
| 'is_lossless': self.is_lossless, |
| 'major_findings': self.major_findings, |
| }, |
| 'sample_paths': [ |
| p.calculate_stats() |
| for p in self.compression_paths[:min(5, len(self.compression_paths))] |
| ], |
| 'all_valid': all( |
| p.provenance_chain_complete and p.narrative_preserved |
| for p in self.compression_paths |
| ) if self.compression_paths else False, |
| } |
|
|
|
|
| |
| |
| |
|
|
| class CompressionPipeline: |
| """Simulates the compression pipeline from the Seed engine.""" |
|
|
| def __init__(self): |
| self.fragment_store = {} |
| self.cluster_store = {} |
| self.glyph_store = {} |
| self.mist_store = {} |
|
|
| def compress_bitchain(self, bc: BitChain) -> BitChainCompressionPath: |
| """ |
| Compress a bit-chain through the full pipeline. |
| |
| Stages: |
| 1. Original STAT7 coordinates |
| 2. Fragment representation (serialize bit-chain) |
| 3. Cluster (group fragments - here just one per chain) |
| 4. Glyph (molten form with provenance) |
| 5. Mist (evaporated proto-thought) |
| """ |
| |
| bc_dict = { |
| 'id': bc.id, |
| 'coordinates': asdict(bc.coordinates), |
| } |
|
|
| path = BitChainCompressionPath( |
| original_bitchain=bc, |
| original_address=bc.compute_address(), |
| original_stat7_dict=asdict(bc.coordinates), |
| original_serialized_size=len(canonical_serialize(bc_dict)), |
| original_luminosity=bc.coordinates.velocity, |
| ) |
|
|
| |
| original_stage = CompressionStage( |
| stage_name="original", |
| size_bytes=path.original_serialized_size, |
| record_count=1, |
| key_metadata={ |
| 'address': path.original_address, |
| 'realm': bc.coordinates.realm, |
| 'velocity': bc.coordinates.velocity, |
| }, |
| luminosity=bc.coordinates.velocity, |
| provenance_intact=True, |
| ) |
| path.stages.append(original_stage) |
|
|
| |
| fragment_id = str(uuid.uuid4())[:12] |
| fragment = { |
| 'id': fragment_id, |
| 'bitchain_id': bc.id, |
| 'realm': bc.coordinates.realm, |
| 'text': f"{bc.coordinates.realm}:{bc.coordinates.lineage}:{bc.coordinates.density}", |
| 'heat': bc.coordinates.velocity, |
| 'embedding': [bc.coordinates.velocity, bc.coordinates.resonance], |
| } |
| self.fragment_store[fragment_id] = fragment |
|
|
| fragment_size = len(json.dumps(fragment)) |
| fragment_stage = CompressionStage( |
| stage_name="fragments", |
| size_bytes=fragment_size, |
| record_count=1, |
| key_metadata={ |
| 'fragment_id': fragment_id, |
| 'heat': fragment['heat'], |
| 'embedding': fragment['embedding'], |
| }, |
| luminosity=fragment['heat'], |
| provenance_intact=True, |
| ) |
| path.stages.append(fragment_stage) |
|
|
| |
| cluster_id = f"cluster_{hashlib.sha256(fragment_id.encode()).hexdigest()[:10]}" |
| cluster = { |
| 'id': cluster_id, |
| 'fragments': [fragment_id], |
| 'size': 1, |
| 'source_bitchain_ids': [bc.id], |
| 'provenance_hash': hashlib.sha256( |
| f"{bc.id}:{bc.coordinates.realm}".encode() |
| ).hexdigest(), |
| } |
| self.cluster_store[cluster_id] = cluster |
|
|
| cluster_size = len(json.dumps(cluster)) |
| cluster_stage = CompressionStage( |
| stage_name="cluster", |
| size_bytes=cluster_size, |
| record_count=1, |
| key_metadata={ |
| 'cluster_id': cluster_id, |
| 'source_bitchain_ids': cluster['source_bitchain_ids'], |
| 'provenance_hash': cluster['provenance_hash'], |
| }, |
| luminosity=fragment['heat'] * 0.95, |
| provenance_intact=True, |
| ) |
| path.stages.append(cluster_stage) |
|
|
| |
| glyph_id = f"mglyph_{hashlib.sha256(cluster_id.encode()).hexdigest()[:12]}" |
| affect_intensity = abs(bc.coordinates.resonance) |
| glyph = { |
| 'id': glyph_id, |
| 'source_ids': [bc.id], |
| 'source_cluster_id': cluster_id, |
| 'compressed_summary': f"[{bc.coordinates.realm}] gen={bc.coordinates.lineage}", |
| 'embedding': fragment['embedding'], |
| 'affect': { |
| 'awe': affect_intensity * 0.3, |
| 'humor': affect_intensity * 0.2, |
| 'tension': affect_intensity * 0.1, |
| }, |
| 'heat_seed': fragment['heat'] * 0.85, |
| 'provenance_hash': cluster['provenance_hash'], |
| 'luminosity': fragment['heat'] * 0.85, |
| } |
| self.glyph_store[glyph_id] = glyph |
|
|
| glyph_size = len(json.dumps(glyph)) |
| glyph_stage = CompressionStage( |
| stage_name="glyph", |
| size_bytes=glyph_size, |
| record_count=1, |
| key_metadata={ |
| 'glyph_id': glyph_id, |
| 'embedding': glyph['embedding'], |
| 'affect': glyph['affect'], |
| 'provenance_hash': glyph['provenance_hash'], |
| }, |
| luminosity=glyph['heat_seed'], |
| provenance_intact=True, |
| ) |
| path.stages.append(glyph_stage) |
|
|
| |
| mist_id = f"mist_{glyph_id[7:]}" |
| mist = { |
| 'id': mist_id, |
| 'source_glyph': glyph_id, |
| 'proto_thought': f"[Proto] {bc.coordinates.realm}...", |
| 'evaporation_temp': 0.7, |
| 'mythic_weight': affect_intensity, |
| 'technical_clarity': 0.6, |
| 'luminosity': glyph['heat_seed'] * 0.7, |
| |
| 'recovery_breadcrumbs': { |
| 'original_realm': bc.coordinates.realm, |
| 'original_lineage': bc.coordinates.lineage, |
| 'original_embedding': glyph['embedding'], |
| }, |
| } |
| self.mist_store[mist_id] = mist |
|
|
| mist_size = len(json.dumps(mist)) |
| mist_stage = CompressionStage( |
| stage_name="mist", |
| size_bytes=mist_size, |
| record_count=1, |
| key_metadata={ |
| 'mist_id': mist_id, |
| 'recovery_breadcrumbs': mist['recovery_breadcrumbs'], |
| 'luminosity': mist['luminosity'], |
| }, |
| luminosity=mist['luminosity'], |
| provenance_intact=True, |
| ) |
| path.stages.append(mist_stage) |
|
|
| |
| path.final_compression_ratio = path.original_serialized_size / max(mist_size, 1) |
| path.luminosity_final = mist['luminosity'] |
|
|
| |
| path = self._reconstruct_from_mist(path, mist) |
|
|
| return path |
|
|
| def _reconstruct_from_mist( |
| self, |
| path: BitChainCompressionPath, |
| mist: Dict[str, Any] |
| ) -> BitChainCompressionPath: |
| """Attempt to reconstruct STAT7 coordinates from mist form.""" |
| try: |
| breadcrumbs = mist.get('recovery_breadcrumbs', {}) |
|
|
| |
| realm = breadcrumbs.get('original_realm', 'void') |
| lineage = breadcrumbs.get('original_lineage', 0) |
|
|
| |
| reconstructed_coords = Coordinates( |
| realm=realm, |
| lineage=lineage, |
| adjacency=[], |
| horizon='crystallization', |
| velocity=mist['luminosity'], |
| resonance=mist.get('mythic_weight', 0.0), |
| density=0.0, |
| ) |
|
|
| |
| all_fields_present = all([ |
| realm != 'void', |
| lineage > 0, |
| mist.get('luminosity', 0) > 0, |
| ]) |
|
|
| |
| embedding = breadcrumbs.get('original_embedding', []) |
| narrative_preserved = len(embedding) > 0 |
|
|
| |
| original_coords = path.original_stat7_dict |
| fields_recovered = 0 |
| total_fields = 7 |
|
|
| if realm == original_coords.get('realm'): |
| fields_recovered += 1 |
| if lineage == original_coords.get('lineage'): |
| fields_recovered += 1 |
| if narrative_preserved: |
| fields_recovered += 1 |
|
|
| path.coordinate_match_accuracy = fields_recovered / total_fields |
| path.can_expand_completely = all_fields_present |
| path.narrative_preserved = narrative_preserved |
| path.provenance_chain_complete = True |
| path.luminosity_final = mist['luminosity'] |
|
|
| except Exception as e: |
| print(f" Reconstruction failed: {e}") |
| path.coordinate_match_accuracy = 0.0 |
| path.can_expand_completely = False |
|
|
| return path |
|
|
|
|
| |
| |
| |
|
|
| def run_compression_expansion_test( |
| num_bitchains: int = 100, |
| show_samples: bool = True |
| ) -> CompressionExperimentResults: |
| """ |
| Run EXP-05: Compression/Expansion Losslessness Validation |
| |
| Args: |
| num_bitchains: Number of random bit-chains to compress |
| show_samples: Whether to print detailed sample compression paths |
| |
| Returns: |
| Complete results object |
| """ |
| start_time = datetime.now(timezone.utc).isoformat() |
| overall_start = time.time() |
|
|
| print("\n" + "=" * 80) |
| print("EXP-05: COMPRESSION/EXPANSION LOSSLESSNESS VALIDATION") |
| print("=" * 80) |
| print(f"Testing {num_bitchains} random bit-chains through full compression pipeline") |
| print() |
|
|
| pipeline = CompressionPipeline() |
| compression_paths: List[BitChainCompressionPath] = [] |
|
|
| print(f"Compressing bit-chains...") |
| print("-" * 80) |
|
|
| for i in range(num_bitchains): |
| |
| bc = generate_random_bitchain() |
|
|
| |
| path = pipeline.compress_bitchain(bc) |
| compression_paths.append(path) |
|
|
| if (i + 1) % 25 == 0: |
| print(f" [OK] Processed {i + 1}/{num_bitchains} bit-chains") |
|
|
| print() |
|
|
| |
| if show_samples and compression_paths: |
| print("=" * 80) |
| print("SAMPLE COMPRESSION PATHS (First 3)") |
| print("=" * 80) |
| for path in compression_paths[:3]: |
| print(f"\nBit-Chain: {path.original_bitchain.id[:12]}...") |
| print(f" Original STAT7: {path.original_stat7_dict['realm']} gen={path.original_stat7_dict['lineage']}") |
| print(f" Original Address: {path.original_address[:32]}...") |
| print(f" Original Size: {path.original_serialized_size} bytes") |
| print(f" Original Luminosity: {path.original_luminosity:.4f}") |
| print() |
| for stage in path.stages: |
| print(f" Stage: {stage.stage_name:12} | Size: {stage.size_bytes:6} bytes | Luminosity: {stage.luminosity:.4f}") |
| print(f" Final Compression Ratio: {path.final_compression_ratio:.2f}x") |
| print(f" Coordinate Accuracy: {path.coordinate_match_accuracy:.1%}") |
| print(f" Expandable: {'[Y]' if path.can_expand_completely else '[N]'}") |
| print(f" Provenance: {'[Y]' if path.provenance_chain_complete else '[N]'}") |
| print(f" Narrative: {'[Y]' if path.narrative_preserved else '[N]'}") |
|
|
| |
| print() |
| print("=" * 80) |
| print("AGGREGATE METRICS") |
| print("=" * 80) |
|
|
| compression_ratios = [p.final_compression_ratio for p in compression_paths] |
| luminosity_decays = [p.original_luminosity - p.luminosity_final for p in compression_paths] |
| coord_accuracies = [p.coordinate_match_accuracy for p in compression_paths] |
|
|
| avg_compression_ratio = statistics.mean(compression_ratios) |
| avg_luminosity_decay = statistics.mean(luminosity_decays) |
| avg_coordinate_accuracy = statistics.mean(coord_accuracies) |
|
|
| percent_provenance = ( |
| sum(1 for p in compression_paths if p.provenance_chain_complete) / len(compression_paths) * 100 |
| ) |
| percent_narrative = ( |
| sum(1 for p in compression_paths if p.narrative_preserved) / len(compression_paths) * 100 |
| ) |
| percent_expandable = ( |
| sum(1 for p in compression_paths if p.can_expand_completely) / len(compression_paths) * 100 |
| ) |
|
|
| print(f"Average Compression Ratio: {avg_compression_ratio:.3f}x") |
| print(f"Average Luminosity Decay: {avg_luminosity_decay:.4f}") |
| print(f"Average Coordinate Accuracy: {avg_coordinate_accuracy:.1%}") |
| print(f"Provenance Integrity: {percent_provenance:.1f}%") |
| print(f"Narrative Preservation: {percent_narrative:.1f}%") |
| print(f"Expandability: {percent_expandable:.1f}%") |
| print() |
|
|
| |
| is_lossless = ( |
| percent_provenance == 100.0 and |
| percent_narrative >= 90.0 and |
| avg_coordinate_accuracy >= 0.4 |
| ) |
|
|
| |
| major_findings = [] |
|
|
| if percent_provenance == 100.0: |
| major_findings.append("[OK] Provenance chain maintained through all compression stages") |
| else: |
| major_findings.append(f"[WARN] Provenance loss detected ({100-percent_provenance:.1f}% affected)") |
|
|
| if percent_narrative >= 90.0: |
| major_findings.append("[OK] Narrative meaning preserved via embeddings and affect") |
| else: |
| major_findings.append(f"[WARN] Narrative degradation observed ({100-percent_narrative:.1f}% affected)") |
|
|
| if avg_coordinate_accuracy >= 0.4: |
| major_findings.append(f"[OK] STAT7 coordinates partially recoverable ({avg_coordinate_accuracy:.1%})") |
| else: |
| major_findings.append(f"[FAIL] STAT7 coordinate recovery insufficient ({avg_coordinate_accuracy:.1%})") |
|
|
| if avg_compression_ratio >= 2.0: |
| major_findings.append(f"[OK] Effective compression achieved ({avg_compression_ratio:.2f}x)") |
| else: |
| major_findings.append(f"[WARN] Compression ratio modest ({avg_compression_ratio:.2f}x)") |
|
|
| luminosity_retention = (1.0 - avg_luminosity_decay) * 100 |
| if luminosity_retention >= 70.0: |
| major_findings.append(f"[OK] Luminosity retained through compression ({luminosity_retention:.1f}%)") |
| else: |
| major_findings.append(f"[WARN] Luminosity decay significant ({100-luminosity_retention:.1f}% loss)") |
|
|
| overall_end = time.time() |
| end_time = datetime.now(timezone.utc).isoformat() |
|
|
| print("=" * 80) |
| print("LOSSLESSNESS ANALYSIS") |
| print("=" * 80) |
| print(f"Lossless System: {'[YES]' if is_lossless else '[NO]'}") |
| print() |
| for finding in major_findings: |
| print(f" {finding}") |
| print() |
|
|
| results = CompressionExperimentResults( |
| start_time=start_time, |
| end_time=end_time, |
| total_duration_seconds=overall_end - overall_start, |
| num_bitchains_tested=num_bitchains, |
| compression_paths=compression_paths, |
| avg_compression_ratio=avg_compression_ratio, |
| avg_luminosity_decay=avg_luminosity_decay, |
| avg_coordinate_accuracy=avg_coordinate_accuracy, |
| percent_provenance_intact=percent_provenance, |
| percent_narrative_preserved=percent_narrative, |
| percent_expandable=percent_expandable, |
| is_lossless=is_lossless, |
| major_findings=major_findings, |
| ) |
|
|
| return results |
|
|
|
|
| |
| |
| |
|
|
| def save_results(results: CompressionExperimentResults, output_file: str = None) -> str: |
| """Save results to JSON file.""" |
| if output_file is None: |
| timestamp = datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S') |
| output_file = f"exp05_compression_expansion_{timestamp}.json" |
|
|
| results_dir = Path(__file__).resolve().parent / 'results' |
| results_dir.mkdir(exist_ok=True) |
| output_path = str(results_dir / output_file) |
|
|
| with open(output_path, 'w') as f: |
| json.dump(results.to_dict(), f, indent=2) |
|
|
| print(f"Results saved to: {output_path}") |
| return output_path |
|
|
|
|
| if __name__ == '__main__': |
| num_bitchains = 100 |
| if '--quick' in sys.argv: |
| num_bitchains = 20 |
| elif '--full' in sys.argv: |
| num_bitchains = 500 |
|
|
| try: |
| results = run_compression_expansion_test(num_bitchains=num_bitchains) |
| output_file = save_results(results) |
|
|
| print("\n" + "=" * 80) |
| print(f"[OK] EXP-05 COMPLETE") |
| print("=" * 80) |
| print(f"Results: {output_file}") |
| print() |
|
|
| except Exception as e: |
| print(f"\n[FAIL] EXPERIMENT FAILED: {e}") |
| import traceback |
| traceback.print_exc() |
| sys.exit(1) |
|
|