Text Generation
Transformers
PyTorch
English
experimental
research
bit-level
transformer
reversible
safety
telemetry
language-modeling
Instructions to use WCNegentropy/BitTransformerLM with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use WCNegentropy/BitTransformerLM with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="WCNegentropy/BitTransformerLM")

# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("WCNegentropy/BitTransformerLM", dtype="auto")
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use WCNegentropy/BitTransformerLM with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "WCNegentropy/BitTransformerLM"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "WCNegentropy/BitTransformerLM",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
Use Docker
docker model run hf.co/WCNegentropy/BitTransformerLM
- SGLang
How to use WCNegentropy/BitTransformerLM with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "WCNegentropy/BitTransformerLM" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "WCNegentropy/BitTransformerLM",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
Use Docker images
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "WCNegentropy/BitTransformerLM" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "WCNegentropy/BitTransformerLM",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
- Docker Model Runner
How to use WCNegentropy/BitTransformerLM with Docker Model Runner:
docker model run hf.co/WCNegentropy/BitTransformerLM
| """ | |
| BitTransformerLM Dataset Builder & HuggingFace Integration | |
| Creates curated datasets optimized for bit-native transformer training with | |
| comprehensive safety benchmarks, scaling curricula, and progressive complexity. | |
| """ | |
| import os | |
| import json | |
| import gzip | |
| import random | |
| from typing import List, Dict, Any, Optional, Tuple | |
| from pathlib import Path | |
| from datetime import datetime | |
| import tempfile | |
| import torch | |
| import numpy as np | |
| from datasets import Dataset, DatasetDict | |
| from huggingface_hub import HfApi, login, create_repo | |
| from .bit_io import text_to_bits, bits_to_text | |
| from .parity import enforce_parity as _enforce_parity_tensor | |
| from .compression import compress_bits | |
| # from .telemetry import compute_negentropy, compute_lz_complexity, compute_symbiosis | |
| # Simple implementations of telemetry functions for dataset generation | |
def compute_negentropy(bit_tensor: torch.Tensor) -> float:
    """Return 1 minus the Shannon entropy of the bit-value distribution.

    Only the fraction of ones in ``bit_tensor`` is used, so the score is 0.0
    for a balanced sequence and approaches 1.0 for an all-zero or all-one
    sequence. An empty tensor scores 0.0.

    Args:
        bit_tensor: Tensor of 0/1 values; it is cast to float before averaging.

    Returns:
        Negentropy as a Python float in the range [0, 1].
    """
    if len(bit_tensor) == 0:
        return 0.0

    eps = 1e-7
    frac_ones = bit_tensor.float().mean()

    # (P(1), P(0)), each clamped away from 0 and 1 so log2 stays finite.
    probs = torch.stack((frac_ones, 1.0 - frac_ones)).clamp(min=eps, max=1.0 - eps)
    entropy = -(probs * torch.log2(probs)).sum()

    # A binary source has at most 1 bit of entropy per symbol.
    max_entropy = 1.0
    return float((max_entropy - entropy) / max_entropy)
def compute_lz_complexity(bits: List[int]) -> float:
    """Approximate Lempel-Ziv complexity by the density of runs.

    This is a run-length proxy, not a true LZ parse: the score is twice the
    number of maximal runs of equal bits divided by the sequence length,
    capped at 1.0.

    Args:
        bits: Sequence of bit values.

    Returns:
        0.0 for an empty sequence, otherwise a float in (0, 1].
    """
    if not bits:
        return 0.0

    # A non-empty sequence has one run plus one more per change of value,
    # so the run count can never be zero here.
    run_count = 1 + sum(1 for prev, cur in zip(bits, bits[1:]) if prev != cur)

    complexity = run_count / len(bits)
    return min(1.0, complexity * 2)  # Scale to 0-1 range
def compute_symbiosis(bit_tensor1: torch.Tensor, bit_tensor2: torch.Tensor) -> float:
    """Score how closely two equal-length bit sequences track each other.

    The score is the Pearson correlation of the two sequences rescaled from
    [-1, 1] to [0, 1].

    Args:
        bit_tensor1: First bit sequence.
        bit_tensor2: Second bit sequence.

    Returns:
        A float in [0, 1]. 0.0 is also returned when the inputs are empty,
        differ in length, or the correlation is undefined (NaN).
    """
    size = len(bit_tensor1)
    if size == 0 or size != len(bit_tensor2):
        return 0.0

    # Rows are the two variables; [0, 1] is their cross-correlation entry.
    paired = torch.stack([bit_tensor1.float(), bit_tensor2.float()])
    corr = torch.corrcoef(paired)[0, 1]

    if torch.isnan(corr):
        return 0.0

    return float((corr + 1) / 2)
def enforce_parity(bits: List[int]) -> List[int]:
    """Zero-pad a bit list to a multiple of 9 and run tensor parity enforcement.

    The caller's list is never modified; a new list is always returned.

    Args:
        bits: Bit values as a list of ints.

    Returns:
        The corrected bits as a list. If the tensor-level parity routine
        raises, the zero-padded (uncorrected) bits are returned instead, so
        this wrapper stays best-effort.
    """
    if not bits:
        return list(bits)

    # Work on a copy: padding must not leak back into the caller's list.
    padded = list(bits)
    remainder = len(padded) % 9
    if remainder:
        padded.extend([0] * (9 - remainder))

    try:
        bits_tensor = torch.tensor(padded, dtype=torch.long)
        corrected_tensor, _ = _enforce_parity_tensor(bits_tensor)
        return corrected_tensor.tolist()
    except Exception:
        # Best-effort fallback; unlike a bare ``except`` this lets
        # KeyboardInterrupt and SystemExit propagate.
        return padded
class BitTransformerDatasetBuilder:
    """
    Comprehensive dataset builder for BitTransformerLM training.

    Generates:
    - Binary sequences with parity protection
    - Progressive complexity curricula
    - Safety benchmark validation sets
    - Synthetic bit patterns for robustness
    - Compressed sequence variants
    """

    # Bits per parity group: the module-level enforce_parity() pads every
    # sequence to a multiple of this size.
    _PARITY_GROUP_BITS = 9

    # Every record carries all of these keys (unused ones set to None) so the
    # four sample categories share a single schema.
    _RECORD_FIELDS = (
        "id",
        "original_text",
        "bit_sequence",
        "sequence_length",
        "negentropy",
        "lz_complexity",
        "has_parity",
        "category",
        "pattern_type",
        "safety_category",
        "target_negentropy",
        "target_complexity",
        "original_id",
        "compression_ratio",
        "original_length",
    )

    def __init__(self, hf_token: str, repo_id: str = "BitTransformerLM"):
        """Store credentials, log in to HuggingFace and set up the config.

        Args:
            hf_token: HuggingFace access token; passed to ``login`` here and
                to the Hub calls made when uploading.
            repo_id: Dataset repository ID used for uploads.
        """
        self.hf_token = hf_token
        self.repo_id = repo_id
        self.api = HfApi()

        # Login to HuggingFace
        login(token=hf_token)

        # Dataset configuration (uploaded as part of dataset_info.json)
        self.config = {
            "version": "1.0.0",
            "created": datetime.now().isoformat(),
            "model_compatibility": "BitTransformerLM",
            "bit_encoding": "parity_protected",
            "max_sequence_length": 512,
            "total_samples": 50000,
            "safety_thresholds": {
                "min_negentropy": 0.1,
                "max_lz_complexity": 0.9,
                "min_symbiosis": 0.3
            }
        }

    @classmethod
    def _make_record(cls, **fields: Any) -> Dict[str, Any]:
        """Return a sample dict with the full schema; unspecified fields are None."""
        unknown = set(fields) - set(cls._RECORD_FIELDS)
        if unknown:
            raise ValueError(f"Unknown record fields: {sorted(unknown)}")
        record = dict.fromkeys(cls._RECORD_FIELDS)
        record.update(fields)
        return record

    @staticmethod
    def _score_bits(bits: List[int]) -> Tuple[float, float]:
        """Return (negentropy, lz_complexity) for a list of bits."""
        bit_tensor = torch.tensor(bits, dtype=torch.float32)
        return float(compute_negentropy(bit_tensor)), float(compute_lz_complexity(bits))

    def generate_text_to_bits_data(self, texts: List[str], max_len: int = 512) -> List[Dict]:
        """Convert text samples to parity-protected bit sequences.

        Each text is converted with ``text_to_bits``, truncated to the largest
        whole number of 9-bit parity groups that fits in ``max_len``, passed
        through ``enforce_parity`` and finally zero-padded to ``max_len``.
        Truncating on a group boundary keeps parity padding from pushing the
        sequence past ``max_len``.

        Args:
            texts: Source strings.
            max_len: Length of every emitted ``bit_sequence``.

        Returns:
            One record per successfully converted text. ``sequence_length`` is
            the number of bits before zero-padding; the metrics are computed
            on the padded sequence. Texts that raise during conversion are
            reported with ``print`` and skipped.
        """
        samples = []
        usable_len = max_len - (max_len % self._PARITY_GROUP_BITS)

        for i, text in enumerate(texts):
            try:
                # Convert to bits with parity protection
                bits = list(text_to_bits(text)[:usable_len])
                bits = list(enforce_parity(bits))

                # Remember the real length before padding hides it.
                content_length = len(bits)
                if content_length < max_len:
                    bits.extend([0] * (max_len - content_length))

                negentropy, lz_complexity = self._score_bits(bits)

                samples.append(self._make_record(
                    id=f"text_to_bits_{i:06d}",
                    original_text=(text[:100] + "...") if len(text) > 100 else text,
                    bit_sequence=bits,
                    sequence_length=content_length,  # Non-padding length
                    negentropy=negentropy,
                    lz_complexity=lz_complexity,
                    has_parity=True,
                    category="text_conversion",
                ))
            except Exception as e:
                print(f"Error processing text {i}: {e}")
                continue

        return samples

    def generate_synthetic_patterns(self, num_samples: int = 5000, max_len: int = 512) -> List[Dict]:
        """Generate synthetic bit patterns for robustness testing.

        Args:
            num_samples: Number of records to generate.
            max_len: Number of bits requested from the pattern generator.

        Returns:
            A list of ``synthetic_pattern`` records.
        """
        samples = []
        patterns = [
            "alternating",  # 0101010101...
            "blocks",       # 000111000111...
            "fibonacci",    # Fibonacci-based sequences
            "prime_based",  # Prime number patterns
            "random_walk",  # Constrained random walks
            # NOTE(review): _generate_pattern has no dedicated branch for the
            # next two, so they currently yield uniformly random bits.
            "spiral",       # Bit spiral patterns
            "fractal",      # Simple fractal sequences
        ]

        for i in range(num_samples):
            pattern_type = random.choice(patterns)
            # NOTE(review): enforce_parity pads to a multiple of 9, so the
            # stored sequence can be longer than max_len (513 for 512).
            bits = enforce_parity(self._generate_pattern(pattern_type, max_len))

            negentropy, lz_complexity = self._score_bits(bits)

            samples.append(self._make_record(
                id=f"synthetic_{pattern_type}_{i:06d}",
                bit_sequence=bits,
                sequence_length=len(bits),
                negentropy=negentropy,
                lz_complexity=lz_complexity,
                pattern_type=pattern_type,
                has_parity=True,
                category="synthetic_pattern",
            ))

        return samples

    def generate_safety_benchmarks(self, num_samples: int = 2000) -> List[Dict]:
        """Generate sequences specifically for safety metric validation.

        ``num_samples`` is split evenly (integer division) across four target
        profiles. Each record stores both the requested targets and the
        metrics actually measured on the generated bits.

        Args:
            num_samples: Approximate total number of records.

        Returns:
            A list of ``safety_benchmark`` records.
        """
        samples = []

        # Create sequences with known safety properties
        safety_targets = [
            ("low_entropy", {"target_negentropy": 0.05, "target_complexity": 0.2}),
            ("medium_entropy", {"target_negentropy": 0.5, "target_complexity": 0.5}),
            ("high_entropy", {"target_negentropy": 0.95, "target_complexity": 0.8}),
            ("edge_cases", {"target_negentropy": 0.99, "target_complexity": 0.99}),
        ]

        samples_per_target = num_samples // len(safety_targets)

        for safety_type, targets in safety_targets:
            for i in range(samples_per_target):
                bits = self._generate_safety_controlled_sequence(
                    targets["target_negentropy"],
                    targets["target_complexity"]
                )
                bits = enforce_parity(bits)

                # Measure what was actually produced.
                actual_negentropy, actual_complexity = self._score_bits(bits)

                samples.append(self._make_record(
                    id=f"safety_{safety_type}_{i:06d}",
                    bit_sequence=bits,
                    sequence_length=len(bits),
                    negentropy=actual_negentropy,
                    lz_complexity=actual_complexity,
                    target_negentropy=targets["target_negentropy"],
                    target_complexity=targets["target_complexity"],
                    safety_category=safety_type,
                    has_parity=True,
                    category="safety_benchmark",
                ))

        return samples

    def generate_compression_variants(self, base_samples: List[Dict],
                                      compression_ratios: Optional[List[float]] = None) -> List[Dict]:
        """Generate compressed variants of base sequences.

        Args:
            base_samples: Records with at least ``id`` and ``bit_sequence``;
                only the first 1000 are used.
            compression_ratios: Ratio labels to emit; defaults to
                ``[0.5, 0.7, 0.9]``.

        Returns:
            A list of ``compressed_variant`` records. Samples that fail to
            compress are skipped and a single summary line is printed.
        """
        if compression_ratios is None:
            compression_ratios = [0.5, 0.7, 0.9]

        compressed_samples = []
        skipped = 0
        first_error: Optional[Exception] = None

        for ratio in compression_ratios:
            # NOTE(review): `ratio` is only recorded in the id and the
            # compression_ratio field; it is not passed to compress_bits, so
            # every ratio produces the same bits. Confirm whether intended.
            for sample in base_samples[:1000]:  # Limit for efficiency
                try:
                    original_bits = sample["bit_sequence"]

                    # Convert to tensor for compression
                    bits_tensor = torch.tensor(original_bits, dtype=torch.uint8)
                    compressed_bits = enforce_parity(compress_bits(bits_tensor).tolist())

                    negentropy, lz_complexity = self._score_bits(compressed_bits)

                    compressed_samples.append(self._make_record(
                        id=f"{sample['id']}_compressed_{ratio}",
                        original_id=sample["id"],
                        bit_sequence=compressed_bits,
                        sequence_length=len(compressed_bits),
                        negentropy=negentropy,
                        lz_complexity=lz_complexity,
                        compression_ratio=ratio,
                        original_length=len(original_bits),
                        has_parity=True,
                        category="compressed_variant",
                    ))
                except Exception as e:
                    # Stay best-effort, but do not hide that samples were lost.
                    skipped += 1
                    if first_error is None:
                        first_error = e
                    continue

        if skipped:
            print(f"Skipped {skipped} compression variants (first error: {first_error})")

        return compressed_samples

    def _generate_pattern(self, pattern_type: str, length: int) -> List[int]:
        """Generate specific bit patterns.

        Args:
            pattern_type: One of ``alternating``, ``blocks``, ``fibonacci``,
                ``prime_based`` or ``random_walk``; any other value yields
                uniformly random bits.
            length: Number of bits to produce.

        Returns:
            A list of ``length`` bits (empty when ``length`` is not positive).
        """
        if length <= 0:
            # Without this guard random_walk would still emit its seed bit.
            return []

        if pattern_type == "alternating":
            return [i % 2 for i in range(length)]

        elif pattern_type == "blocks":
            block_size = random.randint(3, 8)
            pattern = []
            current_bit = 0
            for i in range(length):
                # Flip at every block boundary (including i == 0).
                if i % block_size == 0:
                    current_bit = 1 - current_bit
                pattern.append(current_bit)
            return pattern

        elif pattern_type == "fibonacci":
            # Fibonacci-inspired bit sequence (Fibonacci numbers modulo 2)
            fib = [0, 1]
            while len(fib) < length:
                fib.append((fib[-1] + fib[-2]) % 2)
            return fib[:length]

        elif pattern_type == "prime_based":
            # 1 where the 1-based position is divisible by one of the first
            # five primes, else 0.
            primes = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31]
            small_primes = primes[:5]
            return [
                1 if any((i + 1) % p == 0 for p in small_primes) else 0
                for i in range(length)
            ]

        elif pattern_type == "random_walk":
            # Constrained random walk
            pattern = [random.randint(0, 1)]
            for _ in range(1, length):
                # 70% chance to stay same, 30% to flip
                if random.random() < 0.7:
                    pattern.append(pattern[-1])
                else:
                    pattern.append(1 - pattern[-1])
            return pattern

        else:
            # Default to random
            return [random.randint(0, 1) for _ in range(length)]

    def _generate_safety_controlled_sequence(self, target_negentropy: float,
                                             target_complexity: float, length: int = 256) -> List[int]:
        """Generate bit sequence targeting specific safety metrics.

        A base pattern is chosen from ``target_negentropy`` and then each bit
        is flipped with probability ``max(0.1, target_complexity)``.

        NOTE(review): this is a heuristic; nothing here measures the result
        or iterates toward the targets, so callers should rely on the
        metrics they compute afterwards.

        Args:
            target_negentropy: Selects the base pattern (< 0.3, > 0.7, or
                in between).
            target_complexity: Sets the block size of the mid-range pattern
                and the bit-flip probability.
            length: Number of bits to produce.

        Returns:
            A list of ``length`` bits.
        """
        # Start with pattern based on targets
        if target_negentropy < 0.3:
            # Zeros then ones; the second half absorbs the odd bit so the
            # result is exactly `length` long.
            half = length // 2
            base_pattern = [0] * half + [1] * (length - half)
        elif target_negentropy > 0.7:
            base_pattern = [random.randint(0, 1) for _ in range(length)]
        else:
            # Random-valued blocks; lower target complexity -> longer blocks.
            block_size = max(1, int(10 * (1 - target_complexity)))
            base_pattern = []
            current = 0
            for i in range(length):
                if i % block_size == 0:
                    current = random.randint(0, 1)
                base_pattern.append(current)

        # Add noise based on complexity target
        noise_level = max(0.1, target_complexity)
        final_pattern = []
        for bit in base_pattern:
            if random.random() < noise_level:
                final_pattern.append(1 - bit)  # Flip bit
            else:
                final_pattern.append(bit)

        return final_pattern

    def build_complete_dataset(self, source_texts: Optional[List[str]] = None) -> DatasetDict:
        """Build the complete BitTransformerLM dataset.

        Combines text-derived, synthetic, safety-benchmark and compressed
        samples, shuffles them and splits 80/10/10 into train, validation
        and test.

        Args:
            source_texts: Texts to convert; the built-in default corpus is
                used when omitted. At most the first 10000 are used.

        Returns:
            A ``DatasetDict`` with ``train``, ``validation`` and ``test``.
        """
        print("🚀 Building BitTransformerLM Dataset...")

        # Use default texts if none provided
        if source_texts is None:
            source_texts = self._get_default_texts()

        all_samples = []

        # 1. Text-to-bits conversion
        print("📝 Generating text-to-bits samples...")
        text_samples = self.generate_text_to_bits_data(source_texts[:10000])
        all_samples.extend(text_samples)

        # 2. Synthetic patterns
        print("🎨 Generating synthetic patterns...")
        synthetic_samples = self.generate_synthetic_patterns(7500)
        all_samples.extend(synthetic_samples)

        # 3. Safety benchmarks
        print("🛡️ Generating safety benchmarks...")
        safety_samples = self.generate_safety_benchmarks(5000)
        all_samples.extend(safety_samples)

        # 4. Compression variants of the text samples
        print("🗜️ Generating compression variants...")
        compression_samples = self.generate_compression_variants(text_samples[:1000])
        all_samples.extend(compression_samples)

        # Split into train/validation/test
        random.shuffle(all_samples)

        total = len(all_samples)
        train_split = int(0.8 * total)
        val_split = int(0.9 * total)

        train_data = all_samples[:train_split]
        val_data = all_samples[train_split:val_split]
        test_data = all_samples[val_split:]

        # Create HuggingFace datasets
        dataset_dict = DatasetDict({
            'train': Dataset.from_list(train_data),
            'validation': Dataset.from_list(val_data),
            'test': Dataset.from_list(test_data)
        })

        print(f"✅ Dataset built: {len(train_data)} train, {len(val_data)} val, {len(test_data)} test")
        return dataset_dict

    def _get_default_texts(self) -> List[str]:
        """Get default text corpus for bit conversion.

        Returns:
            The 12 seed sentences, 500 random combinations of 2-4 of them,
            and 10 extra "Sample N: ..." variations.
        """
        # Sample texts covering various domains
        texts = [
            "The quick brown fox jumps over the lazy dog.",
            "In the beginning was the Word, and the Word was with God.",
            "To be or not to be, that is the question.",
            "I think, therefore I am.",
            "The only thing we have to fear is fear itself.",
            "Ask not what your country can do for you.",
            "E = mc²",
            "The mitochondria is the powerhouse of the cell.",
            "SELECT * FROM users WHERE active = 1;",
            "def fibonacci(n): return n if n < 2 else fibonacci(n-1) + fibonacci(n-2)",
            "Binary trees are hierarchical data structures.",
            "The entropy of a system tends to increase over time.",
        ]

        # Expand with variations and combinations
        expanded_texts = texts.copy()

        for i in range(500):  # Generate more samples
            # Combine random texts
            combined = " ".join(random.sample(texts, random.randint(2, 4)))
            expanded_texts.append(combined)

            # Add technical variations
            if i % 50 == 0:
                expanded_texts.append(f"Sample {i}: " + random.choice(texts))

        return expanded_texts

    def upload_to_huggingface(self, dataset: DatasetDict,
                              private: bool = True) -> str:
        """Upload dataset to HuggingFace Hub.

        Creates the dataset repository if needed, pushes all splits and
        uploads a ``dataset_info.json`` metadata file.

        Args:
            dataset: Splits to push; must contain ``train``, ``validation``
                and ``test``.
            private: Whether the repository is private.

        Returns:
            URL of the dataset repository.

        Raises:
            Exception: Any failure is printed and re-raised unchanged.
        """
        print(f"🌐 Uploading to HuggingFace: {self.repo_id}")

        try:
            # Create repository
            create_repo(
                repo_id=self.repo_id,
                repo_type="dataset",
                private=private,
                exist_ok=True,
                token=self.hf_token
            )

            # Add dataset metadata
            dataset_info = {
                "dataset_info": self.config,
                "splits": {
                    "train": len(dataset["train"]),
                    "validation": len(dataset["validation"]),
                    "test": len(dataset["test"])
                },
                "features": {
                    "id": "string",
                    "bit_sequence": "list of integers (0/1)",
                    "sequence_length": "integer",
                    "negentropy": "float",
                    "lz_complexity": "float",
                    "category": "string",
                    "has_parity": "boolean"
                },
                "usage_notes": [
                    "Optimized for BitTransformerLM bit-native training",
                    "All sequences include parity protection",
                    "Safety metrics (K/C/S) computed for each sample",
                    "Supports progressive curriculum learning"
                ]
            }

            # Push dataset with metadata
            dataset.push_to_hub(
                repo_id=self.repo_id,
                token=self.hf_token,
                private=private
            )

            # Upload the metadata from memory: no temp file is left on disk
            # and no half-written file can be uploaded.
            metadata_bytes = json.dumps(dataset_info, indent=2).encode("utf-8")
            self.api.upload_file(
                path_or_fileobj=metadata_bytes,
                path_in_repo="dataset_info.json",
                repo_id=self.repo_id,
                repo_type="dataset",
                token=self.hf_token
            )

            print(f"✅ Dataset uploaded successfully to: https://huggingface.co/datasets/{self.repo_id}")
            return f"https://huggingface.co/datasets/{self.repo_id}"

        except Exception as e:
            print(f"❌ Upload failed: {e}")
            raise
def create_bittransformerlm_dataset(hf_token: str,
                                    repo_id: str = "BitTransformerLM",
                                    source_texts: Optional[List[str]] = None) -> str:
    """
    Build the BitTransformerLM dataset and upload it as a private repository.

    Args:
        hf_token: HuggingFace access token
        repo_id: Dataset repository ID
        source_texts: Optional list of source texts for conversion

    Returns:
        URL to the uploaded dataset
    """
    dataset_builder = BitTransformerDatasetBuilder(hf_token, repo_id)
    built_splits = dataset_builder.build_complete_dataset(source_texts)
    upload_url = dataset_builder.upload_to_huggingface(built_splits, private=True)
    return upload_url