HuggingFaceFW/fineweb-edu
Viewer • Updated • 3.5B • 456k • 1.15k
Glimmer-1 is the first model in the Glimmer series: an 11.9K-parameter Llama-style transformer trained on 500K tokens of FineWeb-Edu. It is a small language model (SLM) exploring the lower bound of useful language model scale.
Glimmer-1-Base is an experimental research model. It has no supervised fine-tuning, is prone to incoherence, and is not suitable for any production use. SFT and CoT training are planned for future releases.
| Property | Value |
|---|---|
| Parameters | ~11,900 |
| Training Tokens | 500,000 (FineWeb-Edu) |
| Context Window | 512 tokens |
| Hardware | RTX 4070 SUPER |
| Status | Base only, no SFT |
| Parameter | Value |
|---|---|
| Architecture | Transformer Decoder (LlamaForCausalLM) |
| Hidden Dimension | 16 |
| Layers | 2 |
| Attention Heads | 4 |
| KV Heads | 1 (GQA) |
| MLP Intermediate Size | 24 (SiLU activation) |
| Context Length | 512 tokens |
| Vocabulary Size | 512 |
| Normalization | RMSNorm, eps 1e-06 |
| Position Encoding | RoPE (default) |
| Embeddings | Tied input / output |
Ensure you have your environment set up:
pip install torch transformers safetensors accelerate
"""
Inference pipeline for Glint-Research/Glimmer-1-Base.
Loads the model weights directly from a local safetensors file and runs a simple autoregressive sampling loop.
"""
import os
import json
import torch
import torch.nn.functional as F
from safetensors.torch import load_file
from transformers import LlamaConfig, LlamaForCausalLM, AutoTokenizer
class GlimmerInferencePipeline:
    """Local inference wrapper for a Llama-style causal language model.

    The pipeline reads ``config.json``, the tokenizer files and
    ``model.safetensors`` from one directory, builds the model from the
    config, loads the weights and exposes a single :meth:`generate` method.

    Attributes set by ``__init__``:
        device: Torch device string the model runs on.
        config_data: Raw dictionary parsed from ``config.json``.
        config: ``LlamaConfig`` built from ``config_data``.
        tokenizer: Tokenizer loaded from ``model_path``.
        model: ``LlamaForCausalLM`` in eval mode on ``device``.
    """

    def __init__(self, model_path: str, device: str = None):
        """Build the model from ``model_path`` and load its weights.

        Args:
            model_path: Directory containing ``config.json``, the tokenizer
                files and ``model.safetensors``.
            device: Torch device string. When ``None``, ``"cuda"`` is used
                if available, otherwise ``"cpu"``.

        Raises:
            FileNotFoundError: If ``config.json`` or ``model.safetensors``
                is missing from ``model_path``.
            RuntimeError: If the checkpoint keys or shapes do not match the
                model built from the config.
        """
        if device is None:
            device = "cuda" if torch.cuda.is_available() else "cpu"
        self.device = device
        print(f"[*] Initializing Glimmer-1-Base runtime on engine: {self.device}")

        config_file = os.path.join(model_path, "config.json")
        if not os.path.exists(config_file):
            raise FileNotFoundError(f"Could not locate config.json inside {model_path}")
        with open(config_file, "r", encoding="utf-8") as f:
            self.config_data = json.load(f)
        self.config = LlamaConfig(**self.config_data)

        print("[*] Loading tokenizer engine...")
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)

        print("[*] Loading underlying safetensors architecture...")
        self.model = LlamaForCausalLM(self.config)
        weights_file = os.path.join(model_path, "model.safetensors")
        if not os.path.exists(weights_file):
            raise FileNotFoundError(f"Could not find model.safetensors weight matrix in {model_path}")
        # Load on CPU first; the whole model is moved to the target device below.
        state_dict = load_file(weights_file, device="cpu")
        self._load_weights(state_dict)

        self.model.to(self.device)
        self.model.eval()
        print("[+] Model stack fully loaded and verified.")

    def _load_weights(self, state_dict) -> None:
        """Copy ``state_dict`` into ``self.model``, tolerating a tied output head.

        When the config ties input and output embeddings, a checkpoint may
        store only the embedding matrix and omit ``lm_head.weight``. A plain
        ``strict=True`` load rejects such a file, so the load is done
        non-strictly and the key check is performed here instead.

        Raises:
            RuntimeError: If any key other than a tied ``lm_head.weight`` is
                missing, or if the checkpoint contains unexpected keys.
        """
        tolerated = set()
        if getattr(self.config, "tie_word_embeddings", False):
            tolerated.add("lm_head.weight")

        result = self.model.load_state_dict(state_dict, strict=False)
        missing = [key for key in result.missing_keys if key not in tolerated]
        unexpected = list(result.unexpected_keys)
        if missing or unexpected:
            raise RuntimeError(
                "Checkpoint does not match the model: "
                f"missing keys {missing}, unexpected keys {unexpected}"
            )
        # Re-establish sharing between the embedding and the output head.
        self.model.tie_weights()

    @staticmethod
    def _as_id_set(token_ids) -> set:
        """Normalize a config token-id entry (None, int or list) to a set."""
        if token_ids is None:
            return set()
        if isinstance(token_ids, (list, tuple, set)):
            return set(token_ids)
        return {token_ids}

    @torch.inference_mode()
    def generate(
        self,
        prompt: str,
        max_new_tokens: int = 50,
        temperature: float = 0.7,
        top_k: int = 50
    ) -> str:
        """Generate a continuation of ``prompt`` one token at a time.

        Args:
            prompt: Text to continue. A BOS token is prepended when the
                tokenized prompt does not already start with one.
            max_new_tokens: Upper bound on the number of generated tokens.
            temperature: Sampling temperature. Values ``<= 0`` select
                greedy (argmax) decoding.
            top_k: Keep only the ``top_k`` most likely tokens when sampling;
                ``<= 0`` disables the filter. Values larger than the
                vocabulary are clamped to the vocabulary size.

        Returns:
            The decoded prompt plus continuation, with special tokens
            skipped by the tokenizer.

        Raises:
            ValueError: If the prompt tokenizes to nothing and the config
                defines no ``bos_token_id`` to start from.
        """
        inputs = self.tokenizer(prompt, return_tensors="pt")
        # An empty prompt can yield an empty tensor with a float dtype;
        # force integer ids so concatenation and embedding lookup stay valid.
        input_ids = inputs["input_ids"].to(device=self.device, dtype=torch.long)

        bos_token_id = self.config_data.get("bos_token_id", 1)
        # eos_token_id may be a single id, a list of ids, or null in config.json.
        stop_ids = self._as_id_set(self.config_data.get("eos_token_id", 2))

        starts_with_bos = (
            input_ids.shape[1] > 0 and input_ids[0, 0].item() == bos_token_id
        )
        if not starts_with_bos:
            if bos_token_id is not None:
                bos_tensor = torch.tensor(
                    [[bos_token_id]], dtype=torch.long, device=self.device
                )
                input_ids = torch.cat([bos_tensor, input_ids], dim=-1)
            elif input_ids.shape[1] == 0:
                raise ValueError(
                    "Cannot generate from an empty prompt when the config "
                    "defines no bos_token_id"
                )

        # Feed the model at most this many trailing tokens per step; the full
        # sequence is still kept for decoding.
        max_context = self.config_data.get("max_position_embeddings")

        for _ in range(max_new_tokens):
            window = input_ids[:, -max_context:] if max_context else input_ids
            next_token_logits = self.model(window).logits[:, -1, :]

            if temperature > 0.0:
                next_token_logits = next_token_logits / temperature
                if top_k > 0:
                    # torch.topk raises if k exceeds the vocabulary size.
                    k = min(top_k, next_token_logits.size(-1))
                    kth_value = torch.topk(next_token_logits, k).values[..., -1, None]
                    next_token_logits = next_token_logits.masked_fill(
                        next_token_logits < kth_value, float("-inf")
                    )
                probabilities = F.softmax(next_token_logits, dim=-1)
                next_token = torch.multinomial(probabilities, num_samples=1)
            else:
                next_token = torch.argmax(next_token_logits, dim=-1, keepdim=True)

            input_ids = torch.cat([input_ids, next_token], dim=-1)
            if next_token.item() in stop_ids:
                break

        # Transform resulting output block back into text
        return self.tokenizer.decode(input_ids[0], skip_special_tokens=True)
if __name__ == "__main__":
    # Directory that holds config.json, the tokenizer files and
    # model.safetensors. Replace '.' with the path to a downloaded snapshot
    # when running from outside the repository directory.
    LOCAL_REPO_DIR = "."
    try:
        pipeline = GlimmerInferencePipeline(model_path=LOCAL_REPO_DIR)
        sample_prompt = "Deep learning architecture optimization requires"
        print(f"\n[Prompt Input]: {sample_prompt}")
        generated_text = pipeline.generate(
            prompt=sample_prompt,
            max_new_tokens=32,
            temperature=0.85
        )
        print(f"[Generated Response]: {generated_text}\n")
    except Exception as e:
        # Top-level boundary: show a short hint rather than a raw traceback.
        print(f"[-] Execution Error failed: {str(e)}")
        print("[!] Ensure config.json, tokenizer.json, and model.safetensors are inside the execution directory.")
        # Exit non-zero so shells and CI can detect that the demo run failed.
        raise SystemExit(1) from e
@misc{glimmer1base2026,
author = {CompactAI},
title = {Glimmer-1: An 11.9K-Parameter Llama-Style Transformer},
year = {2026},
publisher = {Glint Research},
url = {https://huggingface.co/Glint-Research}
}
Built by CompactAI — trained and made by Enderchefcoder
Small models trying their best since 2026.