Model Card for Model ID

~400M parameter model trained on 1M rows of the wikipedia.en dataset.

Epochs: 1

Trained on an RTX 2060 (6 GB VRAM) with 54 GB system RAM.

GaLore: Memory-Efficient LLM Training by Gradient Low-Rank Projection.

import torch
import argparse
from transformers import AutoModelForCausalLM, AutoTokenizer

def run_inference(model_id, prompt, max_tokens=50):
    """Load a causal LM from the Hugging Face Hub and generate text from *prompt*.

    Args:
        model_id: Hub repository id of the causal language model.
        prompt: Text to condition generation on.
        max_tokens: Maximum number of new tokens to generate.

    Returns:
        The decoded generation (prompt plus continuation) as a string.
    """
    print(f"Fetching model '{model_id}' from Hugging Face Hub...")

    # Load tokenizer and model; fp16 + device_map="auto" keeps VRAM usage low
    # enough for small consumer GPUs.
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.float16,
        device_map="auto",
    )

    print(f"\nPrompt: {prompt}")
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Llama models do not use token_type_ids
    inputs.pop("token_type_ids", None)

    # Generate without tracking gradients (inference only).
    print("Generating...")
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            do_sample=True,
            temperature=0.7,
            pad_token_id=tokenizer.eos_token_id,
        )

    result = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(f"\nResponse:\n{result}")
    # Fix: previously the decoded text was printed but discarded; return it so
    # programmatic callers can use the generation.
    return result

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Run text generation with a Hub-hosted causal language model."
    )
    parser.add_argument("--model_id", type=str, default="uisikdag/umitllama04b-galore-english")
    parser.add_argument("--prompt", type=str, default="The future of artificial intelligence is")
    # Expose the generation length on the CLI; run_inference already accepts
    # max_tokens but it was previously hard-coded to its default.
    parser.add_argument("--max_tokens", type=int, default=50)
    args = parser.parse_args()

    run_inference(args.model_id, args.prompt, args.max_tokens)
Downloads last month
2
Safetensors
Model size
0.4B params
Tensor type
F32
·
Inference Providers NEW
This model isn't deployed by any Inference Provider. 🙋 Ask for provider support

Dataset used to train uisikdag/umitllama04b-galore-english

Collection including uisikdag/umitllama04b-galore-english