RaveAI

This is my model (490M), trained on 7.5B tokens using our own dataset. The model is trained to solve simple math problems.

The model has a context length of 64 tokens.

Format: Alpaca (reduced)

### Instruction: INSTRUCTION ### Response: RESPONSE

Credits

Ttimofeyka - model trainer

TechnoChicken - creator of the dataset

Example script to run the model

import pickle
import torch
import tiktoken
from model import GPTConfig, GPT

# Sampling and runtime settings for the example script.
max_new_tokens = 30 # intended cap on the number of generated tokens
temperature = 0.7 # forwarded to model.generate as the sampling temperature
top_k = 200 # retain only the top_k most likely tokens, clamp others to have 0 probability
seed = 1337 # seed for the torch random number generators
# Pick the GPU when PyTorch can see one and fall back to the CPU otherwise, so
# the script also runs on machines without CUDA. Set it by hand to override.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# NOTE(review): dtype is computed here but is not referenced by the rest of the
# script shown in this file.
dtype = 'bfloat16' if torch.cuda.is_available() and torch.cuda.is_bf16_supported() else 'float16' # 'float32' or 'bfloat16' or 'float16'
compile = True # when true, the model is wrapped with torch.compile (note: this name shadows the builtin `compile`)

# Seed the default generator first, then the CUDA generator, with the same value.
for seed_fn in (torch.manual_seed, torch.cuda.manual_seed):
    seed_fn(seed)
# Turn on the allow_tf32 switch for CUDA matmul and for cuDNN.
for tf32_backend in (torch.backends.cuda.matmul, torch.backends.cudnn):
    tf32_backend.allow_tf32 = True

# Read the checkpoint file, mapping its tensors onto the selected device.
ckpt_path = 'ckpt_f16.pt'
checkpoint = torch.load(ckpt_path, map_location=device)
state_dict = checkpoint['model']

# Rename every key that starts with '_orig_mod.' to the same key without that
# prefix (presumably left behind when the weights were saved from a compiled
# model - confirm against the training script). Matching keys are collected
# first so the dict is not resized while it is being iterated.
unwanted_prefix = '_orig_mod.'
prefix_len = len(unwanted_prefix)
prefixed_keys = [key for key in state_dict if key.startswith(unwanted_prefix)]
for key in prefixed_keys:
    state_dict[key[prefix_len:]] = state_dict.pop(key)

# Build the model from the hyperparameters stored in the checkpoint, then
# load the cleaned-up weights into it.
gptconf = GPTConfig(**checkpoint['model_args'])
model = GPT(gptconf)
model.load_state_dict(state_dict)

# Place the model on the target device and switch it to evaluation mode.
model.to(device)
model.eval()
# Wrap the model with torch.compile only when the `compile` flag is set.
model = torch.compile(model) if compile else model

# GPT-2 byte-pair encoding from tiktoken, shared by both helpers below.
enc = tiktoken.get_encoding("gpt2")


def encode(s):
    """Turn text *s* into token ids, allowing the <|endoftext|> special token."""
    return enc.encode(s, allowed_special={"<|endoftext|>"})


def decode(l):
    """Turn the list of token ids *l* back into text."""
    return enc.decode(l)

# One-shot interactive sampling: read an instruction from stdin, wrap it in the
# reduced Alpaca prompt, sample a continuation and print the decoded tokens.
with torch.no_grad():  # inference only, so gradient tracking is disabled
    print('---------------')
    print("INPUT:")
    # Build the prompt in the "### Instruction: ... ### Response:" format.
    sample = "### Instruction: "+input()+" ### Response:"
    # Encode the prompt and add a leading dimension of size 1 via [None, ...].
    x = (torch.tensor(encode(sample), dtype=torch.long, device=device)[None, ...])
    # Use the configured max_new_tokens rather than a hard-coded 10, so the
    # setting at the top of the script actually controls the output length.
    y = model.generate(x, max_new_tokens, temperature=temperature, top_k=top_k)
    print('---------------')
    print("OUTPUT:")
    # Decode the first (only) sequence; presumably it holds the prompt tokens
    # followed by the sampled ones - confirm against model.generate.
    print(decode(y[0].tolist()))