RaveAI
This is my model (490M), trained on 7.5B tokens using our own dataset. The model is trained to solve simple math problems.
The model has a context of 64 tokens.
Format: Alpaca (reduced)
### Instruction: INSTRUCTION ### Response: RESPONSE
Credits
Ttimofeyka - model trainer
TechnoChicken - creator of the dataset
Example script to run the model
import pickle
import torch
import tiktoken
from model import GPTConfig, GPT
# Interactive sampling script: loads the checkpoint, reads one instruction from
# stdin, wraps it in the reduced Alpaca prompt format and prints the completion.

# --- Sampling configuration ---------------------------------------------------
max_new_tokens = 30  # number of tokens to generate; passed to model.generate below
temperature = 0.7
top_k = 200  # retain only the top_k most likely tokens, clamp others to have 0 probability
seed = 1337
# Pick the GPU when one is available and fall back to the CPU otherwise, so the
# script also runs on machines without CUDA. Override by hand if needed.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# NOTE(review): dtype is selected here but nothing in this script reads it;
# kept so the setting stays visible if mixed-precision inference is added.
dtype = 'bfloat16' if torch.cuda.is_available() and torch.cuda.is_bf16_supported() else 'float16'  # 'float32' or 'bfloat16' or 'float16'
compile = True  # wrap the model with torch.compile before sampling

# --- Reproducibility and backend flags ----------------------------------------
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cuda.matmul.allow_tf32 = True  # allow TF32 for matmuls
torch.backends.cudnn.allow_tf32 = True  # allow TF32 for cuDNN

# --- Model loading ------------------------------------------------------------
ckpt_path = 'ckpt_f16.pt'
# SECURITY: torch.load unpickles the file; only load checkpoints you trust.
checkpoint = torch.load(ckpt_path, map_location=device)
gptconf = GPTConfig(**checkpoint['model_args'])
model = GPT(gptconf)
state_dict = checkpoint['model']
# Strip the '_orig_mod.' key prefix (presumably left by saving a compiled
# model) so the keys match the parameter names of a plain GPT instance.
unwanted_prefix = '_orig_mod.'
for k in list(state_dict):  # iterate over a snapshot: the dict is mutated in the loop
    if k.startswith(unwanted_prefix):
        state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)
model.load_state_dict(state_dict)
model.eval()
model.to(device)
if compile:
    model = torch.compile(model)

# --- Tokenizer ----------------------------------------------------------------
enc = tiktoken.get_encoding("gpt2")


def encode(s):
    """Return the GPT-2 token ids for *s*, allowing the <|endoftext|> special token."""
    return enc.encode(s, allowed_special={"<|endoftext|>"})


def decode(l):
    """Return the text for the list of GPT-2 token ids *l*."""
    return enc.decode(l)


# --- Prompt, generate, print --------------------------------------------------
with torch.no_grad():
    print('---------------')
    print("INPUT:")
    # Reduced Alpaca prompt: the model continues the text after "### Response:".
    sample = "### Instruction: " + input() + " ### Response:"
    # [None, ...] adds a leading batch dimension of size 1.
    x = torch.tensor(encode(sample), dtype=torch.long, device=device)[None, ...]
    # Honour the configured max_new_tokens (previously a hard-coded 10 was
    # passed here and the setting above was silently ignored).
    y = model.generate(x, max_new_tokens, temperature=temperature, top_k=top_k)
    print('---------------')
    print("OUTPUT:")
    # y[0] is the first (only) sequence in the batch; the whole sequence is decoded.
    print(decode(y[0].tolist()))
- Downloads last month
- 0
Unable to determine this model's library. Check the
docs
.