---
license: mpl-2.0
language:
- en
---
# RaveAI

This is my 490M-parameter model, trained on 7.5B tokens of my own dataset. It is trained to solve simple mathematics problems and has a context window of 64 tokens.

### Format: Alpaca (reduced)
```
### Instruction: INSTRUCTION ### Response: RESPONSE
```

### Credits

Ttimofeyka - **model trainer**

TechnoChicken - **creator of the dataset**

### Example script for running the model
```py
import torch
import tiktoken
from contextlib import nullcontext
from model import GPTConfig, GPT

max_new_tokens = 30 # number of tokens generated per response
temperature = 0.7
top_k = 200 # retain only the top_k most likely tokens, clamp others to have 0 probability
seed = 1337
device = 'cuda' # or write 'cpu' if you don't have a GPU
dtype = 'bfloat16' if torch.cuda.is_available() and torch.cuda.is_bf16_supported() else 'float16' # 'float32' or 'bfloat16' or 'float16'
compile = True

torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True
device_type = 'cuda' if 'cuda' in device else 'cpu'
ptdtype = {'float32': torch.float32, 'bfloat16': torch.bfloat16, 'float16': torch.float16}[dtype]
ctx = nullcontext() if device_type == 'cpu' else torch.amp.autocast(device_type=device_type, dtype=ptdtype)

# load the checkpoint and rebuild the model
ckpt_path = 'ckpt_f16.pt'
checkpoint = torch.load(ckpt_path, map_location=device)
gptconf = GPTConfig(**checkpoint['model_args'])
model = GPT(gptconf)
state_dict = checkpoint['model']
unwanted_prefix = '_orig_mod.' # prefix added to keys by torch.compile during training
for k, v in list(state_dict.items()):
    if k.startswith(unwanted_prefix):
        state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)
model.load_state_dict(state_dict)
model.eval()
model.to(device)
if compile:
    model = torch.compile(model)

enc = tiktoken.get_encoding("gpt2")
encode = lambda s: enc.encode(s, allowed_special={"<|endoftext|>"})
decode = lambda l: enc.decode(l)

with torch.no_grad():
    with ctx:
        print('---------------')
        print("INPUT:")
        # wrap the user's question in the reduced Alpaca format
        sample = "### Instruction: " + input() + " ### Response:"
        x = torch.tensor(encode(sample), dtype=torch.long, device=device)[None, ...]
        y = model.generate(x, max_new_tokens, temperature=temperature, top_k=top_k)
        print('---------------')
        print("OUTPUT:")
        print(decode(y[0].tolist()))
```
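If you want to query the model programmatically rather than interactively, the sampling step can be wrapped in a small helper. This is a minimal sketch, assuming the `model`, `encode`, `decode`, `device`, and sampling parameters from the script above are already in scope; the `ask` helper, its prompt-stripping logic, and the example answer are illustrative assumptions, not part of the original script:

```py
def ask(question: str) -> str:
    # Build the reduced Alpaca prompt, generate, and return only the continuation.
    # Note: the model's context is 64 tokens, so keep questions short.
    prompt = "### Instruction: " + question + " ### Response:"
    x = torch.tensor(encode(prompt), dtype=torch.long, device=device)[None, ...]
    with torch.no_grad():
        y = model.generate(x, max_new_tokens, temperature=temperature, top_k=top_k)
    full = decode(y[0].tolist())
    # generate() echoes the prompt, so strip it to keep just the answer
    return full[len(prompt):].strip()

print(ask("2+2="))  # hypothetical usage; prints the model's answer, e.g. "4"
```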