Is it possible to get a 1.1 and 3B version?
#3
by
LaferriereJC
- opened
I would love to train one from scratch. Would the following work?
"""Create a zero-initialized ~3.75B-parameter Mistral-architecture model.

Builds a half-depth Mistral (16 layers instead of Mistral-7B's 32, same
width), zeroes every weight, and saves it to the directory given as the
first command-line argument.
"""
from transformers import MistralConfig, AutoModelForCausalLM
import torch
import sys

# Mistral-7B width (4096 hidden / 14336 MLP, 32 heads, 8 KV heads for GQA)
# at half depth; this config results in ~3.75B parameters.
config = MistralConfig(
    hidden_size=4096,
    intermediate_size=14336,
    num_hidden_layers=16,
    num_attention_heads=32,
    num_key_value_heads=8,
)

model = AutoModelForCausalLM.from_config(config, torch_dtype=torch.bfloat16)
print(f'Created a new model with {model.num_parameters()} parameters.')

# Fail early with a usage message instead of an IndexError at save time.
if len(sys.argv) < 2:
    sys.exit('usage: python build_model.py <output_dir>')

# Zero the weights in place; param.zero_() avoids allocating a second full
# copy of every tensor the way `param.data = torch.zeros(...)` would.
with torch.no_grad():
    for param in model.parameters():
        param.zero_()

model.save_pretrained(sys.argv[1])
LaferriereJC
changed discussion status to
closed