import math

import gradio as gr
import huggingface_hub
import torch
import torch.nn as nn
import torch.nn.functional as F
import yaml
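
# Download the model configs and trained weights from the
# jefsnacker/surname_generator repo on the Hugging Face Hub.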
mlp_config_path = huggingface_hub.hf_hub_download(
    "jefsnacker/surname_generator",
    "torch_mlp_config.yaml")
mlp_weights_path = huggingface_hub.hf_hub_download(
    "jefsnacker/surname_generator",
    "mlp_weights.pt")

wavenet_config_path = huggingface_hub.hf_hub_download(
    "jefsnacker/surname_generator",
    "wavenet_config.yaml")
wavenet_weights_path = huggingface_hub.hf_hub_download(
    "jefsnacker/surname_generator",
    "wavenet_weights.pt")

gpt_micro_config_path = huggingface_hub.hf_hub_download(
    "jefsnacker/surname_generator",
    "micro_gpt_config.yaml")
gpt_micro_weights_path = huggingface_hub.hf_hub_download(
    "jefsnacker/surname_generator",
    "micro_gpt_weights.pt")

gpt_rev_config_path = huggingface_hub.hf_hub_download(
    "jefsnacker/surname_generator",
    "rev_gpt_config.yaml")
gpt_rev_weights_path = huggingface_hub.hf_hub_download(
    "jefsnacker/surname_generator",
    "rev_gpt_weights.pt")

gpt_first_rev_config_path = huggingface_hub.hf_hub_download(
    "jefsnacker/surname_generator",
    "first_name_gpt_config.yaml")
gpt_first_rev_weights_path = huggingface_hub.hf_hub_download(
    "jefsnacker/surname_generator",
    "first_name_gpt_weights.pt")
with open(mlp_config_path, 'r') as file:
    mlp_config = yaml.safe_load(file)

with open(wavenet_config_path, 'r') as file:
    wavenet_config = yaml.safe_load(file)

with open(gpt_micro_config_path, 'r') as file:
    gpt_micro_config = yaml.safe_load(file)

with open(gpt_rev_config_path, 'r') as file:
    gpt_rev_config = yaml.safe_load(file)

with open(gpt_first_rev_config_path, 'r') as file:
    gpt_first_rev_config = yaml.safe_load(file)
##################################################################################
## MLP
##################################################################################
class MLP(nn.Module):
    def __init__(self, num_char, hidden_nodes, embeddings, window, num_layers):
        super(MLP, self).__init__()

        self.window = window
        self.hidden_nodes = hidden_nodes
        self.embeddings = embeddings

        self.C = nn.Parameter(torch.randn((num_char, embeddings)) * 0.1, requires_grad=True)
        self.first = nn.Linear(embeddings*window, hidden_nodes)

        self.layers = nn.Sequential()
        for i in range(num_layers):
            self.layers = self.layers.extend(nn.Sequential(
                nn.Linear(hidden_nodes, hidden_nodes, bias=False),
                nn.BatchNorm1d(hidden_nodes),
                nn.Tanh()))

        self.final = nn.Linear(hidden_nodes, num_char)

    def forward(self, x):
        x = self.C[x]
        x = self.first(x.view(-1, self.window*self.embeddings))
        x = self.layers(x)
        x = self.final(x)
        return x

    def sample_char(self, x):
        logits = self(x)
        probs = F.softmax(logits, dim=1)
        return torch.multinomial(probs, num_samples=1).item()
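
# Build the MLP from its config and load the trained weights.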
mlp = MLP(mlp_config['num_char'],
          mlp_config['hidden_nodes'],
          mlp_config['embeddings'],
          mlp_config['window'],
          mlp_config['num_layers'])
mlp.load_state_dict(torch.load(mlp_weights_path))
mlp.eval()
##################################################################################
## WaveNet
##################################################################################
class WaveNet(nn.Module):
    def __init__(self, num_char, hidden_nodes, embeddings, window, num_layers):
        super(WaveNet, self).__init__()

        self.window = window
        self.hidden_nodes = hidden_nodes
        self.embeddings = embeddings

        self.layers = nn.Sequential(
            nn.Embedding(num_char, embeddings)
        )

        for i in range(num_layers):
            if i == 0:
                nodes = window
            else:
                nodes = hidden_nodes

            self.layers = self.layers.extend(nn.Sequential(
                nn.Conv1d(nodes, hidden_nodes, kernel_size=2, stride=1, bias=False),
                nn.BatchNorm1d(hidden_nodes),
                nn.Tanh()))

        self.layers = self.layers.extend(nn.Sequential(
            nn.Flatten(),
            nn.Linear(hidden_nodes*(embeddings-num_layers), num_char)
        ))

    def forward(self, x):
        return self.layers(x)

    def sample_char(self, x):
        logits = self(x)
        probs = F.softmax(logits, dim=1)
        return torch.multinomial(probs, num_samples=1).item()
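
# Build the WaveNet from its config and load the trained weights.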
wavenet = WaveNet(wavenet_config['num_char'],
                  wavenet_config['hidden_nodes'],
                  wavenet_config['embeddings'],
                  wavenet_config['window'],
                  wavenet_config['num_layers'])
wavenet.load_state_dict(torch.load(wavenet_weights_path))
wavenet.eval()
##################################################################################
## Transformer
##################################################################################
class NewGELU(nn.Module):
    """
    Gaussian Error Linear Units (GELU) paper: https://arxiv.org/abs/1606.08415
    """
    def forward(self, x):
        return 0.5 * x * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))))
class GptAttention(nn.Module):
    """
    Multi-head self-attention where q, k, and v are all projected from the same
    input, as used in encoder-only/decoder-only transformers.
    """
    def __init__(self, config):
        super(GptAttention, self).__init__()
        self.config = config

        assert self.config["d_model"] % self.config["heads"] == 0

        self.heads = self.config["heads"]

        self.w_attn = nn.Linear(self.config["d_model"], 3*self.config["d_model"])
        self.head = nn.Linear(self.config["d_model"], self.config["d_model"])

        self.attn_dropout = nn.Dropout(config["attn_pdrop"])
        self.resid_dropout = nn.Dropout(config["resid_pdrop"])

        # causal mask to ensure that attention is only applied to the left in the input sequence
        self.register_buffer(
            "bias",
            torch.tril(
                torch.ones(
                    self.config["window"],
                    self.config["window"])
            ).view(1, 1, self.config["window"], self.config["window"])
        )
    def forward(self, x):
        B, window, embs = x.shape

        q, v, k = self.w_attn(x).split(self.config["d_model"], dim=2)

        # (B, heads, window, embs)
        q = q.view(
            B,
            window,
            self.config["heads"],
            embs // self.config["heads"]
        ).transpose(1, 2)
        k = k.view(
            B,
            window,
            self.config["heads"],
            embs // self.config["heads"]
        ).transpose(1, 2)
        v = v.view(
            B,
            window,
            self.config["heads"],
            embs // self.config["heads"]
        ).transpose(1, 2)

        # Self-attend: (B, heads, window, embs) x (B, heads, embs, window) -> (B, heads, window, window)
        scores = q @ k.transpose(-2, -1) / math.sqrt(k.size(-1))
        mask = scores.masked_fill(self.bias[:,:,:window,:window] == 0, float('-inf'))
        probs = F.softmax(mask, dim=-1)
        probs = self.attn_dropout(probs)

        attn = probs @ v
        attn = attn.transpose(1, 2).contiguous().view(B, window, embs)

        return self.resid_dropout(self.head(attn))
class FeedForward(nn.Module):
    def __init__(self, config):
        super(FeedForward, self).__init__()

        self.l1 = nn.Linear(config["d_model"], 4*config["d_model"])
        self.l2 = nn.Linear(4*config["d_model"], config["d_model"])
        self.dropout = nn.Dropout(config["resid_pdrop"])

    def forward(self, x):
        x = NewGELU()(self.l1(x))
        return self.dropout(self.l2(x))
class Block(nn.Module):
    def __init__(self, config):
        super(Block, self).__init__()

        self.attn = GptAttention(config)
        self.norm1 = nn.LayerNorm(config["d_model"])
        self.ff = FeedForward(config)
        self.norm2 = nn.LayerNorm(config["d_model"])

    def forward(self, x):
        x = self.norm1(x + self.attn(x))
        x = self.norm2(x + self.ff(x))
        return x
class GPT(nn.Module):
    def __init__(self, config):
        super(GPT, self).__init__()
        self.config = config

        self.vocab_emb = nn.Embedding(self.config["vocab"], self.config["d_model"])
        self.pos_emb = nn.Embedding(self.config["window"], self.config["d_model"])
        self.emb_dropout = nn.Dropout(config["embd_pdrop"])

        self.blocks = nn.ModuleList([Block(self.config) for _ in range(self.config["blocks"])])

        self.head_layer_norm = nn.LayerNorm(config["d_model"])
        self.head = nn.Linear(self.config["d_model"], self.config["vocab"])

    def forward(self, x):
        vocab_emb = self.vocab_emb(x)
        pos_emb = self.pos_emb(torch.arange(0, x.shape[1], dtype=torch.long, device=x.device))

        x = self.emb_dropout(vocab_emb + pos_emb)

        for b in self.blocks:
            x = b(x)

        x = self.head_layer_norm(x)
        x = self.head(x)
        return x
    def configure_opt(self):
        p_decay = set()
        p_no_decay = set()

        whitelist_weight_modules = (torch.nn.Linear, )
        blacklist_weight_modules = (torch.nn.LayerNorm, torch.nn.Embedding)

        for mn, m in self.named_modules():
            for pn, p in m.named_parameters():
                fpn = '%s.%s' % (mn, pn) if mn else pn  # full param name
                # note: because named_modules and named_parameters are recursive
                # we will see the same tensors p many times, but doing it this way
                # lets us know which parent module any tensor p belongs to
                if pn.endswith('bias'):
                    # all biases will not be decayed
                    p_no_decay.add(fpn)
                elif pn.endswith('weight') and isinstance(m, whitelist_weight_modules):
                    # weights of whitelist modules will be weight decayed
                    p_decay.add(fpn)
                elif pn.endswith('weight') and isinstance(m, blacklist_weight_modules):
                    # weights of blacklist modules will NOT be weight decayed
                    p_no_decay.add(fpn)

        # validate that we considered every parameter
        param_dict = {pn: p for pn, p in self.named_parameters()}
        inter_params = p_decay & p_no_decay
        union_params = p_decay | p_no_decay
        assert len(inter_params) == 0, "parameters %s made it into both decay/no_decay sets!" % (str(inter_params), )
        assert len(param_dict.keys() - union_params) == 0, "parameters %s were not separated into either decay/no_decay set!" \
            % (str(param_dict.keys() - union_params), )

        # create the pytorch optimizer object
        optim_groups = [
            {"params": [param_dict[pn] for pn in sorted(list(p_decay))], "weight_decay": self.config["weight_decay"]},
            {"params": [param_dict[pn] for pn in sorted(list(p_no_decay))], "weight_decay": 0.0},
        ]
        optimizer = torch.optim.AdamW(
            optim_groups,
            lr=self.config["lr"],
            betas=(self.config["b1"], self.config["b2"])
        )
        return optimizer
    def sample_char(self, x):
        logits = self(x)
        probs = F.softmax(logits[:,-1,:], dim=1)
        return torch.multinomial(probs, num_samples=1).item()
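
# Build the three GPT variants and load their trained weights.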
gpt_micro = GPT(gpt_micro_config)
gpt_micro.load_state_dict(torch.load(gpt_micro_weights_path))
gpt_micro.eval()

gpt_rev = GPT(gpt_rev_config)
gpt_rev.load_state_dict(torch.load(gpt_rev_weights_path))
gpt_rev.eval()

gpt_first_rev = GPT(gpt_first_rev_config)
gpt_first_rev.load_state_dict(torch.load(gpt_first_rev_weights_path))
gpt_first_rev.eval()
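
# Optional sanity check (a sketch; assumes index 0 is the '.' terminator, as the
# sampling loop below does): a dummy context of window-many index-0 tokens should
# return a single sampled vocabulary index, e.g.
#   x = torch.zeros(1, gpt_rev_config["window"], dtype=torch.long)
#   print(gpt_rev.sample_char(x))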
##################################################################################
## Gradio App
##################################################################################
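# generate_names samples names one character at a time from the chosen model,
# seeded with the user's starting string and, for models trained with final
# characters ("Rev" models), conditioned on the requested ending.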
def generate_names(name_start, name_end, number_of_names, model):
    if number_of_names < 0:
        return "Error: Please enter a positive number of names to generate!"

    # Select model
    if model == "MLP":
        config = mlp_config
        sample_fcn = mlp.sample_char
    elif model == "WaveNet":
        config = wavenet_config
        sample_fcn = wavenet.sample_char
    elif model == "GPT Micro":
        config = gpt_micro_config
        sample_fcn = gpt_micro.sample_char
    elif model == "GPT Rev":
        config = gpt_rev_config
        sample_fcn = gpt_rev.sample_char
    elif model == "GPT First Rev":
        config = gpt_first_rev_config
        sample_fcn = gpt_first_rev.sample_char
    else:
        return "Error: Model not selected"
    stoi = config['stoi']
    itos = {i: s for s, i in stoi.items()}

    output = ""

    # Sanitize user inputs, and append errors to output
    name_end = name_end.lower()
    name_start = name_start.lower()

    for c in name_end:
        if c not in stoi:
            return "Please change name end. \"" + c + "\" not included in the training set."

    for c in name_start:
        if c not in stoi:
            return "Please change name start. \"" + c + "\" not included in the training set."

    if "num_final_chars_in_dataset" in config and len(name_end) > config["num_final_chars_in_dataset"]:
        name_end = name_end[-config["num_final_chars_in_dataset"]:]
        output += "Only accepts up to " + str(config["num_final_chars_in_dataset"]) + " final chars. Using: " + str(name_end) + "\n"
    elif "num_final_chars_in_dataset" not in config and name_end != "":
        output += "Final chars not used. Need to use a \"Rev\" model trained with this feature.\n"
    ## Print requested names
    for _ in range(int(number_of_names)):
        name = ""
        context = [0] * config['window']

        if "num_final_chars_in_dataset" in config:
            for c in name_end:
                context = context[1:] + [stoi[c]]
            context = context[1:] + [stoi['.']]

        # Initialize name with user input
        for c in name_start:
            name += c
            context = context[1:] + [stoi[c]]

        # Run inference to finish off the name
        while True:
            x = torch.tensor(context).view(1, -1)
            ix = sample_fcn(x)

            context = context[1:] + [ix]
            name += itos[ix]

            if ix == 0:
                break

        output += name + "\n"

    return output
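
# Wire the generator into a Gradio interface: start text, optional ending text,
# number of names, and a dropdown to pick the model.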
demo = gr.Interface(
    fn=generate_names,
    inputs=[
        gr.Textbox(placeholder="Start name with..."),
        gr.Textbox(placeholder="End name with... (only works for rev model)"),
        gr.Number(value=5),
        gr.Dropdown(["MLP", "WaveNet", "GPT Micro", "GPT Rev", "GPT First Rev"], value="GPT Rev"),
    ],
    outputs="text",
)

demo.launch()