import math
import yaml
import gradio as gr
import huggingface_hub
import torch
import torch.nn as nn
import torch.nn.functional as F
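# Download model configs and weights from the jefsnacker/surname_generator repo on the Hugging Face Hub.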
mlp_config_path = huggingface_hub.hf_hub_download(
"jefsnacker/surname_generator",
"torch_mlp_config.yaml")
mlp_weights_path = huggingface_hub.hf_hub_download(
"jefsnacker/surname_generator",
"mlp_weights.pt")
wavenet_config_path = huggingface_hub.hf_hub_download(
"jefsnacker/surname_generator",
"wavenet_config.yaml")
wavenet_weights_path = huggingface_hub.hf_hub_download(
"jefsnacker/surname_generator",
"wavenet_weights.pt")
gpt_micro_config_path = huggingface_hub.hf_hub_download(
"jefsnacker/surname_generator",
"micro_gpt_config.yaml")
gpt_micro_weights_path = huggingface_hub.hf_hub_download(
"jefsnacker/surname_generator",
"micro_gpt_weights.pt")
gpt_rev_config_path = huggingface_hub.hf_hub_download(
"jefsnacker/surname_generator",
"rev_gpt_config.yaml")
gpt_rev_weights_path = huggingface_hub.hf_hub_download(
"jefsnacker/surname_generator",
"rev_gpt_weights.pt")
gpt_first_rev_config_path = huggingface_hub.hf_hub_download(
"jefsnacker/surname_generator",
"first_name_gpt_config.yaml")
gpt_first_rev_weights_path = huggingface_hub.hf_hub_download(
"jefsnacker/surname_generator",
"first_name_gpt_weights.pt")
with open(mlp_config_path, 'r') as file:
mlp_config = yaml.safe_load(file)
with open(wavenet_config_path, 'r') as file:
wavenet_config = yaml.safe_load(file)
with open(gpt_micro_config_path, 'r') as file:
gpt_micro_config = yaml.safe_load(file)
with open(gpt_rev_config_path, 'r') as file:
gpt_rev_config = yaml.safe_load(file)
with open(gpt_first_rev_config_path, 'r') as file:
gpt_first_rev_config = yaml.safe_load(file)
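# Each config carries the hyperparameters needed to rebuild its model, plus the
# stoi character-to-index vocabulary (and, for the "Rev" models, the
# num_final_chars_in_dataset setting) used during sampling below.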
##################################################################################
## MLP
##################################################################################
class MLP(nn.Module):
def __init__(self, num_char, hidden_nodes, embeddings, window, num_layers):
super(MLP, self).__init__()
self.window = window
self.hidden_nodes = hidden_nodes
self.embeddings = embeddings
        self.C = nn.Parameter(torch.randn((num_char, embeddings)) * 0.1)
self.first = nn.Linear(embeddings*window, hidden_nodes)
self.layers = nn.Sequential()
        for _ in range(num_layers):
            self.layers.extend(nn.Sequential(
                nn.Linear(hidden_nodes, hidden_nodes, bias=False),
                nn.BatchNorm1d(hidden_nodes),
                nn.Tanh()))
self.final = nn.Linear(hidden_nodes, num_char)
def forward(self, x):
x = self.C[x]
x = self.first(x.view(-1, self.window*self.embeddings))
x = self.layers(x)
x = self.final(x)
return x
def sample_char(self, x):
logits = self(x)
probs = F.softmax(logits, dim=1)
return torch.multinomial(probs, num_samples=1).item()
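# Shape walkthrough (illustrative numbers, not the trained config): with window=3 and
# embeddings=10, a (B, 3) batch of character indices is embedded via C to (B, 3, 10),
# flattened to (B, 30) for self.first, and pushed through the Linear/BatchNorm/Tanh
# stack to produce (B, num_char) logits.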
mlp = MLP(mlp_config['num_char'],
mlp_config['hidden_nodes'],
mlp_config['embeddings'],
mlp_config['window'],
mlp_config['num_layers'])
mlp.load_state_dict(torch.load(mlp_weights_path))
mlp.eval()
##################################################################################
## WaveNet
##################################################################################
class WaveNet(nn.Module):
def __init__(self, num_char, hidden_nodes, embeddings, window, num_layers):
super(WaveNet, self).__init__()
self.window = window
self.hidden_nodes = hidden_nodes
self.embeddings = embeddings
self.layers = nn.Sequential(
nn.Embedding(num_char, embeddings)
)
        for i in range(num_layers):
            # the first conv consumes the context window as its input channels
            nodes = window if i == 0 else hidden_nodes
            self.layers.extend(nn.Sequential(
                nn.Conv1d(nodes, hidden_nodes, kernel_size=2, stride=1, bias=False),
                nn.BatchNorm1d(hidden_nodes),
                nn.Tanh()))
        self.layers.extend(nn.Sequential(
            nn.Flatten(),
            nn.Linear(hidden_nodes*(embeddings-num_layers), num_char)
        ))
def forward(self, x):
return self.layers(x)
def sample_char(self, x):
logits = self(x)
probs = F.softmax(logits, dim=1)
return torch.multinomial(probs, num_samples=1).item()
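# Note the unconventional Conv1d layout: the embedded input (B, window, embeddings) is
# consumed as (batch, channels=window, length=embeddings), and each kernel_size=2 conv
# shortens the length axis by one, which is why the final Linear expects
# hidden_nodes*(embeddings-num_layers) features.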
wavenet = WaveNet(wavenet_config['num_char'],
wavenet_config['hidden_nodes'],
wavenet_config['embeddings'],
wavenet_config['window'],
wavenet_config['num_layers'])
wavenet.load_state_dict(torch.load(wavenet_weights_path))
wavenet.eval()
##################################################################################
## Transformer
##################################################################################
class NewGELU(nn.Module):
"""
Gaussian Error Linear Units (GELU) paper: https://arxiv.org/abs/1606.08415
"""
def forward(self, x):
return 0.5 * x * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))))
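    # On recent PyTorch (>= 1.12) this matches F.gelu(x, approximate='tanh').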
class GptAttention(nn.Module):
"""
    Causal self-attention: q, k, and v are all projections of the same input.
    Intended for decoder-only (GPT-style) transformers.
"""
def __init__(self, config):
super(GptAttention, self).__init__()
self.config = config
assert self.config["d_model"] % self.config["heads"] == 0
self.heads = self.config["heads"]
self.w_attn = nn.Linear(self.config["d_model"], 3*self.config["d_model"])
self.head = nn.Linear(self.config["d_model"], self.config["d_model"])
self.attn_dropout = nn.Dropout(config["attn_pdrop"])
self.resid_dropout = nn.Dropout(config["resid_pdrop"])
# causal mask to ensure that attention is only applied to the left in the input sequence
self.register_buffer(
"bias",
torch.tril(
torch.ones(
self.config["window"],
self.config["window"])
).view(1, 1, self.config["window"], self.config["window"])
)
def forward(self, x):
B, window, embs = x.shape
        q, v, k = self.w_attn(x).split(self.config["d_model"], dim=2)  # split order (q, v, k) must match training
        # reshape each to (B, heads, window, embs // heads)
q = q.view(
B,
window,
self.config["heads"],
embs // self.config["heads"]
).transpose(1, 2)
k = k.view(
B,
window,
self.config["heads"],
embs // self.config["heads"]
).transpose(1, 2)
v = v.view(
B,
window,
self.config["heads"],
embs // self.config["heads"]
).transpose(1, 2)
        # Self-attend: (B, heads, window, hs) x (B, heads, hs, window) -> (B, heads, window, window), hs = embs // heads
        scores = q @ k.transpose(-2, -1) / math.sqrt(k.size(-1))
        scores = scores.masked_fill(self.bias[:, :, :window, :window] == 0, float('-inf'))
        probs = F.softmax(scores, dim=-1)
        probs = self.attn_dropout(probs)
        attn = probs @ v
        attn = attn.transpose(1, 2).contiguous().view(B, window, embs)
        return self.resid_dropout(self.head(attn))
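    # On PyTorch 2.0+, the scores/mask/softmax/dropout sequence in forward() could be
    # fused into F.scaled_dot_product_attention(q, k, v, is_causal=True,
    # dropout_p=self.attn_dropout.p if self.training else 0.0); kept explicit here for readability.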
class FeedForward(nn.Module):
    def __init__(self, config):
        super(FeedForward, self).__init__()
        self.l1 = nn.Linear(config["d_model"], 4*config["d_model"])
        self.l2 = nn.Linear(4*config["d_model"], config["d_model"])
        self.act = NewGELU()  # stateless activation module
        self.dropout = nn.Dropout(config["resid_pdrop"])
    def forward(self, x):
        x = self.act(self.l1(x))
        return self.dropout(self.l2(x))
class Block(nn.Module):
def __init__(self, config):
super(Block, self).__init__()
self.attn = GptAttention(config)
self.norm1 = nn.LayerNorm(config["d_model"])
self.ff = FeedForward(config)
self.norm2 = nn.LayerNorm(config["d_model"])
def forward(self, x):
x = self.norm1(x + self.attn(x))
x = self.norm2(x + self.ff(x))
return x
class GPT(nn.Module):
def __init__(self, config):
super(GPT, self).__init__()
self.config = config
self.vocab_emb = nn.Embedding(self.config["vocab"], self.config["d_model"])
self.pos_emb = nn.Embedding(self.config["window"], self.config["d_model"])
self.emb_dropout = nn.Dropout(config["embd_pdrop"])
self.blocks = nn.ModuleList([Block(self.config) for _ in range(self.config["blocks"])])
self.head_layer_norm = nn.LayerNorm(config["d_model"])
self.head = nn.Linear(self.config["d_model"], self.config["vocab"])
def forward(self, x):
vocab_emb = self.vocab_emb(x)
pos_emb = self.pos_emb(torch.arange(0, x.shape[1], dtype=torch.long, device=x.device))
x = self.emb_dropout(vocab_emb + pos_emb)
for b in self.blocks:
x = b(x)
x = self.head_layer_norm(x)
x = self.head(x)
return x
def configure_opt(self):
p_decay = set()
p_no_decay = set()
whitelist_weight_modules = (torch.nn.Linear, )
blacklist_weight_modules = (torch.nn.LayerNorm, torch.nn.Embedding)
for mn, m in self.named_modules():
for pn, p in m.named_parameters():
fpn = '%s.%s' % (mn, pn) if mn else pn # full param name
# random note: because named_modules and named_parameters are recursive
# we will see the same tensors p many many times. but doing it this way
# allows us to know which parent module any tensor p belongs to...
if pn.endswith('bias'):
# all biases will not be decayed
p_no_decay.add(fpn)
elif pn.endswith('weight') and isinstance(m, whitelist_weight_modules):
# weights of whitelist modules will be weight decayed
p_decay.add(fpn)
elif pn.endswith('weight') and isinstance(m, blacklist_weight_modules):
# weights of blacklist modules will NOT be weight decayed
p_no_decay.add(fpn)
# validate that we considered every parameter
param_dict = {pn: p for pn, p in self.named_parameters()}
inter_params = p_decay & p_no_decay
union_params = p_decay | p_no_decay
assert len(inter_params) == 0, "parameters %s made it into both decay/no_decay sets!" % (str(inter_params), )
assert len(param_dict.keys() - union_params) == 0, "parameters %s were not separated into either decay/no_decay set!" \
% (str(param_dict.keys() - union_params), )
# create the pytorch optimizer object
optim_groups = [
{"params": [param_dict[pn] for pn in sorted(list(p_decay))], "weight_decay": self.config["weight_decay"]},
{"params": [param_dict[pn] for pn in sorted(list(p_no_decay))], "weight_decay": 0.0},
]
optimizer = torch.optim.AdamW(
optim_groups,
lr=self.config["lr"],
betas=(self.config["b1"], self.config["b2"])
)
return optimizer
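    # configure_opt() builds an AdamW optimizer with decay/no-decay parameter groups;
    # it is only needed for training and goes unused in this inference-only app.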
def sample_char(self, x):
logits = self(x)
probs = F.softmax(logits[:,-1,:], dim=1)
return torch.multinomial(probs, num_samples=1).item()
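# Unlike MLP/WaveNet, GPT returns logits for every position in the window, so
# sample_char samples from the final position's logits only ([:, -1, :]).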
gpt_micro = GPT(gpt_micro_config)
gpt_micro.load_state_dict(torch.load(gpt_micro_weights_path))
gpt_micro.eval()
gpt_rev = GPT(gpt_rev_config)
gpt_rev.load_state_dict(torch.load(gpt_rev_weights_path))
gpt_rev.eval()
gpt_first_rev = GPT(gpt_first_rev_config)
gpt_first_rev.load_state_dict(torch.load(gpt_first_rev_weights_path))
gpt_first_rev.eval()
##################################################################################
## Gradio App
##################################################################################
def generate_names(name_start, name_end, number_of_names, model):
    if number_of_names < 0:
        return "Error: Please enter a non-negative number of names to generate!"
# Select model
if model == "MLP":
config = mlp_config
sample_fcn = mlp.sample_char
elif model == "WaveNet":
config = wavenet_config
sample_fcn = wavenet.sample_char
elif model == "GPT Micro":
config = gpt_micro_config
sample_fcn = gpt_micro.sample_char
elif model == "GPT Rev":
config = gpt_rev_config
sample_fcn = gpt_rev.sample_char
elif model == "GPT First Rev":
config = gpt_first_rev_config
sample_fcn = gpt_first_rev.sample_char
else:
return "Error: Model not selected"
stoi = config['stoi']
    itos = {i: s for s, i in stoi.items()}
output = ""
# Sanitize user inputs, and append errors to output
name_end = name_end.lower()
name_start = name_start.lower()
    for c in name_end:
        if c not in stoi:
            return "Error: Please change the name end. \"" + c + "\" is not in the training set."
    for c in name_start:
        if c not in stoi:
            return "Error: Please change the name start. \"" + c + "\" is not in the training set."
if "num_final_chars_in_dataset" in config and len(name_end) > config["num_final_chars_in_dataset"]:
name_end = name_end[-config["num_final_chars_in_dataset"]:]
output += "Only accepts up to " + str(config["num_final_chars_in_dataset"]) + " final chars. Using: " + str(name_end) + "\n"
elif "num_final_chars_in_dataset" not in config and name_end != "":
output += "Final chars not used. Need to use a \"Rev\" model trained with this feature.\n"
    # Generate the requested names
    for _ in range(int(number_of_names)):
name = ""
context = [0] * config['window']
if "num_final_chars_in_dataset" in config:
for c in name_end:
context = context[1:] + [stoi[c]]
context = context[1:] + [stoi['.']]
# Initialize name with user input
for c in name_start:
name += c
context = context[1:] + [stoi[c]]
# Run inference to finish off the name
while True:
x = torch.tensor(context).view(1, -1)
ix = sample_fcn(x)
context = context[1:] + [ix]
name += itos[ix]
            if ix == 0:  # index 0 marks end-of-name
                break
output += name + "\n"
return output
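# Example of calling the generator directly, bypassing the UI (inputs are illustrative):
#   print(generate_names("mc", "", 3, "GPT Rev"))
# prints three names starting with "mc", each ending with the terminator character
# (index 0, '.' in these vocabularies).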
demo = gr.Interface(
fn=generate_names,
inputs=[
gr.Textbox(placeholder="Start name with..."),
gr.Textbox(placeholder="End name with... (only works for rev model)"),
gr.Number(value=5),
gr.Dropdown(["MLP", "WaveNet", "GPT Micro", "GPT Rev", "GPT First Rev"], value="GPT Rev"),
],
outputs="text",
)
demo.launch()