import os
from collections import OrderedDict

import yaml
import torch
from transformers import AlbertConfig, AlbertModel


class CustomAlbert(AlbertModel):
    def forward(self, *args, **kwargs):
        # Run the standard ALBERT forward pass.
        outputs = super().forward(*args, **kwargs)

        # Return only the token-level hidden states rather than the full
        # model output object.
        return outputs.last_hidden_state


def load_plbert(log_dir):
    # Build the ALBERT configuration from the PL-BERT training config.
    config_path = os.path.join(log_dir, "config.yml")
    with open(config_path) as f:
        plbert_config = yaml.safe_load(f)

    albert_base_configuration = AlbertConfig(**plbert_config['model_params'])
    bert = CustomAlbert(albert_base_configuration)

    # Find the latest checkpoint: files in log_dir named "step_<iteration>.t7".
    ckpts = [f for f in os.listdir(log_dir)
             if f.startswith("step_") and os.path.isfile(os.path.join(log_dir, f))]
    iters = sorted(int(f.split('_')[-1].split('.')[0]) for f in ckpts)[-1]

    # Load the checkpoint; older torch versions do not accept weights_only.
    ckpt_path = os.path.join(log_dir, "step_" + str(iters) + ".t7")
    try:
        checkpoint = torch.load(ckpt_path, map_location='cpu', weights_only=False)
    except TypeError:
        checkpoint = torch.load(ckpt_path, map_location='cpu')
    state_dict = checkpoint['net']

    # Checkpoint keys look like "module.encoder.<param>": strip the
    # "module." (7 chars) and, where present, the "encoder." (8 chars)
    # prefixes so they match AlbertModel's parameter names.
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        name = k[7:]
        if name.startswith('encoder.'):
            name = name[8:]
        new_state_dict[name] = v

    # Drop position_ids buffers saved by older transformers versions; the
    # model recreates them and a saved copy can mismatch in size.
    new_state_dict.pop("embeddings.position_ids", None)
    new_state_dict.pop("position_ids", None)
    bert.load_state_dict(new_state_dict, strict=False)

    return bert
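

# Example usage (a minimal sketch): this assumes `log_dir` points at a PL-BERT
# training directory containing config.yml with a `model_params` section and
# at least one step_<iteration>.t7 checkpoint. The directory path and the
# dummy input below are hypothetical placeholders.
if __name__ == "__main__":
    plbert = load_plbert("Utils/PLBERT")  # hypothetical checkpoint directory
    plbert.eval()

    # Dummy batch of phoneme-token ids (all zeros) just to exercise a forward pass.
    dummy_ids = torch.zeros(1, 16, dtype=torch.long)
    with torch.no_grad():
        hidden = plbert(dummy_ids, attention_mask=torch.ones_like(dummy_ids))
    print(hidden.shape)  # (batch_size, seq_len, hidden_size)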