whuanle
/

torchcsharp

Model card Files Files and versions Community

torchcsharp / exportsd.py

whuanle

first commit.

d77868a 8 months ago

raw

history blame contribute delete

13.5 kB

	#
	# Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information.
	#
	import io
	import torch
	import leb128
	import numpy as np

	def _elem_type(t):
	    """
	    Map the dtype of a tensor to the integer element-type code that is
	    written to the stream.

	    :param t: An object exposing a 'dtype' attribute (normally a tensor).
	    :return: The integer code of the dtype, or 4711 when the dtype is not
	             one of those listed below.
	    """
	    known_codes = (
	        (torch.uint8, 0),
	        (torch.int8, 1),
	        (torch.int16, 2),
	        (torch.int32, 3),
	        (torch.int64, 4),
	        (torch.float16, 5),
	        (torch.float32, 6),
	        (torch.float64, 7),
	        (torch.bool, 11),
	        (torch.bfloat16, 15),
	    )

	    actual = t.dtype
	    for candidate, code in known_codes:
	        if actual == candidate:
	            return code

	    # Fallback code for every dtype that has no entry in the table.
	    return 4711

	def _write_tensor(t, stream):
	    """
	    Write a single tensor to a binary stream.

	    Written in order: the LEB128-encoded element-type code, the
	    LEB128-encoded number of dimensions, each dimension size
	    LEB128-encoded, then the raw element bytes in C order.

	    :param t: The tensor to write.
	    :param stream: A write stream opened for binary I/O.
	    """
	    stream.write(leb128.u.encode(_elem_type(t)))
	    stream.write(leb128.u.encode(len(t.shape)))
	    for dim in t.shape:
	        stream.write(leb128.u.encode(dim))

	    data = t.detach().cpu()
	    if data.dtype == torch.bfloat16:
	        # NumPy has no bfloat16 dtype, so .numpy() raises for such tensors.
	        # Reinterpreting the 2-byte elements as int16 keeps the raw bytes
	        # unchanged while making the conversion possible.
	        data = data.view(torch.int16)
	    stream.write(data.numpy().tobytes())

	def write_tensor(t, file_name):
	    """
	    Write a single tensor to a file, creating or overwriting it.

	    :param t: The tensor to write.
	    :param file_name: Path of the file, opened in binary write mode.
	    """
	    # The context manager closes the file even when writing raises.
	    with open(file_name, "wb") as f:
	        _write_tensor(t, f)

	def save_state_dict(sd, stream):
	    """
	    Saves a PyTorch state dictionary using the format that TorchSharp can
	    read.

	    Written in order: the LEB128-encoded number of entries, then for each
	    entry the LEB128-encoded byte length of its name, the UTF-8 bytes of
	    the name, and the tensor itself.

	    :param sd: A dictionary produced by 'model.state_dict()'
	    :param stream: A write stream opened for binary I/O.
	    """
	    stream.write(leb128.u.encode(len(sd)))
	    for name, tensor in sd.items():
	        # The prefix must count encoded bytes, not characters, so that it
	        # agrees with the payload for names containing non-ASCII text.
	        encoded_name = name.encode('utf-8')
	        stream.write(leb128.u.encode(len(encoded_name)))
	        stream.write(encoded_name)
	        _write_tensor(tensor, stream)

	def _write_encoded_int64(value, stream):
	    """
	    Write an integer to the stream in unsigned LEB128 encoding.

	    :param value: The integer to encode.
	    :param stream: A write stream opened for binary I/O.
	    """
	    encoded = leb128.u.encode(value)
	    stream.write(encoded)

	def _write_optim_name(name, stream):
	    """
	    Write an optimizer name as a length-prefixed UTF-8 string.

	    :param name: The optimizer name.
	    :param stream: A write stream opened for binary I/O.
	    """
	    # Encode first so the prefix counts bytes rather than characters; the
	    # two only coincide for pure-ASCII names.
	    encoded_name = name.encode('utf-8')
	    _write_encoded_int64(len(encoded_name), stream)
	    stream.write(encoded_name)

	def _write_bool(value, stream):
	    """
	    Write a boolean to the stream as a single byte.

	    :param value: The flag to write; must provide 'to_bytes' (bool or int).
	    :param stream: A write stream opened for binary I/O.
	    """
	    single_byte = value.to_bytes(1, 'little')
	    stream.write(single_byte)

	def _write_int32(value, stream):
	    """
	    Write an integer to the stream as a 4-byte little-endian signed value.

	    :param value: The integer to write.
	    :param stream: A write stream opened for binary I/O.
	    :raises OverflowError: If the value does not fit in a signed 32-bit
	                           integer.
	    """
	    # signed=True gives two's-complement output, so negative values are
	    # written instead of raising, and values that would overflow a signed
	    # 32-bit field are rejected instead of being silently written.
	    stream.write(value.to_bytes(4, 'little', signed=True))

	def _write_int64(value, stream):
	    """
	    Write an integer to the stream as an 8-byte little-endian signed value.

	    :param value: The integer to write.
	    :param stream: A write stream opened for binary I/O.
	    :raises OverflowError: If the value does not fit in a signed 64-bit
	                           integer.
	    """
	    # signed=True gives two's-complement output, so negative values are
	    # written instead of raising, and values that would overflow a signed
	    # 64-bit field are rejected instead of being silently written.
	    stream.write(value.to_bytes(8, 'little', signed=True))

	def _write_conditional_state_tensor(name, state, stream):
	    """
	    Write an optional state tensor preceded by a one-byte presence flag.

	    A False flag and nothing else is written when 'name' is missing from
	    'state' or maps to None; otherwise a True flag is followed by the
	    tensor.

	    :param name: Key of the tensor inside 'state'.
	    :param state: The per-parameter optimizer state mapping.
	    :param stream: A write stream opened for binary I/O.
	    """
	    buf = state[name] if name in state else None

	    # Identity test: '== None' on a tensor goes through the tensor's own
	    # comparison operator instead of checking for the None singleton.
	    if buf is None:
	        _write_bool(False, stream)
	        return

	    _write_bool(True, stream)
	    _write_tensor(buf, stream)

	def _check_state_existance(state, p):
	    """
	    Verify that a parameter has an entry in the optimizer state.

	    :param state: The optimizer state mapping.
	    :param p: The parameter to look up.
	    :raises KeyError: If 'p' is not a key of 'state'.
	    """
	    if p in state:
	        return

	    raise KeyError("Identified a parameter with no initalized state. Please make sure to run `optim.step()` for all the parameters at least once.")

	def save_sgd(optim, stream):
	    """
	    Saves the options and state of an SGD optimizer to a binary stream.

	    Written in order: the name 'SGD', the number of parameter groups, the
	    number of state entries, the options of every parameter group, then
	    the optional momentum buffer of every parameter.

	    :param optim: The optimizer to save.
	    :param stream: A write stream opened for binary I/O.
	    :raises KeyError: If a parameter has no entry in 'optim.state'.
	    """
	    _write_optim_name('SGD', stream)

	    _write_encoded_int64(len(optim.param_groups), stream)
	    _write_encoded_int64(len(optim.state), stream)

	    # Write options
	    for pg in optim.param_groups:
	        floats = np.empty(5, dtype=np.float64)
	        # NOTE(review): 'lr' fills two slots; presumably the reader keeps
	        # two learning-rate fields -- confirm against the loader.
	        floats[0] = pg['lr']
	        floats[1] = pg['lr']
	        floats[2] = pg['momentum']
	        floats[3] = pg['dampening']
	        floats[4] = pg['weight_decay']
	        stream.write(floats.tobytes())
	        _write_bool(pg['nesterov'], stream)
	        _write_bool(pg['maximize'], stream)

	    # Write state
	    for group in optim.param_groups:
	        for p in group['params']:
	            _check_state_existance(optim.state, p)
	            st = optim.state[p]
	            _write_conditional_state_tensor('momentum_buffer', st, stream)

	def save_asgd(optim, stream):
	    """
	    Saves the options and state of an ASGD optimizer to a binary stream.

	    Written in order: the name 'ASGD', the number of parameter groups, the
	    number of state entries, the options of every parameter group, then
	    for every parameter its step count, 'eta', 'mu' and the 'ax' tensor.

	    :param optim: The optimizer to save.
	    :param stream: A write stream opened for binary I/O.
	    :raises KeyError: If a parameter has no entry in 'optim.state'.
	    """
	    _write_optim_name('ASGD', stream)

	    _write_encoded_int64(len(optim.param_groups), stream)
	    _write_encoded_int64(len(optim.state), stream)

	    # Write options
	    for pg in optim.param_groups:
	        floats = np.empty(2, dtype=np.float64)
	        # NOTE(review): 'lr' fills two slots; presumably the reader keeps
	        # two learning-rate fields -- confirm against the loader.
	        floats[0] = pg['lr']
	        floats[1] = pg['lr']
	        stream.write(floats.tobytes())
	        _write_bool(pg['maximize'], stream)
	        floats = np.empty(4, dtype=np.float64)
	        floats[0] = pg['lambd']
	        floats[1] = pg['alpha']
	        floats[2] = pg['weight_decay']
	        floats[3] = pg['t0']
	        stream.write(floats.tobytes())

	    # Write state
	    for group in optim.param_groups:
	        for p in group['params']:
	            _check_state_existance(optim.state, p)
	            st = optim.state[p]
	            # int() accepts both a plain number and a one-element tensor.
	            _write_int64(int(st["step"]), stream)
	            floats = np.empty(2, dtype=np.float64)
	            floats[0] = st['eta']
	            floats[1] = st['mu']
	            stream.write(floats.tobytes())
	            _write_tensor(st['ax'], stream)

	def save_rprop(optim, stream):
	    """
	    Saves the options and state of an Rprop optimizer to a binary stream.

	    Written in order: the name 'Rprop', the number of parameter groups,
	    the number of state entries, the options of every parameter group,
	    then for every parameter its step count and the 'prev' and
	    'step_size' tensors.

	    :param optim: The optimizer to save.
	    :param stream: A write stream opened for binary I/O.
	    :raises KeyError: If a parameter has no entry in 'optim.state'.
	    """
	    _write_optim_name('Rprop', stream)

	    _write_encoded_int64(len(optim.param_groups), stream)
	    _write_encoded_int64(len(optim.state), stream)

	    # Write options
	    for pg in optim.param_groups:
	        floats = np.empty(2, dtype=np.float64)
	        # NOTE(review): 'lr' fills two slots; presumably the reader keeps
	        # two learning-rate fields -- confirm against the loader.
	        floats[0] = pg['lr']
	        floats[1] = pg['lr']
	        stream.write(floats.tobytes())
	        _write_bool(pg['maximize'], stream)
	        floats = np.empty(4, dtype=np.float64)
	        etaminus, etaplus = pg['etas']
	        min_step, max_step = pg['step_sizes']
	        floats[0] = etaminus
	        floats[1] = etaplus
	        floats[2] = min_step
	        floats[3] = max_step
	        stream.write(floats.tobytes())

	    # Write state
	    for group in optim.param_groups:
	        for p in group['params']:
	            _check_state_existance(optim.state, p)
	            st = optim.state[p]
	            # The step may be stored as a plain number or as a one-element
	            # tensor; int() accepts both, whereas a tensor passed straight
	            # to _write_int64 has no 'to_bytes' method.
	            _write_int64(int(st["step"]), stream)
	            _write_tensor(st['prev'], stream)
	            _write_tensor(st['step_size'], stream)

	def save_rmsprop(optim, stream):
	    """
	    Saves the options and state of an RMSprop optimizer to a binary stream.

	    Written in order: the name 'RMSProp', the number of parameter groups,
	    the number of state entries, the options of every parameter group,
	    then for every parameter its step count, the 'square_avg' tensor and
	    the optional 'momentum_buffer' and 'grad_avg' tensors.

	    :param optim: The optimizer to save.
	    :param stream: A write stream opened for binary I/O.
	    :raises KeyError: If a parameter has no entry in 'optim.state'.
	    """
	    _write_optim_name('RMSProp', stream)

	    _write_encoded_int64(len(optim.param_groups), stream)
	    _write_encoded_int64(len(optim.state), stream)

	    # Write options
	    for pg in optim.param_groups:
	        floats = np.empty(2, dtype=np.float64)
	        # NOTE(review): 'lr' fills two slots; presumably the reader keeps
	        # two learning-rate fields -- confirm against the loader.
	        floats[0] = pg['lr']
	        floats[1] = pg['lr']
	        stream.write(floats.tobytes())
	        _write_bool(pg['maximize'], stream)
	        floats = np.empty(4, dtype=np.float64)
	        floats[0] = pg['momentum']
	        floats[1] = pg['alpha']
	        floats[2] = pg['eps']
	        floats[3] = pg['weight_decay']
	        stream.write(floats.tobytes())
	        _write_bool(pg['centered'], stream)

	    # Write state
	    for group in optim.param_groups:
	        for p in group['params']:
	            _check_state_existance(optim.state, p)
	            st = optim.state[p]
	            # The step may be stored as a plain number or as a one-element
	            # tensor; int() accepts both, whereas a tensor passed straight
	            # to _write_int64 has no 'to_bytes' method.
	            _write_int64(int(st["step"]), stream)
	            _write_tensor(st['square_avg'], stream)
	            _write_conditional_state_tensor('momentum_buffer', st, stream)
	            _write_conditional_state_tensor('grad_avg', st, stream)

	def save_adam(optim, stream):
	    """
	    Saves the options and state of an Adam optimizer to a binary stream.

	    Written in order: the name 'Adam', the number of parameter groups, the
	    number of state entries, the options of every parameter group, then
	    for every parameter its step count, the 'exp_avg' and 'exp_avg_sq'
	    tensors and the optional 'max_exp_avg_sq' tensor.

	    :param optim: The optimizer to save.
	    :param stream: A write stream opened for binary I/O.
	    :raises KeyError: If a parameter has no entry in 'optim.state'.
	    """
	    _write_optim_name('Adam', stream)

	    _write_encoded_int64(len(optim.param_groups), stream)
	    _write_encoded_int64(len(optim.state), stream)

	    # Write options
	    for pg in optim.param_groups:
	        floats = np.empty(6, dtype=np.float64)
	        # NOTE(review): 'lr' fills two slots; presumably the reader keeps
	        # two learning-rate fields -- confirm against the loader.
	        floats[0] = pg['lr']
	        floats[1] = pg['lr']
	        beta1, beta2 = pg['betas']
	        floats[2] = beta1
	        floats[3] = beta2
	        floats[4] = pg['eps']
	        floats[5] = pg['weight_decay']
	        stream.write(floats.tobytes())
	        _write_bool(pg['amsgrad'], stream)
	        _write_bool(pg['maximize'], stream)

	    # Write state
	    for group in optim.param_groups:
	        for p in group['params']:
	            _check_state_existance(optim.state, p)
	            st = optim.state[p]
	            # int() accepts both a plain number and a one-element tensor.
	            _write_int64(int(st["step"]), stream)
	            _write_tensor(st['exp_avg'], stream)
	            _write_tensor(st['exp_avg_sq'], stream)
	            _write_conditional_state_tensor('max_exp_avg_sq', st, stream)

	def save_adamw(optim, stream):
	    """
	    Saves the options and state of an AdamW optimizer to a binary stream.

	    Written in order: the name 'AdamW', the number of parameter groups,
	    the number of state entries, the options of every parameter group,
	    then for every parameter its step count, the 'exp_avg' and
	    'exp_avg_sq' tensors and the optional 'max_exp_avg_sq' tensor.

	    :param optim: The optimizer to save.
	    :param stream: A write stream opened for binary I/O.
	    :raises KeyError: If a parameter has no entry in 'optim.state'.
	    """
	    _write_optim_name('AdamW', stream)

	    _write_encoded_int64(len(optim.param_groups), stream)
	    _write_encoded_int64(len(optim.state), stream)

	    # Write options
	    for pg in optim.param_groups:
	        floats = np.empty(6, dtype=np.float64)
	        # NOTE(review): 'lr' fills two slots; presumably the reader keeps
	        # two learning-rate fields -- confirm against the loader.
	        floats[0] = pg['lr']
	        floats[1] = pg['lr']
	        beta1, beta2 = pg['betas']
	        floats[2] = beta1
	        floats[3] = beta2
	        floats[4] = pg['eps']
	        floats[5] = pg['weight_decay']
	        stream.write(floats.tobytes())
	        _write_bool(pg['amsgrad'], stream)
	        _write_bool(pg['maximize'], stream)

	    # Write state
	    for group in optim.param_groups:
	        for p in group['params']:
	            _check_state_existance(optim.state, p)
	            st = optim.state[p]
	            # int() accepts both a plain number and a one-element tensor.
	            _write_int64(int(st["step"]), stream)
	            _write_tensor(st['exp_avg'], stream)
	            _write_tensor(st['exp_avg_sq'], stream)
	            _write_conditional_state_tensor('max_exp_avg_sq', st, stream)

	def save_nadam(optim, stream):
	    """
	    Saves the options and state of a NAdam optimizer to a binary stream.

	    Written in order: the name 'NAdam', the number of parameter groups,
	    the number of state entries, the options of every parameter group,
	    then for every parameter its step count, 'mu_product' as a float64,
	    and the 'exp_avg' and 'exp_avg_sq' tensors.

	    :param optim: The optimizer to save.
	    :param stream: A write stream opened for binary I/O.
	    :raises KeyError: If a parameter has no entry in 'optim.state'.
	    """
	    _write_optim_name('NAdam', stream)

	    _write_encoded_int64(len(optim.param_groups), stream)
	    _write_encoded_int64(len(optim.state), stream)

	    # Write options
	    for pg in optim.param_groups:
	        floats = np.empty(7, dtype=np.float64)
	        # NOTE(review): 'lr' fills two slots; presumably the reader keeps
	        # two learning-rate fields -- confirm against the loader.
	        floats[0] = pg['lr']
	        floats[1] = pg['lr']
	        beta1, beta2 = pg['betas']
	        floats[2] = beta1
	        floats[3] = beta2
	        floats[4] = pg['eps']
	        floats[5] = pg['weight_decay']
	        floats[6] = pg['momentum_decay']
	        stream.write(floats.tobytes())

	    # Write state
	    for group in optim.param_groups:
	        for p in group['params']:
	            _check_state_existance(optim.state, p)
	            st = optim.state[p]
	            # int() and float() accept both plain numbers and one-element
	            # tensors.
	            _write_int64(int(st["step"]), stream)
	            floats = np.empty(1, dtype=np.float64)
	            floats[0] = float(st["mu_product"])
	            stream.write(floats.tobytes())
	            _write_tensor(st['exp_avg'], stream)
	            _write_tensor(st['exp_avg_sq'], stream)

	def save_radam(optim, stream):
	    """
	    Saves the options and state of a RAdam optimizer to a binary stream.

	    Written in order: the name 'RAdam', the number of parameter groups,
	    the number of state entries, the options of every parameter group,
	    then for every parameter its step count and the 'exp_avg' and
	    'exp_avg_sq' tensors.

	    :param optim: The optimizer to save.
	    :param stream: A write stream opened for binary I/O.
	    :raises KeyError: If a parameter has no entry in 'optim.state'.
	    """
	    _write_optim_name('RAdam', stream)

	    _write_encoded_int64(len(optim.param_groups), stream)
	    _write_encoded_int64(len(optim.state), stream)

	    # Write options
	    for pg in optim.param_groups:
	        floats = np.empty(6, dtype=np.float64)
	        # NOTE(review): 'lr' fills two slots; presumably the reader keeps
	        # two learning-rate fields -- confirm against the loader.
	        floats[0] = pg['lr']
	        floats[1] = pg['lr']
	        beta1, beta2 = pg['betas']
	        floats[2] = beta1
	        floats[3] = beta2
	        floats[4] = pg['eps']
	        floats[5] = pg['weight_decay']
	        stream.write(floats.tobytes())

	    # Write state
	    for group in optim.param_groups:
	        for p in group['params']:
	            _check_state_existance(optim.state, p)
	            st = optim.state[p]
	            # int() accepts both a plain number and a one-element tensor.
	            _write_int64(int(st["step"]), stream)
	            _write_tensor(st['exp_avg'], stream)
	            _write_tensor(st['exp_avg_sq'], stream)

	def save_adamax(optim, stream):
	    """
	    Saves the options and state of an Adamax optimizer to a binary stream.

	    Written in order: the name 'Adamax', the number of parameter groups,
	    the number of state entries, the options of every parameter group,
	    then for every parameter its step count and the 'exp_avg' and
	    'exp_inf' tensors.

	    :param optim: The optimizer to save.
	    :param stream: A write stream opened for binary I/O.
	    :raises KeyError: If a parameter has no entry in 'optim.state'.
	    """
	    _write_optim_name('Adamax', stream)

	    _write_encoded_int64(len(optim.param_groups), stream)
	    _write_encoded_int64(len(optim.state), stream)

	    # Write options
	    for pg in optim.param_groups:
	        floats = np.empty(6, dtype=np.float64)
	        # NOTE(review): 'lr' fills two slots; presumably the reader keeps
	        # two learning-rate fields -- confirm against the loader.
	        floats[0] = pg['lr']
	        floats[1] = pg['lr']
	        beta1, beta2 = pg['betas']
	        floats[2] = beta1
	        floats[3] = beta2
	        floats[4] = pg['eps']
	        floats[5] = pg['weight_decay']
	        stream.write(floats.tobytes())

	    # Write state
	    for group in optim.param_groups:
	        for p in group['params']:
	            _check_state_existance(optim.state, p)
	            st = optim.state[p]
	            # int() accepts both a plain number and a one-element tensor.
	            _write_int64(int(st["step"]), stream)
	            _write_tensor(st['exp_avg'], stream)
	            _write_tensor(st['exp_inf'], stream)

	def save_adadelta(optim, stream):
	    """
	    Saves the options and state of an Adadelta optimizer to a binary
	    stream.

	    Written in order: the name 'Adadelta', the number of parameter groups,
	    the number of state entries, the options of every parameter group,
	    then for every parameter its step count and the 'square_avg' and
	    'acc_delta' tensors.

	    :param optim: The optimizer to save.
	    :param stream: A write stream opened for binary I/O.
	    :raises KeyError: If a parameter has no entry in 'optim.state'.
	    """
	    _write_optim_name('Adadelta', stream)

	    _write_encoded_int64(len(optim.param_groups), stream)
	    _write_encoded_int64(len(optim.state), stream)

	    # Write options
	    for pg in optim.param_groups:
	        floats = np.empty(5, dtype=np.float64)
	        # NOTE(review): 'lr' fills two slots; presumably the reader keeps
	        # two learning-rate fields -- confirm against the loader.
	        floats[0] = pg['lr']
	        floats[1] = pg['lr']
	        floats[2] = pg['rho']
	        floats[3] = pg['eps']
	        floats[4] = pg['weight_decay']
	        stream.write(floats.tobytes())
	        _write_bool(pg['maximize'], stream)

	    # Write state
	    for group in optim.param_groups:
	        for p in group['params']:
	            _check_state_existance(optim.state, p)
	            st = optim.state[p]
	            # The step may be stored as a plain number or as a one-element
	            # tensor; int() accepts both, whereas a tensor passed straight
	            # to _write_int64 has no 'to_bytes' method.
	            _write_int64(int(st["step"]), stream)
	            _write_tensor(st['square_avg'], stream)
	            _write_tensor(st['acc_delta'], stream)

	def save_adagrad(optim, stream):
	    """
	    Saves the options and state of an Adagrad optimizer to a binary stream.

	    Written in order: the name 'Adagrad', the number of parameter groups,
	    the number of state entries, the options of every parameter group,
	    then for every parameter its step count and the 'sum' tensor.

	    :param optim: The optimizer to save.
	    :param stream: A write stream opened for binary I/O.
	    :raises KeyError: If a parameter has no entry in 'optim.state'.
	    """
	    _write_optim_name('Adagrad', stream)

	    _write_encoded_int64(len(optim.param_groups), stream)
	    _write_encoded_int64(len(optim.state), stream)

	    # Write options
	    for pg in optim.param_groups:
	        floats = np.empty(6, dtype=np.float64)
	        # NOTE(review): 'lr' fills two slots; presumably the reader keeps
	        # two learning-rate fields -- confirm against the loader.
	        floats[0] = pg['lr']
	        floats[1] = pg['lr']
	        floats[2] = pg['lr_decay']
	        floats[3] = pg['initial_accumulator_value']
	        floats[4] = pg['eps']
	        floats[5] = pg['weight_decay']
	        stream.write(floats.tobytes())

	    # Write state
	    for group in optim.param_groups:
	        for p in group['params']:
	            _check_state_existance(optim.state, p)
	            st = optim.state[p]
	            # int() accepts both a plain number and a one-element tensor.
	            _write_int64(int(st["step"]), stream)
	            _write_tensor(st['sum'], stream)