###########################################################################
# NLP demo software by HyperbeeAI.                                        #
# Copyrights © 2023 Hyperbee.AI Inc. All rights reserved. hello@hyperbee.ai
###########################################################################
license_statement = "NLP demo software by HyperbeeAI. Copyrights © 2023 Hyperbee.AI Inc. All rights reserved. hello@hyperbee.ai"
print("imported layers.py")
print(license_statement)
print("")

import torch, sys
import torch.nn as nn
import numpy as np
from torch.autograd import Function
from functions import quantization, clamping_hw, linear_functional


# Hardware-aware base layer: quantized weights/bias, the convolution or linear
# operation, an optional activation, and clamping that simulates the chip's output range.
class ai85_base(nn.Module):
    def __init__(
            self,
            operation_module  = None,
            operation_fcnl    = None,
            activation_module = None,
            output_width_30b  = False
    ):
        super().__init__()
        self.op     = operation_module
        self.op_fcn = operation_fcnl
        self.act    = activation_module
        self.wide   = output_width_30b

        self.quantize_Q_d_8b   = None
        self.quantize_Q_u_wb   = None
        self.quantize_Q_d_wide = None
        self.clamp_C_hw_8b     = None
        self.clamp_C_hw_wide   = None

        self.output_shift        = nn.Parameter(torch.Tensor([ 0 ]), requires_grad=False)
        self.weight_bits         = nn.Parameter(torch.Tensor([ 8 ]), requires_grad=False)
        self.bias_bits           = nn.Parameter(torch.Tensor([ 8 ]), requires_grad=False)
        self.quantize_activation = nn.Parameter(torch.Tensor([ 1 ]), requires_grad=False)
        self.adjust_output_shift = nn.Parameter(torch.Tensor([ 0 ]), requires_grad=False)
        self.shift_quantile      = nn.Parameter(torch.Tensor([ 1 ]), requires_grad=False)

        weight_bits    = self.weight_bits
        bias_bits      = self.bias_bits
        shift_quantile = self.shift_quantile
        self.configure_layer_base(weight_bits, bias_bits, shift_quantile)

    def configure_layer_base(self, weight_bits, bias_bits, shift_quantile):
        self.quantize_Q_d_8b   = quantization(xb=8,           mode='down', wide=False)  # 8 here is activation bits
        self.quantize_Q_u_wb   = quantization(xb=weight_bits, mode='up',   wide=False)
        self.quantize_Q_d_wide = quantization(xb=8,           mode='down', wide=True)   # 8 here is activation bits, but it's wide, so check inside
        self.clamp_C_hw_8b     = clamping_hw(xb=8,    wide=False)  # 8 here is activation bits
        self.clamp_C_hw_wide   = clamping_hw(xb=None, wide=True)   # None to avoid misleading info on the # of bits, check inside

        self.weight_bits    = nn.Parameter(torch.Tensor([ weight_bits ]),    requires_grad=False)
        self.bias_bits      = nn.Parameter(torch.Tensor([ bias_bits ]),      requires_grad=False)
        self.shift_quantile = nn.Parameter(torch.Tensor([ shift_quantile ]), requires_grad=False)

    def forward(self, x):
        w   = self.op.weight
        b   = self.op.bias
        los = self.output_shift
        s_o = 2**(los)

        w_q = self.quantize_Q_u_wb(w)
        b_q = self.quantize_Q_u_wb(b)

        x = self.op_fcn(x, w_q, b_q, self.op.stride, self.op.padding)  # convolution / linear
        x = x * s_o
        if(self.act is not None):
            x = self.act(x)

        if((self.wide) and (self.act is None)):
            x = self.quantize_Q_d_wide(x)
            x = self.clamp_C_hw_wide(x)
            ### The +5 below accounts for the 5 fractional bits the chip adds to the number in
            ### wide mode; we divide the number back down here to bring it into range.
            ### For some reason, ai8x-training does not do this until the synthesis/deployment
            ### phase, and they do a +1 bit there; why?
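            ### Taken together, the three divisions below amount to a single factor of
            ### 2**5 * 128 * 2 = 2**13 = 8192; presumably this maps the wide accumulator
            ### back toward the [-1, 1) activation range used by the 8-bit path, ahead of
            ### the table lookup mentioned below.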
            x = x / (2**(5))  # this simulates the chip behavior
            x = x / 128.0     # this one is ours, for convenience; done outside the chip since it's the step before the table lookup
            x = x / 2.0       # this one is ours, for convenience; done outside the chip since it's the step before the table lookup
        else:
            x = self.quantize_Q_d_8b(x)
            x = self.clamp_C_hw_8b(x)

        return x


# Hardware-aware 1-D convolution layer built on top of ai85_base.
class ai85_conv1d(ai85_base):
    def __init__(
            self,
            C_in_channels      = None,
            D_out_channels     = None,
            K_kernel_dimension = None,
            padding            = 0,
            activation         = None,
            output_width_30b   = False,
    ):
        if(activation is None):
            activation_fcn = None
        elif(activation == 'relu'):
            activation_fcn = nn.ReLU(inplace=True)
        else:
            print('wrong activation type in model. only {relu} is acceptable. exiting')
            sys.exit()

        operation_mdl = nn.Conv1d(C_in_channels, D_out_channels, kernel_size=K_kernel_dimension,
                                  stride=1, padding=padding, bias=True)
        operation_fcn = nn.functional.conv1d

        super().__init__(
            activation_module = activation_fcn,
            operation_module  = operation_mdl,
            operation_fcnl    = operation_fcn,
            output_width_30b  = output_width_30b,
        )


# Residual addition followed by the hardware clamp.
class ai85_add(nn.Module):
    def __init__(self):
        super().__init__()
        self.clamp_C_hw_8b = clamping_hw(xb=8, wide=False)  # 8 here is activation bits

    def forward(self, x, res):
        x = self.clamp_C_hw_8b(x + res)
        return x


# Hardware-aware fully connected layer built on top of ai85_base.
class ai85_fullyconnected(ai85_base):
    def __init__(
            self,
            in_features      = None,
            out_features     = None,
            activation       = None,
            output_width_30b = False):
        if(activation is None):
            activation_fcn = None
        elif(activation == 'relu'):
            activation_fcn = nn.ReLU(inplace=True)
        else:
            print('wrong activation type in model. only {relu} is acceptable. exiting')
            sys.exit()

        operation_mdl = nn.Linear(in_features, out_features, bias=True)
        operation_fcn = linear_functional

        super().__init__(
            activation_module = activation_fcn,
            operation_module  = operation_mdl,
            operation_fcnl    = operation_fcn,
            output_width_30b  = output_width_30b
        )
        # Define dummy stride/padding attributes so that nn.Linear looks the same as
        # nn.Conv1d to ai85_base.forward ("op" is the operation module held by ai85_base).
        self.op.stride  = None
        self.op.padding = None


# Quantized embedding front end: token embedding + positional embedding, normalized
# and mapped to the signed 8-bit range.
class lpre(nn.Module):
    def __init__(self):
        super().__init__()
        self.ee1      = nn.Embedding(16384, 64)  # token embedding
        self.ee2      = nn.Embedding(48, 64)     # positional embedding
        self.quantize = quantization(xb=8, mode='updown', wide=False)

    def forward(self, x, sp1, sp2, sb):
        # position indices sp1 .. sp2-1, repeated for each of the sb sequences in the batch
        pp    = torch.arange(sp1, sp2).unsqueeze(0).repeat(sb, 1).to(x.device)
        ee2_d = self.ee2(pp)
        ee1_d = self.ee1(x)
        ed    = ee1_d + ee2_d

        # min-max normalize the summed embedding into [-1, 1), using the extreme values
        # reachable from the two embedding tables
        min_w = self.ee2.weight.data.min() + self.ee1.weight.data.min()
        max_w = self.ee2.weight.data.max() + self.ee1.weight.data.max()
        t = (ed - min_w) / (max_w - min_w)
        t = t.add(-0.5).mul(2.0)

        t = self.quantize(t)
        t = t.clamp(min=-1.0, max=1.0 - (1.0/128.0))

        # map to the signed 8-bit integer range and switch to the channels-first layout
        # (batch, embedding, sequence) that nn.Conv1d expects
        t = t.mul(2**(8-1)).add(0.5).floor().clamp(min=-128, max=127)
        return t.permute(0, 2, 1)
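

# ---------------------------------------------------------------------------
# Usage sketch (illustrative only): a minimal smoke test showing how the layers
# above compose and which tensor layouts they expect. All shapes, channel
# counts, vocabulary/position sizes and the input range below are assumptions
# chosen for this example; they are not taken from the demo's actual models.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    emb  = lpre()
    conv = ai85_conv1d(C_in_channels=64, D_out_channels=32, K_kernel_dimension=3,
                       padding=1, activation='relu')
    fc   = ai85_fullyconnected(in_features=32, out_features=4, activation=None)
    res  = ai85_add()

    tokens = torch.randint(0, 16384, (2, 48))               # assumed token ids: batch 2, seq len 48
    e = emb(tokens, 0, 48, 2)                                # -> (2, 64, 48), channels-first, int-valued
    x = torch.randn(2, 64, 48).clamp(-1.0, 1.0 - 1.0/128.0)  # assumed conv input in the 8-bit activation range
    y = conv(x)                                              # -> (2, 32, 48), quantized and hw-clamped
    y = res(y, torch.zeros_like(y))                          # residual add, then hw clamping
    z = fc(y[:, :, 0])                                       # -> (2, 4), fully connected on one time step
    print(e.shape, y.shape, z.shape)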