import math
import random

from dreamcoder.utilities import *  # provides eprint and invalid, among others


class InvalidLoss(Exception):
    pass


class DN(object):
    '''differentiable node: parent object of every differentiable operation'''

    def __init__(self, arguments):
        self.gradient = None
        if arguments != []:
            self.data = None
        self.arguments = arguments
        # descendents: every variable that takes this variable as input
        # descendents: [(DN, float)]
        # the additional float parameter is d Descendent / d This
        self.descendents = []
        self.recalculate()

    def __str__(self):
        if self.arguments == []:
            return self.name
        return "(%s %s)" % (self.name, " ".join(str(x) for x in self.arguments))

    def __repr__(self):
        return "DN(op = %s, data = %s, grad = %s, #descendents = %d, args = %s)" % (
            self.name, self.data, self.gradient, len(self.descendents), self.arguments)

    @property
    def derivative(self):
        return self.differentiate()

    def differentiate(self):
        # reverse-mode chain rule: sum the contributions of every node
        # that consumes this one
        if self.gradient is None:
            self.gradient = sum(partial * descendent.differentiate()
                                for descendent, partial in self.descendents)
        return self.gradient

    def zeroEverything(self):
        # already zeroed: gradient and descendents cleared, and data cleared
        # (leaves keep their data)
        if self.gradient is None and self.descendents == [] and (
                self.data is None or self.arguments == []):
            return
        self.gradient = None
        self.descendents = []
        if self.arguments != []:
            self.data = None
        for x in self.arguments:
            x.zeroEverything()

    def lightweightRecalculate(self):
        # forward pass only: no caching and no bookkeeping of partials
        return self.forward(*[a.lightweightRecalculate() for a in self.arguments])

    def recalculate(self):
        if self.data is None:
            inputs = [a.recalculate() for a in self.arguments]
            self.data = self.forward(*inputs)
            # record d self / d argument on each argument for backpropagation
            partials = self.backward(*inputs)
            for d, a in zip(partials, self.arguments):
                a.descendents.append((self, d))
        return self.data

    def backPropagation(self):
        self.gradient = 1.
        self.recursivelyDifferentiate()

    def recursivelyDifferentiate(self):
        self.differentiate()
        for x in self.arguments:
            x.recursivelyDifferentiate()

    def updateNetwork(self):
        # rebuild values and gradients from scratch; returns the loss
        self.zeroEverything()
        l = self.recalculate()
        self.backPropagation()
        return l

    def log(self):
        return Logarithm(self)

    def square(self):
        return Square(self)

    def exp(self):
        return Exponentiation(self)

    def clamp(self, l, u):
        return Clamp(self, l, u)

    def __abs__(self):
        return AbsoluteValue(self)

    def __add__(self, o):
        return Addition(self, Placeholder.maybe(o))

    def __radd__(self, o):
        return Addition(self, Placeholder.maybe(o))

    def __sub__(self, o):
        return Subtraction(self, Placeholder.maybe(o))

    def __rsub__(self, o):
        return Subtraction(Placeholder.maybe(o), self)

    def __mul__(self, o):
        return Multiplication(self, Placeholder.maybe(o))

    def __rmul__(self, o):
        return Multiplication(self, Placeholder.maybe(o))

    def __neg__(self):
        return Negation(self)

    def __truediv__(self, o):
        return Division(self, Placeholder.maybe(o))

    def __rtruediv__(self, o):
        return Division(Placeholder.maybe(o), self)

    def numericallyVerifyGradients(self, parameters):
        # compare analytic gradients against central differences
        calculatedGradients = [p.derivative for p in parameters]
        e = 0.00001
        for j, p in enumerate(parameters):
            p.data -= e
            y1 = self.lightweightRecalculate()
            p.data += 2 * e
            y2 = self.lightweightRecalculate()
            p.data -= e
            d = (y2 - y1) / (2 * e)
            if abs(calculatedGradients[j] - d) > 0.1:
                eprint("Bad gradient: expected %f, got %f" % (d, calculatedGradients[j]))

    def gradientDescent(self, parameters, _=None, lr=0.001, steps=10**3, update=None):
        # plain fixed-step gradient descent on this node as the loss
        for j in range(steps):
            l = self.updateNetwork()
            if update is not None and j % update == 0:
                eprint("LOSS:", l)
                for p in parameters:
                    eprint(p.data, '\t', p.derivative)
            if invalid(l):
                raise InvalidLoss()
            for p in parameters:
                p.data -= lr * p.derivative
        return self.data

    def restartingOptimize(self, parameters, _=None, attempts=1, s=1.,
                           decay=0.5, grow=0.1, lr=0.1, steps=10**3, update=None):
        # Rprop with random restarts; returns the best loss found
        ls = []
        for _ in range(attempts):
            for p in parameters:
                p.data = random.random() * 10 - 5
            ls.append(self.resilientBackPropagation(parameters, lr=lr, steps=steps,
                                                    decay=decay, grow=grow))
        return min(ls)

    def resilientBackPropagation(self, parameters, _=None, decay=0.5,
                                 grow=1.2, lr=0.1, steps=10**3, update=None):
        # Rprop: per-parameter step sizes that grow while the gradient keeps
        # its sign and decay when it flips; only the gradient's sign is used
        previousSign = [None] * len(parameters)
        lr = [lr] * len(parameters)
        for j in range(steps):
            l = self.updateNetwork()
            if update is not None and j % update == 0:
                eprint("LOSS:", l)
                eprint("\t".join(str(p.derivative) for p in parameters))
            if invalid(l):
                raise InvalidLoss()
            newSigns = [p.derivative > 0 for p in parameters]
            for i, p in enumerate(parameters):
                if p.derivative > 0:
                    p.data -= lr[i]
                elif p.derivative < 0:
                    p.data += lr[i]
                if previousSign[i] is not None:
                    if previousSign[i] == newSigns[i]:
                        lr[i] *= grow
                    else:
                        lr[i] *= decay
            previousSign = newSigns
        return self.data


class Placeholder(DN):
    '''leaf node holding a mutable scalar (a parameter or an input)'''
    COUNTER = 0

    def __init__(self, initialValue=0., name=None):
        self.data = initialValue
        super(Placeholder, self).__init__([])
        if name is None:
            name = "p_" + str(Placeholder.COUNTER)
            Placeholder.COUNTER += 1
        self.name = name

    @staticmethod
    def named(namePrefix, initialValue=0.):
        p = Placeholder(initialValue, namePrefix + str(Placeholder.COUNTER))
        Placeholder.COUNTER += 1
        return p

    def __str__(self):
        return "Placeholder(%s = %s)" % (self.name, self.data)

    @staticmethod
    def maybe(x):
        # wrap raw numbers in Placeholders; pass differentiable nodes through
        if isinstance(x, DN):
            return x
        return Placeholder(float(x))

    def forward(self):
        return self.data

    def backward(self):
        return []
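
# A minimal usage sketch (ours, not part of the original module): build
# y = (x - 3)^2 through the operator overloads above, run a forward/backward
# pass, and check the analytic gradient against central differences. The
# Subtraction and Square nodes are defined further down in this file; Python
# only resolves those names when the function is called.
def _gradientCheckExample():
    x = Placeholder(0., "x")
    y = (x - 3.).square()  # the raw 3. is wrapped by Placeholder.maybe
    y.updateNetwork()      # forward pass, then backpropagation
    y.numericallyVerifyGradients([x])  # prints only on a mismatch
    return x.derivative    # 2 * (0 - 3) = -6
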
class Clamp(DN):
    def __init__(self, x, l, u):
        assert u > l
        self.l = l
        self.u = u
        super(Clamp, self).__init__([x])
        self.name = "clamp"

    def forward(self, x):
        if x > self.u:
            return self.u
        if x < self.l:
            return self.l
        return x

    def backward(self, x):
        # identity gradient inside [l, u]; zero outside
        if x > self.u or x < self.l:
            return [0.]
        else:
            return [1.]
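
# A small sketch (ours, not original API) of Clamp's gradient behaviour: a
# parameter sitting outside [l, u] receives no gradient signal at all.
# Square is defined below; the name is resolved when the function runs.
def _clampGradientExample():
    x = Placeholder(5., "x")
    y = x.clamp(-1., 1.).square()
    y.updateNetwork()
    return x.derivative  # 0., because x = 5 lies outside [-1, 1]
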
class Addition(DN):
    def __init__(self, x, y):
        super(Addition, self).__init__([x, y])
        self.name = '+'

    def forward(self, x, y):
        return x + y

    def backward(self, x, y):
        return [1., 1.]


class Subtraction(DN):
    def __init__(self, x, y):
        super(Subtraction, self).__init__([x, y])
        self.name = '-'

    def forward(self, x, y):
        return x - y

    def backward(self, x, y):
        return [1., -1.]


class Negation(DN):
    def __init__(self, x):
        super(Negation, self).__init__([x])
        self.name = '-'

    def forward(self, x):
        return -x

    def backward(self, x):
        return [-1.]


class AbsoluteValue(DN):
    def __init__(self, x):
        super(AbsoluteValue, self).__init__([x])
        self.name = 'abs'

    def forward(self, x):
        return abs(x)

    def backward(self, x):
        if x > 0:
            return [1.]
        return [-1.]


class Multiplication(DN):
    def __init__(self, x, y):
        super(Multiplication, self).__init__([x, y])
        self.name = '*'

    def forward(self, x, y):
        return x * y

    def backward(self, x, y):
        return [y, x]


class Division(DN):
    def __init__(self, x, y):
        super(Division, self).__init__([x, y])
        self.name = '/'

    def forward(self, x, y):
        return x / y

    def backward(self, x, y):
        return [1.0 / y, -x / (y * y)]


class Square(DN):
    def __init__(self, x):
        super(Square, self).__init__([x])
        self.name = 'sq'

    def forward(self, x):
        return x * x

    def backward(self, x):
        return [2 * x]


class Exponentiation(DN):
    def __init__(self, x):
        super(Exponentiation, self).__init__([x])
        self.name = 'exp'

    def forward(self, x):
        return math.exp(x)

    def backward(self, x):
        return [math.exp(x)]


class Logarithm(DN):
    def __init__(self, x):
        super(Logarithm, self).__init__([x])
        self.name = 'log'

    def forward(self, x):
        return math.log(x)

    def backward(self, x):
        return [1. / x]


class LSE(DN):
    '''numerically stable log-sum-exp: subtracts the max before exponentiating'''

    def __init__(self, xs):
        super(LSE, self).__init__(xs)
        self.name = 'LSE'

    def forward(self, *xs):
        m = max(xs)
        return m + math.log(sum(math.exp(y - m) for y in xs))

    def backward(self, *xs):
        m = max(xs)
        zm = sum(math.exp(x - m) for x in xs)
        return [math.exp(x - m) / zm for x in xs]


if __name__ == "__main__":
    x = Placeholder(10., "x")
    y = Placeholder(2., "y")
    z = x - LSE([x, y])  # log-softmax probability assigned to x
    z.updateNetwork()
    eprint("dL/dx = %f\tdL/dy = %f" % (x.derivative, y.derivative))

    x.data = 2.
    y.data = 10.
    z.updateNetwork()
    eprint("dL/dx = %f\tdL/dy = %f" % (x.differentiate(), y.differentiate()))

    x.data = 2.
    y.data = 2.
    z.updateNetwork()
    eprint("z = ", z.data, z)
    eprint("dL/dx = %f\tdL/dy = %f" % (x.differentiate(), y.differentiate()))

    loss = -z
    eprint(loss)
    lr = 0.001
    loss.gradientDescent([x, y], lr=lr, steps=10000, update=1000)
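    # An extra check appended to the demo (ours, not in the original): refresh
    # the gradients at the final parameter values, then verify them numerically.
    loss.updateNetwork()
    loss.numericallyVerifyGradients([x, y])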