from dreamcoder.utilities import * from dreamcoder.program import * from dreamcoder.task import Task class FrontierEntry(object): def __init__( self, program, _=None, logPrior=None, logLikelihood=None, logPosterior=None): self.logPosterior = logPrior + logLikelihood if logPosterior is None else logPosterior self.program = program self.logPrior = logPrior self.logLikelihood = logLikelihood def __repr__(self): return "FrontierEntry(program={self.program}, logPrior={self.logPrior}, logLikelihood={self.logLikelihood}".format( self=self) def strip_primitive_values(self): return FrontierEntry(program=strip_primitive_values(self.program), logPrior=self.logPrior, logPosterior=self.logPosterior, logLikelihood=self.logLikelihood) def unstrip_primitive_values(self): return FrontierEntry(program=unstrip_primitive_values(self.program), logPrior=self.logPrior, logPosterior=self.logPosterior, logLikelihood=self.logLikelihood) class Frontier(object): def __init__(self, frontier, task): self.entries = frontier self.task = task def __repr__( self): return "Frontier(entries={self.entries}, task={self.task})".format(self=self) def __iter__(self): return iter(self.entries) def __len__(self): return len(self.entries) def json(self): return {"request": self.task.request.json(), "task": str(self.task), "programs": [{"program": str(e.program), "logLikelihood": e.logLikelihood} for e in self ]} def strip_primitive_values(self): return Frontier([e.strip_primitive_values() for e in self.entries ], self.task) def unstrip_primitive_values(self): return Frontier([e.unstrip_primitive_values() for e in self.entries ], self.task) DUMMYFRONTIERCOUNTER = 0 @staticmethod def dummy(program, logLikelihood=0., logPrior=0., tp=None): """Creates a dummy frontier containing just this program""" if not tp: tp = program.infer().negateVariables() t = Task( "" % (Frontier.DUMMYFRONTIERCOUNTER, str(program)), tp, []) f = Frontier([FrontierEntry(program=program, logLikelihood=logLikelihood, logPrior=logPrior)], task=t) Frontier.DUMMYFRONTIERCOUNTER += 1 return f def marginalLikelihood(self): return lse([e.logPrior + e.logLikelihood for e in self]) def temperature(self,T): """Divides prior by T""" return Frontier([ FrontierEntry(program=e.program, logPrior=e.logPrior/T, logLikelihood=e.logLikelihood) for e in self], task=self.task) def normalize(self): z = self.marginalLikelihood() newEntries = [ FrontierEntry( program=e.program, logPrior=e.logPrior, logLikelihood=e.logLikelihood, logPosterior=e.logPrior + e.logLikelihood - z) for e in self] newEntries.sort(key=lambda e: e.logPosterior, reverse=True) return Frontier(newEntries, self.task) def expectedProductionUses(self, g): """Returns a vector of the expected number of times each production was used""" import numpy as np this = g.rescoreFrontier(self).normalize() ps = list(sorted(g.primitives, key=str)) features = np.zeros(len(ps)) for j, p in enumerate(ps): for e in this: w = math.exp(e.logPosterior) features[j] += w * sum(child == p for _, child in e.program.walk() ) if not p.isInvented: features[j] *= 0.3 return features def removeZeroLikelihood(self): self.entries = [ e for e in self.entries if e.logLikelihood != float('-inf')] return self def topK(self, k): if k == 0: return Frontier([], self.task) if k < 0: return self newEntries = sorted(self.entries, key=lambda e: (-e.logPosterior, str(e.program))) return Frontier(newEntries[:k], self.task) def sample(self): """Samples an entry from a frontier""" return sampleLogDistribution([(e.logLikelihood + e.logPrior, e) for e in self]) @property def bestPosterior(self): return min(self.entries, key=lambda e: (-e.logPosterior, str(e.program))) def replaceWithSupervised(self, g): assert self.task.supervision is not None return g.rescoreFrontier(Frontier([FrontierEntry(self.task.supervision, logLikelihood=0., logPrior=0.)], task=self.task)) @property def bestll(self): best = max(self.entries, key=lambda e: e.logLikelihood) return best.logLikelihood @property def empty(self): return self.entries == [] @staticmethod def makeEmpty(task): return Frontier([], task=task) def summarize(self): if self.empty: return "MISS " + self.task.name best = self.bestPosterior return "HIT %s w/ %s ; log prior = %f ; log likelihood = %f" % ( self.task.name, best.program, best.logPrior, best.logLikelihood) def summarizeFull(self): if self.empty: return "MISS " + self.task.name return "\n".join([self.task.name] + ["%f\t%s" % (e.logPosterior, e.program) for e in self.normalize()]) @staticmethod def describe(frontiers): numberOfHits = sum(not f.empty for f in frontiers) if numberOfHits > 0: averageLikelihood = sum( f.bestPosterior.logPrior for f in frontiers if not f.empty) / numberOfHits else: averageLikelihood = 0 return "\n".join([f.summarize() for f in frontiers] + ["Hits %d/%d tasks" % (numberOfHits, len(frontiers))] + ["Average description length of a program solving a task: %f nats" % (-averageLikelihood)]) def combine(self, other, tolerance=0.01): '''Takes the union of the programs in each of the frontiers''' assert self.task == other.task foundDifference = False x = {e.program: e for e in self} y = {e.program: e for e in other} programs = set(x.keys()) | set(y.keys()) union = [] for p in programs: if p in x: e1 = x[p] if p in y: e2 = y[p] if abs(e1.logPrior - e2.logPrior) > tolerance: eprint( "WARNING: Log priors differed during frontier combining: %f vs %f" % (e1.logPrior, e2.logPrior)) eprint("WARNING: \tThe program is", p) eprint() if abs(e1.logLikelihood - e2.logLikelihood) > tolerance: foundDifference = True eprint( "WARNING: Log likelihoods deferred for %s: %f & %f" % (p, e1.logLikelihood, e2.logLikelihood)) if hasattr(self.task, 'BIC'): eprint("\t%d examples, BIC=%f, parameterPenalty=%f, n parameters=%d, correct likelihood=%f" % (len(self.task.examples), self.task.BIC, self.task.BIC * math.log(len(self.task.examples)), substringOccurrences("REAL", str(p)), substringOccurrences("REAL", str(p)) * self.task.BIC * math.log(len(self.task.examples)))) e1.logLikelihood = - \ substringOccurrences("REAL", str(p)) * self.task.BIC * math.log(len(self.task.examples)) e2.logLikelihood = e1.logLikelihood e1 = FrontierEntry( program=e1.program, logLikelihood=( e1.logLikelihood + e2.logLikelihood) / 2, logPrior=e1.logPrior) else: e1 = y[p] union.append(e1) if foundDifference: eprint( "WARNING: Log likelihoods differed for the same program on the task %s.\n" % (self.task.name), "\tThis is acceptable only if the likelihood model is stochastic. Took the geometric mean of the likelihoods.") return Frontier(union, self.task)