to_delete / dreamcoder /frontier.py
Fraser-Greenlee
add dreamcoder codebase
e1c1753
raw
history blame
9.63 kB
from dreamcoder.utilities import *
from dreamcoder.program import *
from dreamcoder.task import Task
class FrontierEntry(object):
def __init__(
self,
program,
_=None,
logPrior=None,
logLikelihood=None,
logPosterior=None):
self.logPosterior = logPrior + logLikelihood if logPosterior is None else logPosterior
self.program = program
self.logPrior = logPrior
self.logLikelihood = logLikelihood
def __repr__(self):
return "FrontierEntry(program={self.program}, logPrior={self.logPrior}, logLikelihood={self.logLikelihood}".format(
self=self)
def strip_primitive_values(self):
return FrontierEntry(program=strip_primitive_values(self.program),
logPrior=self.logPrior,
logPosterior=self.logPosterior,
logLikelihood=self.logLikelihood)
def unstrip_primitive_values(self):
return FrontierEntry(program=unstrip_primitive_values(self.program),
logPrior=self.logPrior,
logPosterior=self.logPosterior,
logLikelihood=self.logLikelihood)
class Frontier(object):
def __init__(self, frontier, task):
self.entries = frontier
self.task = task
def __repr__(
self): return "Frontier(entries={self.entries}, task={self.task})".format(self=self)
def __iter__(self): return iter(self.entries)
def __len__(self): return len(self.entries)
def json(self):
return {"request": self.task.request.json(),
"task": str(self.task),
"programs": [{"program": str(e.program),
"logLikelihood": e.logLikelihood}
for e in self ]}
def strip_primitive_values(self):
return Frontier([e.strip_primitive_values() for e in self.entries ],
self.task)
def unstrip_primitive_values(self):
return Frontier([e.unstrip_primitive_values() for e in self.entries ],
self.task)
DUMMYFRONTIERCOUNTER = 0
@staticmethod
def dummy(program, logLikelihood=0., logPrior=0., tp=None):
"""Creates a dummy frontier containing just this program"""
if not tp:
tp = program.infer().negateVariables()
t = Task(
"<dummy %d: %s>" %
(Frontier.DUMMYFRONTIERCOUNTER,
str(program)),
tp,
[])
f = Frontier([FrontierEntry(program=program,
logLikelihood=logLikelihood,
logPrior=logPrior)],
task=t)
Frontier.DUMMYFRONTIERCOUNTER += 1
return f
def marginalLikelihood(self):
return lse([e.logPrior + e.logLikelihood for e in self])
def temperature(self,T):
"""Divides prior by T"""
return Frontier([ FrontierEntry(program=e.program,
logPrior=e.logPrior/T,
logLikelihood=e.logLikelihood)
for e in self],
task=self.task)
def normalize(self):
z = self.marginalLikelihood()
newEntries = [
FrontierEntry(
program=e.program,
logPrior=e.logPrior,
logLikelihood=e.logLikelihood,
logPosterior=e.logPrior +
e.logLikelihood -
z) for e in self]
newEntries.sort(key=lambda e: e.logPosterior, reverse=True)
return Frontier(newEntries,
self.task)
def expectedProductionUses(self, g):
"""Returns a vector of the expected number of times each production was used"""
import numpy as np
this = g.rescoreFrontier(self).normalize()
ps = list(sorted(g.primitives, key=str))
features = np.zeros(len(ps))
for j, p in enumerate(ps):
for e in this:
w = math.exp(e.logPosterior)
features[j] += w * sum(child == p
for _, child in e.program.walk() )
if not p.isInvented: features[j] *= 0.3
return features
def removeZeroLikelihood(self):
self.entries = [
e for e in self.entries if e.logLikelihood != float('-inf')]
return self
def topK(self, k):
if k == 0: return Frontier([], self.task)
if k < 0: return self
newEntries = sorted(self.entries,
key=lambda e: (-e.logPosterior, str(e.program)))
return Frontier(newEntries[:k], self.task)
def sample(self):
"""Samples an entry from a frontier"""
return sampleLogDistribution([(e.logLikelihood + e.logPrior, e)
for e in self])
@property
def bestPosterior(self):
return min(self.entries,
key=lambda e: (-e.logPosterior, str(e.program)))
def replaceWithSupervised(self, g):
assert self.task.supervision is not None
return g.rescoreFrontier(Frontier([FrontierEntry(self.task.supervision,
logLikelihood=0., logPrior=0.)],
task=self.task))
@property
def bestll(self):
best = max(self.entries,
key=lambda e: e.logLikelihood)
return best.logLikelihood
@property
def empty(self): return self.entries == []
@staticmethod
def makeEmpty(task):
return Frontier([], task=task)
def summarize(self):
if self.empty:
return "MISS " + self.task.name
best = self.bestPosterior
return "HIT %s w/ %s ; log prior = %f ; log likelihood = %f" % (
self.task.name, best.program, best.logPrior, best.logLikelihood)
def summarizeFull(self):
if self.empty:
return "MISS " + self.task.name
return "\n".join([self.task.name] +
["%f\t%s" % (e.logPosterior, e.program)
for e in self.normalize()])
@staticmethod
def describe(frontiers):
numberOfHits = sum(not f.empty for f in frontiers)
if numberOfHits > 0:
averageLikelihood = sum(
f.bestPosterior.logPrior for f in frontiers if not f.empty) / numberOfHits
else:
averageLikelihood = 0
return "\n".join([f.summarize() for f in frontiers] +
["Hits %d/%d tasks" % (numberOfHits, len(frontiers))] +
["Average description length of a program solving a task: %f nats" % (-averageLikelihood)])
def combine(self, other, tolerance=0.01):
'''Takes the union of the programs in each of the frontiers'''
assert self.task == other.task
foundDifference = False
x = {e.program: e for e in self}
y = {e.program: e for e in other}
programs = set(x.keys()) | set(y.keys())
union = []
for p in programs:
if p in x:
e1 = x[p]
if p in y:
e2 = y[p]
if abs(e1.logPrior - e2.logPrior) > tolerance:
eprint(
"WARNING: Log priors differed during frontier combining: %f vs %f" %
(e1.logPrior, e2.logPrior))
eprint("WARNING: \tThe program is", p)
eprint()
if abs(e1.logLikelihood - e2.logLikelihood) > tolerance:
foundDifference = True
eprint(
"WARNING: Log likelihoods deferred for %s: %f & %f" %
(p, e1.logLikelihood, e2.logLikelihood))
if hasattr(self.task, 'BIC'):
eprint("\t%d examples, BIC=%f, parameterPenalty=%f, n parameters=%d, correct likelihood=%f" %
(len(self.task.examples),
self.task.BIC,
self.task.BIC * math.log(len(self.task.examples)),
substringOccurrences("REAL", str(p)),
substringOccurrences("REAL", str(p)) * self.task.BIC * math.log(len(self.task.examples))))
e1.logLikelihood = - \
substringOccurrences("REAL", str(p)) * self.task.BIC * math.log(len(self.task.examples))
e2.logLikelihood = e1.logLikelihood
e1 = FrontierEntry(
program=e1.program,
logLikelihood=(
e1.logLikelihood +
e2.logLikelihood) /
2,
logPrior=e1.logPrior)
else:
e1 = y[p]
union.append(e1)
if foundDifference:
eprint(
"WARNING: Log likelihoods differed for the same program on the task %s.\n" %
(self.task.name),
"\tThis is acceptable only if the likelihood model is stochastic. Took the geometric mean of the likelihoods.")
return Frontier(union, self.task)