|
from dreamcoder.utilities import eprint |
|
import random |
|
|
|
|
|
class DefaultTaskBatcher:
    """Iterates through task batches of the specified size. Defaults to all tasks if taskBatchSize is None."""

    def __init__(self):
        pass

    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        """Return the contiguous window of tasks for the given iteration.

        The window starts at ``(taskBatchSize * currIteration) % len(tasks)``
        and wraps around to the beginning of ``tasks`` when it runs past the
        end.

        Args:
            ec_result: Not used by this batcher; the parameter matches the
                signature of the other batchers in this file.
            tasks: Sequence of tasks to draw from (concatenated with itself,
                so it must support ``+`` and slicing).
            taskBatchSize: Number of tasks per batch, or None for all tasks.
            currIteration: Index of the current iteration.

        Returns:
            A slice of ``tasks`` holding ``taskBatchSize`` tasks, or an empty
            list when there are no tasks.

        Raises:
            AssertionError: If ``taskBatchSize`` exceeds ``len(tasks)``.
        """
        numTasks = len(tasks)
        if taskBatchSize is None:
            taskBatchSize = numTasks
        elif taskBatchSize > numTasks:
            eprint("Task batch size is greater than total number of tasks, aborting.")
            # Raised explicitly (instead of `assert False`) so the check is
            # not stripped when Python runs with -O.
            raise AssertionError(
                "taskBatchSize (%s) exceeds the number of tasks (%s)"
                % (taskBatchSize, numTasks))

        if numTasks == 0:
            # Nothing to batch; also avoids a modulo-by-zero below.
            return []

        start = (taskBatchSize * currIteration) % numTasks
        end = start + taskBatchSize
        # Doubling the sequence lets a window that runs off the end wrap
        # around to the first tasks with a single slice.
        return (tasks + tasks)[start:end]
|
|
|
class RandomTaskBatcher:
    """Returns a randomly sampled task batch of the specified size. Defaults to all tasks if taskBatchSize is None."""

    def __init__(self):
        pass

    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        """Sample ``taskBatchSize`` distinct tasks uniformly at random.

        Args:
            ec_result: Not used by this batcher; the parameter matches the
                signature of the other batchers in this file.
            tasks: Sequence of tasks to sample from.
            taskBatchSize: Number of tasks to sample, or None for all tasks
                (which yields every task in random order).
            currIteration: Not used by this batcher.

        Returns:
            A new list of sampled tasks, drawn with the module-level
            ``random`` generator.

        Raises:
            AssertionError: If ``taskBatchSize`` exceeds ``len(tasks)``.
        """
        numTasks = len(tasks)
        if taskBatchSize is None:
            taskBatchSize = numTasks
        elif taskBatchSize > numTasks:
            eprint("Task batch size is greater than total number of tasks, aborting.")
            # Raised explicitly (instead of `assert False`) so the check is
            # not stripped when Python runs with -O.
            raise AssertionError(
                "taskBatchSize (%s) exceeds the number of tasks (%s)"
                % (taskBatchSize, numTasks))

        return random.sample(tasks, taskBatchSize)
|
|
|
class RandomShuffleTaskBatcher:
    """Randomly shuffles the task batch first, and then iterates through task batches of the specified size like DefaultTaskBatcher.

    Reshuffles across iterations - intended as benchmark comparison to test the task ordering."""

    def __init__(self, baseSeed=0):
        # Seed offset; the shuffle for epoch e is seeded with baseSeed + e.
        self.baseSeed = baseSeed

    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        """Return the window of the current epoch's shuffled task order.

        The epoch is ``(currIteration * taskBatchSize) / len(tasks)``
        truncated to an int. Tasks are shuffled with a generator seeded by
        ``baseSeed + epoch``; a window that runs past the end continues into
        the shuffle seeded by ``baseSeed + epoch + 1``.

        Args:
            ec_result: Not used by this batcher; the parameter matches the
                signature of the other batchers in this file.
            tasks: List of hashable tasks. It is copied, never mutated.
            taskBatchSize: Number of tasks per batch, or None for all tasks.
            currIteration: Index of the current iteration.

        Returns:
            The batch in shuffled order with duplicates removed. A batch that
            wraps into the next epoch's shuffle can contain the same task
            twice, so the result may be shorter than ``taskBatchSize``.

        Raises:
            AssertionError: If ``taskBatchSize`` exceeds ``len(tasks)``.
        """
        numTasks = len(tasks)
        if taskBatchSize is None:
            taskBatchSize = numTasks
        elif taskBatchSize > numTasks:
            eprint("Task batch size is greater than total number of tasks, aborting.")
            # Raised explicitly (instead of `assert False`) so the check is
            # not stripped when Python runs with -O.
            raise AssertionError(
                "taskBatchSize (%s) exceeds the number of tasks (%s)"
                % (taskBatchSize, numTasks))

        if numTasks == 0:
            # Nothing to batch; also avoids dividing by zero below.
            return []

        currEpoch = int(int(currIteration * taskBatchSize) / numTasks)

        # Dedicated Random instances keep the shuffles reproducible and leave
        # the module-level generator's state untouched.
        shuffledTasks = tasks.copy()
        random.Random(self.baseSeed + currEpoch).shuffle(shuffledTasks)

        # Order used for the part of the window that spills past the end.
        shuffledTasksWrap = tasks.copy()
        random.Random(self.baseSeed + currEpoch + 1).shuffle(shuffledTasksWrap)

        start = (taskBatchSize * currIteration) % numTasks
        end = start + taskBatchSize
        taskBatch = (shuffledTasks + shuffledTasksWrap)[start:end]

        # De-duplicate while keeping first-seen order. Going through set()
        # would discard the shuffled order this batcher exists to produce.
        return list(dict.fromkeys(taskBatch))
|
|
|
class UnsolvedTaskBatcher:
    """At a given epoch, returns only batches of the tasks that have not been solved at least twice"""

    def __init__(self):
        # Maps task -> number of getTaskBatch calls in which that task's
        # frontier was non-empty.
        self.timesSolved = {}
        # Not read anywhere in this class; kept because it is a public attribute.
        self.start = 0

    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        """Update the solve counts and return tasks counted fewer than twice.

        Every call walks ``ec_result.allFrontiers`` and increments the count
        of each task whose frontier is non-empty, so a task that stays solved
        is counted once per call.

        Args:
            ec_result: Object whose ``allFrontiers`` maps each task to a
                frontier exposing an ``empty`` attribute.
            tasks: Iterable of candidate tasks.
            taskBatchSize: Must be None; batch sizes are not supported.
            currIteration: Not used by this batcher.

        Returns:
            The tasks from ``tasks``, in their original order, whose solve
            count is below 2.

        Raises:
            AssertionError: If ``taskBatchSize`` is not None.
        """
        if taskBatchSize is not None:
            # Raised explicitly (instead of a bare assert) so an unsupported
            # batch size is not silently ignored when Python runs with -O.
            raise AssertionError("This batching strategy does not support batch sizes")

        for task, frontier in ec_result.allFrontiers.items():
            solvedSoFar = self.timesSolved.get(task, 0)
            if frontier.empty:
                # Register the task without counting a solve.
                self.timesSolved[task] = max(0, solvedSoFar)
            else:
                self.timesSolved[task] = 1 + solvedSoFar

        return [t for t in tasks if self.timesSolved.get(t, 0) < 2]
|
|
|
def entropyRandomBatch(ec_result, tasks, taskBatchSize, randomRatio):
    """Build a batch mixing lowest-entropy tasks with randomly chosen ones.

    ``int(randomRatio * taskBatchSize)`` tasks are sampled at random and the
    rest of the batch is filled with the tasks of lowest entropy, as reported
    by ``ec_result.recognitionModel.taskGrammarEntropies(tasks)``. The random
    part is drawn only from tasks not already picked by entropy.

    Args:
        ec_result: Object exposing ``recognitionModel.taskGrammarEntropies``,
            which is called with ``tasks`` and must return a mapping from
            task to entropy value.
        tasks: Candidate tasks.
        taskBatchSize: Requested batch size. Values larger than
            ``len(tasks)`` are clamped to ``len(tasks)``.
        randomRatio: Fraction of the batch to choose randomly
            (presumably in [0, 1] - callers in this file pass 0 and .5).

    Returns:
        A list with the lowest-entropy tasks first (ascending entropy)
        followed by the randomly sampled tasks.
    """
    # Callers may pass fewer candidates than the requested size (e.g. only
    # the unsolved tasks); without this clamp random.sample raises ValueError
    # when asked for more items than the remaining pool holds.
    taskBatchSize = min(taskBatchSize, len(tasks))
    numRandom = int(randomRatio * taskBatchSize)
    numEntropy = taskBatchSize - numRandom

    eprint("Selecting top %d tasks from the %d overall tasks given lowest entropy." % (taskBatchSize, len(tasks)))
    eprint("Will be selecting %d by lowest entropy and %d randomly." %(numEntropy, numRandom))
    taskGrammarEntropies = ec_result.recognitionModel.taskGrammarEntropies(tasks)
    sortedEntropies = sorted(taskGrammarEntropies.items(), key=lambda x: x[1])

    entropyBatch = [task for (task, _) in sortedEntropies[:numEntropy]]
    randomPool = [task for (task, _) in sortedEntropies[numEntropy:]]
    # The mapping can hold fewer entries than len(tasks) (e.g. duplicate
    # tasks collapse into one key), so bound the draw by the actual pool.
    randomBatch = random.sample(randomPool, min(numRandom, len(randomPool)))

    return entropyBatch + randomBatch
|
|
|
def kNearestNeighbors(ec_result, tasks, k, task):
    """Finds the k nearest neighbors in the recognition model logProduction space to a given task.

    Args:
        ec_result: Object exposing
            ``recognitionModel.grammarLogProductionDistanceToTask``.
        tasks: List of candidate tasks; results are picked from it by index.
        k: Maximum number of neighbors to return.
        task: The task whose neighbors are wanted.

    Returns:
        Up to ``k`` elements of ``tasks``, ordered by decreasing score.
    """
    import numpy as np

    # NOTE(review): assumes the helper returns a 1-D NumPy array with one
    # score per entry of `tasks`, in the same order - confirm against the
    # recognition model.
    cosDistance = ec_result.recognitionModel.grammarLogProductionDistanceToTask(task, tasks)

    # Negating before argsort ranks the LARGEST values first, so the value is
    # treated as "larger means closer" despite the variable's name.
    argSort = np.argsort(-cosDistance)
    topK = argSort[:k]

    # Index the original list instead of np.array(tasks): NumPy would coerce
    # the tasks (tuples become array rows, strings become np.str_), whereas
    # this hands back the caller's own objects.
    return [tasks[i] for i in topK]
|
|
|
|
|
class RandomkNNTaskBatcher:
    """Chooses a random task and finds the (taskBatchSize - 1) nearest neighbors using the recognition model logits."""

    def __init__(self):
        pass

    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        """Return a random seed task followed by its nearest neighbors.

        Without a recognition model the batch is a plain random sample of
        ``taskBatchSize`` tasks.

        Args:
            ec_result: Object exposing ``recognitionModel`` (may be None).
            tasks: Sequence of candidate tasks.
            taskBatchSize: Number of tasks per batch, or None for all tasks.
            currIteration: Not used by this batcher.

        Returns:
            A list starting with the randomly chosen task, followed by the
            result of ``kNearestNeighbors`` for ``taskBatchSize - 1``
            neighbors; an empty list when the batch size is 0.

        Raises:
            AssertionError: If ``taskBatchSize`` exceeds ``len(tasks)``.
        """
        numTasks = len(tasks)
        if taskBatchSize is None:
            taskBatchSize = numTasks
        elif taskBatchSize > numTasks:
            eprint("Task batch size is greater than total number of tasks, aborting.")
            # Raised explicitly (instead of `assert False`) so the check is
            # not stripped when Python runs with -O.
            raise AssertionError(
                "taskBatchSize (%s) exceeds the number of tasks (%s)"
                % (taskBatchSize, numTasks))

        if ec_result.recognitionModel is None:
            eprint("No recognition model, falling back on random %d" % taskBatchSize)
            return random.sample(tasks, taskBatchSize)

        if taskBatchSize == 0:
            # An empty batch was requested (or there are no tasks at all).
            # Continuing would call random.choice on an empty list or ask
            # for k = -1 neighbors, which the `[:k]` slice in
            # kNearestNeighbors turns into "all but the last candidate".
            return []

        randomTask = random.choice(tasks)
        kNN = kNearestNeighbors(ec_result, tasks, taskBatchSize - 1, randomTask)
        return [randomTask] + kNN
|
|
|
class RandomLowEntropykNNTaskBatcher:
    """Choose a random task from the taskBatchSize unsolved tasks with the lowest entropy, and finds the (taskBatchSize - 1) nearest neighbors using the recognition model logits."""

    def __init__(self):
        pass

    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        """Return a low-entropy unsolved seed task plus its nearest neighbors.

        A task counts as unsolved when ``ec_result.allFrontiers[task].empty``
        is true. Neighbors are searched among ALL ``tasks``, not only the
        unsolved ones.

        Args:
            ec_result: Object exposing ``allFrontiers`` (task -> frontier
                with an ``empty`` attribute) and ``recognitionModel``
                (may be None).
            tasks: Sequence of candidate tasks.
            taskBatchSize: Number of tasks per batch, or None to return every
                unsolved task.
            currIteration: Not used by this batcher.

        Returns:
            - every unsolved task when ``taskBatchSize`` is None;
            - an empty list when nothing is unsolved or the size is 0;
            - a random sample of unsolved tasks (at most ``taskBatchSize``)
              when there is no recognition model;
            - otherwise the seed task followed by the result of
              ``kNearestNeighbors`` for ``taskBatchSize - 1`` neighbors.

        Raises:
            AssertionError: If ``taskBatchSize`` exceeds ``len(tasks)``.
        """
        unsolvedTasks = [t for t in tasks if ec_result.allFrontiers[t].empty]

        if taskBatchSize is None:
            return unsolvedTasks
        elif taskBatchSize > len(tasks):
            eprint("Task batch size is greater than total number of tasks, aborting.")
            # Raised explicitly (instead of `assert False`) so the check is
            # not stripped when Python runs with -O.
            raise AssertionError(
                "taskBatchSize (%s) exceeds the number of tasks (%s)"
                % (taskBatchSize, len(tasks)))

        if not unsolvedTasks or taskBatchSize == 0:
            # No seed task can be chosen; random.choice below would raise
            # IndexError on an empty candidate list.
            return []

        if ec_result.recognitionModel is None:
            eprint("No recognition model, falling back on random %d tasks from the remaining %d" %(taskBatchSize, len(unsolvedTasks)))
            # The size was validated against all tasks, but only the unsolved
            # ones are sampled, so cap it to avoid ValueError.
            return random.sample(unsolvedTasks, min(taskBatchSize, len(unsolvedTasks)))

        lowEntropyUnsolved = entropyRandomBatch(ec_result, unsolvedTasks, taskBatchSize, randomRatio=0)
        randomTask = random.choice(lowEntropyUnsolved)
        kNN = kNearestNeighbors(ec_result, tasks, taskBatchSize - 1, randomTask)
        return [randomTask] + kNN
|
|
|
|
|
class UnsolvedEntropyTaskBatcher:
    """Returns tasks that have never been solved at any previous iteration.

    Given a task batch size, returns the unsolved tasks with the lowest entropy."""

    def __init__(self):
        pass

    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        """Return unsolved tasks, preferring those with the lowest entropy.

        A task counts as unsolved when ``ec_result.allFrontiers[task].empty``
        is true.

        Args:
            ec_result: Object exposing ``allFrontiers`` (task -> frontier
                with an ``empty`` attribute) and ``recognitionModel``
                (may be None).
            tasks: Sequence of candidate tasks.
            taskBatchSize: Number of tasks per batch, or None to return every
                unsolved task.
            currIteration: Not used by this batcher.

        Returns:
            - every unsolved task when ``taskBatchSize`` is None;
            - an empty list when nothing is unsolved;
            - a random sample of unsolved tasks (at most ``taskBatchSize``)
              when there is no recognition model;
            - otherwise the result of ``entropyRandomBatch`` with
              ``randomRatio=0``.

        Raises:
            AssertionError: If ``taskBatchSize`` exceeds ``len(tasks)``.
        """
        unsolvedTasks = [t for t in tasks if ec_result.allFrontiers[t].empty]

        if taskBatchSize is None:
            return unsolvedTasks
        elif taskBatchSize > len(tasks):
            eprint("Task batch size is greater than total number of tasks, aborting.")
            # Raised explicitly (instead of `assert False`) so the check is
            # not stripped when Python runs with -O.
            raise AssertionError(
                "taskBatchSize (%s) exceeds the number of tasks (%s)"
                % (taskBatchSize, len(tasks)))

        if not unsolvedTasks:
            # Everything is solved: there is nothing to sample or rank.
            return []

        if ec_result.recognitionModel is None:
            eprint("No recognition model, falling back on random %d tasks from the remaining %d" %(taskBatchSize, len(unsolvedTasks)))
            # The size was validated against all tasks, but only the unsolved
            # ones are sampled, so cap it to avoid ValueError.
            return random.sample(unsolvedTasks, min(taskBatchSize, len(unsolvedTasks)))

        return entropyRandomBatch(ec_result, unsolvedTasks, taskBatchSize, randomRatio=0)
|
|
|
class UnsolvedRandomEntropyTaskBatcher:
    """Returns tasks that have never been solved at any previous iteration.

    Given a task batch size, returns a mix of unsolved tasks with half
    selected randomly and the remaining selected by lowest entropy."""

    def __init__(self):
        pass

    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        """Return unsolved tasks, half by lowest entropy and half at random.

        A task counts as unsolved when ``ec_result.allFrontiers[task].empty``
        is true.

        Args:
            ec_result: Object exposing ``allFrontiers`` (task -> frontier
                with an ``empty`` attribute) and ``recognitionModel``
                (may be None).
            tasks: Sequence of candidate tasks.
            taskBatchSize: Number of tasks per batch, or None to return every
                unsolved task.
            currIteration: Not used by this batcher.

        Returns:
            - every unsolved task when ``taskBatchSize`` is None;
            - an empty list when nothing is unsolved;
            - a random sample of unsolved tasks (at most ``taskBatchSize``)
              when there is no recognition model;
            - otherwise the result of ``entropyRandomBatch`` with
              ``randomRatio=.5``.

        Raises:
            AssertionError: If ``taskBatchSize`` exceeds ``len(tasks)``.
        """
        unsolvedTasks = [t for t in tasks if ec_result.allFrontiers[t].empty]

        if taskBatchSize is None:
            return unsolvedTasks
        elif taskBatchSize > len(tasks):
            eprint("Task batch size is greater than total number of tasks, aborting.")
            # Raised explicitly (instead of `assert False`) so the check is
            # not stripped when Python runs with -O.
            raise AssertionError(
                "taskBatchSize (%s) exceeds the number of tasks (%s)"
                % (taskBatchSize, len(tasks)))

        if not unsolvedTasks:
            # Everything is solved: there is nothing to sample or rank.
            return []

        # The size was validated against all tasks, but the batch is drawn
        # from the unsolved ones only; asking for more than exist makes
        # random.sample raise ValueError.
        batchSize = min(taskBatchSize, len(unsolvedTasks))

        if ec_result.recognitionModel is None:
            eprint("No recognition model, falling back on random %d tasks from the remaining %d" %(taskBatchSize, len(unsolvedTasks)))
            return random.sample(unsolvedTasks, batchSize)

        return entropyRandomBatch(ec_result, unsolvedTasks, batchSize, randomRatio=.5)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|