"""Task-batching strategies.

Every batcher exposes ``getTaskBatch(ec_result, tasks, taskBatchSize,
currIteration)`` and returns the list of tasks to work on for that iteration.
"""

import random

from dreamcoder.utilities import eprint


def _resolveBatchSize(tasks, taskBatchSize):
    """Return the effective batch size for `tasks`.

    `None` means "use every task". A size larger than `len(tasks)` is logged
    and rejected with AssertionError. The exception is raised explicitly
    (rather than via an `assert` statement) so the check is not stripped
    when Python runs with -O.
    """
    if taskBatchSize is None:
        return len(tasks)
    if taskBatchSize > len(tasks):
        eprint("Task batch size is greater than total number of tasks, aborting.")
        raise AssertionError(
            "Task batch size (%s) is greater than total number of tasks (%s)."
            % (taskBatchSize, len(tasks)))
    return taskBatchSize


def _unsolvedTasks(ec_result, tasks):
    """Return the tasks whose frontier in `ec_result.allFrontiers` is empty."""
    return [t for t in tasks if ec_result.allFrontiers[t].empty]


def _randomUnsolvedFallback(unsolvedTasks, taskBatchSize):
    """Uniformly sample up to `taskBatchSize` unsolved tasks.

    Used when there is no recognition model to rank tasks. The sample size is
    clamped because there may be fewer unsolved tasks left than the requested
    batch size, and random.sample raises ValueError on an over-draw.
    """
    sampleSize = min(taskBatchSize, len(unsolvedTasks))
    eprint("No recognition model, falling back on random %d tasks from the remaining %d"
           % (sampleSize, len(unsolvedTasks)))
    return random.sample(unsolvedTasks, sampleSize)


class DefaultTaskBatcher:
    """Iterates through task batches of the specified size. Defaults to all tasks if taskBatchSize is None."""

    def __init__(self):
        pass

    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        """Return the `currIteration`-th consecutive window of `tasks`.

        The window wraps around to the start of the list when it runs off the
        end. `ec_result` is not used by this strategy. `tasks` must be a list.

        Raises:
            AssertionError: if `taskBatchSize` exceeds `len(tasks)`.
        """
        taskBatchSize = _resolveBatchSize(tasks, taskBatchSize)
        if not tasks:
            # Nothing to batch; also avoids the modulo-by-zero below.
            return []

        start = (taskBatchSize * currIteration) % len(tasks)
        end = start + taskBatchSize
        # Doubling the list handles wraparound: end never exceeds 2 * len(tasks).
        return (tasks + tasks)[start:end]


class RandomTaskBatcher:
    """Returns a randomly sampled task batch of the specified size. Defaults to all tasks if taskBatchSize is None."""

    def __init__(self):
        pass

    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        """Return `taskBatchSize` tasks sampled without replacement.

        `ec_result` and `currIteration` are not used by this strategy.

        Raises:
            AssertionError: if `taskBatchSize` exceeds `len(tasks)`.
        """
        taskBatchSize = _resolveBatchSize(tasks, taskBatchSize)
        return random.sample(tasks, taskBatchSize)


class RandomShuffleTaskBatcher:
    """Randomly shuffles the task batch first, and then iterates through task batches of the specified size like DefaultTaskBatcher.
    Reshuffles across iterations - intended as benchmark comparison to test the task ordering."""

    def __init__(self, baseSeed=0):
        # Seed offset; the shuffle for epoch e is seeded with baseSeed + e.
        self.baseSeed = baseSeed

    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        """Return the `currIteration`-th window over a per-epoch shuffle of `tasks`.

        A window that runs past the end of the current epoch's shuffle
        continues into the next epoch's shuffle. Tasks that appear in both
        parts are returned once, so the batch can be shorter than
        `taskBatchSize`. The result order is deterministic for a given
        `baseSeed`, `tasks` and `currIteration`. `ec_result` is not used.

        Raises:
            AssertionError: if `taskBatchSize` exceeds `len(tasks)`.
        """
        taskBatchSize = _resolveBatchSize(tasks, taskBatchSize)
        if not tasks:
            # Nothing to batch; also avoids the division by zero below.
            return []

        # Reshuffles tasks in a fixed way across epochs for reproducibility.
        currEpoch = int(currIteration * taskBatchSize) // len(tasks)

        shuffledTasks = tasks.copy()  # Since shuffle works in place.
        random.Random(self.baseSeed + currEpoch).shuffle(shuffledTasks)

        # Same seed the next epoch will use, so the wrapped part of this batch
        # lines up with the head of the next epoch's ordering.
        shuffledTasksWrap = tasks.copy()  # Since shuffle works in place.
        random.Random(self.baseSeed + currEpoch + 1).shuffle(shuffledTasksWrap)

        start = (taskBatchSize * currIteration) % len(shuffledTasks)
        end = start + taskBatchSize
        taskBatch = (shuffledTasks + shuffledTasksWrap)[start:end]  # Wraparound nicely.

        # De-duplicate while keeping first-occurrence order. A plain set()
        # would return the tasks in hash order, which undoes the seeded
        # shuffle and is not reproducible across runs.
        return list(dict.fromkeys(taskBatch))


class UnsolvedTaskBatcher:
    """At a given epoch, returns only batches of the tasks that have not been solved at least twice"""

    def __init__(self):
        self.timesSolved = {}  # map from task to times that we have solved it
        self.start = 0  # not read anywhere in this file; kept for compatibility

    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        """Update the solve counts from `ec_result` and return under-solved tasks.

        Each call adds one to the count of every task in
        `ec_result.allFrontiers` whose frontier is non-empty, then returns the
        members of `tasks` whose count is below 2. `currIteration` is not used.

        Raises:
            AssertionError: if `taskBatchSize` is not None.
        """
        if taskBatchSize is not None:
            # Raised explicitly so the check survives `python -O`.
            raise AssertionError("This batching strategy does not support batch sizes")

        for t, f in ec_result.allFrontiers.items():
            if f.empty:
                # Record the task without changing an existing count.
                self.timesSolved.setdefault(t, 0)
            else:
                self.timesSolved[t] = 1 + self.timesSolved.get(t, 0)
        return [t for t in tasks if self.timesSolved.get(t, 0) < 2]


def entropyRandomBatch(ec_result, tasks, taskBatchSize, randomRatio):
    """Pick a batch that mixes lowest-entropy tasks with randomly chosen ones.

    `int(randomRatio * taskBatchSize)` tasks are drawn at random from the
    tasks not selected by entropy; the rest of the batch is the tasks with the
    lowest values reported by
    `ec_result.recognitionModel.taskGrammarEntropies(tasks)`. If `tasks` holds
    fewer than `taskBatchSize` entries the batch is correspondingly shorter.
    """
    numRandom = int(randomRatio * taskBatchSize)
    numEntropy = taskBatchSize - numRandom

    eprint("Selecting top %d tasks from the %d overall tasks given lowest entropy." % (taskBatchSize, len(tasks)))
    eprint("Will be selecting %d by lowest entropy and %d randomly." %(numEntropy, numRandom))
    taskGrammarEntropies = ec_result.recognitionModel.taskGrammarEntropies(tasks)
    sortedEntropies = sorted(taskGrammarEntropies.items(), key=lambda x: x[1])

    entropyBatch = [task for (task, entropy) in sortedEntropies[:numEntropy]]
    remaining = [task for (task, entropy) in sortedEntropies[numEntropy:]]
    # Clamp: random.sample raises ValueError if asked for more items than
    # remain after the entropy-selected tasks have been taken out.
    randomBatch = random.sample(remaining, min(numRandom, len(remaining)))

    return entropyBatch + randomBatch


def kNearestNeighbors(ec_result, tasks, k, task):
    """Finds the k nearest neighbors in the recognition model logProduction space to a given task.

    Returns the `k` members of `tasks` with the largest values from
    `ec_result.recognitionModel.grammarLogProductionDistanceToTask(task, tasks)`.
    """
    # Imported lazily so numpy is only required when this strategy is used.
    import numpy as np
    # NOTE(review): assumes the model returns a 1-D numeric array aligned with
    # `tasks`, where larger means more similar -- confirm against the model.
    cosDistance = ec_result.recognitionModel.grammarLogProductionDistanceToTask(task, tasks)
    argSort = np.argsort(-cosDistance)  # Want the greatest similarity.
    topK = argSort[:k]
    # NOTE(review): if `task` is itself in `tasks` it can presumably appear in
    # the result -- verify whether callers expect it to be excluded.
    topKTasks = list(np.array(tasks)[topK])
    return topKTasks


class RandomkNNTaskBatcher:
    """Chooses a random task and finds the (taskBatchSize - 1) nearest neighbors using the recognition model logits."""

    def __init__(self):
        pass

    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        """Return a random seed task followed by its nearest neighbors.

        Falls back to a uniform random sample when `ec_result.recognitionModel`
        is None. `currIteration` is not used.

        Raises:
            AssertionError: if `taskBatchSize` exceeds `len(tasks)`.
        """
        taskBatchSize = _resolveBatchSize(tasks, taskBatchSize)

        if ec_result.recognitionModel is None:
            eprint("No recognition model, falling back on random %d" % taskBatchSize)
            return random.sample(tasks, taskBatchSize)

        if taskBatchSize < 1:
            # An empty batch was requested (or there are no tasks at all).
            # Without this guard k would be negative and the neighbor slice
            # would return almost every task.
            return []

        randomTask = random.choice(tasks)
        kNN = kNearestNeighbors(ec_result, tasks, taskBatchSize - 1, randomTask)
        return [randomTask] + kNN


class RandomLowEntropykNNTaskBatcher:
    """Choose a random task from the taskBatchSize unsolved tasks with the lowest entropy, and finds the (taskBatchSize - 1) nearest neighbors using the recognition model logits."""

    def __init__(self):
        pass

    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        """Return a low-entropy unsolved seed task followed by its nearest neighbors.

        With `taskBatchSize=None`, returns every unsolved task. Without a
        recognition model, returns a uniform random sample of unsolved tasks.
        Neighbors are searched among all of `tasks`, not only the unsolved
        ones. Returns `[]` when no unsolved task is left. `currIteration` is
        not used.

        Raises:
            AssertionError: if `taskBatchSize` exceeds `len(tasks)`.
        """
        unsolvedTasks = _unsolvedTasks(ec_result, tasks)

        if taskBatchSize is None:
            return unsolvedTasks
        taskBatchSize = _resolveBatchSize(tasks, taskBatchSize)

        if ec_result.recognitionModel is None:
            return _randomUnsolvedFallback(unsolvedTasks, taskBatchSize)

        if not unsolvedTasks or taskBatchSize < 1:
            # No seed task to choose from; random.choice would raise IndexError.
            return []

        lowEntropyUnsolved = entropyRandomBatch(ec_result, unsolvedTasks, taskBatchSize, randomRatio=0)
        randomTask = random.choice(lowEntropyUnsolved)
        kNN = kNearestNeighbors(ec_result, tasks, taskBatchSize - 1, randomTask)
        return [randomTask] + kNN


class UnsolvedEntropyTaskBatcher:
    """Returns tasks that have never been solved at any previous iteration.
    Given a task batch size, returns the unsolved tasks with the lowest entropy."""

    def __init__(self):
        pass

    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        """Return up to `taskBatchSize` unsolved tasks, lowest entropy first.

        With `taskBatchSize=None`, returns every unsolved task. Without a
        recognition model, returns a uniform random sample of unsolved tasks.
        `currIteration` is not used.

        Raises:
            AssertionError: if `taskBatchSize` exceeds `len(tasks)`.
        """
        unsolvedTasks = _unsolvedTasks(ec_result, tasks)

        if taskBatchSize is None:
            return unsolvedTasks
        taskBatchSize = _resolveBatchSize(tasks, taskBatchSize)

        if ec_result.recognitionModel is None:
            return _randomUnsolvedFallback(unsolvedTasks, taskBatchSize)
        return entropyRandomBatch(ec_result, unsolvedTasks, taskBatchSize, randomRatio=0)


class UnsolvedRandomEntropyTaskBatcher:
    """Returns tasks that have never been solved at any previous iteration.
    Given a task batch size, returns a mix of unsolved tasks with half selected randomly
    and the remaining selected by lowest entropy."""

    def __init__(self):
        pass

    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        """Return up to `taskBatchSize` unsolved tasks, half random and half lowest entropy.

        With `taskBatchSize=None`, returns every unsolved task. Without a
        recognition model, returns a uniform random sample of unsolved tasks.
        `currIteration` is not used.

        Raises:
            AssertionError: if `taskBatchSize` exceeds `len(tasks)`.
        """
        unsolvedTasks = _unsolvedTasks(ec_result, tasks)

        if taskBatchSize is None:
            return unsolvedTasks
        taskBatchSize = _resolveBatchSize(tasks, taskBatchSize)

        if ec_result.recognitionModel is None:
            return _randomUnsolvedFallback(unsolvedTasks, taskBatchSize)
        return entropyRandomBatch(ec_result, unsolvedTasks, taskBatchSize, randomRatio=.5)