File size: 9,709 Bytes
e1c1753 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 |
from dreamcoder.utilities import eprint
import random
class DefaultTaskBatcher:
    """Iterates through task batches of the specified size.

    Defaults to all tasks if taskBatchSize is None.
    """

    def __init__(self):
        pass

    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        """Return the window of tasks selected by ``currIteration``.

        Args:
            ec_result: Not used by this batcher.
            tasks: List of tasks to batch over.
            taskBatchSize: Number of tasks per batch, or None for all tasks.
            currIteration: Iteration index; the window starts at
                ``(taskBatchSize * currIteration) % len(tasks)``.

        Returns:
            A list of ``taskBatchSize`` consecutive tasks, wrapping around to
            the beginning of ``tasks`` when the window runs past the end.
            An empty list when ``tasks`` is empty.

        Raises:
            AssertionError: If taskBatchSize is greater than len(tasks).
        """
        numTasks = len(tasks)
        if taskBatchSize is None:
            taskBatchSize = numTasks
        elif taskBatchSize > numTasks:
            message = "Task batch size is greater than total number of tasks, aborting."
            eprint(message)
            # Raised explicitly (rather than `assert False`) so the check
            # still fires when Python runs with -O.
            raise AssertionError(message)

        # Nothing to batch; also avoids the modulo-by-zero below.
        if numTasks == 0:
            return []

        start = (taskBatchSize * currIteration) % numTasks
        end = start + taskBatchSize
        # Doubling the list lets a single slice handle wraparound.
        return (tasks + tasks)[start:end]
class RandomTaskBatcher:
    """Returns a randomly sampled task batch of the specified size.

    Defaults to all tasks if taskBatchSize is None.
    """

    def __init__(self):
        pass

    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        """Return ``taskBatchSize`` tasks sampled without replacement.

        Args:
            ec_result: Not used by this batcher.
            tasks: List of tasks to sample from.
            taskBatchSize: Number of tasks to sample, or None for all tasks.
            currIteration: Not used by this batcher.

        Returns:
            A new list produced by ``random.sample(tasks, taskBatchSize)``
            using the module-level random state.

        Raises:
            AssertionError: If taskBatchSize is greater than len(tasks).
        """
        if taskBatchSize is None:
            taskBatchSize = len(tasks)
        elif taskBatchSize > len(tasks):
            message = "Task batch size is greater than total number of tasks, aborting."
            eprint(message)
            # Raised explicitly (rather than `assert False`) so the check
            # still fires when Python runs with -O.
            raise AssertionError(message)
        return random.sample(tasks, taskBatchSize)
class RandomShuffleTaskBatcher:
    """Randomly shuffles the tasks first, and then iterates through task
    batches of the specified size like DefaultTaskBatcher.

    Reshuffles across epochs - intended as benchmark comparison to test the
    task ordering.
    """

    def __init__(self, baseSeed=0):
        # Seed offset; the shuffle for epoch e is seeded with baseSeed + e.
        self.baseSeed = baseSeed

    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        """Return the batch for ``currIteration`` from a seeded shuffle.

        Args:
            ec_result: Not used by this batcher.
            tasks: List of (hashable) tasks; it is not modified.
            taskBatchSize: Number of tasks per batch, or None for all tasks.
            currIteration: Iteration index selecting the window and the epoch.

        Returns:
            The window of tasks in shuffled order. A window that runs past the
            end of the current epoch continues into the next epoch's shuffle;
            tasks that would then appear twice are kept only at their first
            position, so the batch can be shorter than taskBatchSize.
            An empty list when ``tasks`` is empty.

        Raises:
            AssertionError: If taskBatchSize is greater than len(tasks).
        """
        numTasks = len(tasks)
        if taskBatchSize is None:
            taskBatchSize = numTasks
        elif taskBatchSize > numTasks:
            message = "Task batch size is greater than total number of tasks, aborting."
            eprint(message)
            # Raised explicitly (rather than `assert False`) so the check
            # still fires when Python runs with -O.
            raise AssertionError(message)

        # Nothing to batch; also avoids dividing by zero below.
        if numTasks == 0:
            return []

        # Reshuffles tasks in a fixed way across epochs for reproducibility.
        # Integer floor division avoids float rounding on large products.
        currEpoch = int(currIteration * taskBatchSize) // numTasks

        shuffledTasks = list(tasks)  # Copy, since shuffle works in place.
        random.Random(self.baseSeed + currEpoch).shuffle(shuffledTasks)
        shuffledTasksWrap = list(tasks)  # Next epoch's order, for wraparound.
        random.Random(self.baseSeed + currEpoch + 1).shuffle(shuffledTasksWrap)

        start = (taskBatchSize * currIteration) % numTasks
        end = start + taskBatchSize
        taskBatch = (shuffledTasks + shuffledTasksWrap)[start:end]

        # De-duplicate while keeping the shuffled order. Going through a bare
        # set() would return the tasks in hash order, which discards the
        # seeded ordering this batcher exists to provide.
        seen = set()
        uniqueBatch = []
        for task in taskBatch:
            if task not in seen:
                seen.add(task)
                uniqueBatch.append(task)
        return uniqueBatch
class UnsolvedTaskBatcher:
    """At a given epoch, returns only the tasks that have not been solved at
    least twice."""

    def __init__(self):
        self.timesSolved = {}  # map from task to times that we have solved it
        self.start = 0

    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        """Update the solve counts and return the tasks still below two.

        Every call walks ``ec_result.allFrontiers`` and adds one to the count
        of each task whose frontier is non-empty; tasks with an empty frontier
        keep their current count (starting at 0).

        Args:
            ec_result: Object exposing ``allFrontiers``, a mapping from task to
                a frontier with an ``empty`` attribute.
            tasks: List of tasks to filter.
            taskBatchSize: Must be None; batch sizes are not supported.
            currIteration: Not used by this batcher.

        Returns:
            The tasks, in their original order, counted fewer than two times.

        Raises:
            AssertionError: If taskBatchSize is not None.
        """
        if taskBatchSize is not None:
            # Raised explicitly (rather than via `assert`) so the check still
            # fires when Python runs with -O.
            raise AssertionError("This batching strategy does not support batch sizes")

        for task, frontier in ec_result.allFrontiers.items():
            previous = self.timesSolved.get(task, 0)
            if frontier.empty:
                self.timesSolved[task] = max(0, previous)
            else:
                self.timesSolved[task] = 1 + previous

        return [t for t in tasks if self.timesSolved.get(t, 0) < 2]
def entropyRandomBatch(ec_result, tasks, taskBatchSize, randomRatio):
    """Select a batch mixing the lowest-entropy tasks with random ones.

    Args:
        ec_result: Object whose ``recognitionModel.taskGrammarEntropies(tasks)``
            returns a mapping from task to an entropy value.
        tasks: Candidate tasks.
        taskBatchSize: Requested number of tasks in the batch.
        randomRatio: Fraction of the batch drawn at random; the random count is
            ``int(randomRatio * taskBatchSize)`` and the rest of the batch is
            chosen by lowest entropy.

    Returns:
        The lowest-entropy tasks followed by tasks sampled at random from the
        remaining ones. The batch is shorter than taskBatchSize when there are
        not enough candidate tasks to fill it.
    """
    numRandom = int(randomRatio * taskBatchSize)
    numEntropy = taskBatchSize - numRandom
    eprint("Selecting top %d tasks from the %d overall tasks given lowest entropy." % (taskBatchSize, len(tasks)))
    eprint("Will be selecting %d by lowest entropy and %d randomly." %(numEntropy, numRandom))

    taskGrammarEntropies = ec_result.recognitionModel.taskGrammarEntropies(tasks)
    sortedEntropies = sorted(taskGrammarEntropies.items(), key=lambda x: x[1])
    rankedTasks = [task for (task, _) in sortedEntropies]

    entropyBatch = rankedTasks[:numEntropy]
    remainingTasks = rankedTasks[numEntropy:]
    # random.sample raises ValueError when asked for more items than the
    # population holds, so cap the random share at what is actually left.
    numRandom = min(numRandom, len(remainingTasks))
    randomBatch = random.sample(remainingTasks, numRandom)
    return entropyBatch + randomBatch
def kNearestNeighbors(ec_result, tasks, k, task):
    """Finds the k nearest neighbors in the recognition model logProduction
    space to a given task.

    Args:
        ec_result: Object whose
            ``recognitionModel.grammarLogProductionDistanceToTask(task, tasks)``
            returns one score per entry of ``tasks``; higher scores are
            treated as more similar.
        tasks: List of candidate tasks, indexed in the same order as the
            returned scores.
        k: Number of neighbors to return.
        task: The task whose neighbors are wanted.

    Returns:
        A list of the ``k`` highest-scoring entries of ``tasks``, most similar
        first. The elements are the original task objects.
    """
    import numpy as np

    cosDistance = ec_result.recognitionModel.grammarLogProductionDistanceToTask(task, tasks)
    argSort = np.argsort(-cosDistance)  # Want the greatest similarity.
    topK = argSort[:k]
    # Index the list directly: np.array(tasks) would coerce the tasks (e.g.
    # sequence-like tasks become extra array dimensions, primitives become
    # numpy scalars) instead of handing back the original objects.
    return [tasks[index] for index in topK]
class RandomkNNTaskBatcher:
    """Chooses a random task and finds the (taskBatchSize - 1) nearest
    neighbors using the recognition model logits."""

    def __init__(self):
        pass

    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        """Return a random task together with its nearest neighbors.

        Args:
            ec_result: Object exposing ``recognitionModel`` (may be None).
            tasks: List of tasks to choose from.
            taskBatchSize: Number of tasks per batch, or None for all tasks.
            currIteration: Not used by this batcher.

        Returns:
            Without a recognition model, ``taskBatchSize`` tasks sampled at
            random. Otherwise a randomly chosen task followed by the result of
            ``kNearestNeighbors`` for ``taskBatchSize - 1`` neighbors. An
            empty list when the batch size is 0.

        Raises:
            AssertionError: If taskBatchSize is greater than len(tasks).
        """
        if taskBatchSize is None:
            taskBatchSize = len(tasks)
        elif taskBatchSize > len(tasks):
            message = "Task batch size is greater than total number of tasks, aborting."
            eprint(message)
            # Raised explicitly (rather than `assert False`) so the check
            # still fires when Python runs with -O.
            raise AssertionError(message)

        if ec_result.recognitionModel is None:
            eprint("No recognition model, falling back on random %d" % taskBatchSize)
            return random.sample(tasks, taskBatchSize)

        # An empty batch needs no seed task. Without this guard an empty task
        # list crashes random.choice, and k = -1 would slice off all but one
        # neighbor instead of none.
        if taskBatchSize == 0:
            return []

        randomTask = random.choice(tasks)
        # NOTE(review): randomTask is itself among the candidates passed in;
        # if the model ranks it as its own neighbor it appears twice in the
        # batch - confirm against grammarLogProductionDistanceToTask.
        kNN = kNearestNeighbors(ec_result, tasks, taskBatchSize - 1, randomTask)
        return [randomTask] + kNN
class RandomLowEntropykNNTaskBatcher:
    """Chooses a random task from the taskBatchSize unsolved tasks with the
    lowest entropy, and finds the (taskBatchSize - 1) nearest neighbors among
    all tasks using the recognition model logits."""

    def __init__(self):
        pass

    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        """Return a low-entropy unsolved task together with its neighbors.

        Args:
            ec_result: Object exposing ``allFrontiers`` (task -> frontier with
                an ``empty`` attribute) and ``recognitionModel`` (may be None).
            tasks: List of all tasks.
            taskBatchSize: Number of tasks per batch, or None to return every
                unsolved task.
            currIteration: Not used by this batcher.

        Returns:
            With taskBatchSize None, all tasks whose frontier is empty.
            Without a recognition model, a random sample of the unsolved tasks
            (at most taskBatchSize of them). Otherwise a task picked at random
            from the lowest-entropy unsolved tasks, followed by the result of
            ``kNearestNeighbors`` over all tasks. An empty list when there is
            nothing to pick a seed task from.

        Raises:
            AssertionError: If taskBatchSize is greater than len(tasks).
        """
        unsolvedTasks = [t for t in tasks if ec_result.allFrontiers[t].empty]
        if taskBatchSize is None:
            return unsolvedTasks
        elif taskBatchSize > len(tasks):
            message = "Task batch size is greater than total number of tasks, aborting."
            eprint(message)
            # Raised explicitly (rather than `assert False`) so the check
            # still fires when Python runs with -O.
            raise AssertionError(message)

        if ec_result.recognitionModel is None:
            # The size check above is against all tasks, so there may be
            # fewer unsolved tasks than requested; random.sample would raise.
            sampleSize = min(taskBatchSize, len(unsolvedTasks))
            eprint("No recognition model, falling back on random %d tasks from the remaining %d" %(sampleSize, len(unsolvedTasks)))
            return random.sample(unsolvedTasks, sampleSize)

        # No candidate seed task: random.choice would fail on an empty list.
        if not unsolvedTasks or taskBatchSize == 0:
            return []

        lowEntropyUnsolved = entropyRandomBatch(ec_result, unsolvedTasks, taskBatchSize, randomRatio=0)
        randomTask = random.choice(lowEntropyUnsolved)
        kNN = kNearestNeighbors(ec_result, tasks, taskBatchSize - 1, randomTask)
        return [randomTask] + kNN
class UnsolvedEntropyTaskBatcher:
    """Returns tasks that have never been solved at any previous iteration.

    Given a task batch size, returns the unsolved tasks with the lowest
    entropy.
    """

    def __init__(self):
        pass

    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        """Return up to ``taskBatchSize`` unsolved tasks, lowest entropy first.

        Args:
            ec_result: Object exposing ``allFrontiers`` (task -> frontier with
                an ``empty`` attribute) and ``recognitionModel`` (may be None).
            tasks: List of all tasks.
            taskBatchSize: Number of tasks per batch, or None to return every
                unsolved task.
            currIteration: Not used by this batcher.

        Returns:
            With taskBatchSize None, all tasks whose frontier is empty.
            Without a recognition model, a random sample of the unsolved tasks
            (at most taskBatchSize of them). Otherwise the result of
            ``entropyRandomBatch`` with ``randomRatio=0``. An empty list when
            no task is unsolved.

        Raises:
            AssertionError: If taskBatchSize is greater than len(tasks).
        """
        unsolvedTasks = [t for t in tasks if ec_result.allFrontiers[t].empty]
        if taskBatchSize is None:
            return unsolvedTasks
        elif taskBatchSize > len(tasks):
            message = "Task batch size is greater than total number of tasks, aborting."
            eprint(message)
            # Raised explicitly (rather than `assert False`) so the check
            # still fires when Python runs with -O.
            raise AssertionError(message)

        if ec_result.recognitionModel is None:
            # The size check above is against all tasks, so there may be
            # fewer unsolved tasks than requested; random.sample would raise.
            sampleSize = min(taskBatchSize, len(unsolvedTasks))
            eprint("No recognition model, falling back on random %d tasks from the remaining %d" %(sampleSize, len(unsolvedTasks)))
            return random.sample(unsolvedTasks, sampleSize)

        # Nothing left to rank, so skip querying the recognition model.
        if not unsolvedTasks:
            return []

        return entropyRandomBatch(ec_result, unsolvedTasks, taskBatchSize, randomRatio=0)
class UnsolvedRandomEntropyTaskBatcher:
    """Returns tasks that have never been solved at any previous iteration.

    Given a task batch size, returns a mix of unsolved tasks with a
    ``randomRatio`` share selected randomly and the remaining selected by
    lowest entropy.
    """

    def __init__(self, randomRatio=.5):
        # Fraction of each batch that is drawn at random rather than by
        # lowest entropy; the default matches the previously fixed value.
        self.randomRatio = randomRatio

    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        """Return a batch of unsolved tasks, part low-entropy and part random.

        Args:
            ec_result: Object exposing ``allFrontiers`` (task -> frontier with
                an ``empty`` attribute) and ``recognitionModel`` (may be None).
            tasks: List of all tasks.
            taskBatchSize: Number of tasks per batch, or None to return every
                unsolved task.
            currIteration: Not used by this batcher.

        Returns:
            With taskBatchSize None, all tasks whose frontier is empty.
            Without a recognition model, a random sample of the unsolved
            tasks. Otherwise the result of ``entropyRandomBatch`` using
            ``self.randomRatio``. In both sized cases the batch holds at most
            ``min(taskBatchSize, number of unsolved tasks)`` tasks, and it is
            empty when no task is unsolved.

        Raises:
            AssertionError: If taskBatchSize is greater than len(tasks).
        """
        unsolvedTasks = [t for t in tasks if ec_result.allFrontiers[t].empty]
        if taskBatchSize is None:
            return unsolvedTasks
        elif taskBatchSize > len(tasks):
            message = "Task batch size is greater than total number of tasks, aborting."
            eprint(message)
            # Raised explicitly (rather than `assert False`) so the check
            # still fires when Python runs with -O.
            raise AssertionError(message)

        # The size check above is against all tasks, so there may be fewer
        # unsolved tasks than requested. Capping the size here keeps every
        # random.sample call (here and inside entropyRandomBatch) within the
        # available population.
        batchSize = min(taskBatchSize, len(unsolvedTasks))

        if ec_result.recognitionModel is None:
            eprint("No recognition model, falling back on random %d tasks from the remaining %d" %(batchSize, len(unsolvedTasks)))
            return random.sample(unsolvedTasks, batchSize)

        # Nothing left to rank, so skip querying the recognition model.
        if not unsolvedTasks:
            return []

        return entropyRandomBatch(ec_result, unsolvedTasks, batchSize, randomRatio=self.randomRatio)
|