Fraser-Greenlee committed on
Commit
bbad868
1 Parent(s): c623bcf

update dreamcoder

Browse files
dreamcoder/domains/list/main.py CHANGED
@@ -260,24 +260,23 @@ def list_options(parser):
260
  parser.add_argument("--random-seed", type=int, default=17)
261
 
262
 
263
- def main(args):
264
  """
265
  Takes the return value of the `commandlineArguments()` function as input and
266
  trains/tests the model on manipulating sequences of numbers.
267
  """
268
- random.seed(args.pop("random_seed"))
269
 
270
- dataset = args.pop("dataset")
271
  tasks = {
272
  "Lucas-old": lambda: retrieveJSONTasks("data/list_tasks.json") + sortBootstrap(),
273
  "bootstrap": make_list_bootstrap_tasks,
274
  "sorting": sortBootstrap,
275
- "Lucas-depth1": lambda: retrieveJSONTasks("data/list_tasks2.json")[:105],
276
- "Lucas-depth2": lambda: retrieveJSONTasks("data/list_tasks2.json")[:4928],
277
- "Lucas-depth3": lambda: retrieveJSONTasks("data/list_tasks2.json"),
 
278
  }[dataset]()
279
 
280
- maxTasks = args.pop("maxTasks")
281
  if maxTasks and len(tasks) > maxTasks:
282
  necessaryTasks = [] # maxTasks will not consider these
283
  if dataset.startswith("Lucas2.0") and dataset != "Lucas2.0-depth1":
@@ -344,67 +343,4 @@ def main(args):
344
  return all( len(xs) == 1 and xs[0] == y for xs, y in t.examples )
345
  eprint("Removed", sum(isIdentityTask(t) for t in tasks), "tasks that were just the identity function")
346
  tasks = [t for t in tasks if not isIdentityTask(t) ]
347
-
348
- prims = {"base": basePrimitives,
349
- "McCarthy": McCarthyPrimitives,
350
- "common": bootstrapTarget_extra,
351
- "noLength": no_length,
352
- "rich": primitives}[args.pop("primitives")]()
353
- haveLength = not args.pop("noLength")
354
- haveMap = not args.pop("noMap")
355
- haveUnfold = not args.pop("noUnfold")
356
- eprint(f"Including map as a primitive? {haveMap}")
357
- eprint(f"Including length as a primitive? {haveLength}")
358
- eprint(f"Including unfold as a primitive? {haveUnfold}")
359
- baseGrammar = Grammar.uniform([p
360
- for p in prims
361
- if (p.name != "map" or haveMap) and \
362
- (p.name != "unfold" or haveUnfold) and \
363
- (p.name != "length" or haveLength)])
364
-
365
- extractor = {
366
- "learned": LearnedFeatureExtractor,
367
- }[args.pop("extractor")]
368
- extractor.H = args.pop("hidden")
369
-
370
- timestamp = datetime.datetime.now().isoformat()
371
- outputDirectory = "experimentOutputs/list/%s"%timestamp
372
- os.system("mkdir -p %s"%outputDirectory)
373
-
374
- args.update({
375
- "featureExtractor": extractor,
376
- "outputPrefix": "%s/list"%outputDirectory,
377
- "evaluationTimeout": 0.0005,
378
- })
379
-
380
-
381
- eprint("Got {} list tasks".format(len(tasks)))
382
- split = args.pop("split")
383
- if split:
384
- train_some = defaultdict(list)
385
- for t in tasks:
386
- necessary = train_necessary(t)
387
- if not necessary:
388
- continue
389
- if necessary == "some":
390
- train_some[t.name.split()[0]].append(t)
391
- else:
392
- t.mustTrain = True
393
- for k in sorted(train_some):
394
- ts = train_some[k]
395
- random.shuffle(ts)
396
- ts.pop().mustTrain = True
397
-
398
- test, train = testTrainSplit(tasks, split)
399
- if True:
400
- test = [t for t in test
401
- if t.name not in EASYLISTTASKS]
402
-
403
- eprint(
404
- "Alotted {} tasks for training and {} for testing".format(
405
- len(train), len(test)))
406
- else:
407
- train = tasks
408
- test = []
409
-
410
- explorationCompression(baseGrammar, train, testingTasks=test, **args)
260
  parser.add_argument("--random-seed", type=int, default=17)
261
 
262
 
263
+ def main(dataset='Lucas-old', maxTasks=10_000):
264
  """
265
  Takes the return value of the `commandlineArguments()` function as input and
266
  trains/tests the model on manipulating sequences of numbers.
267
  """
268
+ random.seed(9)
269
 
 
270
  tasks = {
271
  "Lucas-old": lambda: retrieveJSONTasks("data/list_tasks.json") + sortBootstrap(),
272
  "bootstrap": make_list_bootstrap_tasks,
273
  "sorting": sortBootstrap,
274
+ # removed because the data file (data/list_tasks2.json) is over 10MB
275
+ # "Lucas-depth1": lambda: retrieveJSONTasks("data/list_tasks2.json")[:105],
276
+ # "Lucas-depth2": lambda: retrieveJSONTasks("data/list_tasks2.json")[:4928],
277
+ # "Lucas-depth3": lambda: retrieveJSONTasks("data/list_tasks2.json"),
278
  }[dataset]()
279
 
 
280
  if maxTasks and len(tasks) > maxTasks:
281
  necessaryTasks = [] # maxTasks will not consider these
282
  if dataset.startswith("Lucas2.0") and dataset != "Lucas2.0-depth1":
343
  return all( len(xs) == 1 and xs[0] == y for xs, y in t.examples )
344
  eprint("Removed", sum(isIdentityTask(t) for t in tasks), "tasks that were just the identity function")
345
  tasks = [t for t in tasks if not isIdentityTask(t) ]
346
+ return tasks
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dreamcoder/domains/list/makeListTasks.py CHANGED
@@ -85,7 +85,7 @@ def make_list_task(name, examples, **params):
85
  yield Task(name, program_type, examples, cache=cache)
86
 
87
 
88
- def make_list_tasks(n_examples):
89
  import listroutines as lr
90
 
91
  for routine in lr.find(count=100): # all routines
85
  yield Task(name, program_type, examples, cache=cache)
86
 
87
 
88
+ def make_list_tasks(n_examples=4):
89
  import listroutines as lr
90
 
91
  for routine in lr.find(count=100): # all routines
dreamcoder/domains/text/makeTextTasks.py CHANGED
@@ -91,12 +91,10 @@ def randomWords(ds, minimum=1, lb=2, ub=4):
91
  return s
92
 
93
 
94
- def makeTasks():
95
  import random
96
  random.seed(9)
97
 
98
- NUMBEROFEXAMPLES = 4
99
-
100
  problems = []
101
 
102
  def toList(s): return [c for c in s]
@@ -108,7 +106,7 @@ def makeTasks():
108
  if isinstance(x, list):
109
  return [preprocess(z) for z in x]
110
  if isinstance(x, str):
111
- return [c for c in x]
112
  if isinstance(x, bool):
113
  return x
114
  assert False
@@ -126,25 +124,25 @@ def makeTasks():
126
  if d1 != d2:
127
  problem("Replace '%s' w/ '%s'" % (d1, d2),
128
  [((x,), x.replace(d1, d2))
129
- for _ in range(NUMBEROFEXAMPLES)
130
  for x in [randomWords(d1)]],
131
  needToTrain=False)
132
  for d in delimiters:
133
  problem("drop first word delimited by '%s'" % d,
134
  [((x,), d.join(x.split(d)[1:]))
135
- for _ in range(NUMBEROFEXAMPLES)
136
  for x in [randomWords(d)]],
137
  needToTrain=True)
138
  for n in [0, 1, -1]:
139
  problem("nth (n=%d) word delimited by '%s'" % (n, d),
140
  [((x,), x.split(d)[n])
141
- for _ in range(NUMBEROFEXAMPLES)
142
  for x in [randomWords(d)]],
143
  needToTrain=True)
144
  for d1 in delimiters:
145
  problem("Append two words delimited by '%s'" % (d1),
146
  [((x, y), x + d1 + y)
147
- for _ in range(NUMBEROFEXAMPLES)
148
  for x in [randomWord()]
149
  for y in [randomWord()]],
150
  needToTrain=True)
@@ -154,20 +152,20 @@ def makeTasks():
154
  :len(delimiters)]:
155
  problem("Append two words delimited by '%s%s'" % (d1, d2),
156
  [((x, y), x + d1 + d2 + y)
157
- for _ in range(NUMBEROFEXAMPLES)
158
  for x in [randomWord()]
159
  for y in [randomWord()]],
160
  needToTrain=True)
161
  for n in range(1, 6):
162
  problem("Drop last %d characters" % n,
163
  [((x,), x[:-n])
164
- for _ in range(NUMBEROFEXAMPLES)
165
  for x in [randomWord(minimum=n)]],
166
  needToTrain=True)
167
  if n > 1:
168
  problem("Take first %d characters" % n,
169
  [((x,), x[:n])
170
- for _ in range(NUMBEROFEXAMPLES)
171
  for x in [randomWord(minimum=n)]],
172
  needToTrain=True)
173
  for d1, d2 in randomPermutation(
@@ -176,7 +174,7 @@ def makeTasks():
176
  :len(delimiters)]:
177
  problem("Extract word delimited by '%s' - '%s'" % (d1, d2),
178
  [((a + d1 + b + d2 + c + d + e,), b)
179
- for _ in range(int(NUMBEROFEXAMPLES / 2))
180
  for d in [d1, d2]
181
  for a in [randomWord()]
182
  for b in [randomWord()]
@@ -187,7 +185,7 @@ def makeTasks():
187
  for n in range(len(delimiters)):
188
  problem("First letters of words (%s)" % ("I" * (1 + n)),
189
  [((x,), "".join(map(lambda z: z[0], x.split(' '))))
190
- for _ in range(NUMBEROFEXAMPLES)
191
  for x in [randomWords(' ')]
192
  ],
193
  needToTrain=True)
@@ -195,27 +193,27 @@ def makeTasks():
195
  for d in delimiters:
196
  problem("Take first character and append '%s'" % d,
197
  [((x,), x[0] + d)
198
- for _ in range(NUMBEROFEXAMPLES)
199
  for x in [randomWord()]],
200
  needToTrain=True)
201
 
202
  for n in range(len(delimiters)):
203
  problem("Abbreviate separate words (%s)" % ("I" * (n + 1)),
204
  [((x, y), "%s.%s." % (x[0], y[0]))
205
- for _ in range(NUMBEROFEXAMPLES)
206
  for y in [randomWord()]
207
  for x in [randomWord()]])
208
  d = delimiters[n]
209
  problem("Abbreviate words separated by '%s'" % d,
210
  [((x + d + y,), "%s.%s." % (x[0], y[0]))
211
- for _ in range(NUMBEROFEXAMPLES)
212
  for y in [randomWord()]
213
  for x in [randomWord()]])
214
 
215
  for n in range(len(delimiters)):
216
  problem("Append 2 strings (%s)" % ('I' * (n + 1)),
217
  [((x, y), x + y)
218
- for _ in range(NUMBEROFEXAMPLES)
219
  for y in [randomWord()]
220
  for x in [randomWord()]],
221
  needToTrain=True)
@@ -224,33 +222,33 @@ def makeTasks():
224
  w = randomWord(minimum=3)
225
  problem("Prepend '%s'" % w,
226
  [((x,), w + x)
227
- for _ in range(NUMBEROFEXAMPLES)
228
  for x in [randomWord()]])
229
  w = randomWord(minimum=3)
230
  problem("Append '%s'" % w,
231
  [((x,), x + w)
232
- for _ in range(NUMBEROFEXAMPLES)
233
  for x in [randomWord()]])
234
  w = randomWord(minimum=3)
235
  problem("Prepend '%s' to first word" % w,
236
  [((x + ' ' + y,), w + x)
237
- for _ in range(NUMBEROFEXAMPLES)
238
  for x in [randomWord()]
239
  for y in [randomWord()]])
240
 
241
  for n in range(1,6):
242
  problem("parentheses around a single word (%s)"%('I'*n),
243
  [((w,),"(%s)"%w)
244
- for _ in range(NUMBEROFEXAMPLES)
245
  for w in [randomWord()] ])
246
  problem("parentheses around first word",
247
  [((w + " " + s,),"(%s)"%w)
248
- for _ in range(NUMBEROFEXAMPLES)
249
  for w in [randomWord()]
250
  for s in [randomWords(" ")] ])
251
  problem("parentheses around second word",
252
  [((s,), "(%s)"%(s.split(" ")[1]))
253
- for _ in range(NUMBEROFEXAMPLES)
254
  for s in [randomWords(" ")] ])
255
 
256
  allowed = [d for d in delimiters if d not in "()"]
@@ -258,7 +256,7 @@ def makeTasks():
258
  problem("parentheses around word delimited by '%s' & '%s'"%(d1,d2),
259
  [((prefix + d1 + word + d2 + suffix,),
260
  prefix + d1 + '(' + word + ')' + d2 + suffix)
261
- for _ in range(NUMBEROFEXAMPLES)
262
  for prefix in [randomWords("", lb=0, ub=1)]
263
  for suffix in [randomWords(allowed, ub=2, lb=1)]
264
  for word in [randomWord()] ])
@@ -267,7 +265,7 @@ def makeTasks():
267
  w = randomWord(minimum=3)
268
  problem("ensure suffix `%s`"%w,
269
  [ ((s + (w if f else ""),), s + w)
270
- for _ in range(NUMBEROFEXAMPLES)
271
  for s in [randomWords(" ")]
272
  for f in [random.choice([True,False])] ])
273
 
91
  return s
92
 
93
 
94
+ def makeTasks(n_examples = 4):
95
  import random
96
  random.seed(9)
97
 
 
 
98
  problems = []
99
 
100
  def toList(s): return [c for c in s]
106
  if isinstance(x, list):
107
  return [preprocess(z) for z in x]
108
  if isinstance(x, str):
109
+ return x
110
  if isinstance(x, bool):
111
  return x
112
  assert False
124
  if d1 != d2:
125
  problem("Replace '%s' w/ '%s'" % (d1, d2),
126
  [((x,), x.replace(d1, d2))
127
+ for _ in range(n_examples)
128
  for x in [randomWords(d1)]],
129
  needToTrain=False)
130
  for d in delimiters:
131
  problem("drop first word delimited by '%s'" % d,
132
  [((x,), d.join(x.split(d)[1:]))
133
+ for _ in range(n_examples)
134
  for x in [randomWords(d)]],
135
  needToTrain=True)
136
  for n in [0, 1, -1]:
137
  problem("nth (n=%d) word delimited by '%s'" % (n, d),
138
  [((x,), x.split(d)[n])
139
+ for _ in range(n_examples)
140
  for x in [randomWords(d)]],
141
  needToTrain=True)
142
  for d1 in delimiters:
143
  problem("Append two words delimited by '%s'" % (d1),
144
  [((x, y), x + d1 + y)
145
+ for _ in range(n_examples)
146
  for x in [randomWord()]
147
  for y in [randomWord()]],
148
  needToTrain=True)
152
  :len(delimiters)]:
153
  problem("Append two words delimited by '%s%s'" % (d1, d2),
154
  [((x, y), x + d1 + d2 + y)
155
+ for _ in range(n_examples)
156
  for x in [randomWord()]
157
  for y in [randomWord()]],
158
  needToTrain=True)
159
  for n in range(1, 6):
160
  problem("Drop last %d characters" % n,
161
  [((x,), x[:-n])
162
+ for _ in range(n_examples)
163
  for x in [randomWord(minimum=n)]],
164
  needToTrain=True)
165
  if n > 1:
166
  problem("Take first %d characters" % n,
167
  [((x,), x[:n])
168
+ for _ in range(n_examples)
169
  for x in [randomWord(minimum=n)]],
170
  needToTrain=True)
171
  for d1, d2 in randomPermutation(
174
  :len(delimiters)]:
175
  problem("Extract word delimited by '%s' - '%s'" % (d1, d2),
176
  [((a + d1 + b + d2 + c + d + e,), b)
177
+ for _ in range(int(n_examples / 2))
178
  for d in [d1, d2]
179
  for a in [randomWord()]
180
  for b in [randomWord()]
185
  for n in range(len(delimiters)):
186
  problem("First letters of words (%s)" % ("I" * (1 + n)),
187
  [((x,), "".join(map(lambda z: z[0], x.split(' '))))
188
+ for _ in range(n_examples)
189
  for x in [randomWords(' ')]
190
  ],
191
  needToTrain=True)
193
  for d in delimiters:
194
  problem("Take first character and append '%s'" % d,
195
  [((x,), x[0] + d)
196
+ for _ in range(n_examples)
197
  for x in [randomWord()]],
198
  needToTrain=True)
199
 
200
  for n in range(len(delimiters)):
201
  problem("Abbreviate separate words (%s)" % ("I" * (n + 1)),
202
  [((x, y), "%s.%s." % (x[0], y[0]))
203
+ for _ in range(n_examples)
204
  for y in [randomWord()]
205
  for x in [randomWord()]])
206
  d = delimiters[n]
207
  problem("Abbreviate words separated by '%s'" % d,
208
  [((x + d + y,), "%s.%s." % (x[0], y[0]))
209
+ for _ in range(n_examples)
210
  for y in [randomWord()]
211
  for x in [randomWord()]])
212
 
213
  for n in range(len(delimiters)):
214
  problem("Append 2 strings (%s)" % ('I' * (n + 1)),
215
  [((x, y), x + y)
216
+ for _ in range(n_examples)
217
  for y in [randomWord()]
218
  for x in [randomWord()]],
219
  needToTrain=True)
222
  w = randomWord(minimum=3)
223
  problem("Prepend '%s'" % w,
224
  [((x,), w + x)
225
+ for _ in range(n_examples)
226
  for x in [randomWord()]])
227
  w = randomWord(minimum=3)
228
  problem("Append '%s'" % w,
229
  [((x,), x + w)
230
+ for _ in range(n_examples)
231
  for x in [randomWord()]])
232
  w = randomWord(minimum=3)
233
  problem("Prepend '%s' to first word" % w,
234
  [((x + ' ' + y,), w + x)
235
+ for _ in range(n_examples)
236
  for x in [randomWord()]
237
  for y in [randomWord()]])
238
 
239
  for n in range(1,6):
240
  problem("parentheses around a single word (%s)"%('I'*n),
241
  [((w,),"(%s)"%w)
242
+ for _ in range(n_examples)
243
  for w in [randomWord()] ])
244
  problem("parentheses around first word",
245
  [((w + " " + s,),"(%s)"%w)
246
+ for _ in range(n_examples)
247
  for w in [randomWord()]
248
  for s in [randomWords(" ")] ])
249
  problem("parentheses around second word",
250
  [((s,), "(%s)"%(s.split(" ")[1]))
251
+ for _ in range(n_examples)
252
  for s in [randomWords(" ")] ])
253
 
254
  allowed = [d for d in delimiters if d not in "()"]
256
  problem("parentheses around word delimited by '%s' & '%s'"%(d1,d2),
257
  [((prefix + d1 + word + d2 + suffix,),
258
  prefix + d1 + '(' + word + ')' + d2 + suffix)
259
+ for _ in range(n_examples)
260
  for prefix in [randomWords("", lb=0, ub=1)]
261
  for suffix in [randomWords(allowed, ub=2, lb=1)]
262
  for word in [randomWord()] ])
265
  w = randomWord(minimum=3)
266
  problem("ensure suffix `%s`"%w,
267
  [ ((s + (w if f else ""),), s + w)
268
+ for _ in range(n_examples)
269
  for s in [randomWords(" ")]
270
  for f in [random.choice([True,False])] ])
271