# RWKV v5 / embedding init-range 1e-01 / 4k

- 96 layers
- 1024 embedding size

This walks through the modified memory training for v5 models, across various initial embedding model weights.

**Note:** This project assumes you have the `rwkv-infctx-trainer` conda env set up.

# Basic Setup ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 2 max words, 400 samples - at ../dataset/word-2-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 5 max words, 400 samples - at ../dataset/gen-word-5-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 40 max words, 400 samples - at ../dataset/gen-word-40-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 15 max words, 400 samples - at ../dataset/gen-word-15-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 10 max words, 400 samples - at ../dataset/gen-word-10-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 652 samples (10 token repeat) - 40 max words - at ../dataset/shuffle-word-40-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 35 max words, 400 samples - at ../dataset/gen-word-35-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 590 samples (10 token repeat) - 45 max words - at ../dataset/shuffle-word-45-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 25 max words, 400 samples - at ../dataset/gen-word-25-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 1304 samples (10 token repeat) - 20 max words - at ../dataset/shuffle-word-20-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 55 max words, 800 samples - at ../dataset/gen-word-55-count.jsonl\n" - ] - }, - { 
- "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 1062 samples (10 token repeat) - 25 max words - at ../dataset/shuffle-word-25-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 80 max words, 800 samples - at ../dataset/gen-word-80-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 1779 samples (10 token repeat) - 15 max words - at ../dataset/shuffle-word-15-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 2602 samples (10 token repeat) - 10 max words - at ../dataset/shuffle-word-10-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 877 samples (10 token repeat) - 30 max words - at ../dataset/shuffle-word-30-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 1056 samples (20 token repeat) - 50 max words - at ../dataset/shuffle-word-50-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 806 samples (20 token repeat) - 65 max words - at ../dataset/shuffle-word-65-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 60 max words, 800 samples - at ../dataset/gen-word-60-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 716 samples (20 token repeat) - 75 max words - at ../dataset/shuffle-word-75-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 50 max words, 800 samples - at ../dataset/gen-word-50-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - 
"Generated a single JSONL file with 880 samples (20 token repeat) - 60 max words - at ../dataset/shuffle-word-60-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 948 samples (20 token repeat) - 55 max words - at ../dataset/shuffle-word-55-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 748 samples (20 token repeat) - 70 max words - at ../dataset/shuffle-word-70-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 380 samples (20 token repeat) - 115 max words - at ../dataset/shuffle-word-115-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 5578 samples (10 token repeat) - 5 max words - at ../dataset/shuffle-word-5-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 670 samples (20 token repeat) - 80 max words - at ../dataset/shuffle-word-80-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 65 max words, 800 samples - at ../dataset/gen-word-65-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 415 samples (20 token repeat) - 105 max words - at ../dataset/shuffle-word-105-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 30 max words, 400 samples - at ../dataset/gen-word-30-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 291 samples (20 token repeat) - 160 max words - at ../dataset/shuffle-word-160-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file 
with 345 samples (20 token repeat) - 130 max words - at ../dataset/shuffle-word-130-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 45 max words, 400 samples - at ../dataset/gen-word-45-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 20 max words, 400 samples - at ../dataset/gen-word-20-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 368 samples (20 token repeat) - 120 max words - at ../dataset/shuffle-word-120-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 564 samples (20 token repeat) - 95 max words - at ../dataset/shuffle-word-95-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 752 samples (10 token repeat) - 35 max words - at ../dataset/shuffle-word-35-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 537 samples (20 token repeat) - 100 max words - at ../dataset/shuffle-word-100-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 70 max words, 800 samples - at ../dataset/gen-word-70-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 358 samples (20 token repeat) - 125 max words - at ../dataset/shuffle-word-125-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 95 max words, 800 samples - at ../dataset/gen-word-95-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 105 max words, 800 samples - at ../dataset/gen-word-105-count.jsonl\n" - ] - }, - { - "name": 
"stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 130 max words, 800 samples - at ../dataset/gen-word-130-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 85 max words, 800 samples - at ../dataset/gen-word-85-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 619 samples (20 token repeat) - 85 max words - at ../dataset/shuffle-word-85-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 203 samples (20 token repeat) - 215 max words - at ../dataset/shuffle-word-215-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 593 samples (20 token repeat) - 90 max words - at ../dataset/shuffle-word-90-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 100 max words, 800 samples - at ../dataset/gen-word-100-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 192 samples (20 token repeat) - 240 max words - at ../dataset/shuffle-word-240-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 141 samples (20 token repeat) - 395 max words - at ../dataset/shuffle-word-395-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 218 samples (20 token repeat) - 205 max words - at ../dataset/shuffle-word-205-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 208 samples (20 token repeat) - 210 max words - at ../dataset/shuffle-word-210-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - 
"Generated a single JSONL file with 140 samples (20 token repeat) - 380 max words - at ../dataset/shuffle-word-380-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 117 samples (20 token repeat) - 430 max words - at ../dataset/shuffle-word-430-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 277 samples (20 token repeat) - 200 max words - at ../dataset/shuffle-word-200-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 125 max words, 800 samples - at ../dataset/gen-word-125-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 301 samples (20 token repeat) - 155 max words - at ../dataset/shuffle-word-155-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 197 samples (20 token repeat) - 235 max words - at ../dataset/shuffle-word-235-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 117 samples (20 token repeat) - 435 max words - at ../dataset/shuffle-word-435-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 180 max words, 800 samples - at ../dataset/gen-word-180-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 116 samples (20 token repeat) - 450 max words - at ../dataset/shuffle-word-450-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 120 samples (20 token repeat) - 420 max words - at ../dataset/shuffle-word-420-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL 
file with - 170 max words, 800 samples - at ../dataset/gen-word-170-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 139 samples (20 token repeat) - 370 max words - at ../dataset/shuffle-word-370-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 118 samples (20 token repeat) - 475 max words - at ../dataset/shuffle-word-475-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 140 samples (20 token repeat) - 390 max words - at ../dataset/shuffle-word-390-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 120 samples (20 token repeat) - 405 max words - at ../dataset/shuffle-word-405-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 100 samples (20 token repeat) - 510 max words - at ../dataset/shuffle-word-510-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 143 samples (20 token repeat) - 315 max words - at ../dataset/shuffle-word-315-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 142 samples (20 token repeat) - 320 max words - at ../dataset/shuffle-word-320-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 75 max words, 800 samples - at ../dataset/gen-word-75-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 118 samples (20 token repeat) - 485 max words - at ../dataset/shuffle-word-485-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 
145 samples (20 token repeat) - 305 max words - at ../dataset/shuffle-word-305-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 115 samples (20 token repeat) - 465 max words - at ../dataset/shuffle-word-465-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 289 samples (20 token repeat) - 165 max words - at ../dataset/shuffle-word-165-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 100 samples (20 token repeat) - 540 max words - at ../dataset/shuffle-word-540-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 118 samples (20 token repeat) - 440 max words - at ../dataset/shuffle-word-440-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 140 samples (20 token repeat) - 335 max words - at ../dataset/shuffle-word-335-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 189 samples (20 token repeat) - 245 max words - at ../dataset/shuffle-word-245-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 200 samples (20 token repeat) - 225 max words - at ../dataset/shuffle-word-225-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 141 samples (20 token repeat) - 350 max words - at ../dataset/shuffle-word-350-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 100 samples (20 token repeat) - 515 max words - at ../dataset/shuffle-word-515-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": 
[ - "Generated a single JSONL file with 184 samples (20 token repeat) - 270 max words - at ../dataset/shuffle-word-270-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 90 max words, 800 samples - at ../dataset/gen-word-90-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 140 samples (20 token repeat) - 355 max words - at ../dataset/shuffle-word-355-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 278 samples (20 token repeat) - 180 max words - at ../dataset/shuffle-word-180-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 278 samples (20 token repeat) - 175 max words - at ../dataset/shuffle-word-175-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 145 max words, 800 samples - at ../dataset/gen-word-145-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 276 samples (20 token repeat) - 185 max words - at ../dataset/shuffle-word-185-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 143 samples (20 token repeat) - 400 max words - at ../dataset/shuffle-word-400-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 110 max words, 800 samples - at ../dataset/gen-word-110-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 287 samples (20 token repeat) - 170 max words - at ../dataset/shuffle-word-170-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 185 samples 
(20 token repeat) - 295 max words - at ../dataset/shuffle-word-295-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 141 samples (20 token repeat) - 345 max words - at ../dataset/shuffle-word-345-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 100 samples (20 token repeat) - 505 max words - at ../dataset/shuffle-word-505-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 145 samples (20 token repeat) - 325 max words - at ../dataset/shuffle-word-325-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 100 samples (20 token repeat) - 545 max words - at ../dataset/shuffle-word-545-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 118 samples (20 token repeat) - 425 max words - at ../dataset/shuffle-word-425-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 143 samples (20 token repeat) - 330 max words - at ../dataset/shuffle-word-330-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 200 samples (20 token repeat) - 220 max words - at ../dataset/shuffle-word-220-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 142 samples (20 token repeat) - 340 max words - at ../dataset/shuffle-word-340-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 100 samples (20 token repeat) - 520 max words - at ../dataset/shuffle-word-520-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - 
"Generated JSONL file with - 160 max words, 800 samples - at ../dataset/gen-word-160-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 398 samples (20 token repeat) - 110 max words - at ../dataset/shuffle-word-110-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 319 samples (20 token repeat) - 145 max words - at ../dataset/shuffle-word-145-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 188 samples (20 token repeat) - 250 max words - at ../dataset/shuffle-word-250-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 141 samples (20 token repeat) - 375 max words - at ../dataset/shuffle-word-375-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 99 samples (20 token repeat) - 535 max words - at ../dataset/shuffle-word-535-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 215 max words, 800 samples - at ../dataset/gen-word-215-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 189 samples (20 token repeat) - 255 max words - at ../dataset/shuffle-word-255-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 186 samples (20 token repeat) - 275 max words - at ../dataset/shuffle-word-275-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 186 samples (20 token repeat) - 290 max words - at ../dataset/shuffle-word-290-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL 
file with - 155 max words, 800 samples - at ../dataset/gen-word-155-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 100 samples (20 token repeat) - 530 max words - at ../dataset/shuffle-word-530-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 335 samples (20 token repeat) - 135 max words - at ../dataset/shuffle-word-135-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 117 samples (20 token repeat) - 445 max words - at ../dataset/shuffle-word-445-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 220 max words, 800 samples - at ../dataset/gen-word-220-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 305 samples (20 token repeat) - 150 max words - at ../dataset/shuffle-word-150-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 187 samples (20 token repeat) - 265 max words - at ../dataset/shuffle-word-265-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 200 samples (20 token repeat) - 230 max words - at ../dataset/shuffle-word-230-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 186 samples (20 token repeat) - 300 max words - at ../dataset/shuffle-word-300-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 225 max words, 800 samples - at ../dataset/gen-word-225-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 120 max words, 800 samples - at 
../dataset/gen-word-120-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 270 samples (20 token repeat) - 195 max words - at ../dataset/shuffle-word-195-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 115 max words, 800 samples - at ../dataset/gen-word-115-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 140 samples (20 token repeat) - 360 max words - at ../dataset/shuffle-word-360-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 118 samples (20 token repeat) - 455 max words - at ../dataset/shuffle-word-455-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 274 samples (20 token repeat) - 190 max words - at ../dataset/shuffle-word-190-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 116 samples (20 token repeat) - 460 max words - at ../dataset/shuffle-word-460-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 183 samples (20 token repeat) - 280 max words - at ../dataset/shuffle-word-280-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 115 samples (20 token repeat) - 495 max words - at ../dataset/shuffle-word-495-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 180 samples (20 token repeat) - 285 max words - at ../dataset/shuffle-word-285-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 100 samples (20 token repeat) - 525 max 
words - at ../dataset/shuffle-word-525-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 120 samples (20 token repeat) - 415 max words - at ../dataset/shuffle-word-415-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 119 samples (20 token repeat) - 410 max words - at ../dataset/shuffle-word-410-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 140 max words, 800 samples - at ../dataset/gen-word-140-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 143 samples (20 token repeat) - 310 max words - at ../dataset/shuffle-word-310-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 324 samples (20 token repeat) - 140 max words - at ../dataset/shuffle-word-140-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 210 max words, 800 samples - at ../dataset/gen-word-210-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 205 max words, 800 samples - at ../dataset/gen-word-205-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 118 samples (20 token repeat) - 470 max words - at ../dataset/shuffle-word-470-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 250 max words, 800 samples - at ../dataset/gen-word-250-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 115 samples (20 token repeat) - 480 max words - at ../dataset/shuffle-word-480-count.jsonl\n" - ] - }, - { - "name": 
"stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 140 samples (20 token repeat) - 385 max words - at ../dataset/shuffle-word-385-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 245 max words, 800 samples - at ../dataset/gen-word-245-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 165 max words, 800 samples - at ../dataset/gen-word-165-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 116 samples (20 token repeat) - 500 max words - at ../dataset/shuffle-word-500-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 115 samples (20 token repeat) - 490 max words - at ../dataset/shuffle-word-490-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 185 samples (20 token repeat) - 260 max words - at ../dataset/shuffle-word-260-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 150 max words, 800 samples - at ../dataset/gen-word-150-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 185 max words, 800 samples - at ../dataset/gen-word-185-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 135 max words, 800 samples - at ../dataset/gen-word-135-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 100 samples (20 token repeat) - 550 max words - at ../dataset/shuffle-word-550-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 200 max words, 800 samples - 
at ../dataset/gen-word-200-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 240 max words, 800 samples - at ../dataset/gen-word-240-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated a single JSONL file with 143 samples (20 token repeat) - 365 max words - at ../dataset/shuffle-word-365-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 330 max words, 800 samples - at ../dataset/gen-word-330-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 255 max words, 800 samples - at ../dataset/gen-word-255-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 350 max words, 800 samples - at ../dataset/gen-word-350-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 290 max words, 800 samples - at ../dataset/gen-word-290-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 345 max words, 800 samples - at ../dataset/gen-word-345-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 190 max words, 800 samples - at ../dataset/gen-word-190-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 295 max words, 800 samples - at ../dataset/gen-word-295-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 320 max words, 800 samples - at ../dataset/gen-word-320-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 415 max words, 800 samples - at ../dataset/gen-word-415-count.jsonl\n" - ] - }, - { 
- "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 230 max words, 800 samples - at ../dataset/gen-word-230-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 395 max words, 800 samples - at ../dataset/gen-word-395-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 260 max words, 800 samples - at ../dataset/gen-word-260-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 265 max words, 800 samples - at ../dataset/gen-word-265-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 275 max words, 800 samples - at ../dataset/gen-word-275-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 175 max words, 800 samples - at ../dataset/gen-word-175-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 355 max words, 800 samples - at ../dataset/gen-word-355-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 195 max words, 800 samples - at ../dataset/gen-word-195-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 270 max words, 800 samples - at ../dataset/gen-word-270-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 370 max words, 800 samples - at ../dataset/gen-word-370-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 285 max words, 800 samples - at ../dataset/gen-word-285-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file 
with - 235 max words, 800 samples - at ../dataset/gen-word-235-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 505 max words, 800 samples - at ../dataset/gen-word-505-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 365 max words, 800 samples - at ../dataset/gen-word-365-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 450 max words, 800 samples - at ../dataset/gen-word-450-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 280 max words, 800 samples - at ../dataset/gen-word-280-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 315 max words, 800 samples - at ../dataset/gen-word-315-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 300 max words, 800 samples - at ../dataset/gen-word-300-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 305 max words, 800 samples - at ../dataset/gen-word-305-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 385 max words, 800 samples - at ../dataset/gen-word-385-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 430 max words, 800 samples - at ../dataset/gen-word-430-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 360 max words, 800 samples - at ../dataset/gen-word-360-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 525 max words, 800 samples - at ../dataset/gen-word-525-count.jsonl\n" - ] - 
}, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 340 max words, 800 samples - at ../dataset/gen-word-340-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 405 max words, 800 samples - at ../dataset/gen-word-405-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 420 max words, 800 samples - at ../dataset/gen-word-420-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 390 max words, 800 samples - at ../dataset/gen-word-390-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 495 max words, 800 samples - at ../dataset/gen-word-495-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 500 max words, 800 samples - at ../dataset/gen-word-500-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 375 max words, 800 samples - at ../dataset/gen-word-375-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 325 max words, 800 samples - at ../dataset/gen-word-325-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 520 max words, 800 samples - at ../dataset/gen-word-520-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 310 max words, 800 samples - at ../dataset/gen-word-310-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 410 max words, 800 samples - at ../dataset/gen-word-410-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL 
file with - 380 max words, 800 samples - at ../dataset/gen-word-380-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 460 max words, 800 samples - at ../dataset/gen-word-460-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 400 max words, 800 samples - at ../dataset/gen-word-400-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 470 max words, 800 samples - at ../dataset/gen-word-470-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 335 max words, 800 samples - at ../dataset/gen-word-335-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 530 max words, 800 samples - at ../dataset/gen-word-530-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 540 max words, 800 samples - at ../dataset/gen-word-540-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 485 max words, 800 samples - at ../dataset/gen-word-485-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 550 max words, 800 samples - at ../dataset/gen-word-550-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 435 max words, 800 samples - at ../dataset/gen-word-435-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 425 max words, 800 samples - at ../dataset/gen-word-425-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 455 max words, 800 samples - at ../dataset/gen-word-455-count.jsonl\n" - 
] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 465 max words, 800 samples - at ../dataset/gen-word-465-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 510 max words, 800 samples - at ../dataset/gen-word-510-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 490 max words, 800 samples - at ../dataset/gen-word-490-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 445 max words, 800 samples - at ../dataset/gen-word-445-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 545 max words, 800 samples - at ../dataset/gen-word-545-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 480 max words, 800 samples - at ../dataset/gen-word-480-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 440 max words, 800 samples - at ../dataset/gen-word-440-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 475 max words, 800 samples - at ../dataset/gen-word-475-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 515 max words, 800 samples - at ../dataset/gen-word-515-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated JSONL file with - 535 max words, 800 samples - at ../dataset/gen-word-535-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "## Done ##\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "total 297M\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - 
"drwxr-xr-x 2 root root 12K Aug 31 21:58 .\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "drwxr-xr-x 14 root root 302 Aug 31 21:58 ..\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 78K Aug 31 21:58 gen-word-10-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 858K Aug 31 21:58 gen-word-100-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 895K Aug 31 21:58 gen-word-105-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 936K Aug 31 21:58 gen-word-110-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 968K Aug 31 21:58 gen-word-115-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 1008K Aug 31 21:58 gen-word-120-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 1.1M Aug 31 21:58 gen-word-125-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 1.1M Aug 31 21:58 gen-word-130-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 1.2M Aug 31 21:58 gen-word-135-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 1.2M Aug 31 21:58 gen-word-140-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 1.2M Aug 31 21:58 gen-word-145-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 98K Aug 31 21:58 gen-word-15-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 1.3M Aug 
31 21:58 gen-word-150-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 1.3M Aug 31 21:58 gen-word-155-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 1.3M Aug 31 21:58 gen-word-160-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 1.4M Aug 31 21:58 gen-word-165-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 1.4M Aug 31 21:58 gen-word-170-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 1.4M Aug 31 21:58 gen-word-175-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 1.5M Aug 31 21:58 gen-word-180-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 1.5M Aug 31 21:58 gen-word-185-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 1.6M Aug 31 21:58 gen-word-190-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 1.6M Aug 31 21:58 gen-word-195-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 118K Aug 31 21:58 gen-word-20-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 1.6M Aug 31 21:58 gen-word-200-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 1.7M Aug 31 21:58 gen-word-205-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 1.7M Aug 31 21:58 gen-word-210-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 
root root 1.7M Aug 31 21:58 gen-word-215-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 1.8M Aug 31 21:58 gen-word-220-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 1.8M Aug 31 21:58 gen-word-225-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 1.9M Aug 31 21:58 gen-word-230-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 1.9M Aug 31 21:58 gen-word-235-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 2.0M Aug 31 21:58 gen-word-240-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 2.0M Aug 31 21:58 gen-word-245-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 138K Aug 31 21:58 gen-word-25-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 2.0M Aug 31 21:58 gen-word-250-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 2.1M Aug 31 21:58 gen-word-255-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 2.1M Aug 31 21:58 gen-word-260-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 2.1M Aug 31 21:58 gen-word-265-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 2.2M Aug 31 21:58 gen-word-270-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 2.2M Aug 31 21:58 gen-word-275-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ 
- "-rw-r--r-- 1 root root 2.2M Aug 31 21:58 gen-word-280-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 2.3M Aug 31 21:58 gen-word-285-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 2.3M Aug 31 21:58 gen-word-290-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 2.4M Aug 31 21:58 gen-word-295-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 156K Aug 31 21:58 gen-word-30-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 2.4M Aug 31 21:58 gen-word-300-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 2.5M Aug 31 21:58 gen-word-305-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 2.5M Aug 31 21:58 gen-word-310-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 2.5M Aug 31 21:58 gen-word-315-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 2.6M Aug 31 21:58 gen-word-320-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 2.6M Aug 31 21:58 gen-word-325-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 2.6M Aug 31 21:58 gen-word-330-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 2.7M Aug 31 21:58 gen-word-335-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 2.7M Aug 31 21:58 gen-word-340-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": 
"stream", - "text": [ - "-rw-r--r-- 1 root root 2.7M Aug 31 21:58 gen-word-345-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 177K Aug 31 21:58 gen-word-35-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 2.8M Aug 31 21:58 gen-word-350-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 2.8M Aug 31 21:58 gen-word-355-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 2.9M Aug 31 21:58 gen-word-360-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 2.9M Aug 31 21:58 gen-word-365-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 2.9M Aug 31 21:58 gen-word-370-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 3.0M Aug 31 21:58 gen-word-375-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 3.0M Aug 31 21:58 gen-word-380-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 3.0M Aug 31 21:58 gen-word-385-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 3.1M Aug 31 21:58 gen-word-390-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 3.1M Aug 31 21:58 gen-word-395-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 196K Aug 31 21:58 gen-word-40-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 3.1M Aug 31 21:58 gen-word-400-count.jsonl\n" - ] - }, - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 3.2M Aug 31 21:58 gen-word-405-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 3.3M Aug 31 21:58 gen-word-410-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 3.3M Aug 31 21:58 gen-word-415-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 3.3M Aug 31 21:58 gen-word-420-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 3.3M Aug 31 21:58 gen-word-425-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 3.4M Aug 31 21:58 gen-word-430-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 3.4M Aug 31 21:58 gen-word-435-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 3.5M Aug 31 21:58 gen-word-440-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 3.5M Aug 31 21:58 gen-word-445-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 216K Aug 31 21:58 gen-word-45-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 3.5M Aug 31 21:58 gen-word-450-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 3.6M Aug 31 21:58 gen-word-455-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 3.6M Aug 31 21:58 gen-word-460-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 3.6M Aug 31 21:58 gen-word-465-count.jsonl\n" - ] - }, - { - 
"name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 3.7M Aug 31 21:58 gen-word-470-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 3.7M Aug 31 21:58 gen-word-475-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 3.8M Aug 31 21:58 gen-word-480-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 3.8M Aug 31 21:58 gen-word-485-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 3.8M Aug 31 21:58 gen-word-490-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 3.9M Aug 31 21:58 gen-word-495-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 59K Aug 31 21:58 gen-word-5-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 472K Aug 31 21:58 gen-word-50-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 3.9M Aug 31 21:58 gen-word-500-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 4.0M Aug 31 21:58 gen-word-505-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 4.0M Aug 31 21:58 gen-word-510-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 4.0M Aug 31 21:58 gen-word-515-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 4.1M Aug 31 21:58 gen-word-520-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 4.1M Aug 31 21:58 gen-word-525-count.jsonl\n" - 
] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 4.2M Aug 31 21:58 gen-word-530-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 4.2M Aug 31 21:58 gen-word-535-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 4.2M Aug 31 21:58 gen-word-540-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 4.3M Aug 31 21:58 gen-word-545-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 509K Aug 31 21:58 gen-word-55-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 4.3M Aug 31 21:58 gen-word-550-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 545K Aug 31 21:58 gen-word-60-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 578K Aug 31 21:58 gen-word-65-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 624K Aug 31 21:58 gen-word-70-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 658K Aug 31 21:58 gen-word-75-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 710K Aug 31 21:58 gen-word-80-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 738K Aug 31 21:58 gen-word-85-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 772K Aug 31 21:58 gen-word-90-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 809K Aug 31 21:58 
gen-word-95-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 523K Aug 31 21:58 shuffle-word-10-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 567K Aug 31 21:58 shuffle-word-100-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 557K Aug 31 21:58 shuffle-word-105-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 556K Aug 31 21:58 shuffle-word-110-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 551K Aug 31 21:58 shuffle-word-115-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 552K Aug 31 21:58 shuffle-word-120-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 555K Aug 31 21:58 shuffle-word-125-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 547K Aug 31 21:58 shuffle-word-130-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 547K Aug 31 21:58 shuffle-word-135-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 540K Aug 31 21:58 shuffle-word-140-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 547K Aug 31 21:58 shuffle-word-145-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 433K Aug 31 21:58 shuffle-word-15-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 544K Aug 31 21:58 shuffle-word-150-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": 
"stream", - "text": [ - "-rw-r--r-- 1 root root 541K Aug 31 21:58 shuffle-word-155-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 541K Aug 31 21:58 shuffle-word-160-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 541K Aug 31 21:58 shuffle-word-165-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 545K Aug 31 21:58 shuffle-word-170-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 545K Aug 31 21:58 shuffle-word-175-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 541K Aug 31 21:58 shuffle-word-180-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 539K Aug 31 21:58 shuffle-word-185-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 540K Aug 31 21:58 shuffle-word-190-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 540K Aug 31 21:58 shuffle-word-195-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 393K Aug 31 21:58 shuffle-word-20-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 544K Aug 31 21:58 shuffle-word-200-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 537K Aug 31 21:58 shuffle-word-205-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 540K Aug 31 21:58 shuffle-word-210-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 537K Aug 31 21:58 
shuffle-word-215-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 535K Aug 31 21:58 shuffle-word-220-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 532K Aug 31 21:58 shuffle-word-225-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 536K Aug 31 21:58 shuffle-word-230-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 534K Aug 31 21:58 shuffle-word-235-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 533K Aug 31 21:58 shuffle-word-240-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 533K Aug 31 21:58 shuffle-word-245-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 357K Aug 31 21:58 shuffle-word-25-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 534K Aug 31 21:58 shuffle-word-250-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 534K Aug 31 21:58 shuffle-word-255-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 534K Aug 31 21:58 shuffle-word-260-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 534K Aug 31 21:58 shuffle-word-265-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 532K Aug 31 21:58 shuffle-word-270-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 534K Aug 31 21:58 shuffle-word-275-count.jsonl\n" - ] - }, - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 533K Aug 31 21:58 shuffle-word-280-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 533K Aug 31 21:58 shuffle-word-285-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 535K Aug 31 21:58 shuffle-word-290-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 535K Aug 31 21:58 shuffle-word-295-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 349K Aug 31 21:58 shuffle-word-30-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 536K Aug 31 21:58 shuffle-word-300-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 532K Aug 31 21:58 shuffle-word-305-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 529K Aug 31 21:58 shuffle-word-310-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 532K Aug 31 21:58 shuffle-word-315-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 532K Aug 31 21:58 shuffle-word-320-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 532K Aug 31 21:58 shuffle-word-325-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 529K Aug 31 21:58 shuffle-word-330-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 526K Aug 31 21:58 shuffle-word-335-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 527K Aug 31 
21:58 shuffle-word-340-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 529K Aug 31 21:58 shuffle-word-345-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 326K Aug 31 21:58 shuffle-word-35-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 528K Aug 31 21:58 shuffle-word-350-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 533K Aug 31 21:58 shuffle-word-355-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 530K Aug 31 21:58 shuffle-word-360-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 530K Aug 31 21:58 shuffle-word-365-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 530K Aug 31 21:58 shuffle-word-370-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 529K Aug 31 21:58 shuffle-word-375-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 531K Aug 31 21:58 shuffle-word-380-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 530K Aug 31 21:58 shuffle-word-385-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 531K Aug 31 21:58 shuffle-word-390-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 529K Aug 31 21:58 shuffle-word-395-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 320K Aug 31 21:58 shuffle-word-40-count.jsonl\n" - ] - }, - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 531K Aug 31 21:58 shuffle-word-400-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 528K Aug 31 21:58 shuffle-word-405-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 529K Aug 31 21:58 shuffle-word-410-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 524K Aug 31 21:58 shuffle-word-415-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 526K Aug 31 21:58 shuffle-word-420-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 525K Aug 31 21:58 shuffle-word-425-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 530K Aug 31 21:58 shuffle-word-430-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 526K Aug 31 21:58 shuffle-word-435-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 529K Aug 31 21:58 shuffle-word-440-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 528K Aug 31 21:58 shuffle-word-445-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 314K Aug 31 21:58 shuffle-word-45-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 527K Aug 31 21:58 shuffle-word-450-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 531K Aug 31 21:58 shuffle-word-455-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 526K Aug 31 
21:58 shuffle-word-460-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 525K Aug 31 21:58 shuffle-word-465-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 527K Aug 31 21:58 shuffle-word-470-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 525K Aug 31 21:58 shuffle-word-475-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 530K Aug 31 21:58 shuffle-word-480-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 528K Aug 31 21:58 shuffle-word-485-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 525K Aug 31 21:58 shuffle-word-490-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 529K Aug 31 21:58 shuffle-word-495-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 814K Aug 31 21:58 shuffle-word-5-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 621K Aug 31 21:58 shuffle-word-50-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 530K Aug 31 21:58 shuffle-word-500-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 524K Aug 31 21:58 shuffle-word-505-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 528K Aug 31 21:58 shuffle-word-510-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 523K Aug 31 21:58 shuffle-word-515-count.jsonl\n" - ] - }, - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 531K Aug 31 21:58 shuffle-word-520-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 530K Aug 31 21:58 shuffle-word-525-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 527K Aug 31 21:58 shuffle-word-530-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 523K Aug 31 21:58 shuffle-word-535-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 524K Aug 31 21:58 shuffle-word-540-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 526K Aug 31 21:58 shuffle-word-545-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 597K Aug 31 21:58 shuffle-word-55-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 526K Aug 31 21:58 shuffle-word-550-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 603K Aug 31 21:58 shuffle-word-60-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 593K Aug 31 21:58 shuffle-word-65-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 580K Aug 31 21:58 shuffle-word-70-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 583K Aug 31 21:58 shuffle-word-75-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 574K Aug 31 21:58 shuffle-word-80-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 574K Aug 31 21:58 
shuffle-word-85-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 571K Aug 31 21:58 shuffle-word-90-count.jsonl\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r-- 1 root root 574K Aug 31 21:58 shuffle-word-95-count.jsonl\n" - ########################################
# Generate the required jsonl dataset
########################################

# Reset the dataset dir
mkdir -p ../dataset
rm -rf ../dataset/*.jsonl

# Generate the various datasets
echo "## Generating word reptition dataset ##"

# PIDs of every background generator job. Each job is waited on individually
# further down, because a bare `wait` (no arguments) always returns 0 and
# would hide any generator that crashed.
gen_pids=()

#
# We reduce the training set for lower word count - and shift the focus upwards
#
# Generator arguments: <output jsonl> <max words> <sample count>
# Shuffle arguments:   <output jsonl> <max words> <repeat count>
# (meaning of the numeric arguments inferred from the scripts' log output -
#  confirm against ../memory_script if changing them)
#
python ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/word-2-count.jsonl 2 400 &
gen_pids+=("$!")
for i in {5..45..5}; do
  python ../memory_script/gen_limited_prompt_completion_jsonl.py "../dataset/gen-word-${i}-count.jsonl" "${i}" 400 &
  gen_pids+=("$!")
  python ../memory_script/shuffle_limited_prompt_completion_jsonl.py "../dataset/shuffle-word-${i}-count.jsonl" "${i}" 10 &
  gen_pids+=("$!")
done

#
# Ramping up the 50 - 550 words dataset (larger sample / repeat counts)
#
for i in {50..550..5}; do
  python ../memory_script/gen_limited_prompt_completion_jsonl.py "../dataset/gen-word-${i}-count.jsonl" "${i}" 800 &
  gen_pids+=("$!")
  python ../memory_script/shuffle_limited_prompt_completion_jsonl.py "../dataset/shuffle-word-${i}-count.jsonl" "${i}" 20 &
  gen_pids+=("$!")
done

# Barrier: block until every generator has finished, counting the ones that
# exited non-zero. Failures are reported on stderr instead of aborting, so
# the dataset directory listing that follows still runs and shows what was
# actually produced.
failed_jobs=0
for pid in "${gen_pids[@]}"; do
  wait "${pid}" || failed_jobs=$((failed_jobs + 1))
done
if (( failed_jobs > 0 )); then
  echo "WARNING: ${failed_jobs} of ${#gen_pids[@]} dataset generation jobs failed" >&2
fi
echo "## Done ##"

ls -alh ../dataset/ "done\n", - "\n", - "#\n", - "# Ramping up the 50+ - 510 words dataset\n", - "# \n", - "for i in {50..550..5} \n", - "do\n", - " python ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/gen-word-$i-count.jsonl $i 800 & \n", - " python ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-$i-count.jsonl $i 20 & \n", - "done\n", - "\n", - "wait\n", - "echo \"## Done ##\"\n", - "\n", - "ls -alh ../dataset/" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "d333baca", - "metadata": { - "execution": { - "iopub.execute_input": "2023-08-31T21:58:43.238676Z", - "iopub.status.busy": "2023-08-31T21:58:43.238125Z", - "iopub.status.idle": "2023-09-01T02:53:33.810375Z", - "shell.execute_reply": "2023-09-01T02:53:33.808544Z" - }, - "papermill": { - "duration": 17690.609691, - "end_time": "2023-09-01T02:53:33.812814", - "exception": false, - "start_time": "2023-08-31T21:58:43.203123", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - 