diff --git "a/encoding/vqgan-jax-encoding-yfcc100m.ipynb" "b/encoding/vqgan-jax-encoding-yfcc100m.ipynb"
--- "a/encoding/vqgan-jax-encoding-yfcc100m.ipynb"
+++ "b/encoding/vqgan-jax-encoding-yfcc100m.ipynb"
@@ -20,7 +20,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 92,
"id": "3b59489e",
"metadata": {},
"outputs": [],
@@ -38,6 +38,7 @@
"from torchvision.transforms import InterpolationMode\n",
"from torch.utils.data import Dataset, DataLoader\n",
"from torchvision.datasets.folder import default_loader\n",
+ "import os\n",
"\n",
"import jax\n",
"from jax import pmap"
@@ -61,7 +62,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 93,
"id": "2ca50dc7",
"metadata": {},
"outputs": [],
@@ -77,22 +78,22 @@
"We'll use a VQGAN trained by using Taming Transformers and converted to a JAX model."
]
},
- {
- "cell_type": "markdown",
- "id": "ad05a1bd",
- "metadata": {},
- "source": [
- "**Disabling** Does not work in my local system right now."
- ]
- },
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 167,
"id": "29ce8b15",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Working with z of shape (1, 256, 16, 16) = 65536 dimensions.\n"
+ ]
+ }
+ ],
"source": [
- "#model = VQModel.from_pretrained(\"flax-community/vqgan_f16_16384\")"
+ "model = VQModel.from_pretrained(\"flax-community/vqgan_f16_16384\")"
]
},
{
@@ -105,7 +106,7 @@
},
{
"cell_type": "code",
- "execution_count": 79,
+ "execution_count": 94,
"id": "33861477",
"metadata": {},
"outputs": [],
@@ -116,16 +117,16 @@
},
{
"cell_type": "code",
- "execution_count": 80,
+ "execution_count": 134,
"id": "81b19eca",
"metadata": {},
"outputs": [],
"source": [
- "yfcc100m = Path('/sddata/dalle-mini/YFCC100M_OpenAI_subset')\n",
+ "yfcc100m = Path('/home/khali/TPU-Test/YFCC100M_OpenAI_subset')\n",
"# Images are 'sharded' from the following directory\n",
- "yfcc100m_images = yfcc100m/'data'/'images'\n",
+ "yfcc100m_images = yfcc100m/'data'/'data'/'images'\n",
"yfcc100m_metadata = yfcc100m/'metadata_YFCC100M.jsonl'\n",
- "yfcc100m_output = yfcc100m/'metadata_encoded.jsonl'"
+ "yfcc100m_output = yfcc100m/'metadata_encoded.tsv'"
]
},
{
@@ -146,7 +147,7 @@
},
{
"cell_type": "code",
- "execution_count": 81,
+ "execution_count": 96,
"id": "7811648c",
"metadata": {},
"outputs": [],
@@ -157,1255 +158,803 @@
},
{
"cell_type": "code",
- "execution_count": 82,
- "id": "753659fe",
+ "execution_count": 10,
+ "id": "4811a230",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
- "Using custom data configuration default-57592e8ed16d752b\n",
- "Reusing dataset json (/home/pedro/.cache/huggingface/datasets/json/default-57592e8ed16d752b/0.0.0/793d004298099bd3c4e61eb7878475bcf1dc212bf2e34437d85126758720d7f9)\n"
+ "tcmalloc: large alloc 1254047744 bytes == 0xb2b08000 @ 0x7f9e78632680 0x7f9e78653824 0x585b92 0x504d56 0x56acb6 0x5f5956 0x56acb6 0x5f5956 0x5a8cb3 0x56ae94 0x568d9a 0x68cdc7 0x5ff5d4 0x5c3cb0 0x56aadf 0x501148 0x56c422 0x501148 0x56c422 0x501148 0x504d56 0x56acb6 0x5f5956 0x56aadf 0x5f5956 0x56acb6 0x568d9a 0x5f5b33 0x50b7f8 0x5f2702 0x56c332\n",
+ "tcmalloc: large alloc 1254047744 bytes == 0xfd74e000 @ 0x7f9e78632680 0x7f9e78653824 0x590214 0x586f90 0x56e1f3 0x5f5956 0x56acb6 0x5f5956 0x5a8cb3 0x56ae94 0x568d9a 0x68cdc7 0x5ff5d4 0x5c3cb0 0x56aadf 0x501148 0x56c422 0x501148 0x56c422 0x501148 0x504d56 0x56acb6 0x5f5956 0x56aadf 0x5f5956 0x56acb6 0x568d9a 0x5f5b33 0x50b7f8 0x5f2702 0x56c332\n",
+ "tcmalloc: large alloc 5016190976 bytes == 0x148b42000 @ 0x7f9e78632680 0x7f9e78653824 0x5b9144 0x7f9b2929127e 0x7f9b29291a19 0x7f9b29291886 0x7f9b29291cef 0x7f9b2928f204 0x5f2cc9 0x5f30ff 0x5705f6 0x5f5956 0x56acb6 0x5f5956 0x56acb6 0x5f5956 0x56acb6 0x5f5956 0x5a8cb3 0x56ae94 0x568d9a 0x68cdc7 0x5ff5d4 0x5c3cb0 0x56aadf 0x501148 0x56c422 0x501148 0x56c422 0x501148 0x504d56\n",
+ "tcmalloc: large alloc 5019099136 bytes == 0x273f12000 @ 0x7f9e78632680 0x7f9e78653824 0x5b9144 0x7f9b2929127e 0x7f9b29291a19 0x7f9b29291886 0x7f9b29291cef 0x7f9b2928f204 0x5f2cc9 0x5f30ff 0x5705f6 0x5f5956 0x56acb6 0x5f5956 0x56acb6 0x5f5956 0x56acb6 0x5f5956 0x5a8cb3 0x56ae94 0x568d9a 0x68cdc7 0x5ff5d4 0x5c3cb0 0x56aadf 0x501148 0x56c422 0x501148 0x56c422 0x501148 0x504d56\n",
+ "tcmalloc: large alloc 5019811840 bytes == 0x39f9a8000 @ 0x7f9e78632680 0x7f9e78653824 0x5b9144 0x7f9b2929127e 0x7f9b29291a19 0x7f9b29291886 0x7f9b29291cef 0x7f9b2928f204 0x5f2cc9 0x5f30ff 0x5705f6 0x5f5956 0x56acb6 0x5f5956 0x56acb6 0x5f5956 0x56acb6 0x5f5956 0x5a8cb3 0x56ae94 0x568d9a 0x68cdc7 0x5ff5d4 0x5c3cb0 0x56aadf 0x501148 0x56c422 0x501148 0x56c422 0x501148 0x504d56\n",
+ "tcmalloc: large alloc 5024571392 bytes == 0x4cb4ec000 @ 0x7f9e78632680 0x7f9e78653824 0x5b9144 0x7f9b2929127e 0x7f9b29291a19 0x7f9b29291886 0x7f9b29291cef 0x7f9b2928f204 0x5f2cc9 0x5f30ff 0x5705f6 0x5f5956 0x56acb6 0x5f5956 0x56acb6 0x5f5956 0x56acb6 0x5f5956 0x5a8cb3 0x56ae94 0x568d9a 0x68cdc7 0x5ff5d4 0x5c3cb0 0x56aadf 0x501148 0x56c422 0x501148 0x56c422 0x501148 0x504d56\n",
+ "tcmalloc: large alloc 5021097984 bytes == 0x4cb4ec000 @ 0x7f9e78632680 0x7f9e78653824 0x5b9144 0x7f9b2929127e 0x7f9b29291a19 0x7f9b29291886 0x7f9b29291cef 0x7f9b2928f204 0x5f2cc9 0x5f30ff 0x5705f6 0x5f5956 0x56acb6 0x5f5956 0x56acb6 0x5f5956 0x56acb6 0x5f5956 0x5a8cb3 0x56ae94 0x568d9a 0x68cdc7 0x5ff5d4 0x5c3cb0 0x56aadf 0x501148 0x56c422 0x501148 0x56c422 0x501148 0x504d56\n",
+ "tcmalloc: large alloc 5022818304 bytes == 0x4cb4ec000 @ 0x7f9e78632680 0x7f9e78653824 0x5b9144 0x7f9b2929127e 0x7f9b29291a19 0x7f9b29291886 0x7f9b29291cef 0x7f9b2928f204 0x5f2cc9 0x5f30ff 0x5705f6 0x5f5956 0x56acb6 0x5f5956 0x56acb6 0x5f5956 0x56acb6 0x5f5956 0x5a8cb3 0x56ae94 0x568d9a 0x68cdc7 0x5ff5d4 0x5c3cb0 0x56aadf 0x501148 0x56c422 0x501148 0x56c422 0x501148 0x504d56\n",
+ "tcmalloc: large alloc 5020794880 bytes == 0x4cb4ec000 @ 0x7f9e78632680 0x7f9e78653824 0x5b9144 0x7f9b2929127e 0x7f9b29291a19 0x7f9b29291886 0x7f9b29291cef 0x7f9b2928f204 0x5f2cc9 0x5f30ff 0x5705f6 0x5f5956 0x56acb6 0x5f5956 0x56acb6 0x5f5956 0x56acb6 0x5f5956 0x5a8cb3 0x56ae94 0x568d9a 0x68cdc7 0x5ff5d4 0x5c3cb0 0x56aadf 0x501148 0x56c422 0x501148 0x56c422 0x501148 0x504d56\n",
+ "tcmalloc: large alloc 5019451392 bytes == 0x39f9a8000 @ 0x7f9e78632680 0x7f9e78653824 0x5b9144 0x7f9b2929127e 0x7f9b29291a19 0x7f9b29291886 0x7f9b29291cef 0x7f9b2928f204 0x5f2cc9 0x5f30ff 0x5705f6 0x5f5956 0x56acb6 0x5f5956 0x56acb6 0x5f5956 0x56acb6 0x5f5956 0x5a8cb3 0x56ae94 0x568d9a 0x68cdc7 0x5ff5d4 0x5c3cb0 0x56aadf 0x501148 0x56c422 0x501148 0x56c422 0x501148 0x504d56\n",
+ "tcmalloc: large alloc 5020565504 bytes == 0x4cb4ec000 @ 0x7f9e78632680 0x7f9e78653824 0x5b9144 0x7f9b2929127e 0x7f9b29291a19 0x7f9b29291886 0x7f9b29291cef 0x7f9b2928f204 0x5f2cc9 0x5f30ff 0x5705f6 0x5f5956 0x56acb6 0x5f5956 0x56acb6 0x5f5956 0x56acb6 0x5f5956 0x5a8cb3 0x56ae94 0x568d9a 0x68cdc7 0x5ff5d4 0x5c3cb0 0x56aadf 0x501148 0x56c422 0x501148 0x56c422 0x501148 0x504d56\n",
+ "tcmalloc: large alloc 5012561920 bytes == 0x273f12000 @ 0x7f9e78632680 0x7f9e78653824 0x5b9144 0x7f9b2929127e 0x7f9b29291a19 0x7f9b29291886 0x7f9b29291cef 0x7f9b2928f204 0x5f2cc9 0x5f30ff 0x5705f6 0x5f5956 0x56acb6 0x5f5956 0x56acb6 0x5f5956 0x56acb6 0x5f5956 0x5a8cb3 0x56ae94 0x568d9a 0x68cdc7 0x5ff5d4 0x5c3cb0 0x56aadf 0x501148 0x56c422 0x501148 0x56c422 0x501148 0x504d56\n",
+ "tcmalloc: large alloc 5021835264 bytes == 0x5f6cba000 @ 0x7f9e78632680 0x7f9e78653824 0x5b9144 0x7f9b2929127e 0x7f9b29291a19 0x7f9b29291886 0x7f9b29291cef 0x7f9b2928f204 0x5f2cc9 0x5f30ff 0x5705f6 0x5f5956 0x56acb6 0x5f5956 0x56acb6 0x5f5956 0x56acb6 0x5f5956 0x5a8cb3 0x56ae94 0x568d9a 0x68cdc7 0x5ff5d4 0x5c3cb0 0x56aadf 0x501148 0x56c422 0x501148 0x56c422 0x501148 0x504d56\n",
+ "tcmalloc: large alloc 5017436160 bytes == 0x273f12000 @ 0x7f9e78632680 0x7f9e78653824 0x5b9144 0x7f9b2929127e 0x7f9b29291a19 0x7f9b29291886 0x7f9b29291cef 0x7f9b2928f204 0x5f2cc9 0x5f30ff 0x5705f6 0x5f5956 0x56acb6 0x5f5956 0x56acb6 0x5f5956 0x56acb6 0x5f5956 0x5a8cb3 0x56ae94 0x568d9a 0x68cdc7 0x5ff5d4 0x5c3cb0 0x56aadf 0x501148 0x56c422 0x501148 0x56c422 0x501148 0x504d56\n"
]
}
],
"source": [
- "dataset = load_dataset(\"json\", data_files=[str(yfcc100m_metadata)])"
+ "# The metadata is too big to load into memory at once, so we split it into chunks\n",
+ "chunk_size=1000000\n",
+ "batch_no=1\n",
+ "for chunk in pd.read_json(yfcc100m_metadata, orient=\"records\", lines=True,chunksize=chunk_size):\n",
+ " chunk.to_csv('./chunks/chunk'+str(batch_no)+'.tsv', sep=\"\\t\", index=False)\n",
+ " batch_no+=1"
]
},
{
"cell_type": "code",
- "execution_count": 83,
- "id": "9343df1b",
+ "execution_count": 25,
+ "id": "46b2f083",
"metadata": {},
"outputs": [
{
"data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " photoid | \n",
+ " uid | \n",
+ " unickname | \n",
+ " datetaken | \n",
+ " dateuploaded | \n",
+ " capturedevice | \n",
+ " title | \n",
+ " description | \n",
+ " usertags | \n",
+ " machinetags | \n",
+ " ... | \n",
+ " licenseurl | \n",
+ " serverid | \n",
+ " farmid | \n",
+ " secret | \n",
+ " secretoriginal | \n",
+ " ext | \n",
+ " marker | \n",
+ " key | \n",
+ " title_clean | \n",
+ " description_clean | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 137943 | \n",
+ " 48600072071@N01 | \n",
+ " doctor+paradox | \n",
+ " 2004-08-01 18:13:06.0 | \n",
+ " 1091409186 | \n",
+ " NaN | \n",
+ " A+Picture+Share%21 | \n",
+ " Antenna | \n",
+ " cameraphone,cayugaheights,green,hydrant,ithaca... | \n",
+ " NaN | \n",
+ " ... | \n",
+ " http://creativecommons.org/licenses/by-nc-sa/2.0/ | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1650c7cdc6 | \n",
+ " 1650c7cdc6 | \n",
+ " jpg | \n",
+ " 0 | \n",
+ " d29e7c6a3028418c64eb15e3cf577c2 | \n",
+ " A Picture Share! | \n",
+ " Antenna | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1246361 | \n",
+ " 44124324682@N01 | \n",
+ " mharrsch | \n",
+ " 2004-11-03 23:04:02.0 | \n",
+ " 1099523042 | \n",
+ " NaN | \n",
+ " An+ornate+Roman+urn | \n",
+ " Photographed+at+the+%3Ca+href%3D%22http%3A%2F%... | \n",
+ " ancient,baltimore,burial,death,empire,funeral,... | \n",
+ " NaN | \n",
+ " ... | \n",
+ " http://creativecommons.org/licenses/by-nc-sa/2.0/ | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " cf37054610 | \n",
+ " cf37054610 | \n",
+ " jpg | \n",
+ " 0 | \n",
+ " d29f01b149167d683f9ddde464bb3db | \n",
+ " An ornate Roman urn | \n",
+ " Photographed at the Walters Art Museum, Baltim... | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1251599 | \n",
+ " 51035803024@N01 | \n",
+ " bmitd67 | \n",
+ " 2004-10-30 17:09:32.0 | \n",
+ " 1099538888 | \n",
+ " Canon+PowerShot+S30 | \n",
+ " Jai+%26+Tara+on+the+Cumberland | \n",
+ " Another+trip+for+the+happy+couple. | \n",
+ " blue+heron,cumberland+river,jai,tara,tennessee | \n",
+ " NaN | \n",
+ " ... | \n",
+ " http://creativecommons.org/licenses/by-nc-sa/2.0/ | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 4a4234e32c | \n",
+ " 4a4234e32c | \n",
+ " jpg | \n",
+ " 0 | \n",
+ " d296e9e34bdae41edb6c679ff824ab2a | \n",
+ " Jai & Tara on the Cumberland | \n",
+ " Another trip for the happy couple. | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 2348587 | \n",
+ " 73621375@N00 | \n",
+ " Thom+Watson | \n",
+ " 2004-12-18 21:08:09.0 | \n",
+ " 1103497228 | \n",
+ " SONY+DSC-W1 | \n",
+ " Castle+gate+-+%22lite-brited%22 | \n",
+ " Taken+at+the+Miracle+of+Lights+display+in+Cent... | \n",
+ " bullrunpark,castle,centreville,christmas,decor... | \n",
+ " NaN | \n",
+ " ... | \n",
+ " http://creativecommons.org/licenses/by-nc-sa/2.0/ | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 7162c974c3 | \n",
+ " 7162c974c3 | \n",
+ " jpg | \n",
+ " 0 | \n",
+ " d29ce96395848478b1e8396e44899 | \n",
+ " Castle gate - \"lite-brited\" | \n",
+ " Taken at the Miracle of Lights display in Cent... | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 3516047 | \n",
+ " 48600072071@N01 | \n",
+ " doctor+paradox | \n",
+ " 2005-01-18 16:44:18.0 | \n",
+ " 1106084658 | \n",
+ " NaN | \n",
+ " A+Picture+Share%21 | \n",
+ " Tabular | \n",
+ " cameraphone,moblog,unfound | \n",
+ " NaN | \n",
+ " ... | \n",
+ " http://creativecommons.org/licenses/by-nc-sa/2.0/ | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 663e0d8b3d | \n",
+ " 663e0d8b3d | \n",
+ " jpg | \n",
+ " 0 | \n",
+ " d29abf32c4e12ff881f975b70e0cec0 | \n",
+ " A Picture Share! | \n",
+ " Tabular | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 999995 | \n",
+ " 4648651054 | \n",
+ " 24511045@N04 | \n",
+ " mtfrazier | \n",
+ " 2010-05-02 15:47:45.0 | \n",
+ " 1275083371 | \n",
+ " Canon+EOS+50D | \n",
+ " U.S.+Navy+Blue+Angels%3A+2010 | \n",
+ " 2+May+2010%0ASunday%0ASt.+Joseph%2C+Missouri | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " http://creativecommons.org/licenses/by-nc-nd/2.0/ | \n",
+ " 4072 | \n",
+ " 5 | \n",
+ " 2d12d73fb0 | \n",
+ " dd5856ea42 | \n",
+ " jpg | \n",
+ " 0 | \n",
+ " 60fa2911cb81eb25b356e9fee978aef | \n",
+ " U.S. Navy Blue Angels: 2010 | \n",
+ " 2 May 2010 Sunday St. Joseph, Missouri | \n",
+ "
\n",
+ " \n",
+ " 999996 | \n",
+ " 4652130996 | \n",
+ " 21963865@N04 | \n",
+ " GRAB1.0 | \n",
+ " 2010-05-29 19:23:10.0 | \n",
+ " 1275200833 | \n",
+ " SONY+DSLR-A230 | \n",
+ " Attempts+on+Her+Life | \n",
+ " BAPA+1+production+of+Martin+Crimp%27s+Attempts... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " http://creativecommons.org/licenses/by-nc-nd/2.0/ | \n",
+ " 4003 | \n",
+ " 5 | \n",
+ " 8889121579 | \n",
+ " 2f46599456 | \n",
+ " jpg | \n",
+ " 0 | \n",
+ " 60f5ef5ce4c2d24566226abebd67d4 | \n",
+ " Attempts on Her Life | \n",
+ " BAPA 1 production of Martin Crimp's Attempts o... | \n",
+ "
\n",
+ " \n",
+ " 999997 | \n",
+ " 4652568339 | \n",
+ " 64025277@N00 | \n",
+ " 1Sock | \n",
+ " 2010-05-13 15:38:37.0 | \n",
+ " 1275234267 | \n",
+ " Canon+EOS+DIGITAL+REBEL+XT | \n",
+ " Carlsbad+Caverns+3 | \n",
+ " %E2%99%A5%E2%99%A5%E2%99%A5%E2%99%A5%E2%99%A5%... | \n",
+ " carlsbad,carlsbad+caverns,cave,faa,new+mexico,... | \n",
+ " NaN | \n",
+ " ... | \n",
+ " http://creativecommons.org/licenses/by-nc-nd/2.0/ | \n",
+ " 4010 | \n",
+ " 5 | \n",
+ " 0a1808a69e | \n",
+ " cf6d348e3d | \n",
+ " jpg | \n",
+ " 0 | \n",
+ " 60f029482d1d1028fda5281daf498f | \n",
+ " Carlsbad Caverns 3 | \n",
+ " ♥♥♥♥♥♥♥ Interested in purchasing this photogra... | \n",
+ "
\n",
+ " \n",
+ " 999998 | \n",
+ " 4653110895 | \n",
+ " 20483509@N00 | \n",
+ " subberculture | \n",
+ " 2010-05-30 15:37:05.0 | \n",
+ " 1275245596 | \n",
+ " Canon+DIGITAL+IXUS+40 | \n",
+ " Want | \n",
+ " Isn%27t+that+gorgeous%3F | \n",
+ " 2010,edinburgh+museum,may,phonebox,wood | \n",
+ " NaN | \n",
+ " ... | \n",
+ " http://creativecommons.org/licenses/by-sa/2.0/ | \n",
+ " 4066 | \n",
+ " 5 | \n",
+ " 77c3b3a254 | \n",
+ " c4697e1511 | \n",
+ " jpg | \n",
+ " 0 | \n",
+ " 60f72775f433cf8de3efaeb431866153 | \n",
+ " Want | \n",
+ " Isn't that gorgeous? | \n",
+ "
\n",
+ " \n",
+ " 999999 | \n",
+ " 4655503987 | \n",
+ " 8457193@N07 | \n",
+ " zackojones | \n",
+ " 2010-05-30 15:34:58.0 | \n",
+ " 1275310230 | \n",
+ " Canon+EOS+7D | \n",
+ " Summertime | \n",
+ " You+gotta+love+it%21 | \n",
+ " georgia,savannah,united+states,us | \n",
+ " NaN | \n",
+ " ... | \n",
+ " http://creativecommons.org/licenses/by-nc-sa/2.0/ | \n",
+ " 4043 | \n",
+ " 5 | \n",
+ " caff543bfe | \n",
+ " f60952ac4d | \n",
+ " jpg | \n",
+ " 0 | \n",
+ " 60f687e11b913bce461e9525d8047e0 | \n",
+ " Summertime | \n",
+ " You gotta love it! | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1000000 rows × 26 columns
\n",
+ "
"
+ ],
"text/plain": [
- "Dataset({\n",
- " features: ['photoid', 'uid', 'unickname', 'datetaken', 'dateuploaded', 'capturedevice', 'title', 'description', 'usertags', 'machinetags', 'longitude', 'latitude', 'accuracy', 'pageurl', 'downloadurl', 'licensename', 'licenseurl', 'serverid', 'farmid', 'secret', 'secretoriginal', 'ext', 'marker', 'key', 'title_clean', 'description_clean'],\n",
- " num_rows: 14825233\n",
- "})"
+ " photoid uid unickname datetaken \\\n",
+ "0 137943 48600072071@N01 doctor+paradox 2004-08-01 18:13:06.0 \n",
+ "1 1246361 44124324682@N01 mharrsch 2004-11-03 23:04:02.0 \n",
+ "2 1251599 51035803024@N01 bmitd67 2004-10-30 17:09:32.0 \n",
+ "3 2348587 73621375@N00 Thom+Watson 2004-12-18 21:08:09.0 \n",
+ "4 3516047 48600072071@N01 doctor+paradox 2005-01-18 16:44:18.0 \n",
+ "... ... ... ... ... \n",
+ "999995 4648651054 24511045@N04 mtfrazier 2010-05-02 15:47:45.0 \n",
+ "999996 4652130996 21963865@N04 GRAB1.0 2010-05-29 19:23:10.0 \n",
+ "999997 4652568339 64025277@N00 1Sock 2010-05-13 15:38:37.0 \n",
+ "999998 4653110895 20483509@N00 subberculture 2010-05-30 15:37:05.0 \n",
+ "999999 4655503987 8457193@N07 zackojones 2010-05-30 15:34:58.0 \n",
+ "\n",
+ " dateuploaded capturedevice \\\n",
+ "0 1091409186 NaN \n",
+ "1 1099523042 NaN \n",
+ "2 1099538888 Canon+PowerShot+S30 \n",
+ "3 1103497228 SONY+DSC-W1 \n",
+ "4 1106084658 NaN \n",
+ "... ... ... \n",
+ "999995 1275083371 Canon+EOS+50D \n",
+ "999996 1275200833 SONY+DSLR-A230 \n",
+ "999997 1275234267 Canon+EOS+DIGITAL+REBEL+XT \n",
+ "999998 1275245596 Canon+DIGITAL+IXUS+40 \n",
+ "999999 1275310230 Canon+EOS+7D \n",
+ "\n",
+ " title \\\n",
+ "0 A+Picture+Share%21 \n",
+ "1 An+ornate+Roman+urn \n",
+ "2 Jai+%26+Tara+on+the+Cumberland \n",
+ "3 Castle+gate+-+%22lite-brited%22 \n",
+ "4 A+Picture+Share%21 \n",
+ "... ... \n",
+ "999995 U.S.+Navy+Blue+Angels%3A+2010 \n",
+ "999996 Attempts+on+Her+Life \n",
+ "999997 Carlsbad+Caverns+3 \n",
+ "999998 Want \n",
+ "999999 Summertime \n",
+ "\n",
+ " description \\\n",
+ "0 Antenna \n",
+ "1 Photographed+at+the+%3Ca+href%3D%22http%3A%2F%... \n",
+ "2 Another+trip+for+the+happy+couple. \n",
+ "3 Taken+at+the+Miracle+of+Lights+display+in+Cent... \n",
+ "4 Tabular \n",
+ "... ... \n",
+ "999995 2+May+2010%0ASunday%0ASt.+Joseph%2C+Missouri \n",
+ "999996 BAPA+1+production+of+Martin+Crimp%27s+Attempts... \n",
+ "999997 %E2%99%A5%E2%99%A5%E2%99%A5%E2%99%A5%E2%99%A5%... \n",
+ "999998 Isn%27t+that+gorgeous%3F \n",
+ "999999 You+gotta+love+it%21 \n",
+ "\n",
+ " usertags machinetags ... \\\n",
+ "0 cameraphone,cayugaheights,green,hydrant,ithaca... NaN ... \n",
+ "1 ancient,baltimore,burial,death,empire,funeral,... NaN ... \n",
+ "2 blue+heron,cumberland+river,jai,tara,tennessee NaN ... \n",
+ "3 bullrunpark,castle,centreville,christmas,decor... NaN ... \n",
+ "4 cameraphone,moblog,unfound NaN ... \n",
+ "... ... ... ... \n",
+ "999995 NaN NaN ... \n",
+ "999996 NaN NaN ... \n",
+ "999997 carlsbad,carlsbad+caverns,cave,faa,new+mexico,... NaN ... \n",
+ "999998 2010,edinburgh+museum,may,phonebox,wood NaN ... \n",
+ "999999 georgia,savannah,united+states,us NaN ... \n",
+ "\n",
+ " licenseurl serverid farmid \\\n",
+ "0 http://creativecommons.org/licenses/by-nc-sa/2.0/ 1 1 \n",
+ "1 http://creativecommons.org/licenses/by-nc-sa/2.0/ 1 1 \n",
+ "2 http://creativecommons.org/licenses/by-nc-sa/2.0/ 1 1 \n",
+ "3 http://creativecommons.org/licenses/by-nc-sa/2.0/ 2 1 \n",
+ "4 http://creativecommons.org/licenses/by-nc-sa/2.0/ 3 1 \n",
+ "... ... ... ... \n",
+ "999995 http://creativecommons.org/licenses/by-nc-nd/2.0/ 4072 5 \n",
+ "999996 http://creativecommons.org/licenses/by-nc-nd/2.0/ 4003 5 \n",
+ "999997 http://creativecommons.org/licenses/by-nc-nd/2.0/ 4010 5 \n",
+ "999998 http://creativecommons.org/licenses/by-sa/2.0/ 4066 5 \n",
+ "999999 http://creativecommons.org/licenses/by-nc-sa/2.0/ 4043 5 \n",
+ "\n",
+ " secret secretoriginal ext marker \\\n",
+ "0 1650c7cdc6 1650c7cdc6 jpg 0 \n",
+ "1 cf37054610 cf37054610 jpg 0 \n",
+ "2 4a4234e32c 4a4234e32c jpg 0 \n",
+ "3 7162c974c3 7162c974c3 jpg 0 \n",
+ "4 663e0d8b3d 663e0d8b3d jpg 0 \n",
+ "... ... ... ... ... \n",
+ "999995 2d12d73fb0 dd5856ea42 jpg 0 \n",
+ "999996 8889121579 2f46599456 jpg 0 \n",
+ "999997 0a1808a69e cf6d348e3d jpg 0 \n",
+ "999998 77c3b3a254 c4697e1511 jpg 0 \n",
+ "999999 caff543bfe f60952ac4d jpg 0 \n",
+ "\n",
+ " key title_clean \\\n",
+ "0 d29e7c6a3028418c64eb15e3cf577c2 A Picture Share! \n",
+ "1 d29f01b149167d683f9ddde464bb3db An ornate Roman urn \n",
+ "2 d296e9e34bdae41edb6c679ff824ab2a Jai & Tara on the Cumberland \n",
+ "3 d29ce96395848478b1e8396e44899 Castle gate - \"lite-brited\" \n",
+ "4 d29abf32c4e12ff881f975b70e0cec0 A Picture Share! \n",
+ "... ... ... \n",
+ "999995 60fa2911cb81eb25b356e9fee978aef U.S. Navy Blue Angels: 2010 \n",
+ "999996 60f5ef5ce4c2d24566226abebd67d4 Attempts on Her Life \n",
+ "999997 60f029482d1d1028fda5281daf498f Carlsbad Caverns 3 \n",
+ "999998 60f72775f433cf8de3efaeb431866153 Want \n",
+ "999999 60f687e11b913bce461e9525d8047e0 Summertime \n",
+ "\n",
+ " description_clean \n",
+ "0 Antenna \n",
+ "1 Photographed at the Walters Art Museum, Baltim... \n",
+ "2 Another trip for the happy couple. \n",
+ "3 Taken at the Miracle of Lights display in Cent... \n",
+ "4 Tabular \n",
+ "... ... \n",
+ "999995 2 May 2010 Sunday St. Joseph, Missouri \n",
+ "999996 BAPA 1 production of Martin Crimp's Attempts o... \n",
+ "999997 ♥♥♥♥♥♥♥ Interested in purchasing this photogra... \n",
+ "999998 Isn't that gorgeous? \n",
+ "999999 You gotta love it! \n",
+ "\n",
+ "[1000000 rows x 26 columns]"
]
},
- "execution_count": 83,
+ "execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "dataset = dataset['train']\n",
- "dataset"
+ "# Inspect one of the chunks\n",
+ "pd.read_csv(\"./chunks/chunk1.tsv\", sep=\"\\t\")"
]
},
{
"cell_type": "code",
- "execution_count": 84,
- "id": "c4794c29",
+ "execution_count": 98,
+ "id": "c51c5597",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " key | \n",
+ " title_clean | \n",
+ " description_clean | \n",
+ " ext | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " d29e7c6a3028418c64eb15e3cf577c2 | \n",
+ " A Picture Share! | \n",
+ " Antenna | \n",
+ " jpg | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " d29f01b149167d683f9ddde464bb3db | \n",
+ " An ornate Roman urn | \n",
+ " Photographed at the Walters Art Museum, Baltim... | \n",
+ " jpg | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " d296e9e34bdae41edb6c679ff824ab2a | \n",
+ " Jai & Tara on the Cumberland | \n",
+ " Another trip for the happy couple. | \n",
+ " jpg | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " d29ce96395848478b1e8396e44899 | \n",
+ " Castle gate - \"lite-brited\" | \n",
+ " Taken at the Miracle of Lights display in Cent... | \n",
+ " jpg | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " d29abf32c4e12ff881f975b70e0cec0 | \n",
+ " A Picture Share! | \n",
+ " Tabular | \n",
+ " jpg | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " key title_clean \\\n",
+ "0 d29e7c6a3028418c64eb15e3cf577c2 A Picture Share! \n",
+ "1 d29f01b149167d683f9ddde464bb3db An ornate Roman urn \n",
+ "2 d296e9e34bdae41edb6c679ff824ab2a Jai & Tara on the Cumberland \n",
+ "3 d29ce96395848478b1e8396e44899 Castle gate - \"lite-brited\" \n",
+ "4 d29abf32c4e12ff881f975b70e0cec0 A Picture Share! \n",
+ "\n",
+ " description_clean ext \n",
+ "0 Antenna jpg \n",
+ "1 Photographed at the Walters Art Museum, Baltim... jpg \n",
+ "2 Another trip for the happy couple. jpg \n",
+ "3 Taken at the Miracle of Lights display in Cent... jpg \n",
+ "4 Tabular jpg "
+ ]
+ },
+ "execution_count": 98,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Looking at a chunk with only the relevant columns that we need\n",
+ "df = pd.read_csv(\"./chunks/chunk1.tsv\", sep=\"\\t\")[[\"key\", \"title_clean\", \"description_clean\", \"ext\"]]\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cc1668f8",
"metadata": {},
- "outputs": [],
"source": [
- "def image_exists(root: str, name: str, ext: str):\n",
- " image_path = (Path(root)/name[0:3]/name[3:6]/name).with_suffix(ext)\n",
- " return image_path.exists()"
+ "### Grab each chunk from the folder, clean it up, keep only the entries whose image exists, and append them to the global df"
]
},
{
"cell_type": "code",
- "execution_count": 90,
- "id": "1b500078",
+ "execution_count": null,
+ "id": "abbcccf3",
"metadata": {},
"outputs": [],
"source": [
- "def select_existing_rows(examples):\n",
- " # Select lists we want to keep\n",
- " keys = examples['key']\n",
- " titles_clean = examples['title_clean']\n",
- " descriptions_clean = examples.get('description_clean', '')\n",
- " exts = examples['ext']\n",
- " \n",
- " result = {'key': [], 'title_clean': [], 'description_clean': [], 'ext': []}\n",
- " for i, image_name in enumerate(keys):\n",
- " print(i, image_name)\n",
- " if image_exists(root=str(yfcc100m_images), name=image_name, ext='.' + exts[i]):\n",
- " result[\"key\"].append(image_name)\n",
- " result[\"title_clean\"].append(titles_clean[i])\n",
- " result[\"description_clean\"].append(descriptions_clean[i])\n",
- " result[\"ext\"].append(exts[i])\n",
- " print(f'returning {len(result[\"key\"])}')\n",
- " return result"
+ "# Checks whether the image for a given item exists in storage (returns True if found, None otherwise); we only keep the items we have images for\n",
+ "def image_exists(item):\n",
+ " name, _, _, ext, _ = item\n",
+ " root=str(yfcc100m_images)\n",
+ " image_path = (Path(root)/name[0:3]/name[3:6]/name).with_suffix(\".\"+ext)\n",
+ " if image_path.exists():\n",
+ " return True\n",
+ " else:\n",
+ " return None"
]
},
{
"cell_type": "code",
- "execution_count": 91,
- "id": "467378c1",
+ "execution_count": 86,
+ "id": "44fa86ab",
"metadata": {},
- "outputs": [
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "b72e866c3f174e9e9aa2430e204f2baf",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "Selecting rows with images that exist: 0%| | 0/14826 [00:00, ?ba/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "0 d29e7c6a3028418c64eb15e3cf577c2\n",
- "1 d29f01b149167d683f9ddde464bb3db\n",
- "2 d296e9e34bdae41edb6c679ff824ab2a\n",
- "3 d29ce96395848478b1e8396e44899\n",
- "4 d29abf32c4e12ff881f975b70e0cec0\n",
- "5 d298a61f2f7be6c9e2c2af81755b489\n",
- "6 d29b1b973ab1a95a37cd4cda37999fb\n",
- "7 d290d566266ad568e94128d4135b41a\n",
- "8 d29b1ac2a497b0d9a4a43c3a51d13fb\n",
- "9 d29ebe6c96f53b2f5d7f5eed9b2b2898\n",
- "10 d29ec1b3f75749a231ee1d9d206baf6e\n",
- "11 d290bee419ce98d9a79ccf512a47a79\n",
- "12 d29bc1eff62a477131516c40a54f2dce\n",
- "13 d292a123bcf58e13128d2067593d81\n",
- "14 d294424637d532d8cfbcf2ca99b85f\n",
- "15 d29a51d8502f531115b108d59c811ab\n",
- "16 d29a9f0fce210c7e050877a53697031\n",
- "17 d290c750469f11795ed85fa62e4b52\n",
- "18 d29e13badf42d839b421478be4452dbe\n",
- "19 d29c1d635348aa35474a90f57aafb7\n",
- "20 d291a7c7c71455d5b3cdd97ca5e4c\n",
- "21 d295f95d7cb204dc812a476af5f4f8a\n",
- "22 d2932ecd1053165aa3d7b9e68547e0b6\n",
- "23 d29cd5a4b1d6a759b63df357ef2b\n",
- "24 d294e885117ca7d9b328c5b9388f52\n",
- "25 d2999b54832bb275a7e2eea47e98f11\n",
- "26 d29f89d491812beb84e62223b4541d7\n",
- "27 d2993599afe456ba786060129fc9cdfd\n",
- "28 d290ceb78d0f7c8c49930cd96b12b27\n",
- "29 d29db640e6943c341e3df6b4a815a17\n",
- "30 d29d8ae6354fd9c1613ac3750feb298\n",
- "31 d29b7d6de63ce541b71ceb78745fbfc\n",
- "32 d290cc9739e6f554b8f27f6496af5a6\n",
- "33 d29dcfe9b1c7381614d5bd8290d435\n",
- "34 d297e414783424b8d8339d8c9b54ca72\n",
- "35 d2918885fdc74e96cdec3fd49e409667\n",
- "36 d29d30289c89a2e9fc6234283b0397\n",
- "37 d291536c1f1b3be24034663d3e57c84b\n",
- "38 d29cccb1954ac8268963b8614d4541\n",
- "39 d2945c8676633807e51169a7d123f49\n",
- "40 d29b46d79ae192685dfebbafe681bf5\n",
- "41 d298c4194cae157a78d9c85f6965ced6\n",
- "42 d29f4f4050493b5b11bf029731250\n",
- "43 d29fff53248edfe2539d2e2edf9bc4e\n",
- "44 d2913017cf52bd6b239c16119ee955d\n",
- "45 d29b53425360629f945b442e1819182b\n",
- "46 d29f737d2137b6a58c8a3db2673dfa\n",
- "47 d295c31c46ee0107f3224a12bb18e0\n",
- "48 d29735527519d9efd3477ede346d077\n",
- "49 d29549dc1a2f31e65add5159aca6ca7e\n",
- "50 d29a56bba12ada01da573a325d2bbd\n",
- "51 d29eb4236f7a4564cb0368c98b9d15a\n",
- "52 d29417751c506a499af2bf9bba1c91dc\n",
- "53 d299335cdf1679c9a5fe6b655e22cfc6\n",
- "54 d29ad98d3118d6e21a94156f427812d3\n",
- "55 d29a7bd530fa6cec73f55f5fdec35\n",
- "56 d2983c9adab124234cf170b157d986aa\n",
- "57 d299aa496b76993e1fbfaca5fadefa82\n",
- "58 d29444db0139d8bdbdd96723aaba0\n",
- "59 d29c2fcbaeb4a6bd7b7eb13e467823\n",
- "60 d29b9d5c68124a5e56a4594974c9e7ec\n",
- "61 d29c941e4349d152939733a01debb9ce\n",
- "62 d2971493e2bdea2a48c3e3f7b9f3b9f9\n",
- "63 d29f28e4b5254594fb581803aeaf1d7\n",
- "64 d292145142221a995b2b17ef267fd5\n",
- "65 d299bbbad76835c2edf1f012bd899883\n",
- "66 d29462a3fdd5c994b2a09958f4413f39\n",
- "67 d294a4f2273696fbcbc52c9ae3fa4cc7\n",
- "68 d29b29cbf37f4b3278d34a9e2274cdc\n",
- "69 d294f51aaae56a1eca32fac5551330c9\n",
- "70 d29925d6bef3c5e318a6ef9461281ae2\n",
- "71 d2914df9dabb24aad3b9a14f76bcaaf5\n",
- "72 d290aae57d883ac77e882f082725753\n",
- "73 d29d63a276a23f42f2011975bbb1432\n",
- "74 d2914a2414672f2edc55afe090faa68b\n",
- "75 d2943dd2a17e8b96766f47a75beceb6\n",
- "76 d2915b7155c94423748e2a5f102d273d\n",
- "77 d292f2a2d138a6606ec6acbea4d4b8a6\n",
- "78 d29bf6fa58a7401fb2efd8f7b55473e\n",
- "79 d29a7085f4c7a3ef1caab1d33c7772\n",
- "80 d29e29aa2149be589126184fa6ba95b\n",
- "81 d29d6e2963309e7b5c5978ea71f593\n",
- "82 d291adb1933c79228e1fdfe1762f5b9f\n",
- "83 d294bcf7cc7eeb78fa6439c66c135359\n",
- "84 d29d12c15f67b3b6968abe771a5fd0\n",
- "85 d29ba798d4f1c1e8cbcb46beda14c8f\n",
- "86 d29056682c23fa206f7c952e512499ee\n",
- "87 d2969725825c9da68e49c0e7be8daf1d\n",
- "88 d291bc5a6b35c53f4117ad2415baa3\n",
- "89 d29d45ca9fb464c9c60561fae2948\n",
- "90 d291d29ff97c66bf1c6b4c956ed81\n",
- "91 d298d5e0d479c7ec8add78c4aa80\n",
- "92 d2943f69e59528b6e4a9a696763545\n",
- "93 d299e68a3d7d50448da951f8792693b\n",
- "94 d291ea441ccf3cc8f34073d4ce4d8d20\n",
- "95 d29680dcc84f825828708b3d9427a8ae\n",
- "96 d291201e72010356e91c88917aacfc\n",
- "97 d297a5b71895f7bfbfb3d156d8ed3b83\n",
- "98 d29d7e64a92ea1bd6ba08ff76c1a3bd8\n",
- "99 d291f75dc99bee639fd680af7e7a4fb0\n",
- "100 d29ddb6ca8a49f3369733f4d3a8887\n",
- "101 d292ff4251b9a36b19fa6cbbc87851f\n",
- "102 d29ca1839cfbb1c40ac2ba9cb30dea3\n",
- "103 d29919029e2eeec95d7a41e9472e86e\n",
- "104 d29b393acb76bf3dd45f1b4ef4f513\n",
- "105 d29bcff1a7b2f4b109b42e99a49cb7\n",
- "106 d2909c7f45b39247ba0c2ed811067ee\n",
- "107 d29964417f32c1420bf235613af9c9\n",
- "108 d29a9e825141d6605e6d9c4e658ae7f0\n",
- "109 d29f2e7fea71dc911403e8c2b12414f\n",
- "110 d2999b51a0e9a37f7d670ac979ae8c0\n",
- "111 d2944c2f78b758c1b141104492f3f2\n",
- "112 d2934e0baba9f344e85f7326e902b97\n",
- "113 d29bda80ffd5d46aa798d715ff2c0\n",
- "114 d2912c43811337b61f9c119f35781e56\n",
- "115 d2945a077f98fdbde0fca07e22a41\n",
- "116 d29aa68847eb4a66e71ab2626f84e\n",
- "117 d291edb7ec46c41048967ab971c3c29c\n",
- "118 d29ba93b2642d9f937316694da15b22b\n",
- "119 d292aa457e9d7c8ffe8e55761f7d4f5\n",
- "120 d29da11457a35654323fa3d93834a34\n",
- "121 d295ba252458b15eab8957ac679509c\n",
- "122 d299e43dd7d1ea48ddc02a823ddd7cc\n",
- "123 d29d94f1c378e42b6fe990f76b94fe6\n",
- "124 d29c52f12fede24a2caf6170655b558d\n",
- "125 d2927cdb877b23f7c8356f8619aafc\n",
- "126 d296d62fbd28d32a4d6f9f42a336ffdf\n",
- "127 d294691643464e92e6afea29b7e6784\n",
- "128 d29e9baa87e9b85136f85a397bcd126\n",
- "129 d29a539bdde71c478e3655983d1ea\n",
- "130 d29ff2366b398bac938d46e82945141\n",
- "131 d29998f21f7db92b129b3567da7a546\n",
- "132 d29c9c1908eeb3fd0fd8737ba53b59f\n",
- "133 d29ae0f149cc6b67fefcaa79add5b873\n",
- "134 d2988df2a66eccca85d12020f1896558\n",
- "135 d29f85ed0efd46c7a46539e18a622\n",
- "136 d296bfdc77c0cb5479361fdee3fa6\n",
- "137 d29479e2d8f288ea2445659243bea37\n",
- "138 d29bed60bd16db97ef5ec5f65cb7f1\n",
- "139 d29595ef1a163f68659b56815b2fb21\n",
- "140 d29f6ef4562c75bdc5d82dfd49394a\n",
- "141 d2911c3e4ed5a8574991583e9e713f49\n",
- "142 d29094b0977ffff0ff18a7715a96bda6\n",
- "143 d291a0104cc6b978ddfb6d9eee0ec7f\n",
- "144 d29d542a1dd0a0682c355e7ad45db14\n",
- "145 d29f79dd32c418886791cbdb4ebe90f1\n",
- "146 d29aeec1dced477c94bd396fc3127\n",
- "147 d293eaabfefd938d71278825339494\n",
- "148 d29e64339b9568b37fd4bdd7edb9836a\n",
- "149 d298a529ba8a82918fe096e81110981e\n",
- "150 d29ce5cf8539329ae4204e6ce327b2b\n",
- "151 d29142b3b0e68e94a8522dfd3a2b690\n",
- "152 d29e96e6b1b7eff768dff0ed8955edf1\n",
- "153 d29265dae85a59cdd9615daffbcefe5\n",
- "154 d29d9b1d50f747cb9adfc2b32fc32c\n",
- "155 d29c5e12470d4e366c4bb8fa9baddc\n",
- "156 d294fe93887dd377520c64839dc139\n",
- "157 d29371933762af2ed19cafc3dd4fabd9\n",
- "158 d2981a42980199daf93915c1eef93e\n",
- "159 d29ea4ef385cf37b9045b06b4ba7252\n",
- "160 d299a091d31f65129d3396a58f92642d\n",
- "161 d29f3628139676f4ceb6bfe0cbc9aaf6\n",
- "162 d29b114fa2616f46bcebb37f5d9f59d\n",
- "163 d299a8d174e1f54ec383a241a8bb50ae\n",
- "164 d2959843a6d4fc265637b63543a9419a\n",
- "165 d29638c081713aa4dc5cd32ce88c158d\n",
- "166 d2967886b6b0cbfb6b98b175822c956a\n",
- "167 d29a6bc47f95f7668fbf43a1a1e947\n",
- "168 d29e6265a813f02f2dcd114e5f2748be\n",
- "169 d29960478ff0d270e822e84644638f7\n",
- "170 d290a8ea4c1d4fda164e6ba0f978876c\n",
- "171 d2934645777ae1ec25b2a4dfeb6036ed\n",
- "172 d299ecad1261a988e0dcce7d1c7f6c42\n",
- "173 d29b6eb6dc77c7be34481fa54754c\n",
- "174 d2956739c7f8791ed7d4aff4f92b948\n",
- "175 d293aa871b9e4c6913c43e91cf48040\n",
- "176 d292d08afc78df9d38f6f1f1f28ec0\n",
- "177 d29f448b6f32d4b9b64719beaaab3e2\n",
- "178 d29e4bf8ed852972ff75aa6e4e6964\n",
- "179 d29a2088d388e1799d144cdd88025ff\n",
- "180 d29ad328dfb255da22af6aa72f15888\n",
- "181 d29cf0f71eee59ccdecc8cb9e1aef482\n",
- "182 d29a84ccf19a324aff1ece8d1831c8d\n",
- "183 d298b98b2dcd287d31b7e5446ba8284\n",
- "184 d29055e4ee873415ae12477e46fba\n",
- "185 d2922e6430bb75a26072149343cc191a\n",
- "186 d2955ecab0c76472d6f9dad25d165b8b\n",
- "187 d29113a376ed16c6c1a65370cb845e65\n",
- "188 d2944e25fb89b2466feb547d29e63975\n",
- "189 d29d40ee1841d4dcf143925564f347\n",
- "190 d2988f40eb3522d26574e7947840e116\n",
- "191 d29fbc1e7c3c137f783ba57222cd98f6\n",
- "192 d29fab615e3124424ba7eb1726c64b\n",
- "193 d29e8e17d964c0aea5e52fe3c8c28270\n",
- "194 d29b2838685267fc64f64a41211df2f5\n",
- "195 d29a8a9d8edacfece418c8ea31eaf6\n",
- "196 d29d6d371f46b5a31dd175a8a7f41\n",
- "197 d29fbb7cc17c4e374232af541d4e4240\n",
- "198 d293279e99aecab63ef09872b93f8d\n",
- "199 d29fb59416e4c74dbed06129b130d828\n",
- "200 d290dd1a636d24fa2fc96ef667f91a3\n",
- "201 d29dc37487691ba4417611ab9b1187\n",
- "202 d29f93114ac4dc3baace4d5e0bd3e57\n",
- "203 d294259c4b3c35a1559d7f742a36c034\n",
- "204 d29470f8213e53e2aa7164f3623ccc1f\n",
- "205 d29faf1d9d6b7f77deb5385ea85111f\n",
- "206 d29b18455b80dc37d81024d1ab6a99f\n",
- "207 d29eb148320276099cd37f74b5b9ed8\n",
- "208 d2961799bc3a8866450f9dedf3753da\n",
- "209 d29826568875bdfe2f9b479a7e64ac4\n",
- "210 d2928972156f2e5372cce81c5434655\n",
- "211 d2992f331841f8c161554ea0f41faff\n",
- "212 d291c33cbc5075d4be40b790c7435e\n",
- "213 d29c655b787cb0e02be7272a57686cfa\n",
- "214 d2939d29b1d1d5c3aa21d3b46127a1f8\n",
- "215 d29839b090823f6d915a2bf4338b5b\n",
- "216 d29334188dda37bfc48b84d1c2ad9f\n",
- "217 d29e7e8ebfaae9b98b4141eb5242fb1\n",
- "218 d295057d1bf5e65929485f92a4c1ab\n",
- "219 d29184e22ba47ae9c449fc21086a82\n",
- "220 d29e384d3221b25ec6221376c1a025d8\n",
- "221 d290b2c211d229a262082c646856d57\n",
- "222 d295472dea1c94a5a4a2443f83b82\n",
- "223 d29ca6ac3858de4c4a0b6fd88547aac\n",
- "224 d2956a39fbc3081f9fe394aff50aa\n",
- "225 d29dc4779f78f63bca6252c4bd65bd46\n",
- "226 d29960f8b3be7860b0af7497bd16d6bd\n",
- "227 d2912134838dc88b389b60571f73c358\n",
- "228 d295fc5a26d7da3cfa4fd4a824bf0\n",
- "229 d29b4efbfa2b68093ee70fb2381fbfc\n",
- "230 d293694f84e8ffafa8f42d55941bbd76\n",
- "231 d29833fb44ce2852ef764a8fa87631d4\n",
- "232 d296962ad593259fc695a76170c4f097\n",
- "233 d2915c686462926e7e53b112ddcd1ac2\n",
- "234 d298a5dcab9e7da513cf278d3e19f2\n",
- "235 d29266df8098c511507828f4632363\n",
- "236 d296413b2724e3c51954834ded46211b\n",
- "237 d29514a1ed5b50893427ebefe1e5f8ab\n",
- "238 d298bce28583de4cb34f279882d66bd\n",
- "239 d2906ed28478ab13689e4ef0165376a4\n",
- "240 d293d91cc7bad938f8cf20b372ac93b\n",
- "241 d29fc131a3fe8ca955e6b59d2c8c981\n",
- "242 d29215ab9be9ff9185479e24cf4ce56a\n",
- "243 d29f9bc02cb942b447e9f99db525c\n",
- "244 d29149d031f6d431bd3015219f275bd5\n",
- "245 d29aae5bdb354bd96e46cf598dadb0\n",
- "246 d2932362faf4ca9ac8697cbdd6c34bf\n",
- "247 d2929a57d3d8a6bea06b5f4f9afb452\n",
- "248 d29a5ac5d9ac764db856509640a5142a\n",
- "249 d299c482f3314c3dad947db3a3156\n",
- "250 d29d1183e0959ae7ac6c8e7b66374db\n",
- "251 d29953bd569ce38442fc69fbcd8bd83c\n",
- "252 d291a8ac4849b85a59b8c68dc752d0\n",
- "253 d291e4aed5fb43f4f325d11ad66af251\n",
- "254 d29675b4a3dfee18769a3ac674c1f3f\n",
- "255 d29e648fed5b29ad1c152aa9cea0c6fc\n",
- "256 d29b629bab26f59f9117f69a679db8\n",
- "257 d29c593a5f55b48a9aefc728a6ccda8c\n",
- "258 d29a4fe79f1cf5298b9feac44547d56\n",
- "259 d290c12092a99314f219c6a4e6387eb1\n",
- "260 d29fb6bcfb2d1e53fe919c26df36b578\n",
- "261 d29482e12c94103037566fa2b227b21\n",
- "262 d29e6ccc3336f45a4f216c83d5f72f1\n",
- "263 d2931ac79c95d7526bf543407a54c3\n",
- "264 d29450397c2bda2c15071e551dc4cfe\n",
- "265 d29bd6f895319993cfa28439d6f561e7\n",
- "266 d29038e417e3faf2e364fbae694f1a5f\n",
- "267 d2958160bdd4a498a8fdbb78f464b7\n",
- "268 d29b8defd5fc3ddf3048cf72176f19b\n",
- "269 d296bb80e1db5d5fd3379dc8d92122a\n",
- "270 d294c804331bd4d1c57f01642f61f7\n",
- "271 d29461e7a136d19d529d286f5f2fdf3\n",
- "272 d291d4646e9cddd811b3944076644810\n",
- "273 d299760693b62a5eae3f1b3b075d75\n",
- "274 d299bbb989b35ffd8f1ce25446f0d78c\n",
- "275 d29962a38b71686cf1b772e97325a78a\n",
- "276 d29f63192e11d1199628d01420131fef\n",
- "277 d296de83a8761980102db948e468ae6\n",
- "278 d291ab4d3f535975431e6af3b4640e6\n",
- "279 d2978ef0cb5ef834b43202e51e33dc1\n",
- "280 d29258d726278a56dd3919cc188a896\n",
- "281 d2915e331f6388e5ca27935d52b9148\n",
- "282 d29c604cc496ec9f12437c94fbac2864\n",
- "283 d29a46723eb8d9632698ca567bd5568d\n",
- "284 d29659d98358b82f7f5db4ef5a5e\n",
- "285 d296207f2b1399f9f1af5dfcda227947\n",
- "286 d2919ae436a2fb4c57d3b033b335a9ca\n",
- "287 d2934474c6a34db57e3ff64a1845aff\n",
- "288 d2992c53edf3df75ba139316ee933b\n",
- "289 d29c21b8de09a7635e787d148742371\n",
- "290 d29dac31e0d9ae8a8b1b9dc4463fd891\n",
- "291 d29f958ccbae9457c11cb31f7d96bb5c\n",
- "292 d2928d424132804e6fb2732910640\n",
- "293 d29947c65eb219959c9ca5e701e75ce\n",
- "294 d29c5d39572d4358239862d22b36ec6\n",
- "295 d2925f6193f32511b1afd8a1508a6179\n",
- "296 d29a91e0249311e7a9d58675153b1d33\n",
- "297 d2997d5a8416e7371663096084e3\n",
- "298 d29519b713f78f3d8fa21622c7681af2\n",
- "299 d292f7f7a6f8b994f62993eed8482f39\n",
- "300 d29b243ee2f2b8b043d2f8f09d611ce1\n",
- "301 d290d43bb17551605962218450a0f179\n",
- "302 d29ffa62178f054ec2094ed374db1c1\n",
- "303 d293de44bad13998ce435eb50b7bf\n",
- "304 d293e015cf9f815306545116ad229ab\n",
- "305 d2919a98989c3b9758b76ca2f1c7379\n",
- "306 d293ecf2a8a2e3e2074c45719737e91\n",
- "307 d29fa524eb54e62d943e9bfa7c489\n",
- "308 d29d70e5f39dd79c41bfa01367b3d96e\n",
- "309 d297a1b5faf9c99b1a862c8cde3a0d5\n",
- "310 d2954ff7073f7b8889b8fdaf9bf803\n",
- "311 d2934a55774d6351ca6f0bf85ae6bbd\n",
- "312 d297d2161bdeda48298bdf679089240\n",
- "313 d29dfbb7e8746e19c769ed08cf9ab4\n",
- "314 d29eac9d5137123aa873d7872a266572\n",
- "315 d2989a38a0351440c9c3ea531f48b782\n",
- "316 d294c2e6796f8f38524c4f36196f7d6\n",
- "317 d29e843d1b3e22f97c2121d849eb43\n",
- "318 d29b8763a0409c173651f57d2cc732\n",
- "319 d29fe485da174c44309b2d2893666583\n",
- "320 d29fbf9d5b504081915276afb3ff171\n",
- "321 d294b740bb69be81a4a05e945afcd26b\n",
- "322 d299cda76b9a1deafd15c8d029c814ca\n",
- "323 d29f87767b65523073bbc0d62f4a1137\n",
- "324 d29a59c28c2a20c9ea582bac4f3d803c\n",
- "325 d29292389f7dfef416fcc59f3dab3445\n",
- "326 d29f3eca85aaac96afd5b439733ce3a\n",
- "327 d2924afd49df91267aacbcd3a55d87e\n",
- "328 d299228447c3965210c9e2a0287d62bc\n",
- "329 d299dfd6e5ed848f24adb34a1e66fa81\n",
- "330 d29248b6cd344ac354ed3573c2ead3\n",
- "331 d2917a5b3d58c3a1bde2f19029d4db1\n",
- "332 d29a88af89763c811ad42f71e1adcd\n",
- "333 d29fad4f12b05bc6316078f393c594fc\n",
- "334 d293f6cd8ec2298d61c4c51bb43cb71\n",
- "335 d298e6a75eadc0c941252a9f2a7d53b1\n",
- "336 d290e285c5b531f06c921a5f20874ec6\n",
- "337 d29d3cb79958d1e43568f1c63d34e39c\n",
- "338 d29bbde017cfdde5da2e3fe91dc17e15\n",
- "339 d2992c66109486fe8e2585e81e547c94\n",
- "340 d29e5c8da4a9c63a963321b74cbe1b9\n",
- "341 d290552ac332bf7fe8925333cf3a132\n",
- "342 d29d596ffc266f1bc5f0637472499d55\n",
- "343 d29beee88cc15b282a2dda98d6331f5f\n",
- "344 d29357e935842c476dff143abfc5d1de\n",
- "345 d2968522cfd5bf7d71331553902af0\n",
- "346 d29b589144c4cef74c5888dd073b4\n",
- "347 d29f6794bb3380d5388abc10c1d18b68\n",
- "348 d294f6f9b9d1f9aa1cb516a095199c25\n",
- "349 d2977b12141f2a60378f424936328c2\n",
- "350 d2909ecedecb8fcfb434e1e563ac\n",
- "351 d293365d7f87b7390888e6df1e8b2b8\n",
- "352 d296aea77833d92f0a596fe811b28f9\n",
- "353 d29fc025178de971df2b671296e885a8\n",
- "354 d29d3e58a9f013c875a9728dfb7bc5b\n",
- "355 d29033a3c4e8957464ad3290a213e8\n",
- "356 d29c12a61ed81d6ec44664df4dfcf\n",
- "357 d29fa842cadb65b73211e71a91f160\n",
- "358 d29f5d646fc70ded5b8147931cc42bc\n",
- "359 d2929e6a9fb9d2b0dd2d6ed6212e1c44\n",
- "360 d29719b14e693377102b3c4153208158\n",
- "361 d296f8b2a05c5c840fd3bf274d24de\n",
- "362 d29d8ddb8d04a849453aebafb197ff6\n",
- "363 d29923d45ae9d1ce1b2f013d93cf076\n",
- "364 d29d77467587445028d29d9bb078739\n",
- "365 d293ad8814801a96ef38962c19c2fe2\n",
- "366 d299588fa03492cd5dc1f4699f66c6f3\n",
- "367 d291c3e7637a0827245c44ad9afd09a\n",
- "368 d29599987170f4af26232b57acd2\n",
- "369 d299c7086bd48d25ff6ed1bdbfd2fc6\n",
- "370 d2926f81cde09deba0c1c6267496ead3\n",
- "371 d296ae4789c8813ae39495c3d6c574\n",
- "372 d29f75b10f3ffbafced8c67fa76fb55\n",
- "373 d293cec43d8db67d452f15c8796c3fd\n",
- "374 d295132ca6905e8ed6ad506833d2061\n",
- "375 d29cb2d78a6c4714148eb5bbe9b5b64a\n",
- "376 d29ee1d7ab8a5d5246edc558c6faea\n",
- "377 d297cc2b8df01184d81cc24b0ccf450\n",
- "378 d292cb986c671084d12f6036607af6f\n",
- "379 d29dc92b9e1f40308b373c1feaba64b6\n",
- "380 d29f68ab824de5c9641813ca85ac691\n",
- "381 d29c1d9c597176505d43699a47d7ee2e\n",
- "382 d291acdbd759cb8da5cb32ec36ff7c8\n",
- "383 d296d76b5e39f47645de56169e8b75a\n",
- "384 d29cdb95cfa957d17f5c598e9a7d1fd\n",
- "385 d29afe6fc2ba2256a1d54d1841fd09a\n",
- "386 d2926e08a211c431631b9e3ad1ff3a\n",
- "387 d29c978c20ada05bbda59b5b4df01946\n",
- "388 d2905d2da31be4d896e01cb989b7a33b\n",
- "389 d298a3fcabed7531fa21f6e31ffa44\n",
- "390 d2912f36916842608b6f026888558\n",
- "391 d29979bec95bd937836ad8f513c4343\n",
- "392 d29a198573f1bdd60deabe1a1d0e669\n",
- "393 d294a57d4c3b634cba3c9786f83d746\n",
- "394 d29876388196118296732cc9de5c4c3\n",
- "395 d2904fcf257ab1d071adb3d0197fbea6\n",
- "396 d29de6c749995a657f686cc7312a66f0\n",
- "397 d29c1db1e79d573aaee391d16536b764\n",
- "398 d299d9295eec3275ac6766c777828a59\n",
- "399 d2921a88927731d2f586f923e8a83fc\n",
- "400 d29e39a136dad6ad234debf2f8d4facf\n",
- "401 d294cb1721f626926b83e263f18b55\n",
- "402 d2905bc59d4c1b55d3cd1bd2c5abd15c\n",
- "403 d294d51d4d2df9f7bd7ecc5a7fb8011\n",
- "404 d290c0f68de49420548fc674cf29b01\n",
- "405 d299d4a536ccb9f0722cc4fa44a32f13\n",
- "406 d29a3db0e17adda5d1b81986dbdbaed\n",
- "407 d29a8087a55cb855bf4019a44bb1dcf8\n",
- "408 d29fc641d016cafe244f5e1cd96ee2ab\n",
- "409 d29bcf7bfe6309e8f64a65d1b4f8e51\n",
- "410 d291bf19c28b57b8b86e43d04f2bcac\n",
- "411 d292bf6fa98d48cd5549b022328fef64\n",
- "412 d290644911735ee948b705a85e35c8\n",
- "413 d29df0f5a31d7cdc1b8d37ff824c9832\n",
- "414 d29450e886b5b1f825a9ebd50693638\n",
- "415 d293554f84d67d1d40fa834f2cdc19c5\n",
- "416 d297f65eb6778396ffa0fbc4a19217f4\n",
- "417 d291f16b3f979b1d8f5b6e90ac289073\n",
- "418 d29a9d3159b08ef8c7c0c59e90579758\n",
- "419 d29af2f570815fc2ca7475d4c994e28\n",
- "420 d29915b11f3faba2ef25837b5e2f3\n",
- "421 d292ba3bdbce131b4b8eaba527568c\n",
- "422 d29d4f93ee7b46b311aa51239528f\n",
- "423 d291a3a9822042b33f6fcb1c9784923b\n",
- "424 d2994c646d1a58a51e059e4866794f\n",
- "425 d29f5ae959310c31a201342a95ad0be\n",
- "426 d29b56938e0e851fea353dffcf7bdfa\n",
- "427 d29db91d89a1a8768ad269a57768755\n",
- "428 d29af5cbce13275d12295feb3c634838\n",
- "429 d29cf5c5ab5aa85e642e48e6af7fd21d\n",
- "430 d29fc380ed8a16c5ac9166a2b7a5587\n",
- "431 d29b43aaea1f53f10fe7a6dbada\n",
- "432 d29f689aa8dcc25f3e3633426a192a\n",
- "433 d29053b6bd4b624167afbd4f517b4b95\n",
- "434 d29145cf1aef12781ba46fd06bf974a\n",
- "435 d29dc0e926c665e23678acdeea24aaac\n",
- "436 d29c8e62cbfe4d14a646c9bbc8fcca5\n",
- "437 d29e8837c56f5267630e5ae169bfd7\n",
- "438 d29b74122114c49f36ab28ee4b97a2b\n",
- "439 d2987c6c6453c84fb481f9cf337a3c7\n",
- "440 d29173c29c7c5fcc92299e4b26f5efa\n",
- "441 d29a3731ab79ce73a4d593b9fec56cd\n",
- "442 d29aa2edfcd3bb4fb534027c494881\n",
- "443 d29d6222f59352c7883c9851cbb8d8d\n",
- "444 d296ed54c0a533659da6d5c8cdeed961\n",
- "445 d29f7ae54b96b167c4f3eb5772ba67\n",
- "446 d2962af850a2dcfc528eb58175e373b\n",
- "447 d29ce7f2934f6874fcebd63986193911\n",
- "448 d29a34ff699ecd4f359039902365f2ac\n",
- "449 d29da8178da009efa3663ef49a5bbbc\n",
- "450 d2986f88e06afb263353345729bc9fb0\n",
- "451 d297313017d2278a6b5414f0c32045ed\n",
- "452 d29492bd2642d4c1cb38cf556ba15141\n",
- "453 d292040f0d9472364211ba7f61858e\n",
- "454 d29b7145e356aa5c5636e0183ddb92e\n",
- "455 d29c45ab915f4d22ea37bee6b340c43a\n",
- "456 d29854a78878f17d04a4465ef1c1d33\n",
- "457 d299c9d4b94b8a6aca9a8c59a1a3d92\n",
- "458 d2994c6132ce56145bb5cb1040668360\n",
- "459 d2957b49289677c103bfa3c8c426ee2\n",
- "460 d29f6a26cd131dd82b3b54151431a4f8\n",
- "461 d2931df6c55137e6aa899bad4e4b02\n",
- "462 d29955bc44e4a34b495b72a355cb88\n",
- "463 d29c1dc294983dee299fc1d81caa72ad\n",
- "464 d29b5b3e656eb0ffc0494e01532cdcb\n",
- "465 d29fceda623a8e845eec231294b34289\n",
- "466 d2981ba75e6c7a85fe7dfceaebf659\n",
- "467 d2987a6bc7bfc7a2c6aff6fb466652e\n",
- "468 d29ab7c2bc5df4e1eda994590eeb294\n",
- "469 d2925d7b49ad90233c57b664e6ed6879\n",
- "470 d29ab251b6ea37c9868bf7724850e0a1\n",
- "471 d299902666d823548fe9cce52d60bad0\n",
- "472 d29851d275ac962358b158e0de1b13\n",
- "473 d2944768b1e5c897c545cbf240ea5367\n",
- "474 d29c87dac6749e7ecfac0860edab4b\n",
- "475 d29bf455ae5baf7df36aea784cad8edf\n",
- "476 d29d9e3fcba5e57e95b8a091c11a4e18\n",
- "477 d29de84d67cb66a9b4f68a4e36244d\n",
- "478 d297f29c3c57ba46c5e4759d7e275b92\n",
- "479 d299864d4459c82c9d417123fb7ed0e0\n",
- "480 d2956a07cc7b735cb2fc39940e079c2\n",
- "481 d29e8a8d225666058d1519cd1d9bcb\n",
- "482 d29ea1a95113e741e995195bef381eb\n",
- "483 d2924d93a760bbb9fdaa3580d6fb40\n",
- "484 d29a83c0ff462b1bee23b4914e28470\n",
- "485 d2933dd155f25bacd950a3e77785ae53\n",
- "486 d293a48ce261f88a756931de8fe812a2\n",
- "487 d2923e4fb7105a50964af5f793bb312\n",
- "488 d29ba0eb4b1aed94e2f514fa22d3144c\n",
- "489 d29bb961e35c744ceaa8d934f56c38\n",
- "490 d2996b3456a45a3d41238d9776d2e8d1\n",
- "491 d290558fbcd5a1c42c88e6a688391d5\n",
- "492 d29ca6fc962ba76eb2f6dd94dfa1b51\n",
- "493 d292e6dae1547d0c1771389fe9454d5\n",
- "494 d29fa8e372d613b21f876bc4e861164\n",
- "495 d2954d2b6a9d4e255f22dd209678\n",
- "496 d29cb4599b081c0fb829e394c3e3b67\n",
- "497 d29b8a9a436ed6e2b57f994238cfc3f\n",
- "498 d29dd04bba37b924be68b994d1ee2e4b\n",
- "499 d293db1c82edad55c8a8299e6b438793\n",
- "500 d29886f24fe094ddb7e46aaf5e0dab6\n",
- "501 d29dc2bde2c4827c70ec2be14799bc\n",
- "502 d29e5578df4d7f59092a02be4bce080\n",
- "503 d29d7a2f8b3f1bba983d6f2966f9e7d\n",
- "504 d29e81db43bf6a7161d2ec361371ff\n",
- "505 d29744b420d887a067757a3545822e\n",
- "506 d2907f4872a14b616ad6559a6bd68414\n",
- "507 d2919bfb4baf1ce16c76b5cffebd4177\n",
- "508 d29dc9ce25a91e984119614aba432934\n",
- "509 d29a39a665d71a1f95bd630a8f7b798\n",
- "510 d29e5e4fe2bb33d13aca84411dfe\n",
- "511 d2923a1af99c914ca2332a25cfc219a\n",
- "512 d293a3fc5bebde784f86994dcaaafcc\n",
- "513 d2937777a67145a58baf14705ad36ba7\n",
- "514 d296453ce16df5aab685a113b28d1a0\n",
- "515 d29e9754b3c64e1a4403898d9c91893\n",
- "516 d292a6e2c0d1928ddb1e773cbfaa4b6\n",
- "517 d2994c52c4139a85acb2643be15ae92d\n",
- "518 d29e843f3a842ff08412ad598a6c8f72\n",
- "519 d29b8be313f882b734a4c15de6578\n",
- "520 d297cc9a5828aaa149722a0a99bd31a\n",
- "521 d29f53d4d383ec62ede38be2624ca47f\n",
- "522 d291a8d61746094db6cfb5a37a5e811\n",
- "523 d293f937fe7825ef9266c34b0ab47e\n",
- "524 d29bc0c163c4dbf2c82b3648cb9ddd\n",
- "525 d29a2340f91e4bb4c3982245f5abe91\n",
- "526 d29e72d22e7849766784a5c1d57e1898\n",
- "527 d299ae8d8e8e5b1479981ba7e3d620d8\n",
- "528 d29fd3352226e4cc839cfb19ff05a61\n",
- "529 d29e74ce55d334f67db6ec34837b33f4\n",
- "530 d29c3d44fb84d8f711b37b742d957b\n",
- "531 d294236f1cd30b04c665afbfefa1a3\n",
- "532 d29ac034f114febf34847edf9d36f5\n",
- "533 d29eb139aeaaf7fa3d1bca4e44ab477a\n",
- "534 d295e75c7ac697329dd61a4ed47b64a5\n",
- "535 d29f1334244f1b3db941e3e410412d18\n",
- "536 d29e897cb34d9749afa9b714d36fef9e\n",
- "537 d29e5aad6acd594189b6cec37ca5b4\n",
- "538 d29925c2742e847a96765883d5df7f6\n",
- "539 d294517196a46fc47b02564e3c521a\n",
- "540 d2924adc342d2534f5eb7c53c110d482\n",
- "541 d292ce95c7f45a3c3493b8e4ba4268f\n",
- "542 d29f5120673264b9e6351343569c8643\n",
- "543 d29da888145eab653f71aaaa3e3b721\n",
- "544 d2915e2dc9768d7c9e293ba8ff78ccef\n",
- "545 d293e1e962479acb87de21b7b4de047\n",
- "546 d2959ed48cb926ba9835f41778c29d6e\n",
- "547 d295281dbf716fd08f070a0629d5da\n",
- "548 d290a6b81821ff7f9ad19afa2fd84ac\n",
- "549 d298531ae57af88b8433a7f282ea2b\n",
- "550 d297f18142c314a35af6c63b14b9f9e\n",
- "551 d2962446fd5abe52567fe06a1b38cce7\n",
- "552 d29a0cac08f5ed2323feda06c8bea28\n",
- "553 d29e9e1e2949c704454b2b6611d6d8c\n",
- "554 d29cd042363ce647ef1b76b1b391fc\n",
- "555 d295873185d9a9c1792b8b59d4bae4\n",
- "556 d29a4bb262d1b0729424e7f1bc7b44b\n",
- "557 d2908589c031ebf2abcd76a75fc11a0\n",
- "558 d2949f30dbbae0207b4a391fe658bb\n",
- "559 d297ebbd26c1ffc19841283c3da33b5\n",
- "560 d29e913733a376e1d68dc1d98937659\n",
- "561 d29d0cdfd7d93dd29577df869a06dd7\n",
- "562 d29af95d81eed483fd7fe25ef7f68a2\n",
- "563 d294afd91d3f92eeac312f73cc12395a\n",
- "564 d2952a96e1218338870efc098f19e\n",
- "565 d29c8e6ecad3413f7cb21c3f1f26e0\n",
- "566 d29a77eec1c5179561cb35204f6bae7e\n",
- "567 d2904a3dc19b4e8ab52f24f0f49ed576\n",
- "568 d292ea27ddc8522669b2aa0ab2a401a\n",
- "569 d29ed6ae6be3816d379b98efc8fb8b3b\n",
- "570 d29b13aaa457d8bacd6806919dfc1e\n",
- "571 d290a2953354a07cc6711c284edd2a88\n",
- "572 d295f3be27879c4aab716ef8d20156b\n",
- "573 d29748307bcfec96dd14fe6ba36abc56\n",
- "574 d295b1b47195707cb92faa7d952627\n",
- "575 d29b1acbe5886cdaccb0bafccb27e024\n",
- "576 d29b44b8afcaefa5769cd3bfdb96997\n",
- "577 d293c0c7285c877e805d50c2196d3c78\n",
- "578 d2997dee4dfcc7bc62bfe5db828c5\n",
- "579 d29399cc8460364389d2cd1a6392e54\n",
- "580 d2944cf6f785198d86773c4b44cc5c0\n",
- "581 d29285ab48f5a6ecd9c6a45b229b335\n",
- "582 d29d8536b0504ab1ac1eed17f1ccaebe\n",
- "583 d2988eddef757af7d9814858e4d980f6\n",
- "584 d296cda8b065f1315df7d284722773f\n",
- "585 d29577d5147cbbca8a97cf13fe1ae27c\n",
- "586 d2927f9825ab537f697784afbdc9bf9\n",
- "587 d297b94ebc79c5d68bd1bf46999cc8\n",
- "588 d2981b64d66b3ddae5b9d547df7f4bc4\n",
- "589 d29b4e8491a1454ac5b4effde5f29\n",
- "590 d294da8d22ec4c14683d3fbcb346298f\n",
- "591 d299e568ccd4bfe473214f116f16cd51\n",
- "592 d29d9be9054b13d6d9391419c09b1d\n",
- "593 d29067138de3e6fc96ac8837bc4d217\n",
- "594 d29d8eced5755ee1574b85cb0dbc390\n",
- "595 d294918dea66792e53ef27496fb42a\n",
- "596 d290c72ed624368549d9da1b296f127\n",
- "597 d29fdc2a87e9197967743cf0afc02b2d\n",
- "598 d29f5e962f897871323880579646c1c\n",
- "599 d29cd4a5555bde50a6c9cc3d6eaa93a\n",
- "600 d29da0cb710d6764f708cb3a16bd058\n",
- "601 d29938dd4780fef53cb4502944c2ae1\n",
- "602 d29dd6fd1e45279df9f83a8d16b481a\n",
- "603 d298a69ab72fc0784b72e43e42acd\n",
- "604 d29dfb75856f9761de28eec281a4412a\n",
- "605 d29f50837e6202bb0ff8db95626eaa3\n",
- "606 d2911cb3cee24be5abb2c7ebd92ea9b\n",
- "607 d29a9927caceae6c14dc6d7ec3eed2a\n",
- "608 d2937bef46c0e329fc3465dbf7d4b3e3\n",
- "609 d29af7e273912a5b4217277c54b8e26\n",
- "610 d2916e2d11436160344632db0dee96\n",
- "611 d29fce919dc47333a5c20f84960403f\n",
- "612 d295dc6f26f8e6379e2de3584d8a5fa5\n",
- "613 d2941541ad8d453ff7ffd6301e4d2818\n",
- "614 d29d6bb724e9eaf716f34c5c9f3690\n",
- "615 d2944b67e4a43a6362214585122ba\n",
- "616 d292493e4de6937f7515a7747e1ecd3d\n",
- "617 d29b656c1314f13cd9761279f1afcaa\n",
- "618 d29036667479f953455c613c75b274a9\n",
- "619 d29181e24020b418ea3ea3b8fc44f4d6\n",
- "620 d29fa49f6edefd91b8e854cc89b1d284\n",
- "621 d291e0cb8aa28d3b11a03e513da6d3\n",
- "622 d2992973f5ad4d39a1f19328663af85\n",
- "623 d297297a667b971328636252dd708\n",
- "624 d2998240a3682259f1f6ed2b36824d58\n",
- "625 d29b375f07fdd1976e6f41bc7d86559\n",
- "626 d29558b35bdcc2453d7a3067adcac60\n",
- "627 d2946353e753729d4a5127ed6b3b898\n",
- "628 d298165c933ec382a997c6c9665824\n",
- "629 d29c273bea347330addd7eb689651f8b\n",
- "630 d295a0f3693929e2c8ba2362f9b0acca\n",
- "631 d2998924cf5a2c1cfdfee129d4f5a46a\n",
- "632 d299d317d99a45faaff48b2772b0f6f6\n",
- "633 d29858f839fa221075a9bb494374bcc\n",
- "634 d29cd7edc74f467aed9f6321528ebdf\n",
- "635 d2992f3279c543fff277528149c59f3\n",
- "636 d29694d25d472eea115117b871659\n",
- "637 d29b51fa7b0b19c8557e94a6687e8a1\n",
- "638 d29015f0e054a3efbb9be424e58dbf4\n",
- "639 d296ed355cbf57ab8b69f99c70f0f3\n",
- "640 d291f57f60fb372b2ff12a8ce8704569\n",
- "641 d2932924f67936b51bb26840a99c6539\n",
- "642 d29899f48469ec2054cdd9a1a2918468\n",
- "643 d2941558d8eb2cd1dd944137bdfb8b29\n",
- "644 d295582dbe4e6a7243d6bc3a2833488d\n",
- "645 d294bd593ed959668e48c92881f73ffc\n",
- "646 d2956ff88a4af4ec6bb1eb08b8df32a\n",
- "647 d291ed4d52f2596f310212beac7bbd\n",
- "648 d294bff890fdb108792581b118c82\n",
- "649 d29e7e09384d5b8ea2e80d12ef553\n",
- "650 d297bf2df519cb82918959c3ccc0cb55\n",
- "651 d29737c8bac680c1a4b820f6e21457\n",
- "652 d29d705d34814746b26010a9cdaafb4e\n",
- "653 d293e464cbd68fcb8343eca51fbf8194\n",
- "654 d29996eb36de8886d68dba475d21c3a3\n",
- "655 d298cc71c18291df579944ee3cd93ac\n",
- "656 d29d81c9f42bfb5f5dd3321c5e9fb671\n",
- "657 d292e52e76e8ba45356e47255cdfe350\n",
- "658 d29b5f55f4116aa6de6f713c9cc2582\n",
- "659 d29ed238e14a28b3f41e33b454abb15f\n",
- "660 d2906b3162ee858fc6add2cb5f4b276\n",
- "661 d29830b5f42099f12c90f830a2ade11b\n",
- "662 d29215bf492daaaf2d54aa8022b6165\n",
- "663 d290668a4ca023fde6853e8b48ec9b4f\n",
- "664 d29ab760cfaf4b8232dd94e8ffe71b25\n",
- "665 d2912760a642287432653899a490a229\n",
- "666 d29d911a3e0b877c95a2962b7377a79\n",
- "667 d291a7e575ad7f83deade677c4f27246\n",
- "668 d29c7f972b1716113edc6a68c64e79\n",
- "669 d29fc373b6c588d3c81fe753f88257ac\n",
- "670 d29769564e8289f43c8ff769359ea9c6\n",
- "671 d29d92153f44fdba926224ac7b8bec3\n",
- "672 d297b8d58478a65c5ca79df8ad88b\n",
- "673 d29351fb392d71e61da1ffac96a29ae\n",
- "674 d29194b067e9dab16f189287ffc779ad\n",
- "675 d299f4c2f6fa7ef665b935f6d3bbd3\n",
- "676 d29af155d2d7c947b5e77397b64a544\n",
- "677 d29b42d6ece2ecac3d784a915b7485ac\n",
- "678 d2982667801c9f5171c6c1f04a797498\n",
- "679 d298e8bff0871d2249bf487971c377ca\n",
- "680 d2979222961dafe3cb68dbf3f0518b\n",
- "681 d29a6752837d58368e53925569908f1c\n",
- "682 d299d0b46a9eb5c41c4469e189a7eed\n",
- "683 d297b34212b5b49e1a1b28a8b3866773\n",
- "684 d293f2acede4ebe470106a623b6a515c\n",
- "685 d29a44a041d991b77794fb01167395\n",
- "686 d2972c64bf3d7131b17912d56db8\n",
- "687 d29e50c918f8b4b4c8bcf34364afbdd8\n",
- "688 d29ea7580511e498046f389bce9d53\n",
- "689 d29025f886bcdc88e3cf3795c96262\n",
- "690 d29ffc11409938bbb8eb13bde312d777\n",
- "691 d29ee0be2178e838fa8ea3861d9b7172\n",
- "692 d2904623523d8119d2b1a28364cedfc9\n",
- "693 d296e64238df7f7f47a2d664d34ed85\n",
- "694 d29b441881275c44f3c2c62e8e78c\n",
- "695 d29cdbb47a6b9db377c38f6666cf526\n",
- "696 d294e9faa752812dfbfd612fe6e8f3\n",
- "697 d29637c1fb7be3ba592c873ccfb3fac\n",
- "698 d296164d9647a69fc2a22855ea2207b\n",
- "699 d292bbd49cfbc285ae38abc7f581f37e\n",
- "700 d29b8659d7fcc6bfa6ab48a7eee32e96\n",
- "701 d297388166d5f101c5c3496d223c48b\n",
- "702 d291ceb7ef4442ca3ff9f6437c2460\n",
- "703 d29accbd24c24f7621e05afcb067e\n",
- "704 d29da88fc29a3bca4dcfdc190f9dff\n",
- "705 d2987a60afd375f03612b54f3b9cd93\n",
- "706 d29517c88f943233dfbb38b5bfe7245\n",
- "707 d296e19f3d1bc89a4f5aa0e55bf190ae\n",
- "708 d29a651edf99216156e1b366ad3f9469\n",
- "709 d297de9d0b1b1a58fb375c5ce4a1f6\n",
- "710 d29191dc4ad5586eb86162e8c6ffa8a6\n",
- "711 d2926d26636aa631e6ca74ff32ec99a\n",
- "712 d29a13c54979d5667bd0a06c231587cb\n",
- "713 d294ad142f70de4eee6ef9aabd051f7\n",
- "714 d29e758920ad853db60a175dbf16\n",
- "715 d2941b4a896ac7205845efc1e3d311f4\n",
- "716 d2979692498dda6be501a2c19abbec1\n",
- "717 d29035fa19ce8d2db8209c6dd9cf18c\n",
- "718 d297902857ecceb79df42eb2b8286830\n",
- "719 d29ebd7d6556655ba798a563c3c81b4\n",
- "720 d2996ccbc7aef032ddc9c40c9f4e2\n",
- "721 d29f3bf78d3e4e659e8c65b2a06063ef\n",
- "722 d2973089b678f4f783f7687fdfd3c413\n",
- "723 d29e239ea96cdd0f59bc7e3d299a723\n",
- "724 d29e50b79ce062e5e8bc6c1b2622ed1d\n",
- "725 d297a8f1a68efcf7f6a3747f9b99bc16\n",
- "726 d29463a2ef7ee7ab996621fff62cce4\n",
- "727 d29977ed567f23fe5d5b13f5186ea89\n",
- "728 d2985efb90f03916d7d36c121cec28\n",
- "729 d29842dcb0b1fccd70c834c2e92e5a1a\n",
- "730 d290aa2a5091fac7d791ed22707cdcac\n",
- "731 d29e107f6963ad47284ed6a47366a3f\n",
- "732 d29711f5d625e953559d1a6b33462bd4\n",
- "733 d2955ca3d3d6f6cefdf20b350ebb9c7\n",
- "734 d29c3a4c14fc71c7d050fd293c2737\n",
- "735 d2955b7c292299d843e3a2bd7a631816\n",
- "736 d29b3807fcbecdbc37d7a11421a612e\n",
- "737 d29cec30dfa4a2bb22b2785522d8bac\n",
- "738 d29afc07a96bbdc641abdbc71862f7\n",
- "739 d298116eb1cdf4135a5761a4774c5e59\n",
- "740 d2905e5f8949982586204e633e322fe\n",
- "741 d299fe3aae57bbc76f66867395063\n",
- "742 d29d987f35742fa36bc11ea02ef6c395\n",
- "743 d296257f78985bee2f732d4bdcd9787\n",
- "744 d29aa8dcf7d7bbb373adf71c8f05418\n",
- "745 d29e4124dacad46ca69bb3952f1654\n",
- "746 d299bfd25f0b38f9e9b1cb076999968\n",
- "747 d29cb18dc082b9abf777775c244bcb\n",
- "748 d295ea8e422a5599c2e7865ed482eb5\n",
- "749 d293878ad47054666535fbc2bb4783d\n",
- "750 d29c78cf63ccaa475b9be293c50dd48\n",
- "751 d292bdf79b415e4fdc426be3a29eb62\n",
- "752 d293c4bad6e52c345a5c6a37168672\n",
- "753 d297224294f55bc425603b6e261e88c8\n",
- "754 d2924655f5f018d7f2a153691444f1bc\n",
- "755 d292eea19488127d5c4247ff3832c7\n",
- "756 d29ee24ef7d324f6c94df7ac4f534b3\n",
- "757 d29ac7c01ebbb7d56b12929253a2e711\n",
- "758 d2925f53be7ac95079be431cfb3393e\n",
- "759 d29ad94d7541ad3f4569386c1311570\n",
- "760 d2982ce8d6fd3d7891d7a5244d6e46\n",
- "761 d298dc1f3a5423b0c848d648f3578de\n",
- "762 d29866f8452bc5e5a93495d309824f6\n",
- "763 d29237ee31a09c1143df8ad7e5edd7e\n",
- "764 d29f1036bfa749f2844986e4ec2b08f\n",
- "765 d290c145d34ccf80b624421a3a9862c\n",
- "766 d290c5efc4398d2f99b8806260b3bd5\n",
- "767 d29bce9d439282122cacfd9a3994ee6\n",
- "768 d29487ffb1f785b1dec81bcb1644a5\n",
- "769 d294c93212c56d4c638e1015c7a4cd\n",
- "770 d29ca0b321bf709c9314bf546435a014\n",
- "771 d2944be7a48692e91c12e44924e8c378\n",
- "772 d29599c0222fe18127894979b2d9e195\n",
- "773 d291b5269b7e2be302bfc2e35cd9493\n",
- "774 d29ad7050d32f987bd1ec29956537e\n",
- "775 d29e847361db8a6d95dd31e02656adf3\n",
- "776 d2938c8eb48754392aa2979628517cf8\n",
- "777 d2985b3835dd12d38b4ee2fe44f5d51\n",
- "778 d292bc4f15b8a333420c036e552dbef\n",
- "779 d29fd3b52d428dfcbabc1e5dce0b218\n",
- "780 d2925265e9c682772c6fcc801fd567ed\n",
- "781 d29385b2d29b13dc8531bc0e3ae3c3\n",
- "782 d29862aaf54f52438142c03ec5fd9\n",
- "783 d29517565aef432bca2c56111eb6bb8\n",
- "784 d295e1fea5f25c2ae638080cd4fd986\n",
- "785 d29d7de1daa871be5dcea8b895860c9\n",
- "786 d29b2dfc8c1aa0b5d35e12298ece91db\n",
- "787 d29dd7cf86216b8880f4bcac6c5ac89e\n",
- "788 d296ea2bba6573f1a42eb1f4b861ee\n",
- "789 d29db474e03b44f26b1918293eb880ab\n",
- "790 d29c5bba5ada4ff67ea059ec58bec4ff\n",
- "791 d29f8eded8bd5e5b39384682b2de5\n",
- "792 d2983d5eb4a476b5d821989ac7f7c99\n",
- "793 d29cee44da3e77889c2fab8a3145aab9\n",
- "794 d29ba9669bc9321fecefc1d11f498d2\n",
- "795 d29825d027a364754199952ca1334046\n",
- "796 d29aad8d9afcaf7e6ed0fae648833447\n",
- "797 d2975510d6eefcc0eda919f86f148b1c\n",
- "798 d291a0ce1a15ea52f6eeaed1edf58dce\n",
- "799 d2904816b05dc67c8537bb6582de982f\n",
- "800 d29fab6e11338ab587f16136cf9444\n",
- "801 d296acd93e6038c342a36b18a7909fd0\n",
- "802 d2982cd07bbc8e64c6e135232b8b6925\n",
- "803 d299dd6b21d0c6f1cbd19185d579dc\n",
- "804 d29544126d3e23f648bd112ef6a61ce\n",
- "805 d29826cc8d2dd83b92e86337d0c031c\n",
- "806 d29dd510e5e7614b5ca4379d6ed707\n",
- "807 d29ad167c0ce4337b9589eafe03494af\n",
- "808 d29960b34deb26acde68498c9f6f9cf\n",
- "809 d299c3dd50eff7982fcfb87421d424f\n",
- "810 d292f74f748e4d1973c5b9515e96f\n",
- "811 d29aab4e896a43874eb098ff95b7d359\n",
- "812 d29750922a9834d5a9dd10bb748832b\n",
- "813 d29fa86517414edfbdf9a96e18ac67\n",
- "814 d29aa42bf76a9f2cd1c51157b2368a2\n",
- "815 d29f267452f46384c1eee391630f\n",
- "816 d29ff196e725e8639fbc3a09899e8\n",
- "817 d296c63571356a27fbdf0afd31afb\n",
- "818 d29be82641878d0b8f02762222f8256\n",
- "819 d29f3c32b0e180c8636dbe96571c47ca\n",
- "820 d29c3829d6808ff566be34ff1f3f3a21\n",
- "821 d2966dfabc587217328be7627c64a4a\n",
- "822 d299e265e3e3e6535a294dc4adeddf\n",
- "823 d29e7ba26ed08b7b5499d2853bbe1440\n",
- "824 d2943428447dafe9c198f43443dec1\n",
- "825 d29aea1d82818daffabb7f916ca6e80\n",
- "826 d294b396de33e8ec9e19064faffa6c7\n",
- "827 d292357ac7c73ffe69394836cbe6c81\n",
- "828 d2946d777ace8d523759eed85c97d\n",
- "829 d2993a76ad5a82137a158a3cd69a3498\n",
- "830 d29ced67ec73d4466990b2a0c39dc952\n",
- "831 d298d6a3641f7a81d7e810f3178c3ffb\n",
- "832 d29c3acabee676411107c46199976a\n",
- "833 d292f399f7e6c1748a6697fbdb8a3b0\n",
- "834 d299cd99a36be18b25f23d3578a819cb\n",
- "835 d293983a451ca4bb7ded3ec6ee2e934\n",
- "836 d29a176ba9b216e746d4fcd147dd01e\n",
- "837 d29c48fc5e615a222bf8f16cc683d217\n",
- "838 d29f3f15a8513c1281e157c6e990d5bc\n",
- "839 d299e63ed9cbd0dd4279a0c1b8406f2\n",
- "840 d2913834d6c820d0cae85aab464764d\n",
- "841 d29528f1fba2a9365522f0528dd69c4\n",
- "842 d296d5361b93faa231da4da432b8720\n",
- "843 d29ebee84c287cd5ed27dfbe37df79a\n",
- "844 d29cd3f3444df8673be61bd7499332c\n",
- "845 d290855d546a7a55da3548fe7deb5c2\n",
- "846 d29425170a41b4b75177787b5c65ab\n",
- "847 d294fb4a65a5d037676413a1d95cee12\n",
- "848 d297cd83a3c4d24264d8fad18c4ab9\n",
- "849 d29c1ee453c62685c16851bb257f1b7\n",
- "850 d29da361eded18e5845516efa1c146\n",
- "851 d29316f1b419cca78b8eb21c91d094f2\n",
- "852 d2907ecfc6276dca050fc71a479048\n",
- "853 d29e9a4d1b55913b632a057c282add\n",
- "854 d29d5849151bfa133bacad8ab196d7\n",
- "855 d29f52a596a66fd22c99f7292fcbd4\n",
- "856 d2995314458dc296fa8a50d578907f63\n",
- "857 d299729169810aee8cf863b98885472\n",
- "858 d29098775cb7d920da1dd2d5d444df1e\n",
- "859 d29dc3f4bf446d5bff72ccb27c631\n",
- "860 d29ddc6413cd9fdaa9b97bfb38697166\n",
- "861 d29a189d80fb1b8d64b2b92d6e25576\n",
- "862 d297d4df9a1ec83dbf159293653ff71\n",
- "863 d290b698b6489f39e8f2ad3ce4261594\n",
- "864 d29fa9ad34a40779b3736a71e3e8956\n",
- "865 d295d0175728998b8560507b74416d81\n",
- "866 d295b028a2ba30598e75b46cc28feafb\n",
- "867 d29a33b2188451fca68da55e201b57d3\n",
- "868 d29273375d45edaa581a111ce22e5482\n",
- "869 d29307629e27a419c4c9459a918ff6f\n",
- "870 d296de62b4fa21342a695aebf897b4f\n",
- "871 d29691391d561ed43b68c891792e946\n",
- "872 d29e81413ac99fb57626e48278e783\n",
- "873 d2925c82aa7fd47f6ea9e20f5715719\n",
- "874 d29f6adb7938f5a11ffcd43fa17be269\n",
- "875 d2956a499e1e38b4538a34c88af8438\n",
- "876 d2941bd03c33d2bb4921405e52677319\n",
- "877 d294252778388bab43954be9d64bf111\n",
- "878 d29fd97555f88c8f56515d5ef6aa35a\n",
- "879 d2966cf6fc7be1abf528520b894d077\n",
- "880 d29ee684b54476792bddd1dcf5455a5d\n",
- "881 d29be915e4b91c211a954dea9af625\n",
- "882 d297fd4966bb91d5809164c3eed20f3\n",
- "883 d299c65b6d9e92f0e34f3ca5452f2142\n",
- "884 d297bce473a261d5a8fdc04b62a9b9a\n",
- "885 d29ebb6fb2285a61486c87fff8d38e\n",
- "886 d29f118564ee222d29141c5670a3f3cc\n",
- "887 d29152bcecd9b6abd8cc15c33d28948\n",
- "888 d29b5b2817bf309dcf124afcc41096\n",
- "889 d298ebd5120ab3cf7a4184a43c5cf\n",
- "890 d29c328293364cd58d99c33f46a7c1\n",
- "891 d297c62a9eb99ba18b46df6c43ef0e5\n",
- "892 d296d4312ccba6f29387ac3a67a0ce\n",
- "893 d2972baeb06f9d30176a6a2b8f5c535\n",
- "894 d29cb9ffbaa7fca2aa38c5aa0050a8\n",
- "895 d2941e7858af7221c16d1bfe1792b3a\n",
- "896 d29ed990f5cbb0eaeb1cdd1645e216cb\n",
- "897 d29f8616ffa1c0e16437ecda5833578e\n",
- "898 d29f264016fb567fc523f16c99dd63d\n",
- "899 d297f883cdb8fba51734bfcb78ac\n",
- "900 d295f5e269934ac4bf355afa329b0ac\n",
- "901 d2961171e684b9d624ecf5439932a537\n",
- "902 d291c634c899a6c3fe5f28c54680f4ac\n",
- "903 d29cd457bd71c490bb53521b1730eb2c\n",
- "904 d292daf9834286ad9850fc67f4c3d69\n",
- "905 d29effc9b2125ce69c928f41dffefb3\n",
- "906 d298d24472564871585443c2ba9f6625\n",
- "907 d29d42f9314878c6bb302c1a73d6f1\n",
- "908 d296bcc710b43b719bcdd93e2cdaf29\n",
- "909 d29825d81e57c0c317ae93b5dbe78\n",
- "910 d29b39adc7a9e4ef33cbd8a6ef32879\n",
- "911 d298f52822e8a1b966f359eef53869ed\n",
- "912 d2909f79c3d51e9b8b41366d851791\n",
- "913 d29f4b4ed084c42652b62b0b6182269\n",
- "914 d2952bad222361a6263b53cef5c08fd7\n",
- "915 d29e95a3a24e6f548dc5bc66e9534ef9\n",
- "916 d2983e18a5eb44da8b9cd2955d2598\n",
- "917 d292380b6f7a4791e6829215b4df483\n",
- "918 d2966824b928a21da99f327dcc25b2c\n",
- "919 d29690a89ccff87641923adb266ace\n",
- "920 d297d320fc6e1221f2939dead1829f1\n",
- "921 d297dac5dd61868c16393413e9df419\n",
- "922 d291f0fce6e2b09f27c637d1def6fda0\n",
- "923 d2937ba495db231c9f863bdd5e2efc2\n",
- "924 d2945f89f43315f3fcee9ccc5f14fde\n",
- "925 d294d41c45ea9fd8cf1df22214f7f65\n",
- "926 d2923eda9fd98f1fa0fdc85c2c6a8f58\n",
- "927 d2919a159571de2c8ae87fcee7f72\n",
- "928 d294bd134345a46391dcec1cd27248fd\n",
- "929 d299b714d28830663458662681e041e4\n",
- "930 d2994917ae56468019ace55110693b\n",
- "931 d29638364c5166dc5ce5040424db5\n",
- "932 d297a7b0a91ff4e9d999dfad446501d\n",
- "933 d29883a44e13226a369554c0f826474\n",
- "934 d291879da81e887f31e11fe0c54b69ed\n",
- "935 d290fd3d51f8d62324b0338a84278ba8\n",
- "936 d29465e1fe608a4bdd4b3cba5f985129\n",
- "937 d293d623b63e47b96e812ac2fe5565f\n",
- "938 d29fffdf16211b8d5aa41487a8daa5ca\n",
- "939 d299fc7fb7f458ec1b976a5a52b8b04a\n",
- "940 d296a995f653a0335e447e0f9f8804c\n",
- "941 d296f252693c6130da6fbaadc08469\n",
- "942 d29cc9dcde13c9371a28cc1bf9836e3c\n",
- "943 d295918d4f51d352b3c83bdf3d16f861\n",
- "944 d29832ee32acfc4c7b56c4d1eed42\n",
- "945 d296ef3360d4f5ddfbd530d479d2992c\n",
- "946 d2965113b74b1a9ec3cbc33602811e9\n",
- "947 d2956451b5c77299969f87aea3621e3\n",
- "948 d29ab427ff507dbbe13ae25ebbbace6f\n",
- "949 d29a5ba29763bc916b853c15293689f\n",
- "950 d2927f7a6056ab6be96cd0812640ce\n",
- "951 d29ac16ee01e78164acdd4e9ae56b65c\n",
- "952 d298f1ab24787baabadc2c79489857b\n",
- "953 d2934db68cdb24285a4bfe4c45de83\n",
- "954 d296a2c4fd479d35942e20779121cd2b\n",
- "955 d292aedad670eb23c0de67d754c9f\n",
- "956 d292f67c97843c616fe91b24b833e81\n",
- "957 d294b46b302a24644766c7449594721f\n",
- "958 d292961146b9cbbb547223db2a8a9\n",
- "959 d296a012631260f8f4d62a553b79b2d9\n",
- "960 d296fe9aac4d48e7bf61db9aac5bcb8\n",
- "961 d29c64939a3116d25d2baea9fa5ca2\n",
- "962 d2921bc19d4534ab7fa7a85bf67e1faa\n",
- "963 d29e44e97f49146198417e4ab07cf7e6\n",
- "964 d29e7f55fdb62ca7f29191e6f3551ebb\n",
- "965 d294807c2d6877a01b863757ccbf\n",
- "966 d29399f926878adeae85b9126c9c545\n",
- "967 d295684772ee4705d79a7ecfa44572\n",
- "968 d299e639d6e22972f6789e1f7613dee2\n",
- "969 d2955e19f597df6c42b37859b59b4a\n",
- "970 d295648026dce77c96bb4f94cb1b6ae\n",
- "971 d296b192e72f956789e68dd798faecd\n",
- "972 d2927984b7b4badce29cbef261244\n",
- "973 d2981e54d04b40b869399c3ae30dea3\n",
- "974 d29ace284cb77abebfe84a87eace985\n",
- "975 d29f28f637ff8952889657bebddfed5\n",
- "976 d292e945bbd333b72c4951321587958d\n",
- "977 d29b28c6e5e48c4d898cb786c3ddc\n",
- "978 d2919df6a0b0c198a55db2b82c9e8a\n",
- "979 d29d73b4807db874afb1951d5c6fe58\n",
- "980 d2998145f1a42e419e9c669f3ce36f5\n",
- "981 d2967bcc651b29e9e7bd65fab12d5a3\n",
- "982 d291736293c558225a0cebe457a6f2\n",
- "983 d29e9483c1c73bda7d7d74e869b4e7e\n",
- "984 d299d5f6b506c6236dc858da34f1cc\n",
- "985 d2913ad1734310694a6c2c35a1c569e8\n",
- "986 d294bdca75f6d53d497559412a7a3d\n",
- "987 d29aecc65b7df1f508c83df595ff4e\n",
- "988 d29cda9cb047b6bdbcd4d3b50feec7e\n",
- "989 d29739396b17f9e255c7726de428c5f\n",
- "990 d29b475454526ecffec9fefcf8f01c8e\n",
- "991 d29667e51ed875183825ab53d44fa70\n",
- "992 d297e8ed757593d67a2771257a27be4\n",
- "993 d295c322fc9ee4dca758544c942f2d53\n",
- "994 d298372c48d5c8aaa16ee2f3a5a5380\n",
- "995 d2946559a807388662cd0308ad666dd\n",
- "996 d29dcc2038b89c365b3aba17f94bf52\n",
- "997 d29fcaee2537fda115ad172ed10778\n",
- "998 d29ca7d044203e0242084cb958ef464\n",
- "999 d299349d8bd55ccae1dcea12b2b7ca73\n",
- "returning 0\n"
- ]
- },
- {
- "ename": "IndexError",
- "evalue": "index out of bounds",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)",
- "\u001b[0;32m/tmp/ipykernel_617634/3764770081.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m filtered_dataset = dataset.map(\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mselect_existing_rows\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mremove_columns\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdataset\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumn_names\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mbatched\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mnum_proc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m~/code/hf_jax/datasets/src/datasets/arrow_dataset.py\u001b[0m in \u001b[0;36mmap\u001b[0;34m(self, function, with_indices, input_columns, batched, batch_size, drop_last_batch, remove_columns, keep_in_memory, load_from_cache_file, cache_file_name, writer_batch_size, features, disable_nullable, fn_kwargs, num_proc, suffix_template, new_fingerprint, desc)\u001b[0m\n\u001b[1;32m 1655\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1656\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mnum_proc\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mnum_proc\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1657\u001b[0;31m return self._map_single(\n\u001b[0m\u001b[1;32m 1658\u001b[0m \u001b[0mfunction\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfunction\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1659\u001b[0m \u001b[0mwith_indices\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mwith_indices\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m~/code/hf_jax/datasets/src/datasets/arrow_dataset.py\u001b[0m in \u001b[0;36mwrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 183\u001b[0m }\n\u001b[1;32m 184\u001b[0m \u001b[0;31m# apply actual function\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 185\u001b[0;31m \u001b[0mout\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"Dataset\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"DatasetDict\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 186\u001b[0m \u001b[0mdatasets\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"Dataset\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mout\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mout\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdict\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mout\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 187\u001b[0m \u001b[0;31m# re-apply format to the output\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m~/code/hf_jax/datasets/src/datasets/fingerprint.py\u001b[0m in \u001b[0;36mwrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 395\u001b[0m \u001b[0;31m# Call actual function\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 396\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 397\u001b[0;31m \u001b[0mout\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 398\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 399\u001b[0m \u001b[0;31m# Update fingerprint of in-place transforms + update in-place history of transforms\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m~/code/hf_jax/datasets/src/datasets/arrow_dataset.py\u001b[0m in \u001b[0;36m_map_single\u001b[0;34m(self, function, with_indices, input_columns, batched, batch_size, drop_last_batch, remove_columns, keep_in_memory, load_from_cache_file, cache_file_name, writer_batch_size, features, disable_nullable, fn_kwargs, new_fingerprint, rank, offset, desc)\u001b[0m\n\u001b[1;32m 2022\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2023\u001b[0m \u001b[0mbatch\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcast_to_python_objects\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbatch\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2024\u001b[0;31m \u001b[0mwriter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwrite_batch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbatch\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2025\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mupdate_data\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mwriter\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2026\u001b[0m \u001b[0mwriter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfinalize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# close_stream=bool(buf_writer is None)) # We only close if we are writing in a file\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m~/code/hf_jax/datasets/src/datasets/arrow_writer.py\u001b[0m in \u001b[0;36mwrite_batch\u001b[0;34m(self, batch_examples, writer_batch_size)\u001b[0m\n\u001b[1;32m 386\u001b[0m \u001b[0mtyped_sequence\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mOptimizedTypedSequence\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbatch_examples\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcol\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcol_type\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtry_type\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcol_try_type\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcol\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcol\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 387\u001b[0m \u001b[0mtyped_sequence_examples\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcol\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtyped_sequence\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 388\u001b[0;31m \u001b[0mpa_table\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpa\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTable\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_pydict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtyped_sequence_examples\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 389\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwrite_table\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpa_table\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwriter_batch_size\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 390\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m~/miniconda3/envs/hf_jax/lib/python3.8/site-packages/pyarrow/table.pxi\u001b[0m in \u001b[0;36mpyarrow.lib.Table.from_pydict\u001b[0;34m()\u001b[0m\n",
- "\u001b[0;32m~/miniconda3/envs/hf_jax/lib/python3.8/site-packages/pyarrow/array.pxi\u001b[0m in \u001b[0;36mpyarrow.lib.asarray\u001b[0;34m()\u001b[0m\n",
- "\u001b[0;32m~/miniconda3/envs/hf_jax/lib/python3.8/site-packages/pyarrow/array.pxi\u001b[0m in \u001b[0;36mpyarrow.lib.array\u001b[0;34m()\u001b[0m\n",
- "\u001b[0;32m~/miniconda3/envs/hf_jax/lib/python3.8/site-packages/pyarrow/array.pxi\u001b[0m in \u001b[0;36mpyarrow.lib._handle_arrow_array_protocol\u001b[0;34m()\u001b[0m\n",
- "\u001b[0;32m~/code/hf_jax/datasets/src/datasets/arrow_writer.py\u001b[0m in \u001b[0;36m__arrow_array__\u001b[0;34m(self, type)\u001b[0m\n\u001b[1;32m 98\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 99\u001b[0m \u001b[0mout\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpa\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 100\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0mtrying_type\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mout\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mas_py\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 101\u001b[0m raise TypeError(\n\u001b[1;32m 102\u001b[0m \u001b[0;34m\"Specified try_type alters data. Please check that the type/feature that you provided match the type/features of the data.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m~/miniconda3/envs/hf_jax/lib/python3.8/site-packages/pyarrow/array.pxi\u001b[0m in \u001b[0;36mpyarrow.lib.Array.__getitem__\u001b[0;34m()\u001b[0m\n",
- "\u001b[0;32m~/miniconda3/envs/hf_jax/lib/python3.8/site-packages/pyarrow/array.pxi\u001b[0m in \u001b[0;36mpyarrow.lib._normalize_index\u001b[0;34m()\u001b[0m\n",
- "\u001b[0;31mIndexError\u001b[0m: index out of bounds"
- ]
- }
- ],
+ "outputs": [],
"source": [
- "filtered_dataset = dataset.map(\n",
- " select_existing_rows,\n",
- " remove_columns = dataset.column_names,\n",
- " batched = True,\n",
- " num_proc = 1,\n",
- " desc = \"Selecting rows with images that exist\"\n",
- ")"
+ "# Build the cleaned (image_file, caption) table: read each chunk, join title and description\n",
+ "# into a caption, and keep only complete rows whose image exists on disk.\n",
+ "chunks_dir = \"./chunks\"\n",
+ "clean_frames = []  # one cleaned frame per chunk; concatenated once after the loop\n",
+ "for filename in sorted(os.listdir(chunks_dir)):\n",
+ "    if filename == \"YFCC_subset_clean.tsv\":\n",
+ "        continue  # output of a previous run saved in the same folder, not an input chunk\n",
+ "    df = pd.read_csv(os.path.join(chunks_dir, filename), sep=\"\\t\")[[\"key\", \"title_clean\", \"description_clean\", \"ext\"]]\n",
+ "    df['caption'] = df[\"title_clean\"]+\". \"+df['description_clean']\n",
+ "    df['is_exist'] = df.apply(image_exists, axis=1)\n",
+ "    # dropna() only removes missing values, so rows flagged False must be filtered explicitly\n",
+ "    df = df[df['is_exist'].eq(True)]\n",
+ "    df = df.dropna()[[\"key\", \"caption\"]]\n",
+ "    df.columns = ['image_file', 'caption']\n",
+ "    clean_frames.append(df)\n",
+ "# Single concat instead of DataFrame.append in the loop (quadratic, removed in pandas 2.0)\n",
+ "global_df = pd.concat(clean_frames, ignore_index=True) if clean_frames else pd.DataFrame(columns=['image_file', 'caption'])"
]
},
{
"cell_type": "code",
- "execution_count": 109,
- "id": "7060ff8f",
+ "execution_count": 89,
+ "id": "45024fdc",
"metadata": {},
"outputs": [],
"source": [
- "# df['image_exists'] = df.apply(lambda row: image_exists(row['key']), axis=1)"
+ "# saving the tsv to disk\n",
+ "global_df.to_csv('./chunks/YFCC_subset_clean.tsv', sep=\"\\t\", index=False)"
]
},
{
"cell_type": "code",
- "execution_count": 113,
- "id": "fecc9a00",
+ "execution_count": 101,
+ "id": "dca4eb73",
"metadata": {},
"outputs": [],
"source": [
- "image_size = 256\n",
- "def image_transform(image):\n",
- " s = min(image.size)\n",
- " r = image_size / s\n",
- " s = (round(r * image.size[1]), round(r * image.size[0]))\n",
- " image = TF.resize(image, s, interpolation=InterpolationMode.LANCZOS)\n",
- " image = TF.center_crop(image, output_size = 2 * [image_size])\n",
- " image = torch.unsqueeze(T.ToTensor()(image), 0)\n",
- " image = image.permute(0, 2, 3, 1).numpy()\n",
- " return image"
+ "# Reload the cleaned captions table from disk so later cells do not depend on recomputing it\n",
+ "\n",
+ "dataset = pd.read_csv(\"./chunks/YFCC_subset_clean.tsv\", sep=\"\\t\")"
]
},
{
"cell_type": "code",
- "execution_count": 98,
- "id": "1a065700",
+ "execution_count": 153,
+ "id": "a511264a",
"metadata": {},
"outputs": [],
"source": [
- "class YFC100Dataset(Dataset):\n",
- " def __init__(self, image_list_path: str, images_root: str, image_size: int, max_items=None):\n",
+ "\"\"\"\n",
+ "Luke Melas-Kyriazi's dataset.py's modified version for YFCC\n",
+ "\"\"\"\n",
+ "import warnings\n",
+ "from typing import Optional, Callable\n",
+ "from pathlib import Path\n",
+ "import numpy as np\n",
+ "import torch\n",
+ "import pandas as pd\n",
+ "from torch.utils.data import Dataset\n",
+ "from torchvision.datasets.folder import default_loader\n",
+ "from PIL import ImageFile\n",
+ "from PIL.Image import DecompressionBombWarning\n",
+ "ImageFile.LOAD_TRUNCATED_IMAGES = True\n",
+ "warnings.filterwarnings(\"ignore\", category=UserWarning)\n",
+ "warnings.filterwarnings(\"ignore\", category=DecompressionBombWarning)\n",
+ "\n",
+ "\n",
+ "class CaptionDataset(Dataset):\n",
+ "    \"\"\"\n",
+ "    A PyTorch Dataset for (image, text) pairs whose images are stored in sharded folders.\n",
+ "\n",
+ "    The dataset returns the raw caption text rather than tokens. This is done on\n",
+ "    purpose: it is easy to tokenize a batch of text after loading it from this dataset.\n",
+ "    \"\"\"\n",
+ "\n",
+ "    def __init__(self, *, images_root: str, captions_path: str, text_transform: Optional[Callable] = None, \n",
+ "                 image_transform: Optional[Callable] = None, image_transform_type: str = 'torchvision',\n",
+ "                 include_captions: bool = True):\n",
"        \"\"\"\n",
- "        :param image_list_path: Path to a file containing a list of all images, in jsonl format.\n",
- "        :param images_root: Root directory containing the images\n",
- "        :param image_size: Image size. Source images will be resized and center-cropped.\n",
- "        :max_items: Limit dataset size for debugging\n",
+ "        :param images_root: folder where images are stored\n",
+ "        :param captions_path: path to the tab-separated file with `image_file` and `caption` columns\n",
+ "        :param image_transform: image transform pipeline\n",
+ "        :param text_transform: optional callable applied to each raw caption\n",
+ "        :param image_transform_type: image transform type, either `torchvision` or `albumentations`\n",
+ "        :param include_captions: Returns a dictionary with `image`, `text` if `true`; otherwise returns just the images.\n",
"        \"\"\"\n",
- "        self.image_list = pd.read_json(image_list_path, orient=\"records\", lines=True)\n",
+ "\n",
+ "        # Base path for images\n",
"        self.images_root = Path(images_root)\n",
- "        if max_items is not None: self.image_list = self.image_list[:max_items]\n",
- "        self.image_size = image_size\n",
- "        \n",
- "    def __len__(self):\n",
- "        return len(self.image_list)\n",
+ "\n",
+ "        # Load captions from the file given by the caller (previously a hard-coded path was read)\n",
+ "        self.captions = pd.read_csv(captions_path, sep=\"\\t\")\n",
+ "        self.captions['image_file'] = self.captions['image_file'].astype(str)\n",
+ "\n",
+ "        # Optional transformation pipelines for the caption and the image\n",
+ "        self.text_transform = text_transform\n",
+ "        self.image_transform = image_transform\n",
+ "        self.image_transform_type = image_transform_type.lower()\n",
+ "        assert self.image_transform_type in ['torchvision', 'albumentations']\n",
+ "\n",
+ "        # Total number of datapoints\n",
+ "        self.size = len(self.captions)\n",
+ "\n",
+ "        # Return image+captions or just images\n",
+ "        self.include_captions = include_captions\n",
"        \n",
+ "    def _image_path(self, name):\n",
+ "        \"\"\"Return images_root/name[0:3]/name[3:6]/name with a .jpg suffix.\"\"\"\n",
+ "        return (self.images_root/name[0:3]/name[3:6]/name).with_suffix(\".jpg\")\n",
+ "\n",
+ "    def image_exists(self, name):\n",
+ "        \"\"\"Return True when the image file for key `name` exists under `images_root`.\"\"\"\n",
+ "        return self._image_path(name).exists()\n",
+ "\n",
+ "    def verify_that_all_images_exist(self):\n",
+ "        \"\"\"Print the path of every missing image and return the list of missing keys.\"\"\"\n",
+ "        missing = [name for name in self.captions['image_file'] if not self.image_exists(name)]\n",
+ "        for name in missing:\n",
+ "            print(f'file does not exist: {self._image_path(name)}')\n",
+ "        return missing\n",
+ "\n",
"    def _get_raw_image(self, i):\n",
- "        image_name = self.image_list.iloc[0].key\n",
- "        image_path = (self.images_root/image_name[0:3]/image_name[3:6]/image_name).with_suffix('.jpg')\n",
- "        return default_loader(image_path) if image_path.exists() else None\n",
+ "        name = self.captions.iloc[i]['image_file']\n",
+ "        return default_loader(self._image_path(name))\n",
+ "\n",
+ "    def _get_raw_text(self, i):\n",
+ "        return self.captions.iloc[i]['caption']\n",
+ "\n",
+ "    def __getitem__(self, i):\n",
+ "        image = self._get_raw_image(i)\n",
+ "        if self.image_transform is not None:\n",
+ "            if self.image_transform_type == 'torchvision':\n",
+ "                image = self.image_transform(image)\n",
+ "            elif self.image_transform_type == 'albumentations':\n",
+ "                image = self.image_transform(image=np.array(image))['image']\n",
+ "            else:\n",
+ "                raise NotImplementedError(f\"{self.image_transform_type=}\")\n",
+ "        if not self.include_captions:\n",
+ "            return image\n",
+ "        caption = self._get_raw_text(i)\n",
+ "        # Apply the caption transform when one was supplied (it used to be stored but never used)\n",
+ "        if self.text_transform is not None:\n",
+ "            caption = self.text_transform(caption)\n",
+ "        return {'image': image, 'text': caption}\n",
+ "\n",
+ "    def __len__(self):\n",
+ "        return self.size\n",
+ "\n",
+ "\n",
+ "if __name__ == \"__main__\":\n",
+ "    import albumentations as A\n",
+ "    from albumentations.pytorch import ToTensorV2\n",
+ "    from transformers import AutoTokenizer\n",
+ "    \n",
+ "\n",
+ "    images_root = \"/home/khali/TPU-Test/YFCC100M_OpenAI_subset/data/data/images\"\n",
+ "    # Same file the dataset used to read through its hard-coded path\n",
+ "    captions_path = './chunks/YFCC_subset_clean.tsv'\n",
+ "    image_size = 256\n",
"        \n",
- "    # TODO: we could maybe use jax resizing / scaling functions\n",
- "    def resize_image(self, image):\n",
+ "    # Create transforms\n",
+ "    def image_transform(image):\n",
"        s = min(image.size)\n",
- "        r = self.image_size / s\n",
+ "        r = image_size / s\n",
"        s = (round(r * image.size[1]), round(r * image.size[0]))\n",
"        image = TF.resize(image, s, interpolation=InterpolationMode.LANCZOS)\n",
- "        image = TF.center_crop(image, output_size = 2 * [self.image_size])\n",
+ "        image = TF.center_crop(image, output_size = 2 * [image_size])\n",
"        image = torch.unsqueeze(T.ToTensor()(image), 0)\n",
"        image = image.permute(0, 2, 3, 1).numpy()\n",
"        return image\n",
"        \n",
- "    def _get_caption(self, i):\n",
- "        # We are currently appending title and caption. Should we use another separator?\n",
- "        row = self.image_list.iloc[i]\n",
- "        return ' '.join(row.title_clean, row.description_clean)\n",
- "    \n",
- "    def __getitem__(self, i):\n",
- "        image = self._get_raw_image(i)\n",
- "        if image is None: return None\n",
- "        image = self.resize_image(image)\n",
- "        caption = self._get_caption(i)\n",
- "        return {'image': image, 'text': caption}"
+ "    # Create dataset\n",
+ "    dataset = CaptionDataset(\n",
+ "        images_root=images_root,\n",
+ "        captions_path=captions_path,\n",
+ "        image_transform=image_transform,\n",
+ "        image_transform_type='torchvision',\n",
+ "        include_captions=False\n",
+ "    )"
]
},
{
"cell_type": "code",
- "execution_count": 99,
- "id": "4ce2211f",
- "metadata": {},
- "outputs": [],
- "source": [
- "dataset = YFC100Dataset(\n",
- " image_list_path = yfc100m_metadata,\n",
- " images_root = yfc100m_images,\n",
- " image_size = 256,\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 100,
+ "execution_count": 155,
"id": "cc922704",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "5000"
+ "2483316"
]
},
- "execution_count": 100,
+ "execution_count": 155,
"metadata": {},
"output_type": "execute_result"
}
@@ -1416,7 +965,7 @@
},
{
"cell_type": "code",
- "execution_count": 102,
+ "execution_count": 156,
"id": "6e47ba46",
"metadata": {},
"outputs": [],
@@ -1426,30 +975,29 @@
},
{
"cell_type": "code",
- "execution_count": 103,
+ "execution_count": 1,
"id": "c8a130eb",
"metadata": {},
- "outputs": [
- {
- "ename": "TypeError",
- "evalue": "Caught TypeError in DataLoader worker process 0.\nOriginal Traceback (most recent call last):\n File \"/home/pedro/miniconda3/envs/hf_jax/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py\", line 287, in _worker_loop\n data = fetcher.fetch(index)\n File \"/home/pedro/miniconda3/envs/hf_jax/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py\", line 47, in fetch\n return self.collate_fn(data)\n File \"/home/pedro/miniconda3/envs/hf_jax/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py\", line 86, in default_collate\n raise TypeError(default_collate_err_msg_format.format(elem_type))\nTypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found \n",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
- "\u001b[0;32m/tmp/ipykernel_320049/1409168804.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mnext\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0miter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdataloader\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
- "\u001b[0;32m~/miniconda3/envs/hf_jax/lib/python3.8/site-packages/torch/utils/data/dataloader.py\u001b[0m in \u001b[0;36m__next__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 519\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sampler_iter\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 520\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_reset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 521\u001b[0;31m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_next_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 522\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_num_yielded\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 523\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_dataset_kind\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0m_DatasetKind\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mIterable\u001b[0m \u001b[0;32mand\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m~/miniconda3/envs/hf_jax/lib/python3.8/site-packages/torch/utils/data/dataloader.py\u001b[0m in \u001b[0;36m_next_data\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1201\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1202\u001b[0m \u001b[0;32mdel\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_task_info\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0midx\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1203\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_process_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1204\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1205\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_try_put_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m~/miniconda3/envs/hf_jax/lib/python3.8/site-packages/torch/utils/data/dataloader.py\u001b[0m in \u001b[0;36m_process_data\u001b[0;34m(self, data)\u001b[0m\n\u001b[1;32m 1227\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_try_put_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1228\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mExceptionWrapper\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1229\u001b[0;31m \u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreraise\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1230\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1231\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m~/miniconda3/envs/hf_jax/lib/python3.8/site-packages/torch/_utils.py\u001b[0m in \u001b[0;36mreraise\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 423\u001b[0m \u001b[0;31m# have message field\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 424\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexc_type\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmessage\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 425\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexc_type\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 426\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 427\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;31mTypeError\u001b[0m: Caught TypeError in DataLoader worker process 0.\nOriginal Traceback (most recent call last):\n File \"/home/pedro/miniconda3/envs/hf_jax/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py\", line 287, in _worker_loop\n data = fetcher.fetch(index)\n File \"/home/pedro/miniconda3/envs/hf_jax/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py\", line 47, in fetch\n return self.collate_fn(data)\n File \"/home/pedro/miniconda3/envs/hf_jax/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py\", line 86, in default_collate\n raise TypeError(default_collate_err_msg_format.format(elem_type))\nTypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found \n"
- ]
- }
- ],
+ "outputs": [],
"source": [
+ "# looking at a batch\n",
"next(iter(dataloader))"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c192fd44",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# import matplotlib.pyplot as plt\n",
+ "# for tensor_image, _ in dataloader:\n",
+ "# print(tensor_image)\n",
+ "# plt.imshow(tensor_image.permute(1, 2, 0))\n",
+ "# break"
+ ]
+ },
{
"cell_type": "markdown",
"id": "62ad01c3",
@@ -1460,23 +1008,20 @@
},
{
"cell_type": "code",
- "execution_count": 89,
+ "execution_count": 158,
"id": "88f36d0b",
"metadata": {},
"outputs": [],
"source": [
"def encode(model, batch):\n",
- " print(\"jitting encode function\")\n",
- "# _, indices = model.encode(batch)\n",
- "\n",
- " # The model does not run in my computer (no cudNN currently installed) - faking it\n",
- " indices = [random.randint(0, 16384) for _ in range(256)]\n",
+ "# print(\"jitting encode function\")\n",
+ " _, indices = model.encode(batch)\n",
" return indices"
]
},
{
"cell_type": "code",
- "execution_count": 90,
+ "execution_count": 160,
"id": "1f35f0cb",
"metadata": {},
"outputs": [],
@@ -1501,20 +1046,19 @@
},
{
"cell_type": "code",
- "execution_count": 93,
+ "execution_count": 170,
"id": "2210705b",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
- "import jax\n",
"\n",
- "def encode_captioned_dataset(dataset, output_jsonl, batch_size=32, num_workers=16):\n",
- " if os.path.isfile(output_jsonl):\n",
- " print(f\"Destination file {output_jsonl} already exists, please move away.\")\n",
+ "def encode_captioned_dataset(dataset, output_tsv, batch_size=32, num_workers=16):\n",
+ " if os.path.isfile(output_tsv):\n",
+ " print(f\"Destination file {output_tsv} already exists, please move away.\")\n",
" return\n",
" \n",
- " num_tpus = jax.device_count()\n",
+ " num_tpus = 8 \n",
" dataloader = DataLoader(dataset, batch_size=batch_size, num_workers=num_workers)\n",
" superbatches = superbatch_generator(dataloader, num_tpus=num_tpus)\n",
" \n",
@@ -1522,7 +1066,7 @@
"\n",
" # We save each superbatch to avoid reallocation of buffers as we process them.\n",
" # We keep the file open to prevent excessive file seeks.\n",
- " with open(output_jsonl, \"w\") as file:\n",
+ " with open(output_tsv, \"w\") as file:\n",
" iterations = len(dataset) // (batch_size * num_tpus)\n",
" for n in tqdm(range(iterations)):\n",
" superbatch = next(superbatches)\n",
@@ -1536,14 +1080,12 @@
" captions = dataset.captions[\"caption\"][start_index:end_index].values\n",
" encoded_as_string = list(map(lambda item: np.array2string(item, separator=',', max_line_width=50000, formatter={'int':lambda x: str(x)}), encoded))\n",
" batch_df = pd.DataFrame.from_dict({\"image_file\": paths, \"caption\": captions, \"encoding\": encoded_as_string})\n",
- " batch_df = batch_df.dropna()\n",
- " batch_df.to_json(file, orient='records', lines=True, index=None)\n",
- " "
+ " batch_df.to_csv(file, sep='\\t', header=(n==0), index=None)"
]
},
{
"cell_type": "code",
- "execution_count": 94,
+ "execution_count": 171,
"id": "7704863d",
"metadata": {},
"outputs": [
@@ -1551,29 +1093,12 @@
"name": "stderr",
"output_type": "stream",
"text": [
- " 0%| | 0/78 [00:00, ?it/s]\n"
- ]
- },
- {
- "ename": "TypeError",
- "evalue": "Caught TypeError in DataLoader worker process 0.\nOriginal Traceback (most recent call last):\n File \"/home/pedro/miniconda3/envs/hf_jax/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py\", line 287, in _worker_loop\n data = fetcher.fetch(index)\n File \"/home/pedro/miniconda3/envs/hf_jax/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py\", line 47, in fetch\n return self.collate_fn(data)\n File \"/home/pedro/miniconda3/envs/hf_jax/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py\", line 86, in default_collate\n raise TypeError(default_collate_err_msg_format.format(elem_type))\nTypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found \n",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
- "\u001b[0;32m/tmp/ipykernel_320049/140243368.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mencode_captioned_dataset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdataset\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0myfc100m_output\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m64\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnum_workers\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m16\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
- "\u001b[0;32m/tmp/ipykernel_320049/2954345319.py\u001b[0m in \u001b[0;36mencode_captioned_dataset\u001b[0;34m(dataset, output_jsonl, batch_size, num_workers)\u001b[0m\n\u001b[1;32m 18\u001b[0m \u001b[0miterations\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdataset\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m//\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mbatch_size\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mnum_tpus\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mn\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtqdm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0miterations\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 20\u001b[0;31m \u001b[0msuperbatch\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnext\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msuperbatches\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 21\u001b[0m \u001b[0mencoded\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mp_encoder\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msuperbatch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnumpy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 22\u001b[0m \u001b[0mencoded\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mencoded\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencoded\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/tmp/ipykernel_320049/4148450576.py\u001b[0m in \u001b[0;36msuperbatch_generator\u001b[0;34m(dataloader, num_tpus)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0msuperbatch_generator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdataloader\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnum_tpus\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0miter_loader\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0miter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdataloader\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mbatch\u001b[0m \u001b[0;32min\u001b[0m \u001b[0miter_loader\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0msuperbatch\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mbatch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msqueeze\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m~/miniconda3/envs/hf_jax/lib/python3.8/site-packages/torch/utils/data/dataloader.py\u001b[0m in \u001b[0;36m__next__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 519\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sampler_iter\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 520\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_reset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 521\u001b[0;31m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_next_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 522\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_num_yielded\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 523\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_dataset_kind\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0m_DatasetKind\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mIterable\u001b[0m \u001b[0;32mand\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m~/miniconda3/envs/hf_jax/lib/python3.8/site-packages/torch/utils/data/dataloader.py\u001b[0m in \u001b[0;36m_next_data\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1201\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1202\u001b[0m \u001b[0;32mdel\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_task_info\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0midx\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1203\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_process_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1204\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1205\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_try_put_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m~/miniconda3/envs/hf_jax/lib/python3.8/site-packages/torch/utils/data/dataloader.py\u001b[0m in \u001b[0;36m_process_data\u001b[0;34m(self, data)\u001b[0m\n\u001b[1;32m 1227\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_try_put_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1228\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mExceptionWrapper\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1229\u001b[0;31m \u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreraise\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1230\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1231\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m~/miniconda3/envs/hf_jax/lib/python3.8/site-packages/torch/_utils.py\u001b[0m in \u001b[0;36mreraise\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 423\u001b[0m \u001b[0;31m# have message field\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 424\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexc_type\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmessage\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 425\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexc_type\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 426\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 427\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;31mTypeError\u001b[0m: Caught TypeError in DataLoader worker process 0.\nOriginal Traceback (most recent call last):\n File \"/home/pedro/miniconda3/envs/hf_jax/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py\", line 287, in _worker_loop\n data = fetcher.fetch(index)\n File \"/home/pedro/miniconda3/envs/hf_jax/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py\", line 47, in fetch\n return self.collate_fn(data)\n File \"/home/pedro/miniconda3/envs/hf_jax/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py\", line 86, in default_collate\n raise TypeError(default_collate_err_msg_format.format(elem_type))\nTypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found \n"
+ "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4850/4850 [2:27:51<00:00, 1.83s/it]\n"
]
}
],
"source": [
- "encode_captioned_dataset(dataset, yfc100m_output, batch_size=64, num_workers=16)"
+ "encode_captioned_dataset(dataset, yfcc100m_output, batch_size=64, num_workers=16)"
]
},
{
@@ -1587,9 +1112,8 @@
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
+ "name": "python3",
+ "display_name": "Python 3.9.0 64-bit ('Python39')"
},
"language_info": {
"codemirror_mode": {
@@ -1601,9 +1125,12 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.8.10"
+ "version": "3.9.0"
+ },
+ "interpreter": {
+ "hash": "db471c52d602b4f5f40ecaf278e88ccfef85c29d0a1a07185b0d51fc7acf4e26"
}
},
"nbformat": 4,
"nbformat_minor": 5
-}
+}
\ No newline at end of file