Spaces:
Running
Running
Merge pull request #19 from pcuenca/main
Browse files
encoding/vqgan-jax-encoding-yfcc100m-splitted.ipynb
ADDED
@@ -0,0 +1,462 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "markdown",
|
5 |
+
"id": "d0b72877",
|
6 |
+
"metadata": {},
|
7 |
+
"source": [
|
8 |
+
"# vqgan-jax-encoding-yfcc100m"
|
9 |
+
]
|
10 |
+
},
|
11 |
+
{
|
12 |
+
"cell_type": "markdown",
|
13 |
+
"id": "747733a4",
|
14 |
+
"metadata": {},
|
15 |
+
"source": [
|
16 |
+
"Same as `vqgan-jax-encoding-with-captions`, but for YFCC100M.\n",
|
17 |
+
"\n",
|
18 |
+
"This dataset was prepared by @borisdayma in JSON Lines format."
|
19 |
+
]
|
20 |
+
},
|
21 |
+
{
|
22 |
+
"cell_type": "code",
|
23 |
+
"execution_count": 1,
|
24 |
+
"id": "3b59489e",
|
25 |
+
"metadata": {},
|
26 |
+
"outputs": [],
|
27 |
+
"source": [
|
28 |
+
"import io\n",
|
29 |
+
"\n",
|
30 |
+
"import requests\n",
|
31 |
+
"from PIL import Image\n",
|
32 |
+
"import numpy as np\n",
|
33 |
+
"from tqdm import tqdm\n",
|
34 |
+
"\n",
|
35 |
+
"import torch\n",
|
36 |
+
"import torchvision.transforms as T\n",
|
37 |
+
"import torchvision.transforms.functional as TF\n",
|
38 |
+
"from torchvision.transforms import InterpolationMode\n",
|
39 |
+
"from torch.utils.data import Dataset, DataLoader\n",
|
40 |
+
"from torchvision.datasets.folder import default_loader\n",
|
41 |
+
"\n",
|
42 |
+
"import jax\n",
|
43 |
+
"from jax import pmap"
|
44 |
+
]
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"cell_type": "markdown",
|
48 |
+
"id": "511c3b9e",
|
49 |
+
"metadata": {},
|
50 |
+
"source": [
|
51 |
+
"## VQGAN-JAX model"
|
52 |
+
]
|
53 |
+
},
|
54 |
+
{
|
55 |
+
"cell_type": "markdown",
|
56 |
+
"id": "bb408f6c",
|
57 |
+
"metadata": {},
|
58 |
+
"source": [
|
59 |
+
"`dalle_mini` is a local package that contains the VQGAN-JAX model and other utilities."
|
60 |
+
]
|
61 |
+
},
|
62 |
+
{
|
63 |
+
"cell_type": "code",
|
64 |
+
"execution_count": 2,
|
65 |
+
"id": "2ca50dc7",
|
66 |
+
"metadata": {},
|
67 |
+
"outputs": [],
|
68 |
+
"source": [
|
69 |
+
"from dalle_mini.vqgan_jax.modeling_flax_vqgan import VQModel"
|
70 |
+
]
|
71 |
+
},
|
72 |
+
{
|
73 |
+
"cell_type": "markdown",
|
74 |
+
"id": "7b60da9a",
|
75 |
+
"metadata": {},
|
76 |
+
"source": [
|
77 |
+
"We'll use a VQGAN trained by using Taming Transformers and converted to a JAX model."
|
78 |
+
]
|
79 |
+
},
|
80 |
+
{
|
81 |
+
"cell_type": "code",
|
82 |
+
"execution_count": 4,
|
83 |
+
"id": "29ce8b15",
|
84 |
+
"metadata": {},
|
85 |
+
"outputs": [],
|
86 |
+
"source": [
|
87 |
+
"model = VQModel.from_pretrained(\"flax-community/vqgan_f16_16384\")"
|
88 |
+
]
|
89 |
+
},
|
90 |
+
{
|
91 |
+
"cell_type": "markdown",
|
92 |
+
"id": "c7c4c1e6",
|
93 |
+
"metadata": {},
|
94 |
+
"source": [
|
95 |
+
"## Dataset"
|
96 |
+
]
|
97 |
+
},
|
98 |
+
{
|
99 |
+
"cell_type": "markdown",
|
100 |
+
"id": "fd4c608e",
|
101 |
+
"metadata": {},
|
102 |
+
"source": [
|
103 |
+
"I split the files so they can be processed iteratively. Pandas struggles with memory and `datasets` has problems when filtering files, as described [in this issue](https://github.com/huggingface/datasets/issues/2644)."
|
104 |
+
]
|
105 |
+
},
|
106 |
+
{
|
107 |
+
"cell_type": "code",
|
108 |
+
"execution_count": 5,
|
109 |
+
"id": "6c058636",
|
110 |
+
"metadata": {},
|
111 |
+
"outputs": [],
|
112 |
+
"source": [
|
113 |
+
"import pandas as pd\n",
|
114 |
+
"from pathlib import Path"
|
115 |
+
]
|
116 |
+
},
|
117 |
+
{
|
118 |
+
"cell_type": "code",
|
119 |
+
"execution_count": 6,
|
120 |
+
"id": "81b19eca",
|
121 |
+
"metadata": {},
|
122 |
+
"outputs": [],
|
123 |
+
"source": [
|
124 |
+
"yfcc100m = Path('/sddata/dalle-mini/YFCC100M_OpenAI_subset')\n",
|
125 |
+
"# Images are 'sharded' from the following directory\n",
|
126 |
+
"yfcc100m_images = yfcc100m/'data'/'images'\n",
|
127 |
+
"yfcc100m_metadata_splits = yfcc100m/'metadata_splitted'\n",
|
128 |
+
"yfcc100m_output = yfcc100m/'metadata_encoded'"
|
129 |
+
]
|
130 |
+
},
|
131 |
+
{
|
132 |
+
"cell_type": "code",
|
133 |
+
"execution_count": 7,
|
134 |
+
"id": "40873de9",
|
135 |
+
"metadata": {},
|
136 |
+
"outputs": [
|
137 |
+
{
|
138 |
+
"data": {
|
139 |
+
"text/plain": [
|
140 |
+
"[PosixPath('/sddata/dalle-mini/YFCC100M_OpenAI_subset/metadata_splitted/metadata_split_04'),\n",
|
141 |
+
" PosixPath('/sddata/dalle-mini/YFCC100M_OpenAI_subset/metadata_splitted/metadata_split_25'),\n",
|
142 |
+
" PosixPath('/sddata/dalle-mini/YFCC100M_OpenAI_subset/metadata_splitted/metadata_split_17'),\n",
|
143 |
+
" PosixPath('/sddata/dalle-mini/YFCC100M_OpenAI_subset/metadata_splitted/metadata_split_10'),\n",
|
144 |
+
" PosixPath('/sddata/dalle-mini/YFCC100M_OpenAI_subset/metadata_splitted/metadata_split_22'),\n",
|
145 |
+
" PosixPath('/sddata/dalle-mini/YFCC100M_OpenAI_subset/metadata_splitted/metadata_split_28'),\n",
|
146 |
+
" PosixPath('/sddata/dalle-mini/YFCC100M_OpenAI_subset/metadata_splitted/metadata_split_09'),\n",
|
147 |
+
" PosixPath('/sddata/dalle-mini/YFCC100M_OpenAI_subset/metadata_splitted/metadata_split_03'),\n",
|
148 |
+
" PosixPath('/sddata/dalle-mini/YFCC100M_OpenAI_subset/metadata_splitted/metadata_split_07'),\n",
|
149 |
+
" PosixPath('/sddata/dalle-mini/YFCC100M_OpenAI_subset/metadata_splitted/metadata_split_26'),\n",
|
150 |
+
" PosixPath('/sddata/dalle-mini/YFCC100M_OpenAI_subset/metadata_splitted/metadata_split_14'),\n",
|
151 |
+
" PosixPath('/sddata/dalle-mini/YFCC100M_OpenAI_subset/metadata_splitted/metadata_split_19'),\n",
|
152 |
+
" PosixPath('/sddata/dalle-mini/YFCC100M_OpenAI_subset/metadata_splitted/metadata_split_13'),\n",
|
153 |
+
" PosixPath('/sddata/dalle-mini/YFCC100M_OpenAI_subset/metadata_splitted/metadata_split_21'),\n",
|
154 |
+
" PosixPath('/sddata/dalle-mini/YFCC100M_OpenAI_subset/metadata_splitted/metadata_split_00'),\n",
|
155 |
+
" PosixPath('/sddata/dalle-mini/YFCC100M_OpenAI_subset/metadata_splitted/metadata_split_02'),\n",
|
156 |
+
" PosixPath('/sddata/dalle-mini/YFCC100M_OpenAI_subset/metadata_splitted/metadata_split_08'),\n",
|
157 |
+
" PosixPath('/sddata/dalle-mini/YFCC100M_OpenAI_subset/metadata_splitted/metadata_split_11'),\n",
|
158 |
+
" PosixPath('/sddata/dalle-mini/YFCC100M_OpenAI_subset/metadata_splitted/metadata_split_29'),\n",
|
159 |
+
" PosixPath('/sddata/dalle-mini/YFCC100M_OpenAI_subset/metadata_splitted/metadata_split_23'),\n",
|
160 |
+
" PosixPath('/sddata/dalle-mini/YFCC100M_OpenAI_subset/metadata_splitted/metadata_split_24'),\n",
|
161 |
+
" PosixPath('/sddata/dalle-mini/YFCC100M_OpenAI_subset/metadata_splitted/metadata_split_16'),\n",
|
162 |
+
" PosixPath('/sddata/dalle-mini/YFCC100M_OpenAI_subset/metadata_splitted/metadata_split_05'),\n",
|
163 |
+
" PosixPath('/sddata/dalle-mini/YFCC100M_OpenAI_subset/metadata_splitted/metadata_split_01'),\n",
|
164 |
+
" PosixPath('/sddata/dalle-mini/YFCC100M_OpenAI_subset/metadata_splitted/metadata_split_12'),\n",
|
165 |
+
" PosixPath('/sddata/dalle-mini/YFCC100M_OpenAI_subset/metadata_splitted/metadata_split_18'),\n",
|
166 |
+
" PosixPath('/sddata/dalle-mini/YFCC100M_OpenAI_subset/metadata_splitted/metadata_split_20'),\n",
|
167 |
+
" PosixPath('/sddata/dalle-mini/YFCC100M_OpenAI_subset/metadata_splitted/metadata_split_27'),\n",
|
168 |
+
" PosixPath('/sddata/dalle-mini/YFCC100M_OpenAI_subset/metadata_splitted/metadata_split_15'),\n",
|
169 |
+
" PosixPath('/sddata/dalle-mini/YFCC100M_OpenAI_subset/metadata_splitted/metadata_split_06')]"
|
170 |
+
]
|
171 |
+
},
|
172 |
+
"execution_count": 7,
|
173 |
+
"metadata": {},
|
174 |
+
"output_type": "execute_result"
|
175 |
+
}
|
176 |
+
],
|
177 |
+
"source": [
|
178 |
+
"all_splits = [x for x in yfcc100m_metadata_splits.iterdir() if x.is_file()]\n",
|
179 |
+
"all_splits"
|
180 |
+
]
|
181 |
+
},
|
182 |
+
{
|
183 |
+
"cell_type": "markdown",
|
184 |
+
"id": "f604e3c9",
|
185 |
+
"metadata": {},
|
186 |
+
"source": [
|
187 |
+
"### Cleanup"
|
188 |
+
]
|
189 |
+
},
|
190 |
+
{
|
191 |
+
"cell_type": "code",
|
192 |
+
"execution_count": 8,
|
193 |
+
"id": "dea06b92",
|
194 |
+
"metadata": {},
|
195 |
+
"outputs": [],
|
196 |
+
"source": [
|
197 |
+
"def image_exists(root: str, name: str, ext: str):\n",
|
198 |
+
"    shard_dir = Path(root) / name[:3] / name[3:6]  # two directory levels taken from the key prefix\n",
|
199 |
+
"    return (shard_dir / name).with_suffix(ext).exists()"
|
200 |
+
]
|
201 |
+
},
|
202 |
+
{
|
203 |
+
"cell_type": "code",
|
204 |
+
"execution_count": 9,
|
205 |
+
"id": "1d34d7aa",
|
206 |
+
"metadata": {},
|
207 |
+
"outputs": [],
|
208 |
+
"source": [
|
209 |
+
"class YFC100Dataset(Dataset):\n",
|
210 |
+
"    def __init__(self, image_list: pd.DataFrame, images_root: str, image_size: int, max_items=None):\n",
|
211 |
+
"        \"\"\"\n",
|
212 |
+
"        :param image_list: DataFrame with clean entries - all images must exist. Rows are looked up by position and need a `key` column.\n",
|
213 |
+
"        :param images_root: Root directory containing the images\n",
|
214 |
+
"        :param image_size: Image size. Source images will be resized and center-cropped.\n",
|
215 |
+
"        :param max_items: Limit dataset size for debugging\n",
|
216 |
+
"        \"\"\"\n",
|
217 |
+
"        self.image_list = image_list\n",
|
218 |
+
"        self.images_root = Path(images_root)\n",
|
219 |
+
"        if max_items is not None: self.image_list = self.image_list[:max_items]\n",
|
220 |
+
"        self.image_size = image_size\n",
|
221 |
+
"    \n",
|
222 |
+
"    def __len__(self):\n",
|
223 |
+
"        return len(self.image_list)\n",
|
224 |
+
"    \n",
|
225 |
+
"    def _get_raw_image(self, i):  # load the image for row `i` of `image_list`\n",
|
226 |
+
"        image_name = self.image_list.iloc[i].key  # was `.iloc[0]`, which returned the first image for every index\n",
|
227 |
+
"        image_path = (self.images_root/image_name[0:3]/image_name[3:6]/image_name).with_suffix('.jpg')\n",
|
228 |
+
"        return default_loader(image_path)\n",
|
229 |
+
"    \n",
|
230 |
+
"    def resize_image(self, image):  # PIL image -> float array of shape (1, image_size, image_size, C)\n",
|
231 |
+
"        s = min(image.size)\n",
|
232 |
+
"        r = self.image_size / s\n",
|
233 |
+
"        s = (round(r * image.size[1]), round(r * image.size[0]))  # PIL size is (width, height); TF.resize takes (height, width)\n",
|
234 |
+
"        image = TF.resize(image, s, interpolation=InterpolationMode.LANCZOS)\n",
|
235 |
+
"        image = TF.center_crop(image, output_size = 2 * [self.image_size])\n",
|
236 |
+
"        # FIXME: np.array is necessary in my installation, but it should be automatic\n",
|
237 |
+
"        image = torch.unsqueeze(T.ToTensor()(np.array(image)), 0)\n",
|
238 |
+
"        image = image.permute(0, 2, 3, 1).numpy()\n",
|
239 |
+
"        return image\n",
|
240 |
+
"    \n",
|
241 |
+
"    def __getitem__(self, i):\n",
|
242 |
+
"        image = self._get_raw_image(i)\n",
|
243 |
+
"        image = self.resize_image(image)\n",
|
244 |
+
"        # Just return the image, not the caption\n",
|
245 |
+
"        return image"
|
246 |
+
]
|
247 |
+
},
|
248 |
+
{
|
249 |
+
"cell_type": "markdown",
|
250 |
+
"id": "62ad01c3",
|
251 |
+
"metadata": {},
|
252 |
+
"source": [
|
253 |
+
"## Encoding"
|
254 |
+
]
|
255 |
+
},
|
256 |
+
{
|
257 |
+
"cell_type": "code",
|
258 |
+
"execution_count": 10,
|
259 |
+
"id": "88f36d0b",
|
260 |
+
"metadata": {},
|
261 |
+
"outputs": [],
|
262 |
+
"source": [
|
263 |
+
"def encode(model, batch):\n",
|
264 |
+
"    print(\"jitting encode function\")\n",
|
265 |
+
"    _quantized, codes = model.encode(batch)  # only the second element of the pair is returned\n",
|
266 |
+
"\n",
|
267 |
+
"#     # FIXME: The model does not run in my computer (no cudNN currently installed) - faking it\n",
|
268 |
+
"#     codes = np.random.randint(0, 16384, (batch.shape[0], 256))\n",
|
269 |
+
"    return codes"
|
270 |
+
]
|
271 |
+
},
|
272 |
+
{
|
273 |
+
"cell_type": "code",
|
274 |
+
"execution_count": null,
|
275 |
+
"id": "d1f45dd8",
|
276 |
+
"metadata": {},
|
277 |
+
"outputs": [],
|
278 |
+
"source": [
|
279 |
+
"#FIXME\n",
|
280 |
+
"# import random\n",
|
281 |
+
"# model = {}"
|
282 |
+
]
|
283 |
+
},
|
284 |
+
{
|
285 |
+
"cell_type": "code",
|
286 |
+
"execution_count": 11,
|
287 |
+
"id": "1f35f0cb",
|
288 |
+
"metadata": {},
|
289 |
+
"outputs": [],
|
290 |
+
"source": [
|
291 |
+
"from flax.training.common_utils import shard\n",
|
292 |
+
"\n",
|
293 |
+
"def superbatch_generator(dataloader):\n",
|
294 |
+
"    full_size = dataloader.batch_size\n",
|
295 |
+
"    for stacked in iter(dataloader):\n",
|
296 |
+
"        images = stacked.squeeze(1)\n",
|
297 |
+
"        # Drop any batch that is not full (in practice, the trailing one)\n",
|
298 |
+
"        if images.shape[0] == full_size:\n",
|
299 |
+
"            yield shard(images)"
|
300 |
+
]
|
301 |
+
},
|
302 |
+
{
|
303 |
+
"cell_type": "code",
|
304 |
+
"execution_count": 13,
|
305 |
+
"id": "2210705b",
|
306 |
+
"metadata": {},
|
307 |
+
"outputs": [],
|
308 |
+
"source": [
|
309 |
+
"import os\n",
|
310 |
+
"import jax\n",
|
311 |
+
"\n",
|
312 |
+
"def encode_captioned_dataset(dataset, output_jsonl, batch_size=32, num_workers=16):\n",
|
313 |
+
"    if os.path.isfile(output_jsonl):\n",
|
314 |
+
"        print(f\"Destination file {output_jsonl} already exists, please move away.\")\n",
|
315 |
+
"        return\n",
|
316 |
+
"    os.makedirs(os.path.dirname(output_jsonl) or '.', exist_ok=True)  # open() below fails if the output directory is missing\n",
|
317 |
+
"    num_tpus = jax.device_count()\n",
|
318 |
+
"    dataloader = DataLoader(dataset, batch_size=num_tpus*batch_size, num_workers=num_workers)\n",
|
319 |
+
"    superbatches = superbatch_generator(dataloader)\n",
|
320 |
+
"    \n",
|
321 |
+
"    p_encoder = pmap(lambda batch: encode(model, batch))  # NOTE: `model` is the notebook-level VQGAN, not a parameter\n",
|
322 |
+
"\n",
|
323 |
+
"    # We save each superbatch to avoid reallocation of buffers as we process them.\n",
|
324 |
+
"    # We keep the file open to prevent excessive file seeks.\n",
|
325 |
+
"    with open(output_jsonl, \"w\") as file:\n",
|
326 |
+
"        iterations = len(dataset) // (batch_size * num_tpus)\n",
|
327 |
+
"        for n in tqdm(range(iterations)):\n",
|
328 |
+
"            superbatch = next(superbatches)\n",
|
329 |
+
"            encoded = p_encoder(superbatch.numpy())\n",
|
330 |
+
"            encoded = encoded.reshape(-1, encoded.shape[-1])\n",
|
331 |
+
"\n",
|
332 |
+
"            # Extract fields from the dataset internal `image_list` property, and save to disk\n",
|
333 |
+
"            # We need to read from the df because the Dataset only returns images\n",
|
334 |
+
"            start_index = n * batch_size * num_tpus\n",
|
335 |
+
"            end_index = (n+1) * batch_size * num_tpus\n",
|
336 |
+
"            keys = dataset.image_list[\"key\"].iloc[start_index:end_index].values  # positional slice; the df index has gaps after filtering\n",
|
337 |
+
"            captions = dataset.image_list[\"caption\"].iloc[start_index:end_index].values\n",
|
338 |
+
"#             encoded_as_string = list(map(lambda item: np.array2string(item, separator=',', max_line_width=50000, formatter={'int':lambda x: str(x)}), encoded))\n",
|
339 |
+
"            batch_df = pd.DataFrame.from_dict({\"key\": keys, \"caption\": captions, \"encoding\": encoded})\n",
|
340 |
+
"            batch_df.to_json(file, orient='records', lines=True)"
|
341 |
+
]
|
342 |
+
},
|
343 |
+
{
|
344 |
+
"cell_type": "code",
|
345 |
+
"execution_count": 14,
|
346 |
+
"id": "7704863d",
|
347 |
+
"metadata": {},
|
348 |
+
"outputs": [
|
349 |
+
{
|
350 |
+
"name": "stdout",
|
351 |
+
"output_type": "stream",
|
352 |
+
"text": [
|
353 |
+
"Processing /sddata/dalle-mini/YFCC100M_OpenAI_subset/metadata_splitted/metadata_split_04\n",
|
354 |
+
"54024 selected from 500000 total entries\n"
|
355 |
+
]
|
356 |
+
},
|
357 |
+
{
|
358 |
+
"name": "stderr",
|
359 |
+
"output_type": "stream",
|
360 |
+
"text": [
|
361 |
+
"INFO:absl:Starting the local TPU driver.\n",
|
362 |
+
"INFO:absl:Unable to initialize backend 'tpu_driver': Not found: Unable to find driver in registry given worker: local://\n",
|
363 |
+
"INFO:absl:Unable to initialize backend 'tpu': Invalid argument: TpuPlatform is not available.\n",
|
364 |
+
" 0%| | 0/31 [00:00<?, ?it/s]"
|
365 |
+
]
|
366 |
+
},
|
367 |
+
{
|
368 |
+
"name": "stdout",
|
369 |
+
"output_type": "stream",
|
370 |
+
"text": [
|
371 |
+
"jitting encode function\n"
|
372 |
+
]
|
373 |
+
},
|
374 |
+
{
|
375 |
+
"name": "stderr",
|
376 |
+
"output_type": "stream",
|
377 |
+
"text": [
|
378 |
+
"100%|███████████████████████████████████████████████████████████████████████████████| 31/31 [00:02<00:00, 10.61it/s]\n"
|
379 |
+
]
|
380 |
+
},
|
381 |
+
{
|
382 |
+
"name": "stdout",
|
383 |
+
"output_type": "stream",
|
384 |
+
"text": [
|
385 |
+
"Processing /sddata/dalle-mini/YFCC100M_OpenAI_subset/metadata_splitted/metadata_split_25\n",
|
386 |
+
"99530 selected from 500000 total entries\n"
|
387 |
+
]
|
388 |
+
},
|
389 |
+
{
|
390 |
+
"name": "stderr",
|
391 |
+
"output_type": "stream",
|
392 |
+
"text": [
|
393 |
+
" 3%|██▌ | 1/31 [00:01<00:53, 1.79s/it]"
|
394 |
+
]
|
395 |
+
},
|
396 |
+
{
|
397 |
+
"name": "stdout",
|
398 |
+
"output_type": "stream",
|
399 |
+
"text": [
|
400 |
+
"jitting encode function\n"
|
401 |
+
]
|
402 |
+
},
|
403 |
+
{
|
404 |
+
"name": "stderr",
|
405 |
+
"output_type": "stream",
|
406 |
+
"text": [
|
407 |
+
"100%|███████████████████████████████████████████████████████████████████████████████| 31/31 [00:03<00:00, 9.92it/s]\n"
|
408 |
+
]
|
409 |
+
}
|
410 |
+
],
|
411 |
+
"source": [
|
412 |
+
"for split in all_splits:\n",
|
413 |
+
"    print(f\"Processing {split}\")\n",
|
414 |
+
"    df = pd.read_json(split, orient=\"records\", lines=True)\n",
|
415 |
+
"    df['image_exists'] = df.apply(lambda row: image_exists(yfcc100m_images, row['key'], '.' + row['ext']), axis=1)\n",
|
416 |
+
"    print(f\"{len(df[df.image_exists])} selected from {len(df)} total entries\")\n",
|
417 |
+
"    \n",
|
418 |
+
"    df = df[df.image_exists].copy()  # explicit copy: a column is added below, which should not target a slice of the original frame\n",
|
419 |
+
"    captions = df.apply(lambda row: ' '.join([row[\"title_clean\"], row[\"description_clean\"]]), axis=1)\n",
|
420 |
+
"    df[\"caption\"] = captions.values\n",
|
421 |
+
"    \n",
|
422 |
+
"    dataset = YFC100Dataset(\n",
|
423 |
+
"        image_list = df,\n",
|
424 |
+
"        images_root = yfcc100m_images,\n",
|
425 |
+
"        image_size = 256,\n",
|
426 |
+
"#         max_items = 2000,\n",
|
427 |
+
"    )\n",
|
428 |
+
"    \n",
|
429 |
+
"    encode_captioned_dataset(dataset, yfcc100m_output/split.name, batch_size=64, num_workers=16)"
|
430 |
+
]
|
431 |
+
},
|
432 |
+
{
|
433 |
+
"cell_type": "markdown",
|
434 |
+
"id": "8953dd84",
|
435 |
+
"metadata": {},
|
436 |
+
"source": [
|
437 |
+
"----"
|
438 |
+
]
|
439 |
+
}
|
440 |
+
],
|
441 |
+
"metadata": {
|
442 |
+
"kernelspec": {
|
443 |
+
"display_name": "Python 3 (ipykernel)",
|
444 |
+
"language": "python",
|
445 |
+
"name": "python3"
|
446 |
+
},
|
447 |
+
"language_info": {
|
448 |
+
"codemirror_mode": {
|
449 |
+
"name": "ipython",
|
450 |
+
"version": 3
|
451 |
+
},
|
452 |
+
"file_extension": ".py",
|
453 |
+
"mimetype": "text/x-python",
|
454 |
+
"name": "python",
|
455 |
+
"nbconvert_exporter": "python",
|
456 |
+
"pygments_lexer": "ipython3",
|
457 |
+
"version": "3.8.10"
|
458 |
+
}
|
459 |
+
},
|
460 |
+
"nbformat": 4,
|
461 |
+
"nbformat_minor": 5
|
462 |
+
}
|
encoding/vqgan-jax-encoding-yfcc100m.ipynb
ADDED
@@ -0,0 +1,1609 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "markdown",
|
5 |
+
"id": "d0b72877",
|
6 |
+
"metadata": {},
|
7 |
+
"source": [
|
8 |
+
"# vqgan-jax-encoding-yfcc100m"
|
9 |
+
]
|
10 |
+
},
|
11 |
+
{
|
12 |
+
"cell_type": "markdown",
|
13 |
+
"id": "ba7b31e6",
|
14 |
+
"metadata": {},
|
15 |
+
"source": [
|
16 |
+
"Same as `vqgan-jax-encoding-with-captions`, but for YFCC100M.\n",
|
17 |
+
"\n",
|
18 |
+
"This dataset was prepared by @borisdayma in JSON Lines format."
|
19 |
+
]
|
20 |
+
},
|
21 |
+
{
|
22 |
+
"cell_type": "code",
|
23 |
+
"execution_count": 1,
|
24 |
+
"id": "3b59489e",
|
25 |
+
"metadata": {},
|
26 |
+
"outputs": [],
|
27 |
+
"source": [
|
28 |
+
"import io\n",
|
29 |
+
"\n",
|
30 |
+
"import requests\n",
|
31 |
+
"from PIL import Image\n",
|
32 |
+
"import numpy as np\n",
|
33 |
+
"from tqdm import tqdm\n",
|
34 |
+
"\n",
|
35 |
+
"import torch\n",
|
36 |
+
"import torchvision.transforms as T\n",
|
37 |
+
"import torchvision.transforms.functional as TF\n",
|
38 |
+
"from torchvision.transforms import InterpolationMode\n",
|
39 |
+
"from torch.utils.data import Dataset, DataLoader\n",
|
40 |
+
"from torchvision.datasets.folder import default_loader\n",
|
41 |
+
"\n",
|
42 |
+
"import jax\n",
|
43 |
+
"from jax import pmap"
|
44 |
+
]
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"cell_type": "markdown",
|
48 |
+
"id": "511c3b9e",
|
49 |
+
"metadata": {},
|
50 |
+
"source": [
|
51 |
+
"## VQGAN-JAX model"
|
52 |
+
]
|
53 |
+
},
|
54 |
+
{
|
55 |
+
"cell_type": "markdown",
|
56 |
+
"id": "bb408f6c",
|
57 |
+
"metadata": {},
|
58 |
+
"source": [
|
59 |
+
"`dalle_mini` is a local package that contains the VQGAN-JAX model and other utilities."
|
60 |
+
]
|
61 |
+
},
|
62 |
+
{
|
63 |
+
"cell_type": "code",
|
64 |
+
"execution_count": 2,
|
65 |
+
"id": "2ca50dc7",
|
66 |
+
"metadata": {},
|
67 |
+
"outputs": [],
|
68 |
+
"source": [
|
69 |
+
"from dalle_mini.vqgan_jax.modeling_flax_vqgan import VQModel"
|
70 |
+
]
|
71 |
+
},
|
72 |
+
{
|
73 |
+
"cell_type": "markdown",
|
74 |
+
"id": "7b60da9a",
|
75 |
+
"metadata": {},
|
76 |
+
"source": [
|
77 |
+
"We'll use a VQGAN trained by using Taming Transformers and converted to a JAX model."
|
78 |
+
]
|
79 |
+
},
|
80 |
+
{
|
81 |
+
"cell_type": "markdown",
|
82 |
+
"id": "ad05a1bd",
|
83 |
+
"metadata": {},
|
84 |
+
"source": [
|
85 |
+
"**Disabled:** loading the model does not work on my local system right now."
|
86 |
+
]
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"cell_type": "code",
|
90 |
+
"execution_count": 3,
|
91 |
+
"id": "29ce8b15",
|
92 |
+
"metadata": {},
|
93 |
+
"outputs": [],
|
94 |
+
"source": [
|
95 |
+
"#model = VQModel.from_pretrained(\"flax-community/vqgan_f16_16384\")"
|
96 |
+
]
|
97 |
+
},
|
98 |
+
{
|
99 |
+
"cell_type": "markdown",
|
100 |
+
"id": "c7c4c1e6",
|
101 |
+
"metadata": {},
|
102 |
+
"source": [
|
103 |
+
"## Dataset"
|
104 |
+
]
|
105 |
+
},
|
106 |
+
{
|
107 |
+
"cell_type": "code",
|
108 |
+
"execution_count": 79,
|
109 |
+
"id": "33861477",
|
110 |
+
"metadata": {},
|
111 |
+
"outputs": [],
|
112 |
+
"source": [
|
113 |
+
"import pandas as pd\n",
|
114 |
+
"from pathlib import Path"
|
115 |
+
]
|
116 |
+
},
|
117 |
+
{
|
118 |
+
"cell_type": "code",
|
119 |
+
"execution_count": 80,
|
120 |
+
"id": "81b19eca",
|
121 |
+
"metadata": {},
|
122 |
+
"outputs": [],
|
123 |
+
"source": [
|
124 |
+
"yfcc100m = Path('/sddata/dalle-mini/YFCC100M_OpenAI_subset')\n",
|
125 |
+
"# Images are 'sharded' from the following directory\n",
|
126 |
+
"yfcc100m_images = yfcc100m/'data'/'images'\n",
|
127 |
+
"yfcc100m_metadata = yfcc100m/'metadata_YFCC100M.jsonl'\n",
|
128 |
+
"yfcc100m_output = yfcc100m/'metadata_encoded.jsonl'"
|
129 |
+
]
|
130 |
+
},
|
131 |
+
{
|
132 |
+
"cell_type": "markdown",
|
133 |
+
"id": "1c58bb4a",
|
134 |
+
"metadata": {},
|
135 |
+
"source": [
|
136 |
+
"### Cleanup"
|
137 |
+
]
|
138 |
+
},
|
139 |
+
{
|
140 |
+
"cell_type": "markdown",
|
141 |
+
"id": "1a14ae3d",
|
142 |
+
"metadata": {},
|
143 |
+
"source": [
|
144 |
+
"We need to select entries with images that exist. Otherwise we can't build batches, because `DataLoader` does not support `None` in batches. We use Hugging Face Datasets because, as I understand it, it supports threaded reading of JSON Lines files, and I was running out of memory when using pandas."
|
145 |
+
]
|
146 |
+
},
|
147 |
+
{
|
148 |
+
"cell_type": "code",
|
149 |
+
"execution_count": 81,
|
150 |
+
"id": "7811648c",
|
151 |
+
"metadata": {},
|
152 |
+
"outputs": [],
|
153 |
+
"source": [
|
154 |
+
"import datasets\n",
|
155 |
+
"from datasets import Dataset, load_dataset"
|
156 |
+
]
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"cell_type": "code",
|
160 |
+
"execution_count": 82,
|
161 |
+
"id": "753659fe",
|
162 |
+
"metadata": {},
|
163 |
+
"outputs": [
|
164 |
+
{
|
165 |
+
"name": "stderr",
|
166 |
+
"output_type": "stream",
|
167 |
+
"text": [
|
168 |
+
"Using custom data configuration default-57592e8ed16d752b\n",
|
169 |
+
"Reusing dataset json (/home/pedro/.cache/huggingface/datasets/json/default-57592e8ed16d752b/0.0.0/793d004298099bd3c4e61eb7878475bcf1dc212bf2e34437d85126758720d7f9)\n"
|
170 |
+
]
|
171 |
+
}
|
172 |
+
],
|
173 |
+
"source": [
|
174 |
+
"dataset = load_dataset(\"json\", data_files=[str(yfcc100m_metadata)])"
|
175 |
+
]
|
176 |
+
},
|
177 |
+
{
|
178 |
+
"cell_type": "code",
|
179 |
+
"execution_count": 83,
|
180 |
+
"id": "9343df1b",
|
181 |
+
"metadata": {},
|
182 |
+
"outputs": [
|
183 |
+
{
|
184 |
+
"data": {
|
185 |
+
"text/plain": [
|
186 |
+
"Dataset({\n",
|
187 |
+
" features: ['photoid', 'uid', 'unickname', 'datetaken', 'dateuploaded', 'capturedevice', 'title', 'description', 'usertags', 'machinetags', 'longitude', 'latitude', 'accuracy', 'pageurl', 'downloadurl', 'licensename', 'licenseurl', 'serverid', 'farmid', 'secret', 'secretoriginal', 'ext', 'marker', 'key', 'title_clean', 'description_clean'],\n",
|
188 |
+
" num_rows: 14825233\n",
|
189 |
+
"})"
|
190 |
+
]
|
191 |
+
},
|
192 |
+
"execution_count": 83,
|
193 |
+
"metadata": {},
|
194 |
+
"output_type": "execute_result"
|
195 |
+
}
|
196 |
+
],
|
197 |
+
"source": [
|
198 |
+
"dataset = dataset['train']\n",
|
199 |
+
"dataset"
|
200 |
+
]
|
201 |
+
},
|
202 |
+
{
|
203 |
+
"cell_type": "code",
|
204 |
+
"execution_count": 84,
|
205 |
+
"id": "c4794c29",
|
206 |
+
"metadata": {},
|
207 |
+
"outputs": [],
|
208 |
+
"source": [
|
209 |
+
"def image_exists(root: str, name: str, ext: str):\n",
|
210 |
+
" image_path = (Path(root)/name[0:3]/name[3:6]/name).with_suffix(ext)\n",
|
211 |
+
" return image_path.exists()"
|
212 |
+
]
|
213 |
+
},
|
214 |
+
{
|
215 |
+
"cell_type": "code",
|
216 |
+
"execution_count": 90,
|
217 |
+
"id": "1b500078",
|
218 |
+
"metadata": {},
|
219 |
+
"outputs": [],
|
220 |
+
"source": [
|
221 |
+
"def select_existing_rows(examples):\n",
|
222 |
+
" # Select lists we want to keep\n",
|
223 |
+
" keys = examples['key']\n",
|
224 |
+
" titles_clean = examples['title_clean']\n",
|
225 |
+
" descriptions_clean = examples.get('description_clean', '')\n",
|
226 |
+
" exts = examples['ext']\n",
|
227 |
+
" \n",
|
228 |
+
" result = {'key': [], 'title_clean': [], 'description_clean': [], 'ext': []}\n",
|
229 |
+
" for i, image_name in enumerate(keys):\n",
|
230 |
+
" print(i, image_name)\n",
|
231 |
+
" if image_exists(root=str(yfcc100m_images), name=image_name, ext='.' + exts[i]):\n",
|
232 |
+
" result[\"key\"].append(image_name)\n",
|
233 |
+
" result[\"title_clean\"].append(titles_clean[i])\n",
|
234 |
+
" result[\"description_clean\"].append(descriptions_clean[i])\n",
|
235 |
+
" result[\"ext\"].append(exts[i])\n",
|
236 |
+
" print(f'returning {len(result[\"key\"])}')\n",
|
237 |
+
" return result"
|
238 |
+
]
|
239 |
+
},
|
240 |
+
{
|
241 |
+
"cell_type": "code",
|
242 |
+
"execution_count": 91,
|
243 |
+
"id": "467378c1",
|
244 |
+
"metadata": {},
|
245 |
+
"outputs": [
|
246 |
+
{
|
247 |
+
"data": {
|
248 |
+
"application/vnd.jupyter.widget-view+json": {
|
249 |
+
"model_id": "b72e866c3f174e9e9aa2430e204f2baf",
|
250 |
+
"version_major": 2,
|
251 |
+
"version_minor": 0
|
252 |
+
},
|
253 |
+
"text/plain": [
|
254 |
+
"Selecting rows with images that exist: 0%| | 0/14826 [00:00<?, ?ba/s]"
|
255 |
+
]
|
256 |
+
},
|
257 |
+
"metadata": {},
|
258 |
+
"output_type": "display_data"
|
259 |
+
},
|
260 |
+
{
|
261 |
+
"name": "stdout",
|
262 |
+
"output_type": "stream",
|
263 |
+
"text": [
|
264 |
+
"0 d29e7c6a3028418c64eb15e3cf577c2\n",
|
265 |
+
"1 d29f01b149167d683f9ddde464bb3db\n",
|
266 |
+
"2 d296e9e34bdae41edb6c679ff824ab2a\n",
|
267 |
+
"3 d29ce96395848478b1e8396e44899\n",
|
268 |
+
"4 d29abf32c4e12ff881f975b70e0cec0\n",
|
269 |
+
"5 d298a61f2f7be6c9e2c2af81755b489\n",
|
270 |
+
"6 d29b1b973ab1a95a37cd4cda37999fb\n",
|
271 |
+
"7 d290d566266ad568e94128d4135b41a\n",
|
272 |
+
"8 d29b1ac2a497b0d9a4a43c3a51d13fb\n",
|
273 |
+
"9 d29ebe6c96f53b2f5d7f5eed9b2b2898\n",
|
274 |
+
"10 d29ec1b3f75749a231ee1d9d206baf6e\n",
|
275 |
+
"11 d290bee419ce98d9a79ccf512a47a79\n",
|
276 |
+
"12 d29bc1eff62a477131516c40a54f2dce\n",
|
277 |
+
"13 d292a123bcf58e13128d2067593d81\n",
|
278 |
+
"14 d294424637d532d8cfbcf2ca99b85f\n",
|
279 |
+
"15 d29a51d8502f531115b108d59c811ab\n",
|
280 |
+
"16 d29a9f0fce210c7e050877a53697031\n",
|
281 |
+
"17 d290c750469f11795ed85fa62e4b52\n",
|
282 |
+
"18 d29e13badf42d839b421478be4452dbe\n",
|
283 |
+
"19 d29c1d635348aa35474a90f57aafb7\n",
|
284 |
+
"20 d291a7c7c71455d5b3cdd97ca5e4c\n",
|
285 |
+
"21 d295f95d7cb204dc812a476af5f4f8a\n",
|
286 |
+
"22 d2932ecd1053165aa3d7b9e68547e0b6\n",
|
287 |
+
"23 d29cd5a4b1d6a759b63df357ef2b\n",
|
288 |
+
"24 d294e885117ca7d9b328c5b9388f52\n",
|
289 |
+
"25 d2999b54832bb275a7e2eea47e98f11\n",
|
290 |
+
"26 d29f89d491812beb84e62223b4541d7\n",
|
291 |
+
"27 d2993599afe456ba786060129fc9cdfd\n",
|
292 |
+
"28 d290ceb78d0f7c8c49930cd96b12b27\n",
|
293 |
+
"29 d29db640e6943c341e3df6b4a815a17\n",
|
294 |
+
"30 d29d8ae6354fd9c1613ac3750feb298\n",
|
295 |
+
"31 d29b7d6de63ce541b71ceb78745fbfc\n",
|
296 |
+
"32 d290cc9739e6f554b8f27f6496af5a6\n",
|
297 |
+
"33 d29dcfe9b1c7381614d5bd8290d435\n",
|
298 |
+
"34 d297e414783424b8d8339d8c9b54ca72\n",
|
299 |
+
"35 d2918885fdc74e96cdec3fd49e409667\n",
|
300 |
+
"36 d29d30289c89a2e9fc6234283b0397\n",
|
301 |
+
"37 d291536c1f1b3be24034663d3e57c84b\n",
|
302 |
+
"38 d29cccb1954ac8268963b8614d4541\n",
|
303 |
+
"39 d2945c8676633807e51169a7d123f49\n",
|
304 |
+
"40 d29b46d79ae192685dfebbafe681bf5\n",
|
305 |
+
"41 d298c4194cae157a78d9c85f6965ced6\n",
|
306 |
+
"42 d29f4f4050493b5b11bf029731250\n",
|
307 |
+
"43 d29fff53248edfe2539d2e2edf9bc4e\n",
|
308 |
+
"44 d2913017cf52bd6b239c16119ee955d\n",
|
309 |
+
"45 d29b53425360629f945b442e1819182b\n",
|
310 |
+
"46 d29f737d2137b6a58c8a3db2673dfa\n",
|
311 |
+
"47 d295c31c46ee0107f3224a12bb18e0\n",
|
312 |
+
"48 d29735527519d9efd3477ede346d077\n",
|
313 |
+
"49 d29549dc1a2f31e65add5159aca6ca7e\n",
|
314 |
+
"50 d29a56bba12ada01da573a325d2bbd\n",
|
315 |
+
"51 d29eb4236f7a4564cb0368c98b9d15a\n",
|
316 |
+
"52 d29417751c506a499af2bf9bba1c91dc\n",
|
317 |
+
"53 d299335cdf1679c9a5fe6b655e22cfc6\n",
|
318 |
+
"54 d29ad98d3118d6e21a94156f427812d3\n",
|
319 |
+
"55 d29a7bd530fa6cec73f55f5fdec35\n",
|
320 |
+
"56 d2983c9adab124234cf170b157d986aa\n",
|
321 |
+
"57 d299aa496b76993e1fbfaca5fadefa82\n",
|
322 |
+
"58 d29444db0139d8bdbdd96723aaba0\n",
|
323 |
+
"59 d29c2fcbaeb4a6bd7b7eb13e467823\n",
|
324 |
+
"60 d29b9d5c68124a5e56a4594974c9e7ec\n",
|
325 |
+
"61 d29c941e4349d152939733a01debb9ce\n",
|
326 |
+
"62 d2971493e2bdea2a48c3e3f7b9f3b9f9\n",
|
327 |
+
"63 d29f28e4b5254594fb581803aeaf1d7\n",
|
328 |
+
"64 d292145142221a995b2b17ef267fd5\n",
|
329 |
+
"65 d299bbbad76835c2edf1f012bd899883\n",
|
330 |
+
"66 d29462a3fdd5c994b2a09958f4413f39\n",
|
331 |
+
"67 d294a4f2273696fbcbc52c9ae3fa4cc7\n",
|
332 |
+
"68 d29b29cbf37f4b3278d34a9e2274cdc\n",
|
333 |
+
"69 d294f51aaae56a1eca32fac5551330c9\n",
|
334 |
+
"70 d29925d6bef3c5e318a6ef9461281ae2\n",
|
335 |
+
"71 d2914df9dabb24aad3b9a14f76bcaaf5\n",
|
336 |
+
"72 d290aae57d883ac77e882f082725753\n",
|
337 |
+
"73 d29d63a276a23f42f2011975bbb1432\n",
|
338 |
+
"74 d2914a2414672f2edc55afe090faa68b\n",
|
339 |
+
"75 d2943dd2a17e8b96766f47a75beceb6\n",
|
340 |
+
"76 d2915b7155c94423748e2a5f102d273d\n",
|
341 |
+
"77 d292f2a2d138a6606ec6acbea4d4b8a6\n",
|
342 |
+
"78 d29bf6fa58a7401fb2efd8f7b55473e\n",
|
343 |
+
"79 d29a7085f4c7a3ef1caab1d33c7772\n",
|
344 |
+
"80 d29e29aa2149be589126184fa6ba95b\n",
|
345 |
+
"81 d29d6e2963309e7b5c5978ea71f593\n",
|
346 |
+
"82 d291adb1933c79228e1fdfe1762f5b9f\n",
|
347 |
+
"83 d294bcf7cc7eeb78fa6439c66c135359\n",
|
348 |
+
"84 d29d12c15f67b3b6968abe771a5fd0\n",
|
349 |
+
"85 d29ba798d4f1c1e8cbcb46beda14c8f\n",
|
350 |
+
"86 d29056682c23fa206f7c952e512499ee\n",
|
351 |
+
"87 d2969725825c9da68e49c0e7be8daf1d\n",
|
352 |
+
"88 d291bc5a6b35c53f4117ad2415baa3\n",
|
353 |
+
"89 d29d45ca9fb464c9c60561fae2948\n",
|
354 |
+
"90 d291d29ff97c66bf1c6b4c956ed81\n",
|
355 |
+
"91 d298d5e0d479c7ec8add78c4aa80\n",
|
356 |
+
"92 d2943f69e59528b6e4a9a696763545\n",
|
357 |
+
"93 d299e68a3d7d50448da951f8792693b\n",
|
358 |
+
"94 d291ea441ccf3cc8f34073d4ce4d8d20\n",
|
359 |
+
"95 d29680dcc84f825828708b3d9427a8ae\n",
|
360 |
+
"96 d291201e72010356e91c88917aacfc\n",
|
361 |
+
"97 d297a5b71895f7bfbfb3d156d8ed3b83\n",
|
362 |
+
"98 d29d7e64a92ea1bd6ba08ff76c1a3bd8\n",
|
363 |
+
"99 d291f75dc99bee639fd680af7e7a4fb0\n",
|
364 |
+
"100 d29ddb6ca8a49f3369733f4d3a8887\n",
|
365 |
+
"101 d292ff4251b9a36b19fa6cbbc87851f\n",
|
366 |
+
"102 d29ca1839cfbb1c40ac2ba9cb30dea3\n",
|
367 |
+
"103 d29919029e2eeec95d7a41e9472e86e\n",
|
368 |
+
"104 d29b393acb76bf3dd45f1b4ef4f513\n",
|
369 |
+
"105 d29bcff1a7b2f4b109b42e99a49cb7\n",
|
370 |
+
"106 d2909c7f45b39247ba0c2ed811067ee\n",
|
371 |
+
"107 d29964417f32c1420bf235613af9c9\n",
|
372 |
+
"108 d29a9e825141d6605e6d9c4e658ae7f0\n",
|
373 |
+
"109 d29f2e7fea71dc911403e8c2b12414f\n",
|
374 |
+
"110 d2999b51a0e9a37f7d670ac979ae8c0\n",
|
375 |
+
"111 d2944c2f78b758c1b141104492f3f2\n",
|
376 |
+
"112 d2934e0baba9f344e85f7326e902b97\n",
|
377 |
+
"113 d29bda80ffd5d46aa798d715ff2c0\n",
|
378 |
+
"114 d2912c43811337b61f9c119f35781e56\n",
|
379 |
+
"115 d2945a077f98fdbde0fca07e22a41\n",
|
380 |
+
"116 d29aa68847eb4a66e71ab2626f84e\n",
|
381 |
+
"117 d291edb7ec46c41048967ab971c3c29c\n",
|
382 |
+
"118 d29ba93b2642d9f937316694da15b22b\n",
|
383 |
+
"119 d292aa457e9d7c8ffe8e55761f7d4f5\n",
|
384 |
+
"120 d29da11457a35654323fa3d93834a34\n",
|
385 |
+
"121 d295ba252458b15eab8957ac679509c\n",
|
386 |
+
"122 d299e43dd7d1ea48ddc02a823ddd7cc\n",
|
387 |
+
"123 d29d94f1c378e42b6fe990f76b94fe6\n",
|
388 |
+
"124 d29c52f12fede24a2caf6170655b558d\n",
|
389 |
+
"125 d2927cdb877b23f7c8356f8619aafc\n",
|
390 |
+
"126 d296d62fbd28d32a4d6f9f42a336ffdf\n",
|
391 |
+
"127 d294691643464e92e6afea29b7e6784\n",
|
392 |
+
"128 d29e9baa87e9b85136f85a397bcd126\n",
|
393 |
+
"129 d29a539bdde71c478e3655983d1ea\n",
|
394 |
+
"130 d29ff2366b398bac938d46e82945141\n",
|
395 |
+
"131 d29998f21f7db92b129b3567da7a546\n",
|
396 |
+
"132 d29c9c1908eeb3fd0fd8737ba53b59f\n",
|
397 |
+
"133 d29ae0f149cc6b67fefcaa79add5b873\n",
|
398 |
+
"134 d2988df2a66eccca85d12020f1896558\n",
|
399 |
+
"135 d29f85ed0efd46c7a46539e18a622\n",
|
400 |
+
"136 d296bfdc77c0cb5479361fdee3fa6\n",
|
401 |
+
"137 d29479e2d8f288ea2445659243bea37\n",
|
402 |
+
"138 d29bed60bd16db97ef5ec5f65cb7f1\n",
|
403 |
+
"139 d29595ef1a163f68659b56815b2fb21\n",
|
404 |
+
"140 d29f6ef4562c75bdc5d82dfd49394a\n",
|
405 |
+
"141 d2911c3e4ed5a8574991583e9e713f49\n",
|
406 |
+
"142 d29094b0977ffff0ff18a7715a96bda6\n",
|
407 |
+
"143 d291a0104cc6b978ddfb6d9eee0ec7f\n",
|
408 |
+
"144 d29d542a1dd0a0682c355e7ad45db14\n",
|
409 |
+
"145 d29f79dd32c418886791cbdb4ebe90f1\n",
|
410 |
+
"146 d29aeec1dced477c94bd396fc3127\n",
|
411 |
+
"147 d293eaabfefd938d71278825339494\n",
|
412 |
+
"148 d29e64339b9568b37fd4bdd7edb9836a\n",
|
413 |
+
"149 d298a529ba8a82918fe096e81110981e\n",
|
414 |
+
"150 d29ce5cf8539329ae4204e6ce327b2b\n",
|
415 |
+
"151 d29142b3b0e68e94a8522dfd3a2b690\n",
|
416 |
+
"152 d29e96e6b1b7eff768dff0ed8955edf1\n",
|
417 |
+
"153 d29265dae85a59cdd9615daffbcefe5\n",
|
418 |
+
"154 d29d9b1d50f747cb9adfc2b32fc32c\n",
|
419 |
+
"155 d29c5e12470d4e366c4bb8fa9baddc\n",
|
420 |
+
"156 d294fe93887dd377520c64839dc139\n",
|
421 |
+
"157 d29371933762af2ed19cafc3dd4fabd9\n",
|
422 |
+
"158 d2981a42980199daf93915c1eef93e\n",
|
423 |
+
"159 d29ea4ef385cf37b9045b06b4ba7252\n",
|
424 |
+
"160 d299a091d31f65129d3396a58f92642d\n",
|
425 |
+
"161 d29f3628139676f4ceb6bfe0cbc9aaf6\n",
|
426 |
+
"162 d29b114fa2616f46bcebb37f5d9f59d\n",
|
427 |
+
"163 d299a8d174e1f54ec383a241a8bb50ae\n",
|
428 |
+
"164 d2959843a6d4fc265637b63543a9419a\n",
|
429 |
+
"165 d29638c081713aa4dc5cd32ce88c158d\n",
|
430 |
+
"166 d2967886b6b0cbfb6b98b175822c956a\n",
|
431 |
+
"167 d29a6bc47f95f7668fbf43a1a1e947\n",
|
432 |
+
"168 d29e6265a813f02f2dcd114e5f2748be\n",
|
433 |
+
"169 d29960478ff0d270e822e84644638f7\n",
|
434 |
+
"170 d290a8ea4c1d4fda164e6ba0f978876c\n",
|
435 |
+
"171 d2934645777ae1ec25b2a4dfeb6036ed\n",
|
436 |
+
"172 d299ecad1261a988e0dcce7d1c7f6c42\n",
|
437 |
+
"173 d29b6eb6dc77c7be34481fa54754c\n",
|
438 |
+
"174 d2956739c7f8791ed7d4aff4f92b948\n",
|
439 |
+
"175 d293aa871b9e4c6913c43e91cf48040\n",
|
440 |
+
"176 d292d08afc78df9d38f6f1f1f28ec0\n",
|
441 |
+
"177 d29f448b6f32d4b9b64719beaaab3e2\n",
|
442 |
+
"178 d29e4bf8ed852972ff75aa6e4e6964\n",
|
443 |
+
"179 d29a2088d388e1799d144cdd88025ff\n",
|
444 |
+
"180 d29ad328dfb255da22af6aa72f15888\n",
|
445 |
+
"181 d29cf0f71eee59ccdecc8cb9e1aef482\n",
|
446 |
+
"182 d29a84ccf19a324aff1ece8d1831c8d\n",
|
447 |
+
"183 d298b98b2dcd287d31b7e5446ba8284\n",
|
448 |
+
"184 d29055e4ee873415ae12477e46fba\n",
|
449 |
+
"185 d2922e6430bb75a26072149343cc191a\n",
|
450 |
+
"186 d2955ecab0c76472d6f9dad25d165b8b\n",
|
451 |
+
"187 d29113a376ed16c6c1a65370cb845e65\n",
|
452 |
+
"188 d2944e25fb89b2466feb547d29e63975\n",
|
453 |
+
"189 d29d40ee1841d4dcf143925564f347\n",
|
454 |
+
"190 d2988f40eb3522d26574e7947840e116\n",
|
455 |
+
"191 d29fbc1e7c3c137f783ba57222cd98f6\n",
|
456 |
+
"192 d29fab615e3124424ba7eb1726c64b\n",
|
457 |
+
"193 d29e8e17d964c0aea5e52fe3c8c28270\n",
|
458 |
+
"194 d29b2838685267fc64f64a41211df2f5\n",
|
459 |
+
"195 d29a8a9d8edacfece418c8ea31eaf6\n",
|
460 |
+
"196 d29d6d371f46b5a31dd175a8a7f41\n",
|
461 |
+
"197 d29fbb7cc17c4e374232af541d4e4240\n",
|
462 |
+
"198 d293279e99aecab63ef09872b93f8d\n",
|
463 |
+
"199 d29fb59416e4c74dbed06129b130d828\n",
|
464 |
+
"200 d290dd1a636d24fa2fc96ef667f91a3\n",
|
465 |
+
"201 d29dc37487691ba4417611ab9b1187\n",
|
466 |
+
"202 d29f93114ac4dc3baace4d5e0bd3e57\n",
|
467 |
+
"203 d294259c4b3c35a1559d7f742a36c034\n",
|
468 |
+
"204 d29470f8213e53e2aa7164f3623ccc1f\n",
|
469 |
+
"205 d29faf1d9d6b7f77deb5385ea85111f\n",
|
470 |
+
"206 d29b18455b80dc37d81024d1ab6a99f\n",
|
471 |
+
"207 d29eb148320276099cd37f74b5b9ed8\n",
|
472 |
+
"208 d2961799bc3a8866450f9dedf3753da\n",
|
473 |
+
"209 d29826568875bdfe2f9b479a7e64ac4\n",
|
474 |
+
"210 d2928972156f2e5372cce81c5434655\n",
|
475 |
+
"211 d2992f331841f8c161554ea0f41faff\n",
|
476 |
+
"212 d291c33cbc5075d4be40b790c7435e\n",
|
477 |
+
"213 d29c655b787cb0e02be7272a57686cfa\n",
|
478 |
+
"214 d2939d29b1d1d5c3aa21d3b46127a1f8\n",
|
479 |
+
"215 d29839b090823f6d915a2bf4338b5b\n",
|
480 |
+
"216 d29334188dda37bfc48b84d1c2ad9f\n",
|
481 |
+
"217 d29e7e8ebfaae9b98b4141eb5242fb1\n",
|
482 |
+
"218 d295057d1bf5e65929485f92a4c1ab\n",
|
483 |
+
"219 d29184e22ba47ae9c449fc21086a82\n",
|
484 |
+
"220 d29e384d3221b25ec6221376c1a025d8\n",
|
485 |
+
"221 d290b2c211d229a262082c646856d57\n",
|
486 |
+
"222 d295472dea1c94a5a4a2443f83b82\n",
|
487 |
+
"223 d29ca6ac3858de4c4a0b6fd88547aac\n",
|
488 |
+
"224 d2956a39fbc3081f9fe394aff50aa\n",
|
489 |
+
"225 d29dc4779f78f63bca6252c4bd65bd46\n",
|
490 |
+
"226 d29960f8b3be7860b0af7497bd16d6bd\n",
|
491 |
+
"227 d2912134838dc88b389b60571f73c358\n",
|
492 |
+
"228 d295fc5a26d7da3cfa4fd4a824bf0\n",
|
493 |
+
"229 d29b4efbfa2b68093ee70fb2381fbfc\n",
|
494 |
+
"230 d293694f84e8ffafa8f42d55941bbd76\n",
|
495 |
+
"231 d29833fb44ce2852ef764a8fa87631d4\n",
|
496 |
+
"232 d296962ad593259fc695a76170c4f097\n",
|
497 |
+
"233 d2915c686462926e7e53b112ddcd1ac2\n",
|
498 |
+
"234 d298a5dcab9e7da513cf278d3e19f2\n",
|
499 |
+
"235 d29266df8098c511507828f4632363\n",
|
500 |
+
"236 d296413b2724e3c51954834ded46211b\n",
|
501 |
+
"237 d29514a1ed5b50893427ebefe1e5f8ab\n",
|
502 |
+
"238 d298bce28583de4cb34f279882d66bd\n",
|
503 |
+
"239 d2906ed28478ab13689e4ef0165376a4\n",
|
504 |
+
"240 d293d91cc7bad938f8cf20b372ac93b\n",
|
505 |
+
"241 d29fc131a3fe8ca955e6b59d2c8c981\n",
|
506 |
+
"242 d29215ab9be9ff9185479e24cf4ce56a\n",
|
507 |
+
"243 d29f9bc02cb942b447e9f99db525c\n",
|
508 |
+
"244 d29149d031f6d431bd3015219f275bd5\n",
|
509 |
+
"245 d29aae5bdb354bd96e46cf598dadb0\n",
|
510 |
+
"246 d2932362faf4ca9ac8697cbdd6c34bf\n",
|
511 |
+
"247 d2929a57d3d8a6bea06b5f4f9afb452\n",
|
512 |
+
"248 d29a5ac5d9ac764db856509640a5142a\n",
|
513 |
+
"249 d299c482f3314c3dad947db3a3156\n",
|
514 |
+
"250 d29d1183e0959ae7ac6c8e7b66374db\n",
|
515 |
+
"251 d29953bd569ce38442fc69fbcd8bd83c\n",
|
516 |
+
"252 d291a8ac4849b85a59b8c68dc752d0\n",
|
517 |
+
"253 d291e4aed5fb43f4f325d11ad66af251\n",
|
518 |
+
"254 d29675b4a3dfee18769a3ac674c1f3f\n",
|
519 |
+
"255 d29e648fed5b29ad1c152aa9cea0c6fc\n",
|
520 |
+
"256 d29b629bab26f59f9117f69a679db8\n",
|
521 |
+
"257 d29c593a5f55b48a9aefc728a6ccda8c\n",
|
522 |
+
"258 d29a4fe79f1cf5298b9feac44547d56\n",
|
523 |
+
"259 d290c12092a99314f219c6a4e6387eb1\n",
|
524 |
+
"260 d29fb6bcfb2d1e53fe919c26df36b578\n",
|
525 |
+
"261 d29482e12c94103037566fa2b227b21\n",
|
526 |
+
"262 d29e6ccc3336f45a4f216c83d5f72f1\n",
|
527 |
+
"263 d2931ac79c95d7526bf543407a54c3\n",
|
528 |
+
"264 d29450397c2bda2c15071e551dc4cfe\n",
|
529 |
+
"265 d29bd6f895319993cfa28439d6f561e7\n",
|
530 |
+
"266 d29038e417e3faf2e364fbae694f1a5f\n",
|
531 |
+
"267 d2958160bdd4a498a8fdbb78f464b7\n",
|
532 |
+
"268 d29b8defd5fc3ddf3048cf72176f19b\n",
|
533 |
+
"269 d296bb80e1db5d5fd3379dc8d92122a\n",
|
534 |
+
"270 d294c804331bd4d1c57f01642f61f7\n",
|
535 |
+
"271 d29461e7a136d19d529d286f5f2fdf3\n",
|
536 |
+
"272 d291d4646e9cddd811b3944076644810\n",
|
537 |
+
"273 d299760693b62a5eae3f1b3b075d75\n",
|
538 |
+
"274 d299bbb989b35ffd8f1ce25446f0d78c\n",
|
539 |
+
"275 d29962a38b71686cf1b772e97325a78a\n",
|
540 |
+
"276 d29f63192e11d1199628d01420131fef\n",
|
541 |
+
"277 d296de83a8761980102db948e468ae6\n",
|
542 |
+
"278 d291ab4d3f535975431e6af3b4640e6\n",
|
543 |
+
"279 d2978ef0cb5ef834b43202e51e33dc1\n",
|
544 |
+
"280 d29258d726278a56dd3919cc188a896\n",
|
545 |
+
"281 d2915e331f6388e5ca27935d52b9148\n",
|
546 |
+
"282 d29c604cc496ec9f12437c94fbac2864\n",
|
547 |
+
"283 d29a46723eb8d9632698ca567bd5568d\n",
|
548 |
+
"284 d29659d98358b82f7f5db4ef5a5e\n",
|
549 |
+
"285 d296207f2b1399f9f1af5dfcda227947\n",
|
550 |
+
"286 d2919ae436a2fb4c57d3b033b335a9ca\n",
|
551 |
+
"287 d2934474c6a34db57e3ff64a1845aff\n",
|
552 |
+
"288 d2992c53edf3df75ba139316ee933b\n",
|
553 |
+
"289 d29c21b8de09a7635e787d148742371\n",
|
554 |
+
"290 d29dac31e0d9ae8a8b1b9dc4463fd891\n",
|
555 |
+
"291 d29f958ccbae9457c11cb31f7d96bb5c\n",
|
556 |
+
"292 d2928d424132804e6fb2732910640\n",
|
557 |
+
"293 d29947c65eb219959c9ca5e701e75ce\n",
|
558 |
+
"294 d29c5d39572d4358239862d22b36ec6\n",
|
559 |
+
"295 d2925f6193f32511b1afd8a1508a6179\n",
|
560 |
+
"296 d29a91e0249311e7a9d58675153b1d33\n",
|
561 |
+
"297 d2997d5a8416e7371663096084e3\n",
|
562 |
+
"298 d29519b713f78f3d8fa21622c7681af2\n",
|
563 |
+
"299 d292f7f7a6f8b994f62993eed8482f39\n",
|
564 |
+
"300 d29b243ee2f2b8b043d2f8f09d611ce1\n",
|
565 |
+
"301 d290d43bb17551605962218450a0f179\n",
|
566 |
+
"302 d29ffa62178f054ec2094ed374db1c1\n",
|
567 |
+
"303 d293de44bad13998ce435eb50b7bf\n",
|
568 |
+
"304 d293e015cf9f815306545116ad229ab\n",
|
569 |
+
"305 d2919a98989c3b9758b76ca2f1c7379\n",
|
570 |
+
"306 d293ecf2a8a2e3e2074c45719737e91\n",
|
571 |
+
"307 d29fa524eb54e62d943e9bfa7c489\n",
|
572 |
+
"308 d29d70e5f39dd79c41bfa01367b3d96e\n",
|
573 |
+
"309 d297a1b5faf9c99b1a862c8cde3a0d5\n",
|
574 |
+
"310 d2954ff7073f7b8889b8fdaf9bf803\n",
|
575 |
+
"311 d2934a55774d6351ca6f0bf85ae6bbd\n",
|
576 |
+
"312 d297d2161bdeda48298bdf679089240\n",
|
577 |
+
"313 d29dfbb7e8746e19c769ed08cf9ab4\n",
|
578 |
+
"314 d29eac9d5137123aa873d7872a266572\n",
|
579 |
+
"315 d2989a38a0351440c9c3ea531f48b782\n",
|
580 |
+
"316 d294c2e6796f8f38524c4f36196f7d6\n",
|
581 |
+
"317 d29e843d1b3e22f97c2121d849eb43\n",
|
582 |
+
"318 d29b8763a0409c173651f57d2cc732\n",
|
583 |
+
"319 d29fe485da174c44309b2d2893666583\n",
|
584 |
+
"320 d29fbf9d5b504081915276afb3ff171\n",
|
585 |
+
"321 d294b740bb69be81a4a05e945afcd26b\n",
|
586 |
+
"322 d299cda76b9a1deafd15c8d029c814ca\n",
|
587 |
+
"323 d29f87767b65523073bbc0d62f4a1137\n",
|
588 |
+
"324 d29a59c28c2a20c9ea582bac4f3d803c\n",
|
589 |
+
"325 d29292389f7dfef416fcc59f3dab3445\n",
|
590 |
+
"326 d29f3eca85aaac96afd5b439733ce3a\n",
|
591 |
+
"327 d2924afd49df91267aacbcd3a55d87e\n",
|
592 |
+
"328 d299228447c3965210c9e2a0287d62bc\n",
|
593 |
+
"329 d299dfd6e5ed848f24adb34a1e66fa81\n",
|
594 |
+
"330 d29248b6cd344ac354ed3573c2ead3\n",
|
595 |
+
"331 d2917a5b3d58c3a1bde2f19029d4db1\n",
|
596 |
+
"332 d29a88af89763c811ad42f71e1adcd\n",
|
597 |
+
"333 d29fad4f12b05bc6316078f393c594fc\n",
|
598 |
+
"334 d293f6cd8ec2298d61c4c51bb43cb71\n",
|
599 |
+
"335 d298e6a75eadc0c941252a9f2a7d53b1\n",
|
600 |
+
"336 d290e285c5b531f06c921a5f20874ec6\n",
|
601 |
+
"337 d29d3cb79958d1e43568f1c63d34e39c\n",
|
602 |
+
"338 d29bbde017cfdde5da2e3fe91dc17e15\n",
|
603 |
+
"339 d2992c66109486fe8e2585e81e547c94\n",
|
604 |
+
"340 d29e5c8da4a9c63a963321b74cbe1b9\n",
|
605 |
+
"341 d290552ac332bf7fe8925333cf3a132\n",
|
606 |
+
"342 d29d596ffc266f1bc5f0637472499d55\n",
|
607 |
+
"343 d29beee88cc15b282a2dda98d6331f5f\n",
|
608 |
+
"344 d29357e935842c476dff143abfc5d1de\n",
|
609 |
+
"345 d2968522cfd5bf7d71331553902af0\n",
|
610 |
+
"346 d29b589144c4cef74c5888dd073b4\n",
|
611 |
+
"347 d29f6794bb3380d5388abc10c1d18b68\n",
|
612 |
+
"348 d294f6f9b9d1f9aa1cb516a095199c25\n",
|
613 |
+
"349 d2977b12141f2a60378f424936328c2\n",
|
614 |
+
"350 d2909ecedecb8fcfb434e1e563ac\n",
|
615 |
+
"351 d293365d7f87b7390888e6df1e8b2b8\n",
|
616 |
+
"352 d296aea77833d92f0a596fe811b28f9\n",
|
617 |
+
"353 d29fc025178de971df2b671296e885a8\n",
|
618 |
+
"354 d29d3e58a9f013c875a9728dfb7bc5b\n",
|
619 |
+
"355 d29033a3c4e8957464ad3290a213e8\n",
|
620 |
+
"356 d29c12a61ed81d6ec44664df4dfcf\n",
|
621 |
+
"357 d29fa842cadb65b73211e71a91f160\n",
|
622 |
+
"358 d29f5d646fc70ded5b8147931cc42bc\n",
|
623 |
+
"359 d2929e6a9fb9d2b0dd2d6ed6212e1c44\n",
|
624 |
+
"360 d29719b14e693377102b3c4153208158\n",
|
625 |
+
"361 d296f8b2a05c5c840fd3bf274d24de\n",
|
626 |
+
"362 d29d8ddb8d04a849453aebafb197ff6\n",
|
627 |
+
"363 d29923d45ae9d1ce1b2f013d93cf076\n",
|
628 |
+
"364 d29d77467587445028d29d9bb078739\n",
|
629 |
+
"365 d293ad8814801a96ef38962c19c2fe2\n",
|
630 |
+
"366 d299588fa03492cd5dc1f4699f66c6f3\n",
|
631 |
+
"367 d291c3e7637a0827245c44ad9afd09a\n",
|
632 |
+
"368 d29599987170f4af26232b57acd2\n",
|
633 |
+
"369 d299c7086bd48d25ff6ed1bdbfd2fc6\n",
|
634 |
+
"370 d2926f81cde09deba0c1c6267496ead3\n",
|
635 |
+
"371 d296ae4789c8813ae39495c3d6c574\n",
|
636 |
+
"372 d29f75b10f3ffbafced8c67fa76fb55\n",
|
637 |
+
"373 d293cec43d8db67d452f15c8796c3fd\n",
|
638 |
+
"374 d295132ca6905e8ed6ad506833d2061\n",
|
639 |
+
"375 d29cb2d78a6c4714148eb5bbe9b5b64a\n",
|
640 |
+
"376 d29ee1d7ab8a5d5246edc558c6faea\n",
|
641 |
+
"377 d297cc2b8df01184d81cc24b0ccf450\n",
|
642 |
+
"378 d292cb986c671084d12f6036607af6f\n",
|
643 |
+
"379 d29dc92b9e1f40308b373c1feaba64b6\n",
|
644 |
+
"380 d29f68ab824de5c9641813ca85ac691\n",
|
645 |
+
"381 d29c1d9c597176505d43699a47d7ee2e\n",
|
646 |
+
"382 d291acdbd759cb8da5cb32ec36ff7c8\n",
|
647 |
+
"383 d296d76b5e39f47645de56169e8b75a\n",
|
648 |
+
"384 d29cdb95cfa957d17f5c598e9a7d1fd\n",
|
649 |
+
"385 d29afe6fc2ba2256a1d54d1841fd09a\n",
|
650 |
+
"386 d2926e08a211c431631b9e3ad1ff3a\n",
|
651 |
+
"387 d29c978c20ada05bbda59b5b4df01946\n",
|
652 |
+
"388 d2905d2da31be4d896e01cb989b7a33b\n",
|
653 |
+
"389 d298a3fcabed7531fa21f6e31ffa44\n",
|
654 |
+
"390 d2912f36916842608b6f026888558\n",
|
655 |
+
"391 d29979bec95bd937836ad8f513c4343\n",
|
656 |
+
"392 d29a198573f1bdd60deabe1a1d0e669\n",
|
657 |
+
"393 d294a57d4c3b634cba3c9786f83d746\n",
|
658 |
+
"394 d29876388196118296732cc9de5c4c3\n",
|
659 |
+
"395 d2904fcf257ab1d071adb3d0197fbea6\n",
|
660 |
+
"396 d29de6c749995a657f686cc7312a66f0\n",
|
661 |
+
"397 d29c1db1e79d573aaee391d16536b764\n",
|
662 |
+
"398 d299d9295eec3275ac6766c777828a59\n",
|
663 |
+
"399 d2921a88927731d2f586f923e8a83fc\n",
|
664 |
+
"400 d29e39a136dad6ad234debf2f8d4facf\n",
|
665 |
+
"401 d294cb1721f626926b83e263f18b55\n",
|
666 |
+
"402 d2905bc59d4c1b55d3cd1bd2c5abd15c\n",
|
667 |
+
"403 d294d51d4d2df9f7bd7ecc5a7fb8011\n",
|
668 |
+
"404 d290c0f68de49420548fc674cf29b01\n",
|
669 |
+
"405 d299d4a536ccb9f0722cc4fa44a32f13\n",
|
670 |
+
"406 d29a3db0e17adda5d1b81986dbdbaed\n",
|
671 |
+
"407 d29a8087a55cb855bf4019a44bb1dcf8\n",
|
672 |
+
"408 d29fc641d016cafe244f5e1cd96ee2ab\n",
|
673 |
+
"409 d29bcf7bfe6309e8f64a65d1b4f8e51\n",
|
674 |
+
"410 d291bf19c28b57b8b86e43d04f2bcac\n",
|
675 |
+
"411 d292bf6fa98d48cd5549b022328fef64\n",
|
676 |
+
"412 d290644911735ee948b705a85e35c8\n",
|
677 |
+
"413 d29df0f5a31d7cdc1b8d37ff824c9832\n",
|
678 |
+
"414 d29450e886b5b1f825a9ebd50693638\n",
|
679 |
+
"415 d293554f84d67d1d40fa834f2cdc19c5\n",
|
680 |
+
"416 d297f65eb6778396ffa0fbc4a19217f4\n",
|
681 |
+
"417 d291f16b3f979b1d8f5b6e90ac289073\n",
|
682 |
+
"418 d29a9d3159b08ef8c7c0c59e90579758\n",
|
683 |
+
"419 d29af2f570815fc2ca7475d4c994e28\n",
|
684 |
+
"420 d29915b11f3faba2ef25837b5e2f3\n",
|
685 |
+
"421 d292ba3bdbce131b4b8eaba527568c\n",
|
686 |
+
"422 d29d4f93ee7b46b311aa51239528f\n",
|
687 |
+
"423 d291a3a9822042b33f6fcb1c9784923b\n",
|
688 |
+
"424 d2994c646d1a58a51e059e4866794f\n",
|
689 |
+
"425 d29f5ae959310c31a201342a95ad0be\n",
|
690 |
+
"426 d29b56938e0e851fea353dffcf7bdfa\n",
|
691 |
+
"427 d29db91d89a1a8768ad269a57768755\n",
|
692 |
+
"428 d29af5cbce13275d12295feb3c634838\n",
|
693 |
+
"429 d29cf5c5ab5aa85e642e48e6af7fd21d\n",
|
694 |
+
"430 d29fc380ed8a16c5ac9166a2b7a5587\n",
|
695 |
+
"431 d29b43aaea1f53f10fe7a6dbada\n",
|
696 |
+
"432 d29f689aa8dcc25f3e3633426a192a\n",
|
697 |
+
"433 d29053b6bd4b624167afbd4f517b4b95\n",
|
698 |
+
"434 d29145cf1aef12781ba46fd06bf974a\n",
|
699 |
+
"435 d29dc0e926c665e23678acdeea24aaac\n",
|
700 |
+
"436 d29c8e62cbfe4d14a646c9bbc8fcca5\n",
|
701 |
+
"437 d29e8837c56f5267630e5ae169bfd7\n",
|
702 |
+
"438 d29b74122114c49f36ab28ee4b97a2b\n",
|
703 |
+
"439 d2987c6c6453c84fb481f9cf337a3c7\n",
|
704 |
+
"440 d29173c29c7c5fcc92299e4b26f5efa\n",
|
705 |
+
"441 d29a3731ab79ce73a4d593b9fec56cd\n",
|
706 |
+
"442 d29aa2edfcd3bb4fb534027c494881\n",
|
707 |
+
"443 d29d6222f59352c7883c9851cbb8d8d\n",
|
708 |
+
"444 d296ed54c0a533659da6d5c8cdeed961\n",
|
709 |
+
"445 d29f7ae54b96b167c4f3eb5772ba67\n",
|
710 |
+
"446 d2962af850a2dcfc528eb58175e373b\n",
|
711 |
+
"447 d29ce7f2934f6874fcebd63986193911\n",
|
712 |
+
"448 d29a34ff699ecd4f359039902365f2ac\n",
|
713 |
+
"449 d29da8178da009efa3663ef49a5bbbc\n",
|
714 |
+
"450 d2986f88e06afb263353345729bc9fb0\n",
|
715 |
+
"451 d297313017d2278a6b5414f0c32045ed\n",
|
716 |
+
"452 d29492bd2642d4c1cb38cf556ba15141\n",
|
717 |
+
"453 d292040f0d9472364211ba7f61858e\n",
|
718 |
+
"454 d29b7145e356aa5c5636e0183ddb92e\n",
|
719 |
+
"455 d29c45ab915f4d22ea37bee6b340c43a\n",
|
720 |
+
"456 d29854a78878f17d04a4465ef1c1d33\n",
|
721 |
+
"457 d299c9d4b94b8a6aca9a8c59a1a3d92\n",
|
722 |
+
"458 d2994c6132ce56145bb5cb1040668360\n",
|
723 |
+
"459 d2957b49289677c103bfa3c8c426ee2\n",
|
724 |
+
"460 d29f6a26cd131dd82b3b54151431a4f8\n",
|
725 |
+
"461 d2931df6c55137e6aa899bad4e4b02\n",
|
726 |
+
"462 d29955bc44e4a34b495b72a355cb88\n",
|
727 |
+
"463 d29c1dc294983dee299fc1d81caa72ad\n",
|
728 |
+
"464 d29b5b3e656eb0ffc0494e01532cdcb\n",
|
729 |
+
"465 d29fceda623a8e845eec231294b34289\n",
|
730 |
+
"466 d2981ba75e6c7a85fe7dfceaebf659\n",
|
731 |
+
"467 d2987a6bc7bfc7a2c6aff6fb466652e\n",
|
732 |
+
"468 d29ab7c2bc5df4e1eda994590eeb294\n",
|
733 |
+
"469 d2925d7b49ad90233c57b664e6ed6879\n",
|
734 |
+
"470 d29ab251b6ea37c9868bf7724850e0a1\n",
|
735 |
+
"471 d299902666d823548fe9cce52d60bad0\n",
|
736 |
+
"472 d29851d275ac962358b158e0de1b13\n",
|
737 |
+
"473 d2944768b1e5c897c545cbf240ea5367\n",
|
738 |
+
"474 d29c87dac6749e7ecfac0860edab4b\n",
|
739 |
+
"475 d29bf455ae5baf7df36aea784cad8edf\n",
|
740 |
+
"476 d29d9e3fcba5e57e95b8a091c11a4e18\n",
|
741 |
+
"477 d29de84d67cb66a9b4f68a4e36244d\n",
|
742 |
+
"478 d297f29c3c57ba46c5e4759d7e275b92\n",
|
743 |
+
"479 d299864d4459c82c9d417123fb7ed0e0\n",
|
744 |
+
"480 d2956a07cc7b735cb2fc39940e079c2\n",
|
745 |
+
"481 d29e8a8d225666058d1519cd1d9bcb\n",
|
746 |
+
"482 d29ea1a95113e741e995195bef381eb\n",
|
747 |
+
"483 d2924d93a760bbb9fdaa3580d6fb40\n",
|
748 |
+
"484 d29a83c0ff462b1bee23b4914e28470\n",
|
749 |
+
"485 d2933dd155f25bacd950a3e77785ae53\n",
|
750 |
+
"486 d293a48ce261f88a756931de8fe812a2\n",
|
751 |
+
"487 d2923e4fb7105a50964af5f793bb312\n",
|
752 |
+
"488 d29ba0eb4b1aed94e2f514fa22d3144c\n",
|
753 |
+
"489 d29bb961e35c744ceaa8d934f56c38\n",
|
754 |
+
"490 d2996b3456a45a3d41238d9776d2e8d1\n",
|
755 |
+
"491 d290558fbcd5a1c42c88e6a688391d5\n",
|
756 |
+
"492 d29ca6fc962ba76eb2f6dd94dfa1b51\n",
|
757 |
+
"493 d292e6dae1547d0c1771389fe9454d5\n",
|
758 |
+
"494 d29fa8e372d613b21f876bc4e861164\n",
|
759 |
+
"495 d2954d2b6a9d4e255f22dd209678\n",
|
760 |
+
"496 d29cb4599b081c0fb829e394c3e3b67\n",
|
761 |
+
"497 d29b8a9a436ed6e2b57f994238cfc3f\n",
|
762 |
+
"498 d29dd04bba37b924be68b994d1ee2e4b\n",
|
763 |
+
"499 d293db1c82edad55c8a8299e6b438793\n",
|
764 |
+
"500 d29886f24fe094ddb7e46aaf5e0dab6\n",
|
765 |
+
"501 d29dc2bde2c4827c70ec2be14799bc\n",
|
766 |
+
"502 d29e5578df4d7f59092a02be4bce080\n",
|
767 |
+
"503 d29d7a2f8b3f1bba983d6f2966f9e7d\n",
|
768 |
+
"504 d29e81db43bf6a7161d2ec361371ff\n",
|
769 |
+
"505 d29744b420d887a067757a3545822e\n",
|
770 |
+
"506 d2907f4872a14b616ad6559a6bd68414\n",
|
771 |
+
"507 d2919bfb4baf1ce16c76b5cffebd4177\n",
|
772 |
+
"508 d29dc9ce25a91e984119614aba432934\n",
|
773 |
+
"509 d29a39a665d71a1f95bd630a8f7b798\n",
|
774 |
+
"510 d29e5e4fe2bb33d13aca84411dfe\n",
|
775 |
+
"511 d2923a1af99c914ca2332a25cfc219a\n",
|
776 |
+
"512 d293a3fc5bebde784f86994dcaaafcc\n",
|
777 |
+
"513 d2937777a67145a58baf14705ad36ba7\n",
|
778 |
+
"514 d296453ce16df5aab685a113b28d1a0\n",
|
779 |
+
"515 d29e9754b3c64e1a4403898d9c91893\n",
|
780 |
+
"516 d292a6e2c0d1928ddb1e773cbfaa4b6\n",
|
781 |
+
"517 d2994c52c4139a85acb2643be15ae92d\n",
|
782 |
+
"518 d29e843f3a842ff08412ad598a6c8f72\n",
|
783 |
+
"519 d29b8be313f882b734a4c15de6578\n",
|
784 |
+
"520 d297cc9a5828aaa149722a0a99bd31a\n",
|
785 |
+
"521 d29f53d4d383ec62ede38be2624ca47f\n",
|
786 |
+
"522 d291a8d61746094db6cfb5a37a5e811\n",
|
787 |
+
"523 d293f937fe7825ef9266c34b0ab47e\n",
|
788 |
+
"524 d29bc0c163c4dbf2c82b3648cb9ddd\n",
|
789 |
+
"525 d29a2340f91e4bb4c3982245f5abe91\n",
|
790 |
+
"526 d29e72d22e7849766784a5c1d57e1898\n",
|
791 |
+
"527 d299ae8d8e8e5b1479981ba7e3d620d8\n",
|
792 |
+
"528 d29fd3352226e4cc839cfb19ff05a61\n",
|
793 |
+
"529 d29e74ce55d334f67db6ec34837b33f4\n",
|
794 |
+
"530 d29c3d44fb84d8f711b37b742d957b\n",
|
795 |
+
"531 d294236f1cd30b04c665afbfefa1a3\n",
|
796 |
+
"532 d29ac034f114febf34847edf9d36f5\n",
|
797 |
+
"533 d29eb139aeaaf7fa3d1bca4e44ab477a\n",
|
798 |
+
"534 d295e75c7ac697329dd61a4ed47b64a5\n",
|
799 |
+
"535 d29f1334244f1b3db941e3e410412d18\n",
|
800 |
+
"536 d29e897cb34d9749afa9b714d36fef9e\n",
|
801 |
+
"537 d29e5aad6acd594189b6cec37ca5b4\n",
|
802 |
+
"538 d29925c2742e847a96765883d5df7f6\n",
|
803 |
+
"539 d294517196a46fc47b02564e3c521a\n",
|
804 |
+
"540 d2924adc342d2534f5eb7c53c110d482\n",
|
805 |
+
"541 d292ce95c7f45a3c3493b8e4ba4268f\n",
|
806 |
+
"542 d29f5120673264b9e6351343569c8643\n",
|
807 |
+
"543 d29da888145eab653f71aaaa3e3b721\n",
|
808 |
+
"544 d2915e2dc9768d7c9e293ba8ff78ccef\n",
|
809 |
+
"545 d293e1e962479acb87de21b7b4de047\n",
|
810 |
+
"546 d2959ed48cb926ba9835f41778c29d6e\n",
|
811 |
+
"547 d295281dbf716fd08f070a0629d5da\n",
|
812 |
+
"548 d290a6b81821ff7f9ad19afa2fd84ac\n",
|
813 |
+
"549 d298531ae57af88b8433a7f282ea2b\n",
|
814 |
+
"550 d297f18142c314a35af6c63b14b9f9e\n",
|
815 |
+
"551 d2962446fd5abe52567fe06a1b38cce7\n",
|
816 |
+
"552 d29a0cac08f5ed2323feda06c8bea28\n",
|
817 |
+
"553 d29e9e1e2949c704454b2b6611d6d8c\n",
|
818 |
+
"554 d29cd042363ce647ef1b76b1b391fc\n",
|
819 |
+
"555 d295873185d9a9c1792b8b59d4bae4\n",
|
820 |
+
"556 d29a4bb262d1b0729424e7f1bc7b44b\n",
|
821 |
+
"557 d2908589c031ebf2abcd76a75fc11a0\n",
|
822 |
+
"558 d2949f30dbbae0207b4a391fe658bb\n",
|
823 |
+
"559 d297ebbd26c1ffc19841283c3da33b5\n",
|
824 |
+
"560 d29e913733a376e1d68dc1d98937659\n",
|
825 |
+
"561 d29d0cdfd7d93dd29577df869a06dd7\n",
|
826 |
+
"562 d29af95d81eed483fd7fe25ef7f68a2\n",
|
827 |
+
"563 d294afd91d3f92eeac312f73cc12395a\n",
|
828 |
+
"564 d2952a96e1218338870efc098f19e\n",
|
829 |
+
"565 d29c8e6ecad3413f7cb21c3f1f26e0\n",
|
830 |
+
"566 d29a77eec1c5179561cb35204f6bae7e\n",
|
831 |
+
"567 d2904a3dc19b4e8ab52f24f0f49ed576\n",
|
832 |
+
"568 d292ea27ddc8522669b2aa0ab2a401a\n",
|
833 |
+
"569 d29ed6ae6be3816d379b98efc8fb8b3b\n",
|
834 |
+
"570 d29b13aaa457d8bacd6806919dfc1e\n",
|
835 |
+
"571 d290a2953354a07cc6711c284edd2a88\n",
|
836 |
+
"572 d295f3be27879c4aab716ef8d20156b\n",
|
837 |
+
"573 d29748307bcfec96dd14fe6ba36abc56\n",
|
838 |
+
"574 d295b1b47195707cb92faa7d952627\n",
|
839 |
+
"575 d29b1acbe5886cdaccb0bafccb27e024\n",
|
840 |
+
"576 d29b44b8afcaefa5769cd3bfdb96997\n",
|
841 |
+
"577 d293c0c7285c877e805d50c2196d3c78\n",
|
842 |
+
"578 d2997dee4dfcc7bc62bfe5db828c5\n",
|
843 |
+
"579 d29399cc8460364389d2cd1a6392e54\n",
|
844 |
+
"580 d2944cf6f785198d86773c4b44cc5c0\n",
|
845 |
+
"581 d29285ab48f5a6ecd9c6a45b229b335\n",
|
846 |
+
"582 d29d8536b0504ab1ac1eed17f1ccaebe\n",
|
847 |
+
"583 d2988eddef757af7d9814858e4d980f6\n",
|
848 |
+
"584 d296cda8b065f1315df7d284722773f\n",
|
849 |
+
"585 d29577d5147cbbca8a97cf13fe1ae27c\n",
|
850 |
+
"586 d2927f9825ab537f697784afbdc9bf9\n",
|
851 |
+
"587 d297b94ebc79c5d68bd1bf46999cc8\n",
|
852 |
+
"588 d2981b64d66b3ddae5b9d547df7f4bc4\n",
|
853 |
+
"589 d29b4e8491a1454ac5b4effde5f29\n",
|
854 |
+
"590 d294da8d22ec4c14683d3fbcb346298f\n",
|
855 |
+
"591 d299e568ccd4bfe473214f116f16cd51\n",
|
856 |
+
"592 d29d9be9054b13d6d9391419c09b1d\n",
|
857 |
+
"593 d29067138de3e6fc96ac8837bc4d217\n",
|
858 |
+
"594 d29d8eced5755ee1574b85cb0dbc390\n",
|
859 |
+
"595 d294918dea66792e53ef27496fb42a\n",
|
860 |
+
"596 d290c72ed624368549d9da1b296f127\n",
|
861 |
+
"597 d29fdc2a87e9197967743cf0afc02b2d\n",
|
862 |
+
"598 d29f5e962f897871323880579646c1c\n",
|
863 |
+
"599 d29cd4a5555bde50a6c9cc3d6eaa93a\n",
|
864 |
+
"600 d29da0cb710d6764f708cb3a16bd058\n",
|
865 |
+
"601 d29938dd4780fef53cb4502944c2ae1\n",
|
866 |
+
"602 d29dd6fd1e45279df9f83a8d16b481a\n",
|
867 |
+
"603 d298a69ab72fc0784b72e43e42acd\n",
|
868 |
+
"604 d29dfb75856f9761de28eec281a4412a\n",
|
869 |
+
"605 d29f50837e6202bb0ff8db95626eaa3\n",
|
870 |
+
"606 d2911cb3cee24be5abb2c7ebd92ea9b\n",
|
871 |
+
"607 d29a9927caceae6c14dc6d7ec3eed2a\n",
|
872 |
+
"608 d2937bef46c0e329fc3465dbf7d4b3e3\n",
|
873 |
+
"609 d29af7e273912a5b4217277c54b8e26\n",
|
874 |
+
"610 d2916e2d11436160344632db0dee96\n",
|
875 |
+
"611 d29fce919dc47333a5c20f84960403f\n",
|
876 |
+
"612 d295dc6f26f8e6379e2de3584d8a5fa5\n",
|
877 |
+
"613 d2941541ad8d453ff7ffd6301e4d2818\n",
|
878 |
+
"614 d29d6bb724e9eaf716f34c5c9f3690\n",
|
879 |
+
"615 d2944b67e4a43a6362214585122ba\n",
|
880 |
+
"616 d292493e4de6937f7515a7747e1ecd3d\n",
|
881 |
+
"617 d29b656c1314f13cd9761279f1afcaa\n",
|
882 |
+
"618 d29036667479f953455c613c75b274a9\n",
|
883 |
+
"619 d29181e24020b418ea3ea3b8fc44f4d6\n",
|
884 |
+
"620 d29fa49f6edefd91b8e854cc89b1d284\n",
|
885 |
+
"621 d291e0cb8aa28d3b11a03e513da6d3\n",
|
886 |
+
"622 d2992973f5ad4d39a1f19328663af85\n",
|
887 |
+
"623 d297297a667b971328636252dd708\n",
|
888 |
+
"624 d2998240a3682259f1f6ed2b36824d58\n",
|
889 |
+
"625 d29b375f07fdd1976e6f41bc7d86559\n",
|
890 |
+
"626 d29558b35bdcc2453d7a3067adcac60\n",
|
891 |
+
"627 d2946353e753729d4a5127ed6b3b898\n",
|
892 |
+
"628 d298165c933ec382a997c6c9665824\n",
|
893 |
+
"629 d29c273bea347330addd7eb689651f8b\n",
|
894 |
+
"630 d295a0f3693929e2c8ba2362f9b0acca\n",
|
895 |
+
"631 d2998924cf5a2c1cfdfee129d4f5a46a\n",
|
896 |
+
"632 d299d317d99a45faaff48b2772b0f6f6\n",
|
897 |
+
"633 d29858f839fa221075a9bb494374bcc\n",
|
898 |
+
"634 d29cd7edc74f467aed9f6321528ebdf\n",
|
899 |
+
"635 d2992f3279c543fff277528149c59f3\n",
|
900 |
+
"636 d29694d25d472eea115117b871659\n",
|
901 |
+
"637 d29b51fa7b0b19c8557e94a6687e8a1\n",
|
902 |
+
"638 d29015f0e054a3efbb9be424e58dbf4\n",
|
903 |
+
"639 d296ed355cbf57ab8b69f99c70f0f3\n",
|
904 |
+
"640 d291f57f60fb372b2ff12a8ce8704569\n",
|
905 |
+
"641 d2932924f67936b51bb26840a99c6539\n",
|
906 |
+
"642 d29899f48469ec2054cdd9a1a2918468\n",
|
907 |
+
"643 d2941558d8eb2cd1dd944137bdfb8b29\n",
|
908 |
+
"644 d295582dbe4e6a7243d6bc3a2833488d\n",
|
909 |
+
"645 d294bd593ed959668e48c92881f73ffc\n",
|
910 |
+
"646 d2956ff88a4af4ec6bb1eb08b8df32a\n",
|
911 |
+
"647 d291ed4d52f2596f310212beac7bbd\n",
|
912 |
+
"648 d294bff890fdb108792581b118c82\n",
|
913 |
+
"649 d29e7e09384d5b8ea2e80d12ef553\n",
|
914 |
+
"650 d297bf2df519cb82918959c3ccc0cb55\n",
|
915 |
+
"651 d29737c8bac680c1a4b820f6e21457\n",
|
916 |
+
"652 d29d705d34814746b26010a9cdaafb4e\n",
|
917 |
+
"653 d293e464cbd68fcb8343eca51fbf8194\n",
|
918 |
+
"654 d29996eb36de8886d68dba475d21c3a3\n",
|
919 |
+
"655 d298cc71c18291df579944ee3cd93ac\n",
|
920 |
+
"656 d29d81c9f42bfb5f5dd3321c5e9fb671\n",
|
921 |
+
"657 d292e52e76e8ba45356e47255cdfe350\n",
|
922 |
+
"658 d29b5f55f4116aa6de6f713c9cc2582\n",
|
923 |
+
"659 d29ed238e14a28b3f41e33b454abb15f\n",
|
924 |
+
"660 d2906b3162ee858fc6add2cb5f4b276\n",
|
925 |
+
"661 d29830b5f42099f12c90f830a2ade11b\n",
|
926 |
+
"662 d29215bf492daaaf2d54aa8022b6165\n",
|
927 |
+
"663 d290668a4ca023fde6853e8b48ec9b4f\n",
|
928 |
+
"664 d29ab760cfaf4b8232dd94e8ffe71b25\n",
|
929 |
+
"665 d2912760a642287432653899a490a229\n",
|
930 |
+
"666 d29d911a3e0b877c95a2962b7377a79\n",
|
931 |
+
"667 d291a7e575ad7f83deade677c4f27246\n",
|
932 |
+
"668 d29c7f972b1716113edc6a68c64e79\n",
|
933 |
+
"669 d29fc373b6c588d3c81fe753f88257ac\n",
|
934 |
+
"670 d29769564e8289f43c8ff769359ea9c6\n",
|
935 |
+
"671 d29d92153f44fdba926224ac7b8bec3\n",
|
936 |
+
"672 d297b8d58478a65c5ca79df8ad88b\n",
|
937 |
+
"673 d29351fb392d71e61da1ffac96a29ae\n",
|
938 |
+
"674 d29194b067e9dab16f189287ffc779ad\n",
|
939 |
+
"675 d299f4c2f6fa7ef665b935f6d3bbd3\n",
|
940 |
+
"676 d29af155d2d7c947b5e77397b64a544\n",
|
941 |
+
"677 d29b42d6ece2ecac3d784a915b7485ac\n",
|
942 |
+
"678 d2982667801c9f5171c6c1f04a797498\n",
|
943 |
+
"679 d298e8bff0871d2249bf487971c377ca\n",
|
944 |
+
"680 d2979222961dafe3cb68dbf3f0518b\n",
|
945 |
+
"681 d29a6752837d58368e53925569908f1c\n",
|
946 |
+
"682 d299d0b46a9eb5c41c4469e189a7eed\n",
|
947 |
+
"683 d297b34212b5b49e1a1b28a8b3866773\n",
|
948 |
+
"684 d293f2acede4ebe470106a623b6a515c\n",
|
949 |
+
"685 d29a44a041d991b77794fb01167395\n",
|
950 |
+
"686 d2972c64bf3d7131b17912d56db8\n",
|
951 |
+
"687 d29e50c918f8b4b4c8bcf34364afbdd8\n",
|
952 |
+
"688 d29ea7580511e498046f389bce9d53\n",
|
953 |
+
"689 d29025f886bcdc88e3cf3795c96262\n",
|
954 |
+
"690 d29ffc11409938bbb8eb13bde312d777\n",
|
955 |
+
"691 d29ee0be2178e838fa8ea3861d9b7172\n",
|
956 |
+
"692 d2904623523d8119d2b1a28364cedfc9\n",
|
957 |
+
"693 d296e64238df7f7f47a2d664d34ed85\n",
|
958 |
+
"694 d29b441881275c44f3c2c62e8e78c\n",
|
959 |
+
"695 d29cdbb47a6b9db377c38f6666cf526\n",
|
960 |
+
"696 d294e9faa752812dfbfd612fe6e8f3\n",
|
961 |
+
"697 d29637c1fb7be3ba592c873ccfb3fac\n",
|
962 |
+
"698 d296164d9647a69fc2a22855ea2207b\n",
|
963 |
+
"699 d292bbd49cfbc285ae38abc7f581f37e\n",
|
964 |
+
"700 d29b8659d7fcc6bfa6ab48a7eee32e96\n",
|
965 |
+
"701 d297388166d5f101c5c3496d223c48b\n",
|
966 |
+
"702 d291ceb7ef4442ca3ff9f6437c2460\n",
|
967 |
+
"703 d29accbd24c24f7621e05afcb067e\n",
|
968 |
+
"704 d29da88fc29a3bca4dcfdc190f9dff\n",
|
969 |
+
"705 d2987a60afd375f03612b54f3b9cd93\n",
|
970 |
+
"706 d29517c88f943233dfbb38b5bfe7245\n",
|
971 |
+
"707 d296e19f3d1bc89a4f5aa0e55bf190ae\n",
|
972 |
+
"708 d29a651edf99216156e1b366ad3f9469\n",
|
973 |
+
"709 d297de9d0b1b1a58fb375c5ce4a1f6\n",
|
974 |
+
"710 d29191dc4ad5586eb86162e8c6ffa8a6\n",
|
975 |
+
"711 d2926d26636aa631e6ca74ff32ec99a\n",
|
976 |
+
"712 d29a13c54979d5667bd0a06c231587cb\n",
|
977 |
+
"713 d294ad142f70de4eee6ef9aabd051f7\n",
|
978 |
+
"714 d29e758920ad853db60a175dbf16\n",
|
979 |
+
"715 d2941b4a896ac7205845efc1e3d311f4\n",
|
980 |
+
"716 d2979692498dda6be501a2c19abbec1\n",
|
981 |
+
"717 d29035fa19ce8d2db8209c6dd9cf18c\n",
|
982 |
+
"718 d297902857ecceb79df42eb2b8286830\n",
|
983 |
+
"719 d29ebd7d6556655ba798a563c3c81b4\n",
|
984 |
+
"720 d2996ccbc7aef032ddc9c40c9f4e2\n",
|
985 |
+
"721 d29f3bf78d3e4e659e8c65b2a06063ef\n",
|
986 |
+
"722 d2973089b678f4f783f7687fdfd3c413\n",
|
987 |
+
"723 d29e239ea96cdd0f59bc7e3d299a723\n",
|
988 |
+
"724 d29e50b79ce062e5e8bc6c1b2622ed1d\n",
|
989 |
+
"725 d297a8f1a68efcf7f6a3747f9b99bc16\n",
|
990 |
+
"726 d29463a2ef7ee7ab996621fff62cce4\n",
|
991 |
+
"727 d29977ed567f23fe5d5b13f5186ea89\n",
|
992 |
+
"728 d2985efb90f03916d7d36c121cec28\n",
|
993 |
+
"729 d29842dcb0b1fccd70c834c2e92e5a1a\n",
|
994 |
+
"730 d290aa2a5091fac7d791ed22707cdcac\n",
|
995 |
+
"731 d29e107f6963ad47284ed6a47366a3f\n",
|
996 |
+
"732 d29711f5d625e953559d1a6b33462bd4\n",
|
997 |
+
"733 d2955ca3d3d6f6cefdf20b350ebb9c7\n",
|
998 |
+
"734 d29c3a4c14fc71c7d050fd293c2737\n",
|
999 |
+
"735 d2955b7c292299d843e3a2bd7a631816\n",
|
1000 |
+
"736 d29b3807fcbecdbc37d7a11421a612e\n",
|
1001 |
+
"737 d29cec30dfa4a2bb22b2785522d8bac\n",
|
1002 |
+
"738 d29afc07a96bbdc641abdbc71862f7\n",
|
1003 |
+
"739 d298116eb1cdf4135a5761a4774c5e59\n",
|
1004 |
+
"740 d2905e5f8949982586204e633e322fe\n",
|
1005 |
+
"741 d299fe3aae57bbc76f66867395063\n",
|
1006 |
+
"742 d29d987f35742fa36bc11ea02ef6c395\n",
|
1007 |
+
"743 d296257f78985bee2f732d4bdcd9787\n",
|
1008 |
+
"744 d29aa8dcf7d7bbb373adf71c8f05418\n",
|
1009 |
+
"745 d29e4124dacad46ca69bb3952f1654\n",
|
1010 |
+
"746 d299bfd25f0b38f9e9b1cb076999968\n",
|
1011 |
+
"747 d29cb18dc082b9abf777775c244bcb\n",
|
1012 |
+
"748 d295ea8e422a5599c2e7865ed482eb5\n",
|
1013 |
+
"749 d293878ad47054666535fbc2bb4783d\n",
|
1014 |
+
"750 d29c78cf63ccaa475b9be293c50dd48\n",
|
1015 |
+
"751 d292bdf79b415e4fdc426be3a29eb62\n",
|
1016 |
+
"752 d293c4bad6e52c345a5c6a37168672\n",
|
1017 |
+
"753 d297224294f55bc425603b6e261e88c8\n",
|
1018 |
+
"754 d2924655f5f018d7f2a153691444f1bc\n",
|
1019 |
+
"755 d292eea19488127d5c4247ff3832c7\n",
|
1020 |
+
"756 d29ee24ef7d324f6c94df7ac4f534b3\n",
|
1021 |
+
"757 d29ac7c01ebbb7d56b12929253a2e711\n",
|
1022 |
+
"758 d2925f53be7ac95079be431cfb3393e\n",
|
1023 |
+
"759 d29ad94d7541ad3f4569386c1311570\n",
|
1024 |
+
"760 d2982ce8d6fd3d7891d7a5244d6e46\n",
|
1025 |
+
"761 d298dc1f3a5423b0c848d648f3578de\n",
|
1026 |
+
"762 d29866f8452bc5e5a93495d309824f6\n",
|
1027 |
+
"763 d29237ee31a09c1143df8ad7e5edd7e\n",
|
1028 |
+
"764 d29f1036bfa749f2844986e4ec2b08f\n",
|
1029 |
+
"765 d290c145d34ccf80b624421a3a9862c\n",
|
1030 |
+
"766 d290c5efc4398d2f99b8806260b3bd5\n",
|
1031 |
+
"767 d29bce9d439282122cacfd9a3994ee6\n",
|
1032 |
+
"768 d29487ffb1f785b1dec81bcb1644a5\n",
|
1033 |
+
"769 d294c93212c56d4c638e1015c7a4cd\n",
|
1034 |
+
"770 d29ca0b321bf709c9314bf546435a014\n",
|
1035 |
+
"771 d2944be7a48692e91c12e44924e8c378\n",
|
1036 |
+
"772 d29599c0222fe18127894979b2d9e195\n",
|
1037 |
+
"773 d291b5269b7e2be302bfc2e35cd9493\n",
|
1038 |
+
"774 d29ad7050d32f987bd1ec29956537e\n",
|
1039 |
+
"775 d29e847361db8a6d95dd31e02656adf3\n",
|
1040 |
+
"776 d2938c8eb48754392aa2979628517cf8\n",
|
1041 |
+
"777 d2985b3835dd12d38b4ee2fe44f5d51\n",
|
1042 |
+
"778 d292bc4f15b8a333420c036e552dbef\n",
|
1043 |
+
"779 d29fd3b52d428dfcbabc1e5dce0b218\n",
|
1044 |
+
"780 d2925265e9c682772c6fcc801fd567ed\n",
|
1045 |
+
"781 d29385b2d29b13dc8531bc0e3ae3c3\n",
|
1046 |
+
"782 d29862aaf54f52438142c03ec5fd9\n",
|
1047 |
+
"783 d29517565aef432bca2c56111eb6bb8\n",
|
1048 |
+
"784 d295e1fea5f25c2ae638080cd4fd986\n",
|
1049 |
+
"785 d29d7de1daa871be5dcea8b895860c9\n",
|
1050 |
+
"786 d29b2dfc8c1aa0b5d35e12298ece91db\n",
|
1051 |
+
"787 d29dd7cf86216b8880f4bcac6c5ac89e\n",
|
1052 |
+
"788 d296ea2bba6573f1a42eb1f4b861ee\n",
|
1053 |
+
"789 d29db474e03b44f26b1918293eb880ab\n",
|
1054 |
+
"790 d29c5bba5ada4ff67ea059ec58bec4ff\n",
|
1055 |
+
"791 d29f8eded8bd5e5b39384682b2de5\n",
|
1056 |
+
"792 d2983d5eb4a476b5d821989ac7f7c99\n",
|
1057 |
+
"793 d29cee44da3e77889c2fab8a3145aab9\n",
|
1058 |
+
"794 d29ba9669bc9321fecefc1d11f498d2\n",
|
1059 |
+
"795 d29825d027a364754199952ca1334046\n",
|
1060 |
+
"796 d29aad8d9afcaf7e6ed0fae648833447\n",
|
1061 |
+
"797 d2975510d6eefcc0eda919f86f148b1c\n",
|
1062 |
+
"798 d291a0ce1a15ea52f6eeaed1edf58dce\n",
|
1063 |
+
"799 d2904816b05dc67c8537bb6582de982f\n",
|
1064 |
+
"800 d29fab6e11338ab587f16136cf9444\n",
|
1065 |
+
"801 d296acd93e6038c342a36b18a7909fd0\n",
|
1066 |
+
"802 d2982cd07bbc8e64c6e135232b8b6925\n",
|
1067 |
+
"803 d299dd6b21d0c6f1cbd19185d579dc\n",
|
1068 |
+
"804 d29544126d3e23f648bd112ef6a61ce\n",
|
1069 |
+
"805 d29826cc8d2dd83b92e86337d0c031c\n",
|
1070 |
+
"806 d29dd510e5e7614b5ca4379d6ed707\n",
|
1071 |
+
"807 d29ad167c0ce4337b9589eafe03494af\n",
|
1072 |
+
"808 d29960b34deb26acde68498c9f6f9cf\n",
|
1073 |
+
"809 d299c3dd50eff7982fcfb87421d424f\n",
|
1074 |
+
"810 d292f74f748e4d1973c5b9515e96f\n",
|
1075 |
+
"811 d29aab4e896a43874eb098ff95b7d359\n",
|
1076 |
+
"812 d29750922a9834d5a9dd10bb748832b\n",
|
1077 |
+
"813 d29fa86517414edfbdf9a96e18ac67\n",
|
1078 |
+
"814 d29aa42bf76a9f2cd1c51157b2368a2\n",
|
1079 |
+
"815 d29f267452f46384c1eee391630f\n",
|
1080 |
+
"816 d29ff196e725e8639fbc3a09899e8\n",
|
1081 |
+
"817 d296c63571356a27fbdf0afd31afb\n",
|
1082 |
+
"818 d29be82641878d0b8f02762222f8256\n",
|
1083 |
+
"819 d29f3c32b0e180c8636dbe96571c47ca\n",
|
1084 |
+
"820 d29c3829d6808ff566be34ff1f3f3a21\n",
|
1085 |
+
"821 d2966dfabc587217328be7627c64a4a\n",
|
1086 |
+
"822 d299e265e3e3e6535a294dc4adeddf\n",
|
1087 |
+
"823 d29e7ba26ed08b7b5499d2853bbe1440\n",
|
1088 |
+
"824 d2943428447dafe9c198f43443dec1\n",
|
1089 |
+
"825 d29aea1d82818daffabb7f916ca6e80\n",
|
1090 |
+
"826 d294b396de33e8ec9e19064faffa6c7\n",
|
1091 |
+
"827 d292357ac7c73ffe69394836cbe6c81\n",
|
1092 |
+
"828 d2946d777ace8d523759eed85c97d\n",
|
1093 |
+
"829 d2993a76ad5a82137a158a3cd69a3498\n",
|
1094 |
+
"830 d29ced67ec73d4466990b2a0c39dc952\n",
|
1095 |
+
"831 d298d6a3641f7a81d7e810f3178c3ffb\n",
|
1096 |
+
"832 d29c3acabee676411107c46199976a\n",
|
1097 |
+
"833 d292f399f7e6c1748a6697fbdb8a3b0\n",
|
1098 |
+
"834 d299cd99a36be18b25f23d3578a819cb\n",
|
1099 |
+
"835 d293983a451ca4bb7ded3ec6ee2e934\n",
|
1100 |
+
"836 d29a176ba9b216e746d4fcd147dd01e\n",
|
1101 |
+
"837 d29c48fc5e615a222bf8f16cc683d217\n",
|
1102 |
+
"838 d29f3f15a8513c1281e157c6e990d5bc\n",
|
1103 |
+
"839 d299e63ed9cbd0dd4279a0c1b8406f2\n",
|
1104 |
+
"840 d2913834d6c820d0cae85aab464764d\n",
|
1105 |
+
"841 d29528f1fba2a9365522f0528dd69c4\n",
|
1106 |
+
"842 d296d5361b93faa231da4da432b8720\n",
|
1107 |
+
"843 d29ebee84c287cd5ed27dfbe37df79a\n",
|
1108 |
+
"844 d29cd3f3444df8673be61bd7499332c\n",
|
1109 |
+
"845 d290855d546a7a55da3548fe7deb5c2\n",
|
1110 |
+
"846 d29425170a41b4b75177787b5c65ab\n",
|
1111 |
+
"847 d294fb4a65a5d037676413a1d95cee12\n",
|
1112 |
+
"848 d297cd83a3c4d24264d8fad18c4ab9\n",
|
1113 |
+
"849 d29c1ee453c62685c16851bb257f1b7\n",
|
1114 |
+
"850 d29da361eded18e5845516efa1c146\n",
|
1115 |
+
"851 d29316f1b419cca78b8eb21c91d094f2\n",
|
1116 |
+
"852 d2907ecfc6276dca050fc71a479048\n",
|
1117 |
+
"853 d29e9a4d1b55913b632a057c282add\n",
|
1118 |
+
"854 d29d5849151bfa133bacad8ab196d7\n",
|
1119 |
+
"855 d29f52a596a66fd22c99f7292fcbd4\n",
|
1120 |
+
"856 d2995314458dc296fa8a50d578907f63\n",
|
1121 |
+
"857 d299729169810aee8cf863b98885472\n",
|
1122 |
+
"858 d29098775cb7d920da1dd2d5d444df1e\n",
|
1123 |
+
"859 d29dc3f4bf446d5bff72ccb27c631\n",
|
1124 |
+
"860 d29ddc6413cd9fdaa9b97bfb38697166\n",
|
1125 |
+
"861 d29a189d80fb1b8d64b2b92d6e25576\n",
|
1126 |
+
"862 d297d4df9a1ec83dbf159293653ff71\n",
|
1127 |
+
"863 d290b698b6489f39e8f2ad3ce4261594\n",
|
1128 |
+
"864 d29fa9ad34a40779b3736a71e3e8956\n",
|
1129 |
+
"865 d295d0175728998b8560507b74416d81\n",
|
1130 |
+
"866 d295b028a2ba30598e75b46cc28feafb\n",
|
1131 |
+
"867 d29a33b2188451fca68da55e201b57d3\n",
|
1132 |
+
"868 d29273375d45edaa581a111ce22e5482\n",
|
1133 |
+
"869 d29307629e27a419c4c9459a918ff6f\n",
|
1134 |
+
"870 d296de62b4fa21342a695aebf897b4f\n",
|
1135 |
+
"871 d29691391d561ed43b68c891792e946\n",
|
1136 |
+
"872 d29e81413ac99fb57626e48278e783\n",
|
1137 |
+
"873 d2925c82aa7fd47f6ea9e20f5715719\n",
|
1138 |
+
"874 d29f6adb7938f5a11ffcd43fa17be269\n",
|
1139 |
+
"875 d2956a499e1e38b4538a34c88af8438\n",
|
1140 |
+
"876 d2941bd03c33d2bb4921405e52677319\n",
|
1141 |
+
"877 d294252778388bab43954be9d64bf111\n",
|
1142 |
+
"878 d29fd97555f88c8f56515d5ef6aa35a\n",
|
1143 |
+
"879 d2966cf6fc7be1abf528520b894d077\n",
|
1144 |
+
"880 d29ee684b54476792bddd1dcf5455a5d\n",
|
1145 |
+
"881 d29be915e4b91c211a954dea9af625\n",
|
1146 |
+
"882 d297fd4966bb91d5809164c3eed20f3\n",
|
1147 |
+
"883 d299c65b6d9e92f0e34f3ca5452f2142\n",
|
1148 |
+
"884 d297bce473a261d5a8fdc04b62a9b9a\n",
|
1149 |
+
"885 d29ebb6fb2285a61486c87fff8d38e\n",
|
1150 |
+
"886 d29f118564ee222d29141c5670a3f3cc\n",
|
1151 |
+
"887 d29152bcecd9b6abd8cc15c33d28948\n",
|
1152 |
+
"888 d29b5b2817bf309dcf124afcc41096\n",
|
1153 |
+
"889 d298ebd5120ab3cf7a4184a43c5cf\n",
|
1154 |
+
"890 d29c328293364cd58d99c33f46a7c1\n",
|
1155 |
+
"891 d297c62a9eb99ba18b46df6c43ef0e5\n",
|
1156 |
+
"892 d296d4312ccba6f29387ac3a67a0ce\n",
|
1157 |
+
"893 d2972baeb06f9d30176a6a2b8f5c535\n",
|
1158 |
+
"894 d29cb9ffbaa7fca2aa38c5aa0050a8\n",
|
1159 |
+
"895 d2941e7858af7221c16d1bfe1792b3a\n",
|
1160 |
+
"896 d29ed990f5cbb0eaeb1cdd1645e216cb\n",
|
1161 |
+
"897 d29f8616ffa1c0e16437ecda5833578e\n",
|
1162 |
+
"898 d29f264016fb567fc523f16c99dd63d\n",
|
1163 |
+
"899 d297f883cdb8fba51734bfcb78ac\n",
|
1164 |
+
"900 d295f5e269934ac4bf355afa329b0ac\n",
|
1165 |
+
"901 d2961171e684b9d624ecf5439932a537\n",
|
1166 |
+
"902 d291c634c899a6c3fe5f28c54680f4ac\n",
|
1167 |
+
"903 d29cd457bd71c490bb53521b1730eb2c\n",
|
1168 |
+
"904 d292daf9834286ad9850fc67f4c3d69\n",
|
1169 |
+
"905 d29effc9b2125ce69c928f41dffefb3\n",
|
1170 |
+
"906 d298d24472564871585443c2ba9f6625\n",
|
1171 |
+
"907 d29d42f9314878c6bb302c1a73d6f1\n",
|
1172 |
+
"908 d296bcc710b43b719bcdd93e2cdaf29\n",
|
1173 |
+
"909 d29825d81e57c0c317ae93b5dbe78\n",
|
1174 |
+
"910 d29b39adc7a9e4ef33cbd8a6ef32879\n",
|
1175 |
+
"911 d298f52822e8a1b966f359eef53869ed\n",
|
1176 |
+
"912 d2909f79c3d51e9b8b41366d851791\n",
|
1177 |
+
"913 d29f4b4ed084c42652b62b0b6182269\n",
|
1178 |
+
"914 d2952bad222361a6263b53cef5c08fd7\n",
|
1179 |
+
"915 d29e95a3a24e6f548dc5bc66e9534ef9\n",
|
1180 |
+
"916 d2983e18a5eb44da8b9cd2955d2598\n",
|
1181 |
+
"917 d292380b6f7a4791e6829215b4df483\n",
|
1182 |
+
"918 d2966824b928a21da99f327dcc25b2c\n",
|
1183 |
+
"919 d29690a89ccff87641923adb266ace\n",
|
1184 |
+
"920 d297d320fc6e1221f2939dead1829f1\n",
|
1185 |
+
"921 d297dac5dd61868c16393413e9df419\n",
|
1186 |
+
"922 d291f0fce6e2b09f27c637d1def6fda0\n",
|
1187 |
+
"923 d2937ba495db231c9f863bdd5e2efc2\n",
|
1188 |
+
"924 d2945f89f43315f3fcee9ccc5f14fde\n",
|
1189 |
+
"925 d294d41c45ea9fd8cf1df22214f7f65\n",
|
1190 |
+
"926 d2923eda9fd98f1fa0fdc85c2c6a8f58\n",
|
1191 |
+
"927 d2919a159571de2c8ae87fcee7f72\n",
|
1192 |
+
"928 d294bd134345a46391dcec1cd27248fd\n",
|
1193 |
+
"929 d299b714d28830663458662681e041e4\n",
|
1194 |
+
"930 d2994917ae56468019ace55110693b\n",
|
1195 |
+
"931 d29638364c5166dc5ce5040424db5\n",
|
1196 |
+
"932 d297a7b0a91ff4e9d999dfad446501d\n",
|
1197 |
+
"933 d29883a44e13226a369554c0f826474\n",
|
1198 |
+
"934 d291879da81e887f31e11fe0c54b69ed\n",
|
1199 |
+
"935 d290fd3d51f8d62324b0338a84278ba8\n",
|
1200 |
+
"936 d29465e1fe608a4bdd4b3cba5f985129\n",
|
1201 |
+
"937 d293d623b63e47b96e812ac2fe5565f\n",
|
1202 |
+
"938 d29fffdf16211b8d5aa41487a8daa5ca\n",
|
1203 |
+
"939 d299fc7fb7f458ec1b976a5a52b8b04a\n",
|
1204 |
+
"940 d296a995f653a0335e447e0f9f8804c\n",
|
1205 |
+
"941 d296f252693c6130da6fbaadc08469\n",
|
1206 |
+
"942 d29cc9dcde13c9371a28cc1bf9836e3c\n",
|
1207 |
+
"943 d295918d4f51d352b3c83bdf3d16f861\n",
|
1208 |
+
"944 d29832ee32acfc4c7b56c4d1eed42\n",
|
1209 |
+
"945 d296ef3360d4f5ddfbd530d479d2992c\n",
|
1210 |
+
"946 d2965113b74b1a9ec3cbc33602811e9\n",
|
1211 |
+
"947 d2956451b5c77299969f87aea3621e3\n",
|
1212 |
+
"948 d29ab427ff507dbbe13ae25ebbbace6f\n",
|
1213 |
+
"949 d29a5ba29763bc916b853c15293689f\n",
|
1214 |
+
"950 d2927f7a6056ab6be96cd0812640ce\n",
|
1215 |
+
"951 d29ac16ee01e78164acdd4e9ae56b65c\n",
|
1216 |
+
"952 d298f1ab24787baabadc2c79489857b\n",
|
1217 |
+
"953 d2934db68cdb24285a4bfe4c45de83\n",
|
1218 |
+
"954 d296a2c4fd479d35942e20779121cd2b\n",
|
1219 |
+
"955 d292aedad670eb23c0de67d754c9f\n",
|
1220 |
+
"956 d292f67c97843c616fe91b24b833e81\n",
|
1221 |
+
"957 d294b46b302a24644766c7449594721f\n",
|
1222 |
+
"958 d292961146b9cbbb547223db2a8a9\n",
|
1223 |
+
"959 d296a012631260f8f4d62a553b79b2d9\n",
|
1224 |
+
"960 d296fe9aac4d48e7bf61db9aac5bcb8\n",
|
1225 |
+
"961 d29c64939a3116d25d2baea9fa5ca2\n",
|
1226 |
+
"962 d2921bc19d4534ab7fa7a85bf67e1faa\n",
|
1227 |
+
"963 d29e44e97f49146198417e4ab07cf7e6\n",
|
1228 |
+
"964 d29e7f55fdb62ca7f29191e6f3551ebb\n",
|
1229 |
+
"965 d294807c2d6877a01b863757ccbf\n",
|
1230 |
+
"966 d29399f926878adeae85b9126c9c545\n",
|
1231 |
+
"967 d295684772ee4705d79a7ecfa44572\n",
|
1232 |
+
"968 d299e639d6e22972f6789e1f7613dee2\n",
|
1233 |
+
"969 d2955e19f597df6c42b37859b59b4a\n",
|
1234 |
+
"970 d295648026dce77c96bb4f94cb1b6ae\n",
|
1235 |
+
"971 d296b192e72f956789e68dd798faecd\n",
|
1236 |
+
"972 d2927984b7b4badce29cbef261244\n",
|
1237 |
+
"973 d2981e54d04b40b869399c3ae30dea3\n",
|
1238 |
+
"974 d29ace284cb77abebfe84a87eace985\n",
|
1239 |
+
"975 d29f28f637ff8952889657bebddfed5\n",
|
1240 |
+
"976 d292e945bbd333b72c4951321587958d\n",
|
1241 |
+
"977 d29b28c6e5e48c4d898cb786c3ddc\n",
|
1242 |
+
"978 d2919df6a0b0c198a55db2b82c9e8a\n",
|
1243 |
+
"979 d29d73b4807db874afb1951d5c6fe58\n",
|
1244 |
+
"980 d2998145f1a42e419e9c669f3ce36f5\n",
|
1245 |
+
"981 d2967bcc651b29e9e7bd65fab12d5a3\n",
|
1246 |
+
"982 d291736293c558225a0cebe457a6f2\n",
|
1247 |
+
"983 d29e9483c1c73bda7d7d74e869b4e7e\n",
|
1248 |
+
"984 d299d5f6b506c6236dc858da34f1cc\n",
|
1249 |
+
"985 d2913ad1734310694a6c2c35a1c569e8\n",
|
1250 |
+
"986 d294bdca75f6d53d497559412a7a3d\n",
|
1251 |
+
"987 d29aecc65b7df1f508c83df595ff4e\n",
|
1252 |
+
"988 d29cda9cb047b6bdbcd4d3b50feec7e\n",
|
1253 |
+
"989 d29739396b17f9e255c7726de428c5f\n",
|
1254 |
+
"990 d29b475454526ecffec9fefcf8f01c8e\n",
|
1255 |
+
"991 d29667e51ed875183825ab53d44fa70\n",
|
1256 |
+
"992 d297e8ed757593d67a2771257a27be4\n",
|
1257 |
+
"993 d295c322fc9ee4dca758544c942f2d53\n",
|
1258 |
+
"994 d298372c48d5c8aaa16ee2f3a5a5380\n",
|
1259 |
+
"995 d2946559a807388662cd0308ad666dd\n",
|
1260 |
+
"996 d29dcc2038b89c365b3aba17f94bf52\n",
|
1261 |
+
"997 d29fcaee2537fda115ad172ed10778\n",
|
1262 |
+
"998 d29ca7d044203e0242084cb958ef464\n",
|
1263 |
+
"999 d299349d8bd55ccae1dcea12b2b7ca73\n",
|
1264 |
+
"returning 0\n"
|
1265 |
+
]
|
1266 |
+
},
|
1267 |
+
{
|
1268 |
+
"ename": "IndexError",
|
1269 |
+
"evalue": "index out of bounds",
|
1270 |
+
"output_type": "error",
|
1271 |
+
"traceback": [
|
1272 |
+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
1273 |
+
"\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)",
|
1274 |
+
"\u001b[0;32m/tmp/ipykernel_617634/3764770081.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m filtered_dataset = dataset.map(\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mselect_existing_rows\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mremove_columns\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdataset\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumn_names\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mbatched\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mnum_proc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
1275 |
+
"\u001b[0;32m~/code/hf_jax/datasets/src/datasets/arrow_dataset.py\u001b[0m in \u001b[0;36mmap\u001b[0;34m(self, function, with_indices, input_columns, batched, batch_size, drop_last_batch, remove_columns, keep_in_memory, load_from_cache_file, cache_file_name, writer_batch_size, features, disable_nullable, fn_kwargs, num_proc, suffix_template, new_fingerprint, desc)\u001b[0m\n\u001b[1;32m 1655\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1656\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mnum_proc\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mnum_proc\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1657\u001b[0;31m return self._map_single(\n\u001b[0m\u001b[1;32m 1658\u001b[0m \u001b[0mfunction\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfunction\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1659\u001b[0m \u001b[0mwith_indices\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mwith_indices\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
1276 |
+
"\u001b[0;32m~/code/hf_jax/datasets/src/datasets/arrow_dataset.py\u001b[0m in \u001b[0;36mwrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 183\u001b[0m }\n\u001b[1;32m 184\u001b[0m \u001b[0;31m# apply actual function\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 185\u001b[0;31m \u001b[0mout\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"Dataset\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"DatasetDict\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 186\u001b[0m \u001b[0mdatasets\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"Dataset\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mout\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mout\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdict\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mout\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 187\u001b[0m \u001b[0;31m# re-apply format to the output\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
1277 |
+
"\u001b[0;32m~/code/hf_jax/datasets/src/datasets/fingerprint.py\u001b[0m in \u001b[0;36mwrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 395\u001b[0m \u001b[0;31m# Call actual function\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 396\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 397\u001b[0;31m \u001b[0mout\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 398\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 399\u001b[0m \u001b[0;31m# Update fingerprint of in-place transforms + update in-place history of transforms\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
1278 |
+
"\u001b[0;32m~/code/hf_jax/datasets/src/datasets/arrow_dataset.py\u001b[0m in \u001b[0;36m_map_single\u001b[0;34m(self, function, with_indices, input_columns, batched, batch_size, drop_last_batch, remove_columns, keep_in_memory, load_from_cache_file, cache_file_name, writer_batch_size, features, disable_nullable, fn_kwargs, new_fingerprint, rank, offset, desc)\u001b[0m\n\u001b[1;32m 2022\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2023\u001b[0m \u001b[0mbatch\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcast_to_python_objects\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbatch\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2024\u001b[0;31m \u001b[0mwriter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwrite_batch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbatch\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2025\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mupdate_data\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mwriter\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2026\u001b[0m \u001b[0mwriter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfinalize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# close_stream=bool(buf_writer is None)) # We only close if we are writing in a file\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
1279 |
+
"\u001b[0;32m~/code/hf_jax/datasets/src/datasets/arrow_writer.py\u001b[0m in \u001b[0;36mwrite_batch\u001b[0;34m(self, batch_examples, writer_batch_size)\u001b[0m\n\u001b[1;32m 386\u001b[0m \u001b[0mtyped_sequence\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mOptimizedTypedSequence\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbatch_examples\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcol\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcol_type\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtry_type\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcol_try_type\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcol\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcol\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 387\u001b[0m \u001b[0mtyped_sequence_examples\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcol\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtyped_sequence\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 388\u001b[0;31m \u001b[0mpa_table\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpa\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTable\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_pydict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtyped_sequence_examples\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 389\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwrite_table\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpa_table\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwriter_batch_size\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 390\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
|
1280 |
+
"\u001b[0;32m~/miniconda3/envs/hf_jax/lib/python3.8/site-packages/pyarrow/table.pxi\u001b[0m in \u001b[0;36mpyarrow.lib.Table.from_pydict\u001b[0;34m()\u001b[0m\n",
|
1281 |
+
"\u001b[0;32m~/miniconda3/envs/hf_jax/lib/python3.8/site-packages/pyarrow/array.pxi\u001b[0m in \u001b[0;36mpyarrow.lib.asarray\u001b[0;34m()\u001b[0m\n",
|
1282 |
+
"\u001b[0;32m~/miniconda3/envs/hf_jax/lib/python3.8/site-packages/pyarrow/array.pxi\u001b[0m in \u001b[0;36mpyarrow.lib.array\u001b[0;34m()\u001b[0m\n",
|
1283 |
+
"\u001b[0;32m~/miniconda3/envs/hf_jax/lib/python3.8/site-packages/pyarrow/array.pxi\u001b[0m in \u001b[0;36mpyarrow.lib._handle_arrow_array_protocol\u001b[0;34m()\u001b[0m\n",
|
1284 |
+
"\u001b[0;32m~/code/hf_jax/datasets/src/datasets/arrow_writer.py\u001b[0m in \u001b[0;36m__arrow_array__\u001b[0;34m(self, type)\u001b[0m\n\u001b[1;32m 98\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 99\u001b[0m \u001b[0mout\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpa\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 100\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0mtrying_type\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mout\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mas_py\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 101\u001b[0m raise TypeError(\n\u001b[1;32m 102\u001b[0m \u001b[0;34m\"Specified try_type alters data. Please check that the type/feature that you provided match the type/features of the data.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
1285 |
+
"\u001b[0;32m~/miniconda3/envs/hf_jax/lib/python3.8/site-packages/pyarrow/array.pxi\u001b[0m in \u001b[0;36mpyarrow.lib.Array.__getitem__\u001b[0;34m()\u001b[0m\n",
|
1286 |
+
"\u001b[0;32m~/miniconda3/envs/hf_jax/lib/python3.8/site-packages/pyarrow/array.pxi\u001b[0m in \u001b[0;36mpyarrow.lib._normalize_index\u001b[0;34m()\u001b[0m\n",
|
1287 |
+
"\u001b[0;31mIndexError\u001b[0m: index out of bounds"
|
1288 |
+
]
|
1289 |
+
}
|
1290 |
+
],
|
1291 |
+
"source": [
|
1292 |
+
"filtered_dataset = dataset.map(\n",
|
1293 |
+
" select_existing_rows,\n",
|
1294 |
+
" remove_columns = dataset.column_names,\n",
|
1295 |
+
" batched = True,\n",
|
1296 |
+
" num_proc = 1,\n",
|
1297 |
+
" desc = \"Selecting rows with images that exist\"\n",
|
1298 |
+
")"
|
1299 |
+
]
|
1300 |
+
},
|
1301 |
+
{
|
1302 |
+
"cell_type": "code",
|
1303 |
+
"execution_count": 109,
|
1304 |
+
"id": "7060ff8f",
|
1305 |
+
"metadata": {},
|
1306 |
+
"outputs": [],
|
1307 |
+
"source": [
|
1308 |
+
"# df['image_exists'] = df.apply(lambda row: image_exists(row['key']), axis=1)"
|
1309 |
+
]
|
1310 |
+
},
|
1311 |
+
{
|
1312 |
+
"cell_type": "code",
|
1313 |
+
"execution_count": 113,
|
1314 |
+
"id": "fecc9a00",
|
1315 |
+
"metadata": {},
|
1316 |
+
"outputs": [],
|
1317 |
+
"source": [
|
1318 |
+
"image_size = 256\n",
|
1319 |
+
"def image_transform(image):\n",
|
1320 |
+
" s = min(image.size)\n",
|
1321 |
+
" r = image_size / s\n",
|
1322 |
+
" s = (round(r * image.size[1]), round(r * image.size[0]))\n",
|
1323 |
+
" image = TF.resize(image, s, interpolation=InterpolationMode.LANCZOS)\n",
|
1324 |
+
" image = TF.center_crop(image, output_size = 2 * [image_size])\n",
|
1325 |
+
" image = torch.unsqueeze(T.ToTensor()(image), 0)\n",
|
1326 |
+
" image = image.permute(0, 2, 3, 1).numpy()\n",
|
1327 |
+
" return image"
|
1328 |
+
]
|
1329 |
+
},
|
1330 |
+
{
|
1331 |
+
"cell_type": "code",
|
1332 |
+
"execution_count": 98,
|
1333 |
+
"id": "1a065700",
|
1334 |
+
"metadata": {},
|
1335 |
+
"outputs": [],
|
1336 |
+
"source": [
|
1337 |
+
"class YFC100Dataset(Dataset):\n",
|
1338 |
+
" def __init__(self, image_list_path: str, images_root: str, image_size: int, max_items=None):\n",
|
1339 |
+
" \"\"\"\n",
|
1340 |
+
" :param image_list_path: Path to a file containing a list of all images, in jsonl format.\n",
|
1341 |
+
" :param images_root: Root directory containing the images\n",
|
1342 |
+
" :param image_size: Image size. Source images will be resized and center-cropped.\n",
|
1343 |
+
"        :param max_items: Limit dataset size for debugging (only the first `max_items` rows of the image list are kept).\n",
|
1344 |
+
" \"\"\"\n",
|
1345 |
+
" self.image_list = pd.read_json(image_list_path, orient=\"records\", lines=True)\n",
|
1346 |
+
" self.images_root = Path(images_root)\n",
|
1347 |
+
" if max_items is not None: self.image_list = self.image_list[:max_items]\n",
|
1348 |
+
" self.image_size = image_size\n",
|
1349 |
+
" \n",
|
1350 |
+
" def __len__(self):\n",
|
1351 |
+
" return len(self.image_list)\n",
|
1352 |
+
" \n",
|
1353 |
+
" def _get_raw_image(self, i):\n",
|
1354 |
+
"        image_name = self.image_list.iloc[i].key\n",
|
1355 |
+
" image_path = (self.images_root/image_name[0:3]/image_name[3:6]/image_name).with_suffix('.jpg')\n",
|
1356 |
+
" return default_loader(image_path) if image_path.exists() else None\n",
|
1357 |
+
" \n",
|
1358 |
+
" # TODO: we could maybe use jax resizing / scaling functions\n",
|
1359 |
+
" def resize_image(self, image):\n",
|
1360 |
+
" s = min(image.size)\n",
|
1361 |
+
" r = self.image_size / s\n",
|
1362 |
+
" s = (round(r * image.size[1]), round(r * image.size[0]))\n",
|
1363 |
+
" image = TF.resize(image, s, interpolation=InterpolationMode.LANCZOS)\n",
|
1364 |
+
" image = TF.center_crop(image, output_size = 2 * [self.image_size])\n",
|
1365 |
+
" image = torch.unsqueeze(T.ToTensor()(image), 0)\n",
|
1366 |
+
" image = image.permute(0, 2, 3, 1).numpy()\n",
|
1367 |
+
" return image\n",
|
1368 |
+
" \n",
|
1369 |
+
" def _get_caption(self, i):\n",
|
1370 |
+
" # We are currently appending title and caption. Should we use another separator?\n",
|
1371 |
+
" row = self.image_list.iloc[i]\n",
|
1372 |
+
"        return ' '.join([row.title_clean, row.description_clean])\n",
|
1373 |
+
" \n",
|
1374 |
+
" def __getitem__(self, i):\n",
|
1375 |
+
" image = self._get_raw_image(i)\n",
|
1376 |
+
" if image is None: return None\n",
|
1377 |
+
" image = self.resize_image(image)\n",
|
1378 |
+
" caption = self._get_caption(i)\n",
|
1379 |
+
" return {'image': image, 'text': caption}"
|
1380 |
+
]
|
1381 |
+
},
|
1382 |
+
{
|
1383 |
+
"cell_type": "code",
|
1384 |
+
"execution_count": 99,
|
1385 |
+
"id": "4ce2211f",
|
1386 |
+
"metadata": {},
|
1387 |
+
"outputs": [],
|
1388 |
+
"source": [
|
1389 |
+
"dataset = YFC100Dataset(\n",
|
1390 |
+
" image_list_path = yfc100m_metadata,\n",
|
1391 |
+
" images_root = yfc100m_images,\n",
|
1392 |
+
" image_size = 256,\n",
|
1393 |
+
")"
|
1394 |
+
]
|
1395 |
+
},
|
1396 |
+
{
|
1397 |
+
"cell_type": "code",
|
1398 |
+
"execution_count": 100,
|
1399 |
+
"id": "cc922704",
|
1400 |
+
"metadata": {},
|
1401 |
+
"outputs": [
|
1402 |
+
{
|
1403 |
+
"data": {
|
1404 |
+
"text/plain": [
|
1405 |
+
"5000"
|
1406 |
+
]
|
1407 |
+
},
|
1408 |
+
"execution_count": 100,
|
1409 |
+
"metadata": {},
|
1410 |
+
"output_type": "execute_result"
|
1411 |
+
}
|
1412 |
+
],
|
1413 |
+
"source": [
|
1414 |
+
"len(dataset)"
|
1415 |
+
]
|
1416 |
+
},
|
1417 |
+
{
|
1418 |
+
"cell_type": "code",
|
1419 |
+
"execution_count": 102,
|
1420 |
+
"id": "6e47ba46",
|
1421 |
+
"metadata": {},
|
1422 |
+
"outputs": [],
|
1423 |
+
"source": [
|
1424 |
+
"dataloader = DataLoader(dataset, batch_size=32, num_workers=4)"
|
1425 |
+
]
|
1426 |
+
},
|
1427 |
+
{
|
1428 |
+
"cell_type": "code",
|
1429 |
+
"execution_count": 103,
|
1430 |
+
"id": "c8a130eb",
|
1431 |
+
"metadata": {},
|
1432 |
+
"outputs": [
|
1433 |
+
{
|
1434 |
+
"ename": "TypeError",
|
1435 |
+
"evalue": "Caught TypeError in DataLoader worker process 0.\nOriginal Traceback (most recent call last):\n File \"/home/pedro/miniconda3/envs/hf_jax/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py\", line 287, in _worker_loop\n data = fetcher.fetch(index)\n File \"/home/pedro/miniconda3/envs/hf_jax/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py\", line 47, in fetch\n return self.collate_fn(data)\n File \"/home/pedro/miniconda3/envs/hf_jax/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py\", line 86, in default_collate\n raise TypeError(default_collate_err_msg_format.format(elem_type))\nTypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found <class 'NoneType'>\n",
|
1436 |
+
"output_type": "error",
|
1437 |
+
"traceback": [
|
1438 |
+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
1439 |
+
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
|
1440 |
+
"\u001b[0;32m/tmp/ipykernel_320049/1409168804.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mnext\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0miter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdataloader\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
|
1441 |
+
"\u001b[0;32m~/miniconda3/envs/hf_jax/lib/python3.8/site-packages/torch/utils/data/dataloader.py\u001b[0m in \u001b[0;36m__next__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 519\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sampler_iter\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 520\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_reset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 521\u001b[0;31m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_next_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 522\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_num_yielded\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 523\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_dataset_kind\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0m_DatasetKind\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mIterable\u001b[0m \u001b[0;32mand\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
1442 |
+
"\u001b[0;32m~/miniconda3/envs/hf_jax/lib/python3.8/site-packages/torch/utils/data/dataloader.py\u001b[0m in \u001b[0;36m_next_data\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1201\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1202\u001b[0m \u001b[0;32mdel\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_task_info\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0midx\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1203\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_process_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1204\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1205\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_try_put_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
1443 |
+
"\u001b[0;32m~/miniconda3/envs/hf_jax/lib/python3.8/site-packages/torch/utils/data/dataloader.py\u001b[0m in \u001b[0;36m_process_data\u001b[0;34m(self, data)\u001b[0m\n\u001b[1;32m 1227\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_try_put_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1228\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mExceptionWrapper\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1229\u001b[0;31m \u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreraise\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1230\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1231\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
|
1444 |
+
"\u001b[0;32m~/miniconda3/envs/hf_jax/lib/python3.8/site-packages/torch/_utils.py\u001b[0m in \u001b[0;36mreraise\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 423\u001b[0m \u001b[0;31m# have message field\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 424\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexc_type\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmessage\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 425\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexc_type\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 426\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 427\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
|
1445 |
+
"\u001b[0;31mTypeError\u001b[0m: Caught TypeError in DataLoader worker process 0.\nOriginal Traceback (most recent call last):\n File \"/home/pedro/miniconda3/envs/hf_jax/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py\", line 287, in _worker_loop\n data = fetcher.fetch(index)\n File \"/home/pedro/miniconda3/envs/hf_jax/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py\", line 47, in fetch\n return self.collate_fn(data)\n File \"/home/pedro/miniconda3/envs/hf_jax/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py\", line 86, in default_collate\n raise TypeError(default_collate_err_msg_format.format(elem_type))\nTypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found <class 'NoneType'>\n"
|
1446 |
+
]
|
1447 |
+
}
|
1448 |
+
],
|
1449 |
+
"source": [
|
1450 |
+
"next(iter(dataloader))"
|
1451 |
+
]
|
1452 |
+
},
|
1453 |
+
{
|
1454 |
+
"cell_type": "markdown",
|
1455 |
+
"id": "62ad01c3",
|
1456 |
+
"metadata": {},
|
1457 |
+
"source": [
|
1458 |
+
"## Encoding"
|
1459 |
+
]
|
1460 |
+
},
|
1461 |
+
{
|
1462 |
+
"cell_type": "code",
|
1463 |
+
"execution_count": 89,
|
1464 |
+
"id": "88f36d0b",
|
1465 |
+
"metadata": {},
|
1466 |
+
"outputs": [],
|
1467 |
+
"source": [
|
1468 |
+
"def encode(model, batch):\n",
|
1469 |
+
" print(\"jitting encode function\")\n",
|
1470 |
+
"# _, indices = model.encode(batch)\n",
|
1471 |
+
"\n",
|
1472 |
+
"    # The model does not run on my computer (cuDNN is not currently installed), so the indices are faked\n",
|
1473 |
+
" indices = [random.randint(0, 16384) for _ in range(256)]\n",
|
1474 |
+
" return indices"
|
1475 |
+
]
|
1476 |
+
},
|
1477 |
+
{
|
1478 |
+
"cell_type": "code",
|
1479 |
+
"execution_count": 90,
|
1480 |
+
"id": "1f35f0cb",
|
1481 |
+
"metadata": {},
|
1482 |
+
"outputs": [],
|
1483 |
+
"source": [
|
1484 |
+
"def superbatch_generator(dataloader, num_tpus):\n",
|
1485 |
+
" iter_loader = iter(dataloader)\n",
|
1486 |
+
" for batch in iter_loader:\n",
|
1487 |
+
" superbatch = [batch.squeeze(1)]\n",
|
1488 |
+
" try:\n",
|
1489 |
+
" for b in range(num_tpus-1):\n",
|
1490 |
+
" batch = next(iter_loader)\n",
|
1491 |
+
" if batch is None:\n",
|
1492 |
+
" break\n",
|
1493 |
+
" # Skip incomplete last batch\n",
|
1494 |
+
" if batch.shape[0] == dataloader.batch_size:\n",
|
1495 |
+
" superbatch.append(batch.squeeze(1))\n",
|
1496 |
+
" except StopIteration:\n",
|
1497 |
+
" pass\n",
|
1498 |
+
" superbatch = torch.stack(superbatch, axis=0)\n",
|
1499 |
+
" yield superbatch"
|
1500 |
+
]
|
1501 |
+
},
|
1502 |
+
{
|
1503 |
+
"cell_type": "code",
|
1504 |
+
"execution_count": 93,
|
1505 |
+
"id": "2210705b",
|
1506 |
+
"metadata": {},
|
1507 |
+
"outputs": [],
|
1508 |
+
"source": [
|
1509 |
+
"import os\n",
|
1510 |
+
"import jax\n",
|
1511 |
+
"\n",
|
1512 |
+
"def encode_captioned_dataset(dataset, output_jsonl, batch_size=32, num_workers=16):\n",
|
1513 |
+
" if os.path.isfile(output_jsonl):\n",
|
1514 |
+
" print(f\"Destination file {output_jsonl} already exists, please move away.\")\n",
|
1515 |
+
" return\n",
|
1516 |
+
" \n",
|
1517 |
+
" num_tpus = jax.device_count()\n",
|
1518 |
+
" dataloader = DataLoader(dataset, batch_size=batch_size, num_workers=num_workers)\n",
|
1519 |
+
" superbatches = superbatch_generator(dataloader, num_tpus=num_tpus)\n",
|
1520 |
+
" \n",
|
1521 |
+
" p_encoder = pmap(lambda batch: encode(model, batch))\n",
|
1522 |
+
"\n",
|
1523 |
+
" # We save each superbatch to avoid reallocation of buffers as we process them.\n",
|
1524 |
+
" # We keep the file open to prevent excessive file seeks.\n",
|
1525 |
+
" with open(output_jsonl, \"w\") as file:\n",
|
1526 |
+
" iterations = len(dataset) // (batch_size * num_tpus)\n",
|
1527 |
+
" for n in tqdm(range(iterations)):\n",
|
1528 |
+
" superbatch = next(superbatches)\n",
|
1529 |
+
" encoded = p_encoder(superbatch.numpy())\n",
|
1530 |
+
" encoded = encoded.reshape(-1, encoded.shape[-1])\n",
|
1531 |
+
"\n",
|
1532 |
+
" # Extract fields from the dataset internal `captions` property, and save to disk\n",
|
1533 |
+
" start_index = n * batch_size * num_tpus\n",
|
1534 |
+
" end_index = (n+1) * batch_size * num_tpus\n",
|
1535 |
+
" paths = dataset.captions[\"image_file\"][start_index:end_index].values\n",
|
1536 |
+
" captions = dataset.captions[\"caption\"][start_index:end_index].values\n",
|
1537 |
+
" encoded_as_string = list(map(lambda item: np.array2string(item, separator=',', max_line_width=50000, formatter={'int':lambda x: str(x)}), encoded))\n",
|
1538 |
+
" batch_df = pd.DataFrame.from_dict({\"image_file\": paths, \"caption\": captions, \"encoding\": encoded_as_string})\n",
|
1539 |
+
" batch_df = batch_df.dropna()\n",
|
1540 |
+
" batch_df.to_json(file, orient='records', lines=True, index=None)\n",
|
1541 |
+
" "
|
1542 |
+
]
|
1543 |
+
},
|
1544 |
+
{
|
1545 |
+
"cell_type": "code",
|
1546 |
+
"execution_count": 94,
|
1547 |
+
"id": "7704863d",
|
1548 |
+
"metadata": {},
|
1549 |
+
"outputs": [
|
1550 |
+
{
|
1551 |
+
"name": "stderr",
|
1552 |
+
"output_type": "stream",
|
1553 |
+
"text": [
|
1554 |
+
" 0%| | 0/78 [00:00<?, ?it/s]\n"
|
1555 |
+
]
|
1556 |
+
},
|
1557 |
+
{
|
1558 |
+
"ename": "TypeError",
|
1559 |
+
"evalue": "Caught TypeError in DataLoader worker process 0.\nOriginal Traceback (most recent call last):\n File \"/home/pedro/miniconda3/envs/hf_jax/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py\", line 287, in _worker_loop\n data = fetcher.fetch(index)\n File \"/home/pedro/miniconda3/envs/hf_jax/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py\", line 47, in fetch\n return self.collate_fn(data)\n File \"/home/pedro/miniconda3/envs/hf_jax/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py\", line 86, in default_collate\n raise TypeError(default_collate_err_msg_format.format(elem_type))\nTypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found <class 'NoneType'>\n",
|
1560 |
+
"output_type": "error",
|
1561 |
+
"traceback": [
|
1562 |
+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
1563 |
+
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
|
1564 |
+
"\u001b[0;32m/tmp/ipykernel_320049/140243368.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mencode_captioned_dataset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdataset\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0myfc100m_output\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m64\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnum_workers\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m16\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
|
1565 |
+
"\u001b[0;32m/tmp/ipykernel_320049/2954345319.py\u001b[0m in \u001b[0;36mencode_captioned_dataset\u001b[0;34m(dataset, output_jsonl, batch_size, num_workers)\u001b[0m\n\u001b[1;32m 18\u001b[0m \u001b[0miterations\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdataset\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m//\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mbatch_size\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mnum_tpus\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mn\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtqdm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0miterations\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 20\u001b[0;31m \u001b[0msuperbatch\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnext\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msuperbatches\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 21\u001b[0m \u001b[0mencoded\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mp_encoder\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msuperbatch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnumpy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 22\u001b[0m \u001b[0mencoded\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mencoded\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencoded\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
1566 |
+
"\u001b[0;32m/tmp/ipykernel_320049/4148450576.py\u001b[0m in \u001b[0;36msuperbatch_generator\u001b[0;34m(dataloader, num_tpus)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0msuperbatch_generator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdataloader\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnum_tpus\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0miter_loader\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0miter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdataloader\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mbatch\u001b[0m \u001b[0;32min\u001b[0m \u001b[0miter_loader\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0msuperbatch\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mbatch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msqueeze\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
1567 |
+
"\u001b[0;32m~/miniconda3/envs/hf_jax/lib/python3.8/site-packages/torch/utils/data/dataloader.py\u001b[0m in \u001b[0;36m__next__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 519\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sampler_iter\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 520\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_reset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 521\u001b[0;31m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_next_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 522\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_num_yielded\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 523\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_dataset_kind\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0m_DatasetKind\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mIterable\u001b[0m \u001b[0;32mand\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
1568 |
+
"\u001b[0;32m~/miniconda3/envs/hf_jax/lib/python3.8/site-packages/torch/utils/data/dataloader.py\u001b[0m in \u001b[0;36m_next_data\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1201\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1202\u001b[0m \u001b[0;32mdel\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_task_info\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0midx\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1203\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_process_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1204\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1205\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_try_put_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
1569 |
+
"\u001b[0;32m~/miniconda3/envs/hf_jax/lib/python3.8/site-packages/torch/utils/data/dataloader.py\u001b[0m in \u001b[0;36m_process_data\u001b[0;34m(self, data)\u001b[0m\n\u001b[1;32m 1227\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_try_put_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1228\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mExceptionWrapper\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1229\u001b[0;31m \u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreraise\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1230\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1231\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
|
1570 |
+
"\u001b[0;32m~/miniconda3/envs/hf_jax/lib/python3.8/site-packages/torch/_utils.py\u001b[0m in \u001b[0;36mreraise\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 423\u001b[0m \u001b[0;31m# have message field\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 424\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexc_type\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmessage\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 425\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexc_type\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 426\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 427\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
|
1571 |
+
"\u001b[0;31mTypeError\u001b[0m: Caught TypeError in DataLoader worker process 0.\nOriginal Traceback (most recent call last):\n File \"/home/pedro/miniconda3/envs/hf_jax/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py\", line 287, in _worker_loop\n data = fetcher.fetch(index)\n File \"/home/pedro/miniconda3/envs/hf_jax/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py\", line 47, in fetch\n return self.collate_fn(data)\n File \"/home/pedro/miniconda3/envs/hf_jax/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py\", line 86, in default_collate\n raise TypeError(default_collate_err_msg_format.format(elem_type))\nTypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found <class 'NoneType'>\n"
|
1572 |
+
]
|
1573 |
+
}
|
1574 |
+
],
|
1575 |
+
"source": [
|
1576 |
+
"encode_captioned_dataset(dataset, yfc100m_output, batch_size=64, num_workers=16)"
|
1577 |
+
]
|
1578 |
+
},
|
1579 |
+
{
|
1580 |
+
"cell_type": "markdown",
|
1581 |
+
"id": "8953dd84",
|
1582 |
+
"metadata": {},
|
1583 |
+
"source": [
|
1584 |
+
"----"
|
1585 |
+
]
|
1586 |
+
}
|
1587 |
+
],
|
1588 |
+
"metadata": {
|
1589 |
+
"kernelspec": {
|
1590 |
+
"display_name": "Python 3 (ipykernel)",
|
1591 |
+
"language": "python",
|
1592 |
+
"name": "python3"
|
1593 |
+
},
|
1594 |
+
"language_info": {
|
1595 |
+
"codemirror_mode": {
|
1596 |
+
"name": "ipython",
|
1597 |
+
"version": 3
|
1598 |
+
},
|
1599 |
+
"file_extension": ".py",
|
1600 |
+
"mimetype": "text/x-python",
|
1601 |
+
"name": "python",
|
1602 |
+
"nbconvert_exporter": "python",
|
1603 |
+
"pygments_lexer": "ipython3",
|
1604 |
+
"version": "3.8.10"
|
1605 |
+
}
|
1606 |
+
},
|
1607 |
+
"nbformat": 4,
|
1608 |
+
"nbformat_minor": 5
|
1609 |
+
}
|