Charles Kabui
committed on
Commit
·
663c04c
1
Parent(s):
98bdd2e
preview data
Browse files- data/preview.ipynb +11 -18
data/preview.ipynb
CHANGED
@@ -25,30 +25,23 @@
|
|
25 |
"### Preview"
|
26 |
]
|
27 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
{
|
29 |
"cell_type": "code",
|
30 |
-
"execution_count": 3,
|
31 |
"metadata": {},
|
32 |
-
"outputs": [
|
33 |
-
{
|
34 |
-
"ename": "BadZipFile",
|
35 |
-
"evalue": "File is not a zip file",
|
36 |
-
"output_type": "error",
|
37 |
-
"traceback": [
|
38 |
-
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
39 |
-
"\u001b[0;31mBadZipFile\u001b[0m Traceback (most recent call last)",
|
40 |
-
"Cell \u001b[0;32mIn[3], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mzipfile\u001b[39;00m\n\u001b[0;32m----> 3\u001b[0m filelist \u001b[38;5;241m=\u001b[39m \u001b[43mzipfile\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mZipFile\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m./raw/DocLayNet_core.zip\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mr\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mfilelist\n\u001b[1;32m 4\u001b[0m filelist\n",
|
41 |
-
"File \u001b[0;32m~/miniconda3/envs/dss-env/lib/python3.10/zipfile.py:1269\u001b[0m, in \u001b[0;36mZipFile.__init__\u001b[0;34m(self, file, mode, compression, allowZip64, compresslevel, strict_timestamps)\u001b[0m\n\u001b[1;32m 1267\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1268\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m mode \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mr\u001b[39m\u001b[38;5;124m'\u001b[39m:\n\u001b[0;32m-> 1269\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_RealGetContents\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1270\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m mode \u001b[38;5;129;01min\u001b[39;00m (\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mw\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mx\u001b[39m\u001b[38;5;124m'\u001b[39m):\n\u001b[1;32m 1271\u001b[0m \u001b[38;5;66;03m# set the modified flag so central directory gets written\u001b[39;00m\n\u001b[1;32m 1272\u001b[0m \u001b[38;5;66;03m# even if no files are added to the archive\u001b[39;00m\n\u001b[1;32m 1273\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_didModify \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n",
|
42 |
-
"File \u001b[0;32m~/miniconda3/envs/dss-env/lib/python3.10/zipfile.py:1336\u001b[0m, in \u001b[0;36mZipFile._RealGetContents\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1334\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m BadZipFile(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFile is not a zip file\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 1335\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m endrec:\n\u001b[0;32m-> 1336\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m BadZipFile(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFile is not a zip file\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 1337\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdebug \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[1;32m 1338\u001b[0m \u001b[38;5;28mprint\u001b[39m(endrec)\n",
|
43 |
-
"\u001b[0;31mBadZipFile\u001b[0m: File is not a zip file"
|
44 |
-
]
|
45 |
-
}
|
46 |
-
],
|
47 |
"source": [
|
48 |
"import zipfile\n",
|
49 |
"\n",
|
50 |
-
"filelist = zipfile.ZipFile('./raw/DocLayNet_core.zip', 'r').filelist\n",
|
51 |
-
"filelist"
|
52 |
]
|
53 |
},
|
54 |
{
|
|
|
25 |
"### Preview"
|
26 |
]
|
27 |
},
|
28 |
+
{
|
29 |
+
"cell_type": "markdown",
|
30 |
+
"metadata": {},
|
31 |
+
"source": [
|
32 |
+
"#### `/raw/DocLayNet_core.zip` contents"
|
33 |
+
]
|
34 |
+
},
|
35 |
{
|
36 |
"cell_type": "code",
|
37 |
+
"execution_count": null,
|
38 |
"metadata": {},
|
39 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
"source": [
|
41 |
"import zipfile\n",
|
42 |
"\n",
|
43 |
+
"filelist = zipfile.ZipFile('/raw/DocLayNet_core.zip', 'r')\n",
|
44 |
+
"[i.filename for i in filelist.filelist]"
|
45 |
]
|
46 |
},
|
47 |
{
|