Charles Kabui committed on
Commit
663c04c
·
1 Parent(s): 98bdd2e

preview data

Browse files
Files changed (1) hide show
  1. data/preview.ipynb +11 -18
data/preview.ipynb CHANGED
@@ -25,30 +25,23 @@
25
  "### Preview"
26
  ]
27
  },
 
 
 
 
 
 
 
28
  {
29
  "cell_type": "code",
30
- "execution_count": 3,
31
  "metadata": {},
32
- "outputs": [
33
- {
34
- "ename": "BadZipFile",
35
- "evalue": "File is not a zip file",
36
- "output_type": "error",
37
- "traceback": [
38
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
39
- "\u001b[0;31mBadZipFile\u001b[0m Traceback (most recent call last)",
40
- "Cell \u001b[0;32mIn[3], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mzipfile\u001b[39;00m\n\u001b[0;32m----> 3\u001b[0m filelist \u001b[38;5;241m=\u001b[39m \u001b[43mzipfile\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mZipFile\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m./raw/DocLayNet_core.zip\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mr\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mfilelist\n\u001b[1;32m 4\u001b[0m filelist\n",
41
- "File \u001b[0;32m~/miniconda3/envs/dss-env/lib/python3.10/zipfile.py:1269\u001b[0m, in \u001b[0;36mZipFile.__init__\u001b[0;34m(self, file, mode, compression, allowZip64, compresslevel, strict_timestamps)\u001b[0m\n\u001b[1;32m 1267\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1268\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m mode \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mr\u001b[39m\u001b[38;5;124m'\u001b[39m:\n\u001b[0;32m-> 1269\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_RealGetContents\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1270\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m mode \u001b[38;5;129;01min\u001b[39;00m (\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mw\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mx\u001b[39m\u001b[38;5;124m'\u001b[39m):\n\u001b[1;32m 1271\u001b[0m \u001b[38;5;66;03m# set the modified flag so central directory gets written\u001b[39;00m\n\u001b[1;32m 1272\u001b[0m \u001b[38;5;66;03m# even if no files are added to the archive\u001b[39;00m\n\u001b[1;32m 1273\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_didModify \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n",
42
- "File \u001b[0;32m~/miniconda3/envs/dss-env/lib/python3.10/zipfile.py:1336\u001b[0m, in \u001b[0;36mZipFile._RealGetContents\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1334\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m BadZipFile(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFile is not a zip file\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 1335\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m endrec:\n\u001b[0;32m-> 1336\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m BadZipFile(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFile is not a zip file\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 1337\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdebug \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[1;32m 1338\u001b[0m \u001b[38;5;28mprint\u001b[39m(endrec)\n",
43
- "\u001b[0;31mBadZipFile\u001b[0m: File is not a zip file"
44
- ]
45
- }
46
- ],
47
  "source": [
48
  "import zipfile\n",
49
  "\n",
50
- "filelist = zipfile.ZipFile('./raw/DocLayNet_core.zip', 'r').filelist\n",
51
- "filelist"
52
  ]
53
  },
54
  {
 
25
  "### Preview"
26
  ]
27
  },
28
+ {
29
+ "cell_type": "markdown",
30
+ "metadata": {},
31
+ "source": [
32
+ "#### `/raw/DocLayNet_core.zip` contents"
33
+ ]
34
+ },
35
  {
36
  "cell_type": "code",
37
+ "execution_count": null,
38
  "metadata": {},
39
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  "source": [
41
  "import zipfile\n",
42
  "\n",
43
+ "filelist = zipfile.ZipFile('/raw/DocLayNet_core.zip', 'r')\n",
44
+ "[i.filename for i in filelist.filelist]"
45
  ]
46
  },
47
  {