{ "cells": [ { "cell_type": "code", "execution_count": 2, "outputs": [], "source": [ "import os\n", "import gzip" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": 3, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/rzimmerdev/conda/envs/data/lib/python3.9/site-packages/_distutils_hack/__init__.py:33: UserWarning: Setuptools is replacing distutils.\n", " warnings.warn(\"Setuptools is replacing distutils.\")\n" ] } ], "source": [ "from src.downloader import download_dataset\n", "download_dataset(\"mnist\", \"../datasets/mnist\")" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": 4, "outputs": [ { "data": { "text/plain": "b'\\x00\\x00\\x08\\x03\\x00\\x00\\xea`\\x00\\x00\\x00\\x1c\\x00\\x00\\x00\\x1c'" }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "f = gzip.open(\"../datasets/mnist/\" + os.listdir(\"../datasets/mnist/\")[0], 'r')\n", "f.read(16)" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": 5, "outputs": [], "source": [ "import numpy as np\n", "from torch.utils.data import DataLoader, Dataset" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": 6, "outputs": [], "source": [ "class DatasetMNIST(Dataset):\n", " def __init__(self, images, labels):\n", " with gzip.open(images, 'r') as f:\n", " f.read(4)\n", " self.total = int.from_bytes(f.read(4), 'big')\n", " rows = int.from_bytes(f.read(4), 'big')\n", " columns = int.from_bytes(f.read(4), 'big')\n", "\n", " image_data = f.read()\n", " images = np.frombuffer(image_data, dtype=np.uint8)\\\n", " .reshape((self.total, rows, columns))\n", " self.images = images\n", " with gzip.open(labels, 'r') as f:\n", " f.read(4)\n", " total = int.from_bytes(f.read(4), 'big')\n", "\n", " label_data = f.read()\n", " labels = np.frombuffer(label_data, dtype=np.uint8)\n", " self.labels = labels\n", " self.data = list(zip(self.images, self.labels))\n", " def __getitem__(self, n):\n", " if n > self.total:\n", " raise ValueError(f\"Dataset doesn't have enough elements to suffice request of {n} elements.\")\n", " return self.data[n]\n", "\n", " def __len__(self):\n", " return len(self.data)" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": 7, "outputs": [], "source": [ "dataset_dir = \"../datasets/mnist/\"\n", "loader = DatasetMNIST(dataset_dir + \"train_images\", dataset_dir + \"train_labels\")" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": 8, "outputs": [ { "data": { "text/plain": "
", "image/png": "iVBORw0KGgoAAAANSUhEUgAAAPsAAAEICAYAAACZA4KlAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAASY0lEQVR4nO3de9BU9X3H8ffHu4IGKZcgigZk2qi1JjKaTqjQ8VpnVNR6a42AmWBMYpNMbLXUqhm1STrV1GljJogWFJRg1HiJrTI2iqQpkaSoCBjUYkQIqEhUojHAt3+c83ROHnfPLnuX3+c1s/Ps7nfPOV+W/ew5e86e/SkiMLMd307dbsDMOsNhN0uEw26WCIfdLBEOu1kiHHazRDjstl0kXS1pTpXaRElr6pzPFEmLGuyh4WlT5rC3kKTHJL0hafcOLe8gSSFpl+2YZrWk49rZV6+RdIqkZZLelvRfkg7pdk/d4LC3iKSDgD8BAji1u91YH0ljgbnAZ4FBwAPA/dvzBrmjcNhb5wLgv4FZwORiQdIsSd+S9ANJb0laLGlMoR6SPitpVb5l8C1Jyms7SbpC0kuSNki6TdKH8kkX5n835WutP5Y0RtJ/Snpd0muS5koalM/rdmAU8ED++L/J7/9EvsbbJOkpSRMLvX1E0uN53wuAIfU+IZIul/RCPu1ySae//yH6F0m/krRS0rGFwock3SJpnaRXJF0raed6l11wIvBERCyKiC3AN4CRwIQG5vXBFhG+tOACPA98DjgS+C0wvFCbBWwEjgJ2IVvTzCvUA3iQbM0zCngVOCmvXZjPezQwELgHuD2vHZRPu0thXgcDxwO7A0PJ3hD+uVBfDRxXuD0SeB04mezN//j89tC8/mPghnx+xwBvAXOqPAcTgTWF22cB++XzPQfYDIzIa1OALcCXgV3z+q+AwXn9+8B3gAHAMOAnwEWFaRcVlvMgcHmVni4BHirc3hl4F/hit18zHX+NdruBHeECjM8DPiS/vRL4cqE+C5hZuH0ysLJwO4Dxhdvz+168wKPA5wq138+XtUulsFfobRLwP4Xb/cN+Wd+bR+G+h8m2TkblgRxQqN1Rb9gr1JcCp+XXpwBrARXqPwE+BQwHfgPsWaidB/ywMO2iasvpt8w/yN9kJgK7AX8PbAP+ttuvm05fvBnfGpOBRyLitfz2HfTblAd+Wbj+a7K1dD31/YCXCrWXyII+vFIjkoZJmpdv+r4JzKF80/tA4Kx8E36TpE1kb14j8mW/ERGb+y2/LpIukLS0MN/D+vXySuSJLMx7v7ynXYF1hWm/Q7aG3y4RsZLs/+JfgXX58pcDdR012JEkt5Oi1STtCZwN7CypL7C7A4Mk/VFEPNXkItaSvfj79K1t15Ntgvf3NbK1/eER8bqkSWQv9D79T3N8mWzN/pn+M5J0ILCvpAGFwI+qMI/3yae9GTgW+HFEbJW0FFDhYSMlqRD4UcD9eU+/IdtS2lJrWbVExPeA7+V9DSL7aPRks/P9oPGavXmTgK3AIcAR+eWjwBNkO+2adSfw5XxH2UDgH4Dv5iF4lWyTdHTh8XsDb5PttBsJ/HW/+a3v9/g5wCmSTpS0s6Q98uPl+0fES8AS4KuSdpM0Hjilzr4HkL0pvAogaSrZmr1oGPBXknaVdBbZ8/ZQRKwDHgGul7RPvpNyjKSGdqpJOjL/tw0l20J4IF/jJ8Vhb95k4N8i4hcR8cu+C9na9C9bcIjnVuB2sh1t/0u2c+kSgIj4NXAd8KN8c/cTwFeBj5Pt7PoB2Q69oq8BV+SPvzQiXgZOA6aTBfNlsjeIvtfGXwBHk+1gvAq4rZ6mI2I5cD3ZDr71wB8CP+r3sMXAWOC1/N/x5xHxel67gOwz9nLgDbI184hKy5L075Kml7RzI7AJeC7/+76tmBTodz8ymdmOymt2s0Q47GaJcNjNEuGwmyWio8fZJXlvoFmbRYQq3d/Uml3SSZKek/S8pMubmZeZtVfDh97yM5B+TnbixBqybySdlx9frTaN1+xmbdaONftRwPMR8WJEvAfMI/tyhpn1oGbCPpLs21Z91lDhu9qSpklaImlJE8sysyY1s4Ou0qbC+zbTI2IGMAO8GW/WTc2s2dcABxRu7092hpaZ9aBmwv4kMDY/G2s34Fyy0xPNrAc1vBkfEVskfYHsV012Bm6NiGdb1pmZtVRHz3rzZ3az9mvLl2rM7IPDYTdLhMNulgiH3SwRDrtZIhx2s0Q47GaJcNjNEuGwmyXCYTdLhMNulgiH3SwRDrtZIhx2s0Q47GaJcNjNEuGwmyXCYTdLhMNulgiH3SwRDrtZIhx2s0Q47GaJcNjNEuGwmyXCYTdLhMNulgiH3SwRDrtZIhx2s0Q0PD47gKTVwFvAVmBLRIxrRVNm1npNhT33pxHxWgvmY2Zt5M14s0Q0G/YAHpH0U0nTKj1A0jRJSyQtaXJZZtYERUTjE0v7RcRaScOABcAlEbGw5PGNL8zM6hIRqnR/U2v2iFib/90A3Asc1cz8zKx9Gg67pAGS9u67DpwALGtVY2bWWs3sjR8O3Cupbz53RMR/tKQrM2u5pj6zb/fC/JndrO3a8pndzD44HHazRDjsZolw2M0S4bCbJaIVJ8JYDzv66KNL6+eff35pfcKECaX1Qw89dLt76nPppZeW1teuXVtaHz9+fGl9zpw5VWuLFy8unXZH5DW7WSIcdrNEOOxmiXDYzRLhsJslwmE3S4TDbpYIn/W2AzjnnHOq1m688cbSaYcMGVJaz09hruqxxx4rrQ8dOrRq7ZBDDimdtpZavd11111Va+eee25Ty+5lPuvNLHEOu1kiHHazRDjsZolw2M0S4bCbJcJhN0uEz2fvAbvsUv7fMG5c+eC4N998c9XaXnvtVTrtwoVVB/AB4JprrimtL1q0qLS+++67V63Nnz+/dNoTTjihtF7LkiUecazIa3azRDjsZolw2M0S4bCbJcJhN0uEw26WCIfdLBE+zt4Dav12+8yZMxue94IFC0rrZefCA7z55psNL7vW/Js9jr5mzZrS+uzZs5ua/46m5ppd0q2SNkhaVrhvsKQFklblf/dtb5tm1qx6NuNnASf1u+9y4NGIGAs8mt82sx5WM+wRsRDY2O/u04C+baTZwKTWtmVmrdboZ/bhEbEOICLWSRpW7YGSpgHTGlyOmbVI23fQRcQMYAb4ByfNuqnRQ2/rJY0AyP9uaF1LZtYOjYb9fmByfn0ycF9r2jGzdqn5u/GS7gQmAkOA9cBVwPeB+cAo4BfAWRHRfydepXkluRlf65zw6dOnl9Zr/R/ddNNNVWtXXHFF6bTNHkevZcWKFVVrY8eObWreZ555Zmn9vvvSXAdV+934mp/ZI+K8KqVjm+rIzDrKX5c1S4TDbpYIh90sEQ67WSIcdrNE+BTXFrjyyitL67UOrb333nul9Ycffri0ftlll1WtvfPOO6XT1rLHHnuU1mudpjpq1KiqtVpDLl977bWl9VQPrTXKa3azRDjsZolw2M0S4bCbJcJhN0uEw26WCIfdLBE1T3Ft6cI+wKe4Dho0qGpt5cqVpdMOGTKktP7ggw+W1idNmlRab8bBBx9cWp87d25p/cgjj2x42XfffXdp/cILLyytb968ueFl78iqneLqNbtZIhx2s0Q47GaJcNjNEuGwmyXCYTdLhMNulggfZ6/TsGFVR7hi7dq1Tc179OjRpfV33323tD516tSqtVNPPbV02sMOO6y0PnDgwNJ6rddPWf2MM84onfaBBx4orVtlPs5uljiH3SwRDrtZIhx2s0Q47GaJcNjNEuGwmyXCx9nrVHY+e9mwxABDhw4trdf6/fR2/h/V+o5Ard5GjBhRWn/11VcbntYa0/Bxdkm3StogaVnhvqslvSJpaX45uZXNmlnr1bMZPws4qcL934yII/LLQ61ty8xarWbYI2IhsLEDvZhZGzWzg+4Lkp7ON/P3rfYgSdMkLZG0pIllmVmTGg37t4ExwBHAOuD6ag+MiBkRMS4ixjW4LDNrgYbCHhHrI2JrRGwDbgaOam1bZtZqDYVdUvGYyenAsmqPNbPeUHN8dkl3AhOBIZLWAFcBEyUdAQSwGriofS32hk2bNlWt1fpd91q/Cz948ODS+gsvvFBaLxunfNasWaXTbtxYvu913rx5pfVax8prTW+dUzPsEXFehbtvaUMvZtZG/rqsWSIcdrNEOOxmiXDYzRLhsJsloubeeKtt8eLFpfVap7h20zHHHFNanzBhQml927ZtpfUXX3xxu3uy9vCa3SwRDrtZIhx2s0Q47GaJcNjNEuGwmyXCYTdLhI+zJ27PPfcsrdc6jl7rZ659imvv8JrdLBEOu1kiHHazRDjsZolw2M0S4bCbJcJhN0uEh2y2Ulu3bi2t13r9lP3UdNlwzta4hodsNrMdg8NulgiH3SwRDrtZIhx2s0Q47GaJcNjNElHPkM0HALcBHwa2ATMi4kZJg4HvAgeRDdt8dkS80b5WrR1OPPHEbrdgHVLPmn0L8JWI+CjwCeDzkg4BLgcejYixwKP5bTPrUTXDHhHrIuJn+fW3gBXASOA0YHb+sNnApDb1aGYtsF2f2SUdBHwMWAwMj4h1kL0hAMNa3p2ZtUzdv0EnaSBwN/CliHhTqvj120rTTQOmNdaembVKXWt2SbuSBX1uRNyT371e0oi8PgLYUGnaiJgREeMiYlwrGjazxtQMu7JV+C3Aioi4oVC6H5icX58M3Nf69sysVerZjP8k8CngGUlL8/umA18H5kv6NPAL4Ky2dGhtNXr06G63YB1SM+wRsQio9gH92Na2Y2bt4m/QmSXCYTdLhMNulgiH3SwRDrtZIhx2s0R4yObEPfHEE6X1nXYqXx/UGtLZeofX7GaJcNjNEuGwmyXCYTdLhMNulgiH3SwRDrtZInycPXHLli0rra9ataq0Xut8+DFjxlStecjmzvKa3SwRDrtZIhx2s0Q47GaJcNjNEuGwmyXCYTdLhCKicwuTOrcwa4kpU6aU1mfOnFlaf/zxx6vWLrnkktJply9fXlq3yiKi4k+/e81ulgiH3SwRDrtZIhx2s0Q47GaJcNjNEuGwmyWi5nF2SQcAtwEfBrYBMyLiRklXA58B+k5Knh4RD9WYl4+zf8Dss88+pfX58+eX1o877riqtXvuuad02qlTp5bWN2/eXFpPVbXj7PX8eMUW4CsR8TNJewM/lbQgr30zIv6pVU2aWfvUDHtErAPW5dffkrQCGNnuxsystbbrM7ukg4CPAYvzu74g6WlJt0rat8o00yQtkbSkuVbNrBl1h13SQOBu4EsR8SbwbWAMcATZmv/6StNFxIyIGBcR45pv18waVVfYJe1KFvS5EXEPQESsj4itEbENuBk4qn1tmlmzaoZdkoBbgBURcUPh/hGFh50OlP9MqZl1VT2H3sYDTwDPkB16A5gOnEe2CR/AauCifGde2bx86G0HU+vQ3HXXXVe1dvHFF5dOe/jhh5fWfQpsZQ0feouIRUCliUuPqZtZb/E36MwS4bCbJcJhN0uEw26WCIfdLBEOu1ki/FPSZjsY/5S0WeIcdrNEOOxmiXDYzRLhsJslwmE3S4TDbpaIen5dtpVeA14q3B6S39eLerW3Xu0L3FujWtnbgdUKHf1SzfsWLi3p1d+m69XeerUvcG+N6lRv3ow3S4TDbpaIbod9RpeXX6ZXe+vVvsC9NaojvXX1M7uZdU631+xm1iEOu1kiuhJ2SSdJek7S85Iu70YP1UhaLekZSUu7PT5dPobeBknLCvcNlrRA0qr8b8Ux9rrU29WSXsmfu6WSTu5SbwdI+qGkFZKelfTF/P6uPnclfXXkeev4Z3ZJOwM/B44H1gBPAudFRE/84r+k1cC4iOj6FzAkHQO8DdwWEYfl9/0jsDEivp6/Ue4bEZf1SG9XA293exjvfLSiEcVhxoFJwBS6+NyV9HU2HXjeurFmPwp4PiJejIj3gHnAaV3oo+dFxEJgY7+7TwNm59dnk71YOq5Kbz0hItZFxM/y628BfcOMd/W5K+mrI7oR9pHAy4Xba+it8d4DeETSTyVN63YzFQzvG2Yr/zusy/30V3MY707qN8x4zzx3jQx/3qxuhL3S72P10vG/T0bEx4E/Az6fb65afeoaxrtTKgwz3hMaHf68Wd0I+xrggMLt/YG1XeijoohYm//dANxL7w1Fvb5vBN3874Yu9/P/emkY70rDjNMDz103hz/vRtifBMZK+oik3YBzgfu70Mf7SBqQ7zhB0gDgBHpvKOr7gcn59cnAfV3s5Xf0yjDe1YYZp8vPXdeHP4+Ijl+Ak8n2yL8A/F03eqjS12jgqfzybLd7A+4k26z7LdkW0aeB3wMeBVblfwf3UG+3kw3t/TRZsEZ0qbfxZB8NnwaW5peTu/3clfTVkefNX5c1S4S/QWeWCIfdLBEOu1kiHHazRDjsZolw2M0S4bCbJeL/AHyD7vpJDzRWAAAAAElFTkSuQmCC\n" }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "import matplotlib.pyplot as plt\n", "X, y = loader[4]\n", "plt.imshow(X, cmap=\"gray\")\n", "plt.title(label=\"Annotated label: \" + str(y))\n", "plt.show()" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.6" } }, "nbformat": 4, "nbformat_minor": 0 }