AshanGimhana's picture
Upload folder using huggingface_hub
9375c9a verified
raw
history blame
5.4 kB
// Copyright (C) 2015 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_MNIST_CPp_
#define DLIB_MNIST_CPp_
#include "mnist.h"
#include <fstream>
#include "../byte_orderer.h"
#include "../uintn.h"
// ----------------------------------------------------------------------------------------
namespace dlib
{
void load_mnist_dataset (
const std::string& folder_name,
std::vector<matrix<unsigned char> >& training_images,
std::vector<unsigned long>& training_labels,
std::vector<matrix<unsigned char> >& testing_images,
std::vector<unsigned long>& testing_labels
)
{
using namespace std;
ifstream fin1((folder_name+"/train-images-idx3-ubyte").c_str(), ios::binary);
if (!fin1)
{
fin1.open((folder_name + "/train-images.idx3-ubyte").c_str(), ios::binary);
}
ifstream fin2((folder_name+"/train-labels-idx1-ubyte").c_str(), ios::binary);
if (!fin2)
{
fin2.open((folder_name + "/train-labels.idx1-ubyte").c_str(), ios::binary);
}
ifstream fin3((folder_name+"/t10k-images-idx3-ubyte").c_str(), ios::binary);
if (!fin3)
{
fin3.open((folder_name + "/t10k-images.idx3-ubyte").c_str(), ios::binary);
}
ifstream fin4((folder_name+"/t10k-labels-idx1-ubyte").c_str(), ios::binary);
if (!fin4)
{
fin4.open((folder_name + "/t10k-labels.idx1-ubyte").c_str(), ios::binary);
}
if (!fin1) throw error("Unable to open file train-images-idx3-ubyte or train-images.idx3-ubyte");
if (!fin2) throw error("Unable to open file train-labels-idx1-ubyte or train-labels.idx1-ubyte");
if (!fin3) throw error("Unable to open file t10k-images-idx3-ubyte or t10k-images.idx3-ubyte");
if (!fin4) throw error("Unable to open file t10k-labels-idx1-ubyte or t10k-labels.idx1-ubyte");
byte_orderer bo;
// make sure the files have the contents we expect.
uint32 magic, num, nr, nc, num2, num3, num4;
fin1.read((char*)&magic, sizeof(magic)); bo.big_to_host(magic);
fin1.read((char*)&num, sizeof(num)); bo.big_to_host(num);
fin1.read((char*)&nr, sizeof(nr)); bo.big_to_host(nr);
fin1.read((char*)&nc, sizeof(nc)); bo.big_to_host(nc);
if (magic != 2051 || num != 60000 || nr != 28 || nc != 28)
throw error("mnist dat files are corrupted.");
fin2.read((char*)&magic, sizeof(magic)); bo.big_to_host(magic);
fin2.read((char*)&num2, sizeof(num2)); bo.big_to_host(num2);
if (magic != 2049 || num2 != 60000)
throw error("mnist dat files are corrupted.");
fin3.read((char*)&magic, sizeof(magic)); bo.big_to_host(magic);
fin3.read((char*)&num3, sizeof(num3)); bo.big_to_host(num3);
fin3.read((char*)&nr, sizeof(nr)); bo.big_to_host(nr);
fin3.read((char*)&nc, sizeof(nc)); bo.big_to_host(nc);
if (magic != 2051 || num3 != 10000 || nr != 28 || nc != 28)
throw error("mnist dat files are corrupted.");
fin4.read((char*)&magic, sizeof(magic)); bo.big_to_host(magic);
fin4.read((char*)&num4, sizeof(num4)); bo.big_to_host(num4);
if (magic != 2049 || num4 != 10000)
throw error("mnist dat files are corrupted.");
if (!fin1) throw error("Unable to read train-images-idx3-ubyte");
if (!fin2) throw error("Unable to read train-labels-idx1-ubyte");
if (!fin3) throw error("Unable to read t10k-images-idx3-ubyte");
if (!fin4) throw error("Unable to read t10k-labels-idx1-ubyte");
training_images.resize(60000);
training_labels.resize(60000);
testing_images.resize(10000);
testing_labels.resize(10000);
for (size_t i = 0; i < training_images.size(); ++i)
{
training_images[i].set_size(nr,nc);
fin1.read((char*)&training_images[i](0,0), nr*nc);
}
for (size_t i = 0; i < training_labels.size(); ++i)
{
char l;
fin2.read(&l, 1);
training_labels[i] = l;
}
for (size_t i = 0; i < testing_images.size(); ++i)
{
testing_images[i].set_size(nr,nc);
fin3.read((char*)&testing_images[i](0,0), nr*nc);
}
for (size_t i = 0; i < testing_labels.size(); ++i)
{
char l;
fin4.read(&l, 1);
testing_labels[i] = l;
}
if (!fin1) throw error("Unable to read train-images-idx3-ubyte");
if (!fin2) throw error("Unable to read train-labels-idx1-ubyte");
if (!fin3) throw error("Unable to read t10k-images-idx3-ubyte");
if (!fin4) throw error("Unable to read t10k-labels-idx1-ubyte");
if (fin1.get() != EOF) throw error("Unexpected bytes at end of train-images-idx3-ubyte");
if (fin2.get() != EOF) throw error("Unexpected bytes at end of train-labels-idx1-ubyte");
if (fin3.get() != EOF) throw error("Unexpected bytes at end of t10k-images-idx3-ubyte");
if (fin4.get() != EOF) throw error("Unexpected bytes at end of t10k-labels-idx1-ubyte");
}
}
// ----------------------------------------------------------------------------------------
#endif // DLIB_MNIST_CPp_