File size: 1,600 Bytes
5d923de
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import numpy as np
import cv2
import os
import struct
 
def trans(image, label, save):
    """Export every image of an IDX image/label file pair as a JPEG file.

    Both files are read fully into memory. The label file is expected to
    start with two big-endian unsigned 32-bit integers (magic, count) and
    the image file with four (magic, count, rows, columns); one unsigned
    byte per label and ``rows * columns`` unsigned bytes per image follow.
    The magic numbers are read but not validated.

    Each image is written to ``save`` as ``{prefix}_{index}_{label}.jpg``,
    where ``prefix`` is ``'train'`` when the image file's base name contains
    ``'train'`` and ``'test'`` otherwise.

    Args:
        image: Path of the IDX image file.
        label: Path of the IDX label file.
        save: Directory receiving the JPEG files; created if missing.

    Raises:
        struct.error: If either file is shorter than its header or than the
            number of records announced by the image header.
        OSError: If a file cannot be opened or an image cannot be written.
    """
    prefix = 'train' if 'train' in os.path.basename(image) else 'test'

    # Context managers close the handles deterministically; the originals
    # were left open until garbage collection.
    with open(label, 'rb') as label_file:
        lbdata = label_file.read()
    with open(image, 'rb') as image_file:
        imgdata = image_file.read()

    label_header = struct.Struct('>II')
    image_header = struct.Struct('>IIII')

    # The label header is still unpacked so a truncated label file fails
    # up front; the record count that drives the loop is the image file's.
    label_header.unpack_from(lbdata, 0)
    _, nums, num_rows, num_columns = image_header.unpack_from(imgdata, 0)

    # Size each record from the header instead of assuming 28x28 (784 bytes).
    pixel_struct = struct.Struct('>{}B'.format(num_rows * num_columns))
    label_struct = struct.Struct('>B')

    # cv2.imwrite does not create directories, so make sure the target exists.
    os.makedirs(save, exist_ok=True)

    label_index = label_header.size
    image_index = image_header.size
    for i in range(nums):
        digit = label_struct.unpack_from(lbdata, label_index)[0]
        label_index += label_struct.size

        pixels = pixel_struct.unpack_from(imgdata, image_index)
        image_index += pixel_struct.size

        img = np.array(pixels, dtype='uint8').reshape(num_rows, num_columns)
        save_name = os.path.join(save, '{}_{}_{}.jpg'.format(prefix, i, digit))
        # imwrite reports failure through its return value rather than by
        # raising, so surface it instead of silently dropping the image.
        if not cv2.imwrite(save_name, img):
            raise OSError('failed to write image: {}'.format(save_name))
 
if __name__ == '__main__':
    # Convert the raw IDX files under ./origin_data into per-image JPEGs,
    # test set first and then the training set.
    path = './origin_data'
    train_images = f'{path}/train-images-idx3-ubyte'
    train_labels = f'{path}/train-labels-idx1-ubyte'
    test_images = f'{path}/t10k-images-idx3-ubyte'
    test_labels = f'{path}/t10k-labels-idx1-ubyte'

    save_train = f'{path}/MNIST_data/train_images/'
    save_test = f'{path}/MNIST_data/test_images/'

    # exist_ok=True avoids the check-then-create race of a separate
    # os.path.exists() test and removes the duplicated branches.
    for out_dir in (save_train, save_test):
        os.makedirs(out_dir, exist_ok=True)

    trans(test_images, test_labels, save_test)
    trans(train_images, train_labels, save_train)