train-images.idx3-ubyte 파일은
http://yann.lecun.com/exdb/mnist/ 에서 받을 수 있다.
보통은 이미 만들어진 파서를 사용하면 되지만, 파이썬 공부도 할 겸 직접 만들어서 처리했다.
# train-images.idx3-ubyte
import os

from PIL import Image
# Open the image file in binary mode; `f` is the stream that the header and
# pixel reads later in this script consume, and it is closed at the end.
f = open('train-images.idx3-ubyte', 'rb')
#[offset] [type] [value] [description]
#0000 32 bit integer 0x00000803(2051) magic number
#0004 32 bit integer 60000 number of images
#0008 32 bit integer 28 number of rows
#0012 32 bit integer 28 number of columns
#0016 unsigned byte ?? pixel
#0017 unsigned byte ?? pixel
#........
#xxxx unsigned byte ?? pixel
#Pixels are organized row-wise. Pixel values are 0 to 255. 0 means background (white), 255 means foreground (black).
def read32(bs):
    """Read one big-endian unsigned 32-bit integer from a binary stream.

    Args:
        bs: Binary file-like object; its ``read(4)`` is called exactly once.

    Returns:
        The decoded value as a non-negative int.

    Raises:
        EOFError: If the stream yields fewer than four bytes.
    """
    data = bs.read(4)
    # int.from_bytes accepts short input silently (b'' decodes to 0), so a
    # truncated file would otherwise produce a plausible but wrong value.
    if len(data) != 4:
        raise EOFError('expected 4 bytes, got {}'.format(len(data)))
    return int.from_bytes(data, byteorder='big', signed=False)
# Parse the 16-byte IDX3 header (magic number, image count, rows, columns),
# then write every image out as extracted/<index>.jpg.
# try/finally guarantees the input file is closed even if parsing or saving
# fails part-way through.
try:
    magic = read32(f)
    # 2051 (0x00000803) identifies an IDX3 image file; anything else means the
    # header fields that follow would be misinterpreted.
    if magic != 2051:
        raise ValueError(
            'unexpected magic number {:#010x}; expected 0x00000803'.format(magic)
        )
    imageCount = read32(f)
    imageRow = read32(f)
    imageCol = read32(f)

    # Take the per-image byte count from the header instead of assuming 28*28,
    # so files with other image dimensions are read correctly.
    imageSize = imageRow * imageCol

    # Image.save does not create missing directories.
    os.makedirs('extracted', exist_ok=True)

    for i in range(imageCount):
        # Each iteration consumes exactly one image worth of pixels.
        imageBuffer = f.read(imageSize)
        if len(imageBuffer) != imageSize:
            raise EOFError(
                'image {} is truncated: expected {} bytes, got {}'.format(
                    i, imageSize, len(imageBuffer)
                )
            )
        # PIL sizes are (width, height), i.e. (columns, rows).
        image = Image.frombytes('L', (imageCol, imageRow), imageBuffer, 'raw')
        image.save('extracted/' + str(i) + '.jpg', 'JPEG')
    print('[DONE] Extracted all the images!')
finally:
    f.close()