Using numpy.memmap to map a file saved with numpy.save

2020-07-16 05:47发布

I'm trying to create a random matrix and save it to a binary file using numpy.save.

Then I try to map this file using numpy.memmap, but the mapped data comes out wrong.

How to fix it?

It seems it reads the .npy header as data, so I need to skip some bytes at the beginning of the file.

# Matrix dimensions: A is rows x cols, B is cols x rows.
rows, cols = 6, 4

def create_matrix(rows,cols):
    """Return a ``rows`` x ``cols`` array of random ``uint8`` values.

    Samples are drawn uniformly from [0, 1), scaled by 100 and truncated by
    the integer cast, so every entry lies in 0..99 (inside the 0..255 range
    that ``uint8`` image data uses).
    """
    scaled = np.random.rand(rows, cols) * 100
    return scaled.astype('uint8')

def save_matrix(filename, data):
    """Write the array ``data`` to ``filename`` in NumPy's ``.npy`` format."""
    np.save(file=filename, arr=data)

def load_matrix(filename):
    """Read the array stored in the ``.npy`` file ``filename`` and return it."""
    return np.load(filename)

def test_mult_ram():
    """Round-trip two random matrices through ``.npy`` files and print them.

    Creates a ``rows`` x ``cols`` matrix A and a ``cols`` x ``rows`` matrix B,
    saves them as ``A.npy`` and ``B.npy`` in the current working directory,
    reloads each file and prints the reloaded arrays.
    """
    A = create_matrix(rows, cols)
    # Presumably a recognizable marker value to look for in later output.
    A[1, 2] = 42
    save_matrix("A.npy", A)
    A = load_matrix("A.npy")
    # Single-argument print(...) behaves the same on Python 2 and Python 3,
    # whereas the statement form is a SyntaxError on Python 3.
    print(A)
    B = create_matrix(cols, rows)
    save_matrix("B.npy", B)
    B = load_matrix("B.npy")
    print(B)




# Map the saved files directly with np.memmap, using the shapes the matrices
# were created with.
# NOTE: no `offset` is passed, so each mapping starts at byte 0 of the file,
# i.e. at the .npy header written by np.save rather than at the array data.
# This is the incorrect mapping the question is about.
fA = np.memmap('A.npy', dtype='uint8', mode='r', shape=(rows,cols))
fB = np.memmap('B.npy', dtype='uint8', mode='r', shape=(cols,rows))
print fA
print fB

UPDATE:

I just found that the function np.lib.format.open_memmap already exists.

usage: a = np.lib.format.open_memmap('A.npy', dtype='uint8', mode='r+')

2条回答
老娘就宠你
2楼-- · 2020-07-16 06:13

The npy format has a header that must be skipped when using np.memmap. It starts with a 6-byte magic string, '\x93NUMPY', followed by a 2-byte version number, a 2-byte header length (in format version 1.0; versions 2.0 and later use a 4-byte length), and then the header data.

So if you open the file, find the header length, then you can compute the offset to pass to np.memmap:

def load_npy_to_memmap(filename, dtype, shape):
    """Memory-map the array data of a ``.npy`` file, skipping its header.

    The ``.npy`` layout (see the ``numpy.lib.format`` documentation) is:
    a 6-byte magic string ``\\x93NUMPY``, one byte each for the major and
    minor format version, a little-endian unsigned header length (2 bytes
    in version 1.x, 4 bytes in versions 2.x and 3.x), then the header text,
    then the raw array data.

    Args:
        filename: Path of the ``.npy`` file to map.
        dtype: Data type used to interpret the mapped bytes. It is passed
            straight to ``np.memmap`` and is not checked against the header.
        shape: Shape of the mapped array. Also not checked against the header.

    Returns:
        An ``np.memmap`` over the array data, opened with ``np.memmap``'s
        default mode.

    Raises:
        ValueError: If the file does not start with the ``.npy`` magic string
            or declares an unknown major format version.
        struct.error: If the file is too short to contain the fixed-size
            part of the header.
    """
    # Binary mode is required: the file starts with the non-text byte 0x93,
    # and text mode may fail to decode it or translate newline bytes.
    with open(filename, 'rb') as f:
        # 6-byte magic string + 1 byte major version + 1 byte minor version.
        magic, major, _minor = struct.unpack('<6sBB', f.read(8))
        if magic != b'\x93NUMPY':
            raise ValueError('%r is not a .npy file' % (filename,))

        # The length field is unsigned; its width depends on the version.
        if major == 1:
            length_format = '<H'
        elif major in (2, 3):
            length_format = '<I'
        else:
            raise ValueError(
                'unsupported .npy format version %d in %r' % (major, filename))
        length_size = struct.calcsize(length_format)
        (header_len,) = struct.unpack(length_format, f.read(length_size))

    # Array data starts after magic/version, the length field and the header.
    data_offset = 8 + length_size + header_len
    return np.memmap(filename, dtype=dtype, shape=shape, offset=data_offset)

import struct
import numpy as np
# Seed NumPy's global random generator so every run produces the same matrices.
np.random.seed(1)
# Matrix dimensions: A is rows x cols, B is cols x rows.
rows, cols = 6, 4

def create_matrix(rows, cols):
    """Return a ``rows`` x ``cols`` array of random ``uint8`` values.

    Samples are drawn uniformly from [0, 1), scaled by 100 and truncated by
    the integer cast, so every entry lies in 0..99 (inside the 0..255 range
    that ``uint8`` image data uses).
    """
    scaled = np.random.rand(rows, cols) * 100
    return scaled.astype('uint8')

def save_matrix(filename, data):
    """Write the array ``data`` to ``filename`` in NumPy's ``.npy`` format."""
    np.save(file=filename, arr=data)

def load_matrix(filename):
    """Read the array stored in the ``.npy`` file ``filename`` and return it."""
    return np.load(filename)

def load_npy_to_memmap(filename, dtype, shape):
    """Memory-map the array data of a ``.npy`` file, skipping its header.

    The ``.npy`` layout (see the ``numpy.lib.format`` documentation) is:
    a 6-byte magic string ``\\x93NUMPY``, one byte each for the major and
    minor format version, a little-endian unsigned header length (2 bytes
    in version 1.x, 4 bytes in versions 2.x and 3.x), then the header text,
    then the raw array data.

    Args:
        filename: Path of the ``.npy`` file to map.
        dtype: Data type used to interpret the mapped bytes. It is passed
            straight to ``np.memmap`` and is not checked against the header.
        shape: Shape of the mapped array. Also not checked against the header.

    Returns:
        An ``np.memmap`` over the array data, opened with ``np.memmap``'s
        default mode.

    Raises:
        ValueError: If the file does not start with the ``.npy`` magic string
            or declares an unknown major format version.
        struct.error: If the file is too short to contain the fixed-size
            part of the header.
    """
    # Binary mode is required: the file starts with the non-text byte 0x93,
    # and text mode may fail to decode it or translate newline bytes.
    with open(filename, 'rb') as f:
        # 6-byte magic string + 1 byte major version + 1 byte minor version.
        magic, major, _minor = struct.unpack('<6sBB', f.read(8))
        if magic != b'\x93NUMPY':
            raise ValueError('%r is not a .npy file' % (filename,))

        # The length field is unsigned; its width depends on the version.
        if major == 1:
            length_format = '<H'
        elif major in (2, 3):
            length_format = '<I'
        else:
            raise ValueError(
                'unsupported .npy format version %d in %r' % (major, filename))
        length_size = struct.calcsize(length_format)
        (header_len,) = struct.unpack(length_format, f.read(length_size))

    # Array data starts after magic/version, the length field and the header.
    data_offset = 8 + length_size + header_len
    return np.memmap(filename, dtype=dtype, shape=shape, offset=data_offset)

def test_mult_ram():
    """Check that memory-mapping the saved ``.npy`` files matches ``np.load``.

    Creates a ``rows`` x ``cols`` matrix A and a ``cols`` x ``rows`` matrix B,
    saves them as ``A.npy`` and ``B.npy`` in the current working directory,
    reloads them, maps the same files with ``load_npy_to_memmap`` and asserts
    that the mapped arrays equal the reloaded ones. All four arrays are
    printed along the way.

    Raises:
        AssertionError: If a mapped array differs from the loaded array.
    """
    A = create_matrix(rows, cols)
    # Presumably a recognizable marker value to look for in the output.
    A[1, 2] = 42
    save_matrix("A.npy", A)
    A = load_matrix("A.npy")
    # Single-argument print(...) behaves the same on Python 2 and Python 3,
    # whereas the statement form is a SyntaxError on Python 3.
    print(A)
    B = create_matrix(cols, rows)
    save_matrix("B.npy", B)
    B = load_matrix("B.npy")
    print(B)

    fA = load_npy_to_memmap('A.npy', dtype='uint8', shape=(rows, cols))
    fB = load_npy_to_memmap('B.npy', dtype='uint8', shape=(cols, rows))
    print(fA)
    print(fB)
    np.testing.assert_equal(A, fA)
    np.testing.assert_equal(B, fB)

# Run the save / load / memmap round-trip check when the script is executed.
test_mult_ram()
查看更多
啃猪蹄的小仙女
3楼-- · 2020-07-16 06:25

If your goal is to open arrays you saved with np.save as memmaps, then you can just use np.load with the option mmap_mode:

# mmap_mode='r' makes np.load return a read-only memory-mapped array instead
# of reading the file into memory; shape and dtype are taken from the header.
fA = np.load('A.npy', mmap_mode='r')
fB = np.load('B.npy', mmap_mode='r')

This way you actually benefit from the header stored in the .npy files, in the sense that it keeps track of the shape and dtype of the array.

查看更多
登录 后发表回答