Keyboard shortcuts

Press ← or → to navigate between chapters

Press S or / to search in the book

Press ? to show this help

Press Esc to hide this help

Serialization of a single NumPy array

pip install numpy
pip install scipy
pip install h5py
pip install protobuf
import os
import sys
import json
import numpy as np
import h5py
import scipy.io
import pickle

def main():
    """Build a random array and round-trip it through every file format.

    The array shape defaults to (2, 4). When exactly two command-line
    arguments are supplied they are converted to integers and used as the
    shape instead. Each helper prints the size of the file it produced.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) == 2:
        rows, cols = cli_args
        shape = (int(rows), int(cols))
    else:
        shape = (2, 4)
    print(f"size: {shape}\n")
    original = np.random.random(shape)

    # Run the format demos one after another, in the order they are reported.
    for roundtrip in (try_json, try_pickle, try_matlab, try_hdf5):
        roundtrip(original)

def try_json(original):
    """Write the array to ``demo.json``, read it back, and print the file size.

    The array is encoded as nested lists; the loaded lists are converted back
    to an array and must compare equal to ``original``.
    """
    path = 'demo.json'

    def as_list(obj):
        # Called by json for objects it cannot encode itself (the ndarray).
        return obj.tolist()

    with open(path, 'w') as sink:
        json.dump(original, sink, default=as_list)
    with open(path) as source:
        decoded = json.load(source)
    restored = np.array(decoded)

    assert np.array_equal(original, restored)
    print(f"json:   {os.path.getsize(path):7}")

def try_pickle(original):
    """Pickle the array to ``demo.pickle``, unpickle it, and print the file size.

    The highest available pickle protocol is used. The unpickled array must
    compare equal to ``original``.
    """
    path = 'demo.pickle'

    with open(path, 'wb') as sink:
        pickle.dump(original, sink, pickle.HIGHEST_PROTOCOL)
    # Only a file this function has just written is ever unpickled here.
    with open(path, 'rb') as source:
        restored = pickle.load(source)

    assert np.array_equal(original, restored)
    print(f"pickle: {os.path.getsize(path):7}")

def try_matlab(original):
    """Save the array to ``demo.mat``, load it back, and print the file size.

    The array is stored under the variable name ``data``. The loaded value
    must compare equal to ``original``.
    """
    path = 'demo.mat'
    variables = {'data': original}

    scipy.io.savemat(path, variables)
    restored = scipy.io.loadmat(path)['data']

    assert np.array_equal(original, restored)
    print(f"matlab: {os.path.getsize(path):7}")


def try_hdf5(original):
    """Store the array in ``demo.h5``, read it back, and print the file size.

    The array is written as the dataset ``data``. The value read back must
    compare equal to ``original``.
    """
    path = 'demo.h5'

    with h5py.File(path, 'w') as out_file:
        out_file['data'] = original
    with h5py.File(path, 'r') as in_file:
        dataset = in_file['data']
        # Slice with [:] to copy the content out before the file is closed.
        restored = dataset[:]

    assert np.array_equal(original, restored)
    print(f"hdf5:   {os.path.getsize(path):7}")


# Run the demo only when executed as a script, so that importing this module
# does not generate data and write the demo files as a side effect.
if __name__ == "__main__":
    main()
  • Try to gzip the JSON file, and maybe the others as well, and compare the sizes.