I'm trying to write some numpy arrays in python to lmdb:
import numpy as np
import lmdb
def write_lmdb(filename):
    """Put two pairs of numpy arrays into the LMDB environment at *filename*.

    Both pairs are written through one write transaction under the same two
    keys, ``'X'`` and ``'y'``.

    NOTE: the transaction is only begun here; this function never calls
    ``commit()`` on it and does not use it as a context manager, so nothing
    in this function makes the puts durable.

    Args:
        filename: Path handed to ``lmdb.open``.
    """
    env = lmdb.open(filename, map_size=int(1e9))
    txn = env.begin(write=True)

    features = np.array([[1.0, 0.0], [0.1, 2.0]])
    targets = np.array([1.4, 2.1])

    # First pair is the arrays as built; second pair is shifted by a constant.
    pairs = [
        (features, targets),
        (features + 1.6, targets + 1.2),
    ]
    for feature_block, target_block in pairs:
        txn.put('X', feature_block)
        txn.put('y', target_block)
def read_lmdb(filename):
    """Print the type and content of every key/value pair in an LMDB file.

    For each entry yielded by a cursor over a read transaction, four lines
    are printed in order: the key's type, the value's type, the key, and the
    value.

    Args:
        filename: Path handed to ``lmdb.open``.
    """
    env = lmdb.open(filename)
    txn = env.begin()
    cursor = txn.cursor()
    for key, value in cursor:
        # Same four prints per entry as before, in the same order.
        for item in (type(key), type(value), key, value):
            print(item)
# Round trip: write the arrays to temp.db, then dump whatever is stored there.
write_lmdb('temp.db')
read_lmdb('temp.db')
but read_lmdb prints nothing. What is the proper way to write numpy arrays to lmdb?
Update: Based on @frankyjuang's answer I managed to do it, however not in a very elegant way: a multidimensional array loses its shape, and each array has to have its own name.
import numpy as np
import lmdb
def write_lmdb(filename):
    """Write a random uint8 array and a random float32 array to LMDB.

    ``X`` is built with shape ``(n_samples, 3, 4, 3)`` and ``y`` with shape
    ``(n_samples,)``.  For each ``i`` in ``range(n_samples)`` the arrays are
    stored under the keys ``X_<i>`` and ``y_<i>`` inside their own write
    transaction, and then printed.

    NOTE: every key receives the *complete* ``X`` / ``y`` array rather than
    sample ``i``; this is kept as-is so the stored bytes stay unchanged.
    Only the raw bytes are stored, so neither shape nor dtype is recorded and
    the reader has to know both.

    Keys are encoded to bytes and values are serialised explicitly with
    ``tobytes()`` so the function runs on Python 2 and Python 3 alike; the
    bytes written are the same as when the arrays are passed directly.

    Args:
        filename: Path handed to ``lmdb.open``.
    """
    print('Write lmdb')
    lmdb_env = lmdb.open(filename, map_size=int(1e9))
    try:
        n_samples = 2
        X = (255 * np.random.rand(n_samples, 3, 4, 3)).astype(np.uint8)
        y = np.random.rand(n_samples).astype(np.float32)
        for i in range(n_samples):
            # Used as a context manager, the transaction is committed when
            # the block exits normally and aborted if it raises.
            with lmdb_env.begin(write=True) as lmdb_txn:
                lmdb_txn.put(('X_%d' % i).encode('ascii'), X.tobytes())
                lmdb_txn.put(('y_%d' % i).encode('ascii'), y.tobytes())
            print('X: %s' % (X,))
            print('y: %s' % (y,))
    finally:
        # Close explicitly so the same path can be reopened in this process.
        lmdb_env.close()
def read_lmdb(filename):
    """Print every entry of the LMDB environment at *filename*.

    Each key is printed.  If the key contains ``X`` its value is decoded as a
    flat ``uint8`` array and printed; if it contains ``y`` its value is
    decoded as a flat ``float32`` array and printed.  The two tests are
    independent, so a key containing both letters is printed both ways.
    Finally the number of entries visited is printed after the label
    ``n_samples``; it counts every key, ``X_*`` and ``y_*`` alike.

    The decoded arrays are one-dimensional: only raw bytes are stored, so
    the original shape is not available here.

    Args:
        filename: Path handed to ``lmdb.open``.

    Raises:
        ValueError: If a value's length is not a multiple of the item size of
            the dtype it is decoded with.
    """
    print('Read lmdb')
    lmdb_env = lmdb.open(filename)
    entry_count = 0
    try:
        # One read transaction and one cursor, both released by ``with``.
        with lmdb_env.begin() as lmdb_txn:
            with lmdb_txn.cursor() as lmdb_cursor:
                for key, value in lmdb_cursor:
                    # Keys are ``bytes`` on Python 3; decode only for display
                    # so the printed text is the same as on Python 2.
                    if isinstance(key, str):
                        print(key)
                    else:
                        print(key.decode('utf-8', 'replace'))
                    # np.frombuffer replaces the deprecated binary mode of
                    # np.fromstring and yields the same values.
                    if b'X' in key:
                        print(np.frombuffer(value, dtype=np.uint8))
                    if b'y' in key:
                        print(np.frombuffer(value, dtype=np.float32))
                    entry_count += 1
    finally:
        lmdb_env.close()
    print('n_samples %d' % entry_count)
# Round trip: write the random arrays to temp.db, then read them back.
write_lmdb('temp.db')
read_lmdb('temp.db')
Test script output should be something like:
Write lmdb
X: [[[[ 48 224 119]
[ 76 87 174]
[ 14 88 183]
[ 76 234 56]]
[[234 223 65]
[ 63 85 175]
[184 252 125]
[100 7 225]]
[[134 159 41]
[ 2 146 221]
[ 99 74 225]
[169 57 59]]]
[[[100 202 3]
[ 88 204 131]
[ 96 238 243]
[103 58 30]]
[[157 125 107]
[238 207 99]
[102 220 64]
[ 27 240 33]]
[[ 74 93 131]
[107 88 206]
[ 55 86 35]
[212 235 187]]]]
y: [ 0.80826157 0.01407595]
X: [[[[ 48 224 119]
[ 76 87 174]
[ 14 88 183]
[ 76 234 56]]
[[234 223 65]
[ 63 85 175]
[184 252 125]
[100 7 225]]
[[134 159 41]
[ 2 146 221]
[ 99 74 225]
[169 57 59]]]
[[[100 202 3]
[ 88 204 131]
[ 96 238 243]
[103 58 30]]
[[157 125 107]
[238 207 99]
[102 220 64]
[ 27 240 33]]
[[ 74 93 131]
[107 88 206]
[ 55 86 35]
[212 235 187]]]]
y: [ 0.80826157 0.01407595]
Read lmdb
X_0
[ 48 224 119 76 87 174 14 88 183 76 234 56 234 223 65 63 85 175
184 252 125 100 7 225 134 159 41 2 146 221 99 74 225 169 57 59
100 202 3 88 204 131 96 238 243 103 58 30 157 125 107 238 207 99
102 220 64 27 240 33 74 93 131 107 88 206 55 86 35 212 235 187]
X_1
[ 48 224 119 76 87 174 14 88 183 76 234 56 234 223 65 63 85 175
184 252 125 100 7 225 134 159 41 2 146 221 99 74 225 169 57 59
100 202 3 88 204 131 96 238 243 103 58 30 157 125 107 238 207 99
102 220 64 27 240 33 74 93 131 107 88 206 55 86 35 212 235 187]
y_0
[ 0.80826157 0.01407595]
y_1
[ 0.80826157 0.01407595]
n_samples 4
Wrap your transactions in a `with` block. And remember to convert the value from bytes (string) back to a numpy array using `np.fromstring` (`np.frombuffer` in current NumPy, where binary-mode `np.fromstring` is deprecated).
To be honest, it is not a good idea to store a numpy array in lmdb, since the conversion from array to bytes and back to array loses some information (e.g. shape). You can try storing a dict of numpy arrays using pickle.