Python - appending to a pickled list

2019-04-22 07:18发布

问题:

I'm struggling to append a list in a pickled file. This is the code:

#saving high scores to a pickled file

import pickle

first_name = input("Please enter your name:")
score = input("Please enter your score:")

scores = []
high_scores = first_name, score
scores.append(high_scores)

file = open("high_scores.dat", "ab")
pickle.dump(scores, file)
file.close()

file = open("high_scores.dat", "rb")
scores = pickle.load(file)
print(scores)
file.close()

The first time I run the code, it prints the name and score.

The second time I run the code, it prints the 2 names and 2 scores.

The third time I run the code, it prints the first name and score, but it overwrites the second name and score with the third name and score I entered. I just want it to keep adding the names and scores. I don't understand why it is saving the first name and overwriting the second one.

回答1:

You need to pull the list from your database (i.e. your pickle file) first before appending to it.

import pickle
import os

high_scores_filename = 'high_scores.dat'

scores = []

# first time you run this, "high_scores.dat" won't exist
#   so we need to check for its existence before we load 
#   our "database"
if os.path.exists(high_scores_filename):
    # "with" statements are very handy for opening files. 
    with open(high_scores_filename,'rb') as rfp: 
        scores = pickle.load(rfp)
    # Notice that there's no "rfp.close()"
    #   ... the "with" clause calls close() automatically! 

first_name = input("Please enter your name:")
score = input("Please enter your score:")

high_scores = first_name, score
scores.append(high_scores)

# Now we "sync" our database
with open(high_scores_filename,'wb') as wfp:
    pickle.dump(scores, wfp)

# Re-load our database
with open(high_scores_filename,'rb') as rfp:
    scores = pickle.load(rfp)

print(scores)


回答2:

If you want to write and read to the pickled file, you can call dump multiple times for each entry in your list. Each time you dump, you append a score to the pickled file, and each time you load you read the next score.

>>> import pickle as dill
>>> 
>>> scores = [('joe', 1), ('bill', 2), ('betty', 100)]
>>> nscores = len(scores)
>>> 
>>> with open('high.pkl', 'ab') as f:
…   _ = [dill.dump(score, f) for score in scores]
... 
>>> 
>>> with open('high.pkl', 'ab') as f:
...   dill.dump(('mary', 1000), f)
... 
>>> # we added a score on the fly, so load nscores+1
>>> with open('high.pkl', 'rb') as f:
...     _scores = [dill.load(f) for i in range(nscores + 1)]
... 
>>> _scores
[('joe', 1), ('bill', 2), ('betty', 100), ('mary', 1000)]
>>>

The reason your code was failing most likely is that you are replacing the original scores with the unpickled list of scores. So if there were any new scores added, you'd blow them away in memory.

>>> scores
[('joe', 1), ('bill', 2), ('betty', 100)]
>>> f = open('high.pkl', 'wb')
>>> dill.dump(scores, f)
>>> f.close()
>>> 
>>> scores.append(('mary',1000))
>>> scores
[('joe', 1), ('bill', 2), ('betty', 100), ('mary', 1000)]
>>> 
>>> f = open('high.pkl', 'rb')
>>> _scores = dill.load(f)
>>> f.close()
>>> _scores
[('joe', 1), ('bill', 2), ('betty', 100)]
>>> blow away the old scores list, by pointing to _scores
>>> scores = _scores
>>> scores
[('joe', 1), ('bill', 2), ('betty', 100)]

So it's more of a python name reference issue for scores, than it is a pickle issue. Pickle is just instantiating a new list and calling it scores (in your case), and then it garbage collects whatever thing scores was pointed to before that.

>>> scores = 1
>>> f = open('high.pkl', 'rb')
>>> scores = dill.load(f)
>>> f.close()
>>> scores
[('joe', 1), ('bill', 2), ('betty', 100)]


回答3:

Doesnt actually answer the question, but if anyone would like to add a single item at a time to a pickle, you can do it by...

import pickle
import os

high_scores_filename = '/home/ubuntu-dev/Desktop/delete/high_scores.dat'

scores = []

# first time you run this, "high_scores.dat" won't exist
#   so we need to check for its existence before we load
#   our "database"
if os.path.exists(high_scores_filename):
    # "with" statements are very handy for opening files.
    with open(high_scores_filename,'rb') as rfp:
        scores = pickle.load(rfp)
    # Notice that there's no "rfp.close()"
    #   ... the "with" clause calls close() automatically!

names = ["mike", "bob", "joe"]

for name in names:
    high_score = name
    print(name)
    scores.append(high_score)

# Now we "sync" our database
with open(high_scores_filename,'wb') as wfp:
    pickle.dump(scores, wfp)

# Re-load our database
with open(high_scores_filename,'rb') as rfp:
    scores = pickle.load(rfp)

print(scores)


回答4:

Don't use pickle but use h5py which also solves your purpose

with h5py.File('.\PreprocessedData.h5', 'a') as hf:
    hf["X_train"].resize((hf["X_train"].shape[0] + X_train_data.shape[0]), axis = 0)
    hf["X_train"][-X_train_data.shape[0]:] = X_train_data

    hf["X_test"].resize((hf["X_test"].shape[0] + X_test_data.shape[0]), axis = 0)
    hf["X_test"][-X_test_data.shape[0]:] = X_test_data


    hf["Y_train"].resize((hf["Y_train"].shape[0] + Y_train_data.shape[0]), axis = 0)
    hf["Y_train"][-Y_train_data.shape[0]:] = Y_train_data

    hf["Y_test"].resize((hf["Y_test"].shape[0] + Y_test_data.shape[0]), axis = 0)
    hf["Y_test"][-Y_test_data.shape[0]:] = Y_test_data

source