Pickling error when multithreading - design or code?

2019-07-30 16:41发布

问题:

I'm writing a tool in pygtk which needs to deal with recursively parsing large directories, and adding resulting matched files to a list. This process obviously causes the user interface to hang up, and so I have attempted to use the multiprocessing library.

Some more background before I ask for solutions: the program has two main classes, a Controller class that does all the intensive work and talks to the UI, and a Model class that deals with all the data the tool needs.

import sys
import os
import pygtk  
import fnmatch
from multiprocessing import Pool
pygtk.require("2.0")  

#try:  
from gi.repository import Gtk
from gi.repository import GObject
#except:  
#   print("GTK Not Availible")
#   sys.exit(1)


class Controller(object):
    """UI event handlers plus the directory-walking work.

    Keeps a reference to the builder (used to look widgets up by name via
    ``get_object``) and to the model that collects matched file paths.
    """
    def __init__(self,builder,model):
        # builder: object providing get_object(name) for widget lookup.
        # model: object providing add_single_file(path).
        self.builder=builder
        self.model=model
    def btn_pass_clicked(self, *args,**kwargs):
        """Log a message and start the "activitySpinner" widget."""
        print "it's working!, its woooooorkkinnnnggg!"
        spinnywheel= self.builder.get_object("activitySpinner")
        spinnywheel.start()
    def btn_fail_clicked(self, *args, **kwargs):
        """Log a message and stop the "activitySpinner" widget."""
        print "stopping spinnywheel!"
        spinnywheel=self.builder.get_object("activitySpinner")
        spinnywheel.stop()
    def quit(self,*args,**kwargs):
        """Log a message and ask Gtk to leave its main loop."""
        print "iamquit"
        Gtk.main_quit()
    def file_menu_open(self,*args,**kwargs):
        """Show the "openDialogue" widget."""
        print "file->open"
        self.builder.get_object("openDialogue").show()
    def opendialogue_btnOpen_clicked(self,*args,**kwargs):
        """Read the path typed into the open dialogue and start a walk of it.

        The entry text is passed through ``os.path.expanduser``; the dialogue
        is then hidden and the entry box cleared. If the path exists, the
        spinner is started and the walk is submitted to a process pool with
        ``finished_recurse`` as the completion callback; otherwise a message
        is printed.
        """
        rootdir = os.path.expanduser(self.builder.get_object("openDialogue_entryBox").get_text())
        self.builder.get_object("openDialogue").hide()
        self.builder.get_object("openDialogue_entryBox").set_text("")
        if os.path.exists(rootdir):
            self.builder.get_object("activitySpinner").start()
            print "pooling workers and walking ",rootdir
            # NOTE(review): the pool created here is never closed or joined
            # in this method.
            p = Pool(None)
            # NOTE(review): self.walk_for_files is a bound method and has to
            # be pickled to reach a worker process; this is the call the
            # accompanying text reports as failing with
            # "PicklingError: Can't pickle <type 'instancemethod'>".
            # NOTE(review): apply_async takes (func, args, kwds, callback);
            # here args is the bare string rootdir and kwds is None --
            # presumably (rootdir,) and {} were intended; confirm against the
            # multiprocessing documentation.
            p.apply_async(self.walk_for_files,rootdir,None,self.finished_recurse)
        else:
            print "Path does not exist!"


    def walk_for_files(self,rootdir):
            """Walk rootdir and add every *.c and *.cpp file to the model.

            Each match is passed to ``self.model.add_single_file`` as the
            directory joined with the file name.
            """
            # NOTE(review): if this runs in a separate worker process,
            # self.model there is presumably a copy, so additions would not
            # reach the caller's model -- verify.
            for root,dirs,files in os.walk(rootdir):
                    # One fnmatch pass over the directory's files per extension.
                    for extension in ['c','cpp']:
                        for filename in fnmatch.filter(files,'*.'+extension):
                            self.model.add_single_file(os.path.join(root,filename))

    def finished_recurse(self,*args,**kargs):
        """Completion callback: log a message and stop the spinner."""
        print "workers finished parsing dirs!"
        self.builder.get_object("activitySpinner").stop()


class Model(object):
    """In-memory store for the file paths collected by the tool."""

    def __init__(self):
        # Collected paths, in the order they were added.
        self.fileList=[]

    def add_single_file(self,file):
        """Append *file* to ``fileList`` and echo it to stdout.

        The parameter keeps its original name so that keyword callers keep
        working.
        """
        self.fileList.append(file)
        # Single-argument parenthesised form: a valid print statement on
        # Python 2 and a valid function call on Python 3. The output is the
        # same as the former `print "added ",file` ("added", two spaces,
        # then the path).
        print("added  %s" % (file,))




class Scrutiny(object):
    """Application bootstrap: load the UI file, wire the handlers, run Gtk."""

    def __init__(self):
        # Build the widget tree from the UI description file.
        ui = Gtk.Builder()
        ui.add_from_file("scrutinydev.ui")

        # The controller gets the builder and a fresh model, and is then
        # registered as the object whose methods handle the UI's signals.
        data = Model()
        handlers = Controller(ui, data)
        ui.connect_signals(handlers)

        ui.get_object("windowMain").show()
        # Looked up but not used; the return value is discarded.
        ui.get_object("listView")

        GObject.threads_init()
        Gtk.main()



# Script entry point. Scrutiny.__init__ loads the UI and calls Gtk.main()
# itself, so this assignment only completes after that call returns.
if __name__ == "__main__":
    scrutiny = Scrutiny()

Now, here's my problem.

As you can see, the workers spawned with Pool() need to execute the callback finished_recurse so that I can stop the GtkSpinner, amongst other UI work.

With the code in its current state, I get a pickling error,

PicklingError: Can't pickle <type 'instancemethod'>: attribute lookup __builtin__.instancemethod failed

I understand that this is because I'm unable to serialize the callback, and would like suggestions for workarounds/fixes in order to achieve what I need.

回答1:

I don't know GTK well, but I think your problem is more about pickling than about multiprocessing.

The __getstate__ and __setstate__ methods, which the pickle module looks for on an object, let you customize the pickling process for any object.

Here is a trivial example that shows how it works:

from pickle import dumps, loads


class NotPickable(object):
    """Minimal holder that stores its constructor argument as ``attr``.

    It defines no pickling hooks of its own, so whether an instance can be
    pickled depends on what ``attr`` holds.
    """
    def __init__(self, x):
        self.attr = x

# Wrap an open file object in NotPickable and try to pickle it.
# NOTE(review): 'r+w' is not a standard mode string -- Python 3's open()
# rejects it; presumably 'r+' was meant. Confirm before reusing this snippet.
ffile = open('/tmp/filesarenotpickable', 'r+w')
o = NotPickable(ffile)
dumps(o)
# =>  TypeError: can't pickle file objects

class Pickable(NotPickable):
    """NotPickable variant that supplies its own pickling hooks.

    ``__getstate__`` returns the text read from ``self.attr`` and
    ``__setstate__`` writes that text back into ``self.attr``.
    """
    # Class-level file handle, opened once when the class body executes.
    # An instance built through the inherited __init__ gets its own ``attr``
    # that shadows this one; an instance without its own ``attr`` falls back
    # to it.
    # NOTE(review): same non-standard 'r+w' mode string as used elsewhere in
    # this example -- presumably 'r+' was meant.
    attr = open('/tmp/a_file_on_an_other_system', 'r+w')

    def __getstate__(self):
        # The pickled state is the file's contents (a string), not the file
        # object itself. Reading moves the file position as a side effect.
        return self.attr.read()

    def __setstate__(self, state):
        # NOTE(review): presumably pickle does not run __init__ when
        # restoring, so self.attr here would be the class-level file above --
        # consistent with the example output shown for o2.attr; verify.
        self.attr.write(state)

# Same file object as before, but wrapped in Pickable: dumps() now succeeds
# because __getstate__ supplies the state instead of the file object.
o = Pickable(ffile)
dumps(o)
# OUT: 'ccopy_reg\n_reconstructor\np0\n(c__main__\nPickable\np1\nc__builtin__\nobject\np2\nNtp3\nRp4\n.'
# NOTE(review): the pickle shown above carries no state payload -- presumably
# the file read by __getstate__ was empty, in which case pickle would also
# skip __setstate__ on load; verify against the pickle documentation.

# Round trip: the restored object's attr is the class-level file, not ffile.
o2 = loads(dumps(o))
o2.attr
# OUT: <open file '/tmp/a_file_on_an_other_system', mode 'r+w' at 0x18ad4b0>

Of course, it remains the responsibility of the developer to represent and properly restore the state of objects.