I'm writing a tool in pygtk which needs to deal with recursively parsing large directories, and adding resulting matched files to a list. This process obviously causes the user interface to hang up, and so I have attempted to use the multiprocessing library.
Some more background before I ask for some solutions: - the program has two main Classes, a controller class that does all the intensive work and speaks to the UI, and a Model class, for dealing with all the data the tool needs.
import sys
import os
import pygtk
import fnmatch
from multiprocessing import Pool
pygtk.require("2.0")
#try:
from gi.repository import Gtk
from gi.repository import GObject
#except:
# print("GTK Not Availible")
# sys.exit(1)
class Controller(object):
def __init__(self,builder,model):
self.builder=builder
self.model=model
def btn_pass_clicked(self, *args,**kwargs):
print "it's working!, its woooooorkkinnnnggg!"
spinnywheel= self.builder.get_object("activitySpinner")
spinnywheel.start()
def btn_fail_clicked(self, *args, **kwargs):
print "stopping spinnywheel!"
spinnywheel=self.builder.get_object("activitySpinner")
spinnywheel.stop()
def quit(self,*args,**kwargs):
print "iamquit"
Gtk.main_quit()
def file_menu_open(self,*args,**kwargs):
print "file->open"
self.builder.get_object("openDialogue").show()
def opendialogue_btnOpen_clicked(self,*args,**kwargs):
rootdir = os.path.expanduser(self.builder.get_object("openDialogue_entryBox").get_text())
self.builder.get_object("openDialogue").hide()
self.builder.get_object("openDialogue_entryBox").set_text("")
if os.path.exists(rootdir):
self.builder.get_object("activitySpinner").start()
print "pooling workers and walking ",rootdir
p = Pool(None)
p.apply_async(self.walk_for_files,rootdir,None,self.finished_recurse)
else:
print "Path does not exist!"
def walk_for_files(self,rootdir):
for root,dirs,files in os.walk(rootdir):
for extension in ['c','cpp']:
for filename in fnmatch.filter(files,'*.'+extension):
self.model.add_single_file(os.path.join(root,filename))
def finished_recurse(self,*args,**kargs):
print "workers finished parsing dirs!"
self.builder.get_object("activitySpinner").stop()
class Model(object):
def __init__(self):
self.fileList=[]
def add_single_file(self,file):
self.fileList.append(file)
print "added ",file
class Scrutiny(object):
def __init__(self):
builder = Gtk.Builder()
builder.add_from_file("scrutinydev.ui")
model_object=Model()
controller_object=Controller(builder,model_object)
builder.connect_signals(controller_object)
builder.get_object("windowMain").show()
builder.get_object("listView")
GObject.threads_init()
Gtk.main()
if __name__ == "__main__":
scrutiny = Scrutiny()
now, heres my problem.
As you can see, the workers spawned with pool() need to execute the callback finish_recurse so that I can stop the GtkSpinner amongst other UI work.
With the code in its current state, I get a pickling error,
PicklingError: Can't pickle <type 'instancemethod'>: attribute lookup __builtin__.instancemethod failed
I understand that this is because I'm unable to serialize the callback, and would like suggestions for workarounds/fixes in order to achieve what I need.
I don't know well GTK, but I think your problem is more about pickling than about multiprocessing.
The __getstate__ and __setstate__ methods of the pickle module lets you customize the pickling process for any object.
Here is a trivial example which show how it works :
Of course, it remains the responsibility of the developer to represent and properly restore the state of objects.