Python Tkinter multiprocessing progress

2019-08-01 01:36发布

问题:

For my work, I frequently have to collect reasonably large datasets from a MySQL database, e.g. several parameters for several locations, and store that data in a CSV file per location. For this, I've written a small GUI. Since the data has to be stored per location, I thought I'd take advantage of my 8-thread CPU and use the multiprocessing package to query the database per location. This works just fine, but I also want to keep track of how far along the data retrieval and file writing are.

The trick with using multiprocessing together with Tkinter was to put the function that is called in the multiprocessing outside of the GUI class, but how do I get information from that function back into the class?

My code so far:

from multiprocessing import Process
from tkinter import *
import os
import pandas
import pymysql


class App:
    """Tkinter form that starts one ``query_database`` process per station.

    The form collects a comma-separated list of locations, a comma-separated
    list of variables, a start and end DTG, and an output folder name.
    Pressing "Start" reads the entries and launches one
    ``multiprocessing.Process`` for every station.
    """

    # Root directory that the user-supplied output folder is joined onto.
    # Kept separate from ``self.outputlocation`` so that pressing "Start"
    # repeatedly does not nest the folder deeper on every click.
    output_root = "C:/Users/test"

    def __init__(self, master):
        """Create the entry widgets and the start button inside *master*."""
        self.master = master
        self.stations = None
        self.variables = None
        self.startdtg = None
        self.enddtg = None
        self.outputlocation = self.output_root
        self.processes = []

        # Every label occupies a single cell; its entry starts in the next
        # column, so labels and entries never share grid cells.
        Label(master, text="Locations:").grid(row=0, column=0, sticky=W)
        self.locationEntry = Entry(master)
        self.locationEntry.grid(row=0, column=1, sticky=EW, columnspan=3)

        Label(master, text="Enter variables:").grid(row=1, column=0, sticky=W)
        self.varEntry = Entry(master)
        self.varEntry.grid(row=1, column=1, sticky=EW, columnspan=3)

        Label(master, text="Start DTG:").grid(row=2, column=0, sticky=W)
        self.startEntry = Entry(master)
        self.startEntry.grid(row=2, column=1, sticky=EW)

        Label(master, text="End DTG:").grid(row=2, column=2, sticky=W)
        self.endEntry = Entry(master)
        self.endEntry.grid(row=2, column=3, sticky=EW)

        Label(master, text="Output location:").grid(row=3, column=0, sticky=W)
        self.outputEntry = Entry(master)
        self.outputEntry.grid(row=3, column=1, columnspan=2, sticky=EW)

        self.startButton = Button(master, text="Start", command=self.get_data)
        self.startButton.grid(row=5, column=1, sticky=EW)

    @staticmethod
    def _split_entries(text):
        """Return the non-empty, whitespace-stripped items of a comma-separated string."""
        stripped = (item.strip() for item in text.split(","))
        return [item for item in stripped if item]

    def get_data(self):
        """Button callback: read the form, then start the worker processes."""
        self.update_variables()
        self.collect_data()

    def update_variables(self):
        """Copy the current contents of the entry widgets into the instance attributes."""
        # Empty items (blank entry, trailing comma) are dropped so that no
        # process is started for a nameless station or variable.
        self.stations = self._split_entries(self.locationEntry.get())
        self.variables = self._split_entries(self.varEntry.get())
        self.startdtg = self.startEntry.get()
        self.enddtg = self.endEntry.get()
        # Always join onto the fixed root, never onto the previous result.
        self.outputlocation = os.path.join(self.output_root, self.outputEntry.get())

    def collect_data(self):
        """Start one ``query_database`` process per station and remember it in ``self.processes``."""
        for station in self.stations:
            p = Process(
                target=query_database,
                args=(station, self.variables, self.startdtg, self.enddtg, self.outputlocation),
            )
            self.processes.append(p)
            p.start()


def query_database(station, variables, startdtg, enddtg, outputlocation):
    """Collect the data for one station and write it to the local drive.

    Placeholder: this listing contains no implementation, so the call does
    nothing and returns ``None``. It is defined at module level (outside the
    GUI class) and is used as the ``target`` of the processes started by
    ``App.collect_data``, which passes one station name, the list of
    variables, the start/end DTG strings and the output path.
    """
    return None


if __name__ == "__main__":
    # Script entry point. The guard matters here: multiprocessing may import
    # this module again inside each child process, and the GUI must only be
    # created when the file is run directly.
    root = Tk()
    # Build the form inside the root window; `app` keeps a reference to it.
    app = App(root)
    # Hand control to the Tk event loop until the window is closed.
    root.mainloop()

To be clear: this code works fine. It produces this GUI:

What I want, is a GUI like this:

With the part showing the progress of the query_database function, meaning it has to update when a step in that function has been completed.

How would I approach this? Also, feel free to give any comments about my coding, I'm still learning the basics of GUIs and setting up classes.

回答1:

Let's sum up what was said in the comments:

  1. To get information back from a function that is executed in another process, you should communicate with that process through either a Queue or a Pipe.
  2. Once you have a channel to communicate through, keep checking it continuously for messages via a self-scheduling after call.
  3. Keep in mind that passing a Label or anything else tk-related to that process isn't an option, since it isn't a thread-safe or process-safe practice.

After all of this, you should come up with something similar to this approach:

try:
    import Tkinter as tk              # Python 2
    import ttk
    import Queue as queue
except ImportError:
    import tkinter as tk              # Python 3
    import tkinter.ttk as ttk
    import queue

import multiprocessing as mp
import time


class App(tk.Tk):
    """Main window showing the progress reported by a worker process.

    The worker (``work``) runs in a separate ``multiprocessing.Process`` and
    reports each step by putting a message on ``self.queue``. The window polls
    that queue from the Tk event loop with ``after`` and updates a label and
    a progress bar; no Tk object is ever handed to the worker.
    """

    def __init__(self, *args, **kwargs):
        """Build the label, progress bar and start button."""
        # Explicit base-class call instead of zero-argument super(): the
        # imports at the top of this listing support both Python 2 and
        # Python 3, and zero-argument super() exists only in Python 3.
        tk.Tk.__init__(self, *args, **kwargs)
        self.minsize(width=400, height=25)

        self.label = tk.Label(self, text='Waiting for "work"')
        self.label.pack(expand=True, fill='both')

        # maximum=3 matches the three messages that work() puts on the queue.
        self.progressbar = ttk.Progressbar(self, orient='horizontal', value=0, maximum=3, mode='determinate')
        self.progressbar.pack(fill='x')

        self.button = tk.Button(self, text='Start', command=self.start_work)
        self.button.pack(fill='x')

        self.queue = mp.Queue()
        self.process = None

    def start_work(self):
        """Button callback: start the worker process and begin polling the queue."""
        self.process = mp.Process(target=work, args=(self.queue,))
        # Disabled until the worker has finished, so only one runs at a time.
        self.button.configure(state='disabled')
        self.process.start()
        self.periodic_call()

    def periodic_call(self):
        """Process pending messages, then re-schedule or reset the window."""
        self.check_queue()

        # exitcode is None while the process is still running: check again
        # in 100 ms.
        if self.process.exitcode is None:
            self.after(100, self.periodic_call)
            return

        # The worker has finished.
        self.process.join()
        # Drain anything that arrived between the check above and the exit,
        # so stale messages cannot advance the bar on the next run.
        self.check_queue()
        self.button.configure(state='normal')
        self.label.configure(text='Waiting for "work"')
        self.progressbar.configure(value=0)

    def check_queue(self):
        """Show every queued message and advance the progress bar once per message."""
        # Drain with get_nowait() until queue.Empty instead of looping on
        # qsize(): multiprocessing.Queue.qsize() raises NotImplementedError
        # on platforms such as macOS.
        while True:
            try:
                message = self.queue.get_nowait()
            except queue.Empty:
                break
            self.label.configure(text=message)
            self.progressbar.configure(value=self.progressbar['value'] + 1)


def work(working_queue, delay=1.5):
    """Simulate a three-step job, reporting each step through *working_queue*.

    Runs in the worker process. For every step, the step's description is put
    on the queue and the function then sleeps to stand in for real work.

    Args:
        working_queue: Queue-like object with a ``put`` method; receives one
            string per step.
        delay: Seconds to sleep after reporting each step. Defaults to 1.5,
            the value that was previously hard-coded.
    """
    steps = (
        'Combobulationg Discombobulator',
        'Pointing towards space',
        'Calculating Ultimate Answer',
    )
    for type_of_work in steps:
        working_queue.put(type_of_work)
        time.sleep(delay)


if __name__ == '__main__':
    # Script entry point. The guard matters here: multiprocessing may import
    # this module again inside the worker process, and the window must only
    # be created when the file is run directly.
    app = App()
    # Hand control to the Tk event loop until the window is closed.
    app.mainloop()