Non-blocking read from multiple subprocesses (Pyth

2019-04-13 16:30发布

问题:

I currently have the following code, inspired by the answer to Non-blocking read on a subprocess.PIPE in python. It seems to work correctly, outputting the lines to the screen, however it only does so for the first created process, all other processes (which are running) don't get any data printed.

How do I make sure I can read data (in a non-blocking way) from multiple subprocesses?

#!/usr/bin/env python
import sys
import os
import subprocess
from threading import Thread
from Queue import Queue, Empty

STREAMER_URL = 'rtmp://127.0.0.1/app'
RTMPDUMP_EXECUTEABLE = 'rtmpdump'

def enqueue_output(out, queue):
    for line in iter(lambda: out.read(16), b''):
        queue.put(line)
    out.close()

def download_rtmp(media, filename):
  # Create parameters
  args=[RTMPDUMP_EXECUTEABLE]
  args.extend(['-r',media[0],'-y',media[1]])

  # Set output file
  OUTPUT_FILE = filename
  args.extend(['-o',OUTPUT_FILE])

  # Send rtmpdump any extra arguments
  if len(sys.argv) > 2:
    args.extend(sys.argv[2:])

  # Execute rtmpdump
  p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT )
  q = Queue()
  t = Thread(target=enqueue_output, args=(p.stdout, q))
  t.daemon = True # thread dies with the program
  t.start()
  return (p, q, t)

def main():

  # actual data is from somewhere else on the internet
  for (name, playpath, filepath) in data:
    print 'Spawning %s download...' % name
    PROCESSES.append(download_rtmp((STREAMER_URL, playpath), filepath))

  BUFS = dict()

  # infinite loop checking if all processes have finished
  while True:
    done = True
    for (process, queue, thread) in PROCESSES:
      try:
        readdata = queue.get_nowait()
      except Empty:
        pass
      else:
        if process in BUFS:
          readdata = BUFS[process] + readdata
        lines = readdata.split('\n')
        if len(lines) > 1:
          for line in lines[:-1]:
            print 'Line: %s' % line
        if '\r' in lines[-1]:
          lines = readdata.split('\r')
          for line in lines[:-1]:
            print 'Line2: %s' % line
        BUFS[process] = lines[-1]

      process.poll()

      if process.returncode is None:
        done = False
        break
    if done:
      break

  print "Done"

if __name__ == "__main__":
    main()

回答1:

I haven't figured the whole thing out, but the break in if process.returncode is None: means that you won't look at other process queues until the first process exits completely. And I'm not sure where you got that multi-queue polling thing from, but its absolutely horrible.

This problem is best solved with a single return queue used by all of the worker threads. The workers pass tuples of (process, line) and the main thread does a blocking wait for data from all of the workers.

This is pseudocode really, but it would look like:

STREAMER_URL = 'rtmp://127.0.0.1/app'
RTMPDUMP_EXECUTEABLE = 'rtmpdump'

def enqueue_output(process, queue):
    """read process stdout in small chunks and queue for processing"""
    for line in iter(lambda: out.read(16), b''):
        queue.put((process, line))
    process.wait()
    queue.put((process, None))

def download_rtmp(media, filename):
  # Create parameters
  args=[RTMPDUMP_EXECUTEABLE, '-r', media[0], '-y', media[1], '-o', filename]

  # Send rtmpdump any extra arguments
  # if len(sys.argv) > 2: no need for the if in list comprehension
  args.extend(sys.argv[2:])

  # Execute rtmpdump
  p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT )
  t = Thread(target=enqueue_output, args=(p, return_q))
  t.daemon = True # thread dies with the program
  t.start()
  return (p, t)

def main():
  THREADS = []
  BUFS = dict()

  # actual data is from somewhere else on the internet
  for (name, playpath, filepath) in data:
    print 'Spawning %s download...' % name
    process, thread = download_rtmp((STREAMER_URL, playpath), filepath)
    BUFS[process] = ''
    THREADS.append(thread)

  # all processes write to return_q and we process them here
  while BUFS:
    process, line = return_q.get()
    readdata = BUFS[process] + (line or '')
    if line is None:
        del BUFS[process]
    # I didn't try to figure this part out... basically, when line is
    # None, process is removed from BUFS so you know your end condition
    # and the following stuff should do its final processing.
    lines = readdata.split('\n')
    if len(lines) > 1:
      for line in lines[:-1]:
        print 'Line: %s' % line
    if '\r' in lines[-1]:
      lines = readdata.split('\r')
      for line in lines[:-1]:
        print 'Line2: %s' % line
    if line is not None:
        BUFS[process] = lines[-1]