# coding=utf8
# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Collection of subprocess wrapper functions.

In theory you shouldn't need anything else in subprocess, or this module
failed.
"""

from __future__ import with_statement
import cStringIO
import errno
import logging
import os
import Queue
import subprocess
import sys
import time
import threading


# Constants forwarded from subprocess.
PIPE = subprocess.PIPE
STDOUT = subprocess.STDOUT
# Sends stdout or stderr to os.devnull.
VOID = object()
# Error code when a process was killed because it timed out.
TIMED_OUT = -2001

# Globals.
# Set to True if you somehow need to disable this hack.
SUBPROCESS_CLEANUP_HACKED = False


class CalledProcessError(subprocess.CalledProcessError):
  """Augments the standard exception with more data."""
  def __init__(self, returncode, cmd, cwd, stdout, stderr):
    super(CalledProcessError, self).__init__(returncode, cmd)
    self.stdout = stdout
    self.stderr = stderr
    self.cwd = cwd

  def __str__(self):
    out = 'Command %s returned non-zero exit status %s' % (
        ' '.join(self.cmd), self.returncode)
    if self.cwd:
      out += ' in ' + self.cwd
    return '\n'.join(filter(None, (out, self.stdout, self.stderr)))


class CygwinRebaseError(CalledProcessError):
  """Occurs when cygwin's fork() emulation fails due to a rebased dll."""


## Utility functions


def kill_pid(pid):
  """Kills a process by its process id."""
  try:
    # Unable to import 'module'
    # pylint: disable=E1101,F0401
    import signal
    return os.kill(pid, signal.SIGKILL)
  except ImportError:
    pass


def kill_win(process):
  """Kills a process with its windows handle.

  Has no effect on other platforms.
  """
  try:
    # Unable to import 'module'
    # pylint: disable=F0401
    import win32process
    # Access to a protected member _handle of a client class
    # pylint: disable=W0212
    return win32process.TerminateProcess(process._handle, -1)
  except ImportError:
    pass


def add_kill():
  """Adds a kill() method to subprocess.Popen for python <2.6"""
  if hasattr(subprocess.Popen, 'kill'):
    return

  if sys.platform == 'win32':
    subprocess.Popen.kill = kill_win
  else:
    subprocess.Popen.kill = lambda process: kill_pid(process.pid)


def hack_subprocess():
  """subprocess functions may throw exceptions when used in multiple threads.

  See http://bugs.python.org/issue1731717 for more information.
  """
  global SUBPROCESS_CLEANUP_HACKED
  if not SUBPROCESS_CLEANUP_HACKED and threading.activeCount() != 1:
    # Only hack if there are ever multiple threads.
    # There is no point leaking with only one thread.
    subprocess._cleanup = lambda: None
    SUBPROCESS_CLEANUP_HACKED = True


def get_english_env(env):
  """Forces LANG and/or LANGUAGE to be English.

  Forces encoding to utf-8 for subprocesses.

  Returns None if it is unnecessary.
  """
  if sys.platform == 'win32':
    return None
  env = env or os.environ

  # Test if it is necessary at all.
  is_english = lambda name: env.get(name, 'en').startswith('en')

  if is_english('LANG') and is_english('LANGUAGE'):
    return None

  # Requires modifications.
  env = env.copy()
  def fix_lang(name):
    if not is_english(name):
      env[name] = 'en_US.UTF-8'
  fix_lang('LANG')
  fix_lang('LANGUAGE')
  return env
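

# A minimal usage sketch (illustrative only, not part of the original module):
# get_english_env() returns None when nothing needs fixing and a patched copy
# of the environment otherwise; the passed mapping and os.environ are never
# modified in place. The sample values below are placeholders.
def _example_get_english_env_usage():
  env = {'LANG': 'fr_FR.UTF-8', 'LANGUAGE': 'fr_FR'}
  fixed = get_english_env(env)
  # On win32 the function always returns None; elsewhere both variables are
  # forced to 'en_US.UTF-8' in the returned copy.
  return fixed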


def Popen(args, **kwargs):
  """Wraps subprocess.Popen() with various workarounds.

  Returns a subprocess.Popen object.

  - Forces English output since it's easier to parse the stdout if it is
    always in English.
  - Sets shell=True on windows by default. You can override this by forcing
    the shell parameter to a value.
  - Adds support for VOID to not buffer when not needed.

  Note: Popen() can throw OSError when cwd or args[0] doesn't exist. It also
  translates exceptions generated by cygwin when it fails trying to emulate
  fork().
  """
  # Make sure we hack subprocess if necessary.
  hack_subprocess()
  add_kill()

  env = get_english_env(kwargs.get('env'))
  if env:
    kwargs['env'] = env

  if kwargs.get('shell') is None:
    # *Sigh*: Windows needs shell=True, or else it won't search %PATH% for the
    # executable, but shell=True makes subprocess on Linux fail when it's
    # called with a list because it only tries to execute the first item in
    # the list.
    kwargs['shell'] = bool(sys.platform == 'win32')

  if isinstance(args, basestring):
    tmp_str = args
  elif isinstance(args, (list, tuple)):
    tmp_str = ' '.join(args)
  else:
    raise CalledProcessError(None, args, kwargs.get('cwd'), None, None)
  if kwargs.get('cwd', None):
    tmp_str += '; cwd=%s' % kwargs['cwd']
  logging.debug(tmp_str)

  def fix(stream):
    if kwargs.get(stream) in (VOID, os.devnull):
      # Replaces VOID with a handle to /dev/null.
      # Create a temporary file to workaround python's deadlock.
      # http://docs.python.org/library/subprocess.html#subprocess.Popen.wait
      # When the pipe fills up, it will deadlock this process. Using a real
      # file works around that issue.
      kwargs[stream] = open(os.devnull, 'w')
    if callable(kwargs.get(stream)):
      # A callable stdout/stderr should be used only with the call() wrappers.
      kwargs[stream] = PIPE

  fix('stdout')
  fix('stderr')

  try:
    return subprocess.Popen(args, **kwargs)
  except OSError, e:
    if e.errno == errno.EAGAIN and sys.platform == 'cygwin':
      # Convert fork() emulation failure into a CygwinRebaseError().
      raise CygwinRebaseError(
          e.errno,
          args,
          kwargs.get('cwd'),
          None,
          'Visit '
          'http://code.google.com/p/chromium/wiki/CygwinDllRemappingFailure to '
          'learn how to fix this error; you need to rebase your cygwin dlls')
    # Popen() can throw OSError when cwd or args[0] doesn't exist. Let it go
    # through.
    raise
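

# A minimal usage sketch (illustrative only, not part of the original module):
# passing VOID for a stream sends it to os.devnull, while PIPE behaves as in
# the standard subprocess module. The command below is only a placeholder.
def _example_popen_usage():
  proc = Popen(['hostname'], stdout=PIPE, stderr=VOID)
  # stderr was redirected to /dev/null, so only stdout is captured here.
  out = proc.communicate()[0]
  return proc.returncode, out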


def _queue_pipe_read(pipe, name, done, dest):
  """Queues characters read from a pipe into a queue.

  Left outside the _tee_threads function to not introduce a function closure,
  to speed up variable lookup.
  """
  while not done.isSet():
    data = pipe.read(1)
    if not data:
      break
    dest.put((name, data))
  dest.put(name)


def _tee_threads(proc, timeout, start, stdin, args, kwargs):
  """Does I/O for a process's pipes using threads.

  It's the simplest and slowest implementation. Expect very slow behavior.

  If there is a callback and it doesn't keep up with the calls, the timeout
  effectiveness will be delayed accordingly.
  """
  # TODO(maruel): Implement a select-based implementation on POSIX and a
  # Windows one using WaitForMultipleObjects().
  #
  # Queue of either <threadname> when done or (<threadname>, data).
  # In theory we would like to limit items to ~64kb to not cause large memory
  # usage when the callback blocks. It is not done because it slows down
  # processing on OS X 10.6 by a factor of 2x, making it even slower than
  # Windows! Revisit this decision if it becomes a problem, e.g. a crash
  # because of memory exhaustion.
  queue = Queue.Queue()
  done = threading.Event()

  def write_stdin():
    stdin_io = cStringIO.StringIO(stdin)
    while not done.isSet():
      data = stdin_io.read(1024)
      if data:
        proc.stdin.write(data)
      else:
        proc.stdin.close()
        break
    queue.put('stdin')

  def timeout_fn():
    done.wait(timeout)
    # No need to close the pipes since killing should be sufficient.
    queue.put('timeout')

  # Starts up to 4 threads:
  #   Read stdout
  #   Read stderr
  #   Write stdin
  #   Timeout
  threads = {}
  if timeout is not None:
    threads['timeout'] = threading.Thread(target=timeout_fn)
  if callable(kwargs.get('stdout')):
    threads['stdout'] = threading.Thread(
        target=_queue_pipe_read,
        args=(proc.stdout, 'stdout', done, queue))
  if callable(kwargs.get('stderr')):
    threads['stderr'] = threading.Thread(
        target=_queue_pipe_read,
        args=(proc.stderr, 'stderr', done, queue))
  if isinstance(stdin, str):
    threads['stdin'] = threading.Thread(target=write_stdin)
  for t in threads.itervalues():
    t.daemon = True
    t.start()

  timed_out = False
  try:
    while proc.returncode is None:
      assert threads
      proc.poll()
      item = queue.get()
      if isinstance(item, str):
        threads[item].join()
        del threads[item]
        if item == 'timeout' and not timed_out and proc.poll() is None:
          logging.debug('Timed out: killing')
          proc.kill()
          timed_out = True
        if not threads:
          # We won't be woken up anymore. Need to busy loop.
          break
      else:
        kwargs[item[0]](item[1])
  finally:
    # Stop the threads.
    done.set()
    # Join the threads.
    for thread in threads.itervalues():
      thread.join()
    # Flush the queue.
    try:
      while True:
        item = queue.get(False)
        if isinstance(item, str):
          if item == 'timeout':
            # TODO(maruel): Does it make sense at that point?
            if not timed_out and proc.poll() is None:
              logging.debug('Timed out: killing')
              proc.kill()
              timed_out = True
        else:
          kwargs[item[0]](item[1])
    except Queue.Empty:
      pass

  # Get the remainder.
  if callable(kwargs.get('stdout')):
    data = proc.stdout.read()
    while data:
      kwargs['stdout'](data)
      data = proc.stdout.read()
  if callable(kwargs.get('stderr')):
    data = proc.stderr.read()
    while data:
      kwargs['stderr'](data)
      data = proc.stderr.read()

  if proc.returncode is None:
    # Usually happens when killed with a timeout but not listening to pipes.
    proc.wait()

  if timed_out:
    return TIMED_OUT
  return proc.returncode


def communicate(args, timeout=None, **kwargs):
  """Wraps subprocess.Popen().communicate().

  Returns ((stdout, stderr), returncode).

  - The process will be killed after |timeout| seconds and returncode set to
    TIMED_OUT.
  - Automatically passes stdin content as input, so do not specify stdin=PIPE.
  """
  if timeout and kwargs.get('shell'):
    raise TypeError(
        'Using timeout and shell simultaneously will cause a process leak '
        'since the shell will be killed instead of the child process.')

  stdin = kwargs.pop('stdin', None)
  if stdin is not None:
    if stdin is VOID:
      kwargs['stdin'] = open(os.devnull, 'r')
      stdin = None
    else:
      assert isinstance(stdin, basestring)
      # When stdin is passed as an argument, use it as the actual input data
      # and set the Popen() parameter accordingly.
      kwargs['stdin'] = PIPE

  start = time.time()
  proc = Popen(args, **kwargs)
  need_buffering = (timeout or
                    callable(kwargs.get('stdout')) or
                    callable(kwargs.get('stderr')))

  if not need_buffering:
    # Normal workflow.
    if stdin not in (None, VOID):
      return proc.communicate(stdin), proc.returncode
    else:
      return proc.communicate(), proc.returncode

  stdout = None
  stderr = None
  # Convert to a lambda to workaround python's deadlock.
  # http://docs.python.org/library/subprocess.html#subprocess.Popen.wait
  # When the pipe fills up, it will deadlock this process. Using a thread
  # works around that issue. No need for thread-safe functions since the
  # callbacks are guaranteed to be called from the main thread.
  if kwargs.get('stdout') == PIPE:
    stdout = []
    kwargs['stdout'] = stdout.append
  if kwargs.get('stderr') == PIPE:
    stderr = []
    kwargs['stderr'] = stderr.append
  returncode = _tee_threads(proc, timeout, start, stdin, args, kwargs)
  if stdout is not None:
    stdout = ''.join(stdout)
  if stderr is not None:
    stderr = ''.join(stderr)
  return (stdout, stderr), returncode
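

# A minimal usage sketch (illustrative only, not part of the original module):
# runs a placeholder command with a timeout, buffers stdout and discards
# stderr. A TIMED_OUT returncode means the child was killed.
def _example_communicate_usage():
  (out, _), returncode = communicate(
      ['hostname'], timeout=30, stdout=PIPE, stderr=VOID)
  if returncode == TIMED_OUT:
    logging.warning('hostname timed out')
  return out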


def call(args, **kwargs):
  """Emulates subprocess.call().

  Automatically converts stdout=PIPE or stderr=PIPE to VOID. In no case can
  they be returned since no code path raises subprocess2.CalledProcessError.
  """
  if kwargs.get('stdout') == PIPE:
    kwargs['stdout'] = VOID
  if kwargs.get('stderr') == PIPE:
    kwargs['stderr'] = VOID
  return communicate(args, **kwargs)[1]


def check_call_out(args, **kwargs):
  """Improved version of subprocess.check_call().

  Returns (stdout, stderr), unlike subprocess.check_call().
  """
  out, returncode = communicate(args, **kwargs)
  if returncode:
    raise CalledProcessError(
        returncode, args, kwargs.get('cwd'), out[0], out[1])
  return out


def check_call(args, **kwargs):
  """Emulates subprocess.check_call()."""
  check_call_out(args, **kwargs)
  return 0


def capture(args, **kwargs):
  """Captures stdout of a process call and returns it.

  Returns stdout.

  - Discards returncode.
  - Blocks stdin by default if not specified since no output will be visible.
  """
  kwargs.setdefault('stdin', VOID)

  # Like check_output, deny the caller from using the stdout arg.
  return communicate(args, stdout=PIPE, **kwargs)[0][0]


def check_output(args, **kwargs):
  """Emulates subprocess.check_output().

  Captures stdout of a process call and returns stdout only.

  - Throws if the return code is not 0.
  - Works even prior to python 2.7.
  - Blocks stdin by default if not specified since no output will be visible.
  - As per doc, "The stdout argument is not allowed as it is used internally."
  """
  kwargs.setdefault('stdin', VOID)
  return check_call_out(args, stdout=PIPE, **kwargs)[0]
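

# A minimal usage sketch (illustrative only, not part of the original module):
# check_output() raises CalledProcessError on a non-zero exit, while capture()
# swallows the returncode. The git command is only a placeholder.
def _example_check_output_usage():
  try:
    return check_output(['git', 'rev-parse', 'HEAD'])
  except CalledProcessError, e:
    # str(e) includes the command, the returncode and any captured output.
    logging.error('git rev-parse failed: %s', e)
    return None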