shell.py
No OneTemporary
Actions

Subscribers

None

File Metadata

Created: Wed, Jun 26, 07:50

shell.py
View Options

	# -- coding: utf-8 --
	#
	# This file is part of Invenio.
	# Copyright (C) 2008, 2009, 2010, 2011, 2014, 2015 CERN.
	#
	# Invenio is free software; you can redistribute it and/or
	# modify it under the terms of the GNU General Public License as
	# published by the Free Software Foundation; either version 2 of the
	# License, or (at your option) any later version.
	#
	# Invenio is distributed in the hope that it will be useful, but
	# WITHOUT ANY WARRANTY; without even the implied warranty of
	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	# General Public License for more details.
	#
	# You should have received a copy of the GNU General Public License
	# along with Invenio; if not, write to the Free Software Foundation, Inc.,
	# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

	"""
	The shellutils module contains helper functions useful for interacting
	with the operating system shell.

	The main API functions are:
	- run_shell_command()
	"""

	import os
	import fcntl
	import tempfile
	import time
	import signal
	import six
	import select
	from itertools import chain
	from six import StringIO
	import subprocess

	__all__ = ['run_shell_command',
	'run_process_with_timeout',
	'Timeout',
	'split_cli_ids_arg']

	"""
	This module implements two functions:
	- L{run_shell_command}
	- L{run_process_with_timeout}

	L{run_shell_command} will run a command through a shell, capturing its
	standard output and standard error.

	L{run_process_with_timeout} will run a process on its own, allowing you to
	specify an input file, capturing the standard output and standard error and
	killing the process after a given timeout.
	"""

	try:
	from shutil import which
	except ImportError:
	# CPython <3.3
	from distutils.spawn import find_executable as which

	class Timeout(Exception):
	    """Signal that a supervised process exceeded its time budget.

	    Raised by run_process_with_timeout() once the allowed number of
	    seconds has elapsed and the child process group has been signalled.
	    """


	def run_shell_command(cmd, args=None, filename_out=None, filename_err=None):
	    """Run operating system command cmd with arguments from the args
	    tuple in a sub-shell and return tuple (exit status code, stdout
	    info, stderr info).

	    @param cmd: Command to execute in a shell; may contain %s
	        placeholders for arguments that will be expanded from the args
	        tuple. Example: cmd='echo %s', args = ('hello',).
	    @type cmd: string

	    @param args: Arguments to be escaped (with escape_shell_arg()) and
	        substituted for %s placeholders in cmd.
	    @type args: tuple of strings

	    @param filename_out: Desired filename for stdout output
	        (optional; see below).
	    @type filename_out: string

	    @param filename_err: Desired filename for stderr output
	        (optional; see below).
	    @type filename_err: string

	    @return: Tuple (exit code, string containing stdout output buffer,
	        string containing stderr output buffer). The exit code is the
	        raw value returned by os.system().

	        However, if either filename_out or filename_err are defined,
	        then the corresponding output buffer is not passed back (an
	        empty string is returned in its place) but rather written
	        into the filename_out/filename_err pathname. This is useful
	        for commands that produce big files, for which it is not
	        practical to pass results back to the callers in a Python text
	        buffer. Note that it is the client's responsibility to name
	        these files in the proper fashion (e.g. to be unique) and to
	        remove them after use.
	    @rtype: (number, string, string)

	    @raise TypeError: if the number of args does not correspond to the
	        number of placeholders in cmd.

	    @note: Uses temporary files to store out/err output, not pipes due
	        to potential pipe race condition on some systems. If either
	        filename_out or filename_err are defined, then do not create
	        temporary files, but store stdout or stderr output directly in
	        these files instead, and do not delete them after execution.
	    """
	    args = tuple(args) if args else ()
	    # A placeholder/argument count mismatch raises TypeError here; it is
	    # deliberately left to propagate to the caller.
	    cmd = cmd % tuple([escape_shell_arg(x) for x in args])

	    def quote_path(path):
	        # Single-quote the redirect target so that spaces or shell
	        # metacharacters in the path cannot change the command. The path
	        # was already used literally by os.open()/mkstemp() below, so no
	        # caller can be relying on shell expansion of it.
	        return "'%s'" % path.replace("'", r"'\''")

	    cmd_out = ''
	    cmd_err = ''
	    open_fds = []    # descriptors to close whatever happens
	    temp_files = []  # files created here (not caller-named) to delete
	    try:
	        # create files:
	        if filename_out:
	            open_fds.append(os.open(filename_out, os.O_CREAT, 0o644))
	            file_cmd_out = filename_out
	        else:
	            cmd_out_fd, file_cmd_out = tempfile.mkstemp(
	                "invenio.utils.shell-cmd-out")
	            open_fds.append(cmd_out_fd)
	            temp_files.append(file_cmd_out)
	        if filename_err:
	            open_fds.append(os.open(filename_err, os.O_CREAT, 0o644))
	            file_cmd_err = filename_err
	        else:
	            cmd_err_fd, file_cmd_err = tempfile.mkstemp(
	                "invenio.utils.shell-cmd-err")
	            open_fds.append(cmd_err_fd)
	            temp_files.append(file_cmd_err)
	        # run command:
	        cmd_exit_code = os.system("%s > %s 2> %s" % (
	            cmd, quote_path(file_cmd_out), quote_path(file_cmd_err)))
	        # read back the temporary files (if applicable):
	        if not filename_out and os.path.exists(file_cmd_out):
	            with open(file_cmd_out) as cmd_out_fo:
	                cmd_out = cmd_out_fo.read()
	        if not filename_err and os.path.exists(file_cmd_err):
	            with open(file_cmd_err) as cmd_err_fo:
	                cmd_err = cmd_err_fo.read()
	    finally:
	        # Release descriptors and temporary files even when the command
	        # could not be run or its output could not be read.
	        for fd in open_fds:
	            os.close(fd)
	        for path in temp_files:
	            if os.path.exists(path):
	                os.remove(path)
	    # return results:
	    return cmd_exit_code, cmd_out, cmd_err


	def run_process_with_timeout(args, filename_in=None, filename_out=None, filename_err=None, cwd=None, timeout=None, sudo=None):
	    """Execute the specified process but within a certain timeout.

	    @param args: the actual process. This should be a list of strings as in:
	        ['/usr/bin/foo', '--bar', 'baz']
	    @type args: list of string

	    @param filename_in: the path to a file that should be provided as
	        standard input to the process. If None this will default to
	        the null device.
	    @type filename_in: string

	    @param filename_out: Desired filename for stdout output
	        (optional; see below).
	    @type filename_out: string

	    @param filename_err: Desired filename for stderr output
	        (optional; see below).
	    @type filename_err: string

	    @param cwd: the path from where to execute the process
	    @type cwd: string

	    @param timeout: the timeout in seconds after which to consider the
	        process execution as failed. A Timeout exception will be raised.
	        If None, CFG_MISCUTIL_DEFAULT_PROCESS_TIMEOUT from
	        invenio.config is used.
	    @type timeout: int

	    @param sudo: the optional name of the user under which to execute the
	        process (by using sudo, without prompting for a password)
	    @type sudo: string

	    @return: Tuple (exit code, string containing stdout output buffer,
	        string containing stderr output buffer).

	        However, if either filename_out or filename_err are defined,
	        then the corresponding output buffer is not passed back (an
	        empty string is returned in its place) but rather written
	        into the filename_out/filename_err pathname. This is useful
	        for commands that produce big files, for which it is not
	        practical to pass results back to the callers in a Python text
	        buffer. Note that it is the client's responsibility to name
	        these files in the proper fashion (e.g. to be unique). The
	        files are opened and closed by this function.
	    @rtype: (number, string, string)

	    @raise Timeout: if the process does not terminate within the timeout

	    @note: NOTE(review): the in-memory buffers are six.StringIO objects;
	        on Python 3 the pipes presumably yield bytes, which such buffers
	        would reject -- confirm before relying on this under Python 3.
	    """
	    if timeout is None:
	        from invenio.config import CFG_MISCUTIL_DEFAULT_PROCESS_TIMEOUT
	        timeout = CFG_MISCUTIL_DEFAULT_PROCESS_TIMEOUT
	    if sudo is not None:
	        args = ['sudo', '-u', sudo, '-S'] + list(args)
	    tmp_stdout = StringIO()
	    tmp_stderr = StringIO()
	    stdin = stdout = stderr = process = None
	    try:
	        # os.devnull is '/dev/null' on POSIX systems.
	        stdin = open(filename_in if filename_in is not None else os.devnull)
	        if filename_out:
	            stdout = open(filename_out, 'w')
	        if filename_err:
	            stderr = open(filename_err, 'w')
	        ## See: <http://stackoverflow.com/questions/3876886/timeout-a-subprocess>
	        # os.setpgrp puts the child in its own process group so that the
	        # whole group can be signalled with os.killpg() on timeout.
	        process = subprocess.Popen(args, stdin=stdin, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True, cwd=cwd, preexec_fn=os.setpgrp)

	        ## See: <http://stackoverflow.com/questions/375427/non-blocking-read-on-a-stream-in-python>
	        for pipe in (process.stdout, process.stderr):
	            fd = pipe.fileno()
	            fl = fcntl.fcntl(fd, fcntl.F_GETFL)
	            fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)
	        # Each pipe is forwarded either to the caller-named file or to
	        # the in-memory buffer that is returned, never to both.
	        sinks = {
	            process.stdout: tmp_stdout if stdout is None else stdout,
	            process.stderr: tmp_stderr if stderr is None else stderr,
	        }
	        open_pipes = [process.stdout, process.stderr]
	        select_timeout = 0.5
	        t1 = time.time()
	        try:
	            while process.poll() is None:
	                if time.time() - t1 >= timeout:
	                    if process.stdin is not None:
	                        process.stdin.close()
	                    time.sleep(1)
	                    if process.poll() is None:
	                        ## See: <http://stackoverflow.com/questions/3876886/timeout-a-subprocess>
	                        os.killpg(process.pid, signal.SIGTERM)
	                        time.sleep(1)
	                    if process.poll() is None:
	                        os.killpg(process.pid, signal.SIGKILL)
	                    try:
	                        os.waitpid(process.pid, 0)
	                    except OSError:
	                        # The child has already been reaped.
	                        pass
	                    raise Timeout()
	                _forward_ready_output(open_pipes, sinks, select_timeout)
	        finally:
	            ## Let's just read what is remaining to read: keep going until
	            ## both pipes hit end-of-file or nothing arrives within one
	            ## polling interval.
	            while open_pipes and _forward_ready_output(
	                    open_pipes, sinks, select_timeout):
	                pass
	        return process.poll(), tmp_stdout.getvalue(), tmp_stderr.getvalue()
	    finally:
	        # Callers only pass file names, so every handle opened here has to
	        # be released here, including on Timeout or a failed Popen().
	        if process is not None:
	            process.stdout.close()
	            process.stderr.close()
	        for handle in (stdin, stdout, stderr):
	            if handle is not None:
	                handle.close()


	def _forward_ready_output(open_pipes, sinks, wait):
	    """Copy currently readable data from each pipe to its sink.

	    Waits up to wait seconds for any pipe in open_pipes to become
	    readable, writes what was read to sinks[pipe], and removes pipes
	    that reached end-of-file from open_pipes (so they are not polled
	    again). Returns True if at least one pipe was reported readable.
	    """
	    ready = select.select(open_pipes, [], [], wait)[0]
	    for pipe in ready:
	        buf = pipe.read(65536)
	        if buf is None:
	            # Non-blocking read with nothing available right now.
	            continue
	        if buf:
	            sinks[pipe].write(buf)
	        else:
	            open_pipes.remove(pipe)
	    return bool(ready)


	def escape_shell_arg(shell_arg):
	    """Escape shell argument shell_arg by placing it within
	    single-quotes. Any single quotes found within the shell argument
	    string will be escaped.

	    @param shell_arg: The shell argument to be escaped.
	    @type shell_arg: string
	    @return: The single-quote-escaped value of the shell argument.
	    @rtype: string
	    @raise TypeError: if shell_arg is not a string.
	    @see: U{http://mail.python.org/pipermail/python-list/2005-October/346957.html}
	    """
	    # Accept only the interpreter's native str type. On Python 2 this
	    # keeps rejecting unicode objects (as before); on Python 3 it accepts
	    # ordinary text, which the previous identity check against
	    # six.text_type rejected. Any non-string now raises the documented
	    # TypeError instead of failing later on the missing .replace().
	    if not isinstance(shell_arg, str):
	        msg = "ERROR: escape_shell_arg() expected string argument but " \
	              "got '%s' of type '%s'." % (repr(shell_arg), type(shell_arg))
	        raise TypeError(msg)

	    # Close the quoted string, emit an escaped quote, reopen the string.
	    return "'%s'" % shell_arg.replace("'", r"'\''")

	def retry_mkstemp(suffix='', prefix='tmp', directory=None, max_retries=3,
	                  retry_delay=10):
	    """Make mkstemp more robust against AFS glitches.

	    Calls tempfile.mkstemp() and, when it fails with ENODEV (errno 19),
	    waits and tries again, up to max_retries attempts in total. Any
	    other OSError, or an ENODEV on the last attempt, is re-raised.

	    @param suffix: suffix passed to tempfile.mkstemp().
	    @type suffix: string
	    @param prefix: prefix passed to tempfile.mkstemp().
	    @type prefix: string
	    @param directory: directory in which to create the file; if None,
	        CFG_TMPSHAREDDIR from invenio.config is used.
	    @type directory: string
	    @param max_retries: maximum number of attempts; at least one attempt
	        is always made.
	    @type max_retries: int
	    @param retry_delay: seconds to sleep between attempts.
	    @type retry_delay: number
	    @return: Tuple (open file descriptor, path of the created file), as
	        returned by tempfile.mkstemp().
	    @rtype: (int, string)
	    @raise OSError: if the file could not be created.
	    """
	    import errno
	    if directory is None:
	        from invenio.config import CFG_TMPSHAREDDIR
	        directory = CFG_TMPSHAREDDIR
	    retry_count = 1
	    while True:
	        try:
	            return tempfile.mkstemp(suffix=suffix,
	                                    prefix=prefix,
	                                    dir=directory)
	        except OSError as e:
	            # Give up on unrelated errors and once the attempts are used
	            # up, so the caller sees the real OSError rather than an
	            # unbound-variable error.
	            if e.errno != errno.ENODEV or retry_count >= max_retries:
	                raise
	            # AFS Glitch?
	            retry_count += 1
	            time.sleep(retry_delay)


	def mymkdir(newdir, mode=0o777):
	    """works the way a good mkdir should :)
	    - already exists, silently complete
	    - regular file in the way, raise an exception
	    - parent directory(ies) does not exist, make them as well

	    @param newdir: path of the directory to create.
	    @type newdir: string
	    @param mode: permission bits passed to os.mkdir(); they are combined
	        with a umask of 022 for the duration of the call.
	    @type mode: int
	    @raise OSError: if a regular file already occupies newdir or the
	        directory cannot be created.
	    """
	    if os.path.isdir(newdir):
	        return
	    if os.path.isfile(newdir):
	        raise OSError("a file with the same name as the desired " \
	                      "dir, '%s', already exists." % newdir)
	    head, tail = os.path.split(newdir)
	    if head and not os.path.isdir(head):
	        mymkdir(head, mode)
	    if tail:
	        # The umask is process-wide state: apply 022 only around the
	        # mkdir call and put the caller's value back afterwards.
	        previous_umask = os.umask(0o022)
	        try:
	            os.mkdir(newdir, mode)
	        except OSError:
	            # Someone else may have created the directory between the
	            # check above and the mkdir; that still counts as success.
	            if not os.path.isdir(newdir):
	                raise
	        finally:
	            os.umask(previous_umask)


	def s(t):
	    """Debugging hook that currently does nothing with t."""
	    ## De-comment this to have lots of debugging information
	    #print time.time(), t
	    return None


	def split_cli_ids_arg(value):
	    """
	    Split ids given in the command line
	    Possible formats are:
	    * 1
	    * 1,2,3,4
	    * 1-5,20,30,40
	    Returns respectively
	    * set([1])
	    * set([1,2,3,4])
	    * set([1,2,3,4,5,20,30,40])

	    Empty elements (e.g. from a trailing comma) are ignored.

	    @param value: comma-separated ids and/or inclusive "start-end" ranges.
	    @type value: string
	    @return: the ids described by value.
	    @rtype: set of int
	    @raise ValueError: if an element is not an integer or a range of
	        integers.
	    """
	    def parse(el):
	        el = el.strip()
	        if not el:
	            return []
	        if '-' in el:
	            start, end = el.split('-', 1)
	            # range() exists on both Python 2 and 3 (xrange does not);
	            # the end of the range is inclusive.
	            return range(int(start), int(end) + 1)
	        return [int(el)]
	    return set(chain.from_iterable(parse(c) for c in value.split(',')))

shell.pyNo OneTemporaryActions

File Metadata

shell.pyView Options

Event Timeline

shell.py
No OneTemporary
Actions

shell.py
View Options