Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F87733625
shell.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Mon, Oct 14, 15:12
Size
13 KB
Mime Type
text/x-python
Expires
Wed, Oct 16, 15:12 (2 d)
Engine
blob
Format
Raw Data
Handle
21588698
Attached To
R3600 invenio-infoscience
shell.py
View Options
# -*- coding: utf-8 -*-
#
# This file is part of Invenio.
# Copyright (C) 2008, 2009, 2010, 2011, 2014, 2015 CERN.
#
# Invenio is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# Invenio is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Invenio; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
The shellutils module contains helper functions useful for interacting
with the operating system shell.
The main API functions are:
- run_shell_command()
"""
import
os
import
fcntl
import
tempfile
import
time
import
signal
import
six
import
select
from
itertools
import
chain
from
six
import
StringIO
import
subprocess
# Public names exported by a star-import of this module.  The other helpers
# defined in this file are not listed here and must be imported by name.
__all__ = ['run_shell_command',
           'run_process_with_timeout',
           'Timeout',
           'split_cli_ids_arg']
"""
This module implements two functions:
- L{run_shell_command}
- L{run_process_with_timeout}
L{run_shell_command} will run a command through a shell, capturing its
standard output and standard error.
L{run_process_with_timeout} will run a process on its own allowing to
specify a input file, capturing the standard output and standard error and
killing the process after a given timeout.
"""
# Expose a ``which`` callable under a single name: use the one from shutil
# when it can be imported, otherwise alias distutils' find_executable.
try:
    from shutil import which
except ImportError:
    # CPython <3.3
    from distutils.spawn import find_executable as which
class Timeout(Exception):
    """Signal that an operation did not finish within its time budget.

    In this module it is raised by run_process_with_timeout() once the
    supervised process has been running for longer than the allowed
    number of seconds.
    """
def _quote_redirect_target(path):
    """Wrap path in single quotes so the shell reads it as one literal word.

    Done locally rather than through escape_shell_arg() because that helper
    type-checks its argument and can reject otherwise valid path strings.
    """
    return "'%s'" % path.replace("'", r"'\''")


def run_shell_command(cmd, args=None, filename_out=None, filename_err=None):
    """Run operating system command cmd with arguments from the args
    tuple in a sub-shell and return tuple (exit status code, stdout
    info, stderr info).

    @param cmd: Command to execute in a shell; may contain %s
        placeholders for arguments that will be expanded from the args
        tuple. Example: cmd='echo %s', args = ('hello',).
    @type cmd: string
    @param args: Arguments to be escaped and substituted for %s
        placeholders in cmd.
    @type args: tuple of strings
    @param filename_out: Desired filename for stdout output
        (optional; see below).
    @type filename_out: string
    @param filename_err: Desired filename for stderr output
        (optional; see below).
    @type filename_err: string
    @return: Tuple (value returned by os.system(), string containing
        stdout output buffer, string containing stderr output buffer).
        However, if filename_out (respectively filename_err) is
        defined, the corresponding buffer is returned empty and the
        output is left in that file instead. This is useful for
        commands that produce big files, for which it is not practical
        to pass results back to the callers in a Python text buffer.
        Note that it is the client's responsibility to name these
        files in the proper fashion (e.g. to be unique).
    @rtype: (number, string, string)
    @raise TypeError: if the number of args does not correspond to the
        number of placeholders in cmd.
    @note: Uses temporary files to store out/err output, not pipes due
        to potential pipe race condition on some systems. If either
        filename_out or filename_err are defined, then do not create
        temporary files, but store stdout or stderr output directly in
        these files instead, and do not delete them after execution.
    """
    # wash args value:
    args = tuple(args) if args else ()

    # construct command with argument substitution; a mismatch between
    # placeholders and args propagates from here as TypeError:
    cmd = cmd % tuple([escape_shell_arg(x) for x in args])

    cmd_out = ''
    cmd_err = ''
    open_fds = []      # descriptors to close whatever happens
    temp_files = []    # temporary files to delete whatever happens
    try:
        # create files:
        if filename_out:
            open_fds.append(os.open(filename_out, os.O_CREAT, 0o644))
            file_cmd_out = filename_out
        else:
            out_fd, file_cmd_out = tempfile.mkstemp(
                suffix="invenio.utils.shell-cmd-out")
            open_fds.append(out_fd)
            temp_files.append(file_cmd_out)
        if filename_err:
            open_fds.append(os.open(filename_err, os.O_CREAT, 0o644))
            file_cmd_err = filename_err
        else:
            err_fd, file_cmd_err = tempfile.mkstemp(
                suffix="invenio.utils.shell-cmd-err")
            open_fds.append(err_fd)
            temp_files.append(file_cmd_err)

        # run command; the redirect targets are quoted so that paths
        # containing spaces or shell metacharacters stay a single word:
        cmd_exit_code = os.system(
            "%s > %s 2> %s" % (cmd,
                               _quote_redirect_target(file_cmd_out),
                               _quote_redirect_target(file_cmd_err)))

        # collect output from the temporary files (if applicable):
        if not filename_out and os.path.exists(file_cmd_out):
            with open(file_cmd_out) as cmd_out_fo:
                cmd_out = cmd_out_fo.read()
        if not filename_err and os.path.exists(file_cmd_err):
            with open(file_cmd_err) as cmd_err_fo:
                cmd_err = cmd_err_fo.read()
    finally:
        # release descriptors and temporary files even on failure:
        for open_fd in open_fds:
            os.close(open_fd)
        for temp_file in temp_files:
            if os.path.exists(temp_file):
                os.remove(temp_file)

    # return results:
    return cmd_exit_code, cmd_out, cmd_err
def run_process_with_timeout(args, filename_in=None, filename_out=None,
                             filename_err=None, cwd=None, timeout=None,
                             sudo=None):
    """Execute the specified process but within a certain timeout.

    The process is started with subprocess.Popen, its stdout and stderr
    pipes are switched to non-blocking mode, and the pipes are polled
    with select() until the process exits or the timeout elapses.

    @param args: the actual process. This should be a list of string as in:
        ['/usr/bin/foo', '--bar', 'baz']
    @type args: list of string
    @param filename_in: the path to a file that should be provided as standard
        input to the process. If None this will default to /dev/null
    @type filename_in: string
    @param filename_out: Desired filename for stdout output
        (optional; see below).
    @type filename_out: string
    @param filename_err: Desired filename for stderr output
        (optional; see below).
    @type filename_err: string
    @param cwd: the path from where to execute the process
    @type cwd: string
    @param timeout: the timeout in seconds after which to consider the
        process execution as failed. a Timeout exception will be raised.
        If None, CFG_MISCUTIL_DEFAULT_PROCESS_TIMEOUT from invenio.config
        is used.
    @type timeout: int
    @param sudo: the optional name of the user under which to execute the
        process (by using sudo, without prompting for a password)
    @type sudo: string
    @return: Tuple (exit code, string containing stdout output buffer,
        string containing stderr output buffer).
        If filename_out or filename_err is defined, output read while
        the process is running is written to that file rather than to
        the returned buffer; output read by the final drain pass is
        written to both. This is useful for commands that produce big
        files, for which it is not practical to pass results back to
        the callers in a Python text buffer.
        Note that it is the client's responsibility to name these
        files in the proper fashion (e.g. to be unique).
    @rtype: (number, string, string)
    @raise Timeout: if the process does not terminate within the timeout
    """
    if timeout is None:
        # Imported lazily, only when the caller did not supply a timeout.
        from invenio.config import CFG_MISCUTIL_DEFAULT_PROCESS_TIMEOUT
        timeout = CFG_MISCUTIL_DEFAULT_PROCESS_TIMEOUT
    stdout = stderr = None
    if filename_in is not None:
        stdin = open(filename_in)
    else:
        ## FIXME: should use NUL on Windows
        stdin = open('/dev/null', 'r')
    # NOTE(review): none of the file objects opened here (stdin, stdout,
    # stderr) is explicitly closed anywhere in this function.
    if filename_out:
        stdout = open(filename_out, 'w')
    if filename_err:
        stderr = open(filename_err, 'w')
    # In-memory buffers whose contents are returned to the caller.
    tmp_stdout = StringIO()
    tmp_stderr = StringIO()
    if sudo is not None:
        # Run the command through sudo as the requested user.
        args = ['sudo', '-u', sudo, '-S'] + list(args)
    ## See: <http://stackoverflow.com/questions/3876886/timeout-a-subprocess>
    # os.setpgrp runs in the child before exec, so the child gets its own
    # process group; os.killpg() below signals that whole group.
    process = subprocess.Popen(args, stdin=stdin, stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE, close_fds=True,
                               cwd=cwd, preexec_fn=os.setpgrp)
    ## See: <http://stackoverflow.com/questions/375427/non-blocking-read-on-a-stream-in-python>
    # Put both output pipes into non-blocking mode.
    fd = process.stdout.fileno()
    fl = fcntl.fcntl(fd, fcntl.F_GETFL)
    fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)
    fd = process.stderr.fileno()
    fl = fcntl.fcntl(fd, fcntl.F_GETFL)
    fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)
    fd_to_poll = [process.stdout, process.stderr]
    select_timeout = 0.5  # seconds each select() call may wait
    t1 = time.time()  # start of the timeout window
    try:
        while process.poll() is None:
            if time.time() - t1 >= timeout:
                # Timeout reached: escalate from a grace period to SIGTERM
                # to SIGKILL, then reap the child and report the timeout.
                # NOTE(review): stdin is passed to Popen as a file object,
                # not a pipe, so process.stdin is presumably always None
                # here and this close never runs -- confirm.
                if process.stdin is not None:
                    process.stdin.close()
                time.sleep(1)
                if process.poll() is None:
                    ## See: <http://stackoverflow.com/questions/3876886/timeout-a-subprocess>
                    os.killpg(process.pid, signal.SIGTERM)
                    time.sleep(1)
                if process.poll() is None:
                    os.killpg(process.pid, signal.SIGKILL)
                try:
                    os.waitpid(process.pid, 0)
                except OSError:
                    # Ignored on purpose: waitpid failing does not stop
                    # the Timeout from being raised.
                    pass
                raise Timeout()
            # Copy whatever is readable to the target file when one was
            # requested, otherwise to the in-memory buffer.
            for fd in select.select(fd_to_poll, [], [], select_timeout)[0]:
                if fd == process.stdout:
                    buf = process.stdout.read(65536)
                    if stdout is None:
                        tmp_stdout.write(buf)
                    else:
                        stdout.write(buf)
                elif fd == process.stderr:
                    buf = process.stderr.read(65536)
                    if stderr is None:
                        tmp_stderr.write(buf)
                    else:
                        stderr.write(buf)
                else:
                    raise OSError("fd %s is not a valid file descriptor" % fd)
    finally:
        # Runs on normal exit and when Timeout (or any other error) is
        # propagating.  The for loop contains no break, so its else clause
        # always runs and the while loop performs exactly one pass: at
        # most one read of 65536 bytes per readable pipe.
        # NOTE(review): unlike the loop above, this pass always writes to
        # the in-memory buffer and additionally to the target file.
        while True:
            ## Let's just read what is remaining to read.
            for fd in select.select(fd_to_poll, [], [], select_timeout)[0]:
                if fd == process.stdout:
                    buf = process.stdout.read(65536)
                    tmp_stdout.write(buf)
                    if stdout is not None:
                        stdout.write(buf)
                elif fd == process.stderr:
                    buf = process.stderr.read(65536)
                    tmp_stderr.write(buf)
                    if stderr is not None:
                        stderr.write(buf)
                else:
                    raise OSError("fd %s is not a valid file descriptor" % fd)
            else:
                break
    return process.poll(), tmp_stdout.getvalue(), tmp_stderr.getvalue()
def escape_shell_arg(shell_arg):
    """Escape shell argument shell_arg by placing it within
    single-quotes. Any single quotes found within the shell argument
    string will be escaped.

    @param shell_arg: The shell argument to be escaped.
    @type shell_arg: string
    @return: The single-quote-escaped value of the shell argument.
    @rtype: string
    @raise TypeError: if shell_arg is not a string.
    @see: U{http://mail.python.org/pipermail/python-list/2005-October/346957.html}
    """
    # Accept only the interpreter's native ``str``.  Testing for the text
    # type instead would reject every ordinary string on Python 3 and let
    # non-strings through to fail later with AttributeError rather than
    # the documented TypeError.
    if not isinstance(shell_arg, str):
        msg = "ERROR: escape_shell_arg() expected string argument but " \
              "got '%s' of type '%s'." % (repr(shell_arg), type(shell_arg))
        raise TypeError(msg)

    # Close the quote, emit an escaped quote, reopen the quote.
    return "'%s'" % shell_arg.replace("'", r"'\''")
def retry_mkstemp(suffix='', prefix='tmp', directory=None, max_retries=3):
    """
    Make mkstemp more robust against AFS glitches.

    Calls tempfile.mkstemp() and, when it fails with errno 19, waits ten
    seconds and tries again.  The failure of the last allowed attempt, and
    any other OSError, is propagated to the caller.

    @param suffix: suffix passed through to tempfile.mkstemp().
    @type suffix: string
    @param prefix: prefix passed through to tempfile.mkstemp().
    @type prefix: string
    @param directory: directory in which to create the file; when None,
        CFG_TMPSHAREDDIR from invenio.config is used.
    @type directory: string
    @param max_retries: total number of attempts; values below 1 are
        treated as a single attempt.
    @type max_retries: int
    @return: Tuple (open file descriptor, path of the created file).
    @rtype: (int, string)
    @raise OSError: if the file cannot be created.
    """
    if directory is None:
        from invenio.config import CFG_TMPSHAREDDIR
        directory = CFG_TMPSHAREDDIR

    attempts = max(1, max_retries)
    for retry_count in range(1, attempts + 1):
        try:
            return tempfile.mkstemp(suffix=suffix, prefix=prefix,
                                    dir=directory)
        except OSError as e:
            # Only retry while attempts remain; on the final one the
            # original error must reach the caller.
            if e.errno == 19 and retry_count < attempts:
                # AFS Glitch?
                time.sleep(10)
            else:
                raise
def mymkdir(newdir, mode=0o777):
    """works the way a good mkdir should :)
        - already exists, silently complete
        - regular file in the way, raise an exception
        - parent directory(ies) does not exist, make them as well

    Each directory is created with the process umask temporarily set to
    0o022, so group/other write bits requested in mode are masked out.
    The caller's umask is put back afterwards.

    @param newdir: path of the directory to create.
    @type newdir: string
    @param mode: permission bits passed to os.mkdir().
    @type mode: int
    @raise OSError: if a regular file already exists at newdir, or if
        os.mkdir() fails.
    """
    if os.path.isdir(newdir):
        return
    if os.path.isfile(newdir):
        raise OSError("a file with the same name as the desired "
                      "dir, '%s', already exists." % newdir)

    head, tail = os.path.split(newdir)
    if head and not os.path.isdir(head):
        # Create missing ancestors first.
        mymkdir(head, mode)
    if tail:
        # os.umask() returns the previous mask; restore it so that this
        # helper does not change the umask of the whole process for good.
        previous_umask = os.umask(0o022)
        try:
            os.mkdir(newdir, mode)
        finally:
            os.umask(previous_umask)
def s(t):
    """Debugging hook that currently does nothing with t.

    The statement kept in the comment below can be re-enabled to trace
    every call together with a timestamp.
    """
    ## De-comment this to have lots of debugging information
    #print time.time(), t
    return None
def split_cli_ids_arg(value):
    """
    Split ids given in the command line
    Possible formats are:
    * 1
    * 1,2,3,4
    * 1-5,20,30,40
    Returns respectively
    * set([1])
    * set([1,2,3,4])
    * set([1,2,3,4,5,20,30,40])

    Whitespace around elements is ignored and empty elements are
    skipped.  A range whose end is lower than its start contributes no
    ids.

    @param value: comma-separated ids and inclusive "start-end" ranges.
    @type value: string
    @return: the ids described by value.
    @rtype: set of int
    @raise ValueError: if an element is not an integer or a range of two
        integers.
    """
    def parse(el):
        # Turn one comma-separated element into an iterable of ids.
        el = el.strip()
        if not el:
            return []
        if '-' in el:
            start, end = el.split('-', 1)
            # ``range`` exists on both Python 2 and 3, unlike ``xrange``.
            return range(int(start), int(end) + 1)
        return [int(el)]

    return set(chain.from_iterable(parse(c) for c in value.split(',')))
Event Timeline
Log In to Comment