Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F90044616
batchuploader.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Mon, Oct 28, 19:23
Size
5 KB
Mime Type
text/x-python
Expires
Wed, Oct 30, 19:23 (2 d)
Engine
blob
Format
Raw Data
Handle
21986345
Attached To
R3600 invenio-infoscience
batchuploader.py
View Options
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
""" Invenio bibsched task for uploading multiple documents
or metadata files. This task can run in two different modes:
metadata or documents.
The parent directory from where the folders metadata and
documents are expected to be found has to be specified
in the invenio config file.
"""
# Revision identifier of this module; main() hands it to task_init() as the
# task version.
__revision__ = "$Id$"
import
sys
import
os
import
time
import
tempfile
import
shutil
from
invenio.config
import
CFG_TMPDIR
,
\
CFG_BATCHUPLOADER_DAEMON_DIR
,
\
CFG_BATCHUPLOADER_FILENAME_MATCHING_POLICY
,
\
CFG_PREFIX
from
invenio.bibtask
import
task_init
,
task_set_option
,
\
task_get_option
,
task_update_progress
,
task_low_level_submission
,
\
write_message
,
task_sleep_now_if_required
from
invenio.batchuploader_engine
import
document_upload
def task_submit_elaborate_specific_parameter(key, value, opts, args):
    """ Record the run mode selected by a batch uploader command line key.

    Possible keys:
      -d, --documents  sets the task option 'documents'
      -m, --metadata   sets the task option 'metadata'

    The arguments value, opts and args are accepted but not used here.

    Returns True if the key was recognised and the matching task option
    was set, False otherwise.
    """
    recognised = (
        (('-d', '--documents'), 'documents'),
        (('-m', '--metadata'), 'metadata'),
    )
    for flags, option_name in recognised:
        if key in flags:
            # The option is stored under its own name, with its name as value
            task_set_option(option_name, option_name)
            return True
    return False
def _batchuploader_daemon_dir():
    """ Return the directory holding the metadata/ and documents/ trees.

    CFG_BATCHUPLOADER_DAEMON_DIR is used as is when it is an absolute path,
    otherwise it is taken relative to CFG_PREFIX.
    """
    # os.path.isabs() also copes with an empty setting, where indexing
    # the string with [0] would raise IndexError.
    if os.path.isabs(CFG_BATCHUPLOADER_DAEMON_DIR):
        return CFG_BATCHUPLOADER_DAEMON_DIR
    return CFG_PREFIX + '/' + CFG_BATCHUPLOADER_DAEMON_DIR


def _upload_metadata_folder(files_dir, mode):
    """ Submit a bibupload task for every regular file found in files_dir.

    Each file is copied to a temporary file, submitted with the given
    bibupload mode switch and then moved to files_dir + "DONE/" under the
    name <original name>_<timestamp>_<job id>.
    A folder that cannot be listed, or whose DONE/ directory cannot be
    created, is reported on stderr and skipped.
    """
    files_done_dir = files_dir + "DONE/"
    try:
        files = os.listdir(files_dir)
    except OSError:
        # sys.exc_info() instead of "except OSError, e" / "as e" keeps this
        # valid whatever Python version runs the task.
        write_message(str(sys.exc_info()[1]), sys.stderr)
        return
    # Create directory DONE/ if doesn't exist
    try:
        os.mkdir(files_done_dir)
    except OSError:
        # Usually "already exists"; any other failure is caught just below
        pass
    if not os.path.isdir(files_done_dir):
        # Without DONE/ the files could not be moved away after submission
        # and would be submitted again on the next run.
        write_message("Cannot create directory %s" % files_done_dir,
                      sys.stderr)
        return
    for metafile in files:
        source = os.path.join(files_dir, metafile)
        if not os.path.isfile(source):
            continue
        # Create temporary file to be uploaded. mkstemp() creates the file
        # itself, so nobody else can grab the name before the copy.
        prefix = metafile + "_" + \
                 time.strftime("%Y%m%d%H%M%S", time.localtime()) + "_"
        (fd, filename) = tempfile.mkstemp(prefix=prefix)
        os.close(fd)
        shutil.copy(source, filename)
        # Send bibsched task
        jobid = str(task_low_level_submission('bibupload', 'batchupload',
                                              mode, filename))
        # Move file to done folder
        done_name = metafile + "_" + \
                    time.strftime("%Y%m%d%H%M%S", time.localtime()) + \
                    "_" + jobid
        os.rename(source, os.path.join(files_done_dir, done_name))
        task_sleep_now_if_required(can_stop_too=True)


def _upload_metadata(parent_dir):
    """ Process the insert/, append/, correct/ and replace/ folders found
    under parent_dir, updating the task progress after each folder.
    """
    progress = 0
    for folder in ["insert/", "append/", "correct/", "replace/"]:
        # The bibupload switch is the folder initial: -i, -a, -c or -r
        _upload_metadata_folder(parent_dir + folder, "-" + folder[0])
        progress += 1
        task_update_progress("Done %d out of 4." % progress)


def _upload_documents(parent_dir):
    """ Upload the documents found in the append/ and revise/ folders under
    parent_dir.

    For each folder the matching policies listed in
    CFG_BATCHUPLOADER_FILENAME_MATCHING_POLICY are tried in order until one
    of them yields no errors; the errors of every failed attempt are
    written to stderr.
    """
    matching_order = CFG_BATCHUPLOADER_FILENAME_MATCHING_POLICY
    for folder in ["append/", "revise/"]:
        for matching in matching_order:
            # Only the first element of the result (the errors) is used here
            errors = document_upload(folder=parent_dir + folder,
                                     matching=matching,
                                     mode=folder[:-1])[0]
            if not errors:
                break # All documents succedeed with that matching
            for error in errors:
                write_message("File: %s - %s with matching %s" %
                              (error[0], error[1], matching), sys.stderr)
        task_sleep_now_if_required(can_stop_too=True)


def task_run_core():
    """ Run the batch uploader in metadata or documents mode.

    Unless the task option 'documents' is set, walks through all
    directories where metadata files are located and submits them for
    upload; files are then moved to the corresponding DONE folders.
    With the 'documents' option, uploads the documents found in the
    append and revise folders instead.

    Returns 1.
    """
    tempfile.tempdir = CFG_TMPDIR
    daemon_dir = _batchuploader_daemon_dir()
    if not task_get_option('documents'):
        # Metadata upload
        _upload_metadata(daemon_dir + "/metadata/")
    else:
        # Documents upload
        _upload_documents(daemon_dir + "/documents/")
    return 1
def main():
    """ Set up the batch uploader bibtask and hand control to task_init().

    Registers the command line switches specific to this task together
    with the function that interprets them
    (task_submit_elaborate_specific_parameter) and the function that does
    the actual work (task_run_core).
    """
    task_init(authorization_action='runbatchuploader',
              authorization_msg="Batch Uploader",
              description="""Description:
The batch uploader has two different run modes.
If --metadata is specified (by default) then all files in folders insert,
append, correct and replace are uploaded using the corresponding mode.
If mode --documents is selected all documents present in folders named
append and revise are uploaded using the corresponding mode.
Parent directory for batch uploader must be specified in the
invenio configuration file.\n""",
              help_specific_usage=""" -m, --metadata\t Batch Uploader will look for metadata files in the corresponding folders
-d, --documents\t Batch Uploader will look for documents in the corresponding folders
""",
              version=__revision__,
              # getopt-style short options: neither -m nor -d takes a value,
              # so no ':' may follow them ("d:" would make -d swallow the
              # next command line argument).
              specific_params=("md", ["metadata", "documents"]),
              task_submit_elaborate_specific_parameter_fnc=
                  task_submit_elaborate_specific_parameter,
              task_run_fnc=task_run_core)
# Start the task only when this file is executed as a script.
if __name__ == '__main__':
    main()
Event Timeline
Log In to Comment