Page MenuHomec4science

cli.py
No OneTemporary

File Metadata

Created
Wed, Nov 27, 01:46
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2008, 2009, 2010, 2011, 2012, 2014 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
from __future__ import print_function
"""
BibDocAdmin CLI administration tool
"""
__revision__ = "$Id$"
import sys
import re
import os
import time
import fnmatch
from datetime import datetime
from logging import getLogger, debug, DEBUG
from optparse import OptionParser, OptionGroup, OptionValueError
from six import iteritems
from invenio.base.factory import with_app_context
from invenio.ext.logging import register_exception
from invenio.config import CFG_SITE_URL, CFG_BIBDOCFILE_FILEDIR, \
CFG_SITE_RECORD, CFG_TMPSHAREDDIR
from invenio.legacy.bibdocfile.api import BibRecDocs, BibDoc, InvenioBibDocFileError, \
nice_size, check_valid_url, clean_url, get_docname_from_url, \
guess_format_from_url, KEEP_OLD_VALUE, decompose_bibdocfile_fullpath, \
bibdocfile_url_to_bibdoc, decompose_bibdocfile_url, \
CFG_BIBDOCFILE_AVAILABLE_FLAGS
from intbitset import intbitset
from invenio.legacy.search_engine import perform_request_search
from invenio.utils.text import wrap_text_in_a_box, wait_for_user
from invenio.legacy.dbquery import run_sql
from invenio.legacy.bibsched.bibtask import task_low_level_submission
from invenio.utils.text import encode_for_xml
from invenio.legacy.websubmit.file_converter import can_perform_ocr
from invenio.utils.shell import retry_mkstemp
def _xml_mksubfield(key, subfield, fft):
return fft.get(key, None) is not None and '\t\t<subfield code="%s">%s</subfield>\n' % (subfield, encode_for_xml(str(fft[key]))) or ''
def _xml_mksubfields(key, subfield, fft):
ret = ""
for value in fft.get(key, []):
ret += '\t\t<subfield code="%s">%s</subfield>\n' % (subfield, encode_for_xml(str(value)))
return ret
def _xml_fft_creator(fft):
    """Serialize one FFT dictionary as a MARCXML ``FFT`` datafield string.

    The keys read from ``fft`` are url, docname, format, new_docname,
    doctype, description, comment, restriction, options and version.
    'options' produces one subfield per element; every other key produces
    at most one subfield.
    """
    debug('Input FFT structure: %s' % fft)
    # (key, subfield code) pairs emitted before the multi-valued 'options'.
    leading_subfields = (
        ('url', 'a'),
        ('docname', 'n'),
        ('format', 'f'),
        ('new_docname', 'm'),
        ('doctype', 't'),
        ('description', 'd'),
        ('comment', 'z'),
        ('restriction', 'r'),
    )
    pieces = ['\t<datafield tag ="FFT" ind1=" " ind2=" ">\n']
    for key, code in leading_subfields:
        pieces.append(_xml_mksubfield(key, code, fft))
    pieces.append(_xml_mksubfields('options', 'o', fft))
    pieces.append(_xml_mksubfield('version', 'v', fft))
    pieces.append('\t</datafield>\n')
    out = ''.join(pieces)
    debug('FFT created: %s' % out)
    return out
def ffts_to_xml(ffts_dict):
    """Transform a ``recid -> list of fft dictionaries`` mapping into MARCXML.

    :param ffts_dict: dictionary whose keys are record ids and whose values
        are lists of fft dictionaries.
    :return: one ``<record>`` element per recid that has a non-empty list,
        in ascending recid order; the empty string when nothing qualifies.
    """
    debug('Input FFTs dictionary: %s' % ffts_dict)
    chunks = []
    # sorted() works on both Python 2 lists and Python 3 key views, unlike
    # calling .sort() on the result of .keys().
    for recid in sorted(ffts_dict):
        ffts = ffts_dict[recid]
        if not ffts:
            continue
        chunks.append('<record>\n')
        chunks.append('\t<controlfield tag="001">%i</controlfield>\n' % recid)
        chunks.extend(_xml_fft_creator(fft) for fft in ffts)
        chunks.append('</record>\n')
    out = ''.join(chunks)
    debug('MARC to Upload: %s' % out)
    return out
_shift_re = re.compile("([-\+]{0,1})([\d]+)([dhms])")
def _parse_datetime(var):
"""Returns a date string according to the format string.
It can handle normal date strings and shifts with respect
to now."""
if not var:
return None
date = time.time()
factors = {"d":24*3600, "h":3600, "m":60, "s":1}
m = _shift_re.match(var)
if m:
sign = m.groups()[0] == "-" and -1 or 1
factor = factors[m.groups()[2]]
value = float(m.groups()[1])
return datetime.fromtimestamp(date + sign * factor * value)
else:
return datetime(*(time.strptime(var, "%Y-%m-%d %H:%M:%S")[0:6]))
# The code above is Python 2.4 compatible. The following is the 2.5
# version.
# return datetime.strptime(var, "%Y-%m-%d %H:%M:%S")
def _parse_date_range(var):
    """Split ``var`` on commas and parse it into a ``(low, high)`` tuple.

    A single item yields ``(low, None)``, two items yield ``(low, high)``;
    any other number of items yields ``(None, None)``. Each item is parsed
    with ``_parse_datetime``.
    """
    parts = var.split(",")
    how_many = len(parts)
    if how_many == 1:
        return _parse_datetime(parts[0]), None
    if how_many == 2:
        lower = _parse_datetime(parts[0])
        upper = _parse_datetime(parts[1])
        return lower, upper
    return None, None
def cli_quick_match_all_recids(options):
    """Return quickly an approximate (by excess) intbitset of matching recids.

    The selection is resolved in this order of precedence:

    1. ``options.url``: the record id decomposed from the URL;
    2. ``options.path``: the records linked to the document at that path;
    3. ``options.docids``: the records linked to any of those documents;
    4. otherwise ``options.recids`` (or every id in ``bibrec`` when it is
       None), narrowed by the record modification/creation date ranges
       (``md_rec``/``cd_rec``) and by the collection/pattern search.

    :param options: object carrying the parsed command line options; every
        attribute is read with ``getattr`` and may be missing.
    :return: an intbitset of record ids.
    """
    url = getattr(options, 'url', None)
    if url:
        return intbitset([decompose_bibdocfile_url(url)[0]])
    path = getattr(options, 'path', None)
    if path:
        docid = decompose_bibdocfile_fullpath(path)["doc_id"]
        return intbitset([link["recid"] for link in BibDoc(docid).bibrec_links])
    docids = getattr(options, 'docids', None)
    if docids:
        ids = []
        for docid in docids:
            ids.extend(link["recid"] for link in BibDoc(docid).bibrec_links)
        return intbitset(ids)
    collection = getattr(options, 'collection', None)
    pattern = getattr(options, 'pattern', None)
    recids = getattr(options, 'recids', None)
    # A missing date option is treated as an unbounded range instead of
    # failing on ``None[0]``.
    md_rec = getattr(options, 'md_rec', None) or (None, None)
    cd_rec = getattr(options, 'cd_rec', None) or (None, None)
    if recids is None:
        debug('Initially considering all the recids')
        recids = intbitset(run_sql('SELECT id FROM bibrec'))
        if not recids:
            print('WARNING: No record in the database', file=sys.stderr)
    date_constraints = (
        ('modification_date>=%s', md_rec[0]),
        ('modification_date<=%s', md_rec[1]),
        ('creation_date>=%s', cd_rec[0]),
        ('creation_date<=%s', cd_rec[1]),
    )
    tmp_date_query = [clause for clause, bound in date_constraints if bound is not None]
    tmp_date_params = tuple(bound for clause, bound in date_constraints if bound is not None)
    if tmp_date_query:
        query = 'SELECT id FROM bibrec WHERE %s' % ' AND '.join(tmp_date_query)
        debug('Query: %s, param: %s' % (query, tmp_date_params))
        # The remaining %s placeholders belong to run_sql: the query must
        # not be %-interpolated a second time before being executed.
        recids &= intbitset(run_sql(query, tmp_date_params))
        debug('After applying dates we obtain recids: %s' % recids)
        if not recids:
            print('WARNING: Time constraints for records are too strict', file=sys.stderr)
    if collection or pattern:
        recids &= intbitset(perform_request_search(cc=collection or '', p=pattern or ''))
        debug('After applyings pattern and collection we obtain recids: %s' % recids)
    debug('Quick recids: %s' % recids)
    return recids
def cli_quick_match_all_docids(options, recids=None):
    """Return quickly an approximate (by excess) intbitset of matching docids.

    The selection is resolved in this order of precedence:

    1. ``options.url``: the id of the document the URL refers to;
    2. ``options.path``: the id of the document stored at that path;
    3. otherwise ``options.docids`` (or, when it is None, every document
       linked in ``bibrec_bibdoc`` to one of ``recids``), narrowed by the
       deleted-documents policy and by the document modification/creation
       date ranges (``md_doc``/``cd_doc``).

    :param options: object carrying the parsed command line options; every
        attribute is read with ``getattr`` and may be missing.
    :param recids: optional precomputed record ids; computed with
        ``cli_quick_match_all_recids`` when needed and not supplied.
    :return: an intbitset of document ids.
    """
    url = getattr(options, 'url', None)
    if url:
        return intbitset([bibdocfile_url_to_bibdoc(url).get_id()])
    path = getattr(options, 'path', None)
    if path:
        # This function returns document ids, so the id decomposed from
        # the path is the answer (not the ids of the records linked to it).
        return intbitset([decompose_bibdocfile_fullpath(path)["doc_id"]])
    deleted_docs = getattr(options, 'deleted_docs', None)
    action_undelete = getattr(options, 'action', None) == 'undelete'
    docids = getattr(options, 'docids', None)
    # A missing date option is treated as an unbounded range instead of
    # failing on ``None[0]``.
    md_doc = getattr(options, 'md_doc', None) or (None, None)
    cd_doc = getattr(options, 'cd_doc', None) or (None, None)
    if docids is None:
        debug('Initially considering all the docids')
        if recids is None:
            recids = cli_quick_match_all_recids(options)
        docids = intbitset()
        for id_bibrec, id_bibdoc in run_sql('SELECT id_bibrec, id_bibdoc FROM bibrec_bibdoc'):
            if id_bibrec in recids:
                docids.add(id_bibdoc)
    else:
        debug('Initially considering this docids: %s' % docids)
    tmp_query = []
    if deleted_docs is None and action_undelete:
        # Undeleting only makes sense on documents that are deleted.
        deleted_docs = 'only'
    if deleted_docs == 'no':
        tmp_query.append('status<>"DELETED"')
    elif deleted_docs == 'only':
        tmp_query.append('status="DELETED"')
    date_constraints = (
        ('modification_date>=%s', md_doc[0]),
        ('modification_date<=%s', md_doc[1]),
        ('creation_date>=%s', cd_doc[0]),
        ('creation_date<=%s', cd_doc[1]),
    )
    tmp_query.extend(clause for clause, bound in date_constraints if bound is not None)
    tmp_params = tuple(bound for clause, bound in date_constraints if bound is not None)
    if tmp_query:
        query = 'SELECT id FROM bibdoc WHERE %s' % ' AND '.join(tmp_query)
        debug('Query: %s, param: %s' % (query, tmp_params))
        docids &= intbitset(run_sql(query, tmp_params))
        debug('After applying dates we obtain docids: %s' % docids)
    return docids
def cli_slow_match_single_recid(options, recid, docids=None):
    """Tell whether ``recid`` satisfies every record-level query option.

    When ``docids`` is given and non-empty, the record must own at least
    one document whose id is in ``docids``. When a docname pattern is
    given, at least one document name of the record must match it
    (case-sensitive wildcard match). The ``empty_recs`` option finally
    decides whether records with or without documents are accepted.
    """
    debug('cli_slow_match_single_recid checking: %s' % recid)
    deleted_docs = getattr(options, 'deleted_docs', None)
    deleted_recs = getattr(options, 'deleted_recs', None)
    empty_recs = getattr(options, 'empty_recs', None)
    docname = cli2docname(options)
    bibrecdocs = BibRecDocs(recid, deleted_too=(deleted_docs != 'no'))

    if bibrecdocs.deleted_p() and deleted_recs == 'no':
        return False
    if bibrecdocs.deleted_p() or deleted_recs == 'only':
        # NOTE(review): as written, a deleted record is never matched, even
        # with --with-deleted-recs=yes/only -- confirm this is intended.
        return False

    if docids:
        owns_wanted_doc = any(bibdoc.get_id() in docids
                              for bibdoc in bibrecdocs.list_bibdocs())
        if not owns_wanted_doc:
            return False
    if docname:
        has_matching_name = any(fnmatch.fnmatchcase(candidate, docname)
                                for candidate in bibrecdocs.get_bibdoc_names())
        if not has_matching_name:
            return False
    if bibrecdocs.empty_p():
        return empty_recs != 'no'
    return empty_recs != 'only'
def cli_slow_match_single_docid(options, docid, recids=None):
    """Tell whether ``docid`` satisfies every document-level query option.

    :param options: object carrying the parsed command line options.
    :param docid: id of the document to check.
    :param recids: accepted for interface compatibility; it is not
        consulted by the checks below.
    :return: False when a docname pattern is given and the docname of the
        document's first record link does not match it (or the document
        has no record link), or when the ``empty_docs`` policy excludes
        the document; True otherwise.
    """
    debug('cli_slow_match_single_docid checking: %s' % docid)
    empty_docs = getattr(options, 'empty_docs', None)
    docname = cli2docname(options)
    bibdoc = BibDoc.create_instance(docid)
    dn = None
    if bibdoc.bibrec_links:
        dn = bibdoc.bibrec_links[0]["docname"]
    # A document without any record link has no docname, hence it cannot
    # match a pattern (and fnmatchcase() must not be handed None).
    if docname and (dn is None or not fnmatch.fnmatchcase(dn, docname)):
        debug('docname %s does not match the pattern %s' % (repr(dn), repr(docname)))
        return False
    if empty_docs == 'no' and bibdoc.empty_p():
        debug('bibdoc is empty')
        return False
    if empty_docs == 'only' and not bibdoc.empty_p():
        debug('bibdoc is not empty')
        return False
    return True
def cli2recid(options, recids=None, docids=None):
    """Return the single recid matched by the command line options.

    :param options: object carrying the parsed command line options.
    :param recids: optional precomputed record ids to restrict to.
    :param docids: optional precomputed document ids to restrict to.
    :return: the only record id yielded by ``cli_recids_iterator``.
    :raise StandardError: when no record or more than one record matches.
    """
    recids = list(cli_recids_iterator(options, recids=recids, docids=docids))
    if len(recids) == 1:
        return recids[0]
    # Call-style raise: the ``raise Class, "message"`` form is not valid
    # syntax on Python 3.
    if recids:
        raise StandardError("More than one recid has been matched: %s" % recids)
    raise StandardError("No recids matched")
def cli2docid(options, recids=None, docids=None):
    """Return the single docid matched by the command line options.

    :param options: object carrying the parsed command line options.
    :param recids: optional precomputed record ids to restrict to.
    :param docids: optional precomputed document ids to restrict to.
    :return: the only document id yielded by ``cli_docids_iterator``.
    :raise StandardError: when no document or more than one document
        matches.
    """
    docids = list(cli_docids_iterator(options, recids=recids, docids=docids))
    if len(docids) == 1:
        return docids[0]
    # Call-style raise: the ``raise Class, "message"`` form is not valid
    # syntax on Python 3.
    if docids:
        raise StandardError("More than one docid has been matched: %s" % docids)
    raise StandardError("No docids matched")
def cli2flags(options):
    """
    Turn the comma separated ``options.flags`` string into a list of flags.

    Each item is stripped and upper-cased; an item that is not listed in
    CFG_BIBDOCFILE_AVAILABLE_FLAGS raises StandardError. An absent or
    empty option gives an empty list.
    """
    raw_flags = getattr(options, 'flags', None)
    if not raw_flags:
        return []
    normalized = [item.strip().upper() for item in raw_flags.split(',')]
    for candidate in normalized:
        if candidate in CFG_BIBDOCFILE_AVAILABLE_FLAGS:
            continue
        raise StandardError("%s is not among the valid flags: %s" % (candidate, ', '.join(CFG_BIBDOCFILE_AVAILABLE_FLAGS)))
    return normalized
def cli2description(options):
    """Return ``options.set_description``, or KEEP_OLD_VALUE when it is unset."""
    requested = getattr(options, 'set_description', None)
    return KEEP_OLD_VALUE if requested is None else requested
def cli2restriction(options):
    """Return ``options.set_restriction``, or KEEP_OLD_VALUE when it is unset."""
    requested = getattr(options, 'set_restriction', None)
    return KEEP_OLD_VALUE if requested is None else requested
def cli2comment(options):
    """Return ``options.set_comment``, or KEEP_OLD_VALUE when it is unset."""
    requested = getattr(options, 'set_comment', None)
    return KEEP_OLD_VALUE if requested is None else requested
def cli2doctype(options):
    """Return ``options.set_doctype``, falling back to 'Main' when it is
    missing or empty."""
    return getattr(options, 'set_doctype', None) or 'Main'
def cli2docname(options, url=None):
    """Return the docname to work with.

    ``options.docname`` wins when it is set; otherwise the name is derived
    from ``url`` with ``get_docname_from_url``; None is returned when
    neither is available.
    """
    explicit_name = getattr(options, 'docname', None)
    if explicit_name is not None:
        return explicit_name
    if url is None:
        return None
    return get_docname_from_url(url)
def cli2format(options, url=None):
    """Return the document format to work with.

    ``options.format`` wins when it is set; otherwise the format is guessed
    from ``url`` with ``guess_format_from_url``. OptionValueError is raised
    when neither is available.
    """
    explicit_format = getattr(options, 'format', None)
    if explicit_format is not None:
        return explicit_format
    if url is None:
        raise OptionValueError("Not enough information to retrieve a valid format")
    return guess_format_from_url(url)
def cli_recids_iterator(options, recids=None, docids=None):
    """Slowly iterate over all the matched recids.

    :param options: object carrying the parsed command line options.
    :param recids: candidate record ids; computed with
        ``cli_quick_match_all_recids`` when None.
    :param docids: optional document ids handed to
        ``cli_slow_match_single_recid`` for every candidate.
    :return: a generator yielding each candidate accepted by
        ``cli_slow_match_single_recid``.
    """
    debug('cli_recids_iterator')
    if recids is None:
        recids = cli_quick_match_all_recids(options)
    debug('working on recids: %s, docids: %s' % (recids, docids))
    for recid in recids:
        if cli_slow_match_single_recid(options, recid, docids):
            yield recid
    # The generator simply ends here: an explicit ``raise StopIteration``
    # is turned into RuntimeError by PEP 479.
def cli_docids_iterator(options, recids=None, docids=None):
    """Slowly iterate over all the matched docids.

    :param options: object carrying the parsed command line options.
    :param recids: candidate record ids; computed with
        ``cli_quick_match_all_recids`` when None.
    :param docids: candidate document ids; computed with
        ``cli_quick_match_all_docids`` when None.
    :return: a generator yielding each candidate accepted by
        ``cli_slow_match_single_docid``.
    """
    if recids is None:
        recids = cli_quick_match_all_recids(options)
    if docids is None:
        docids = cli_quick_match_all_docids(options, recids)
    for docid in docids:
        if cli_slow_match_single_docid(options, docid, recids):
            yield docid
    # The generator simply ends here: an explicit ``raise StopIteration``
    # is turned into RuntimeError by PEP 479.
def cli_get_stats(dummy):
    """Print, for every collection, statistics about its attached files.

    For each row of the ``collection`` table whose record list is not
    empty, three tables are printed: the count of latest-version files per
    format, the count per mimetype and the total file size. The argument
    is not used.
    """
    def print_table(title, table):
        # Nothing is printed, not even the title, for an empty result.
        if not table:
            return
        print("=" * 20, title, "=" * 20)
        for row in table:
            print("\t".join(str(elem) for elem in row))

    # (table title, query template); %s receives the "(id,id,...)" list.
    reports = (
        ("Formats", "SELECT COUNT(format) as c, format FROM bibrec_bibdoc AS bb JOIN bibdocfsinfo AS fs ON bb.id_bibdoc=fs.id_bibdoc WHERE id_bibrec in %s AND last_version=true GROUP BY format ORDER BY c DESC"),
        ("Mimetypes", "SELECT COUNT(mime) as c, mime FROM bibrec_bibdoc AS bb JOIN bibdocfsinfo AS fs ON bb.id_bibdoc=fs.id_bibdoc WHERE id_bibrec in %s AND last_version=true GROUP BY mime ORDER BY c DESC"),
        ("Sizes", "SELECT SUM(filesize) AS c FROM bibrec_bibdoc AS bb JOIN bibdocfsinfo AS fs ON bb.id_bibdoc=fs.id_bibdoc WHERE id_bibrec in %s AND last_version=true"),
    )
    for collection, reclist in run_sql("SELECT name, reclist FROM collection ORDER BY name"):
        print("-" * 79)
        print("Statistic for: %s " % collection)
        reclist = intbitset(reclist)
        if not reclist:
            continue
        sqlreclist = "(" + ','.join(str(elem) for elem in reclist) + ')'
        for title, template in reports:
            print_table(title, run_sql(template % sqlreclist)) # kwalitee: disable=sql
class OptionParserSpecial(OptionParser):
    """OptionParser whose help text may end with a free-form trailer."""

    def format_help(self, *args, **kwargs):
        """Return the standard help, followed by ``self.trailing_text``
        when that attribute has been set on the parser."""
        standard_help = OptionParser.format_help(self, *args, **kwargs)
        try:
            trailer = self.trailing_text
        except AttributeError:
            return standard_help
        return "%s\n%s\n" % (standard_help, trailer)
def prepare_option_parser():
    """Build and return the option parser of the bibdocfile command line.

    The parser is an ``OptionParserSpecial`` whose help ends with usage
    examples. Options are registered in groups: query options, actions for
    getting information, actions for setting information, actions for
    revising content, housekeeping actions and experimental options, plus
    a few ungrouped switches.

    :return: the configured parser; nothing is parsed here.
    """
    def _ids_ranges_callback(option, opt, value, parser):
        """Callback for optparse to parse a set of ids ranges in the form
        nnn1-nnn2,mmm1-mmm2... returning the corresponding intbitset.
        """
        try:
            debug('option: %s, opt: %s, value: %s, parser: %s' % (option, opt, value, parser))
            debug('Parsing range: %s' % value)
            value = ranges2ids(value)
            setattr(parser.values, option.dest, value)
        except Exception as e:
            raise OptionValueError("It's impossible to parse the range '%s' for option %s: %s" % (value, opt, e))

    def _date_range_callback(option, opt, value, parser):
        r"""Callback for optparse to parse a range of dates in the form
        [date1],[date2]. Both date1 and date2 could be optional.
        the date can be expressed absolutely ("%Y-%m-%d %H:%M:%S")
        or relatively (([-\+]{0,1})([\d]+)([dhms])) to the current time."""
        try:
            value = _parse_date_range(value)
            setattr(parser.values, option.dest, value)
        except Exception as e:
            raise OptionValueError("It's impossible to parse the range '%s' for option %s: %s" % (value, opt, e))

    parser = OptionParserSpecial(usage="usage: %prog [options]",
        #epilog="""With <query> you select the range of record/docnames/single files to work on. Note that some actions e.g. delete, append, revise etc. works at the docname level, while others like --set-comment, --set-description, at single file level and other can be applied in an iterative way to many records in a single run. Note that specifing docid(2) takes precedence over recid(2) which in turns takes precedence over pattern/collection search.""",
        version=__revision__)
    parser.trailing_text = """
Examples:
$ bibdocfile --append foo.tar.gz --recid=1
$ bibdocfile --revise http://foo.com?search=123 --with-docname='sam'
--format=pdf --recid=3 --set-docname='pippo' # revise for record 3
# the document sam, renaming it to pippo.
$ bibdocfile --delete --with-docname="*sam" --all # delete all documents
# starting ending
# with "sam"
$ bibdocfile --undelete -c "Test Collection" # undelete documents for
# the collection
$ bibdocfile --get-info --recids=1-4,6-8 # obtain informations
$ bibdocfile -r 1 --with-docname=foo --set-docname=bar # Rename a document
$ bibdocfile -r 1 --set-restriction "firerole: deny until '2011-01-01'
allow any" # set an embargo to all the documents attached to record 1
# (note the ^M or \\n before 'allow any')
# See also $r subfield in <%(site)s/help/admin/bibupload-admin-guide#3.6>
# and Firerole in <%(site)s/help/admin/webaccess-admin-guide#6>
$ bibdocfile --append x.pdf --recid=1 --with-flags='PDF/A,OCRED' # append
# to record 1 the file x.pdf specifying the PDF/A and OCRED flags
""" % {'site': CFG_SITE_URL}

    # Options selecting the records/documents to work on.
    query_options = OptionGroup(parser, 'Query options')
    query_options.add_option('-r', '--recids', action="callback", callback=_ids_ranges_callback, type='string', dest='recids', help='matches records by recids, e.g.: --recids=1-3,5-7')
    query_options.add_option('-d', '--docids', action="callback", callback=_ids_ranges_callback, type='string', dest='docids', help='matches documents by docids, e.g.: --docids=1-3,5-7')
    query_options.add_option('-a', '--all', action='store_true', dest='all', help='Select all the records')
    query_options.add_option("--with-deleted-recs", choices=['yes', 'no', 'only'], type="choice", dest="deleted_recs", help="'Yes' to also match deleted records, 'no' to exclude them, 'only' to match only deleted ones", metavar="yes/no/only", default='no')
    query_options.add_option("--with-deleted-docs", choices=['yes', 'no', 'only'], type="choice", dest="deleted_docs", help="'Yes' to also match deleted documents, 'no' to exclude them, 'only' to match only deleted ones (e.g. for undeletion)", metavar="yes/no/only", default='no')
    query_options.add_option("--with-empty-recs", choices=['yes', 'no', 'only'], type="choice", dest="empty_recs", help="'Yes' to also match records without attached documents, 'no' to exclude them, 'only' to consider only such records (e.g. for statistics)", metavar="yes/no/only", default='no')
    query_options.add_option("--with-empty-docs", choices=['yes', 'no', 'only'], type="choice", dest="empty_docs", help="'Yes' to also match documents without attached files, 'no' to exclude them, 'only' to consider only such documents (e.g. for sanity checking)", metavar="yes/no/only", default='no')
    query_options.add_option("--with-record-modification-date", action="callback", callback=_date_range_callback, dest="md_rec", nargs=1, type="string", default=(None, None), help="matches records modified date1 and date2; dates can be expressed relatively, e.g.:\"-5m,2030-2-23 04:40\" # matches records modified since 5 minutes ago until the 2030...", metavar="date1,date2")
    query_options.add_option("--with-record-creation-date", action="callback", callback=_date_range_callback, dest="cd_rec", nargs=1, type="string", default=(None, None), help="matches records created between date1 and date2; dates can be expressed relatively", metavar="date1,date2")
    query_options.add_option("--with-document-modification-date", action="callback", callback=_date_range_callback, dest="md_doc", nargs=1, type="string", default=(None, None), help="matches documents modified between date1 and date2; dates can be expressed relatively", metavar="date1,date2")
    query_options.add_option("--with-document-creation-date", action="callback", callback=_date_range_callback, dest="cd_doc", nargs=1, type="string", default=(None, None), help="matches documents created between date1 and date2; dates can be expressed relatively", metavar="date1,date2")
    query_options.add_option("--url", dest="url", help='matches the document referred by the URL, e.g. "%s/%s/1/files/foobar.pdf?version=2"' % (CFG_SITE_URL, CFG_SITE_RECORD))
    query_options.add_option("--path", dest="path", help='matches the document referred by the internal filesystem path, e.g. %s/g0/1/foobar.pdf\\;1' % CFG_BIBDOCFILE_FILEDIR)
    query_options.add_option("--with-docname", dest="docname", help='matches documents with the given docname (accept wildcards)')
    query_options.add_option("--with-doctype", dest="doctype", help='matches documents with the given doctype')
    query_options.add_option('-p', '--pattern', dest='pattern', help='matches records by pattern')
    query_options.add_option('-c', '--collection', dest='collection', help='matches records by collection')
    query_options.add_option('--force', dest='force', help='force an action even when it\'s not necessary e.g. textify on an already textified bibdoc.', action='store_true', default=False)
    parser.add_option_group(query_options)

    # Read-only actions; each stores its name in the shared 'action' dest.
    getting_information_options = OptionGroup(parser, 'Actions for getting information')
    getting_information_options.add_option('--get-info', dest='action', action='store_const', const='get-info', help='print all the informations about the matched record/documents')
    getting_information_options.add_option('--get-disk-usage', dest='action', action='store_const', const='get-disk-usage', help='print disk usage statistics of the matched documents')
    getting_information_options.add_option('--get-history', dest='action', action='store_const', const='get-history', help='print the matched documents history')
    getting_information_options.add_option('--get-stats', dest='action', action='store_const', const='get-stats', help='print some statistics of file properties grouped by collections')
    parser.add_option_group(getting_information_options)

    # Metadata changes; the --unset-* switches store '' in the same dest
    # as the corresponding --set-* option.
    setting_information_options = OptionGroup(parser, 'Actions for setting information')
    setting_information_options.add_option('--set-doctype', dest='set_doctype', help='specify the new doctype', metavar='doctype')
    setting_information_options.add_option('--set-description', dest='set_description', help='specify a description', metavar='description')
    setting_information_options.add_option('--set-comment', dest='set_comment', help='specify a comment', metavar='comment')
    setting_information_options.add_option('--set-restriction', dest='set_restriction', help='specify a restriction tag', metavar='restriction')
    setting_information_options.add_option('--set-docname', dest='new_docname', help='specifies a new docname for renaming', metavar='docname')
    setting_information_options.add_option("--unset-comment", action="store_const", const='', dest="set_comment", help="remove any comment")
    setting_information_options.add_option("--unset-descriptions", action="store_const", const='', dest="set_description", help="remove any description")
    setting_information_options.add_option("--unset-restrictions", action="store_const", const='', dest="set_restriction", help="remove any restriction")
    setting_information_options.add_option("--hide", dest="action", action='store_const', const='hide', help="hides matched documents and revisions")
    setting_information_options.add_option("--unhide", dest="action", action='store_const', const='unhide', help="unhides matched documents and revisions")
    parser.add_option_group(setting_information_options)

    revising_options = OptionGroup(parser, 'Action for revising content')
    revising_options.add_option("--append", dest='append_path', help='specify the URL/path of the file that will appended to the bibdoc (implies --with-empty-recs=yes)', metavar='PATH/URL')
    revising_options.add_option("--revise", dest='revise_path', help='specify the URL/path of the file that will revise the bibdoc', metavar='PATH/URL')
    revising_options.add_option("--revert", dest='action', action='store_const', const='revert', help='reverts a document to the specified version')
    revising_options.add_option("--delete", action='store_const', const='delete', dest='action', help='soft-delete the matched documents')
    revising_options.add_option("--hard-delete", action='store_const', const='hard-delete', dest='action', help='hard-delete the single matched document with a specific format and a specific revision (this operation is not revertible)')
    revising_options.add_option("--undelete", action='store_const', const='undelete', dest='action', help='undelete previosuly soft-deleted documents')
    revising_options.add_option("--purge", action='store_const', const='purge', dest='action', help='purge (i.e. hard-delete any format of any version prior to the latest version of) the matched documents')
    revising_options.add_option("--expunge", action='store_const', const='expunge', dest='action', help='expunge (i.e. hard-delete any version and formats of) the matched documents')
    revising_options.add_option("--with-version", dest="version", help="specifies the version(s) to be used with hide, unhide, e.g.: 1-2,3 or ALL. Specifies the version to be used with hard-delete and revert, e.g. 2")
    revising_options.add_option("--with-format", dest="format", help='to specify a format when appending/revising/deleting/reverting a document, e.g. "pdf"', metavar='FORMAT')
    revising_options.add_option("--with-hide-previous", dest='hide_previous', action='store_true', help='when revising, hides previous versions', default=False)
    revising_options.add_option("--with-flags", dest='flags', help='comma-separated optional list of flags used when appending/revising a document. Valid flags are: %s' % ', '.join(CFG_BIBDOCFILE_AVAILABLE_FLAGS), default=None)
    parser.add_option_group(revising_options)

    housekeeping_options = OptionGroup(parser, 'Actions for housekeeping')
    housekeeping_options.add_option("--check-md5", action='store_const', const='check-md5', dest='action', help='check md5 checksum validity of files')
    housekeeping_options.add_option("--check-format", action='store_const', const='check-format', dest='action', help='check if any format-related inconsistences exists')
    housekeeping_options.add_option("--check-duplicate-docnames", action='store_const', const='check-duplicate-docnames', dest='action', help='check for duplicate docnames associated with the same record')
    housekeeping_options.add_option("--update-md5", action='store_const', const='update-md5', dest='action', help='update md5 checksum of files')
    housekeeping_options.add_option("--fix-all", action='store_const', const='fix-all', dest='action', help='fix inconsistences in filesystem vs database vs MARC')
    housekeeping_options.add_option("--fix-marc", action='store_const', const='fix-marc', dest='action', help='synchronize MARC after filesystem/database')
    housekeeping_options.add_option("--fix-format", action='store_const', const='fix-format', dest='action', help='fix format related inconsistences')
    housekeeping_options.add_option("--fix-duplicate-docnames", action='store_const', const='fix-duplicate-docnames', dest='action', help='fix duplicate docnames associated with the same record')
    housekeeping_options.add_option("--fix-bibdocfsinfo-cache", action='store_const', const='fix-bibdocfsinfo-cache', dest='action', help='fix bibdocfsinfo cache related inconsistences')
    parser.add_option_group(housekeeping_options)

    experimental_options = OptionGroup(parser, 'Experimental options (do not expect to find them in the next release)')
    experimental_options.add_option('--textify', dest='action', action='store_const', const='textify', help='extract text from matched documents and store it for later indexing')
    experimental_options.add_option('--with-ocr', dest='perform_ocr', action='store_true', default=False, help='when used with --textify, wether to perform OCR')
    parser.add_option_group(experimental_options)

    parser.add_option('-D', '--debug', action='store_true', dest='debug', default=False)
    parser.add_option('-H', '--human-readable', dest='human_readable', action='store_true', default=False, help='print sizes in human readable format (e.g., 1KB 234MB 2GB)')
    # The dest contains hyphens, so the value is only reachable through
    # getattr(options, 'yes-i-know').
    parser.add_option('--yes-i-know', action='store_true', dest='yes-i-know', help='use with care!')
    return parser
def print_info(docid, info):
    """Print ``info`` on one line, prefixed by the integer ``docid`` and a
    colon."""
    line = '%i:%s' % (docid, info)
    print(line)
def bibupload_ffts(ffts, append=False, do_debug=False, interactive=True):
    """Turn an ffts dictionary into MARCXML and submit it to BibUpload.

    The XML is written to a temporary file in CFG_TMPSHAREDDIR, which is
    then made world readable (mode 0644), and a ``bibupload`` task is
    submitted for it.

    :param ffts: ``recid -> list of fft dictionaries`` mapping, as accepted
        by ``ffts_to_xml``.
    :param append: submit in append mode (``-a``) when True, in correct
        mode (``-c``) otherwise.
    :param do_debug: add ``-v9`` to the submitted task when True.
    :param interactive: when True, print the XML, ask for confirmation via
        ``wait_for_user`` before submitting and report the task id; a
        warning is printed on stderr when there is nothing to upload.
    :return: always True.
    """
    xml = ffts_to_xml(ffts)
    if not xml:
        if interactive:
            print("WARNING: no MARC to upload.", file=sys.stderr)
        return True
    if interactive:
        print(xml)
    tmp_file_fd, tmp_file_name = retry_mkstemp(suffix='.xml',
        prefix="bibdocfile_%s" % time.strftime("%Y-%m-%d_%H:%M:%S"),
        directory=CFG_TMPSHAREDDIR)
    try:
        # os.write() needs bytes: text is encoded explicitly so that this
        # also works where str is unicode.
        payload = xml if isinstance(xml, bytes) else xml.encode('utf-8')
        os.write(tmp_file_fd, payload)
    finally:
        # Never leak the descriptor, even when the write fails.
        os.close(tmp_file_fd)
    os.chmod(tmp_file_name, 0o644)
    if append:
        mode_switch = '-a'
        confirmation = "This will be appended via BibUpload"
        submitted = "BibUpload append submitted with id %s"
    else:
        mode_switch = '-c'
        confirmation = "This will be corrected via BibUpload"
        submitted = "BibUpload correct submitted with id %s"
    if interactive:
        wait_for_user(confirmation)
    task_args = ['bibupload', 'bibdocfile', mode_switch, tmp_file_name, '-N', 'FFT', '-S2']
    if do_debug:
        task_args.append('-v9')
    task = task_low_level_submission(*task_args)
    if interactive:
        print(submitted % task)
    return True
def ranges2ids(parse_string):
    """Parse a string of ids and id ranges into the corresponding intbitset.

    :param parse_string: comma separated items, each either a single id
        (``7``) or an inclusive range (``3-9``); a range written backwards
        (``9-3``) is accepted and normalized.
    :return: an intbitset holding every id mentioned.
    :raise ValueError: when an id is not a valid integer.
    """
    # Lazy range on both Python 2 (xrange) and Python 3 (range).
    from six.moves import range as lazy_range
    ids = intbitset()
    for arange in parse_string.split(","):
        tmp_ids = arange.split("-")
        if len(tmp_ids) == 1:
            ids.add(int(tmp_ids[0]))
        else:
            # Only the first two parts are used; sorting them is the
            # sanity check for reversed bounds.
            low, high = sorted((int(tmp_ids[0]), int(tmp_ids[1])))
            ids += lazy_range(low, high + 1)
    return ids
def cli_append(options, append_path):
    """Create a bibupload FFT task submission for appending a format.

    :param options: object carrying the parsed command line options; they
        must match exactly one record.
    :param append_path: URL or path of the file to append.
    :return: the result of ``bibupload_ffts`` called in append mode.
    :raise OptionValueError: when no docname can be determined from the
        options or from ``append_path``.
    """
    recid = cli2recid(options)
    comment = cli2comment(options)
    description = cli2description(options)
    restriction = cli2restriction(options)
    doctype = cli2doctype(options)
    docname = cli2docname(options, url=append_path)
    flags = cli2flags(options)
    if not docname:
        # Call-style raise: the ``raise Class, "message"`` form is not
        # valid syntax on Python 3.
        raise OptionValueError('Not enough information to retrieve a valid docname')
    docformat = cli2format(options, append_path)
    url = clean_url(append_path)
    check_valid_url(url)
    bibrecdocs = BibRecDocs(recid)
    if bibrecdocs.has_docname_p(docname) and bibrecdocs.get_bibdoc(docname).format_already_exists_p(docformat):
        # Same name and same format already attached: append under a new,
        # unique docname instead, after warning the user.
        new_docname = bibrecdocs.propose_unique_docname(docname)
        wait_for_user("WARNING: a document with name %s and format %s already exists for recid %s. A new document with name %s will be created instead." % (repr(docname), repr(docformat), repr(recid), repr(new_docname)))
        docname = new_docname
    ffts = {recid: [{
        'docname': docname,
        'comment': comment,
        'description': description,
        'restriction': restriction,
        'doctype': doctype,
        'format': docformat,
        'url': url,
        'options': flags,
    }]}
    return bibupload_ffts(ffts, append=True)
def cli_revise(options, revise_path):
    """Create a bibupload FFT task submission for revising a document.

    @param options: the parsed command line options. When hide_previous is
        set, the PERFORM_HIDE_PREVIOUS flag is added to the FFT options;
        new_docname, if any, is forwarded in the FFT.
    @param revise_path: path or URL of the file carrying the new revision.
    @return: whatever bibupload_ffts() returns.
    @raise OptionValueError: if no docname can be derived from the options
        or from revise_path.
    """
    recid = cli2recid(options)
    comment = cli2comment(options)
    description = cli2description(options)
    restriction = cli2restriction(options)
    docname = cli2docname(options, url=revise_path)
    hide_previous = getattr(options, 'hide_previous', None)
    flags = cli2flags(options)
    if hide_previous and 'PERFORM_HIDE_PREVIOUS' not in flags:
        flags.append('PERFORM_HIDE_PREVIOUS')
    if not docname:
        # Call syntax: "raise X, msg" is a syntax error on Python 3.
        raise OptionValueError('Not enough information to retrieve a valid docname')
    docformat = cli2format(options, revise_path)
    doctype = cli2doctype(options)
    url = clean_url(revise_path)
    new_docname = getattr(options, 'new_docname', None)
    check_valid_url(url)
    ffts = {recid : [{
        'docname' : docname,
        'new_docname' : new_docname,
        'comment' : comment,
        'description' : description,
        'restriction' : restriction,
        'doctype' : doctype,
        'format' : docformat,
        'url' : url,
        'options' : flags
    }]}
    return bibupload_ffts(ffts)
def cli_set_batch(options):
    """Change in batch the doctype, description, comment and restriction.

    For every matched docid one or more FFT entries are prepared for the
    first record the document is attached to. When a description or a
    comment is being set, one entry per latest file is produced, optionally
    restricted to the format given in options.format; otherwise a single
    entry carrying restriction and doctype is produced.

    @param options: the parsed command line options.
    @return: whatever bibupload_ffts() returns.
    """
    ffts = {}
    doctype = getattr(options, 'set_doctype', None)
    description = cli2description(options)
    comment = cli2comment(options)
    restriction = cli2restriction(options)
    with_format = getattr(options, 'format', None)
    for docid in cli_docids_iterator(options):
        bibdoc = BibDoc.create_instance(docid)
        recid = None
        docname = None
        if bibdoc.bibrec_links:
            # pick a sample recid from those to which a BibDoc is attached
            recid = bibdoc.bibrec_links[0]["recid"]
            docname = bibdoc.bibrec_links[0]["docname"]
        fft = []
        if description is not None or comment is not None:
            for bibdocfile in bibdoc.list_latest_files():
                docformat = bibdocfile.get_format()
                # The filter must look at this file's own format; comparing
                # with the `format` builtin can never match a string.
                if not with_format or with_format == docformat:
                    fft.append({
                        'docname': docname,
                        'restriction': restriction,
                        'comment': comment,
                        'description': description,
                        'format': docformat,
                        'doctype': doctype
                    })
        else:
            fft.append({
                'docname': docname,
                'restriction': restriction,
                'doctype': doctype,
            })
        # Several docids may belong to the same record: accumulate their
        # entries instead of letting the last document overwrite the others.
        ffts.setdefault(recid, []).extend(fft)
    return bibupload_ffts(ffts, append=False)
def cli_textify(options):
    """Extract text to let indexing on fulltext be possible.

    Text is extracted for each matched docid when options.force is set or
    when the document reports that it has no up-to-date text. OCR is used
    only if requested through options.perform_ocr and can_perform_ocr()
    agrees; otherwise a warning is printed and OCR is disabled.
    """
    force = getattr(options, 'force', None)
    perform_ocr = getattr(options, 'perform_ocr', None)
    if perform_ocr and not can_perform_ocr():
        print("WARNING: OCR requested but OCR is not possible", file=sys.stderr)
        perform_ocr = False
    additional = ' using OCR (this might take some time)' if perform_ocr else ''
    for docid in cli_docids_iterator(options):
        bibdoc = BibDoc.create_instance(docid)
        print('Extracting text for docid %s%s...' % (docid, additional), end=' ')
        sys.stdout.flush()
        #pylint: disable=E1103
        needs_text = force or (hasattr(bibdoc, "has_text") and not bibdoc.has_text(require_up_to_date=True))
        if not needs_text:
            print("not needed")
            continue
        try:
            #pylint: disable=E1103
            bibdoc.extract_text(perform_ocr=perform_ocr)
            print("DONE")
        except InvenioBibDocFileError as e:
            print("WARNING: %s" % e, file=sys.stderr)
def cli_rename(options):
    """Rename a docname within a recid.

    The current docname is read from the first record link of the selected
    document (None when it has no link); the recid comes from the options.
    """
    bibdoc = BibDoc.create_instance(cli2docid(options))
    links = bibdoc.bibrec_links
    current_docname = links[0]["docname"] if links else None
    rename_request = {
        'docname' : current_docname,
        'new_docname' : getattr(options, 'new_docname', None),
    }
    # the recid is read from the options, not from the document links
    target_recid = cli2recid(options)
    return bibupload_ffts({target_recid : [rename_request]}, append=False)
def cli_fix_bibdocfsinfo_cache(options):
    """Rebuild the bibdocfsinfo table according to what is available on filesystem.

    Every docid found in the bibdoc table is instantiated and synchronized
    to the database. Documents that cannot be instantiated are reported and
    skipped. When the synchronization fails, the first record the document
    is attached to (if any) is scheduled for cli_fix_format(); failures with
    no usable recid are still reported so that no error goes unnoticed.

    @param options: the parsed command line options, forwarded to
        cli_fix_format().
    """
    to_be_fixed = intbitset()
    for docid in intbitset(run_sql("SELECT id FROM bibdoc")):
        print("Fixing bibdocfsinfo table for docid %s..." % docid, end=' ')
        sys.stdout.flush()
        try:
            bibdoc = BibDoc(docid)
        except InvenioBibDocFileError as err:
            print(err)
            continue
        try:
            bibdoc._sync_to_db()
        except Exception as err:
            recid = None
            if bibdoc.bibrec_links:
                recid = bibdoc.bibrec_links[0]["recid"]
            if recid:
                to_be_fixed.add(recid)
                print("ERROR: %s, scheduling a fix for recid %s" % (err, recid))
            else:
                # Nothing can be scheduled, but the failure must be visible.
                print("ERROR %s" % (err, ))
        print("DONE")
    if to_be_fixed:
        cli_fix_format(options, recids=to_be_fixed)
    print("You can now add CFG_BIBDOCFILE_ENABLE_BIBDOCFSINFO_CACHE=1 to your invenio-local.conf file.")
def cli_fix_all(options):
    """Submit a FIX-ALL FFT for every docname of every matched record."""
    ffts = {}
    for recid in cli_recids_iterator(options):
        # one FIX-ALL entry per document name attached to the record
        ffts[recid] = [
            {'docname' : docname, 'doctype' : 'FIX-ALL'}
            for docname in BibRecDocs(recid).get_bibdoc_names()
        ]
    return bibupload_ffts(ffts, append=False)
def cli_fix_marc(options, explicit_recid_set=None, interactive=True):
    """Submit a FIX-MARC FFT for each record to be fixed.

    The records are those in explicit_recid_set when it is not None,
    otherwise those selected through the command line options.
    """
    if explicit_recid_set is None:
        recids = cli_recids_iterator(options)
    else:
        recids = explicit_recid_set
    ffts = dict((recid, [{'doctype' : 'FIX-MARC'}]) for recid in recids)
    return bibupload_ffts(ffts, append=False, interactive=interactive)
def cli_check_format(options):
    """Check if any format-related inconsistences exists.

    Problems are reported on stderr, a boxed summary is printed, and True is
    returned only when no record is broken and none has duplicate docnames.
    """
    broken_records = 0
    examined = 0
    found_duplicates = False
    for recid in cli_recids_iterator(options):
        examined += 1
        bibrecdocs = BibRecDocs(recid)
        # a record with duplicate docnames is counted as broken as well
        needs_fixing = not bibrecdocs.check_duplicate_docnames()
        if needs_fixing:
            print("recid %s has duplicate docnames!" % recid, file=sys.stderr)
            found_duplicates = True
        for docname in bibrecdocs.get_bibdoc_names():
            if bibrecdocs.check_format(docname):
                continue
            print("recid %s with docname %s need format fixing" % (recid, docname), file=sys.stderr)
            needs_fixing = True
        if needs_fixing:
            broken_records += 1
    if broken_records:
        summary = "%d out of %d records need their formats to be fixed." % (broken_records, examined)
    else:
        summary = "All records appear to be correct with respect to formats."
    if found_duplicates:
        summary += " Note however that at least one record appear to have duplicate docnames. You should better fix this situation by using --fix-duplicate-docnames."
    print(wrap_text_in_a_box(summary, style="conclusion"))
    return not (found_duplicates or broken_records)
def cli_check_duplicate_docnames(options):
    """Check if some record is connected with bibdoc having the same docnames.

    Returns True when every matched record passes the check, False otherwise.
    """
    examined = 0
    offenders = 0
    for recid in cli_recids_iterator(options):
        examined += 1
        if BibRecDocs(recid).check_duplicate_docnames():
            continue
        offenders += 1
        print("recid %s has duplicate docnames!" % recid, file=sys.stderr)
    if not offenders:
        print("All records appear to be correct with respect to duplicate docnames.")
        return True
    print("%d out of %d records have duplicate docnames." % (offenders, examined))
    return False
def cli_fix_format(options, recids=None):
    """Fix format-related inconsistences.

    @param options: the parsed command line options.
    @param recids: the records to examine; when empty or None they are taken
        from cli_recids_iterator(options).
    @return: True if no record had to be fixed, False otherwise.
    """
    fixed = intbitset()
    tot = 0
    if not recids:
        recids = cli_recids_iterator(options)
    for recid in recids:
        tot += 1
        bibrecdocs = BibRecDocs(recid)
        for docname in bibrecdocs.get_bibdoc_names():
            if not bibrecdocs.check_format(docname):
                if bibrecdocs.fix_format(docname, skip_check=True):
                    print("%i has been fixed for docname %s" % (recid, docname), file=sys.stderr)
                else:
                    print("%i has been fixed for docname %s. However note that a new bibdoc might have been created." % (recid, docname), file=sys.stderr)
                fixed.add(recid)
    if fixed:
        print("Now we need to synchronize MARC to reflect current changes.")
        cli_fix_marc(options, explicit_recid_set=fixed)
    # "<fixed> out of <examined>": the fixed count goes first.
    print(wrap_text_in_a_box("%i out of %i record needed to be fixed." % (len(fixed), tot), style="conclusion"))
    return not fixed
def cli_fix_duplicate_docnames(options):
    """Fix duplicate docnames.

    Records failing the duplicate-docnames check are fixed, then their MARC
    is synchronized. Returns True when nothing had to be fixed.
    """
    repaired = intbitset()
    examined = 0
    for recid in cli_recids_iterator(options):
        examined += 1
        bibrecdocs = BibRecDocs(recid)
        if bibrecdocs.check_duplicate_docnames():
            continue
        bibrecdocs.fix_duplicate_docnames(skip_check=True)
        print("%i has been fixed for duplicate docnames." % recid, file=sys.stderr)
        repaired.add(recid)
    if repaired:
        print("Now we need to synchronize MARC to reflect current changes.")
        cli_fix_marc(options, explicit_recid_set=repaired)
    summary = "%i out of %i record needed to be fixed." % (len(repaired), examined)
    print(wrap_text_in_a_box(summary, style="conclusion"))
    return not repaired
def cli_delete(options):
    """Delete the given docid_set.

    A DELETE FFT entry is queued for each matched docid, grouped under the
    first record the document is attached to (None when it has no link).
    """
    ffts = {}
    for docid in cli_docids_iterator(options):
        links = BibDoc.create_instance(docid).bibrec_links
        # only the 1st record link is considered
        if links:
            recid = links[0]["recid"]
            docname = links[0]["docname"]
        else:
            recid = docname = None
        if recid not in ffts:
            ffts[recid] = []
        ffts[recid].append({'docname' : docname, 'doctype' : 'DELETE'})
    return bibupload_ffts(ffts)
def cli_delete_file(options):
    """Delete the given file irreversibly.

    @param options: the parsed command line options; options.version must be
        a strictly positive integer (possibly as a string).
    @return: whatever bibupload_ffts() returns.
    @raise OptionValueError: if the version is missing or not a positive
        integer.
    """
    docid = cli2docid(options)
    recid = cli2recid(options, docids=intbitset([docid]))
    docformat = cli2format(options)
    bdr = BibRecDocs(recid)
    docname = bdr.get_docname(docid)
    version = getattr(options, 'version', None)
    try:
        if int(version) <= 0:
            raise ValueError
    except (TypeError, ValueError):
        # TypeError covers a missing (None) version, ValueError a malformed
        # or non-positive one.
        raise OptionValueError('when hard-deleting, version should be valid positive integer, not %s' % version)
    ffts = {recid : [{'docname' : docname, 'version' : version, 'format' : docformat, 'doctype' : 'DELETE-FILE'}]}
    return bibupload_ffts(ffts)
def cli_revert(options):
    """Revert a bibdoc to a given version.

    @param options: the parsed command line options; options.version must be
        a strictly positive integer (possibly as a string).
    @return: whatever bibupload_ffts() returns.
    @raise OptionValueError: if the version is missing or not a positive
        integer.
    """
    docid = cli2docid(options)
    recid = cli2recid(options, docids=intbitset([docid]))
    bdr = BibRecDocs(recid)
    docname = bdr.get_docname(docid)
    version = getattr(options, 'version', None)
    try:
        if int(version) <= 0:
            raise ValueError
    except (TypeError, ValueError):
        # TypeError covers a missing (None) version, ValueError a malformed
        # or non-positive one.
        raise OptionValueError('when reverting, version should be valid positive integer, not %s' % version)
    ffts = {recid : [{'docname' : docname, 'version' : version, 'doctype' : 'REVERT'}]}
    return bibupload_ffts(ffts)
def cli_undelete(options):
    """Undelete the deleted documents matching the given docname.

    The docname from the options is used as an fnmatch pattern over the
    names of deleted documents; when it does not start with 'DELETED-' it is
    prefixed with 'DELETED-*-', and when absent every deleted document is
    matched. After user confirmation the matched documents are undeleted
    with options.restriction, and the MARC of their records is fixed.

    @param options: the parsed command line options; its deleted_docs
        attribute is forced to 'only'.
    """
    docname = cli2docname(options)
    restriction = getattr(options, 'restriction', None)
    count = 0
    if not docname:
        docname = 'DELETED-*-*'
    if not docname.startswith('DELETED-'):
        docname = 'DELETED-*-' + docname
    to_be_undeleted = intbitset()
    fix_marc = intbitset()
    setattr(options, 'deleted_docs', 'only')
    for docid in cli_docids_iterator(options):
        bibdoc = BibDoc.create_instance(docid)
        links = bibdoc.bibrec_links
        dnold = links[0]["docname"] if links else None
        # A document with no name cannot be matched: fnmatch() would fail
        # on None, so such a document is skipped instead of aborting the run.
        if dnold is not None and bibdoc.get_status() == 'DELETED' and fnmatch.fnmatch(dnold, docname):
            to_be_undeleted.add(docid)
            # get the 1st recid to which the document is attached
            recid = links[0]["recid"]
            fix_marc.add(recid)
            count += 1
            print('%s (docid %s from recid %s) will be undeleted to restriction: %s' % (dnold, docid, recid, restriction))
    wait_for_user("I'll proceed with the undeletion")
    for docid in to_be_undeleted:
        bibdoc = BibDoc.create_instance(docid)
        bibdoc.undelete(restriction)
    cli_fix_marc(options, explicit_recid_set=fix_marc)
    print(wrap_text_in_a_box("%s bibdoc successfuly undeleted with status '%s'" % (count, restriction), style="conclusion"))
def cli_get_info(options):
    """Print all the info of the matched docids or recids.

    Documents are described one by one when options.docids is set;
    otherwise whole records are described.
    """
    debug('Getting info!')
    human_readable = bool(getattr(options, 'human_readable', None))
    debug('human_readable: %s' % human_readable)
    deleted_docs = getattr(options, 'deleted_docs', None) in ('yes', 'only')
    debug('deleted_docs: %s' % deleted_docs)
    if not getattr(options, 'docids', None):
        for recid in cli_recids_iterator(options):
            record_docs = BibRecDocs(recid, deleted_too=deleted_docs, human_readable=human_readable)
            sys.stdout.write(str(record_docs))
        return
    for docid in cli_docids_iterator(options):
        document = BibDoc.create_instance(docid, human_readable=human_readable)
        sys.stdout.write(str(document))
def cli_purge(options):
    """Purge the matched docids.

    A PURGE FFT entry is queued for every matched document whose first
    record link carries a recid; other documents are ignored.
    """
    ffts = {}
    for docid in cli_docids_iterator(options):
        links = BibDoc.create_instance(docid).bibrec_links
        if not links:
            continue
        recid = links[0]["recid"]
        docname = links[0]["docname"]
        if not recid:
            continue
        ffts.setdefault(recid, []).append({
            'docname' : docname,
            'doctype' : 'PURGE',
        })
    return bibupload_ffts(ffts)
def cli_expunge(options):
    """Expunge the matched docids.

    An EXPUNGE FFT entry is queued for every matched document whose first
    record link carries a recid; other documents are ignored.
    """
    ffts = {}
    for docid in cli_docids_iterator(options):
        links = BibDoc.create_instance(docid).bibrec_links
        if not links:
            continue
        #TODO: If we have a syntax for manipulating completely standalone objects,
        # this has to be modified
        recid = links[0]["recid"]
        docname = links[0]["docname"]
        if not recid:
            continue
        ffts.setdefault(recid, []).append({
            'docname' : docname,
            'doctype' : 'EXPUNGE',
        })
    return bibupload_ffts(ffts)
def cli_get_history(options):
    """Print, row by row, the history of every matched docid."""
    for docid in cli_docids_iterator(options):
        for row in BibDoc.create_instance(docid).get_history():
            print_info(docid, row)
def cli_get_disk_usage(options):
    """Print the space usage of a docid_set.

    For every matched docid the total size and the size of the latest
    version are printed, followed by a boxed grand total. Sizes go through
    nice_size() when options.human_readable is set, and are printed as
    returned otherwise.
    """
    def _as_is(amount):
        """Leave the size untouched (non human readable output)."""
        return amount

    if getattr(options, 'human_readable', None):
        render = nice_size
    else:
        render = _as_is
    grand_total = 0
    grand_total_latest = 0
    for docid in cli_docids_iterator(options):
        bibdoc = BibDoc.create_instance(docid)
        size = bibdoc.get_total_size()
        grand_total += size
        latest_size = bibdoc.get_total_size_latest_version()
        grand_total_latest += latest_size
        print_info(docid, 'size=%s' % render(size))
        print_info(docid, 'latest version size=%s' % render(latest_size))
    summary = 'total size: %s\n\nlatest version total size: %s' % (
        render(grand_total), render(grand_total_latest))
    print(wrap_text_in_a_box(summary, style='conclusion'))
def cli_check_md5(options):
    """Check the md5 sums of a docid_set.

    Each file failing its checksum is reported; a boxed summary with the
    number of failing files is printed at the end.
    """
    failing = 0
    for docid in cli_docids_iterator(options):
        bibdoc = BibDoc.create_instance(docid)
        if bibdoc.md5s.check():
            print_info(docid, 'checksum OK')
            continue
        # the document as a whole fails: find out which files are involved
        for afile in bibdoc.list_all_files():
            if afile.check():
                continue
            failing += 1
            print_info(docid, '%s failing checksum!' % afile.get_full_path())
    if failing:
        summary = '%i files failing' % failing
    else:
        summary = 'All files are correct'
    print(wrap_text_in_a_box(summary, style='conclusion'))
def cli_update_md5(options):
    """Update the md5 sums of a docid_set.

    Documents whose checksums already match are left alone. For the others
    the failing files are listed, the user is asked to confirm, then the
    md5s are recomputed and the document is synchronized to the database.
    """
    for docid in cli_docids_iterator(options):
        bibdoc = BibDoc.create_instance(docid)
        if bibdoc.md5s.check():
            print_info(docid, 'checksum OK')
            continue
        for afile in bibdoc.list_all_files():
            if afile.check():
                continue
            print_info(docid, '%s failing checksum!' % afile.get_full_path())
        wait_for_user('Updating the md5s of this document can hide real problems.')
        bibdoc.md5s.update(only_new=False)
        bibdoc._sync_to_db()
def cli_hide(options):
    """Hide the matched versions of documents.

    options.version is either 'all' (the default when the attribute is
    absent) or a range expression understood by ranges2ids(). Only
    documents whose first record link carries a recid are considered. After
    user confirmation the HIDDEN flag is set on every matched
    (version, format) pair and the MARC of the affected records is fixed.

    @param options: the parsed command line options.
    @return: whatever cli_fix_marc() returns.
    @raise OptionValueError: if the version expression cannot be parsed.
    """
    documents_to_be_hidden = {}
    to_be_fixed = intbitset()
    versions = getattr(options, 'version', 'all')
    if versions != 'all':
        try:
            versions = ranges2ids(versions)
        except Exception:
            # Any parsing failure is an option error; unlike a bare except
            # this lets KeyboardInterrupt and SystemExit through.
            raise OptionValueError('You should specify correct versions. Not %s' % versions)
    else:
        # presumably a set matching every version number -- see intbitset docs
        versions = intbitset(trailing_bits=True)
    for docid in cli_docids_iterator(options):
        bibdoc = BibDoc.create_instance(docid)
        recid = None
        if bibdoc.bibrec_links:
            recid = bibdoc.bibrec_links[0]["recid"]
        if recid:
            for bibdocfile in bibdoc.list_all_files():
                this_version = bibdocfile.get_version()
                this_format = bibdocfile.get_format()
                if this_version in versions:
                    documents_to_be_hidden.setdefault(docid, []).append((this_version, this_format))
                    to_be_fixed.add(recid)
                    print('%s (docid: %s, recid: %s) will be hidden' % (bibdocfile.get_full_name(), docid, recid))
    wait_for_user('Proceeding to hide the matched documents...')
    for docid, documents in iteritems(documents_to_be_hidden):
        bibdoc = BibDoc.create_instance(docid)
        for version, docformat in documents:
            bibdoc.set_flag('HIDDEN', docformat, version)
    return cli_fix_marc(options, to_be_fixed)
def cli_unhide(options):
    """Unhide the matched versions of documents.

    options.version is either 'all' (the default when the attribute is
    absent) or a range expression understood by ranges2ids(). Only
    documents whose first record link carries a recid are considered. After
    user confirmation the HIDDEN flag is removed from every matched
    (version, format) pair and the MARC of the affected records is fixed.

    @param options: the parsed command line options.
    @return: whatever cli_fix_marc() returns.
    @raise OptionValueError: if the version expression cannot be parsed.
    """
    documents_to_be_unhidden = {}
    to_be_fixed = intbitset()
    versions = getattr(options, 'version', 'all')
    if versions != 'all':
        try:
            versions = ranges2ids(versions)
        except Exception:
            # Any parsing failure is an option error; unlike a bare except
            # this lets KeyboardInterrupt and SystemExit through.
            raise OptionValueError('You should specify correct versions. Not %s' % versions)
    else:
        # presumably a set matching every version number -- see intbitset docs
        versions = intbitset(trailing_bits=True)
    for docid in cli_docids_iterator(options):
        bibdoc = BibDoc.create_instance(docid)
        recid = None
        if bibdoc.bibrec_links:
            recid = bibdoc.bibrec_links[0]["recid"]
        if recid:
            for bibdocfile in bibdoc.list_all_files():
                this_version = bibdocfile.get_version()
                this_format = bibdocfile.get_format()
                if this_version in versions:
                    documents_to_be_unhidden.setdefault(docid, []).append((this_version, this_format))
                    to_be_fixed.add(recid)
                    print('%s (docid: %s, recid: %s) will be unhidden' % (bibdocfile.get_full_name(), docid, recid))
    wait_for_user('Proceeding to unhide the matched documents...')
    for docid, documents in iteritems(documents_to_be_unhidden):
        bibdoc = BibDoc.create_instance(docid)
        for version, docformat in documents:
            bibdoc.unset_flag('HIDDEN', docformat, version)
    return cli_fix_marc(options, to_be_fixed)
@with_app_context()
def main():
    """Parse the command line and run the requested operation.

    Precedence: with no action, append path or revise path, a batch "set"
    or a rename is attempted; otherwise append, then revise, then the named
    action. A missing or unknown action exits with status 1. Any exception
    raised while running is registered, reported on stderr and turned into
    exit status 1.
    """
    parser = prepare_option_parser()
    (options, args) = parser.parse_args()
    if getattr(options, 'debug', None):
        getLogger().setLevel(DEBUG)
        debug('test')
    debug('options: %s, args: %s' % (options, args))
    action = getattr(options, 'action', None)
    append_path = getattr(options, 'append_path', None)
    revise_path = getattr(options, 'revise_path', None)
    try:
        if not (action or append_path or revise_path):
            batch_settings = ('set_doctype', 'set_comment',
                              'set_description', 'set_restriction')
            if any(getattr(options, name, None) is not None
                   for name in batch_settings):
                cli_set_batch(options)
            elif getattr(options, 'new_docname', None):
                cli_rename(options)
            else:
                print("ERROR: no action specified", file=sys.stderr)
                sys.exit(1)
        elif append_path:
            options.empty_recs = 'yes'
            options.empty_docs = 'yes'
            cli_append(options, append_path)
        elif revise_path:
            cli_revise(options, revise_path)
        else:
            handlers = {
                'textify': cli_textify,
                'get-history': cli_get_history,
                'get-info': cli_get_info,
                'get-disk-usage': cli_get_disk_usage,
                'check-md5': cli_check_md5,
                'update-md5': cli_update_md5,
                'fix-all': cli_fix_all,
                'fix-marc': cli_fix_marc,
                'delete': cli_delete,
                'hard-delete': cli_delete_file,
                'fix-duplicate-docnames': cli_fix_duplicate_docnames,
                'fix-format': cli_fix_format,
                'check-duplicate-docnames': cli_check_duplicate_docnames,
                'check-format': cli_check_format,
                'undelete': cli_undelete,
                'purge': cli_purge,
                'expunge': cli_expunge,
                'revert': cli_revert,
                'hide': cli_hide,
                'unhide': cli_unhide,
                'fix-bibdocfsinfo-cache': cli_fix_bibdocfsinfo_cache,
                'get-stats': cli_get_stats,
            }
            handler = handlers.get(action)
            if handler is None:
                print("ERROR: Action %s is not valid" % action, file=sys.stderr)
                sys.exit(1)
            if action == 'fix-bibdocfsinfo-cache':
                # this action sets empty_docs on the options before running
                options.empty_docs = 'yes'
            handler(options)
    except Exception as e:
        register_exception()
        print('ERROR: %s' % e, file=sys.stderr)
        sys.exit(1)

Event Timeline