raise EmailError(g._('The system is not attempting to send an email from %s, to %s, with body %s.') % (fromaddr, toaddr, body))
except EmailError:
register_exception()
return False
sent = False
while not sent and attempt_times > 0:
try:
sent = body.send()
except Exception:
register_exception()
if debug_level > 1:
try:
raise EmailError(g._('Error in sending message. Waiting %s seconds. Exception is %s, while sending email from %s to %s with body %s.') % (attempt_sleeptime, sys.exc_info()[0], fromaddr, toaddr, body))
except EmailError:
register_exception()
if not sent:
attempt_times -= 1
if attempt_times > 0: # sleep only if we shall retry again
sleep(attempt_sleeptime)
if not sent:
try:
raise EmailError(g._('Error in sending email from %s to %s with body %s.') % (fromaddr, toaddr, body))
Return a sentence describing when this exception was already seen.
"""
exc_log = run_sql("SELECT last_notified,last_seen,total FROM hstEXCEPTION WHERE name=%s AND filename=%s AND line=%s", (name, filename, line))
if exc_log:
    last_notified, last_seen, total = exc_log[0]
    return "This exception has already been seen %s times\n last time it was seen: %s\n last time it was notified: %s\n" % (total, last_seen.strftime("%Y-%m-%d %H:%M:%S"), last_notified.strftime("%Y-%m-%d %H:%M:%S"))
else:
    return "It is the first time this exception has been seen.\n"
def register_exception(stream='error',
req=None,
prefix='',
suffix='',
alert_admin=False,
subject=''):
"""
Log error exception to invenio.err and warning exception to invenio.log.
Errors will be logged together with client information (if req is
given).
Note: For sanity reasons, dynamic params such as PREFIX, SUFFIX and
local stack variables are checked for length, and only the first 500
chars of their values are printed.
@param stream: 'error' or 'warning'
@param req: mod_python request
@param prefix: a message to be printed before the exception in
the log
@param suffix: a message to be printed after the exception in
the log
@param alert_admin: whether to send the exception to the administrator via
email. Note this parameter is bypassed when
CFG_SITE_ADMIN_EMAIL_EXCEPTIONS is set to a value other than 1
@param subject: overrides the email subject
@return: 1 if successfully wrote to stream, 0 if not
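# Hedged usage sketch for register_exception(); the call site below is
# illustrative and `fetch_remote_record` is a hypothetical function:
#
#     try:
#         fetch_remote_record()
#     except Exception:
#         register_exception(req=req, prefix='while fetching record: ',
#                            alert_admin=True)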
body = bc_templates.tmpl_loan_on_desk_step2(user_id=user_id,
infos=infos,
ln=ln)
elif loan_id:
infos.append(_('The item with the barcode %(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s is on loan and cannot be checked out.') % {'x_barcode': value, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'})
body = bc_templates.tmpl_loan_on_desk_step2(user_id=user_id,
infos=infos,
ln=ln)
elif user_id is None:
infos.append(_('You must select one borrower.'))
body = bc_templates.tmpl_loan_on_desk_step1(result=None,
if item_status != CFG_BIBCIRCULATION_ITEM_STATUS_ON_SHELF:
message = _("%(x_strong_tag_open)sWARNING:%(x_strong_tag_close)s Note that item %(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s status is %(x_strong_tag_open)s%(x_status)s%(x_strong_tag_close)s") % {'x_barcode': value, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>', 'x_status': item_status}
infos.append(message)
if CFG_CERN_SITE:
library_type = db.get_library_type(library_id)
if library_type != CFG_BIBCIRCULATION_LIBRARY_TYPE_MAIN:
library_name = db.get_library_name(library_id)
message = _("%(x_strong_tag_open)sWARNING:%(x_strong_tag_close)s Note that item %(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s location is %(x_strong_tag_open)s%(x_location)s%(x_strong_tag_close)s") % {'x_barcode': value, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>', 'x_location': library_name}
infos.append(message)
if len(queue) != 0 and queue[0][0] != user_id:
message = _("Another user is waiting for the book: %(x_strong_tag_open)s%(x_title)s%(x_strong_tag_close)s. \n\n If you want continue with this loan choose %(x_strong_tag_open)s[Continue]%(x_strong_tag_close)s.") % {'x_title': book_title_from_MARC(recid), 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'}
infos.append(message)
body = bc_templates.tmpl_loan_on_desk_step3(user_id=user_id,
message = _("The items with barcode %(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s are already on loan.") % {'x_barcode': on_loan, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'}
infos.append(message)
body = bc_templates.tmpl_loan_on_desk_step1(result=None, key='',
string='', infos=infos,
ln=ln)
return page(title=_("Loan on desk"),
uid=id_user,
req=req,
body=body, language=ln,
navtrail=navtrail_previous_links,
lastupdated=__lastupdated__)
# validate the period of interest given by the admin
for date in due_date:
if validate_date_format(date) is False:
infos = []
message = _("The given due date %(x_strong_tag_open)s%(x_date)s%(x_strong_tag_close)s is not a valid date or date format") % {'x_date': date, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'}
infos.append(message)
list_of_books = []
for bc in list_of_barcodes:
recid = db.get_id_bibrec(bc)
(library_id, location) = db.get_lib_location(bc)
tup = (recid, bc, library_id, location)
list_of_books.append(tup)
body = bc_templates.tmpl_loan_on_desk_step3(user_id=user_id,
infos.append(_("A loan for the item %(x_strong_tag_open)s%(x_title)s%(x_strong_tag_close)s, with barcode %(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s, has been registered with success.") % {'x_title': book_title_from_MARC(recid), 'x_barcode': barcode, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'})
infos.append(_("You could enter the barcode for this user's next loan, if any."))
body = bc_templates.tmpl_loan_on_desk_step2(user_id=user_id,
body = bc_templates.tmpl_create_loan(request_id=request_id,
recid=recid,
borrower=borrower,
infos=infos,
ln=ln)
elif loan_id:
infos.append(_('The item with the barcode %(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s is on loan.') % {'x_barcode': barcode, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'})
borrower = db.get_borrower_details(borrower_id)
title = _("Create Loan")
body = bc_templates.tmpl_create_loan(request_id=request_id,
recid=recid,
borrower=borrower,
infos=infos,
ln=ln)
elif barcode not in list_of_barcodes:
infos.append(_('The given barcode "%(x_barcode)s" does not correspond to the requested item.') % {'x_barcode': barcode})
borrower = db.get_borrower_details(borrower_id)
title = _("Create Loan")
body = bc_templates.tmpl_create_loan(request_id=request_id,
infos.append(_('The item with the barcode %(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s is on loan.') % {'x_barcode': barcode, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'})
body = bc_templates.tmpl_loan_return(infos=infos, ln=ln)
elif loan_id is None:
message = _("The item the with barcode %(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s is not on loan. Please, try again.") % {'x_barcode': barcode, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'}
infos.append(message)
body = bc_templates.tmpl_loan_return(infos=infos, ln=ln)
else:
library_id = db.get_item_info(barcode)[1]
if CFG_CERN_SITE:
library_type = db.get_library_type(library_id)
if library_type != CFG_BIBCIRCULATION_LIBRARY_TYPE_MAIN:
library_name = db.get_library_name(library_id)
message = _("%(x_strong_tag_open)sWARNING:%(x_strong_tag_close)s Note that item %(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s location is %(x_strong_tag_open)s%(x_location)s%(x_strong_tag_close)s") % {'x_barcode': barcode, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>', 'x_location': library_name}
# validate the period of interest given by the admin
if validate_date_format(period_from) is False:
infos = []
infos.append(_("The period of interest %(x_strong_tag_open)sFrom: %(x_date)s%(x_strong_tag_close)s is not a valid date or date format") % {'x_date': period_from, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'})
body = bc_templates.tmpl_place_new_request_step2(barcode=barcode,
recid=recid,
user_info=user_info,
infos=infos,
ln=ln)
return page(title=_("New request"),
uid=id_user,
req=req,
body=body,
language=ln,
navtrail=navtrail_previous_links,
lastupdated=__lastupdated__)
elif validate_date_format(period_to) is False:
infos = []
infos.append(_("The period of interest %(x_strong_tag_open)sTo: %(x_date)s%(x_strong_tag_close)s is not a valid date or date format") % {'x_date': period_to, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'})
body = bc_templates.tmpl_place_new_request_step2(barcode=barcode,
infos.append(_("Item with barcode %(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s is already on loan.") % {'x_barcode': barcode, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'})
body = bc_templates.tmpl_create_new_loan_step1(borrower=borrower,
infos=infos,
ln=ln)
elif loan_id:
infos.append(_('The item with the barcode %(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s is on loan.') % {'x_barcode': barcode, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'})
borrower = db.get_borrower_details(borrower_id)
title = _("New loan")
body = bc_templates.tmpl_create_new_loan_step1(borrower=borrower,
message = _("Another user is waiting for this book %(x_strong_tag_open)s%(x_title)s%(x_strong_tag_close)s.") % {'x_title': title, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'}
message += '\n\n'
message += _("Do you want renew this loan anyway?")
infos.append(_("Item <strong>[%s]</strong> updated, but the <strong>barcode was not modified</strong> because it is already in use.") % (old_barcode))
else:
if db.update_barcode(old_barcode, barcode):
infos.append(_("Item <strong>[%s]</strong> updated to <strong>[%s]</strong> with success.") % (old_barcode, barcode))
else:
infos.append(_("Item <strong>[%s]</strong> updated, but the <strong>barcode was not modified</strong> because it was not found (!?).") % (old_barcode))
message = _("Another user is waiting for this book %(x_strong_tag_open)s%(x_title)s%(x_strong_tag_close)s.") % {'x_title': title, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'}
message += '\n\n'
message += _("Do you want renew this loan anyway?")
message = _("Another user is waiting for this book %(x_strong_tag_open)s%(x_title)s%(x_strong_tag_close)s.") % {'x_title': title, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'}
message += '\n\n'
message += _("Do you want renew this loan anyway?")
if validate_date_format(period_of_interest_from) is False:
infos = []
infos.append(_("The period of interest %(x_strong_tag_open)sFrom: %(x_date)s%(x_strong_tag_close)s is not a valid date or date format") % {'x_date': period_of_interest_from, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'})
body = bc_templates.tmpl_register_ill_request_with_no_recid_step1(
infos=infos,
borrower_id=None,
admin=True,
ln=ln)
elif validate_date_format(period_of_interest_to) is False:
infos = []
infos.append(_("The period of interest %(x_strong_tag_open)sTo: %(x_date)s%(x_strong_tag_close)s is not a valid date or date format") % {'x_date': period_of_interest_to, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'})
body = bc_templates.tmpl_register_ill_request_with_no_recid_step1(
if validate_date_format(period_of_interest_from) is False:
infos = []
infos.append(_("The period of interest %(x_strong_tag_open)sFrom: %(x_date)s%(x_strong_tag_close)s is not a valid date or date format") % {'x_date': period_of_interest_from, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'})
body = bc_templates.tmpl_register_ill_article_request_step1(infos=infos,
ln=ln)
elif validate_date_format(period_of_interest_to) is False:
infos = []
infos.append(_("The period of interest %(x_strong_tag_open)sTo: %(x_date)s%(x_strong_tag_close)s is not a valid date or date format") % {'x_date': period_of_interest_to, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'})
body = bc_templates.tmpl_register_ill_article_request_step1(infos=infos,
Returns the dictionary of all bibdoc objects belonging to a recid.
Keys in the dictionary are document names and values are BibDoc objects.
If C{doctype} is set, it returns just the bibdocs of that doctype.
@param doctype: the optional doctype.
@type doctype: string
@return: the dictionary of bibdocs.
@rtype: dictionary of docname -> BibDoc
"""
if not doctype:
return dict((k,v) for (k,(v,_)) in self.bibdocs.iteritems())
res = {}
for (docname, (doc, attachmenttype)) in self.bibdocs.iteritems():
if attachmenttype == doctype:
res[docname] = doc
return res
def list_bibdocs(self, doctype=None):
"""
Returns the list of all bibdoc objects belonging to a recid.
If C{doctype} is set, it returns just the bibdocs of that doctype.
@param doctype: the optional doctype.
@type doctype: string
@return: the list of bibdocs.
@rtype: list of BibDoc
"""
if not doctype:
return [d for (d,_) in self.bibdocs.values()]
else:
return [bibdoc for (bibdoc, attype) in self.bibdocs.values() if doctype == attype]
def get_bibdoc_names(self, doctype=None):
"""
Returns all the names of the documents associated with the bibrec.
If C{doctype} is set, restrict the result to documents of that doctype.
@param doctype: the optional doctype.
@type doctype: string
@return: the list of document names.
@rtype: list of string
"""
return [docname for (docname, dummy) in self.list_bibdocs_by_names(doctype).items()]
def check_file_exists(self, path, f_format):
"""
Check if a file with the same content as the file pointed to by C{path}
is already attached to this record.
@param path: the file to be checked against.
@type path: string
@return: True if a file with the requested content is already attached
to the record.
@rtype: bool
"""
size = os.path.getsize(path)
# Let's consider all the latest files
files = self.list_latest_files()
# Let's consider all the latest files with same size
potential = [afile for afile in files if afile.get_size() == size and afile.format == f_format]
if potential:
checksum = calculate_md5(path)
# Let's consider all the latest files with the same size and the
# same checksum
potential = [afile for afile in potential if afile.get_checksum() == checksum]
if potential:
potential = [afile for afile in potential if \
filecmp.cmp(afile.get_full_path(), path)]
if potential:
return True
else:
# Gosh! How unlucky, same size, same checksum but not same
# content!
pass
return False
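# A self-contained sketch of the duplicate-detection cascade above (size,
# then MD5, then byte-by-byte comparison), assuming plain file paths in
# place of BibDocFile objects:
import filecmp
import hashlib
import os.path

def file_content_already_known(path, candidate_paths):
    """Return True if any candidate has exactly the same content as path."""
    size = os.path.getsize(path)
    # Cheap filter first: only files of identical size can match.
    same_size = [c for c in candidate_paths if os.path.getsize(c) == size]
    if not same_size:
        return False
    # Then compare checksums, and finally the actual bytes.
    md5 = hashlib.md5(open(path, 'rb').read()).hexdigest()
    same_md5 = [c for c in same_size
                if hashlib.md5(open(c, 'rb').read()).hexdigest() == md5]
    return any(filecmp.cmp(c, path, shallow=False) for c in same_md5)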
def propose_unique_docname(self, docname):
"""
Given C{docname}, return a new docname that is not already attached to
the record.
@param docname: the reference docname.
@type docname: string
@return: a docname not already attached.
@rtype: string
"""
docname = normalize_docname(docname)
goodname = docname
i = 1
while goodname in self.get_bibdoc_names():
i += 1
goodname = "%s_%s" % (docname, i)
return goodname
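# Worked example of the uniquifying loop above, assuming a plain set of
# existing names in place of self.get_bibdoc_names():
def propose_unique(name, existing):
    """Return name, or name_2, name_3, ... until an unused one is found."""
    goodname, i = name, 1
    while goodname in existing:
        i += 1
        goodname = "%s_%s" % (name, i)
    return goodname

assert propose_unique('report', set(['report', 'report_2'])) == 'report_3'
assert propose_unique('thesis', set()) == 'thesis'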
def merge_bibdocs(self, docname1, docname2):
"""
This method merges C{docname2} into C{docname1}.
1. Given all the formats of the latest version of the files
attached to C{docname2}, these files are added as new formats
into C{docname1}.
2. C{docname2} is marked as deleted.
@raise InvenioBibDocFileError: if at least one format in C{docname2}
already exists in C{docname1}. (In this case the two bibdocs are
preserved)
@note: comments and descriptions are also copied.
@note: if C{docname2} has a I{restriction} (i.e. if the I{status} is
set) and C{docname1} doesn't, the restriction is imported.
"""
bibdoc1 = self.get_bibdoc(docname1)
bibdoc2 = self.get_bibdoc(docname2)
## Check for possibility
for bibdocfile in bibdoc2.list_latest_files():
docformat = bibdocfile.get_format()
if bibdoc1.format_already_exists_p(docformat):
raise InvenioBibDocFileError('Format %s already exists in bibdoc %s of record %s. It\'s impossible to merge bibdoc %s into it.' % (docformat, docname1, self.id, docname2))
Algorithm that transforms a broken/old bibdoc into a coherent one.
Think of it as being the fsck of BibDocs.
- All the files in the bibdoc directory will be renamed according
to the document name. Proper .recid, .type, .md5 files will be
created/updated.
- In case of more than one file with the same format version, a new
bibdoc will be created in order to hold those files.
@param docname: the document name that needs to be fixed.
@type docname: string
@return: the list of newly created bibdocs if any.
@rtype: list of BibDoc
@raise InvenioBibDocFileError: in case of issues that can not be
fixed automatically.
"""
bibdoc = self.get_bibdoc(docname)
versions = {}
res = []
new_bibdocs = [] # List of files with the same version/format of
# existing file which need new bibdoc.
counter = 0
zero_version_bug = False
if os.path.exists(bibdoc.basedir):
for filename in os.listdir(bibdoc.basedir):
if filename[0] != '.' and ';' in filename:
name, version = filename.split(';')
try:
version = int(version)
except ValueError:
# Strange name
register_exception()
raise InvenioBibDocFileError, "A file called %s exists under %s. This is not a valid name. After the ';' there must be an integer representing the file version. Please, manually fix this file either by renaming or by deleting it." % (filename, bibdoc.basedir)
raise InvenioBibDocFileError, "Error in renaming '%s' to '%s': '%s'" % ('%s/%s' % (bibdoc.basedir, filename), '%s/%s' % (bibdoc.basedir, new_name), e)
if versions[version].has_key(docformat):
new_bibdocs.append((new_name, version))
else:
versions[version][docformat] = new_name
counter += 1
elif filename[0] != '.':
# Strange name
register_exception()
raise InvenioBibDocFileError, "A file called %s exists under %s. This is not a valid name. There should be a ';' followed by an integer representing the file version. Please, manually fix this file either by renaming or by deleting it." % (filename, bibdoc.basedir)
return (res and [x[0] for x in res]) or [] # after migrating to python 2.6, can be rewritten using x if y else z syntax: return [x[0] for x in res] if res else []
def _database_get_namespaces(self):
"""Read the database to discover namespaces declared in a given MoreInfo"""
if cli_slow_match_single_docid(options, docid, recids):
yield docid
raise StopIteration
def cli_get_stats(dummy):
"""Print per every collection some stats"""
def print_table(title, table):
if table:
print "=" * 20, title, "=" * 20
for row in table:
print "\t".join(str(elem) for elem in row)
for collection, reclist in run_sql("SELECT name, reclist FROM collection ORDER BY name"):
print "-" * 79
print "Statistic for: %s " % collection
reclist = intbitset(reclist)
if reclist:
sqlreclist = "(" + ','.join(str(elem) for elem in reclist) + ')'
print_table("Formats", run_sql("SELECT COUNT(format) as c, format FROM bibrec_bibdoc AS bb JOIN bibdocfsinfo AS fs ON bb.id_bibdoc=fs.id_bibdoc WHERE id_bibrec in %s AND last_version=true GROUP BY format ORDER BY c DESC" % sqlreclist)) # kwalitee: disable=sql
print_table("Mimetypes", run_sql("SELECT COUNT(mime) as c, mime FROM bibrec_bibdoc AS bb JOIN bibdocfsinfo AS fs ON bb.id_bibdoc=fs.id_bibdoc WHERE id_bibrec in %s AND last_version=true GROUP BY mime ORDER BY c DESC" % sqlreclist)) # kwalitee: disable=sql
print_table("Sizes", run_sql("SELECT SUM(filesize) AS c FROM bibrec_bibdoc AS bb JOIN bibdocfsinfo AS fs ON bb.id_bibdoc=fs.id_bibdoc WHERE id_bibrec in %s AND last_version=true" % sqlreclist)) # kwalitee: disable=sql
class OptionParserSpecial(OptionParser):
def format_help(self, *args, **kwargs):
result = OptionParser.format_help(self, *args, **kwargs)
#epilog="""With <query> you select the range of records/docnames/single files to work on. Note that some actions, e.g. delete, append, revise, work at the docname level, while others, like --set-comment, --set-description, work at the single-file level, and others can be applied iteratively to many records in a single run. Note that specifying docid(2) takes precedence over recid(2), which in turn takes precedence over pattern/collection search.""",
query_options.add_option('-a', '--all', action='store_true', dest='all', help='Select all the records')
query_options.add_option("--with-deleted-recs", choices=['yes', 'no', 'only'], type="choice", dest="deleted_recs", help="'Yes' to also match deleted records, 'no' to exclude them, 'only' to match only deleted ones", metavar="yes/no/only", default='no')
query_options.add_option("--with-deleted-docs", choices=['yes', 'no', 'only'], type="choice", dest="deleted_docs", help="'Yes' to also match deleted documents, 'no' to exclude them, 'only' to match only deleted ones (e.g. for undeletion)", metavar="yes/no/only", default='no')
query_options.add_option("--with-empty-recs", choices=['yes', 'no', 'only'], type="choice", dest="empty_recs", help="'Yes' to also match records without attached documents, 'no' to exclude them, 'only' to consider only such records (e.g. for statistics)", metavar="yes/no/only", default='no')
query_options.add_option("--with-empty-docs", choices=['yes', 'no', 'only'], type="choice", dest="empty_docs", help="'Yes' to also match documents without attached files, 'no' to exclude them, 'only' to consider only such documents (e.g. for sanity checking)", metavar="yes/no/only", default='no')
query_options.add_option("--with-record-modification-date", action="callback", callback=_date_range_callback, dest="md_rec", nargs=1, type="string", default=(None, None), help="matches records modified date1 and date2; dates can be expressed relatively, e.g.:\"-5m,2030-2-23 04:40\" # matches records modified since 5 minutes ago until the 2030...", metavar="date1,date2")
query_options.add_option("--with-record-creation-date", action="callback", callback=_date_range_callback, dest="cd_rec", nargs=1, type="string", default=(None, None), help="matches records created between date1 and date2; dates can be expressed relatively", metavar="date1,date2")
query_options.add_option("--with-document-modification-date", action="callback", callback=_date_range_callback, dest="md_doc", nargs=1, type="string", default=(None, None), help="matches documents modified between date1 and date2; dates can be expressed relatively", metavar="date1,date2")
query_options.add_option("--with-document-creation-date", action="callback", callback=_date_range_callback, dest="cd_doc", nargs=1, type="string", default=(None, None), help="matches documents created between date1 and date2; dates can be expressed relatively", metavar="date1,date2")
query_options.add_option("--url", dest="url", help='matches the document referred by the URL, e.g. "%s/%s/1/files/foobar.pdf?version=2"' % (CFG_SITE_URL, CFG_SITE_RECORD))
query_options.add_option("--path", dest="path", help='matches the document referred by the internal filesystem path, e.g. %s/g0/1/foobar.pdf\\;1' % CFG_BIBDOCFILE_FILEDIR)
query_options.add_option("--with-docname", dest="docname", help='matches documents with the given docname (accept wildcards)')
query_options.add_option("--with-doctype", dest="doctype", help='matches documents with the given doctype')
query_options.add_option('-p', '--pattern', dest='pattern', help='matches records by pattern')
query_options.add_option('-c', '--collection', dest='collection', help='matches records by collection')
query_options.add_option('--force', dest='force', help='force an action even when it\'s not necessary e.g. textify on an already textified bibdoc.', action='store_true', default=False)
parser.add_option_group(query_options)
getting_information_options = OptionGroup(parser, 'Actions for getting information')
getting_information_options.add_option('--get-info', dest='action', action='store_const', const='get-info', help='print all the information about the matched record/documents')
getting_information_options.add_option('--get-disk-usage', dest='action', action='store_const', const='get-disk-usage', help='print disk usage statistics of the matched documents')
getting_information_options.add_option('--get-history', dest='action', action='store_const', const='get-history', help='print the matched documents history')
getting_information_options.add_option('--get-stats', dest='action', action='store_const', const='get-stats', help='print some statistics of file properties grouped by collections')
revising_options = OptionGroup(parser, 'Action for revising content')
revising_options.add_option("--append", dest='append_path', help='specify the URL/path of the file that will appended to the bibdoc (implies --with-empty-recs=yes)', metavar='PATH/URL')
revising_options.add_option("--revise", dest='revise_path', help='specify the URL/path of the file that will revise the bibdoc', metavar='PATH/URL')
revising_options.add_option("--revert", dest='action', action='store_const', const='revert', help='reverts a document to the specified version')
revising_options.add_option("--delete", action='store_const', const='delete', dest='action', help='soft-delete the matched documents')
revising_options.add_option("--hard-delete", action='store_const', const='hard-delete', dest='action', help='hard-delete the single matched document with a specific format and a specific revision (this operation is not revertible)')
revising_options.add_option("--purge", action='store_const', const='purge', dest='action', help='purge (i.e. hard-delete any format of any version prior to the latest version of) the matched documents')
revising_options.add_option("--expunge", action='store_const', const='expunge', dest='action', help='expunge (i.e. hard-delete any version and formats of) the matched documents')
revising_options.add_option("--with-version", dest="version", help="specifies the version(s) to be used with hide, unhide, e.g.: 1-2,3 or ALL. Specifies the version to be used with hard-delete and revert, e.g. 2")
revising_options.add_option("--with-format", dest="format", help='to specify a format when appending/revising/deleting/reverting a document, e.g. "pdf"', metavar='FORMAT')
revising_options.add_option("--with-flags", dest='flags', help='comma-separated optional list of flags used when appending/revising a document. Valid flags are: %s' % ', '.join(CFG_BIBDOCFILE_AVAILABLE_FLAGS), default=None)
parser.add_option_group(revising_options)
housekeeping_options = OptionGroup(parser, 'Actions for housekeeping')
housekeeping_options.add_option("--check-md5", action='store_const', const='check-md5', dest='action', help='check md5 checksum validity of files')
housekeeping_options.add_option("--check-format", action='store_const', const='check-format', dest='action', help='check if any format-related inconsistences exists')
housekeeping_options.add_option("--check-duplicate-docnames", action='store_const', const='check-duplicate-docnames', dest='action', help='check for duplicate docnames associated with the same record')
housekeeping_options.add_option("--update-md5", action='store_const', const='update-md5', dest='action', help='update md5 checksum of files')
housekeeping_options.add_option("--fix-all", action='store_const', const='fix-all', dest='action', help='fix inconsistences in filesystem vs database vs MARC')
housekeeping_options.add_option("--fix-marc", action='store_const', const='fix-marc', dest='action', help='synchronize MARC after filesystem/database')
housekeeping_options.add_option("--fix-format", action='store_const', const='fix-format', dest='action', help='fix format related inconsistences')
housekeeping_options.add_option("--fix-duplicate-docnames", action='store_const', const='fix-duplicate-docnames', dest='action', help='fix duplicate docnames associated with the same record')
housekeeping_options.add_option("--fix-bibdocfsinfo-cache", action='store_const', const='fix-bibdocfsinfo-cache', dest='action', help='fix bibdocfsinfo cache related inconsistences')
parser.add_option_group(housekeeping_options)
experimental_options = OptionGroup(parser, 'Experimental options (do not expect to find them in the next release)')
experimental_options.add_option('--textify', dest='action', action='store_const', const='textify', help='extract text from matched documents and store it for later indexing')
experimental_options.add_option('--with-ocr', dest='perform_ocr', action='store_true', default=False, help='when used with --textify, whether to perform OCR')
parser.add_option('-H', '--human-readable', dest='human_readable', action='store_true', default=False, help='print sizes in human readable format (e.g., 1KB 234MB 2GB)')
parser.add_option('--yes-i-know', action='store_true', dest='yes-i-know', help='use with care!')
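# Hedged example invocations of this CLI (the program name `bibdocfile` is
# an assumption; the options are the ones defined above, and the record
# selections are illustrative):
#
#     bibdocfile --collection 'Theses' --get-disk-usage --human-readable
#     bibdocfile --pattern 'year:2012' --check-md5
#     bibdocfile --with-docname 'figure*' --get-info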
wait_for_user("WARNING: a document with name %s and format %s already exists for recid %s. A new document with name %s will be created instead." % (repr(docname), repr(docformat), repr(recid), repr(new_docname)))
docname = new_docname
ffts = {recid: [{
'docname' : docname,
'comment' : comment,
'description' : description,
'restriction' : restriction,
'doctype' : doctype,
'format' : docformat,
'url' : url,
'options': flags
}]}
return bibupload_ffts(ffts, append=True)
def cli_revise(options, revise_path):
"""Create aq bibupload FFT task submission for appending a format."""
"""Check if any format-related inconsistences exists."""
count = 0
tot = 0
duplicate = False
for recid in cli_recids_iterator(options):
tot += 1
bibrecdocs = BibRecDocs(recid)
if not bibrecdocs.check_duplicate_docnames():
print >> sys.stderr, "recid %s has duplicate docnames!"
broken = True
duplicate = True
else:
broken = False
for docname in bibrecdocs.get_bibdoc_names():
if not bibrecdocs.check_format(docname):
print >> sys.stderr, "recid %s with docname %s need format fixing" % (recid, docname)
broken = True
if broken:
count += 1
if count:
result = "%d out of %d records need their formats to be fixed." % (count, tot)
else:
result = "All records appear to be correct with respect to formats."
if duplicate:
result += " Note however that at least one record appear to have duplicate docnames. You should better fix this situation by using --fix-duplicate-docnames."
print wrap_text_in_a_box('NOTE: it\'s impossible to create the log:\n\n %s\n\nbecause of:\n\n %s\n\nPlease run this migration kit as the same user who runs Invenio (e.g. Apache)' % (logfilename, e), style='conclusion', break_long=False)
sys.exit(1)
bibdoc_bibdoc = retrieve_bibdoc_bibdoc()
print wrap_text_in_a_box ("""This script migrate the filesystem structure used to store icons files to the new stricter structure.
This script must not be run during normal Invenio operations.
It is safe to run this script. No file will be deleted.
Anyway it is recommended to run a backup of the filesystem structure just in case.
A backup of the database tables involved will be automatically performed.""", style='important')
if not bibdoc_bibdoc:
print wrap_text_in_a_box("No need for migration", style='conclusion')
return
print "%s icons will be migrated/fixed." % len(bibdoc_bibdoc)
wait_for_user()
print "Backing up database tables"
try:
if not backup_tables():
print wrap_text_in_a_box("""It appears that is not the first time that you run this script.
Backup tables have been already created by a previous run.
In order for the script to go further they need to be removed.""", style='important')
wait_for_user()
print "Backing up database tables (after dropping previous backup)",
backup_tables(drop=True)
print "-> OK"
else:
print "-> OK"
except Exception, e:
print wrap_text_in_a_box("Unexpected error while backing up tables. Please, do your checks: %s" % e, style='conclusion')
sys.exit(1)
to_fix_marc = intbitset()
print "Created a complete log file into %s" % logfilename
try:
try:
for id_bibdoc1, id_bibdoc2 in bibdoc_bibdoc:
try:
record_does_exist = True
recids = get_recid_from_docid(id_bibdoc1)
if not recids:
print "Skipping %s" % id_bibdoc1
continue
for recid in recids:
if record_exists(recid[0]) > 0:
to_fix_marc.add(recid[0])
else:
record_does_exist = False
if not fix_bibdoc_bibdoc(id_bibdoc1, id_bibdoc2, logfile):
if record_does_exist:
raise StandardError("Error when correcting document ID %s" % id_bibdoc1)
body = """Please see the log file %s for what was the status prior to the error. Contact %s in case of problems, attaching the log.""" % (logfilename, CFG_SITE_SUPPORT_EMAIL),
style = 'conclusion')
sys.exit(1)
finally:
print "Scheduling FIX-MARC to synchronize MARCXML for updated records."
# First check that we do not conflict with an already
# existing bibdoc name
if file_action == "add" and \
((filename in [bibdoc['get_docname'] for bibdoc \
in abstract_bibdocs] and not file_rename) or \
file_rename in [bibdoc['get_docname'] for bibdoc \
in abstract_bibdocs]):
# A file with that name already exist. Cancel action
# and tell user.
os.unlink(uploaded_filepath)
body += '<script>alert("%s");</script>' % \
(_("A file named %s already exists. Please choose another name.") % \
(file_rename or filename)).replace('"', '\\"')
elif file_action == "revise" and \
file_rename != file_target and \
file_rename in [bibdoc['get_docname'] for bibdoc \
in abstract_bibdocs]:
# A file different from the one to revise already has
# the same bibdocname
os.unlink(uploaded_filepath)
body += '<script>alert("%s");</script>' % \
(_("A file named %s already exists. Please choose another name.") % \
file_rename).replace('"', '\\"')
elif file_action == "addFormat" and \
(extension in \
get_extensions_for_docname(file_target,
abstract_bibdocs)):
# A file with that extension already exists. Cancel
# action and tell user.
os.unlink(uploaded_filepath)
body += '<script>alert("%s");</script>' % \
(_("A file with format '%s' already exists. Please upload another format.") % \
extension).replace('"', '\\"')
elif '.' in file_rename or '/' in file_rename or "\\" in file_rename or \
not os.path.abspath(new_uploaded_filepath).startswith(os.path.join(working_dir, 'files', 'updated')):
# We forbid usage of a few characters, for the good of
# everybody...
os.unlink(uploaded_filepath)
body += '<script>alert("%s");</script>' % \
_("You are not allowed to use dot '.', slash '/', or backslash '\\\\' in file names. Choose a different name and upload your file again. In particular, note that you should not include the extension in the renaming field.").replace('"', '\\"')
else:
# No conflict with file name
# When revising, delete previously uploaded files for
if file_action == "addFormat" and file_target != "":
# We have already checked above that this format does
# not already exist.
# Log
log_action(working_dir, file_action, file_target,
new_uploaded_filepath, file_rename,
file_description, file_comment,
file_target_doctype, keep_previous_files,
file_restriction)
elif file_action in ["add", "addFormat"]:
# No file found, but action involved adding file: ask user to
# select a file
body += """<script>
alert("You did not specify a file. Please choose one before uploading.");
</script>"""
elif file_action == "revise" and file_target != "":
# User has chosen to revise attributes of a file (comment,
# name, etc.) without revising the file itself.
if file_rename != file_target and \
file_rename in [bibdoc['get_docname'] for bibdoc \
in abstract_bibdocs]:
# A file different from the one to revise already has
# the same bibdocname
body += '<script>alert("%s");</script>' % \
(_("A file named %s already exists. Please choose another name.") % \
file_rename).replace('"', '\\"')
elif file_rename != file_target and \
('.' in file_rename or '/' in file_rename or "\\" in file_rename):
# We forbid usage of a few characters, for the good of
# everybody...
body += '<script>alert("%s");</script>' % \
_("You are not allowed to use dot '.', slash '/', or backslash '\\\\' in file names. Choose a different name and upload your file again. In particular, note that you should not include the extension in the renaming field.").replace('"', '\\"')
else:
# Log
log_action(working_dir, file_action, file_target,
"", file_rename,
file_description, file_comment,
file_target_doctype, keep_previous_files,
file_restriction)
elif file_action == "delete" and file_target != "" and \
((file_target_doctype in can_delete_doctypes) or \
'*' in can_delete_doctypes):
# Delete previously uploaded files for this entry
for path_to_delete in get_uploaded_files_for_docname(working_dir, file_target):
# We should print this only the first time the interface is displayed
body = '<div id="uploadFileInterface">' + body + '</div>'
if include_headers:
body = get_upload_file_interface_javascript(form_url_params) + \
get_upload_file_interface_css() + \
body
# Display markup of the revision panel. This one is also
# printed only at the beginning, so that it does not need to
# be returned with each response
body += revise_balloon % \
{'CFG_SITE_URL': CFG_SITE_URL,
'file_label': file_label,
'filename_label': filename_label,
'description_label': description_label,
'comment_label': comment_label,
'restrictions': restrictions_list,
'previous_versions_help': _('You can decide to hide or not previous version(s) of this file.').replace("'", "\\'"),
'revise_format_help': _('When you revise a file, the additional formats that you might have previously uploaded are removed, since they are no longer up-to-date with the new file.').replace("'", "\\'"),
'revise_format_warning': _('Alternative formats uploaded for current version of this file will be removed'),
return warning_page(_("An error has happened in trying to retrieve the requested file."), req, ln)
else:
return warning_page(_('Not enough information to retrieve the document'), req, ln)
else:
brd = BibRecDocs(recid)
if not name and docid:
## Let's obtain the name from the docid
try:
name = brd.get_docname(docid)
except InvenioBibDocFileError:
return warning_page(_("An error has happened in trying to retrieving the requested file."), req, ln)
docformat = normalize_format(docformat)
redirect_to_url(req, '%s/%s/%s/files/%s%s?ln=%s%s' % (CFG_SITE_URL, CFG_SITE_RECORD, recid, name, docformat, ln, version and 'version=%s' % version or ''), apache.HTTP_MOVED_PERMANENTLY)
@param sql: dictionary of pre-created SQL queries for the various cases of selecting records; the appropriate query is picked depending on the case
@param sql_queries: a list of sql queries to be executed to select records to reformat.
@param cds_query: a search query to be executed to select records to reformat
@param process_format:
@param process:
@param recids: a list of record IDs to reformat
@return: None
"""
write_message("Processing format %s" % fmt)
t1 = os.times()[4]
start_date = datetime.now()
### Query the database
###
task_update_progress('Fetching records to process')
if process_format: # '-without' parameter
write_message("Querying database for records without cache...")
without_format = without_fmt(sql)
recIDs = intbitset(recids)
if cds_query['field'] != "" or \
cds_query['collection'] != "" or \
cds_query['pattern'] != "":
write_message("Querying database (CDS query)...")
if cds_query['collection'] == "":
# use search_pattern() whenever possible, as it can search
# even in private collections
res = search_pattern(p=cds_query['pattern'],
f=cds_query['field'],
m=cds_query['matching'])
else:
# use perform_request_search when '-c' argument has been
# defined, as it is not supported by search_pattern()
res = intbitset(perform_request_search(req=None, of='id',
#"""for all the bibdoc connected to the specified recid, set
#the text_extraction_date to the task_starting_time."""
#run_sql("UPDATE bibdoc JOIN bibrec_bibdoc ON id=id_bibdoc SET text_extraction_date=%s WHERE id_bibrec BETWEEN %s AND %s", (task_get_task_param('task_starting_time'), first_recid, last_recid))
## We send this exception to the admin only when we are not
## already repairing the problem.
register_exception(prefix="Error when putting the term '%s' into db (hitlist=%s): %s\n" % (repr(word), set, e), alert_admin=(task_get_option('cmd') != 'repair'))
if not set: # never store empty words
run_sql("DELETE FROM %s WHERE term=%%s" % wash_table_column_name(tab_name), (word,)) # kwalitee: disable=sql
def display(self):
"Displays the word table."
keys = self.value.keys()
keys.sort()
for k in keys:
write_message("%s: %s" % (k, self.value[k]))
def count(self):
"Returns the number of words in the table."
return len(self.value)
def info(self):
"Prints some information on the words table."
write_message("The words table contains %d words." % self.count())
def lookup_words(self, word=""):
"Lookup word from the words table."
if not word:
done = 0
while not done:
try:
word = raw_input("Enter word: ")
done = 1
except (EOFError, KeyboardInterrupt):
return
if self.value.has_key(word):
write_message("The word '%s' is found %d times." \
% (word, len(self.value[word])))
else:
write_message("The word '%s' does not exist in the word file."\
% word)
def add_recIDs(self, recIDs, opt_flush):
"""Fetches records which id in the recIDs range list and adds
them to the wordTable. The recIDs range list is of the form:
term_rec - the number of records which contains this term
max_occ - max frequency of the term allowed
min_occ - min frequency of the term allowed
termlength - the minimum length of the terms allowed"""
try:
if is_stopword(term) or (len(term) <= termlength) or ((float(term_rec) / float(col_size)) >= max_occ) or ((float(term_rec) / float(col_size)) <= min_occ):
Check if RANK_METHOD_CODE method is valid for the collection given.
If colID is None, then check for existence regardless of collection.
"""
if colID is None:
return run_sql("SELECT COUNT(*) FROM rnkMETHOD WHERE name=%s", (rank_method_code,))[0][0]
enabled_colls = dict(run_sql("SELECT id_collection, score from collection_rnkMETHOD,rnkMETHOD WHERE id_rnkMETHOD=rnkMETHOD.id AND name=%s", (rank_method_code,)))
try:
colID = int(colID)
except TypeError:
return 0
if enabled_colls.has_key(colID):
return 1
else:
while colID:
colID = run_sql("SELECT id_dad FROM collection_collection WHERE id_son=%s", (colID,))
if colID and enabled_colls.has_key(colID[0][0]):
return 1
elif colID:
colID = colID[0][0]
return 0
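# Standalone sketch of the parent-walk above: climb the collection tree
# until an enabled collection (or the root) is reached, assuming a plain
# child -> parent dict in place of the SQL look-ups:
def method_enabled_for(col_id, enabled_colls, parent_of):
    while col_id:
        if col_id in enabled_colls:
            return True
        col_id = parent_of.get(col_id)
    return False

assert method_enabled_for(5, set([1]), {5: 3, 3: 1}) is True
assert method_enabled_for(7, set([1]), {7: None}) is False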
def get_bibrank_methods(colID, ln=CFG_SITE_LANG):
"""
Return a list of rank methods enabled for collection colID and their
names in the language defined by the ln parameter.
"""
if not globals().has_key('methods'):
create_rnkmethod_cache()
avail_methods = []
for (rank_method_code, options) in methods.iteritems():
if options.has_key("function") and is_method_valid(colID, rank_method_code):
"""rank_method_code, e.g. `jif' or `sbr' (word frequency vector model)
rank_limit_relevance, e.g. `23' for `nbc' (number of citations) or `0.10' for `vec'
hitset, search engine hits;
pattern, search engine query or record ID (you check the type)
verbose, verbose level
output:
list of records
list of rank values
prefix
postfix
verbose_output"""
voutput = ""
configcreated = ""
starttime = time.time()
afterfind = starttime - time.time()
aftermap = starttime - time.time()
try:
hitset = copy.deepcopy(hitset_global) #we are receiving a global hitset
if not globals().has_key('methods'):
create_rnkmethod_cache()
function = methods[rank_method_code]["function"]
#we get 'citation' method correctly here
func_object = globals().get(function)
if verbose > 0:
voutput += "function: %s <br/> " % function
voutput += "pattern: %s <br/>" % str(pattern)
if func_object and pattern and pattern[0][0:6] == "recid:" and function == "word_similarity":
result = find_similar(rank_method_code, pattern[0][6:], hitset, rank_limit_relevance, verbose, methods)
elif rank_method_code == "citation":
#we get rank_method_code correctly here. pattern[0] is the search word - not used by find_cit
p = ""
if pattern and pattern[0]:
p = pattern[0][6:]
result = find_citations(rank_method_code, p, hitset, verbose)
elif func_object:
if function == "word_similarity":
result = func_object(rank_method_code, pattern, hitset, rank_limit_relevance, verbose, methods)
elif function in ("word_similarity_solr", "word_similarity_xapian"):
if not rg:
rg = CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS
if not jrec:
jrec = 0
ranked_result_amount = rg + jrec
if verbose > 0:
voutput += "Ranked result amount: %s<br/><br/>" % ranked_result_amount
if verbose > 0:
voutput += "field: %s<br/>" % field
if function == "word_similarity_solr":
if verbose > 0:
voutput += "In Solr part:<br/>"
result = word_similarity_solr(pattern, hitset, methods[rank_method_code], verbose, field, ranked_result_amount)
if function == "word_similarity_xapian":
if verbose > 0:
voutput += "In Xapian part:<br/>"
result = word_similarity_xapian(pattern, hitset, methods[rank_method_code], verbose, field, ranked_result_amount)
else:
result = func_object(rank_method_code, pattern, hitset, rank_limit_relevance, verbose)
else:
result = rank_by_method(rank_method_code, pattern, hitset, rank_limit_relevance, verbose)
except Exception, e:
register_exception()
result = (None, "", adderrorbox("An error occured when trying to rank the search result "+rank_method_code, ["Unexpected error: %s<br />" % (e,)]), voutput)
afterfind = time.time() - starttime
if result[0] and result[1]: #split into two lists for search_engine
write_message("......Processing records #%s-%s" % (recids, recide))
recs = run_sql("SELECT id_bibrec, value FROM bib%sx, bibrec_bib%sx WHERE tag=%%s AND id_bibxxx=id and id_bibrec >=%%s and id_bibrec<=%%s" % (tag[0:2], tag[0:2]), (tag, recids, recide))
valid = intbitset(trailing_bits=1)
valid.discard(0)
for key in tags:
newset = intbitset()
newset += [recid[0] for recid in (run_sql("SELECT id_bibrec FROM bib%sx, bibrec_bib%sx WHERE id_bibxxx=id AND tag=%%s AND id_bibxxx=id and id_bibrec >=%%s and id_bibrec<=%%s" % (tag[0:2], tag[0:2]), (key, recids, recide)))]
valid.intersection_update(newset)
if tags:
recs = filter(lambda x: x[0] in valid, recs)
records = records + list(recs)
write_message("Number of records found with the necessary tags: %s" % len(records))
records = filter(lambda x: x[0] in options["validset"], records)
rnkset = {}
for key, value in records:
if kb_data.has_key(value):
if not rnkset.has_key(key):
rnkset[key] = float(kb_data[value])
else:
if kb_data.has_key(rnkset[key]) and float(kb_data[value]) > float((rnkset[key])[1]):
rnkset[key] = float(kb_data[value])
else:
rnkset[key] = 0
write_message("Number of records available in rank method: %s" % len(rnkset))
return rnkset
def get_lastupdated(rank_method_code):
"""Get the last time the rank method was updated"""
res = run_sql("SELECT rnkMETHOD.last_updated FROM rnkMETHOD WHERE name=%s", (rank_method_code, ))
if res:
return res[0][0]
else:
raise Exception("Is this the first run? Please do a complete update.")
def intoDB(dict, date, rank_method_code):
"""Insert the rank method data into the database"""
mid = run_sql("SELECT id from rnkMETHOD where name=%s", (rank_method_code, ))
del_rank_method_codeDATA(rank_method_code)
serdata = serialize_via_marshal(dict)
midstr = str(mid[0][0])
run_sql("INSERT INTO rnkMETHODDATA(id_rnkMETHOD, relevance_data) VALUES (%s,%s)", (midstr, serdata,))
if date:
run_sql("UPDATE rnkMETHOD SET last_updated=%s WHERE name=%s", (date, rank_method_code))
def fromDB(rank_method_code):
"""Get the data for a rank method"""
id = run_sql("SELECT id from rnkMETHOD where name=%s", (rank_method_code, ))
res = run_sql("SELECT relevance_data FROM rnkMETHODDATA WHERE id_rnkMETHOD=%s", (id[0][0], ))
if res:
return deserialize_via_marshal(res[0][0])
else:
return {}
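# Hedged round-trip sketch for intoDB()/fromDB() above, assuming a rank
# method named 'demo' is already registered in rnkMETHOD (values are
# illustrative):
#
#     intoDB({1: 0.75, 2: 0.10}, '2024-01-01 00:00:00', 'demo')
#     assert fromDB('demo') == {1: 0.75, 2: 0.10}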
def del_rank_method_codeDATA(rank_method_code):
"""Delete the data for a rank method"""
id = run_sql("SELECT id from rnkMETHOD where name=%s", (rank_method_code, ))
run_sql("DELETE FROM rnkMETHODDATA WHERE id_rnkMETHOD=%s", (id[0][0], ))
def del_recids(rank_method_code, range_rec):
"""Delete some records from the rank method"""
id = run_sql("SELECT id from rnkMETHOD where name=%s", (rank_method_code, ))
res = run_sql("SELECT relevance_data FROM rnkMETHODDATA WHERE id_rnkMETHOD=%s", (id[0][0], ))
write_message("No records specified, updating all", verbose=2)
min_id = run_sql("SELECT min(id) from bibrec")[0][0]
max_id = run_sql("SELECT max(id) from bibrec")[0][0]
options["recid_range"] = [[min_id, max_id]]
if task_get_option("quick") == "no":
write_message("Recalculate parameter not used, parameter ignored.", verbose=9)
if task_get_option("cmd") == "del":
del_recids(cfg_short, options["recid_range"])
elif task_get_option("cmd") == "add":
func_object = globals().get(cfg_function)
func_object(rank_method_code, cfg_name, config)
elif task_get_option("cmd") == "stat":
rank_method_code_statistics(rank_method_code)
elif task_get_option("cmd") == "check":
check_method(rank_method_code)
elif task_get_option("cmd") == "print-missing":
func_object = globals().get(cfg_function)
func_object(rank_method_code, cfg_name, config)
elif task_get_option("cmd") == "repair":
func_object = globals().get(cfg_repair_function)
func_object()
else:
write_message("Invalid command found processing %s" % rank_method_code, sys.stderr)
raise StandardError
except StandardError, e:
write_message("\nException caught: %s" % e, sys.stderr)
write_message(traceback.format_exc()[:-1])
register_exception()
raise StandardError
if task_get_option("verbose"):
showtime((time.time() - startCreate))
return 1
def get_valid_range(rank_method_code):
"""Return a range of records"""
write_message("Getting records from collections enabled for rank method.", verbose=9)
res = run_sql("SELECT collection.name FROM collection, collection_rnkMETHOD, rnkMETHOD WHERE collection.id=id_collection and id_rnkMETHOD=rnkMETHOD.id and rnkMETHOD.name=%s", (rank_method_code, ))
# By doing it as below, characters standing alone, like 'c a b', are not added to the index, but when they appear together with other characters, like 'c++' or 'c$', they are added.
for word in split(phrase):
if options["remove_stopword"] == "True" and not is_stopword(word) and check_term(word, 0):
if lang and lang !="none" and options["use_stemming"]:
word = stem(word, lang)
if not words.has_key(word):
words[word] = (0, 0)
else:
if not words.has_key(word):
words[word] = (0, 0)
words[word] = (words[word][0] + weight, 0)
elif options["remove_stopword"] == "True" and not is_stopword(word):
write_message("Exception caught: %s" % e, sys.stderr)
sys.exit(1)
wordTable.report_on_table_consistency()
# We are done. State it in the database, close and quit
return 1
def get_tags(config):
"""Get the tags that should be used creating the index and each tag's parameter"""
tags = []
function = config.get("rank_method","function")
i = 1
shown_error = 0
#try:
if 1:
while config.has_option(function,"tag%s"% i):
tag = config.get(function, "tag%s" % i)
tag = tag.split(",")
tag[1] = int(tag[1].strip())
tag[2] = tag[2].strip()
#check if stemmer for language is available
if config.get(function, "stemming") and stem("information", "en") != "inform":
if shown_error == 0:
write_message("Warning: Stemming not working. Please check it out!")
shown_error = 1
elif tag[2] and tag[2] != "none" and config.get(function,"stemming") and not is_stemmer_available_for_language(tag[2]):
write_message("Warning: Stemming not available for language '%s'." % tag[2])
tags.append(tag)
i += 1
#except Exception:
# write_message("Could not read data from configuration file, please check for errors")
# raise StandardError
return tags
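# A hedged sample of the configuration section that get_tags() expects,
# assuming ConfigParser syntax; each tagN value is "MARC tag, weight,
# stemming language" (the values below are illustrative):
#
#     [rank_method]
#     function = word_similarity
#
#     [word_similarity]
#     stemming = en
#     tag1 = 245__a, 10, en
#     tag2 = 6531_a, 5, none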
def get_valid_range(rank_method_code):
"""Returns which records are valid for this rank method, according to which collections it is enabled for."""
#if options["verbose"] >=9:
# write_message("Getting records from collections enabled for rank method.")
#res = run_sql("SELECT collection.name FROM collection,collection_rnkMETHOD,rnkMETHOD WHERE collection.id=id_collection and id_rnkMETHOD=rnkMETHOD.id and rnkMETHOD.name='%s'" % rank_method_code)
#l_of_colls = []
#for coll in res:
# l_of_colls.append(coll[0])
#if len(l_of_colls) > 0:
# recIDs = perform_request_search(c=l_of_colls)
#else:
# recIDs = []
valid = intbitset(trailing_bits=1)
valid.discard(0)
#valid.addlist(recIDs)
return valid
def check_term(term, termlength):
"""Check if term contains not allowed characters, or for any other reasons for not using this term."""
write_message("%s/%s---%s---%s" % (terms[i][0],terms[i][1], Gi[i][0],Gi[len(Gi) - i - 1][0]))
def update_rnkWORD(table, terms):
"""Updates rnkWORDF and rnkWORDR with Gi and Nj values. For each term in rnkWORDF, a Gi value for the term is added. And for each term in each document, the Nj value for that document is added. In rnkWORDR, the Gi value for each term in each document is added. For description on how things are computed, look in the hacking docs.
table - name of forward index to update
terms - modified terms"""
stime = time.time()
Gi = {}
Nj = {}
N = run_sql("select count(id_bibrec) from %sR" % table[:-1])[0][0]
if len(terms) == 0 and task_get_option("quick") == "yes":
write_message("No terms to process, ending...")
return ""
elif task_get_option("quick") == "yes": #not used -R option, fast calculation (not accurate)
write_message("Beginning post-processing of %s terms" % len(terms))
#Locating all documents related to the modified/new/deleted terms; if fast update,
#only take into account new/modified occurrences
write_message("Phase 1: Finding records containing modified terms")
terms = terms.keys()
i = 0
while i < len(terms):
terms_docs = get_from_forward_index(terms, i, (i+5000), table)
for (t, hitlist) in terms_docs:
term_docs = deserialize_via_marshal(hitlist)
if term_docs.has_key("Gi"):
del term_docs["Gi"]
for (j, tf) in term_docs.iteritems():
if (task_get_option("quick") == "yes" and tf[1] == 0) or task_get_option("quick") == "no":
Nj[j] = 0
write_message("Phase 1: ......processed %s/%s terms" % ((i+5000>len(terms) and len(terms) or (i+5000)), len(terms)))
i += 5000
write_message("Phase 1: Finished finding records containing modified terms")
#Find all terms in the records found in last phase
write_message("Phase 2: Finding all terms in affected records")
records = Nj.keys()
i = 0
while i < len(records):
docs_terms = get_from_reverse_index(records, i, (i + 5000), table)
for (j, termlist) in docs_terms:
doc_terms = deserialize_via_marshal(termlist)
for (t, tf) in doc_terms.iteritems():
Gi[t] = 0
write_message("Phase 2: ......processed %s/%s records " % ((i+5000>len(records) and len(records) or (i+5000)), len(records)))
i += 5000
write_message("Phase 2: Finished finding all terms in affected records")
else: #recalculate
max_id = run_sql("SELECT MAX(id) FROM %s" % table)
max_id = max_id[0][0]
write_message("Beginning recalculation of %s terms" % max_id)
progress = run_sql("SELECT progress FROM schTASK WHERE id=%s", (_TASK_PARAMS['task_id'], ))
if progress:
progress = progress[0][0]
else:
progress = ''
g = re.match(r'Postponed (\d+) time\(s\)', progress)
if g:
postponed_times = int(g.group(1))
else:
postponed_times = 0
if _TASK_PARAMS['sequence-id']:
## Also postponing other dependent tasks.
run_sql("UPDATE schTASK SET runtime=%s, progress=%s WHERE sequenceid=%s AND status='WAITING'", (new_runtime, 'Postponed as task %s' % _TASK_PARAMS['task_id'], _TASK_PARAMS['sequence-id'])) # kwalitee: disable=sql
sys.stderr.write(" --profile=STATS\tPrint profile information. STATS is a comma-separated\n\t\t\tlist of desired output stats (calls, cumulative,\n\t\t\tfile, line, module, name, nfl, pcalls, stdname, time).\n")
sys.stderr.write(" --stop-on-error\tIn case of unrecoverable error stop the bibsched queue.\n")
sys.stderr.write(" --continue-on-error\tIn case of unrecoverable error don't stop the bibsched queue.\n")
sys.stderr.write(" --post-process=BIB_TASKLET_NAME[parameters]\tPostprocesses the specified\n\t\t\tbibtasklet with the given parameters between square\n\t\t\tbrackets.\n")
sys.stderr.write(" --email-logs-to=EMAILS Sends an email with the results of the execution\n\t\t\tof the task, and attached the logs (EMAILS could be a comma-\n\t\t\tseparated lists of email addresses)\n")
if description:
sys.stderr.write(description)
sys.exit(exitcode)
def _task_sig_sleep(sig, frame):
"""Signal handler for the 'sleep' signal sent by BibSched."""
signal.signal(signal.SIGTSTP, signal.SIG_IGN)
write_message("task_sig_sleep(), got signal %s frame %s"
% (sig, frame), verbose=9)
write_message("sleeping as soon as possible...")
_db_login(relogin=1)
task_update_status("ABOUT TO SLEEP")
def _task_sig_stop(sig, frame):
"""Signal handler for the 'stop' signal sent by BibSched."""
write_message("task_sig_stop(), got signal %s frame %s"
% (sig, frame), verbose=9)
write_message("stopping as soon as possible...")
_db_login(relogin=1) # To avoid concurrency with an interrupted run_sql call
task_update_status("ABOUT TO STOP")
def _task_sig_suicide(sig, frame):
"""Signal handler for the 'suicide' signal sent by BibSched."""
write_message("task_sig_suicide(), got signal %s frame %s"
% (sig, frame), verbose=9)
write_message("suiciding myself now...")
task_update_status("SUICIDING")
write_message("suicided")
_db_login(relogin=1)
task_update_status("SUICIDED")
sys.exit(1)
def _task_sig_dumb(sig, frame):
"""Dumb signal handler."""
pass
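# Hedged sketch of how BibSched might wire these handlers to signals (the
# exact signal numbers BibSched uses are an assumption here):
#
#     import signal
#     signal.signal(signal.SIGUSR1, _task_sig_sleep)
#     signal.signal(signal.SIGTERM, _task_sig_stop)
#     signal.signal(signal.SIGABRT, _task_sig_suicide)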
_RE_PSLINE = re.compile(r'^\s*(\w+)\s+(\w+)')
def guess_apache_process_user_from_ps():
"""Guess Apache process user by parsing the list of running processes."""
apache_users = []
try:
# Tested on Linux, Sun and MacOS X
for line in os.popen('ps -A -o user,comm').readlines():
g = _RE_PSLINE.match(line)
if g:
username = g.group(1)
process = os.path.basename(g.group(2))
if process in ('apache', 'apache2', 'httpd'):
if username not in apache_users and username != 'root':
apache_users.append(username)
except Exception, e:
print >> sys.stderr, "WARNING: %s" % e
return tuple(apache_users)
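# Example of the 'ps -A -o user,comm' lines the regexp above is meant to match
# (illustrative only; real output varies by platform):
#
#   www-data  apache2
#   root      httpd
#
# Here guess_apache_process_user_from_ps() would return ('www-data', ), since
# 'root' is deliberately skipped.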
def guess_apache_process_user():
"""
Return the possible name of the user running the Apache server process.
(Look at running OS processes or look at OS users defined in /etc/passwd.)
for other_task_id, other_proc, dummy_other_runtime, other_status, \
other_priority, other_host, other_sequenceid in chain(
self.node_relevant_sleeping_tasks,
self.active_tasks_all_nodes):
if task_id != other_task_id and \
not self.is_task_safe_to_execute(proc, other_proc):
### !!! WE NEED TO CHECK FOR TASKS THAT CAN ONLY BE EXECUTED ON ONE MACHINE AT ONE TIME
### !!! FOR EXAMPLE BIBUPLOADS WHICH NEED TO BE EXECUTED SEQUENTIALLY AND NEVER CONCURRENTLY
## There's at least one higher priority task running that
## cannot run at the same time as the given task.
## We give up.
if debug:
Log("Cannot run because task_id: %s, proc: %s is in the queue and incompatible" % (other_task_id, other_proc))
return False
if sequenceid:
## Let's normalize the priority of all tasks in a sequenceid to the
## max priority of the group
max_priority = run_sql("""SELECT MAX(priority) FROM schTASK
WHERE status='WAITING'
AND sequenceid=%s""",
(sequenceid, ))[0][0]
if run_sql("""UPDATE schTASK SET priority=%s
WHERE status='WAITING' AND sequenceid=%s""",
(max_priority, sequenceid)):
Log("Raised all waiting tasks with sequenceid "
"%s to the max priority %s" % (sequenceid, max_priority))
## Some priorities were raised
return True
## Let's normalize the runtime of all tasks in a sequenceid to
## the compatible runtime.
current_runtimes = run_sql("""SELECT id, runtime FROM schTASK WHERE sequenceid=%s AND status='WAITING' ORDER by id""", (sequenceid, ))
runtimes_adjusted = False
if current_runtimes:
last_runtime = current_runtimes[0][1]
for the_task_id, runtime in current_runtimes:
if runtime < last_runtime:
run_sql("""UPDATE schTASK SET runtime=%s WHERE id=%s""", (last_runtime, the_task_id))
if debug:
Log("Adjusted runtime of task_id %s to %s in order to be executed in the correct sequenceid order" % (the_task_id, last_runtime))
runtimes_adjusted = True
runtime = last_runtime
last_runtime = runtime
if runtimes_adjusted:
## Some runtimes have been adjusted
return True
if sequenceid is not None:
for other_task_id, dummy_other_proc, dummy_other_runtime, dummy_other_status, dummy_other_priority, dummy_other_host, other_sequenceid in self.active_tasks_all_nodes:
if sequenceid == other_sequenceid and task_id > other_task_id:
Log('Task %s needs to run after task %s since they have the same sequence id: %s' % (task_id, other_task_id, sequenceid))
## If there is a task with same sequence number then do not run the current task
return False
if proc in CFG_BIBTASK_MONOTASKS and higher:
## This is a monotask
if debug:
Log("Cannot run because this is a monotask and there are higher priority tasks: %s" % (higher, ))
return False
## No higher priority task conflicts with the given task.
if proc not in CFG_BIBTASK_FIXEDTIMETASKS and len(higher) >= CFG_BIBSCHED_MAX_NUMBER_CONCURRENT_TASKS:
if debug:
Log("Cannot run because all resources (%s) are used (%s), higher: %s" % (CFG_BIBSCHED_MAX_NUMBER_CONCURRENT_TASKS, len(higher), higher))
return False
## Check for monotasks wanting to run
for other_task_id, other_proc, dummy_other_runtime, other_status, other_priority, other_host, other_sequenceid in self.mono_tasks_all_nodes:
if priority < other_priority:
if debug:
Log("Cannot run because there is a monotask with higher priority: %s %s" % (other_task_id, other_proc))
return False
## We check if it is necessary to stop or put to sleep some lower priority
## tasks. Only tasks with priority higher than 100 have the power
## to put a task to stop.
if debug:
Log("Cannot run because there are tasks to stop: %s and priority < 100" % tasks_to_stop)
return False
procname = proc.split(':')[0]
if not tasks_to_stop and (not tasks_to_sleep or (proc not in CFG_BIBTASK_MONOTASKS and len(self.node_relevant_active_tasks) < CFG_BIBSCHED_MAX_NUMBER_CONCURRENT_TASKS)):
if proc in CFG_BIBTASK_MONOTASKS and self.active_tasks_all_nodes:
if debug:
Log("Cannot run because this is a monotask and there are other tasks running: %s" % (self.node_relevant_active_tasks, ))
return False
if len(node_active_tasks) >= CFG_BIBSCHED_MAX_NUMBER_CONCURRENT_TASKS:
if debug:
Log("Cannot run because all resources (%s) are used (%s), active: %s" % (CFG_BIBSCHED_MAX_NUMBER_CONCURRENT_TASKS, len(node_active_tasks), node_active_tasks))
return False
if status in ("SLEEPING", "ABOUT TO SLEEP"):
if host == self.hostname:
## We can only wake up tasks that are running on our own host
for other_task_id, other_proc, dummy_other_runtime, other_status, dummy_other_priority, other_host, dummy_other_sequenceid in self.node_relevant_active_tasks:
## But only if there are no other tasks still going to sleep, otherwise
## we might end up stealing the slot of a higher priority task.
if other_task_id != task_id and other_status in ('ABOUT TO SLEEP', 'ABOUT TO STOP') and other_host == self.hostname:
if debug:
Log("Not yet waking up task #%d since there are other tasks (%s #%d) going to sleep (higher priority task incoming?)" % (task_id, other_proc, other_task_id))
## Let's set a reasonable timeout for URL requests (e.g. FFT)
socket.setdefaulttimeout(40)
def parse_identifier(identifier):
"""Parse the identifier and determine if it is temporary or fixed"""
id_str = str(identifier)
if not id_str.startswith("TMP:"):
return (False, identifier)
else:
return (True, id_str[4:])
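# Illustrative behaviour of parse_identifier() (values are made up):
#
#   parse_identifier("TMP:icon-1")  ->  (True, "icon-1")
#   parse_identifier(42)            ->  (False, 42)
#
# Only string identifiers carrying the "TMP:" prefix are treated as temporary;
# anything else is passed through unchanged.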
def resolve_identifier(tmps, identifier):
"""Resolves an identifier. If the identifier is not temporary, this
function is an identity on the second argument. Otherwise, a resolved
value is returned or an exception raised"""
is_tmp, tmp_id = parse_identifier(identifier)
if is_tmp:
if tmp_id not in tmps:
raise StandardError("Temporary identifier %s not present in the dictionary" % (tmp_id, ))
if tmps[tmp_id] == -1:
# the identifier has been declared but never assigned a value - probably an error during processing
raise StandardError("Temporary identifier %s has been declared, but never assigned a value. Probably an error happened during the processing of the corresponding FFT. Please see the log" % (tmp_id, ))
return tmps[tmp_id]
return identifier
write_message('Found recid %s for extoaiid="%s" with provenance="%s"' % (id_bibrec, extoaiid, extoaisrc), verbose=9)
ret.add(id_bibrec)
break
if this_extoaisrc is None:
write_message('WARNING: Found recid %s for extoaiid="%s" that does not specify any provenance, while the input record does.' % (id_bibrec, extoaiid), stream=sys.stderr)
if extoaisrc is None:
write_message('WARNING: Found recid %s for extoaiid="%s" that specifies a provenance (%s), while the input record does not have a provenance.' % (id_bibrec, extoaiid, this_extoaisrc), stream=sys.stderr)
return ret
def find_record_from_oaiid(oaiid):
"""
Try to find record in the database from the OAI ID number and OAI SRC.
Return record ID if found, None otherwise.
"""
bibxxx = 'bib'+CFG_OAI_ID_FIELD[0:2]+'x'
bibrec_bibxxx = 'bibrec_' + bibxxx
res = run_sql("""SELECT bb.id_bibrec FROM %(bibrec_bibxxx)s AS bb,
%(bibxxx)s AS b WHERE b.tag=%%s AND b.value=%%s
AND bb.id_bibxxx=b.id""" % \
{'bibxxx': bibxxx,
'bibrec_bibxxx': bibrec_bibxxx},
(CFG_OAI_ID_FIELD, oaiid,))
if res:
return res[0][0]
else:
return None
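# A minimal sketch (assuming CFG_OAI_ID_FIELD is a MARC tag such as '909COo')
# of the table-name derivation used above: the first two digits of the tag
# select the bibNNx / bibrec_bibNNx table pair.
def _oaiid_tables_sketch(oai_id_field='909COo'):
    """Hypothetical helper: return the (bibxxx, bibrec_bibxxx) pair."""
    bibxxx = 'bib' + oai_id_field[0:2] + 'x'
    return (bibxxx, 'bibrec_' + bibxxx)  # e.g. ('bib90x', 'bibrec_bib90x')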
def find_record_from_doi(doi):
"""
Try to find record in the database from the given DOI.
Return record ID if found, None otherwise.
"""
bibxxx = 'bib02x'
bibrec_bibxxx = 'bibrec_' + bibxxx
res = run_sql("""SELECT bb.id_bibrec, bb.field_number
FROM %(bibrec_bibxxx)s AS bb, %(bibxxx)s AS b
WHERE b.tag=%%s AND b.value=%%s
AND bb.id_bibxxx=b.id""" % \
{'bibxxx': bibxxx,
'bibrec_bibxxx': bibrec_bibxxx},
('0247_a', doi,))
# For each result, make sure that it is really tagged as a DOI
for (id_bibrec, field_number) in res:
res = run_sql("""SELECT bb.id_bibrec
FROM %(bibrec_bibxxx)s AS bb, %(bibxxx)s AS b
WHERE b.tag=%%s AND b.value=%%s
AND bb.id_bibxxx=b.id and bb.field_number=%%s and bb.id_bibrec=%%s""" % \
{'bibxxx': bibxxx,
'bibrec_bibxxx': bibrec_bibxxx},
('0247_2', "doi", field_number, id_bibrec))
if res and res[0][0] == id_bibrec:
return res[0][0]
return None
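# Illustrative usage (the DOI value is made up):
#
#   recid = find_record_from_doi("10.1234/example.doi")
#
# The second query above guards against records that merely store the string
# in 0247_a without the matching 0247_2 == "doi" qualifier in the same field
# instance.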
def extract_tag_from_record(record, tag_number):
""" Extract the tag_number for record."""
# first step: verify that the record is not already in the database
write_message('Just updating description and comment for %s with format %s with description %s, comment %s and flags %s' % (docname, docformat, description, comment, flags), verbose=9)
try:
if not pretend:
bibdoc.set_description(description, docformat)
bibdoc.set_comment(comment, docformat)
for flag in CFG_BIBDOCFILE_AVAILABLE_FLAGS:
if flag in flags:
bibdoc.set_flag(flag, docformat)
else:
bibdoc.unset_flag(flag, docformat)
except StandardError, e:
write_message("('%s', '%s', '%s', '%s', '%s') description and comment not updated because '%s'." % (docname, docformat, description, comment, flags, e))
raise StandardError, "fft '%s' specifies a different doctype from previous fft with docname '%s'" % (str(fft), name)
if newname2 != newname:
raise StandardError, "fft '%s' specifies a different newname from previous fft with docname '%s'" % (str(fft), name)
if restriction2 != restriction:
raise StandardError, "fft '%s' specifies a different restriction from previous fft with docname '%s'" % (str(fft), name)
if version2 != version:
raise StandardError, "fft '%s' specifies a different version than the previous fft with docname '%s'" % (str(fft), name)
for (dummyurl2, format2, dummydescription2, dummycomment2, dummyflags2, dummytimestamp2) in urls:
if docformat == format2:
raise StandardError, "fft '%s' specifies a second file '%s' with the same format '%s' from previous fft with docname '%s'" % (str(fft), url, docformat, name)
write_message(" Error when adding the field " + tag, verbose=1, stream=sys.stderr)
else:
# For each tag there is a list of tuples representing datafields
for single_tuple in record[tag]:
# We retrieve the information of the tag
subfield_list = single_tuple[0]
ind1 = single_tuple[1]
ind2 = single_tuple[2]
if '%s%s%s' % (tag, ind1 == ' ' and '_' or ind1, ind2 == ' ' and '_' or ind2) in (CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[:5], CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[:5]):
## We don't want to append the external identifier
## if it is already existing.
if record_find_field(rec_old, tag, single_tuple)[0] is not None:
query = """UPDATE oaiHARVESTLOG SET date_inserted=NOW(), inserted_to_db=%s, id_bibrec=%s WHERE oai_id = %s AND bibupload_task_id = %s ORDER BY date_harvested LIMIT 1"""
# Admin wants BibSched to stop, or the task is not set to
# run at a later date: we must stop the queue.
write_message("An error occurred. Task is configured to stop")
return False
else:
# An error happened, but it can be recovered at next run
# (task is re-scheduled) and admin set BibSched to
# continue even after failure.
write_message("An error occurred, but task is configured to continue")
if CFG_OAI_FAILED_HARVESTING_EMAILS_ADMIN:
try:
raise InvenioOAIHarvestWarning("OAIHarvest (task #%s) failed at fully harvesting source(s) %s. BibSched has NOT been stopped, and OAIHarvest will try to recover at next run" % (task_get_task_param("task_id"), ", ".join([repo[0][6] for repo in reposlist]),))
text += '</td><td colspan="3"><small><small><em>Optional: if you leave it blank it will be automatically set to "%s", with the implicit convention that any record belonging to it can be harvested by not specifying any set.</em> [<a href="http://www.openarchives.org/OAI/openarchivesprotocol.html#set" target="_blank">?</a>]</small></small>' % CFG_OAI_REPOSITORY_GLOBAL_SET_SPEC
text += "</td></tr><tr><td>"
text += input_text(title = "OAI Set name:",
name = "oai_set_name", value = oai_set_name)
text += '</td><td colspan="3"><small><small><em>Optional: leave blank if not needed</em> [<a href="http://www.openarchives.org/OAI/openarchivesprotocol.html#Set" target="_blank">?</a>]</small></small>'
text += "</td></tr><tr><td> </td></tr><tr><td>"
text += '</td></tr><tr><td colspan="4">Choose below the search query that defines the records that belong to this set:</td></tr><tr><td>'
text += "</td></tr><tr><td> </td></tr><tr><td>"
# text += input_text(title = "OAI Set description", name = "oai_set_description", value = oai_set_description)
#text += "</td><td colspan=2>"
#menu = create_drop_down_menu_content("SELECT distinct(name) from collection")
from invenio.oai_repository_config import CFG_OAI_REPOSITORY_GLOBAL_SET_SPEC
CFG_VERBS = {
'GetRecord' : ['identifier', 'metadataPrefix'],
'Identify' : [],
'ListIdentifiers' : ['from', 'until',
'metadataPrefix',
'set',
'resumptionToken'],
'ListMetadataFormats': ['identifier'],
'ListRecords' : ['from', 'until',
'metadataPrefix',
'set',
'resumptionToken'],
'ListSets' : ['resumptionToken']
}
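# A minimal sketch (not the module's actual validator) of how CFG_VERBS can be
# used to reject unknown verbs and unexpected arguments of an OAI-PMH request.
# The shape of the argument dictionary is an assumption.
def _check_oai_args_sketch(verb, argd):
    """Hypothetical helper: return a list of OAI-PMH error codes."""
    if verb not in CFG_VERBS:
        return ['badVerb']
    errors = []
    for argname in argd.keys():
        if argname not in CFG_VERBS[verb]:
            errors.append('badArgument')
            break
    return errors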
CFG_ERRORS = {
"badArgument": "The request includes illegal arguments, is missing required arguments, includes a repeated argument, or values for arguments have an illegal syntax:",
"badResumptionToken": "The value of the resumptionToken argument is invalid or expired:",
"badVerb": "Value of the verb argument is not a legal OAI-PMH verb, the verb argument is missing, or the verb argument is repeated:",
"cannotDisseminateFormat": "The metadata format identified by the value given for the metadataPrefix argument is not supported by the item or by the repository:",
"idDoesNotExist": "The value of the identifier argument is unknown or illegal in this repository:",
"noRecordsMatch": "The combination of the values of the from, until, set and metadataPrefix arguments results in an empty list:",
"noMetadataFormats": "There are no metadata formats available for the specified item:",
"noSetHierarchy": "The repository does not support sets:"
Get the list of values of field 'field' for the record with system number 'recid'.
"""
digit = field[0:2]
bibbx = "bib%sx" % digit
bibx = "bibrec_bib%sx" % digit
query = "SELECT bx.value FROM %s AS bx, %s AS bibx WHERE bibx.id_bibrec=%%s AND bx.id=bibx.id_bibxxx AND bx.tag=%%s" % (wash_table_column_name(bibbx), wash_table_column_name(bibx))
return [row[0] for row in run_sql(query, (recid, field))]
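# Illustrative usage of the helper above (its name is cut off in this excerpt;
# recid and field values are made up):
#
#   <helper>(recid=10, field='245__a')  ->  ['A record title']
#
# The first two digits of the tag pick the bibNNx table pair, and all matching
# subfield values for the record are returned.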
"""Returns the first database BIB ID for the OAI identifier 'identifier', if it exists."""
recid = None
if identifier:
query = "SELECT DISTINCT(bb.id_bibrec) FROM bib%sx AS bx, bibrec_bib%sx AS bb WHERE bx.tag=%%s AND bb.id_bibxxx=bx.id AND bx.value=%%s" % (CFG_OAI_ID_FIELD[0:2], CFG_OAI_ID_FIELD[0:2])
res = run_sql(query, (CFG_OAI_ID_FIELD, identifier))
for row in res:
recid = row[0]
return recid
def get_set_last_update(set_spec=""):
"""
Returns the last_update of a given set (or of all sets) in UTC
"""
if set_spec:
last_update = run_sql("SELECT DATE_FORMAT(MAX(last_updated),'%%Y-%%m-%%d %%H:%%i:%%s') FROM oaiREPOSITORY WHERE setSpec=%s", (set_spec, ))[0][0]
else:
last_update = run_sql("SELECT DATE_FORMAT(MAX(last_updated),'%Y-%m-%d %H:%i:%s') FROM oaiREPOSITORY")[0][0]
## check availability (OAI requests for Identify, ListSets and
## ListMetadataFormats are served immediately, otherwise we
## shall wait for CFG_OAI_SLEEP seconds between requests):
if os.path.exists("%s/RTdata/RTdata" % CFG_CACHEDIR) and (argd['verb'] not in ["Identify", "ListMetadataFormats", "ListSets"] and not argd.get('resumptionToken')):
Provides cache for tokenizer information for fields corresponding to indexes.
This class is not to be used directly; use function
get_field_tokenizer_type() instead.
"""
def __init__(self):
def cache_filler():
try:
res = run_sql("""SELECT fld.code, ind.tokenizer FROM idxINDEX AS ind, field AS fld, idxINDEX_field AS indfld WHERE ind.id = indfld.id_idxINDEX AND indfld.id_field = fld.id""")
colls_out_for_display = colls # nope, we need to display all 'colls' successively
# remove duplicates:
#colls_out_for_display_nondups=filter(lambda x, colls_out_for_display=colls_out_for_display: colls_out_for_display[x-1] not in colls_out_for_display[x:], range(1, len(colls_out_for_display)+1))
# combine the current hitset with resulting hitset using the current operator
if current_operator == '+':
result_hitset = result_hitset & current_hitset
elif current_operator == '-':
result_hitset = result_hitset - current_hitset
elif current_operator == '|':
result_hitset = result_hitset | current_hitset
else:
assert False, "Unknown operator in search_pattern_parenthesised()"
return result_hitset
# If searching with parentheses fails, perform search ignoring parentheses
except SyntaxError:
write_warning(_("Search syntax misunderstood. Ignoring all parentheses in the query. If this doesn't help, please check your search and try again."), req=req)
# remove the parentheses in the query. The current implementation removes all the parentheses,
# but it could be improved to remove only those that are not inside quotes
index_min = max(len(recIDs) - irec_max, 0) # just to be sure that the min index is not negative
## check arguments:
if not sort_field:
return recIDs[index_min:]
if len(recIDs) > CFG_WEBSEARCH_NB_RECORDS_TO_SORT:
if of.startswith('h'):
write_warning(_("Sorry, sorting is allowed on sets of up to %d records only. Using default sort order.") % CFG_WEBSEARCH_NB_RECORDS_TO_SORT, "Warning", req=req)
write_warning("Hosted collections (perform_search_request): there were no hosted collections to be searched", req=req)
## let's define some useful boolean variables:
# True means there are actual or potential hosted collections results to be printed
kwargs['hosted_colls_actual_or_potential_results_p'] = not (not hosted_colls or not ((hosted_colls_results and hosted_colls_true_results) or hosted_colls_timeouts))
# True means there are hosted collections timeouts to take care of later
# (useful for more accurate printing of results later)
kwargs['hosted_colls_potential_results_p'] = not (not hosted_colls or not hosted_colls_timeouts)
# True means we only have hosted collections to deal with
kwargs['only_hosted_colls_actual_or_potential_results_p'] = not colls_to_search and hosted_colls_actual_or_potential_results_p
"""Update the last time this alert was ran in the database."""
return run_sql('update user_query_basket set date_lastrun=%s where id_user=%s and id_query=%s and id_basket=%s;', (strftime("%Y-%m-%d"), alert[0], alert[1], alert[2],))
def get_alert_queries(frequency):
"""Return all the queries for the given frequency."""
return run_sql('select distinct id, urlargs from query q, user_query_basket uqb where q.id=uqb.id_query and uqb.frequency=%s and uqb.date_lastrun <= now();', (frequency,))
def get_alert_queries_for_user(uid):
"""Returns all the queries for the given user id."""
return run_sql('select distinct id, urlargs, uqb.frequency from query q, user_query_basket uqb where q.id=uqb.id_query and uqb.id_user=%s and uqb.date_lastrun <= now();', (uid,))
def get_alerts(query, frequency):
"""Returns a dictionary of all the records found for a specific query and frequency along with other informationm"""
r = run_sql('select id_user, id_query, id_basket, frequency, date_lastrun, alert_name, notification, alert_desc, alert_recipient from user_query_basket where id_query=%s and frequency=%s;', (query['id_query'], frequency,))
if index > CFG_WEBALERT_MAX_NUM_OF_RECORDS_IN_ALERT_EMAIL:
index = CFG_WEBALERT_MAX_NUM_OF_RECORDS_IN_ALERT_EMAIL
if nrec > 0:
nrec_to_add = min(nrec, index)
if CFG_WEBALERT_DEBUG_LEVEL > 0:
print "-> adding %i records into basket %s: %s" % (nrec_to_add, basket_id, filtered_records[0][:nrec_to_add])
if nrec > nrec_to_add:
print "-> not added %i records into basket %s: %s due to maximum limit restrictions." % (nrec - nrec_to_add, basket_id, filtered_records[0][nrec_to_add:])
if CFG_WEBALERT_DEBUG_LEVEL > 2 and filtered_out_recids:
print "-> these records have been filtered out, as user id %s did not have access:\n%s" % \
(owner_uid, repr(filtered_out_recids))
if index < CFG_WEBALERT_MAX_NUM_OF_RECORDS_IN_ALERT_EMAIL:
for external_collection_results in filtered_records[1][0]:
nrec = len(external_collection_results[1][0])
# index_tmp: the number of maximum allowed records to be added to
# the basket for the next collection.
index_tmp = CFG_WEBALERT_MAX_NUM_OF_RECORDS_IN_ALERT_EMAIL - index
index += nrec
if index > CFG_WEBALERT_MAX_NUM_OF_RECORDS_IN_ALERT_EMAIL:
index = CFG_WEBALERT_MAX_NUM_OF_RECORDS_IN_ALERT_EMAIL
if nrec > 0 and index_tmp > 0:
nrec_to_add = min(nrec, index_tmp)
if CFG_WEBALERT_DEBUG_LEVEL > 0:
print "-> adding %s external records (collection \"%s\") into basket %s: %s" % (nrec_to_add, external_collection_results[0], basket_id, external_collection_results[1][0][:nrec_to_add])
if nrec > nrec_to_add:
print "-> not added %s external records (collection \"%s\") into basket %s: %s due to maximum limit restriction" % (nrec - nrec_to_add, external_collection_results[0], basket_id, external_collection_results[1][0][nrec_to_add:])
print "-> not added %s external records (collection \"%s\") into basket %s: %s due to maximum limit restriction" % (nrec, external_collection_results[0], basket_id, external_collection_results[1][0])
elif CFG_WEBALERT_DEBUG_LEVEL > 0:
for external_collection_results in filtered_records[1][0]:
nrec = len(external_collection_results[1][0])
if nrec > 0:
print "-> not added %i external records (collection \"%s\") into basket %s: %s due to maximum limit restrictions" % (nrec, external_collection_results[0], basket_id, external_collection_results[1][0])
def get_query(alert_id):
"""Returns the query for that corresponds to this alert id."""
r = run_sql('select urlargs from query where id=%s', (alert_id,))
return r[0][0]
def email_notify(alert, records, argstr):
"""Send the notification e-mail for a specific alert."""
if CFG_WEBALERT_DEBUG_LEVEL > 2:
print "+" * 80 + '\n'
uid = alert[0]
user_info = collect_user_info(uid)
frequency = alert[3]
alert_name = alert[5]
alert_description = alert[7]
alert_recipient_email = alert[8] # set only by admin. Bypasses access-right checks.
filtered_out_recids = [] # only set in debug mode
if not alert_recipient_email:
# Filter out records that user (who setup the alert) should
# not see. This does not apply to external records (hosted
""" account_list_searches: list the searches of the user
input: the user id
output: resume of the searches"""
out = ""
# first detect number of queries:
nb_queries_total = 0
res = run_sql("SELECT COUNT(*) FROM user_query WHERE id_user=%s", (uid,), 1)
try:
nb_queries_total = res[0][0]
except:
pass
# load the right language
_ = gettext_set_language(ln)
out += _("You have made %(x_nb)s queries. A %(x_url_open)sdetailed list%(x_url_close)s is available with a possibility to (a) view search results and (b) subscribe to an automatic email alerting service for these queries.") % {'x_nb': nb_queries_total, 'x_url_open': '<a href="../youralerts/display?ln=%s">' % ln, 'x_url_close': '</a>'}
out = _("You have %(x_nb_perso)s personal baskets and are subscribed to %(x_nb_group)s group baskets and %(x_nb_public)s other users public baskets.") %\
{'x_nb_perso': personal_text,
'x_nb_group': group_text,
'x_nb_public': external_text}
return out
def page_start(req, of='xm'):
"""Set the content type and send the headers for the page."""
DATE_FORMAT(max(c.date_creation), '%%Y-%%m-%%d %%H:%%i:%%S') as date_last_comment,
count(distinct c.id_user) as users,
count(*) as count
FROM cmtRECORDCOMMENT c
%s
GROUP BY c.id_bibrec
ORDER BY count(*) DESC
LIMIT %s
"""
select_fields = not comments and 'c.star_score, ' or ''
where_clause = "WHERE " + (comments and 'c.star_score=0' or 'c.star_score>0') + ' AND c.status="ok" AND c.nb_abuse_reports < %s' % CFG_WEBCOMMENT_NB_REPORTS_BEFORE_SEND_EMAIL_TO_ADMIN
res = run_sql(query % (select_fields, where_clause, top))
return '<span style="color:#f00">Configuration could not be read. Please check that %s/webjournal/%s/%s-config.xml exists and can be read by the server.</span><br/>' % (CFG_ETCDIR, journal_name, journal_name)
Go to the <a href="%(CFG_SITE_URL)s/journal/%(name)s">%(name)s journal</a> to
see the result.</span>''' % {'CFG_SITE_URL': CFG_SITE_URL,
'CFG_SITE_RECORD': CFG_SITE_RECORD,
'name': journal_name,
'recid': recid}
elif result == 1:
msg = '''<span style="color:#f00"><a href="%(CFG_SITE_URL)s/%(CFG_SITE_RECORD)s/%(recid)s">record %(recid)s</a> is already featured. Choose another one or remove it first.</span>''' % \
{'CFG_SITE_URL': CFG_SITE_URL,
'CFG_SITE_RECORD': CFG_SITE_RECORD,
'recid': recid}
else:
msg = '''<span style="color:#f00">Record could not be featured. Check file permission.</span>'''
<legend>Remove featured record</legend><span style="color:#f00">Are you sure you want to remove <a href="%(CFG_SITE_URL)s/%(CFG_SITE_RECORD)s/%(recid)s">record %(recid)s</a> from the list of featured records?
Add a new journal or configure the settings of an existing journal.
Parameters:
journal_name - the journal to configure, or name of the new journal
xml_config - the xml configuration of the journal (string)
action - One of ['edit', 'editDone', 'add', 'addDone']
ln - language
"""
msg = None
if action == 'edit':
# Read existing config
if journal_name is not None:
if not can_read_xml_config(journal_name):
return '<span style="color:#f00">Configuration could not be read. Please check that %s/webjournal/%s/%s-config.xml exists and can be read by the server.</span><br/>' % (CFG_ETCDIR, journal_name, journal_name)
else:
return '<span style="color:#f00">You must specify a journal name</span>'
if action in ['editDone', 'addDone']:
# Save config
if action == 'addDone':
res = add_journal(journal_name, xml_config)
if res == -1:
msg = '<span style="color:#f00">A journal with that name already exists. Please choose another name.</span>'
action = 'add'
elif res == -2:
msg = '<span style="color:#f00">Configuration could not be written (no permission). Please manually copy your config to %s/webjournal/%s/%s-config.xml</span><br/>' % (CFG_ETCDIR, journal_name, journal_name)
action = 'edit'
elif res == -4:
msg = '<span style="color:#f00">Cache file could not be written (no permission). Please manually create directory %s/webjournal/%s/ and make it writable for your Apache user</span><br/>' % (CFG_CACHEDIR, journal_name)
text += """<option value="%s" %s>%s</option>""" % (id, (func in ["0", 0] and confirm in ["0", 0] and int(rnkID) == int(id)) and 'selected="selected"' or '' , name)
if confirm in ["1", 1] and func in ["0", 0] and int(rnkID) != -1:
output += write_outcome(finresult)
elif confirm not in ["0", 0] and func in ["0", 0]:
output += """<b><span class="info">Please select a rank method.</span></b>"""
coll_list = get_col_rnk(colID, ln)
if coll_list:
text = """
<span class="adminlabel">Disable:</span>
<select name="rnkID" class="admin_w200">
<option value="-1">- select rank method-</option>
"""
for (id, name) in coll_list:
text += """<option value="%s" %s>%s</option>""" % (id, (func in ["1", 1] and confirm in ["0", 0] and int(rnkID) == int(id)) and 'selected="selected"' or '' , name)
if confirm not in [-1, "-1"] and not (add_son and add_dad and rtype):
output2 += """<b><span class="info">All fields must be filled.</span></b><br /><br />
"""
elif add_son and add_dad and rtype:
add_son = int(add_son)
add_dad = int(add_dad)
if confirm not in [-1, "-1"]:
if add_son == add_dad:
output2 += """<b><span class="info">Cannot add a collection as a pointer to itself.</span></b><br /><br />
"""
elif check_col(add_dad, add_son):
res = add_col_dad_son(add_dad, add_son, rtype)
output2 += write_outcome(res)
if res[0] == 1:
output2 += """<b><span class="info"><br /> The collection will appear on your website after the next webcoll run. You can either run it manually or wait until bibsched does it for you.</span></b><br /><br />
"""
else:
output2 += """<b><span class="info">Cannot add the collection '%s' as a %s subcollection of '%s' since it will either create a loop, or the association already exists.</span></b><br /><br />
""" % (col_dict[add_son], (rtype=="r" and 'regular' or 'virtual'), col_dict[add_dad])
res = run_sql("SELECT dbquery FROM collection WHERE id=%s" % colID)
dbquery = res[0][0]
if not dbquery:
dbquery = ''
reg_sons = len(get_col_tree(colID, 'r'))
vir_sons = len(get_col_tree(colID, 'v'))
if reg_sons > 1:
if dbquery:
output += "Warning: This collection got subcollections, and should because of this not have a collection query, for further explanation, check the WebSearch Guide<br />"
elif reg_sons <= 1:
if not dbquery:
output += "Warning: This collection does not have any subcollections, and should because of this have a collection query, for further explanation, check the WebSearch Guide<br />"
if switch and switch_col_treescore(tree[move_up], tree[switch]):
output += """<b><span class="info">Moved the %s collection '%s' up and '%s' down.</span></b><br /><br />
""" % ((rtype=="r" and 'regular' or 'virtual'), col_dict[tree[move_up][0]], col_dict[tree[switch][0]])
else:
output += """<b><span class="info">Could not move the %s collection '%s' up and '%s' down.</span></b><br /><br />
""" % ((rtype=="r" and 'regular' or 'virtual'), col_dict[tree[move_up][0]], col_dict[tree[switch][0]])
elif move_down:
move_down = int(move_down)
switch = find_next(tree, move_down)
if switch and switch_col_treescore(tree[move_down], tree[switch]):
output += """<b><span class="info">Moved the %s collection '%s' down and '%s' up.</span></b><br /><br />
""" % ((rtype=="r" and 'regular' or 'virtual'), col_dict[tree[move_down][0]], col_dict[tree[switch][0]])
else:
output += """<b><span class="info">Could not move the %s collection '%s' up and '%s' down.</span></b><br /><br />
""" % ((rtype=="r" and 'regular' or 'virtual'), col_dict[tree[move_up][0]],col_dict[tree[switch][0]])
elif delete:
delete = int(delete)
if confirm in [0, "0"]:
if col_dict[tree[delete][0]] != col_dict[tree[delete][3]]:
text = """<b>Do you want to remove the %s collection '%s' and its subcollections in the %s collection '%s'.</b>
""" % ((tree[delete][4]=="r" and 'regular' or 'virtual'), col_dict[tree[delete][0]], (rtype=="r" and 'regular' or 'virtual'), col_dict[tree[delete][3]])
else:
text = """<b>Do you want to remove all subcollections of the %s collection '%s'.</b>
""" % ((rtype=="r" and 'regular' or 'virtual'), col_dict[tree[delete][3]])
if move_from_id == move_to_id and move_from_rtype == move_to_rtype:
output += """<b><span class="info">Cannot move to itself.</span></b><br /><br />
"""
elif tree_from[move_from_id][3] == tree_to[move_to_id][0] and move_from_rtype==move_to_rtype:
output += """<b><span class="info">The collection is already there.</span></b><br /><br />
"""
elif check_col(tree_to[move_to_id][0], tree_from[move_from_id][0]) or (tree_to[move_to_id][0] == 1 and tree_from[move_from_id][3] == tree_to[move_to_id][0] and move_from_rtype != move_to_rtype):
text = """<b>Move %s collection '%s' to the %s collection '%s'.</b>
""" % ((tree_from[move_from_id][4]=="r" and 'regular' or 'virtual'), col_dict[tree_from[move_from_id][0]], (tree_to[move_to_id][4]=="r" and 'regular' or 'virtual'), col_dict[tree_to[move_to_id][0]])
output += """<b><span class="info">Cannot move the collection '%s' and set it as a subcollection of '%s' since it will create a loop.</span></b><br /><br />
if (move_to_id != 0 and move_col_tree(tree_from[move_from_id], tree_to[move_to_id])) or (move_to_id == 0 and move_col_tree(tree_from[move_from_id], tree_to[move_to_id], move_to_rtype)):
output += """<b><span class="info">Moved %s collection '%s' to the %s collection '%s'.</span></b><br /><br />
""" % ((move_from_rtype=="r" and 'regular' or 'virtual'), col_dict[tree_from[move_from_id][0]], (move_to_rtype=="r" and 'regular' or 'virtual'), col_dict[tree_to[move_to_id][0]])
else:
output += """<b><span class="info">Could not move %s collection '%s' to the %s collection '%s'.</span></b><br /><br />
""" % ((move_from_rtype=="r" and 'regular' or 'virtual'), col_dict[tree_from[move_from_id][0]], (move_to_rtype=="r" and 'regular' or 'virtual'), col_dict[tree_to[move_to_id][0]])
text = """Do you want to remove the %s '%s' %s from the collection '%s'.""" % (field, fld_dict[fldID], (fldvID not in["", "None"] and "with value '%s'" % fldv_dict[fldvID] or ''), col_dict[colID])
run_sql("UPDATE collection_field_fieldvalue SET score_fieldvalue=%s WHERE id_collection=%s and id_field=%s and id_fieldvalue=%s", (vscore, colID, fldID, fldvID))
vscore -= 1
output += write_outcome((1, ""))
else:
output += write_outcome((0, (0, "No values to order")))
fld_distinct = run_sql("SELECT distinct(id_field) FROM collection_field_fieldvalue WHERE type='seo' AND id_collection=%s ORDER by score desc", (colID, ))
if CFG_SITE_NAME != run_sql("SELECT name from collection WHERE id=1")[0][0]:
res = run_sql("update collection set name=%s where id=1", (CFG_SITE_NAME, ))
if res:
fin_output += """<b><span class="info">The name of the root collection has been modified to be the same as the %(sitename)s installation name given prior to installing %(sitename)s.</span></b><br />""" % {'sitename' : CFG_SITE_NAME}
<td>1. <small><a href="%s/admin/websearch/websearchadmin.py?colID=%s&ln=%s&mtype=perform_addcollection">Create new collection</a></small></td>
<td>2. <small><a href="%s/admin/websearch/websearchadmin.py?colID=%s&ln=%s&mtype=perform_addcollectiontotree">Attach collection to tree</a></small></td>
text += """</td><td></td><td></td><td></td><td><table border="0" cellspacing="0" cellpadding="0"><tr><td>
"""
if i == 0:
tstack.append((id_son, dad, 1))
else:
tstack.append((id_son, dad, tables))
if up == 1 and edit:
text += """<a href="%s/admin/websearch/websearchadmin.py/modifycollectiontree?colID=%s&ln=%s&move_up=%s&rtype=%s#%s"><img border="0" src="%s/img/smallup.gif" title="Move collection up"></a>""" % (CFG_SITE_URL, colID, ln, i, rtype, tree[i][0], CFG_SITE_URL)
else:
text += """ """
text += "</td><td>"
if down == 1 and edit:
text += """<a href="%s/admin/websearch/websearchadmin.py/modifycollectiontree?colID=%s&ln=%s&move_down=%s&rtype=%s#%s"><img border="0" src="%s/img/smalldown.gif" title="Move collection down"></a>""" % (CFG_SITE_URL, colID, ln, i, rtype, tree[i][0], CFG_SITE_URL)
else:
text += """ """
text += "</td><td>"
if edit:
if move_from and move_to:
tmove_from = move_from
move_from = ''
if not (move_from == "" and i == 0) and not (move_from != "" and int(move_from[1:len(move_from)]) == i and rtype == move_from[0]):
check = "true"
if move_from:
#if tree_from[move_from_id][0] == tree_to[i][0] or not check_col(tree_to[i][0], tree_from[move_from_id][0]):
# check = ''
#elif not check_col(tree_to[i][0], tree_from[move_from_id][0]):
# check = ''
#if not check and (tree_to[i][0] == 1 and tree_from[move_from_id][3] == tree_to[i][0] and move_from_rtype != rtype):
# check = "true"
if check:
text += """<a href="%s/admin/websearch/websearchadmin.py/modifycollectiontree?colID=%s&ln=%s&move_from=%s&move_to=%s%s&rtype=%s#tree"><img border="0" src="%s/img/move_to.gif" title="Move '%s' to '%s'"></a>
<dd>When deleting a collection, you also delete all data related to the collection, such as translations, relations to other collections and information about which rank methods to use.
<br />For more information, please go to the <a title="See guide" href="%s/help/admin/websearch-admin-guide">WebSearch guide</a> and read the section regarding deleting a collection.</dd>
</dl>
</strong>
</span>
""" % CFG_SITE_URL
col_dict = dict(get_def_name('', "collection"))
if colID != 1 and colID and col_dict.has_key(int(colID)):
output = """<b><span class="info">Can not delete a collection that is a part of the collection tree, remove collection from the tree and try again.</span></b>"""
else:
subtitle = """4. Delete collection"""
output = """<b><span class="info">Not possible to delete the root collection</span></b>"""
<td>11. <small><a href="editcollection?colID=%s&ln=%s&mtype=perform_manage_external_collections#11">Configuration of related external collections</a></small></td>
<td>12. <small><a href="editcollection?colID=%s&ln=%s&mtype=perform_showdetailedrecordoptions#12">Detailed record page options</a></small></td>
if collection_table_update_time > collection_web_update_time:
output += """<br /><b><span class="info">Warning: The collections have been modified since last time Webcoll was executed, to process the changes, Webcoll must be executed.</span></b><br />"""
res = run_sql("select id, proc, host, user, runtime, sleeptime, arguments, status, progress from schTASK where proc='webcoll' and runtime< now() ORDER by runtime")
res = run_sql("select id, proc, host, user, runtime, sleeptime, arguments, status, progress from schTASK where proc='bibindex' and runtime< now() ORDER by runtime")
res = run_sql("select id, proc, host, user, runtime, sleeptime, arguments, status, progress from schTASK where proc='webcoll' and runtime > now() ORDER by runtime")
res = run_sql("select id, proc, host, user, runtime, sleeptime, arguments, status, progress from schTASK where proc='bibindex' and runtime > now() ORDER by runtime")
output += """<br /><b><span class="info">Warning: Webcoll is not scheduled for a future run by bibsched, any updates to the collection will not be processed.</span></b><br />"""
if bibindex_future == "":
output += """<br /><b><span class="info">Warning: Bibindex is not scheduled for a future run by bibsched, any updates to the records will not be processed.</span></b><br />"""
output += """<br /><span class=info>New collection \"%s\" has been added to the database table \"externalcollection\".</span><br />""" % (collection)
else:
output += """<br /><span class=info>Collection \"%s\" has already been added to the database table \"externalcollection\" or was already there.</span><br />""" % (collection)
elif update == "del":
# icl : the "inconsistent list" comes as a string, it has to be converted back into a list
icl = eval(icl)
#icl = icl[1:-1].split(',')
for collection in icl:
#collection = str(collection[1:-1])
query_select = "SELECT id FROM externalcollection WHERE name=%s"
results_select = run_sql(query_select, (collection, ))
if results_select:
query_delete = "DELETE FROM externalcollection WHERE id=%s"
query_delete_states = "DELETE FROM collection_externalcollection WHERE id_externalcollection=%s"
run_sql(query_delete, (results_select[0][0], ))
run_sql(query_delete_states, (results_select[0][0], ))
output += """<br /><span class=info>Collection \"%s\" has been deleted from the database table \"externalcollection\".</span><br />""" % (collection)
else:
output += """<br /><span class=info>Collection \"%s\" has already been delete from the database table \"externalcollection\" or was never there.</span><br />""" % (collection)
external_collections_file = []
external_collections_db = []
for coll in external_collections_dictionary.values():
external_collections_file.append(coll.name)
external_collections_file.sort()
query = """SELECT name from externalcollection"""
results = run_sql(query)
for result in results:
external_collections_db.append(result[0])
external_collections_db.sort()
number_file = len(external_collections_file)
number_db = len(external_collections_db)
if external_collections_file == external_collections_db:
output += """<br /><span class="info">External collections are consistent.</span><br /><br />
- database table \"externalcollection\" has %(number_db)s collections<br />
- configuration file \"websearch_external_collections_config.py\" has %(number_file)s collections""" % {
Click here</a> to force remove the extra collections from your database (warning: use with caution!). If the problem persists please check your configuration manually.""" % {
"number_db" : number_db,
"number_file" : number_file,
"diff" : external_collections_diff,
"site_url" : CFG_SITE_URL,
"colID" : colID,
"ln" : ln}
else:
output += """<br /><span class="warning">There is an inconsistency:</span><br /><br />
- database table \"externalcollection\" has %(number_db)s collections<br />
- configuration file \"websearch_external_collections_config.py\" has %(number_file)s collections
<br /><br /><span class="warning">The external collections do not match.</span>
<br />To fix the problem please check your configuration manually.""" % {
"number_db" : number_db,
"number_file" : number_file}
else:
output += """<br /><span class="warning">There is an inconsistency:</span><br /><br />
- database table \"externalcollection\" has %(number_db)s collections<br />
- configuration file \"websearch_external_collections_config.py\" has %(number_file)s collections
<br /><br /><span class="warning">The number of external collections is the same but the collections do not match.</span>
<br />To fix the problem please check your configuration manually.""" % {
tree = tree[0:ssize] + ntree + tree[ssize:len(tree)]
return tree
except StandardError, e:
register_exception()
return ()
def add_col_dad_son(add_dad, add_son, rtype):
"""Add a son to a collection (dad)
add_dad - add to this collection id
add_son - add this collection id
rtype - either regular or virtual"""
try:
res = run_sql("SELECT score FROM collection_collection WHERE id_dad=%s ORDER BY score ASC", (add_dad, ))
highscore = 0
for score in res:
if int(score[0]) > highscore:
highscore = int(score[0])
highscore += 1
res = run_sql("INSERT INTO collection_collection(id_dad,id_son,score,type) values(%s,%s,%s,%s)", (add_dad, add_son, highscore, rtype))
return (1, highscore)
except StandardError, e:
register_exception()
return (0, e)
def compare_on_val(first, second):
"""Compare the two values"""
return cmp(first[1], second[1])
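# Illustrative usage of compare_on_val() with Python 2's cmp-style sorting,
# ordering (id, name) pairs by name (data is made up):
#
#   rows = [(3, 'Preprints'), (1, 'Articles')]
#   rows.sort(compare_on_val)   # -> [(1, 'Articles'), (3, 'Preprints')]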
def get_col_fld(colID=-1, type = '', id_field=''):
"""Returns either all portalboxes associated with a collection, or based on either colID or language or both.
colID - collection id
ln - language id"""
sql = "SELECT id_field,id_fieldvalue,type,score,score_fieldvalue FROM collection_field_fieldvalue, field WHERE id_field=field.id"
params = []
if colID > -1:
sql += " AND id_collection=%s"
params.append(colID)
if id_field:
sql += " AND id_field=%s"
params.append(id_field)
if type:
sql += " AND type=%s"
params.append(type)
sql += " ORDER BY type, score desc, score_fieldvalue desc"
res = run_sql(sql, tuple(params))
return res
def get_col_pbx(colID=-1, ln='', position = ''):
"""Returns either all portalboxes associated with a collection, or based on either colID or language or both.
colID - collection id
ln - language id"""
sql = "SELECT id_portalbox, id_collection, ln, score, position, title, body FROM collection_portalbox, portalbox WHERE id_portalbox = portalbox.id"
params = []
if colID > -1:
sql += " AND id_collection=%s"
params.append(colID)
if ln:
sql += " AND ln=%s"
params.append(ln)
if position:
sql += " AND position=%s"
params.append(position)
sql += " ORDER BY position, ln, score desc"
res = run_sql(sql, tuple(params))
return res
def get_col_fmt(colID=-1):
"""Returns all formats currently associated with a collection, or for one specific collection
colID - the id of the collection"""
if colID not in [-1, "-1"]:
res = run_sql("SELECT id_format, id_collection, code, score FROM collection_format, format WHERE id_format = format.id AND id_collection=%s ORDER BY score desc", (colID, ))
else:
res = run_sql("SELECT id_format, id_collection, code, score FROM collection_format, format WHERE id_format = format.id ORDER BY score desc")
return res
def get_col_rnk(colID, ln):
""" Returns a list of the rank methods the given collection is attached to
colID - id from collection"""
try:
res1 = dict(run_sql("SELECT id_rnkMETHOD, '' FROM collection_rnkMETHOD WHERE id_collection=%s", (colID, )))
res2 = get_def_name('', "rnkMETHOD")
result = filter(lambda x: res1.has_key(x[0]), res2)
return result
except StandardError, e:
return ()
def get_pbx():
"""Returns all portalboxes"""
res = run_sql("SELECT id, title, body FROM portalbox ORDER by title,body")
return res
def get_fld_value(fldvID = ''):
"""Returns fieldvalue"""
sql = "SELECT id, name, value FROM fieldvalue"
params = []
if fldvID:
sql += " WHERE id=%s"
params.append(fldvID)
sql += " ORDER BY name"
res = run_sql(sql, tuple(params))
return res
def get_pbx_pos():
"""Returns a list of all the positions for a portalbox"""
position = {}
position["rt"] = "Right Top"
position["lt"] = "Left Top"
position["te"] = "Title Epilog"
position["tp"] = "Title Prolog"
position["ne"] = "Narrow by coll epilog"
position["np"] = "Narrow by coll prolog"
return position
def get_sort_nametypes():
"""Return a list of the various translationnames for the fields"""
type = {}
type['soo'] = 'Sort options'
type['seo'] = 'Search options'
type['sew'] = 'Search within'
return type
def get_fmt_nametypes():
"""Return a list of the various translationnames for the output formats"""
type = []
type.append(('ln', 'Long name'))
return type
def get_fld_nametypes():
"""Return a list of the various translationnames for the fields"""
type = []
type.append(('ln', 'Long name'))
return type
def get_col_nametypes():
"""Return a list of the various translationnames for the collections"""
type = []
type.append(('ln', 'Long name'))
return type
def find_last(tree, start_son):
"""Find the previous collection in the tree with the same father as start_son"""
id_dad = tree[start_son][3]
while start_son > 0:
start_son -= 1
if tree[start_son][3] == id_dad:
return start_son
def find_next(tree, start_son):
"""Find the next collection in the tree with the same father as start_son"""
id_dad = tree[start_son][3]
while start_son < len(tree) - 1:
start_son += 1
if tree[start_son][3] == id_dad:
return start_son
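# Note on the tree structure assumed by find_last()/find_next(): each tree
# entry is a tuple of the form (id, up, down, dad, rtype) (see the unpacking
# in remove_col_subcol() below), so both helpers scan for the nearest sibling
# sharing the same dad (index 3). They implicitly return None when no such
# sibling exists.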
def remove_col_subcol(id_son, id_dad, type):
"""Remove a collection as a son of another collection in the tree, if collection isn't used elsewhere in the tree, remove all registered sons of the id_son.
id_son - collection id of son to remove
id_dad - the id of the dad"""
try:
if id_son != id_dad:
tree = get_col_tree(id_son)
run_sql("DELETE FROM collection_collection WHERE id_son=%s and id_dad=%s", (id_son, id_dad))
else:
tree = get_col_tree(id_son, type)
run_sql("DELETE FROM collection_collection WHERE id_son=%s and id_dad=%s and type=%s", (id_son, id_dad, type))
if not run_sql("SELECT id_dad,id_son,type,score from collection_collection WHERE id_son=%s and type=%s", (id_son, type)):
for (id, up, down, dad, rtype) in tree:
run_sql("DELETE FROM collection_collection WHERE id_son=%s and id_dad=%s", (id, dad))
return (1, "")
except StandardError, e:
return (0, e)
def check_col(add_dad, add_son):
"""Check if the collection can be placed as a son of the dad without causing loops.
add_dad - collection id
add_son - collection id"""
try:
stack = [add_dad]
res = run_sql("SELECT id_dad FROM collection_collection WHERE id_dad=%s AND id_son=%s", (add_dad, add_son))
if res:
raise StandardError
while len(stack) > 0:
colID = stack.pop()
res = run_sql("SELECT id_dad FROM collection_collection WHERE id_son=%s", (colID, ))
for id in res:
if int(id[0]) == int(add_son):
# raise StandardError # this was the original but it didn't work
return 0
else:
stack.append(id[0])
return (1, "")
except StandardError, e:
return (0, e)
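# Illustrative reading of check_col(): the stack walk above climbs from
# add_dad through all its ancestors in collection_collection; if add_son is
# found among them, attaching it under add_dad would close a cycle, so the
# check fails. E.g. with a chain 1 -> 5 -> 12, check_col(12, 1) fails while
# check_col(12, 7) succeeds (the ids are made up).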
def attach_rnk_col(colID, rnkID):
"""attach rank method to collection
rnkID - id from rnkMETHOD table
colID - id of collection, as in collection table """
try:
res = run_sql("INSERT INTO collection_rnkMETHOD(id_collection, id_rnkMETHOD) values (%s,%s)", (colID, rnkID))
return (1, "")
except StandardError, e:
register_exception()
return (0, e)
def detach_rnk_col(colID, rnkID):
"""detach rank method from collection
rnkID - id from rnkMETHOD table
colID - id of collection, as in collection table """
try:
res = run_sql("DELETE FROM collection_rnkMETHOD WHERE id_collection=%s AND id_rnkMETHOD=%s", (colID, rnkID))
return (1, "")
except StandardError, e:
register_exception()
return (0, e)
def switch_col_treescore(col_1, col_2):
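"""Switch the tree scores of col_1 and col_2 so that they swap places in the collection tree ordering (each argument is a tree tuple whose index 0 is the son id and index 3 the dad id)."""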
try:
res1 = run_sql("SELECT score FROM collection_collection WHERE id_dad=%s and id_son=%s", (col_1[3], col_1[0]))
res2 = run_sql("SELECT score FROM collection_collection WHERE id_dad=%s and id_son=%s", (col_2[3], col_2[0]))
res = run_sql("UPDATE collection_collection SET score=%s WHERE id_dad=%s and id_son=%s", (res2[0][0], col_1[3], col_1[0]))
res = run_sql("UPDATE collection_collection SET score=%s WHERE id_dad=%s and id_son=%s", (res1[0][0], col_2[3], col_2[0]))
score - decides which portalbox is the most important
position - position on the page where the portalbox should appear."""
try:
if score:
res = run_sql("INSERT INTO collection_portalbox(id_portalbox, id_collection, ln, score, position) values (%s,%s,'%s',%s,%s)", (pbxID, colID, ln, score, position))
else:
res = run_sql("SELECT score FROM collection_portalbox WHERE id_collection=%s and ln=%s and position=%s ORDER BY score desc, ln, position", (colID, ln, position))
if res:
score = int(res[0][0])
else:
score = 0
res = run_sql("INSERT INTO collection_portalbox(id_portalbox, id_collection, ln, score, position) values (%s,%s,%s,%s,%s)", (pbxID, colID, ln, (score + 1), position))
return (1, "")
except StandardError, e:
register_exception()
return (0, e)
def add_col_fmt(colID, fmtID, score=''):
"""Add a output format to the collection.
colID - the id of the collection involved
fmtID - the id of the format.
score - the score of the format, decides sorting, if not given, place the format on top"""
try:
if score:
res = run_sql("INSERT INTO collection_format(id_format, id_collection, score) values (%s,%s,%s)", (fmtID, colID, score))
else:
res = run_sql("SELECT score FROM collection_format WHERE id_collection=%s ORDER BY score desc", (colID, ))
if res:
score = int(res[0][0])
else:
score = 0
res = run_sql("INSERT INTO collection_format(id_format, id_collection, score) values (%s,%s,%s)", (fmtID, colID, (score + 1)))
return (1, "")
except StandardError, e:
register_exception()
return (0, e)
def add_col_fld(colID, fldID, type, fldvID=''):
"""Add a sort/search/field to the collection.
colID - the id of the collection involved
fldID - the id of the field.
fldvID - the id of the fieldvalue.
type - which type, seo, sew...
score - the score of the field; decides sorting. If not given, place the field on top"""
try:
if fldvID and fldvID not in [-1, "-1"]:
run_sql("DELETE FROM collection_field_fieldvalue WHERE id_collection=%s AND id_field=%s and type=%s and id_fieldvalue is NULL", (colID, fldID, type))
res = run_sql("SELECT score FROM collection_field_fieldvalue WHERE id_collection=%s AND id_field=%s and type=%s ORDER BY score desc", (colID, fldID, type))
if res:
score = int(res[0][0])
res = run_sql("SELECT score_fieldvalue FROM collection_field_fieldvalue WHERE id_collection=%s AND id_field=%s and type=%s ORDER BY score_fieldvalue desc", (colID, fldID, type))
else:
res = run_sql("SELECT score FROM collection_field_fieldvalue WHERE id_collection=%s and type=%s ORDER BY score desc", (colID, type))
if res:
score = int(res[0][0]) + 1
else:
score = 1
res = run_sql("SELECT id_collection,id_field,id_fieldvalue,type,score,score_fieldvalue FROM collection_field_fieldvalue where id_field=%s and id_collection=%s and type=%s and id_fieldvalue=%s", (fldID, colID, type, fldvID))
if not res:
run_sql("UPDATE collection_field_fieldvalue SET score_fieldvalue=score_fieldvalue+1 WHERE id_field=%s AND id_collection=%s and type=%s", (fldID, colID, type))
res = run_sql("INSERT INTO collection_field_fieldvalue(id_field, id_fieldvalue, id_collection, type, score, score_fieldvalue) values (%s,%s,%s,%s,%s,%s)", (fldID, fldvID, colID, type, score, 1))
else:
return (0, (1, "Already exists"))
else:
res = run_sql("SELECT id_collection,id_field,id_fieldvalue,type,score,score_fieldvalue FROM collection_field_fieldvalue WHERE id_collection=%s AND type=%s and id_field=%s and id_fieldvalue is NULL", (colID, type, fldID))
if res:
return (0, (1, "Already exists"))
else:
run_sql("UPDATE collection_field_fieldvalue SET score=score+1")
res = run_sql("INSERT INTO collection_field_fieldvalue(id_field, id_collection, type, score,score_fieldvalue) values (%s,%s,%s,%s, 0)", (fldID, colID, type, 1))
return (1, "")
except StandardError, e:
register_exception()
return (0, e)
def modify_dbquery(colID, dbquery=None):
"""Modify the dbquery of an collection.
colID - the id of the collection involved
dbquery - the new dbquery"""
# BTW, sometimes '' is passed instead of None, so change it to None
if not dbquery:
dbquery = None
try:
res = run_sql("UPDATE collection SET dbquery=%s WHERE id=%s", (dbquery, colID))
colID - collection the id_1 or id_2 is connected to
id_1/id_2 - id field from tables like format..portalbox...
table - name of the table"""
try:
res1 = run_sql("SELECT score_fieldvalue FROM collection_field_fieldvalue WHERE id_collection=%s and id_field=%s and id_fieldvalue=%s", (colID, id_1, fldvID_1))
res2 = run_sql("SELECT score_fieldvalue FROM collection_field_fieldvalue WHERE id_collection=%s and id_field=%s and id_fieldvalue=%s", (colID, id_1, fldvID_2))
if res1[0][0] == res2[0][0]:
return (0, (1, "Cannot rearrange the selected fields; either rearrange by name or use the MySQL client to fix the problem."))
else:
res = run_sql("UPDATE collection_field_fieldvalue SET score_fieldvalue=%s WHERE id_collection=%s and id_field=%s and id_fieldvalue=%s", (res2[0][0], colID, id_1, fldvID_1))
res = run_sql("UPDATE collection_field_fieldvalue SET score_fieldvalue=%s WHERE id_collection=%s and id_field=%s and id_fieldvalue=%s", (res1[0][0], colID, id_1, fldvID_2))
return (1, "")
except Exception, e:
register_exception()
return (0, e)
def switch_pbx_score(colID, id_1, id_2, sel_ln):
"""Switch the scores of id_1 and id_2 in the table given by the argument.
colID - collection the id_1 or id_2 is connected to
id_1/id_2 - id field from tables like format..portalbox...
table - name of the table"""
try:
res1 = run_sql("SELECT score FROM collection_portalbox WHERE id_collection=%s and id_portalbox=%s and ln=%s", (colID, id_1, sel_ln))
res2 = run_sql("SELECT score FROM collection_portalbox WHERE id_collection=%s and id_portalbox=%s and ln=%s", (colID, id_2, sel_ln))
if res1[0][0] == res2[0][0]:
return (0, (1, "Cannot rearrange the selected fields; either rearrange by name or use the MySQL client to fix the problem."))
res = run_sql("UPDATE collection_portalbox SET score=%s WHERE id_collection=%s and id_portalbox=%s and ln=%s", (res2[0][0], colID, id_1, sel_ln))
res = run_sql("UPDATE collection_portalbox SET score=%s WHERE id_collection=%s and id_portalbox=%s and ln=%s", (res1[0][0], colID, id_2, sel_ln))
return (1, "")
except Exception, e:
register_exception()
return (0, e)
def switch_score(colID, id_1, id_2, table):
"""Switch the scores of id_1 and id_2 in the table given by the argument.
colID - collection the id_1 or id_2 is connected to
id_1/id_2 - id field from tables like format..portalbox...
table - name of the table"""
try:
res1 = run_sql("SELECT score FROM collection_%s WHERE id_collection=%%s and id_%s=%%s" % (table, table), (colID, id_1))
res2 = run_sql("SELECT score FROM collection_%s WHERE id_collection=%%s and id_%s=%%s" % (table, table), (colID, id_2))
if res1[0][0] == res2[0][0]:
return (0, (1, "Cannot rearrange the selected fields; either rearrange by name or use the MySQL client to fix the problem."))
res = run_sql("UPDATE collection_%s SET score=%%s WHERE id_collection=%%s and id_%s=%%s" % (table, table), (res2[0][0], colID, id_1))
res = run_sql("UPDATE collection_%s SET score=%%s WHERE id_collection=%%s and id_%s=%%s" % (table, table), (res1[0][0], colID, id_2))
return page(title=_("Role authorization request"), req=req, body=_("This request for an authorization has already been authorized."), uid=webuser.getUid(req), navmenuid='youraccount', language=args['ln'], secure_page_p=1)
return page(title=title,
body=webaccount.perform_back(
_("You have successfully obtained an authorization as %(x_role)s! "
"This authorization will last until %(x_expiration)s and until "
"you close your browser if you are a guest user.") %
body = "<p>" + _("You have confirmed the validity of your email"
" address!") + "</p>"
if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS == 1:
body += "<p>" + _("Please, wait for the administrator to "
"enable your account.") + "</p>"
else:
uid = webuser.update_Uid(req, email)
body += "<p>" + _("You can now go to %(x_url_open)syour account page%(x_url_close)s.") % {'x_url_open' : '<a href="/youraccount/display?ln=%s">' % args['ln'], 'x_url_close' : '</a>'} + "</p>"
body = "<p>" + _("You have already confirmed the validity of your email address!") + "</p>"
if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS == 1:
body += "<p>" + _("Please, wait for the administrator to "
"enable your account.") + "</p>"
else:
body += "<p>" + _("You can now go to %(x_url_open)syour account page%(x_url_close)s.") % {'x_url_open' : '<a href="/youraccount/display?ln=%s">' % args['ln'], 'x_url_close' : '</a>'} + "</p>"
text=_("This request for an authorization is not valid or"
" is expired."), navmenuid='youraccount')
def resetpassword(self, req, form):
args = wash_urlargd(form, {
'k' : (str, ''),
'reset' : (int, 0),
'password' : (str, ''),
'password2' : (str, '')
})
_ = gettext_set_language(args['ln'])
title = _('Reset password')
reset_key = args['k']
try:
email = mail_cookie_check_pw_reset(reset_key)
except InvenioWebAccessMailCookieDeletedError:
return page(title=title, req=req, body=_("This request for resetting a password has already been used."), uid=webuser.getUid(req), navmenuid='youraccount', language=args['ln'], secure_page_p=1)
res = run_sql("SELECT email FROM user WHERE id=%s", (uid,))
if res:
email = res[0][0]
else:
email = None
if not email:
mess += '<p>' + _("Unable to switch to external login method %s, because your email address is unknown.") % cgi.escape(args['login_method'])
else:
try:
if not CFG_EXTERNAL_AUTHENTICATION[args['login_method']].user_exists(email):
mess += '<p>' + _("Unable to switch to external login method %s, because your email address is unknown to the external login system.") % cgi.escape(args['login_method'])
mess += '<p>' + _("The external login method %s does not support email address based logins. Please contact the site administrators.") % cgi.escape(args['login_method'])
mess = _("Desired nickname %s already exists in the database.") % cgi.escape(args['p_nickname'])
mess += " " + _("Please try again.")
act = "/youraccount/register?ln=%s" % args['ln']
title = _("Registration failure")
elif ruid == 5:
mess = _("Users cannot register themselves, only admin can register them.")
act = "/youraccount/register?ln=%s" % args['ln']
title = _("Registration failure")
elif ruid == 6:
mess = _("The site is having troubles in sending you an email for confirming your email address.") + _("The error has been logged and will be taken in consideration as soon as possible.")
raise InvenioWebSubmitWarning(_('Unable to move field at position %s to position %s on page %s of submission \'%s%s\' - Invalid Field Position Numbers') % (movefieldfromposn, movefieldtoposn, pagenum, action, doctype))
## failed to swap 2 fields - couldn't move field1 to temp position
try:
            raise InvenioWebSubmitWarning(_('Unable to swap field at position %s with field at position %s on page %s of submission %s%s - could not move field at position %s to temporary field location') % (movefieldfromposn, movefieldtoposn, pagenum, action, doctype, movefieldfromposn))
## failed to swap 2 fields on submission page - couldn't move field2 to field1 position
try:
            raise InvenioWebSubmitWarning(_('Unable to swap field at position %s with field at position %s on page %s of submission %s%s - could not move field at position %s to position %s. Please ask Admin to check that a field was not stranded in a temporary position') % (movefieldfromposn, movefieldtoposn, pagenum, action, doctype, movefieldfromposn, movefieldtoposn))
## failed to swap 2 fields in submission page - couldnt swap field at temp position to field2 position
try:
            raise InvenioWebSubmitWarning(_('Unable to swap field at position %s with field at position %s on page %s of submission %s%s - could not move field that was located at position %s to position %s from temporary position. Field is now stranded in temporary position and must be corrected manually by an Admin') % (movefieldfromposn, movefieldtoposn, pagenum, action, doctype, movefieldfromposn, movefieldtoposn))
user_msg.append("""Unable to move field from position %s to position %s on page %s of submission %s%s - Field-order is now broken and must be corrected by Admin""" \
## failed to decrement the position of all fields below the field that was moved to a temp position
try:
            raise InvenioWebSubmitWarning(_('Unable to move field at position %s to position %s on page %s of submission %s%s - could not decrement the position of the fields below position %s. Tried to recover - please check that field ordering is not broken') % (movefieldfromposn, movefieldtoposn, pagenum, action, doctype, movefieldfromposn))
## failed to increment position of fields in and below position into which 'movefromfieldposn' is to be inserted
try:
raise InvenioWebSubmitWarning(_('Unable to move field at position %s to position %s on page %s of submission %s%s - could not increment the position of the fields at and below position %s. The field that was at position %s is now stranded in a temporary position.') % (movefieldfromposn, movefieldtoposn, pagenum, action, doctype, movefieldtoposn, movefieldfromposn))
user_msg.append("""Unable to move field from position %s to position %s on page %s of submission %s%s - Field-order is now broken and must be corrected by Admin""" \
raise InvenioWebSubmitWarning(_('Moved field from position %s to position %s on page %s of submission \'%s%s\'.') % (movefieldfromposn, movefieldtoposn, pagenum, action, doctype))
full_field['typename'] = field_instance[1] ## TODO: Investigate this, Not used?
## It also seems to refer to pagenum.
# The 'R' fields must be executed in the engine's environment,
# as the runtime functions access some global and local
# variables.
        if full_field['type'] == 'R':
            try:
                co = compile(full_field['htmlcode'].replace("\r\n", "\n"), "<string>", "exec")
the_globals['text'] = ''
exec co in the_globals
text = the_globals['text']
except:
register_exception(req=req, alert_admin=True, prefix="Error in evaluating response element %s with globals %s" % (pprint.pformat(full_field), pprint.pformat(the_globals)))
raise
else:
            text = websubmit_templates.tmpl_submit_field(ln=ln, field=full_field)
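        # A minimal, self-contained illustration of the compile/exec
        # pattern used for 'R' fields above, assuming the element code
        # stores its output in a 'text' variable (the names below are
        # hypothetical, for illustration only):
        #
        #   the_globals = {'text': '', 'who': 'world'}
        #   co = compile("text = 'Hello, ' + who", "<string>", "exec")
        #   exec co in the_globals
        #   assert the_globals['text'] == 'Hello, world'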
# we now determine the exact type of the created field
        if full_field['type'] not in ['D', 'R']:
field.append(full_field['name'])
level.append(field_instance[5])
fullDesc.append(field_instance[4])
txt.append(field_instance[6])
check.append(field_instance[7])
# If the field is not user-defined, we try to determine its type
# (select, radio, file upload...)
# check whether it is a select field or not
if re.search("SELECT", text, re.IGNORECASE) is not None:
select.append(1)
else:
select.append(0)
# checks whether it is a radio field or not
if re.search(r"TYPE=[\"']?radio", text, re.IGNORECASE) is not None:
radio.append(1)
else:
radio.append(0)
# checks whether it is a file upload or not
if re.search(r"TYPE=[\"']?file", text, re.IGNORECASE) is not None:
upload.append(1)
else:
upload.append(0)
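            # For instance, a rendered snippet such as
            #   <input type="file" name="myfile" />
            # matches the TYPE=["']?file pattern above, so the field is
            # flagged as a file upload.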
# if the field description contains the "<COMBO>" string, replace
# it by the category selected on the document page submission page
combofile = "combo%s" % doctype
if os.path.exists("%s/%s" % (curdir, combofile)):
f = open("%s/%s" % (curdir, combofile), "r")
combo = f.read()
f.close()
else:
combo = ""
text = text.replace("<COMBO>", combo)
# if there is a <YYYY> tag in it, replace it by the current year
year = time.strftime("%Y");
text = text.replace("<YYYY>", year)
# if there is a <TODAY> tag in it, replace it by the current year
today = time.strftime("%d/%m/%Y");
text = text.replace("<TODAY>", today)
fieldhtml.append(text)
else:
select.append(0)
radio.append(0)
upload.append(0)
# field.append(value) - initial version, not working with JS, taking a submitted value
field.append(field_instance[3])
level.append(field_instance[5])
txt.append(field_instance[6])
fullDesc.append(field_instance[4])
check.append(field_instance[7])
fieldhtml.append(text)
full_field['fullDesc'] = field_instance[4]
full_field['text'] = text
        # If a file exists with the name of the field, we extract the saved value
text = ''
if os.path.exists(os.path.join(curdir, full_field['name'])):
## There was a serious function-error. Execution ends.
if CFG_DEVEL_SITE:
raise
else:
return warning_page(_("A serious function-error has been encountered. Adminstrators have been alerted. <br /><em>Please not that this might be due to wrong characters inserted into the form</em> (e.g. by copy and pasting some text from a PDF file)."), req, ln)
except InvenioWebSubmitFunctionStop, e:
## For one reason or another, one of the functions has determined that
## the data-processing phase (i.e. the functions execution) should be
## halted and the user should be returned to the form interface once
## more. (NOTE: Redirecting the user to the Web-form interface is
## currently done using JavaScript. The "InvenioWebSubmitFunctionStop"
## exception contains a "value" string, which is effectively JavaScript
## - probably an alert box and a form that is submitted). **THIS WILL
## CHANGE IN THE FUTURE WHEN JavaScript IS REMOVED!**
if e.value is not None:
function_content = e.value
else:
function_content = e
else:
## No function exceptions (InvenioWebSubmitFunctionStop,
## InvenioWebSubmitFunctionError) were raised by the functions. Propose
## the next action (if applicable), and log the submission as finished:
## If the action was mandatory we propose the next
## mandatory action (if any)
if action_score != -1 and last_step == 1:
next_action = Propose_Next_Action(doctype, \
action_score, \
access, \
current_level, \
indir)
## If we are in the last step of an action, we can update
log_function(curdir, "***Warning*** from %s: %s" \
% (function_name, str(err)), start_time)
## Reset "func_returnval" to None:
func_returnval = None
register_exception(req=req, alert_admin=True, prefix="Warning in executing function %s with globals %s" % (pprint.pformat(currfunction), pprint.pformat(the_globals)))
                    ## The function returned None. Don't keep that value
                    ## as currfunction['text']; replace it with the
                    ## empty string.
currfunction['text'] = ""
else:
currfunction['error'] = 1
functions.append(currfunction)
except InvenioWebSubmitFunctionStop, err:
## The submission asked to stop execution. This is
## ok. Do not alert admin, and raise exception further
log_function(curdir, "***Stop*** from %s: %s" \
% (function_name, str(err)), start_time)
raise
except:
register_exception(req=req, alert_admin=True, prefix="Error in executing function %s with globals %s" % (pprint.pformat(currfunction), pprint.pformat(the_globals)))
raise
t = websubmit_templates.tmpl_function_output(
ln = ln,
display_on = (dismode == 'S'),
action = action,
doctype = doctype,
step = step,
functions = functions,
)
else :
if dismode == 'S':
t = "<br /><br /><b>" + _("The chosen action is not supported by the document type.") + "</b>"
raise ValueError("No version information returned")
if [int(number) for number in output.split('.')] < [int(number) for number in CFG_GS_MINIMAL_VERSION_FOR_PDFX.split('.')]:
print >> sys.stderr, "Conversion of PS or PDF to PDF/X is not possible because the minimal gs version for the executable %s is not met: it should be %s but %s has been found" % (CFG_PATH_GS, CFG_GS_MINIMAL_VERSION_FOR_PDFX, output)
return False
except Exception, err:
print >> sys.stderr, "Conversion of PS or PDF to PDF/X is not possible because it's not possible to retrieve the gs version using the executable %s: %s" % (CFG_PATH_GS, err)
return False
if not CFG_PATH_PDFINFO:
if verbose:
print >> sys.stderr, "Conversion of PS or PDF to PDF/X is not possible because the pdfinfo executable is not available"
return False
if not os.path.exists(CFG_ICC_PATH):
if verbose:
print >> sys.stderr, "Conversion of PS or PDF to PDF/X is not possible because %s does not exists. Have you run make install-pdfa-helper-files?" % CFG_ICC_PATH
return False
return True
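## A note on the gs version checks above: comparing lists of integers
## avoids the pitfalls of comparing dotted version strings directly.
## A minimal sketch; the helper name is hypothetical and not part of
## this module:
def _version_as_ints(version):
    """Turn a dotted version string such as '8.61' into [8, 61]."""
    return [int(number) for number in version.split('.')]
## e.g. _version_as_ints('10.0') < _version_as_ints('9.0') is False (correct),
## whereas the plain string comparison '10.0' < '9.0' is True (wrong).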
def can_pdfa(verbose=False):
"""Return True if it's possible to generate PDF/As."""
if not CFG_PATH_PDFTOPS:
if verbose:
print >> sys.stderr, "Conversion of PS or PDF to PDF/A is not possible because the pdftops executable is not available"
return False
if not CFG_PATH_GS:
if verbose:
print >> sys.stderr, "Conversion of PS or PDF to PDF/A is not possible because the gs executable is not available"
raise ValueError("No version information returned")
if [int(number) for number in output.split('.')] < [int(number) for number in CFG_GS_MINIMAL_VERSION_FOR_PDFA.split('.')]:
print >> sys.stderr, "Conversion of PS or PDF to PDF/A is not possible because the minimal gs version for the executable %s is not met: it should be %s but %s has been found" % (CFG_PATH_GS, CFG_GS_MINIMAL_VERSION_FOR_PDFA, output)
return False
except Exception, err:
print >> sys.stderr, "Conversion of PS or PDF to PDF/A is not possible because it's not possible to retrieve the gs version using the executable %s: %s" % (CFG_PATH_GS, err)
return False
if not CFG_PATH_PDFINFO:
if verbose:
print >> sys.stderr, "Conversion of PS or PDF to PDF/A is not possible because the pdfinfo executable is not available"
return False
if not os.path.exists(CFG_ICC_PATH):
if verbose:
print >> sys.stderr, "Conversion of PS or PDF to PDF/A is not possible because %s does not exists. Have you run make install-pdfa-helper-files?" % CFG_ICC_PATH
return False
return True
def can_perform_ocr(verbose=False):
"""Return True if it's possible to perform OCR."""
if not CFG_CAN_DO_OCR:
if verbose:
print >> sys.stderr, "OCR is not supported because either the pyPdf of ReportLab Python libraries are missing"
return False
if not CFG_PATH_OCROSCRIPT:
if verbose:
print >> sys.stderr, "OCR is not supported because the ocroscript executable is not available"
return False
if not CFG_PATH_PDFTOPPM:
if verbose:
print >> sys.stderr, "OCR is not supported because the pdftoppm executable is not available"
get_file_converter_logger().debug("Extracted title is %s" % title)
if os.path.exists(CFG_ICC_PATH):
shutil.copy(CFG_ICC_PATH, working_dir)
else:
raise InvenioWebSubmitFileConverterError('ERROR: ISOCoatedsb.icc file missing. Have you run "make install-pdfa-helper-files" as part of your Invenio deployment?')
get_file_converter_logger().debug("Extracted title is %s" % title)
if os.path.exists(CFG_ICC_PATH):
shutil.copy(CFG_ICC_PATH, working_dir)
else:
raise InvenioWebSubmitFileConverterError('ERROR: ISOCoatedsb.icc file missing. Have you run "make install-pdfa-helper-files" as part of your Invenio deployment?')
    @param ln: a two-letter language code to give the OCR tool a hint.
    @param return_working_dir: if set to True, return the output_file path and the working_dir path instead of deleting the working_dir. This is useful if you need the intermediate images to rebuild a PDF.
parser.add_option("-o", "--output", dest="output_name", help="the desired output FILE (if not specified a new file will be generated with the desired output format)")
parser.add_option("--without-pdfa", action="store_false", dest="pdf_a", default=True, help="don't force creation of PDF/A PDFs")
parser.add_option("--without-pdfopt", action="store_false", dest="pdfopt", default=True, help="don't force optimization of PDFs files")
parser.add_option("--without-ocr", action="store_false", dest="ocr", default=True, help="don't force OCR")
parser.add_option("--can-convert", dest="can_convert", help="display all the possible format that is possible to generate from the given format", metavar="FORMAT")
parser.add_option("--is-ocr-needed", dest="check_ocr_is_needed", help="check if OCR is needed for the FILE specified", metavar="FILE")
parser.add_option("-t", "--title", dest="title", help="specify the title (used when creating PDFs)", metavar="TITLE")
parser.add_option("-l", "--language", dest="ln", help="specify the language (used when performing OCR, e.g. en, it, fr...)", metavar="LN", default='en')
fields = [x.group("value") for x in fields if x is not None]
fields = [x for x in fields if x not in ("Select", "select")]
else:
raise InvenioWebSubmitFunctionError("cannot find fields to modify")
#output some text
t = t+"<CENTER bgcolor=\"white\">The document <B>%s</B> has been found in the database.</CENTER><br />Please modify the following fields:<br />Then press the 'END' button at the bottom of the page<br />\n" % rn
for field in fields:
subfield = ""
value = ""
marccode = ""
text = ""
# retrieve and display the modification text
t = t + "<FONT color=\"darkblue\">\n"
res = run_sql("SELECT modifytext FROM sbmFIELDDESC WHERE name=%s", (field,))
        if len(res) > 0:
t = t + "<small>%s</small> </FONT>\n" % res[0][0]
# retrieve the marc code associated with the field
res = run_sql("SELECT marccode FROM sbmFIELDDESC WHERE name=%s", (field,))
if len(res) > 0:
marccode = res[0][0]
# then retrieve the previous value of the field
if os.path.exists("%s/%s" % (curdir, "Create_Modify_Interface_DONE")):
# Page has been reloaded - get field value from text file on server, not from DB record
value = Create_Modify_Interface_getfieldval_fromfile(curdir, field)
else:
# First call to page - get field value from DB record
value = Create_Modify_Interface_getfieldval_fromDBrec(marccode, sysno)
# If field is a date value, transform date into format DD/MM/YYYY:
value = Create_Modify_Interface_transform_date(value)
res = run_sql("SELECT * FROM sbmFIELDDESC WHERE name=%s", (field,))
## No BibUpload scheduled? Then we don't care about bibsched
return ""
## Let's get an estimate about how many processes are waiting in the queue.
## Our bibupload might be somewhere in it, but it's not really so important
## WRT informing the user.
_ = gettext_set_language(ln)
res = run_sql("SELECT id,proc,runtime,status,priority FROM schTASK WHERE (status='WAITING' AND runtime<=NOW()) OR status='SLEEPING'")
pre = _("Note that your submission as been inserted into the bibliographic task queue and is waiting for execution.\n")
if server_pid():
## BibSched is up and running
msg = _("The task queue is currently running in automatic mode, and there are currently %s tasks waiting to be executed. Your record should be available within a few minutes and searchable within an hour or thereabouts.\n") % (len(res))
else:
msg = _("Because of a human intervention or a temporary problem, the task queue is currently set to the manual mode. Your submission is well registered but may take longer than usual before it is fully integrated and searchable.\n")
return pre + msg
def txt2html(msg):
"""Transform newlines into paragraphs."""
rows = msg.split('\n')
rows = [cgi.escape(row) for row in rows]
rows = "<p>" + "</p><p>".join(rows) + "</p>"
return rows
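## For example (a minimal illustration):
##   txt2html("a < b\nsecond line")
## returns '<p>a &lt; b</p><p>second line</p>': cgi.escape() protects
## HTML metacharacters before the rows are wrapped in paragraph tags.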
def get_all_values_in_curdir(curdir):
"""
Return a dictionary with all the content of curdir.
@param curdir: the path to the current directory.
@type curdir: string
@return: the content
@rtype: dict
"""
ret = {}
for filename in os.listdir(curdir):
if not filename.startswith('.') and os.path.isfile(os.path.join(curdir, filename)):
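            ## (The fragment is truncated here; a minimal completion,
            ## assuming the values are the raw file contents:)
            fd = open(os.path.join(curdir, filename))
            ret[filename] = fd.read()
            fd.close()
    return ret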
### If no doctype given, check list of acceptable formats for input file ext doctype
### FIXME: This should go into the for-loop to match each individual input filename
if outputfmt:
inputext = os.path.splitext(inputfn)[1]
inputfmt = fmts.byextension(inputext)
if inputfmt:
for fmt in outputfmt:
if inputfmt[0].doctype == fmt.doctype:
doctype = inputfmt[0].doctype
outputfmt = fmt
break
else:
outputfmt = outputfmt[0]
# print >> sys.stderr, 'unoconv: format `%s\' is part of multiple doctypes %s, selecting `%s\'.' % (format, [fmt.doctype for fmt in outputfmt], outputfmt[0].doctype)
else:
outputfmt = outputfmt[0]
### No format found, throw error
if not outputfmt:
if doctype:
print >> sys.stderr, 'unoconv: format [%s/%s] is not known to unoconv.' % (op.doctype, op.format)
else:
print >> sys.stderr, 'unoconv: format [%s] is not known to unoconv.' % op.format
die(1)
return outputfmt
def convert(self, inputfn):
global exitcode
document = None
outputfmt = self.getformat(inputfn)
if op.verbose > 0:
print >> sys.stderr, 'Input file:', inputfn
if not os.path.exists(inputfn):
print >> sys.stderr, 'unoconv: file `%s\' does not exist.' % inputfn
error("unoconv: could not find an existing connection to LibreOffice at %s:%s." % (op.server, op.port))
if op.connection:
info(0, "Please start an LibreOffice instance on server '%s' by doing:\n\n unoconv --listener --server %s --port %s\n\nor alternatively:\n\n soffice -nologo -nodefault -accept=\"%s\"" % (op.server, op.server, op.port, op.connection))
else:
info(0, "Please start an LibreOffice instance on server '%s' by doing:\n\n unoconv --listener --server %s --port %s\n\nor alternatively:\n\n soffice -nologo -nodefault -accept=\"socket,host=%s,port=%s;urp;\"" % (op.server, op.server, op.port, op.server, op.port))
info(0, "Please start an soffice instance on server '%s' by doing:\n\n soffice -nologo -nodefault -accept=\"socket,host=localhost,port=%s;urp;\"" % (op.server, op.port))
exitcode = 1
# except UnboundLocalError:
# die(252, "Failed to connect to remote listener.")
except OSError:
error("Warning: failed to launch Office suite. Aborting.")
res = run_sql("select ldocname from sbmDOCTYPE where sdocname=%s", (doctype,))
title = res[0][0]
sth = run_sql("select lname from sbmCATEGORIES where doctype=%s and sname=%s order by lname",(doctype, categ,))
if len(sth) != 0:
categname = sth[0][0]
else:
categname = "Unknown"
docs = []
sth = run_sql("select rn,status from sbmCPLXAPPROVAL where doctype=%s and categ=%s and type=%s order by status DESC,rn DESC",(doctype, categ, apptype))
for arr in sth:
docs.append({
'RN': arr[0],
'status': arr[1],
})
t = websubmit_templates.tmpl_publiline_selectcplxdocument(
return run_sql("SELECT COUNT(*) FROM sbmCPLXAPPROVAL WHERE doctype=%s AND categ=%s AND status=%s AND type=%s",(doctype, categ, status, apptype,))[0][0]
def __db_get_infos (key):
return run_sql("SELECT status,id_group,id_bskBASKET,id_EdBoardGroup,dFirstReq,dLastReq,dEdBoardSel,dRefereeSel,dRefereeRecom,dEdBoardRecom,dPubComRecom,dProjectLeaderAction FROM sbmCPLXAPPROVAL WHERE rn=%s and type=%s", key)
def __db_set_EdBoardSel_time (key):
run_sql("UPDATE sbmCPLXAPPROVAL SET dEdBoardSel=NOW() WHERE rn=%s and type=%s", key)
Scientific Note approval for document %s has been submitted to the CERN Document Server.
Your approval is requested for this document. Once you have received recommendations from both the referee and the publication committee chair, you will be able to make your decision.
Requested subcategory: %s
Title: %s
Author(s): %s
To access the document(s), select the file(s) from the location:
<%s/%s/%s>
The %s has made a recommendation for the document. He/she said the following:
%s
You can approve this document by visiting this page:
<%s>
You can also check the status of the document from:
if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
return page_not_authorized(req, "../sub/",
navmenuid='submit')
try:
raise DeprecationWarning, 'submit/sub handler has been used. Please use submit/direct. e.g. "submit/sub?RN=123@SBIFOO" -> "submit/direct?RN=123&sub=SBIFOO"'
if 'cms-publication-committee-chair [CERN]' not in user_info['group']:
return page_not_authorized(req, "../submit", text="In order to access this submission interface you need to be member of the CMS Publication Committee Chair.",
navmenuid='submit')
elif doctype == 'ATLPUB' and 'cds-admin [CERN]' not in user_info['group'] and not user_info['email'].lower() == 'cds.support@cern.ch':
register_exception(req=req, prefix="Wrong GET parameter set in calling a legacy publisher handler for %s: expected_args=%s, found_args=%s" % (possible_handler, repr(expected_args), repr(req.form.keys())), alert_admin=CFG_DEVEL_SITE)
raise InvenioWebAccessFireroleError("Error while compiling rule %s (line %s): %s is a reserved key and can not be used in FireRole rules!" % (row, line, field))
raise InvenioWebAccessFireroleError("Syntax error while compiling rule %s (line %s): %s is not a valid date with format YYYY-MM-DD because %s!" % (row, line, expr, msg))
raise InvenioWebAccessFireroleError("Syntax error while compiling rule %s (line %s): %s is not a valid ip group because %s!" % (row, line, expr, msg))
else:
expressions_list.append((False, expr[1:-1]))
expressions_list = tuple(expressions_list)
if field in ('from', 'until'):
if len(expressions_list) != 1:
raise InvenioWebAccessFireroleError("Error when compiling rule %s (line %s): exactly one date is expected when using 'from' or 'until', but %s were found" % (row, line, len(expressions_list)))
if not_p:
raise InvenioWebAccessFireroleError("Error when compiling rule %s (line %s): 'not' is not allowed when using 'from' or 'until'" % (row, line))
emails = run_sql("SELECT user.email FROM usergroup JOIN user_usergroup ON usergroup.id=user_usergroup.id_usergroup JOIN user ON user.id=user_usergroup.id_user WHERE usergroup.name=%s", (expr, ))
for email in emails:
authorized_emails.add(email[0].lower().strip())
elif field == 'email':
for reg_p, expr in expressions_list:
if reg_p:
continue
authorized_emails.add(expr.lower().strip())
elif field == 'uid':
for reg_p, expr in expressions_list:
if reg_p:
continue
email = run_sql("SELECT email FROM user WHERE id=%s", (expr, ))
raise InvenioWebCommentWarning(_('You have been subscribed to this discussion. From now on, you will receive an email whenever a new comment is posted.'))
except InvenioWebCommentWarning, exc:
register_exception(stream='warning', req=req)
warnings.append((exc.message, 'green'))
#warnings.append(('WRN_WEBCOMMENT_SUBSCRIBED',))
elif subscribed == -1:
try:
raise InvenioWebCommentWarning(_('You have been unsubscribed from this discussion.'))
    Get the comments collapsed for the given user on the given recid page
    """
    # The collapsed state is not an attribute of the cmtRECORDCOMMENT table
    # (it varies per user), so it cannot be found when querying for the
    # comment. We must therefore provide an efficient way to retrieve
    # the collapsed state for a given discussion page and user.
query = """SELECT id_cmtRECORDCOMMENT from cmtCOLLAPSED WHERE id_user=%s and id_bibrec=%s"""
params = (uid, recid)
return [res[0] for res in run_sql(query, params)]
def is_comment_deleted(comid):
"""
    Return True if the comment is deleted, else False.
@param comid: ID of comment to check
"""
query = "SELECT status from cmtRECORDCOMMENT WHERE id=%s"
params = (comid,)
res = run_sql(query, params)
if res and res[0][0] != 'ok':
return True
return False
def perform_display_your_comments(user_info,
page_number=1,
selected_order_by_option="lcf",
selected_display_number_option="all",
selected_display_format_option="rc",
ln=CFG_SITE_LANG):
"""
Display all comments submitted by the user.
@TODO: support reviews too
@param user_info: standard user info object.
@param page_number: page on which the user is.
@type page_number: integer
    @param selected_order_by_option: selected ordering option. Can be one of:
- ocf: Oldest comment first
- lcf: Latest comment first
- grof: Group by record, oldest commented first
- grlf: Group by record, latest commented first
@type selected_order_by_option: string
@param selected_display_number_option: number of results to show per page. Can be a string-digit or 'all'.
@type selected_display_number_option: string
@param selected_display_format_option: how to show records. Can be one of:
- rc: Records and comments
- ro: Records only
- co: Comments only
@type selected_display_format_option: string
    @param ln: language
@type ln: string
"""
query_params = ""
nb_total_pages = 0
if selected_display_format_option in ('rc', 'co'):
nb_total_results = run_sql("SELECT count(id) from cmtRECORDCOMMENT WHERE id_user=%s AND star_score = 0", \
(user_info['uid'], ))[0][0]
else:
if selected_order_by_option in ('grlf', 'grof'):
nb_total_results = run_sql("SELECT count(distinct(id_bibrec)) from cmtRECORDCOMMENT WHERE id_user=%s AND star_score = 0", \
(user_info['uid'], ))[0][0]
else:
nb_total_results = run_sql("SELECT count(id_bibrec) from cmtRECORDCOMMENT WHERE id_user=%s AND star_score = 0", \
(user_info['uid'], ))[0][0]
if page_number < 1:
page_number = 1
if selected_display_number_option != 'all' and \
not selected_display_number_option.isdigit():
# must be some garbage
selected_display_number_option = 'all'
query = ''
if selected_order_by_option == "lcf":
query_params += " ORDER BY date_creation DESC"
elif selected_order_by_option == "ocf":
query_params += " ORDER BY date_creation ASC"
elif selected_order_by_option == "grlf":
query = "SELECT cmt.id_bibrec, cmt.id, cmt.date_creation, cmt.body, cmt.status, cmt.in_reply_to_id_cmtRECORDCOMMENT FROM cmtRECORDCOMMENT as cmt left join (SELECT max(date_creation) as maxdatecreation, id_bibrec FROM cmtRECORDCOMMENT WHERE id_user=%s AND star_score = 0 GROUP BY id_bibrec) as grp on cmt.id_bibrec = grp.id_bibrec WHERE id_user=%s AND star_score = 0 ORDER BY grp.maxdatecreation DESC, cmt.date_creation DESC"
elif selected_order_by_option == "grof":
query = "SELECT cmt.id_bibrec, cmt.id, cmt.date_creation, cmt.body, cmt.status, cmt.in_reply_to_id_cmtRECORDCOMMENT FROM cmtRECORDCOMMENT as cmt left join (SELECT min(date_creation) as mindatecreation, id_bibrec FROM cmtRECORDCOMMENT WHERE id_user=%s AND star_score = 0 GROUP BY id_bibrec) as grp on cmt.id_bibrec = grp.id_bibrec WHERE id_user=%s AND star_score = 0 ORDER BY grp.mindatecreation ASC"
<p>You are going to be redirected to the desired content within 5 seconds. If the redirection does not happen automatically, please click on <a href="%(url)s">%(url_ok)s</a>.</p>
</body>
</html>""" % {
'url': escape(req.unparsed_uri, True),
'url_ok': escape(req.unparsed_uri)
})
raise apache.SERVER_RETURN(apache.DONE)
req.headers_out["Location"] = url
if req.response_sent_p:
raise IOError("Cannot redirect after headers have already been sent.")
req.status = redirection_type
    req.write('<p>Please go <a href="%s">here</a></p>\n' % url)
raise apache.SERVER_RETURN, apache.DONE
def rewrite_to_secure_url(url, secure_base=None):
"""
Rewrite URL to a Secure URL
    @param url: URL to be rewritten to a secure URL.
@param secure_base: Base URL of secure site (defaults to CFG_SITE_SECURE_URL).
"""
if secure_base is None:
secure_base = cfg.get('CFG_SITE_SECURE_URL')
url_parts = list(urlparse(url))
url_secure_parts = urlparse(secure_base)
url_parts[0] = url_secure_parts[0]
url_parts[1] = url_secure_parts[1]
return urlunparse(url_parts)
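## For example (a minimal illustration):
##   rewrite_to_secure_url('http://example.org/search?p=1',
##                         secure_base='https://example.org')
## returns 'https://example.org/search?p=1': only the scheme and network
## location are replaced; path, query and fragment are preserved.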
def get_referer(req, replace_ampersands=False):
""" Return the referring page of a request.
Referer (wikipedia): Referer is a common misspelling of the word
"referrer"; so common, in fact, that it made it into the official
specification of HTTP. When visiting a webpage, the referer or
referring page is the URL of the previous webpage from which a link was
followed.
@param req: request
    @param replace_ampersands: if 1, replace & by &amp; in url